Diffstat (limited to 'drivers/net/ethernet/mellanox')
475 files changed, 271278 insertions, 0 deletions
diff --git a/drivers/net/ethernet/mellanox/Kconfig b/drivers/net/ethernet/mellanox/Kconfig
new file mode 100644
index 000000000..b4f66eb9d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/Kconfig
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Mellanox driver configuration
+#
+
+config NET_VENDOR_MELLANOX
+	bool "Mellanox devices"
+	default y
+	depends on PCI || I2C
+	help
+	  If you have a network (Ethernet or RDMA) device belonging to this
+	  class, say Y.
+
+	  Note that the answer to this question doesn't directly affect the
+	  kernel: saying N will just cause the configurator to skip all
+	  the questions about Mellanox cards. If you say Y, you will be asked
+	  for your specific card in the following questions.
+
+if NET_VENDOR_MELLANOX
+
+source "drivers/net/ethernet/mellanox/mlx4/Kconfig"
+source "drivers/net/ethernet/mellanox/mlx5/core/Kconfig"
+source "drivers/net/ethernet/mellanox/mlxsw/Kconfig"
+source "drivers/net/ethernet/mellanox/mlxfw/Kconfig"
+source "drivers/net/ethernet/mellanox/mlxbf_gige/Kconfig"
+
+endif # NET_VENDOR_MELLANOX
diff --git a/drivers/net/ethernet/mellanox/Makefile b/drivers/net/ethernet/mellanox/Makefile
new file mode 100644
index 000000000..d4b5f547a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for the Mellanox device drivers.
+#
+
+obj-$(CONFIG_MLX4_CORE) += mlx4/
+obj-$(CONFIG_MLX5_CORE) += mlx5/core/
+obj-$(CONFIG_MLXSW_CORE) += mlxsw/
+obj-$(CONFIG_MLXFW) += mlxfw/
+obj-$(CONFIG_MLXBF_GIGE) += mlxbf_gige/
diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig
new file mode 100644
index 000000000..1b4b1f642
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Mellanox driver configuration
+#
+
+config MLX4_EN
+	tristate "Mellanox Technologies 1/10/40Gbit Ethernet support"
+	depends on PCI && NETDEVICES && ETHERNET && INET
+	depends on PTP_1588_CLOCK_OPTIONAL
+	select MLX4_CORE
+	help
+	  This driver supports Mellanox Technologies ConnectX Ethernet
+	  devices.
+
+config MLX4_EN_DCB
+	bool "Data Center Bridging (DCB) Support"
+	default y
+	depends on MLX4_EN && DCB
+	help
+	  Say Y here if you want to use Data Center Bridging (DCB) in the
+	  driver.
+	  If set to N, you will not be able to configure QoS and rate-limit attributes.
+	  This option depends on the kernel's DCB support.
+
+	  If unsure, set to Y.
+
+config MLX4_CORE
+	tristate
+	depends on PCI
+	select NET_DEVLINK
+	default n
+
+config MLX4_DEBUG
+	bool "Verbose debugging output" if (MLX4_CORE && EXPERT)
+	depends on MLX4_CORE
+	default y
+	help
+	  This option causes debugging code to be compiled into the
+	  mlx4_core driver. The output can be turned on via the
+	  debug_level module parameter (which can also be set after
+	  the driver is loaded through sysfs).
+
+config MLX4_CORE_GEN2
+	bool "Support for old gen2 Mellanox PCI IDs" if (MLX4_CORE)
+	depends on MLX4_CORE
+	default y
+	help
+	  Say Y here if you want to use old gen2 Mellanox devices in the
+	  driver.
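The MLX4_DEBUG help text above notes that the mlx4_core debug output is controlled by the debug_level module parameter and can be changed after the module is loaded through sysfs. As a rough sketch only — assuming mlx4_core is loaded, that the parameter is exported writable at the conventional /sys/module/mlx4_core/parameters/debug_level path, and that the program runs with root privileges — a small user-space C helper to toggle it could look like this:

#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
	/* Conventional sysfs location for a writable module parameter;
	 * adjust the path if your kernel exposes it elsewhere. */
	const char *path = "/sys/module/mlx4_core/parameters/debug_level";
	const char *val = (argc > 1) ? argv[1] : "1";	/* "1" enables, "0" disables */
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return EXIT_FAILURE;
	}
	if (fprintf(f, "%s\n", val) < 0) {
		perror("write");
		fclose(f);
		return EXIT_FAILURE;
	}
	fclose(f);
	printf("debug_level set to %s\n", val);
	return EXIT_SUCCESS;
}

Writing 1 or 0 to the same file from a root shell has the same effect; the program is only a convenience wrapper around that write.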
diff --git a/drivers/net/ethernet/mellanox/mlx4/Makefile b/drivers/net/ethernet/mellanox/mlx4/Makefile
new file mode 100644
index 000000000..3f400770f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_MLX4_CORE) += mlx4_core.o
+
+mlx4_core-y := alloc.o catas.o cmd.o cq.o eq.o fw.o fw_qos.o icm.o intf.o \
+		main.o mcg.o mr.o pd.o port.o profile.o qp.o reset.o sense.o \
+		srq.o resource_tracker.o crdump.o
+
+obj-$(CONFIG_MLX4_EN) += mlx4_en.o
+
+mlx4_en-y := en_main.o en_tx.o en_rx.o en_ethtool.o en_port.o en_cq.o \
+		en_resources.o en_netdev.o en_selftest.o en_clock.o
+mlx4_en-$(CONFIG_MLX4_EN_DCB) += en_dcb_nl.o
diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c
new file mode 100644
index 000000000..b330020dc
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c
@@ -0,0 +1,818 @@
+/*
+ * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ + +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/export.h> +#include <linux/bitmap.h> +#include <linux/dma-mapping.h> +#include <linux/vmalloc.h> + +#include "mlx4.h" + +u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap) +{ + u32 obj; + + spin_lock(&bitmap->lock); + + obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->last); + if (obj >= bitmap->max) { + bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top) + & bitmap->mask; + obj = find_first_zero_bit(bitmap->table, bitmap->max); + } + + if (obj < bitmap->max) { + set_bit(obj, bitmap->table); + bitmap->last = (obj + 1); + if (bitmap->last == bitmap->max) + bitmap->last = 0; + obj |= bitmap->top; + } else + obj = -1; + + if (obj != -1) + --bitmap->avail; + + spin_unlock(&bitmap->lock); + + return obj; +} + +void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj, int use_rr) +{ + mlx4_bitmap_free_range(bitmap, obj, 1, use_rr); +} + +static unsigned long find_aligned_range(unsigned long *bitmap, + u32 start, u32 nbits, + int len, int align, u32 skip_mask) +{ + unsigned long end, i; + +again: + start = ALIGN(start, align); + + while ((start < nbits) && (test_bit(start, bitmap) || + (start & skip_mask))) + start += align; + + if (start >= nbits) + return -1; + + end = start+len; + if (end > nbits) + return -1; + + for (i = start + 1; i < end; i++) { + if (test_bit(i, bitmap) || ((u32)i & skip_mask)) { + start = i + 1; + goto again; + } + } + + return start; +} + +u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, + int align, u32 skip_mask) +{ + u32 obj; + + if (likely(cnt == 1 && align == 1 && !skip_mask)) + return mlx4_bitmap_alloc(bitmap); + + spin_lock(&bitmap->lock); + + obj = find_aligned_range(bitmap->table, bitmap->last, + bitmap->max, cnt, align, skip_mask); + if (obj >= bitmap->max) { + bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top) + & bitmap->mask; + obj = find_aligned_range(bitmap->table, 0, bitmap->max, + cnt, align, skip_mask); + } + + if (obj < bitmap->max) { + bitmap_set(bitmap->table, obj, cnt); + if (obj == bitmap->last) { + bitmap->last = (obj + cnt); + if (bitmap->last >= bitmap->max) + bitmap->last = 0; + } + obj |= bitmap->top; + } else + obj = -1; + + if (obj != -1) + bitmap->avail -= cnt; + + spin_unlock(&bitmap->lock); + + return obj; +} + +u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap) +{ + return bitmap->avail; +} + +static u32 mlx4_bitmap_masked_value(struct mlx4_bitmap *bitmap, u32 obj) +{ + return obj & (bitmap->max + bitmap->reserved_top - 1); +} + +void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt, + int use_rr) +{ + obj &= bitmap->max + bitmap->reserved_top - 1; + + spin_lock(&bitmap->lock); + if (!use_rr) { + bitmap->last = min(bitmap->last, obj); + bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top) + & bitmap->mask; + } + bitmap_clear(bitmap->table, obj, cnt); + bitmap->avail += cnt; + spin_unlock(&bitmap->lock); +} + +int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, + u32 reserved_bot, u32 reserved_top) +{ + /* num must be a power of 2 */ + if (num != roundup_pow_of_two(num)) + return -EINVAL; + + bitmap->last = 0; + bitmap->top = 0; + bitmap->max = num - reserved_top; + bitmap->mask = mask; + bitmap->reserved_top = reserved_top; + bitmap->avail = num - reserved_top - reserved_bot; + bitmap->effective_len = bitmap->avail; + spin_lock_init(&bitmap->lock); + bitmap->table = bitmap_zalloc(bitmap->max, GFP_KERNEL); + if (!bitmap->table) + return 
-ENOMEM; + + bitmap_set(bitmap->table, 0, reserved_bot); + + return 0; +} + +void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap) +{ + bitmap_free(bitmap->table); +} + +struct mlx4_zone_allocator { + struct list_head entries; + struct list_head prios; + u32 last_uid; + u32 mask; + /* protect the zone_allocator from concurrent accesses */ + spinlock_t lock; + enum mlx4_zone_alloc_flags flags; +}; + +struct mlx4_zone_entry { + struct list_head list; + struct list_head prio_list; + u32 uid; + struct mlx4_zone_allocator *allocator; + struct mlx4_bitmap *bitmap; + int use_rr; + int priority; + int offset; + enum mlx4_zone_flags flags; +}; + +struct mlx4_zone_allocator *mlx4_zone_allocator_create(enum mlx4_zone_alloc_flags flags) +{ + struct mlx4_zone_allocator *zones = kmalloc(sizeof(*zones), GFP_KERNEL); + + if (NULL == zones) + return NULL; + + INIT_LIST_HEAD(&zones->entries); + INIT_LIST_HEAD(&zones->prios); + spin_lock_init(&zones->lock); + zones->last_uid = 0; + zones->mask = 0; + zones->flags = flags; + + return zones; +} + +int mlx4_zone_add_one(struct mlx4_zone_allocator *zone_alloc, + struct mlx4_bitmap *bitmap, + u32 flags, + int priority, + int offset, + u32 *puid) +{ + u32 mask = mlx4_bitmap_masked_value(bitmap, (u32)-1); + struct mlx4_zone_entry *it; + struct mlx4_zone_entry *zone = kmalloc(sizeof(*zone), GFP_KERNEL); + + if (NULL == zone) + return -ENOMEM; + + zone->flags = flags; + zone->bitmap = bitmap; + zone->use_rr = (flags & MLX4_ZONE_USE_RR) ? MLX4_USE_RR : 0; + zone->priority = priority; + zone->offset = offset; + + spin_lock(&zone_alloc->lock); + + zone->uid = zone_alloc->last_uid++; + zone->allocator = zone_alloc; + + if (zone_alloc->mask < mask) + zone_alloc->mask = mask; + + list_for_each_entry(it, &zone_alloc->prios, prio_list) + if (it->priority >= priority) + break; + + if (&it->prio_list == &zone_alloc->prios || it->priority > priority) + list_add_tail(&zone->prio_list, &it->prio_list); + list_add_tail(&zone->list, &it->list); + + spin_unlock(&zone_alloc->lock); + + *puid = zone->uid; + + return 0; +} + +/* Should be called under a lock */ +static void __mlx4_zone_remove_one_entry(struct mlx4_zone_entry *entry) +{ + struct mlx4_zone_allocator *zone_alloc = entry->allocator; + + if (!list_empty(&entry->prio_list)) { + /* Check if we need to add an alternative node to the prio list */ + if (!list_is_last(&entry->list, &zone_alloc->entries)) { + struct mlx4_zone_entry *next = list_first_entry(&entry->list, + typeof(*next), + list); + + if (next->priority == entry->priority) + list_add_tail(&next->prio_list, &entry->prio_list); + } + + list_del(&entry->prio_list); + } + + list_del(&entry->list); + + if (zone_alloc->flags & MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP) { + u32 mask = 0; + struct mlx4_zone_entry *it; + + list_for_each_entry(it, &zone_alloc->prios, prio_list) { + u32 cur_mask = mlx4_bitmap_masked_value(it->bitmap, (u32)-1); + + if (mask < cur_mask) + mask = cur_mask; + } + zone_alloc->mask = mask; + } +} + +void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc) +{ + struct mlx4_zone_entry *zone, *tmp; + + spin_lock(&zone_alloc->lock); + + list_for_each_entry_safe(zone, tmp, &zone_alloc->entries, list) { + list_del(&zone->list); + list_del(&zone->prio_list); + kfree(zone); + } + + spin_unlock(&zone_alloc->lock); + kfree(zone_alloc); +} + +/* Should be called under a lock */ +static u32 __mlx4_alloc_from_zone(struct mlx4_zone_entry *zone, int count, + int align, u32 skip_mask, u32 *puid) +{ + u32 uid = 0; + u32 res; + struct mlx4_zone_allocator 
*zone_alloc = zone->allocator; + struct mlx4_zone_entry *curr_node; + + res = mlx4_bitmap_alloc_range(zone->bitmap, count, + align, skip_mask); + + if (res != (u32)-1) { + res += zone->offset; + uid = zone->uid; + goto out; + } + + list_for_each_entry(curr_node, &zone_alloc->prios, prio_list) { + if (unlikely(curr_node->priority == zone->priority)) + break; + } + + if (zone->flags & MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO) { + struct mlx4_zone_entry *it = curr_node; + + list_for_each_entry_continue_reverse(it, &zone_alloc->entries, list) { + res = mlx4_bitmap_alloc_range(it->bitmap, count, + align, skip_mask); + if (res != (u32)-1) { + res += it->offset; + uid = it->uid; + goto out; + } + } + } + + if (zone->flags & MLX4_ZONE_ALLOW_ALLOC_FROM_EQ_PRIO) { + struct mlx4_zone_entry *it = curr_node; + + list_for_each_entry_from(it, &zone_alloc->entries, list) { + if (unlikely(it == zone)) + continue; + + if (unlikely(it->priority != curr_node->priority)) + break; + + res = mlx4_bitmap_alloc_range(it->bitmap, count, + align, skip_mask); + if (res != (u32)-1) { + res += it->offset; + uid = it->uid; + goto out; + } + } + } + + if (zone->flags & MLX4_ZONE_FALLBACK_TO_HIGHER_PRIO) { + if (list_is_last(&curr_node->prio_list, &zone_alloc->prios)) + goto out; + + curr_node = list_first_entry(&curr_node->prio_list, + typeof(*curr_node), + prio_list); + + list_for_each_entry_from(curr_node, &zone_alloc->entries, list) { + res = mlx4_bitmap_alloc_range(curr_node->bitmap, count, + align, skip_mask); + if (res != (u32)-1) { + res += curr_node->offset; + uid = curr_node->uid; + goto out; + } + } + } + +out: + if (NULL != puid && res != (u32)-1) + *puid = uid; + return res; +} + +/* Should be called under a lock */ +static void __mlx4_free_from_zone(struct mlx4_zone_entry *zone, u32 obj, + u32 count) +{ + mlx4_bitmap_free_range(zone->bitmap, obj - zone->offset, count, zone->use_rr); +} + +/* Should be called under a lock */ +static struct mlx4_zone_entry *__mlx4_find_zone_by_uid( + struct mlx4_zone_allocator *zones, u32 uid) +{ + struct mlx4_zone_entry *zone; + + list_for_each_entry(zone, &zones->entries, list) { + if (zone->uid == uid) + return zone; + } + + return NULL; +} + +struct mlx4_bitmap *mlx4_zone_get_bitmap(struct mlx4_zone_allocator *zones, u32 uid) +{ + struct mlx4_zone_entry *zone; + struct mlx4_bitmap *bitmap; + + spin_lock(&zones->lock); + + zone = __mlx4_find_zone_by_uid(zones, uid); + + bitmap = zone == NULL ? NULL : zone->bitmap; + + spin_unlock(&zones->lock); + + return bitmap; +} + +int mlx4_zone_remove_one(struct mlx4_zone_allocator *zones, u32 uid) +{ + struct mlx4_zone_entry *zone; + int res = 0; + + spin_lock(&zones->lock); + + zone = __mlx4_find_zone_by_uid(zones, uid); + + if (NULL == zone) { + res = -1; + goto out; + } + + __mlx4_zone_remove_one_entry(zone); + +out: + spin_unlock(&zones->lock); + kfree(zone); + + return res; +} + +/* Should be called under a lock */ +static struct mlx4_zone_entry *__mlx4_find_zone_by_uid_unique( + struct mlx4_zone_allocator *zones, u32 obj) +{ + struct mlx4_zone_entry *zone, *zone_candidate = NULL; + u32 dist = (u32)-1; + + /* Search for the smallest zone that this obj could be + * allocated from. This is done in order to handle + * situations when small bitmaps are allocated from bigger + * bitmaps (and the allocated space is marked as reserved in + * the bigger bitmap. 
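+ * "Smallest" is judged by the bitmap's effective_len (recorded at
+ * mlx4_bitmap_init() time), so the most specific zone that can contain
+ * the object ends up as the candidate.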
+ */ + list_for_each_entry(zone, &zones->entries, list) { + if (obj >= zone->offset) { + u32 mobj = (obj - zone->offset) & zones->mask; + + if (mobj < zone->bitmap->max) { + u32 curr_dist = zone->bitmap->effective_len; + + if (curr_dist < dist) { + dist = curr_dist; + zone_candidate = zone; + } + } + } + } + + return zone_candidate; +} + +u32 mlx4_zone_alloc_entries(struct mlx4_zone_allocator *zones, u32 uid, int count, + int align, u32 skip_mask, u32 *puid) +{ + struct mlx4_zone_entry *zone; + int res = -1; + + spin_lock(&zones->lock); + + zone = __mlx4_find_zone_by_uid(zones, uid); + + if (NULL == zone) + goto out; + + res = __mlx4_alloc_from_zone(zone, count, align, skip_mask, puid); + +out: + spin_unlock(&zones->lock); + + return res; +} + +u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones, u32 uid, u32 obj, u32 count) +{ + struct mlx4_zone_entry *zone; + int res = 0; + + spin_lock(&zones->lock); + + zone = __mlx4_find_zone_by_uid(zones, uid); + + if (NULL == zone) { + res = -1; + goto out; + } + + __mlx4_free_from_zone(zone, obj, count); + +out: + spin_unlock(&zones->lock); + + return res; +} + +u32 mlx4_zone_free_entries_unique(struct mlx4_zone_allocator *zones, u32 obj, u32 count) +{ + struct mlx4_zone_entry *zone; + int res; + + if (!(zones->flags & MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP)) + return -EFAULT; + + spin_lock(&zones->lock); + + zone = __mlx4_find_zone_by_uid_unique(zones, obj); + + if (NULL == zone) { + res = -1; + goto out; + } + + __mlx4_free_from_zone(zone, obj, count); + res = 0; + +out: + spin_unlock(&zones->lock); + + return res; +} + +static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size, + struct mlx4_buf *buf) +{ + dma_addr_t t; + + buf->nbufs = 1; + buf->npages = 1; + buf->page_shift = get_order(size) + PAGE_SHIFT; + buf->direct.buf = + dma_alloc_coherent(&dev->persist->pdev->dev, size, &t, + GFP_KERNEL); + if (!buf->direct.buf) + return -ENOMEM; + + buf->direct.map = t; + + while (t & ((1 << buf->page_shift) - 1)) { + --buf->page_shift; + buf->npages *= 2; + } + + return 0; +} + +/* Handling for queue buffers -- we allocate a bunch of memory and + * register it in a memory region at HCA virtual address 0. If the + * requested size is > max_direct, we split the allocation into + * multiple pages, so we don't require too much contiguous memory. 
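+ * The chosen layout is visible to callers: the direct case keeps
+ * nbufs == 1 with the data in buf->direct, while the split case leaves
+ * buf->direct.buf NULL and tracks one PAGE_SIZE coherent chunk per
+ * buf->page_list entry, which is what mlx4_buf_free() walks on release.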
+ */ +int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, + struct mlx4_buf *buf) +{ + if (size <= max_direct) { + return mlx4_buf_direct_alloc(dev, size, buf); + } else { + dma_addr_t t; + int i; + + buf->direct.buf = NULL; + buf->nbufs = DIV_ROUND_UP(size, PAGE_SIZE); + buf->npages = buf->nbufs; + buf->page_shift = PAGE_SHIFT; + buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), + GFP_KERNEL); + if (!buf->page_list) + return -ENOMEM; + + for (i = 0; i < buf->nbufs; ++i) { + buf->page_list[i].buf = + dma_alloc_coherent(&dev->persist->pdev->dev, + PAGE_SIZE, &t, GFP_KERNEL); + if (!buf->page_list[i].buf) + goto err_free; + + buf->page_list[i].map = t; + } + } + + return 0; + +err_free: + mlx4_buf_free(dev, size, buf); + + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(mlx4_buf_alloc); + +void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) +{ + if (buf->nbufs == 1) { + dma_free_coherent(&dev->persist->pdev->dev, size, + buf->direct.buf, buf->direct.map); + } else { + int i; + + for (i = 0; i < buf->nbufs; ++i) + if (buf->page_list[i].buf) + dma_free_coherent(&dev->persist->pdev->dev, + PAGE_SIZE, + buf->page_list[i].buf, + buf->page_list[i].map); + kfree(buf->page_list); + } +} +EXPORT_SYMBOL_GPL(mlx4_buf_free); + +static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device) +{ + struct mlx4_db_pgdir *pgdir; + + pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL); + if (!pgdir) + return NULL; + + bitmap_fill(pgdir->order1, MLX4_DB_PER_PAGE / 2); + pgdir->bits[0] = pgdir->order0; + pgdir->bits[1] = pgdir->order1; + pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE, + &pgdir->db_dma, GFP_KERNEL); + if (!pgdir->db_page) { + kfree(pgdir); + return NULL; + } + + return pgdir; +} + +static int mlx4_alloc_db_from_pgdir(struct mlx4_db_pgdir *pgdir, + struct mlx4_db *db, int order) +{ + int o; + int i; + + for (o = order; o <= 1; ++o) { + i = find_first_bit(pgdir->bits[o], MLX4_DB_PER_PAGE >> o); + if (i < MLX4_DB_PER_PAGE >> o) + goto found; + } + + return -ENOMEM; + +found: + clear_bit(i, pgdir->bits[o]); + + i <<= o; + + if (o > order) + set_bit(i ^ 1, pgdir->bits[order]); + + db->u.pgdir = pgdir; + db->index = i; + db->db = pgdir->db_page + db->index; + db->dma = pgdir->db_dma + db->index * 4; + db->order = order; + + return 0; +} + +int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_db_pgdir *pgdir; + int ret = 0; + + mutex_lock(&priv->pgdir_mutex); + + list_for_each_entry(pgdir, &priv->pgdir_list, list) + if (!mlx4_alloc_db_from_pgdir(pgdir, db, order)) + goto out; + + pgdir = mlx4_alloc_db_pgdir(&dev->persist->pdev->dev); + if (!pgdir) { + ret = -ENOMEM; + goto out; + } + + list_add(&pgdir->list, &priv->pgdir_list); + + /* This should never fail -- we just allocated an empty page: */ + WARN_ON(mlx4_alloc_db_from_pgdir(pgdir, db, order)); + +out: + mutex_unlock(&priv->pgdir_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(mlx4_db_alloc); + +void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int o; + int i; + + mutex_lock(&priv->pgdir_mutex); + + o = db->order; + i = db->index; + + if (db->order == 0 && test_bit(i ^ 1, db->u.pgdir->order0)) { + clear_bit(i ^ 1, db->u.pgdir->order0); + ++o; + } + i >>= o; + set_bit(i, db->u.pgdir->bits[o]); + + if (bitmap_full(db->u.pgdir->order1, MLX4_DB_PER_PAGE / 2)) { + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, + db->u.pgdir->db_page, db->u.pgdir->db_dma); + 
list_del(&db->u.pgdir->list); + kfree(db->u.pgdir); + } + + mutex_unlock(&priv->pgdir_mutex); +} +EXPORT_SYMBOL_GPL(mlx4_db_free); + +int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, + int size) +{ + int err; + + err = mlx4_db_alloc(dev, &wqres->db, 1); + if (err) + return err; + + *wqres->db.db = 0; + + err = mlx4_buf_direct_alloc(dev, size, &wqres->buf); + if (err) + goto err_db; + + err = mlx4_mtt_init(dev, wqres->buf.npages, wqres->buf.page_shift, + &wqres->mtt); + if (err) + goto err_buf; + + err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf); + if (err) + goto err_mtt; + + return 0; + +err_mtt: + mlx4_mtt_cleanup(dev, &wqres->mtt); +err_buf: + mlx4_buf_free(dev, size, &wqres->buf); +err_db: + mlx4_db_free(dev, &wqres->db); + + return err; +} +EXPORT_SYMBOL_GPL(mlx4_alloc_hwq_res); + +void mlx4_free_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, + int size) +{ + mlx4_mtt_cleanup(dev, &wqres->mtt); + mlx4_buf_free(dev, size, &wqres->buf); + mlx4_db_free(dev, &wqres->db); +} +EXPORT_SYMBOL_GPL(mlx4_free_hwq_res); diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c new file mode 100644 index 000000000..0eb7b8363 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/workqueue.h> +#include <linux/module.h> + +#include "mlx4.h" + +enum { + MLX4_CATAS_POLL_INTERVAL = 5 * HZ, +}; + + + +int mlx4_internal_err_reset = 1; +module_param_named(internal_err_reset, mlx4_internal_err_reset, int, 0644); +MODULE_PARM_DESC(internal_err_reset, + "Reset device on internal errors if non-zero (default 1)"); + +static int read_vendor_id(struct mlx4_dev *dev) +{ + u16 vendor_id = 0; + int ret; + + ret = pci_read_config_word(dev->persist->pdev, 0, &vendor_id); + if (ret) { + mlx4_err(dev, "Failed to read vendor ID, ret=%d\n", ret); + return ret; + } + + if (vendor_id == 0xffff) { + mlx4_err(dev, "PCI can't be accessed to read vendor id\n"); + return -EINVAL; + } + + return 0; +} + +static int mlx4_reset_master(struct mlx4_dev *dev) +{ + int err = 0; + + if (mlx4_is_master(dev)) + mlx4_report_internal_err_comm_event(dev); + + if (!pci_channel_offline(dev->persist->pdev)) { + err = read_vendor_id(dev); + /* If PCI can't be accessed to read vendor ID we assume that its + * link was disabled and chip was already reset. + */ + if (err) + return 0; + + err = mlx4_reset(dev); + if (err) + mlx4_err(dev, "Fail to reset HCA\n"); + } + + return err; +} + +static int mlx4_reset_slave(struct mlx4_dev *dev) +{ +#define COM_CHAN_RST_REQ_OFFSET 0x10 +#define COM_CHAN_RST_ACK_OFFSET 0x08 + + u32 comm_flags; + u32 rst_req; + u32 rst_ack; + unsigned long end; + struct mlx4_priv *priv = mlx4_priv(dev); + + if (pci_channel_offline(dev->persist->pdev)) + return 0; + + comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_FLAGS)); + if (comm_flags == 0xffffffff) { + mlx4_err(dev, "VF reset is not needed\n"); + return 0; + } + + if (!(dev->caps.vf_caps & MLX4_VF_CAP_FLAG_RESET)) { + mlx4_err(dev, "VF reset is not supported\n"); + return -EOPNOTSUPP; + } + + rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >> + COM_CHAN_RST_REQ_OFFSET; + rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >> + COM_CHAN_RST_ACK_OFFSET; + if (rst_req != rst_ack) { + mlx4_err(dev, "Communication channel isn't sync, fail to send reset\n"); + return -EIO; + } + + rst_req ^= 1; + mlx4_warn(dev, "VF is sending reset request to Firmware\n"); + comm_flags = rst_req << COM_CHAN_RST_REQ_OFFSET; + __raw_writel((__force u32)cpu_to_be32(comm_flags), + (__iomem char *)priv->mfunc.comm + MLX4_COMM_CHAN_FLAGS); + + end = msecs_to_jiffies(MLX4_COMM_TIME) + jiffies; + while (time_before(jiffies, end)) { + comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_FLAGS)); + rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >> + COM_CHAN_RST_ACK_OFFSET; + + /* Reading rst_req again since the communication channel can + * be reset at any time by the PF and all its bits will be + * set to zero. + */ + rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >> + COM_CHAN_RST_REQ_OFFSET; + + if (rst_ack == rst_req) { + mlx4_warn(dev, "VF Reset succeed\n"); + return 0; + } + cond_resched(); + } + mlx4_err(dev, "Fail to send reset over the communication channel\n"); + return -ETIMEDOUT; +} + +int mlx4_comm_internal_err(u32 slave_read) +{ + return (u32)COMM_CHAN_EVENT_INTERNAL_ERR == + (slave_read & (u32)COMM_CHAN_EVENT_INTERNAL_ERR) ? 
1 : 0; +} + +void mlx4_enter_error_state(struct mlx4_dev_persistent *persist) +{ + int err; + struct mlx4_dev *dev; + + if (!mlx4_internal_err_reset) + return; + + mutex_lock(&persist->device_state_mutex); + if (persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + goto out; + + dev = persist->dev; + mlx4_err(dev, "device is going to be reset\n"); + if (mlx4_is_slave(dev)) { + err = mlx4_reset_slave(dev); + } else { + mlx4_crdump_collect(dev); + err = mlx4_reset_master(dev); + } + + if (!err) { + mlx4_err(dev, "device was reset successfully\n"); + } else { + /* EEH could have disabled the PCI channel during reset. That's + * recoverable and the PCI error flow will handle it. + */ + if (!pci_channel_offline(dev->persist->pdev)) + BUG_ON(1); + } + dev->persist->state |= MLX4_DEVICE_STATE_INTERNAL_ERROR; + mutex_unlock(&persist->device_state_mutex); + + /* At that step HW was already reset, now notify clients */ + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); + mlx4_cmd_wake_completions(dev); + return; + +out: + mutex_unlock(&persist->device_state_mutex); +} + +static void mlx4_handle_error_state(struct mlx4_dev_persistent *persist) +{ + struct mlx4_dev *dev = persist->dev; + struct devlink *devlink; + int err = 0; + + mlx4_enter_error_state(persist); + devlink = priv_to_devlink(mlx4_priv(dev)); + devl_lock(devlink); + mutex_lock(&persist->interface_state_mutex); + if (persist->interface_state & MLX4_INTERFACE_STATE_UP && + !(persist->interface_state & MLX4_INTERFACE_STATE_DELETION)) { + err = mlx4_restart_one(persist->pdev); + mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n", + err); + } + mutex_unlock(&persist->interface_state_mutex); + devl_unlock(devlink); +} + +static void dump_err_buf(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + int i; + + mlx4_err(dev, "Internal error detected:\n"); + for (i = 0; i < priv->fw.catas_size; ++i) + mlx4_err(dev, " buf[%02x]: %08x\n", + i, swab32(readl(priv->catas_err.map + i))); +} + +static void poll_catas(struct timer_list *t) +{ + struct mlx4_priv *priv = from_timer(priv, t, catas_err.timer); + struct mlx4_dev *dev = &priv->dev; + u32 slave_read; + + if (mlx4_is_slave(dev)) { + slave_read = swab32(readl(&priv->mfunc.comm->slave_read)); + if (mlx4_comm_internal_err(slave_read)) { + mlx4_warn(dev, "Internal error detected on the communication channel\n"); + goto internal_err; + } + } else if (readl(priv->catas_err.map)) { + dump_err_buf(dev); + goto internal_err; + } + + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + mlx4_warn(dev, "Internal error mark was detected on device\n"); + goto internal_err; + } + + mod_timer(&priv->catas_err.timer, + round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); + return; + +internal_err: + if (mlx4_internal_err_reset) + queue_work(dev->persist->catas_wq, &dev->persist->catas_work); +} + +static void catas_reset(struct work_struct *work) +{ + struct mlx4_dev_persistent *persist = + container_of(work, struct mlx4_dev_persistent, + catas_work); + + mlx4_handle_error_state(persist); +} + +void mlx4_start_catas_poll(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + phys_addr_t addr; + + INIT_LIST_HEAD(&priv->catas_err.list); + timer_setup(&priv->catas_err.timer, poll_catas, 0); + priv->catas_err.map = NULL; + + if (!mlx4_is_slave(dev)) { + addr = pci_resource_start(dev->persist->pdev, + priv->fw.catas_bar) + + priv->fw.catas_offset; + + priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); + if (!priv->catas_err.map) { + 
mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n", + (unsigned long long)addr); + return; + } + } + + priv->catas_err.timer.expires = + round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL); + add_timer(&priv->catas_err.timer); +} + +void mlx4_stop_catas_poll(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + del_timer_sync(&priv->catas_err.timer); + + if (priv->catas_err.map) { + iounmap(priv->catas_err.map); + priv->catas_err.map = NULL; + } + + if (dev->persist->interface_state & MLX4_INTERFACE_STATE_DELETION) + flush_workqueue(dev->persist->catas_wq); +} + +int mlx4_catas_init(struct mlx4_dev *dev) +{ + INIT_WORK(&dev->persist->catas_work, catas_reset); + dev->persist->catas_wq = create_singlethread_workqueue("mlx4_health"); + if (!dev->persist->catas_wq) + return -ENOMEM; + + return 0; +} + +void mlx4_catas_end(struct mlx4_dev *dev) +{ + if (dev->persist->catas_wq) { + destroy_workqueue(dev->persist->catas_wq); + dev->persist->catas_wq = NULL; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c new file mode 100644 index 000000000..c56d2194c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -0,0 +1,3428 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <linux/pci.h> +#include <linux/errno.h> + +#include <linux/mlx4/cmd.h> +#include <linux/mlx4/device.h> +#include <linux/semaphore.h> +#include <rdma/ib_smi.h> +#include <linux/delay.h> +#include <linux/etherdevice.h> + +#include <asm/io.h> + +#include "mlx4.h" +#include "fw.h" +#include "fw_qos.h" +#include "mlx4_stats.h" + +#define CMD_POLL_TOKEN 0xffff +#define INBOX_MASK 0xffffffffffffff00ULL + +#define CMD_CHAN_VER 1 +#define CMD_CHAN_IF_REV 1 + +enum { + /* command completed successfully: */ + CMD_STAT_OK = 0x00, + /* Internal error (such as a bus error) occurred while processing command: */ + CMD_STAT_INTERNAL_ERR = 0x01, + /* Operation/command not supported or opcode modifier not supported: */ + CMD_STAT_BAD_OP = 0x02, + /* Parameter not supported or parameter out of range: */ + CMD_STAT_BAD_PARAM = 0x03, + /* System not enabled or bad system state: */ + CMD_STAT_BAD_SYS_STATE = 0x04, + /* Attempt to access reserved or unallocaterd resource: */ + CMD_STAT_BAD_RESOURCE = 0x05, + /* Requested resource is currently executing a command, or is otherwise busy: */ + CMD_STAT_RESOURCE_BUSY = 0x06, + /* Required capability exceeds device limits: */ + CMD_STAT_EXCEED_LIM = 0x08, + /* Resource is not in the appropriate state or ownership: */ + CMD_STAT_BAD_RES_STATE = 0x09, + /* Index out of range: */ + CMD_STAT_BAD_INDEX = 0x0a, + /* FW image corrupted: */ + CMD_STAT_BAD_NVMEM = 0x0b, + /* Error in ICM mapping (e.g. not enough auxiliary ICM pages to execute command): */ + CMD_STAT_ICM_ERROR = 0x0c, + /* Attempt to modify a QP/EE which is not in the presumed state: */ + CMD_STAT_BAD_QP_STATE = 0x10, + /* Bad segment parameters (Address/Size): */ + CMD_STAT_BAD_SEG_PARAM = 0x20, + /* Memory Region has Memory Windows bound to: */ + CMD_STAT_REG_BOUND = 0x21, + /* HCA local attached memory not present: */ + CMD_STAT_LAM_NOT_PRE = 0x22, + /* Bad management packet (silently discarded): */ + CMD_STAT_BAD_PKT = 0x30, + /* More outstanding CQEs in CQ than new CQ size: */ + CMD_STAT_BAD_SIZE = 0x40, + /* Multi Function device support required: */ + CMD_STAT_MULTI_FUNC_REQ = 0x50, +}; + +enum { + HCR_IN_PARAM_OFFSET = 0x00, + HCR_IN_MODIFIER_OFFSET = 0x08, + HCR_OUT_PARAM_OFFSET = 0x0c, + HCR_TOKEN_OFFSET = 0x14, + HCR_STATUS_OFFSET = 0x18, + + HCR_OPMOD_SHIFT = 12, + HCR_T_BIT = 21, + HCR_E_BIT = 22, + HCR_GO_BIT = 23 +}; + +enum { + GO_BIT_TIMEOUT_MSECS = 10000 +}; + +enum mlx4_vlan_transition { + MLX4_VLAN_TRANSITION_VST_VST = 0, + MLX4_VLAN_TRANSITION_VST_VGT = 1, + MLX4_VLAN_TRANSITION_VGT_VST = 2, + MLX4_VLAN_TRANSITION_VGT_VGT = 3, +}; + + +struct mlx4_cmd_context { + struct completion done; + int result; + int next; + u64 out_param; + u16 token; + u8 fw_status; +}; + +static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr_cmd *in_vhcr); + +static int mlx4_status_to_errno(u8 status) +{ + static const int trans_table[] = { + [CMD_STAT_INTERNAL_ERR] = -EIO, + [CMD_STAT_BAD_OP] = -EPERM, + [CMD_STAT_BAD_PARAM] = -EINVAL, + [CMD_STAT_BAD_SYS_STATE] = -ENXIO, + [CMD_STAT_BAD_RESOURCE] = -EBADF, + [CMD_STAT_RESOURCE_BUSY] = -EBUSY, + [CMD_STAT_EXCEED_LIM] = -ENOMEM, + [CMD_STAT_BAD_RES_STATE] = -EBADF, + [CMD_STAT_BAD_INDEX] = -EBADF, + [CMD_STAT_BAD_NVMEM] = -EFAULT, + [CMD_STAT_ICM_ERROR] = -ENFILE, + [CMD_STAT_BAD_QP_STATE] = -EINVAL, + [CMD_STAT_BAD_SEG_PARAM] = -EFAULT, + [CMD_STAT_REG_BOUND] = -EBUSY, + [CMD_STAT_LAM_NOT_PRE] = -EAGAIN, + [CMD_STAT_BAD_PKT] = -EINVAL, + 
[CMD_STAT_BAD_SIZE] = -ENOMEM, + [CMD_STAT_MULTI_FUNC_REQ] = -EACCES, + }; + + if (status >= ARRAY_SIZE(trans_table) || + (status != CMD_STAT_OK && trans_table[status] == 0)) + return -EIO; + + return trans_table[status]; +} + +static u8 mlx4_errno_to_status(int errno) +{ + switch (errno) { + case -EPERM: + return CMD_STAT_BAD_OP; + case -EINVAL: + return CMD_STAT_BAD_PARAM; + case -ENXIO: + return CMD_STAT_BAD_SYS_STATE; + case -EBUSY: + return CMD_STAT_RESOURCE_BUSY; + case -ENOMEM: + return CMD_STAT_EXCEED_LIM; + case -ENFILE: + return CMD_STAT_ICM_ERROR; + default: + return CMD_STAT_INTERNAL_ERR; + } +} + +static int mlx4_internal_err_ret_value(struct mlx4_dev *dev, u16 op, + u8 op_modifier) +{ + switch (op) { + case MLX4_CMD_UNMAP_ICM: + case MLX4_CMD_UNMAP_ICM_AUX: + case MLX4_CMD_UNMAP_FA: + case MLX4_CMD_2RST_QP: + case MLX4_CMD_HW2SW_EQ: + case MLX4_CMD_HW2SW_CQ: + case MLX4_CMD_HW2SW_SRQ: + case MLX4_CMD_HW2SW_MPT: + case MLX4_CMD_CLOSE_HCA: + case MLX4_QP_FLOW_STEERING_DETACH: + case MLX4_CMD_FREE_RES: + case MLX4_CMD_CLOSE_PORT: + return CMD_STAT_OK; + + case MLX4_CMD_QP_ATTACH: + /* On Detach case return success */ + if (op_modifier == 0) + return CMD_STAT_OK; + return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + + default: + return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + } +} + +static int mlx4_closing_cmd_fatal_error(u16 op, u8 fw_status) +{ + /* Any error during the closing commands below is considered fatal */ + if (op == MLX4_CMD_CLOSE_HCA || + op == MLX4_CMD_HW2SW_EQ || + op == MLX4_CMD_HW2SW_CQ || + op == MLX4_CMD_2RST_QP || + op == MLX4_CMD_HW2SW_SRQ || + op == MLX4_CMD_SYNC_TPT || + op == MLX4_CMD_UNMAP_ICM || + op == MLX4_CMD_UNMAP_ICM_AUX || + op == MLX4_CMD_UNMAP_FA) + return 1; + /* Error on MLX4_CMD_HW2SW_MPT is fatal except when fw status equals + * CMD_STAT_REG_BOUND. + * This status indicates that memory region has memory windows bound to it + * which may result from invalid user space usage and is not fatal. + */ + if (op == MLX4_CMD_HW2SW_MPT && fw_status != CMD_STAT_REG_BOUND) + return 1; + return 0; +} + +static int mlx4_cmd_reset_flow(struct mlx4_dev *dev, u16 op, u8 op_modifier, + int err) +{ + /* Only if reset flow is really active return code is based on + * command, otherwise current error code is returned. + */ + if (mlx4_internal_err_reset) { + mlx4_enter_error_state(dev->persist); + err = mlx4_internal_err_ret_value(dev, op, op_modifier); + } + + return err; +} + +static int comm_pending(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + u32 status = readl(&priv->mfunc.comm->slave_read); + + return (swab32(status) >> 31) != priv->cmd.comm_toggle; +} + +static int mlx4_comm_cmd_post(struct mlx4_dev *dev, u8 cmd, u16 param) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + u32 val; + + /* To avoid writing to unknown addresses after the device state was + * changed to internal error and the function was rest, + * check the INTERNAL_ERROR flag which is updated under + * device_state_mutex lock. 
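+ * The comm_toggle flip and the slave_write update below happen under
+ * this same mutex, so the toggle value that comm_pending() compares
+ * against bit 31 of slave_read always reflects the last posted command.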
+ */ + mutex_lock(&dev->persist->device_state_mutex); + + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + mutex_unlock(&dev->persist->device_state_mutex); + return -EIO; + } + + priv->cmd.comm_toggle ^= 1; + val = param | (cmd << 16) | (priv->cmd.comm_toggle << 31); + __raw_writel((__force u32) cpu_to_be32(val), + &priv->mfunc.comm->slave_write); + mutex_unlock(&dev->persist->device_state_mutex); + return 0; +} + +static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param, + unsigned long timeout) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + unsigned long end; + int err = 0; + int ret_from_pending = 0; + + /* First, verify that the master reports correct status */ + if (comm_pending(dev)) { + mlx4_warn(dev, "Communication channel is not idle - my toggle is %d (cmd:0x%x)\n", + priv->cmd.comm_toggle, cmd); + return -EAGAIN; + } + + /* Write command */ + down(&priv->cmd.poll_sem); + if (mlx4_comm_cmd_post(dev, cmd, param)) { + /* Only in case the device state is INTERNAL_ERROR, + * mlx4_comm_cmd_post returns with an error + */ + err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + goto out; + } + + end = msecs_to_jiffies(timeout) + jiffies; + while (comm_pending(dev) && time_before(jiffies, end)) + cond_resched(); + ret_from_pending = comm_pending(dev); + if (ret_from_pending) { + /* check if the slave is trying to boot in the middle of + * FLR process. The only non-zero result in the RESET command + * is MLX4_DELAY_RESET_SLAVE*/ + if ((MLX4_COMM_CMD_RESET == cmd)) { + err = MLX4_DELAY_RESET_SLAVE; + goto out; + } else { + mlx4_warn(dev, "Communication channel command 0x%x timed out\n", + cmd); + err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + } + } + + if (err) + mlx4_enter_error_state(dev->persist); +out: + up(&priv->cmd.poll_sem); + return err; +} + +static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 vhcr_cmd, + u16 param, u16 op, unsigned long timeout) +{ + struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; + struct mlx4_cmd_context *context; + unsigned long end; + int err = 0; + + down(&cmd->event_sem); + + spin_lock(&cmd->context_lock); + BUG_ON(cmd->free_head < 0); + context = &cmd->context[cmd->free_head]; + context->token += cmd->token_mask + 1; + cmd->free_head = context->next; + spin_unlock(&cmd->context_lock); + + reinit_completion(&context->done); + + if (mlx4_comm_cmd_post(dev, vhcr_cmd, param)) { + /* Only in case the device state is INTERNAL_ERROR, + * mlx4_comm_cmd_post returns with an error + */ + err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + goto out; + } + + if (!wait_for_completion_timeout(&context->done, + msecs_to_jiffies(timeout))) { + mlx4_warn(dev, "communication channel command 0x%x (op=0x%x) timed out\n", + vhcr_cmd, op); + goto out_reset; + } + + err = context->result; + if (err && context->fw_status != CMD_STAT_MULTI_FUNC_REQ) { + mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", + vhcr_cmd, context->fw_status); + if (mlx4_closing_cmd_fatal_error(op, context->fw_status)) + goto out_reset; + } + + /* wait for comm channel ready + * this is necessary for prevention the race + * when switching between event to polling mode + * Skipping this section in case the device is in FATAL_ERROR state, + * In this state, no commands are sent via the comm channel until + * the device has returned from reset. 
+ */ + if (!(dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) { + end = msecs_to_jiffies(timeout) + jiffies; + while (comm_pending(dev) && time_before(jiffies, end)) + cond_resched(); + } + goto out; + +out_reset: + err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + mlx4_enter_error_state(dev->persist); +out: + spin_lock(&cmd->context_lock); + context->next = cmd->free_head; + cmd->free_head = context - cmd->context; + spin_unlock(&cmd->context_lock); + + up(&cmd->event_sem); + return err; +} + +int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param, + u16 op, unsigned long timeout) +{ + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + + if (mlx4_priv(dev)->cmd.use_events) + return mlx4_comm_cmd_wait(dev, cmd, param, op, timeout); + return mlx4_comm_cmd_poll(dev, cmd, param, timeout); +} + +static int cmd_pending(struct mlx4_dev *dev) +{ + u32 status; + + if (pci_channel_offline(dev->persist->pdev)) + return -EIO; + + status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET); + + return (status & swab32(1 << HCR_GO_BIT)) || + (mlx4_priv(dev)->cmd.toggle == + !!(status & swab32(1 << HCR_T_BIT))); +} + +static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, + u32 in_modifier, u8 op_modifier, u16 op, u16 token, + int event) +{ + struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; + u32 __iomem *hcr = cmd->hcr; + int ret = -EIO; + unsigned long end; + + mutex_lock(&dev->persist->device_state_mutex); + /* To avoid writing to unknown addresses after the device state was + * changed to internal error and the chip was reset, + * check the INTERNAL_ERROR flag which is updated under + * device_state_mutex lock. + */ + if (pci_channel_offline(dev->persist->pdev) || + (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) { + /* + * Device is going through error recovery + * and cannot accept commands. + */ + goto out; + } + + end = jiffies; + if (event) + end += msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS); + + while (cmd_pending(dev)) { + if (pci_channel_offline(dev->persist->pdev)) { + /* + * Device is going through error recovery + * and cannot accept commands. + */ + goto out; + } + + if (time_after_eq(jiffies, end)) { + mlx4_err(dev, "%s:cmd_pending failed\n", __func__); + goto out; + } + cond_resched(); + } + + /* + * We use writel (instead of something like memcpy_toio) + * because writes of less than 32 bits to the HCR don't work + * (and some architectures such as ia64 implement memcpy_toio + * in terms of writeb). + */ + __raw_writel((__force u32) cpu_to_be32(in_param >> 32), hcr + 0); + __raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful), hcr + 1); + __raw_writel((__force u32) cpu_to_be32(in_modifier), hcr + 2); + __raw_writel((__force u32) cpu_to_be32(out_param >> 32), hcr + 3); + __raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), hcr + 4); + __raw_writel((__force u32) cpu_to_be32(token << 16), hcr + 5); + + /* __raw_writel may not order writes. */ + wmb(); + + __raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT) | + (cmd->toggle << HCR_T_BIT) | + (event ? 
(1 << HCR_E_BIT) : 0) | + (op_modifier << HCR_OPMOD_SHIFT) | + op), hcr + 6); + + cmd->toggle = cmd->toggle ^ 1; + + ret = 0; + +out: + if (ret) + mlx4_warn(dev, "Could not post command 0x%x: ret=%d, in_param=0x%llx, in_mod=0x%x, op_mod=0x%x\n", + op, ret, in_param, in_modifier, op_modifier); + mutex_unlock(&dev->persist->device_state_mutex); + + return ret; +} + +static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, + int out_is_imm, u32 in_modifier, u8 op_modifier, + u16 op, unsigned long timeout) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_vhcr_cmd *vhcr = priv->mfunc.vhcr; + int ret; + + mutex_lock(&priv->cmd.slave_cmd_mutex); + + vhcr->in_param = cpu_to_be64(in_param); + vhcr->out_param = out_param ? cpu_to_be64(*out_param) : 0; + vhcr->in_modifier = cpu_to_be32(in_modifier); + vhcr->opcode = cpu_to_be16((((u16) op_modifier) << 12) | (op & 0xfff)); + vhcr->token = cpu_to_be16(CMD_POLL_TOKEN); + vhcr->status = 0; + vhcr->flags = !!(priv->cmd.use_events) << 6; + + if (mlx4_is_master(dev)) { + ret = mlx4_master_process_vhcr(dev, dev->caps.function, vhcr); + if (!ret) { + if (out_is_imm) { + if (out_param) + *out_param = + be64_to_cpu(vhcr->out_param); + else { + mlx4_err(dev, "response expected while output mailbox is NULL for command 0x%x\n", + op); + vhcr->status = CMD_STAT_BAD_PARAM; + } + } + ret = mlx4_status_to_errno(vhcr->status); + } + if (ret && + dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + ret = mlx4_internal_err_ret_value(dev, op, op_modifier); + } else { + ret = mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_POST, 0, op, + MLX4_COMM_TIME + timeout); + if (!ret) { + if (out_is_imm) { + if (out_param) + *out_param = + be64_to_cpu(vhcr->out_param); + else { + mlx4_err(dev, "response expected while output mailbox is NULL for command 0x%x\n", + op); + vhcr->status = CMD_STAT_BAD_PARAM; + } + } + ret = mlx4_status_to_errno(vhcr->status); + } else { + if (dev->persist->state & + MLX4_DEVICE_STATE_INTERNAL_ERROR) + ret = mlx4_internal_err_ret_value(dev, op, + op_modifier); + else + mlx4_err(dev, "failed execution of VHCR_POST command opcode 0x%x\n", op); + } + } + + mutex_unlock(&priv->cmd.slave_cmd_mutex); + return ret; +} + +static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, + int out_is_imm, u32 in_modifier, u8 op_modifier, + u16 op, unsigned long timeout) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + void __iomem *hcr = priv->cmd.hcr; + int err = 0; + unsigned long end; + u32 stat; + + down(&priv->cmd.poll_sem); + + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + /* + * Device is going through error recovery + * and cannot accept commands. + */ + err = mlx4_internal_err_ret_value(dev, op, op_modifier); + goto out; + } + + if (out_is_imm && !out_param) { + mlx4_err(dev, "response expected while output mailbox is NULL for command 0x%x\n", + op); + err = -EINVAL; + goto out; + } + + err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, + in_modifier, op_modifier, op, CMD_POLL_TOKEN, 0); + if (err) + goto out_reset; + + end = msecs_to_jiffies(timeout) + jiffies; + while (cmd_pending(dev) && time_before(jiffies, end)) { + if (pci_channel_offline(dev->persist->pdev)) { + /* + * Device is going through error recovery + * and cannot accept commands. 
+ */ + err = -EIO; + goto out_reset; + } + + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + err = mlx4_internal_err_ret_value(dev, op, op_modifier); + goto out; + } + + cond_resched(); + } + + if (cmd_pending(dev)) { + mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", + op); + err = -EIO; + goto out_reset; + } + + if (out_is_imm) + *out_param = + (u64) be32_to_cpu((__force __be32) + __raw_readl(hcr + HCR_OUT_PARAM_OFFSET)) << 32 | + (u64) be32_to_cpu((__force __be32) + __raw_readl(hcr + HCR_OUT_PARAM_OFFSET + 4)); + stat = be32_to_cpu((__force __be32) + __raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24; + err = mlx4_status_to_errno(stat); + if (err) { + mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", + op, stat); + if (mlx4_closing_cmd_fatal_error(op, stat)) + goto out_reset; + goto out; + } + +out_reset: + if (err) + err = mlx4_cmd_reset_flow(dev, op, op_modifier, err); +out: + up(&priv->cmd.poll_sem); + return err; +} + +void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cmd_context *context = + &priv->cmd.context[token & priv->cmd.token_mask]; + + /* previously timed out command completing at long last */ + if (token != context->token) + return; + + context->fw_status = status; + context->result = mlx4_status_to_errno(status); + context->out_param = out_param; + + complete(&context->done); +} + +static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, + int out_is_imm, u32 in_modifier, u8 op_modifier, + u16 op, unsigned long timeout) +{ + struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; + struct mlx4_cmd_context *context; + long ret_wait; + int err = 0; + + down(&cmd->event_sem); + + spin_lock(&cmd->context_lock); + BUG_ON(cmd->free_head < 0); + context = &cmd->context[cmd->free_head]; + context->token += cmd->token_mask + 1; + cmd->free_head = context->next; + spin_unlock(&cmd->context_lock); + + if (out_is_imm && !out_param) { + mlx4_err(dev, "response expected while output mailbox is NULL for command 0x%x\n", + op); + err = -EINVAL; + goto out; + } + + reinit_completion(&context->done); + + err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, + in_modifier, op_modifier, op, context->token, 1); + if (err) + goto out_reset; + + if (op == MLX4_CMD_SENSE_PORT) { + ret_wait = + wait_for_completion_interruptible_timeout(&context->done, + msecs_to_jiffies(timeout)); + if (ret_wait < 0) { + context->fw_status = 0; + context->out_param = 0; + context->result = 0; + } + } else { + ret_wait = (long)wait_for_completion_timeout(&context->done, + msecs_to_jiffies(timeout)); + } + if (!ret_wait) { + mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", + op); + if (op == MLX4_CMD_NOP) { + err = -EBUSY; + goto out; + } else { + err = -EIO; + goto out_reset; + } + } + + err = context->result; + if (err) { + /* Since we do not want to have this error message always + * displayed at driver start when there are ConnectX2 HCAs + * on the host, we deprecate the error message for this + * specific command/input_mod/opcode_mod/fw-status to be debug. 
+ */ + if (op == MLX4_CMD_SET_PORT && + (in_modifier == 1 || in_modifier == 2) && + op_modifier == MLX4_SET_PORT_IB_OPCODE && + context->fw_status == CMD_STAT_BAD_SIZE) + mlx4_dbg(dev, "command 0x%x failed: fw status = 0x%x\n", + op, context->fw_status); + else + mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", + op, context->fw_status); + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + err = mlx4_internal_err_ret_value(dev, op, op_modifier); + else if (mlx4_closing_cmd_fatal_error(op, context->fw_status)) + goto out_reset; + + goto out; + } + + if (out_is_imm) + *out_param = context->out_param; + +out_reset: + if (err) + err = mlx4_cmd_reset_flow(dev, op, op_modifier, err); +out: + spin_lock(&cmd->context_lock); + context->next = cmd->free_head; + cmd->free_head = context - cmd->context; + spin_unlock(&cmd->context_lock); + + up(&cmd->event_sem); + return err; +} + +int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, + int out_is_imm, u32 in_modifier, u8 op_modifier, + u16 op, unsigned long timeout, int native) +{ + if (pci_channel_offline(dev->persist->pdev)) + return mlx4_cmd_reset_flow(dev, op, op_modifier, -EIO); + + if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) { + int ret; + + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + return mlx4_internal_err_ret_value(dev, op, + op_modifier); + down_read(&mlx4_priv(dev)->cmd.switch_sem); + if (mlx4_priv(dev)->cmd.use_events) + ret = mlx4_cmd_wait(dev, in_param, out_param, + out_is_imm, in_modifier, + op_modifier, op, timeout); + else + ret = mlx4_cmd_poll(dev, in_param, out_param, + out_is_imm, in_modifier, + op_modifier, op, timeout); + + up_read(&mlx4_priv(dev)->cmd.switch_sem); + return ret; + } + return mlx4_slave_cmd(dev, in_param, out_param, out_is_imm, + in_modifier, op_modifier, op, timeout); +} +EXPORT_SYMBOL_GPL(__mlx4_cmd); + + +int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev) +{ + return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_ARM_COMM_CHANNEL, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); +} + +static int mlx4_ACCESS_MEM(struct mlx4_dev *dev, u64 master_addr, + int slave, u64 slave_addr, + int size, int is_read) +{ + u64 in_param; + u64 out_param; + + if ((slave_addr & 0xfff) | (master_addr & 0xfff) | + (slave & ~0x7f) | (size & 0xff)) { + mlx4_err(dev, "Bad access mem params - slave_addr:0x%llx master_addr:0x%llx slave_id:%d size:%d\n", + slave_addr, master_addr, slave, size); + return -EINVAL; + } + + if (is_read) { + in_param = (u64) slave | slave_addr; + out_param = (u64) dev->caps.function | master_addr; + } else { + in_param = (u64) dev->caps.function | master_addr; + out_param = (u64) slave | slave_addr; + } + + return mlx4_cmd_imm(dev, in_param, &out_param, size, 0, + MLX4_CMD_ACCESS_MEM, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); +} + +static int query_pkey_block(struct mlx4_dev *dev, u8 port, u16 index, u16 *pkey, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox) +{ + struct ib_smp *in_mad = (struct ib_smp *)(inbox->buf); + struct ib_smp *out_mad = (struct ib_smp *)(outbox->buf); + int err; + int i; + + if (index & 0x1f) + return -EINVAL; + + in_mad->attr_mod = cpu_to_be32(index / 32); + + err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3, + MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); + if (err) + return err; + + for (i = 0; i < 32; ++i) + pkey[i] = be16_to_cpu(((__be16 *) out_mad->data)[i]); + + return err; +} + +static int get_full_pkey_table(struct mlx4_dev *dev, u8 port, u16 *table, + struct mlx4_cmd_mailbox *inbox, + 
struct mlx4_cmd_mailbox *outbox) +{ + int i; + int err; + + for (i = 0; i < dev->caps.pkey_table_len[port]; i += 32) { + err = query_pkey_block(dev, port, i, table + i, inbox, outbox); + if (err) + return err; + } + + return 0; +} +#define PORT_CAPABILITY_LOCATION_IN_SMP 20 +#define PORT_STATE_OFFSET 32 + +static enum ib_port_state vf_port_state(struct mlx4_dev *dev, int port, int vf) +{ + if (mlx4_get_slave_port_state(dev, vf, port) == SLAVE_PORT_UP) + return IB_PORT_ACTIVE; + else + return IB_PORT_DOWN; +} + +static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + struct ib_smp *smp = inbox->buf; + u32 index; + u8 port, slave_port; + u8 opcode_modifier; + u16 *table; + int err; + int vidx, pidx; + int network_view; + struct mlx4_priv *priv = mlx4_priv(dev); + struct ib_smp *outsmp = outbox->buf; + __be16 *outtab = (__be16 *)(outsmp->data); + __be32 slave_cap_mask; + __be64 slave_node_guid; + + slave_port = vhcr->in_modifier; + port = mlx4_slave_convert_port(dev, slave, slave_port); + + /* network-view bit is for driver use only, and should not be passed to FW */ + opcode_modifier = vhcr->op_modifier & ~0x8; /* clear netw view bit */ + network_view = !!(vhcr->op_modifier & 0x8); + + if (smp->base_version == 1 && + smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && + smp->class_version == 1) { + /* host view is paravirtualized */ + if (!network_view && smp->method == IB_MGMT_METHOD_GET) { + if (smp->attr_id == IB_SMP_ATTR_PKEY_TABLE) { + index = be32_to_cpu(smp->attr_mod); + if (port < 1 || port > dev->caps.num_ports) + return -EINVAL; + table = kcalloc((dev->caps.pkey_table_len[port] / 32) + 1, + sizeof(*table) * 32, GFP_KERNEL); + + if (!table) + return -ENOMEM; + /* need to get the full pkey table because the paravirtualized + * pkeys may be scattered among several pkey blocks. 
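(For illustration only: the loop just below rebuilds the guest-visible pkey table by indexing the full physical table through the per-slave virt2phys_pkey map, 32 entries at a time, because the virtual entries need not be contiguous. A reduced sketch of that remapping follows, assuming hypothetical names and a flat physical table; it is not driver code.)

/* Sketch: remap one 32-entry virtual pkey block via a virt->phys index map. */
#include <stdint.h>

#define PKEYS_PER_BLOCK 32

static void remap_pkey_block(const uint16_t *phys_table, const int *virt2phys,
			     unsigned int block, uint16_t out[PKEYS_PER_BLOCK])
{
	unsigned int vidx;

	for (vidx = block * PKEYS_PER_BLOCK;
	     vidx < (block + 1) * PKEYS_PER_BLOCK; vidx++)
		out[vidx % PKEYS_PER_BLOCK] = phys_table[virt2phys[vidx]];
}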
+ */ + err = get_full_pkey_table(dev, port, table, inbox, outbox); + if (!err) { + for (vidx = index * 32; vidx < (index + 1) * 32; ++vidx) { + pidx = priv->virt2phys_pkey[slave][port - 1][vidx]; + outtab[vidx % 32] = cpu_to_be16(table[pidx]); + } + } + kfree(table); + return err; + } + if (smp->attr_id == IB_SMP_ATTR_PORT_INFO) { + /*get the slave specific caps:*/ + /*do the command */ + smp->attr_mod = cpu_to_be32(port); + err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, + port, opcode_modifier, + vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); + /* modify the response for slaves */ + if (!err && slave != mlx4_master_func_num(dev)) { + u8 *state = outsmp->data + PORT_STATE_OFFSET; + + *state = (*state & 0xf0) | vf_port_state(dev, port, slave); + slave_cap_mask = priv->mfunc.master.slave_state[slave].ib_cap_mask[port]; + memcpy(outsmp->data + PORT_CAPABILITY_LOCATION_IN_SMP, &slave_cap_mask, 4); + } + return err; + } + if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) { + __be64 guid = mlx4_get_admin_guid(dev, slave, + port); + + /* set the PF admin guid to the FW/HW burned + * GUID, if it wasn't yet set + */ + if (slave == 0 && guid == 0) { + smp->attr_mod = 0; + err = mlx4_cmd_box(dev, + inbox->dma, + outbox->dma, + vhcr->in_modifier, + opcode_modifier, + vhcr->op, + MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); + if (err) + return err; + mlx4_set_admin_guid(dev, + *(__be64 *)outsmp-> + data, slave, port); + } else { + memcpy(outsmp->data, &guid, 8); + } + + /* clean all other gids */ + memset(outsmp->data + 8, 0, 56); + return 0; + } + if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) { + err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, + port, opcode_modifier, + vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); + if (!err) { + slave_node_guid = mlx4_get_slave_node_guid(dev, slave); + memcpy(outsmp->data + 12, &slave_node_guid, 8); + } + return err; + } + } + } + + /* Non-privileged VFs are only allowed "host" view LID-routed 'Get' MADs. + * These are the MADs used by ib verbs (such as ib_query_gids). + */ + if (slave != mlx4_master_func_num(dev) && + !mlx4_vf_smi_enabled(dev, slave, port)) { + if (!(smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && + smp->method == IB_MGMT_METHOD_GET) || network_view) { + mlx4_err(dev, "Unprivileged slave %d is trying to execute a Subnet MGMT MAD, class 0x%x, method 0x%x, view=%s for attr 0x%x. Rejecting\n", + slave, smp->mgmt_class, smp->method, + network_view ? "Network" : "Host", + be16_to_cpu(smp->attr_id)); + return -EPERM; + } + } + + return mlx4_cmd_box(dev, inbox->dma, outbox->dma, + vhcr->in_modifier, opcode_modifier, + vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); +} + +static int mlx4_CMD_EPERM_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + return -EPERM; +} + +int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + u64 in_param; + u64 out_param; + int err; + + in_param = cmd->has_inbox ? (u64) inbox->dma : vhcr->in_param; + out_param = cmd->has_outbox ? 
(u64) outbox->dma : vhcr->out_param; + if (cmd->encode_slave_id) { + in_param &= 0xffffffffffffff00ll; + in_param |= slave; + } + + err = __mlx4_cmd(dev, in_param, &out_param, cmd->out_is_imm, + vhcr->in_modifier, vhcr->op_modifier, vhcr->op, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + + if (cmd->out_is_imm) + vhcr->out_param = out_param; + + return err; +} + +static struct mlx4_cmd_info cmd_info[] = { + { + .opcode = MLX4_CMD_QUERY_FW, + .has_inbox = false, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_QUERY_FW_wrapper + }, + { + .opcode = MLX4_CMD_QUERY_HCA, + .has_inbox = false, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = NULL + }, + { + .opcode = MLX4_CMD_QUERY_DEV_CAP, + .has_inbox = false, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_QUERY_DEV_CAP_wrapper + }, + { + .opcode = MLX4_CMD_QUERY_FUNC_CAP, + .has_inbox = false, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_QUERY_FUNC_CAP_wrapper + }, + { + .opcode = MLX4_CMD_QUERY_ADAPTER, + .has_inbox = false, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = NULL + }, + { + .opcode = MLX4_CMD_INIT_PORT, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_INIT_PORT_wrapper + }, + { + .opcode = MLX4_CMD_CLOSE_PORT, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_CLOSE_PORT_wrapper + }, + { + .opcode = MLX4_CMD_QUERY_PORT, + .has_inbox = false, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_QUERY_PORT_wrapper + }, + { + .opcode = MLX4_CMD_SET_PORT, + .has_inbox = true, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_SET_PORT_wrapper + }, + { + .opcode = MLX4_CMD_MAP_EQ, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_MAP_EQ_wrapper + }, + { + .opcode = MLX4_CMD_SW2HW_EQ, + .has_inbox = true, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = true, + .verify = NULL, + .wrapper = mlx4_SW2HW_EQ_wrapper + }, + { + .opcode = MLX4_CMD_HW_HEALTH_CHECK, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = NULL + }, + { + .opcode = MLX4_CMD_NOP, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = NULL + }, + { + .opcode = MLX4_CMD_CONFIG_DEV, + .has_inbox = false, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_CONFIG_DEV_wrapper + }, + { + .opcode = MLX4_CMD_ALLOC_RES, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = true, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_ALLOC_RES_wrapper + }, + { + .opcode = MLX4_CMD_FREE_RES, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_FREE_RES_wrapper + }, + { + .opcode = MLX4_CMD_SW2HW_MPT, + .has_inbox = true, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = true, + .verify = NULL, + .wrapper = 
mlx4_SW2HW_MPT_wrapper + }, + { + .opcode = MLX4_CMD_QUERY_MPT, + .has_inbox = false, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_QUERY_MPT_wrapper + }, + { + .opcode = MLX4_CMD_HW2SW_MPT, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_HW2SW_MPT_wrapper + }, + { + .opcode = MLX4_CMD_READ_MTT, + .has_inbox = false, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = NULL + }, + { + .opcode = MLX4_CMD_WRITE_MTT, + .has_inbox = true, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_WRITE_MTT_wrapper + }, + { + .opcode = MLX4_CMD_SYNC_TPT, + .has_inbox = true, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = NULL + }, + { + .opcode = MLX4_CMD_HW2SW_EQ, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = true, + .verify = NULL, + .wrapper = mlx4_HW2SW_EQ_wrapper + }, + { + .opcode = MLX4_CMD_QUERY_EQ, + .has_inbox = false, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = true, + .verify = NULL, + .wrapper = mlx4_QUERY_EQ_wrapper + }, + { + .opcode = MLX4_CMD_SW2HW_CQ, + .has_inbox = true, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = true, + .verify = NULL, + .wrapper = mlx4_SW2HW_CQ_wrapper + }, + { + .opcode = MLX4_CMD_HW2SW_CQ, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_HW2SW_CQ_wrapper + }, + { + .opcode = MLX4_CMD_QUERY_CQ, + .has_inbox = false, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_QUERY_CQ_wrapper + }, + { + .opcode = MLX4_CMD_MODIFY_CQ, + .has_inbox = true, + .has_outbox = false, + .out_is_imm = true, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_MODIFY_CQ_wrapper + }, + { + .opcode = MLX4_CMD_SW2HW_SRQ, + .has_inbox = true, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = true, + .verify = NULL, + .wrapper = mlx4_SW2HW_SRQ_wrapper + }, + { + .opcode = MLX4_CMD_HW2SW_SRQ, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_HW2SW_SRQ_wrapper + }, + { + .opcode = MLX4_CMD_QUERY_SRQ, + .has_inbox = false, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_QUERY_SRQ_wrapper + }, + { + .opcode = MLX4_CMD_ARM_SRQ, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_ARM_SRQ_wrapper + }, + { + .opcode = MLX4_CMD_RST2INIT_QP, + .has_inbox = true, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = true, + .verify = NULL, + .wrapper = mlx4_RST2INIT_QP_wrapper + }, + { + .opcode = MLX4_CMD_INIT2INIT_QP, + .has_inbox = true, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_INIT2INIT_QP_wrapper + }, + { + .opcode = MLX4_CMD_INIT2RTR_QP, + .has_inbox = true, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_INIT2RTR_QP_wrapper + }, + { + .opcode = MLX4_CMD_RTR2RTS_QP, + .has_inbox = true, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = 
NULL, + .wrapper = mlx4_RTR2RTS_QP_wrapper + }, + { + .opcode = MLX4_CMD_RTS2RTS_QP, + .has_inbox = true, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_RTS2RTS_QP_wrapper + }, + { + .opcode = MLX4_CMD_SQERR2RTS_QP, + .has_inbox = true, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_SQERR2RTS_QP_wrapper + }, + { + .opcode = MLX4_CMD_2ERR_QP, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_GEN_QP_wrapper + }, + { + .opcode = MLX4_CMD_RTS2SQD_QP, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_GEN_QP_wrapper + }, + { + .opcode = MLX4_CMD_SQD2SQD_QP, + .has_inbox = true, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_SQD2SQD_QP_wrapper + }, + { + .opcode = MLX4_CMD_SQD2RTS_QP, + .has_inbox = true, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_SQD2RTS_QP_wrapper + }, + { + .opcode = MLX4_CMD_2RST_QP, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_2RST_QP_wrapper + }, + { + .opcode = MLX4_CMD_QUERY_QP, + .has_inbox = false, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_GEN_QP_wrapper + }, + { + .opcode = MLX4_CMD_SUSPEND_QP, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_GEN_QP_wrapper + }, + { + .opcode = MLX4_CMD_UNSUSPEND_QP, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_GEN_QP_wrapper + }, + { + .opcode = MLX4_CMD_UPDATE_QP, + .has_inbox = true, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_UPDATE_QP_wrapper + }, + { + .opcode = MLX4_CMD_GET_OP_REQ, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_CMD_EPERM_wrapper, + }, + { + .opcode = MLX4_CMD_ALLOCATE_VPP, + .has_inbox = false, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_CMD_EPERM_wrapper, + }, + { + .opcode = MLX4_CMD_SET_VPORT_QOS, + .has_inbox = false, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_CMD_EPERM_wrapper, + }, + { + .opcode = MLX4_CMD_CONF_SPECIAL_QP, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, /* XXX verify: only demux can do this */ + .wrapper = NULL + }, + { + .opcode = MLX4_CMD_MAD_IFC, + .has_inbox = true, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_MAD_IFC_wrapper + }, + { + .opcode = MLX4_CMD_MAD_DEMUX, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_CMD_EPERM_wrapper + }, + { + .opcode = MLX4_CMD_QUERY_IF_STAT, + .has_inbox = false, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_QUERY_IF_STAT_wrapper + }, + { + .opcode = MLX4_CMD_ACCESS_REG, + 
.has_inbox = true, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_ACCESS_REG_wrapper, + }, + { + .opcode = MLX4_CMD_CONGESTION_CTRL_OPCODE, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_CMD_EPERM_wrapper, + }, + /* Native multicast commands are not available for guests */ + { + .opcode = MLX4_CMD_QP_ATTACH, + .has_inbox = true, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_QP_ATTACH_wrapper + }, + { + .opcode = MLX4_CMD_PROMISC, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_PROMISC_wrapper + }, + /* Ethernet specific commands */ + { + .opcode = MLX4_CMD_SET_VLAN_FLTR, + .has_inbox = true, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_SET_VLAN_FLTR_wrapper + }, + { + .opcode = MLX4_CMD_SET_MCAST_FLTR, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_SET_MCAST_FLTR_wrapper + }, + { + .opcode = MLX4_CMD_DUMP_ETH_STATS, + .has_inbox = false, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_DUMP_ETH_STATS_wrapper + }, + { + .opcode = MLX4_CMD_INFORM_FLR_DONE, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = NULL + }, + /* flow steering commands */ + { + .opcode = MLX4_QP_FLOW_STEERING_ATTACH, + .has_inbox = true, + .has_outbox = false, + .out_is_imm = true, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_QP_FLOW_STEERING_ATTACH_wrapper + }, + { + .opcode = MLX4_QP_FLOW_STEERING_DETACH, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_QP_FLOW_STEERING_DETACH_wrapper + }, + { + .opcode = MLX4_FLOW_STEERING_IB_UC_QP_RANGE, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_CMD_EPERM_wrapper + }, + { + .opcode = MLX4_CMD_VIRT_PORT_MAP, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_CMD_EPERM_wrapper + }, +}; + +static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr_cmd *in_vhcr) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cmd_info *cmd = NULL; + struct mlx4_vhcr_cmd *vhcr_cmd = in_vhcr ? 
in_vhcr : priv->mfunc.vhcr; + struct mlx4_vhcr *vhcr; + struct mlx4_cmd_mailbox *inbox = NULL; + struct mlx4_cmd_mailbox *outbox = NULL; + u64 in_param; + u64 out_param; + int ret = 0; + int i; + int err = 0; + + /* Create sw representation of Virtual HCR */ + vhcr = kzalloc(sizeof(struct mlx4_vhcr), GFP_KERNEL); + if (!vhcr) + return -ENOMEM; + + /* DMA in the vHCR */ + if (!in_vhcr) { + ret = mlx4_ACCESS_MEM(dev, priv->mfunc.vhcr_dma, slave, + priv->mfunc.master.slave_state[slave].vhcr_dma, + ALIGN(sizeof(struct mlx4_vhcr_cmd), + MLX4_ACCESS_MEM_ALIGN), 1); + if (ret) { + if (!(dev->persist->state & + MLX4_DEVICE_STATE_INTERNAL_ERROR)) + mlx4_err(dev, "%s: Failed reading vhcr ret: 0x%x\n", + __func__, ret); + kfree(vhcr); + return ret; + } + } + + /* Fill SW VHCR fields */ + vhcr->in_param = be64_to_cpu(vhcr_cmd->in_param); + vhcr->out_param = be64_to_cpu(vhcr_cmd->out_param); + vhcr->in_modifier = be32_to_cpu(vhcr_cmd->in_modifier); + vhcr->token = be16_to_cpu(vhcr_cmd->token); + vhcr->op = be16_to_cpu(vhcr_cmd->opcode) & 0xfff; + vhcr->op_modifier = (u8) (be16_to_cpu(vhcr_cmd->opcode) >> 12); + vhcr->e_bit = vhcr_cmd->flags & (1 << 6); + + /* Lookup command */ + for (i = 0; i < ARRAY_SIZE(cmd_info); ++i) { + if (vhcr->op == cmd_info[i].opcode) { + cmd = &cmd_info[i]; + break; + } + } + if (!cmd) { + mlx4_err(dev, "Unknown command:0x%x accepted from slave:%d\n", + vhcr->op, slave); + vhcr_cmd->status = CMD_STAT_BAD_PARAM; + goto out_status; + } + + /* Read inbox */ + if (cmd->has_inbox) { + vhcr->in_param &= INBOX_MASK; + inbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(inbox)) { + vhcr_cmd->status = CMD_STAT_BAD_SIZE; + inbox = NULL; + goto out_status; + } + + ret = mlx4_ACCESS_MEM(dev, inbox->dma, slave, + vhcr->in_param, + MLX4_MAILBOX_SIZE, 1); + if (ret) { + if (!(dev->persist->state & + MLX4_DEVICE_STATE_INTERNAL_ERROR)) + mlx4_err(dev, "%s: Failed reading inbox (cmd:0x%x)\n", + __func__, cmd->opcode); + vhcr_cmd->status = CMD_STAT_INTERNAL_ERR; + goto out_status; + } + } + + /* Apply permission and bound checks if applicable */ + if (cmd->verify && cmd->verify(dev, slave, vhcr, inbox)) { + mlx4_warn(dev, "Command:0x%x from slave: %d failed protection checks for resource_id:%d\n", + vhcr->op, slave, vhcr->in_modifier); + vhcr_cmd->status = CMD_STAT_BAD_OP; + goto out_status; + } + + /* Allocate outbox */ + if (cmd->has_outbox) { + outbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(outbox)) { + vhcr_cmd->status = CMD_STAT_BAD_SIZE; + outbox = NULL; + goto out_status; + } + } + + /* Execute the command! */ + if (cmd->wrapper) { + err = cmd->wrapper(dev, slave, vhcr, inbox, outbox, + cmd); + if (cmd->out_is_imm) + vhcr_cmd->out_param = cpu_to_be64(vhcr->out_param); + } else { + in_param = cmd->has_inbox ? (u64) inbox->dma : + vhcr->in_param; + out_param = cmd->has_outbox ? 
(u64) outbox->dma : + vhcr->out_param; + err = __mlx4_cmd(dev, in_param, &out_param, + cmd->out_is_imm, vhcr->in_modifier, + vhcr->op_modifier, vhcr->op, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + + if (cmd->out_is_imm) { + vhcr->out_param = out_param; + vhcr_cmd->out_param = cpu_to_be64(vhcr->out_param); + } + } + + if (err) { + if (!(dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) { + if (vhcr->op == MLX4_CMD_ALLOC_RES && + (vhcr->in_modifier & 0xff) == RES_COUNTER && + err == -EDQUOT) + mlx4_dbg(dev, + "Unable to allocate counter for slave %d (%d)\n", + slave, err); + else + mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with error:%d, status %d\n", + vhcr->op, slave, vhcr->errno, err); + } + vhcr_cmd->status = mlx4_errno_to_status(err); + goto out_status; + } + + + /* Write outbox if command completed successfully */ + if (cmd->has_outbox && !vhcr_cmd->status) { + ret = mlx4_ACCESS_MEM(dev, outbox->dma, slave, + vhcr->out_param, + MLX4_MAILBOX_SIZE, MLX4_CMD_WRAPPED); + if (ret) { + /* If we failed to write back the outbox after the + *command was successfully executed, we must fail this + * slave, as it is now in undefined state */ + if (!(dev->persist->state & + MLX4_DEVICE_STATE_INTERNAL_ERROR)) + mlx4_err(dev, "%s:Failed writing outbox\n", __func__); + goto out; + } + } + +out_status: + /* DMA back vhcr result */ + if (!in_vhcr) { + ret = mlx4_ACCESS_MEM(dev, priv->mfunc.vhcr_dma, slave, + priv->mfunc.master.slave_state[slave].vhcr_dma, + ALIGN(sizeof(struct mlx4_vhcr), + MLX4_ACCESS_MEM_ALIGN), + MLX4_CMD_WRAPPED); + if (ret) + mlx4_err(dev, "%s:Failed writing vhcr result\n", + __func__); + else if (vhcr->e_bit && + mlx4_GEN_EQE(dev, slave, &priv->mfunc.master.cmd_eqe)) + mlx4_warn(dev, "Failed to generate command completion eqe for slave %d\n", + slave); + } + +out: + kfree(vhcr); + mlx4_free_cmd_mailbox(dev, inbox); + mlx4_free_cmd_mailbox(dev, outbox); + return ret; +} + +static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv, + int slave, int port) +{ + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_vport_state *vp_admin; + struct mlx4_vf_immed_vlan_work *work; + struct mlx4_dev *dev = &(priv->dev); + int err; + int admin_vlan_ix = NO_INDX; + + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + + if (vp_oper->state.default_vlan == vp_admin->default_vlan && + vp_oper->state.default_qos == vp_admin->default_qos && + vp_oper->state.vlan_proto == vp_admin->vlan_proto && + vp_oper->state.link_state == vp_admin->link_state && + vp_oper->state.qos_vport == vp_admin->qos_vport) + return 0; + + if (!(priv->mfunc.master.slave_state[slave].active && + dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP)) { + /* even if the UPDATE_QP command isn't supported, we still want + * to set this VF link according to the admin directive + */ + vp_oper->state.link_state = vp_admin->link_state; + return -1; + } + + mlx4_dbg(dev, "updating immediately admin params slave %d port %d\n", + slave, port); + mlx4_dbg(dev, "vlan %d QoS %d link down %d\n", + vp_admin->default_vlan, vp_admin->default_qos, + vp_admin->link_state); + + work = kzalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return -ENOMEM; + + if (vp_oper->state.default_vlan != vp_admin->default_vlan) { + if (MLX4_VGT != vp_admin->default_vlan) { + err = __mlx4_register_vlan(&priv->dev, port, + vp_admin->default_vlan, + &admin_vlan_ix); + if (err) { + kfree(work); + mlx4_warn(&priv->dev, + "No vlan resources slave %d, port %d\n", + 
slave, port); + return err; + } + } else { + admin_vlan_ix = NO_INDX; + } + work->flags |= MLX4_VF_IMMED_VLAN_FLAG_VLAN; + mlx4_dbg(&priv->dev, + "alloc vlan %d idx %d slave %d port %d\n", + (int)(vp_admin->default_vlan), + admin_vlan_ix, slave, port); + } + + /* save original vlan ix and vlan id */ + work->orig_vlan_id = vp_oper->state.default_vlan; + work->orig_vlan_ix = vp_oper->vlan_idx; + + /* handle new qos */ + if (vp_oper->state.default_qos != vp_admin->default_qos) + work->flags |= MLX4_VF_IMMED_VLAN_FLAG_QOS; + + if (work->flags & MLX4_VF_IMMED_VLAN_FLAG_VLAN) + vp_oper->vlan_idx = admin_vlan_ix; + + vp_oper->state.default_vlan = vp_admin->default_vlan; + vp_oper->state.default_qos = vp_admin->default_qos; + vp_oper->state.vlan_proto = vp_admin->vlan_proto; + vp_oper->state.link_state = vp_admin->link_state; + vp_oper->state.qos_vport = vp_admin->qos_vport; + + if (vp_admin->link_state == IFLA_VF_LINK_STATE_DISABLE) + work->flags |= MLX4_VF_IMMED_VLAN_FLAG_LINK_DISABLE; + + /* iterate over QPs owned by this slave, using UPDATE_QP */ + work->port = port; + work->slave = slave; + work->qos = vp_oper->state.default_qos; + work->qos_vport = vp_oper->state.qos_vport; + work->vlan_id = vp_oper->state.default_vlan; + work->vlan_ix = vp_oper->vlan_idx; + work->vlan_proto = vp_oper->state.vlan_proto; + work->priv = priv; + INIT_WORK(&work->work, mlx4_vf_immed_vlan_work_handler); + queue_work(priv->mfunc.master.comm_wq, &work->work); + + return 0; +} + +static void mlx4_set_default_port_qos(struct mlx4_dev *dev, int port) +{ + struct mlx4_qos_manager *port_qos_ctl; + struct mlx4_priv *priv = mlx4_priv(dev); + + port_qos_ctl = &priv->mfunc.master.qos_ctl[port]; + bitmap_zero(port_qos_ctl->priority_bm, MLX4_NUM_UP); + + /* Enable only default prio at PF init routine */ + set_bit(MLX4_DEFAULT_QOS_PRIO, port_qos_ctl->priority_bm); +} + +static void mlx4_allocate_port_vpps(struct mlx4_dev *dev, int port) +{ + int i; + int err; + int num_vfs; + u16 available_vpp; + u8 vpp_param[MLX4_NUM_UP]; + struct mlx4_qos_manager *port_qos; + struct mlx4_priv *priv = mlx4_priv(dev); + + err = mlx4_ALLOCATE_VPP_get(dev, port, &available_vpp, vpp_param); + if (err) { + mlx4_info(dev, "Failed query available VPPs\n"); + return; + } + + port_qos = &priv->mfunc.master.qos_ctl[port]; + num_vfs = (available_vpp / + bitmap_weight(port_qos->priority_bm, MLX4_NUM_UP)); + + for (i = 0; i < MLX4_NUM_UP; i++) { + if (test_bit(i, port_qos->priority_bm)) + vpp_param[i] = num_vfs; + } + + err = mlx4_ALLOCATE_VPP_set(dev, port, vpp_param); + if (err) { + mlx4_info(dev, "Failed allocating VPPs\n"); + return; + } + + /* Query actual allocated VPP, just to make sure */ + err = mlx4_ALLOCATE_VPP_get(dev, port, &available_vpp, vpp_param); + if (err) { + mlx4_info(dev, "Failed query available VPPs\n"); + return; + } + + port_qos->num_of_qos_vfs = num_vfs; + mlx4_dbg(dev, "Port %d Available VPPs %d\n", port, available_vpp); + + for (i = 0; i < MLX4_NUM_UP; i++) + mlx4_dbg(dev, "Port %d UP %d Allocated %d VPPs\n", port, i, + vpp_param[i]); +} + +static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) +{ + int p, port, err; + struct mlx4_vport_state *vp_admin; + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_slave_state *slave_state = + &priv->mfunc.master.slave_state[slave]; + struct mlx4_active_ports actv_ports = mlx4_get_active_ports( + &priv->dev, slave); + + for_each_set_bit(p, actv_ports.ports, priv->dev.caps.num_ports) { + port = p + 1; + priv->mfunc.master.vf_oper[slave].smi_enabled[port] = + 
priv->mfunc.master.vf_admin[slave].enable_smi[port]; + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + if (vp_admin->vlan_proto != htons(ETH_P_8021AD) || + slave_state->vst_qinq_supported) { + vp_oper->state.vlan_proto = vp_admin->vlan_proto; + vp_oper->state.default_vlan = vp_admin->default_vlan; + vp_oper->state.default_qos = vp_admin->default_qos; + } + vp_oper->state.link_state = vp_admin->link_state; + vp_oper->state.mac = vp_admin->mac; + vp_oper->state.spoofchk = vp_admin->spoofchk; + vp_oper->state.tx_rate = vp_admin->tx_rate; + vp_oper->state.qos_vport = vp_admin->qos_vport; + vp_oper->state.guid = vp_admin->guid; + + if (MLX4_VGT != vp_admin->default_vlan) { + err = __mlx4_register_vlan(&priv->dev, port, + vp_admin->default_vlan, &(vp_oper->vlan_idx)); + if (err) { + vp_oper->vlan_idx = NO_INDX; + vp_oper->state.default_vlan = MLX4_VGT; + vp_oper->state.vlan_proto = htons(ETH_P_8021Q); + mlx4_warn(&priv->dev, + "No vlan resources slave %d, port %d\n", + slave, port); + return err; + } + mlx4_dbg(&priv->dev, "alloc vlan %d idx %d slave %d port %d\n", + (int)(vp_oper->state.default_vlan), + vp_oper->vlan_idx, slave, port); + } + if (vp_admin->spoofchk) { + vp_oper->mac_idx = __mlx4_register_mac(&priv->dev, + port, + vp_admin->mac); + if (0 > vp_oper->mac_idx) { + err = vp_oper->mac_idx; + vp_oper->mac_idx = NO_INDX; + mlx4_warn(&priv->dev, + "No mac resources slave %d, port %d\n", + slave, port); + return err; + } + mlx4_dbg(&priv->dev, "alloc mac %llx idx %d slave %d port %d\n", + vp_oper->state.mac, vp_oper->mac_idx, slave, port); + } + } + return 0; +} + +static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave) +{ + int p, port; + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_active_ports actv_ports = mlx4_get_active_ports( + &priv->dev, slave); + + for_each_set_bit(p, actv_ports.ports, priv->dev.caps.num_ports) { + port = p + 1; + priv->mfunc.master.vf_oper[slave].smi_enabled[port] = + MLX4_VF_SMI_DISABLED; + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + if (NO_INDX != vp_oper->vlan_idx) { + __mlx4_unregister_vlan(&priv->dev, + port, vp_oper->state.default_vlan); + vp_oper->vlan_idx = NO_INDX; + } + if (NO_INDX != vp_oper->mac_idx) { + __mlx4_unregister_mac(&priv->dev, port, vp_oper->state.mac); + vp_oper->mac_idx = NO_INDX; + } + } + return; +} + +static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, + u16 param, u8 toggle) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state; + u32 reply; + u8 is_going_down = 0; + int i; + unsigned long flags; + + slave_state[slave].comm_toggle ^= 1; + reply = (u32) slave_state[slave].comm_toggle << 31; + if (toggle != slave_state[slave].comm_toggle) { + mlx4_warn(dev, "Incorrect toggle %d from slave %d. 
*** MASTER STATE COMPROMISED ***\n", + toggle, slave); + goto reset_slave; + } + if (cmd == MLX4_COMM_CMD_RESET) { + mlx4_warn(dev, "Received reset from slave:%d\n", slave); + slave_state[slave].active = false; + slave_state[slave].old_vlan_api = false; + slave_state[slave].vst_qinq_supported = false; + mlx4_master_deactivate_admin_state(priv, slave); + for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i) { + slave_state[slave].event_eq[i].eqn = -1; + slave_state[slave].event_eq[i].token = 0; + } + /*check if we are in the middle of FLR process, + if so return "retry" status to the slave*/ + if (MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd) + goto inform_slave_state; + + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN, slave); + + /* write the version in the event field */ + reply |= mlx4_comm_get_version(); + + goto reset_slave; + } + /*command from slave in the middle of FLR*/ + if (cmd != MLX4_COMM_CMD_RESET && + MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd) { + mlx4_warn(dev, "slave:%d is Trying to run cmd(0x%x) in the middle of FLR\n", + slave, cmd); + return; + } + + switch (cmd) { + case MLX4_COMM_CMD_VHCR0: + if (slave_state[slave].last_cmd != MLX4_COMM_CMD_RESET) + goto reset_slave; + slave_state[slave].vhcr_dma = ((u64) param) << 48; + priv->mfunc.master.slave_state[slave].cookie = 0; + break; + case MLX4_COMM_CMD_VHCR1: + if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR0) + goto reset_slave; + slave_state[slave].vhcr_dma |= ((u64) param) << 32; + break; + case MLX4_COMM_CMD_VHCR2: + if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR1) + goto reset_slave; + slave_state[slave].vhcr_dma |= ((u64) param) << 16; + break; + case MLX4_COMM_CMD_VHCR_EN: + if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR2) + goto reset_slave; + slave_state[slave].vhcr_dma |= param; + if (mlx4_master_activate_admin_state(priv, slave)) + goto reset_slave; + slave_state[slave].active = true; + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_INIT, slave); + break; + case MLX4_COMM_CMD_VHCR_POST: + if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) && + (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) { + mlx4_warn(dev, "slave:%d is out of sync, cmd=0x%x, last command=0x%x, reset is needed\n", + slave, cmd, slave_state[slave].last_cmd); + goto reset_slave; + } + + mutex_lock(&priv->cmd.slave_cmd_mutex); + if (mlx4_master_process_vhcr(dev, slave, NULL)) { + mlx4_err(dev, "Failed processing vhcr for slave:%d, resetting slave\n", + slave); + mutex_unlock(&priv->cmd.slave_cmd_mutex); + goto reset_slave; + } + mutex_unlock(&priv->cmd.slave_cmd_mutex); + break; + default: + mlx4_warn(dev, "Bad comm cmd:%d from slave:%d\n", cmd, slave); + goto reset_slave; + } + spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); + if (!slave_state[slave].is_slave_going_down) + slave_state[slave].last_cmd = cmd; + else + is_going_down = 1; + spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags); + if (is_going_down) { + mlx4_warn(dev, "Slave is going down aborting command(%d) executing from slave:%d\n", + cmd, slave); + return; + } + __raw_writel((__force u32) cpu_to_be32(reply), + &priv->mfunc.comm[slave].slave_read); + + return; + +reset_slave: + /* cleanup any slave resources */ + if (dev->persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_delete_all_resources_for_slave(dev, slave); + + if (cmd != MLX4_COMM_CMD_RESET) { + mlx4_warn(dev, "Turn on internal error to force reset, slave=%d, cmd=0x%x\n", + slave, cmd); + /* Turn on internal error letting slave reset 
itself immeditaly, + * otherwise it might take till timeout on command is passed + */ + reply |= ((u32)COMM_CHAN_EVENT_INTERNAL_ERR); + } + + spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); + if (!slave_state[slave].is_slave_going_down) + slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET; + spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags); + /*with slave in the middle of flr, no need to clean resources again.*/ +inform_slave_state: + memset(&slave_state[slave].event_eq, 0, + sizeof(struct mlx4_slave_event_eq_info)); + __raw_writel((__force u32) cpu_to_be32(reply), + &priv->mfunc.comm[slave].slave_read); + wmb(); +} + +/* master command processing */ +void mlx4_master_comm_channel(struct work_struct *work) +{ + struct mlx4_mfunc_master_ctx *master = + container_of(work, + struct mlx4_mfunc_master_ctx, + comm_work); + struct mlx4_mfunc *mfunc = + container_of(master, struct mlx4_mfunc, master); + struct mlx4_priv *priv = + container_of(mfunc, struct mlx4_priv, mfunc); + struct mlx4_dev *dev = &priv->dev; + u32 lbit_vec[COMM_CHANNEL_BIT_ARRAY_SIZE]; + u32 nmbr_bits; + u32 comm_cmd; + int i, slave; + int toggle; + bool first = true; + int served = 0; + int reported = 0; + u32 slt; + + for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++) + lbit_vec[i] = be32_to_cpu(master->comm_arm_bit_vector[i]); + nmbr_bits = dev->persist->num_vfs + 1; + if (++master->next_slave >= nmbr_bits) + master->next_slave = 0; + slave = master->next_slave; + while (true) { + slave = find_next_bit((const unsigned long *)&lbit_vec, nmbr_bits, slave); + if (!first && slave >= master->next_slave) + break; + if (slave == nmbr_bits) { + if (!first) + break; + first = false; + slave = 0; + continue; + } + ++reported; + comm_cmd = swab32(readl(&mfunc->comm[slave].slave_write)); + slt = swab32(readl(&mfunc->comm[slave].slave_read)) >> 31; + toggle = comm_cmd >> 31; + if (toggle != slt) { + if (master->slave_state[slave].comm_toggle + != slt) { + pr_info("slave %d out of sync. read toggle %d, state toggle %d. Resynching.\n", + slave, slt, + master->slave_state[slave].comm_toggle); + master->slave_state[slave].comm_toggle = + slt; + } + mlx4_master_do_cmd(dev, slave, + comm_cmd >> 16 & 0xff, + comm_cmd & 0xffff, toggle); + ++served; + } + slave++; + } + + if (reported && reported != served) + mlx4_warn(dev, "Got command event with bitmask from %d slaves but %d were served\n", + reported, served); + + if (mlx4_ARM_COMM_CHANNEL(dev)) + mlx4_warn(dev, "Failed to arm comm channel events\n"); +} + +static int sync_toggles(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + u32 wr_toggle; + u32 rd_toggle; + unsigned long end; + + wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)); + if (wr_toggle == 0xffffffff) + end = jiffies + msecs_to_jiffies(30000); + else + end = jiffies + msecs_to_jiffies(5000); + + while (time_before(jiffies, end)) { + rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)); + if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) { + /* PCI might be offline */ + + /* If device removal has been requested, + * do not continue retrying. 
+ */ + if (dev->persist->interface_state & + MLX4_INTERFACE_STATE_NOWAIT) { + mlx4_warn(dev, + "communication channel is offline\n"); + return -EIO; + } + + msleep(100); + wr_toggle = swab32(readl(&priv->mfunc.comm-> + slave_write)); + continue; + } + + if (rd_toggle >> 31 == wr_toggle >> 31) { + priv->cmd.comm_toggle = rd_toggle >> 31; + return 0; + } + + cond_resched(); + } + + /* + * we could reach here if for example the previous VM using this + * function misbehaved and left the channel with unsynced state. We + * should fix this here and give this VM a chance to use a properly + * synced channel + */ + mlx4_warn(dev, "recovering from previously mis-behaved VM\n"); + __raw_writel((__force u32) 0, &priv->mfunc.comm->slave_read); + __raw_writel((__force u32) 0, &priv->mfunc.comm->slave_write); + priv->cmd.comm_toggle = 0; + + return 0; +} + +int mlx4_multi_func_init(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *s_state; + int i, j, err, port; + + if (mlx4_is_master(dev)) + priv->mfunc.comm = + ioremap(pci_resource_start(dev->persist->pdev, + priv->fw.comm_bar) + + priv->fw.comm_base, MLX4_COMM_PAGESIZE); + else + priv->mfunc.comm = + ioremap(pci_resource_start(dev->persist->pdev, 2) + + MLX4_SLAVE_COMM_BASE, MLX4_COMM_PAGESIZE); + if (!priv->mfunc.comm) { + mlx4_err(dev, "Couldn't map communication vector\n"); + goto err_vhcr; + } + + if (mlx4_is_master(dev)) { + struct mlx4_vf_oper_state *vf_oper; + struct mlx4_vf_admin_state *vf_admin; + + priv->mfunc.master.slave_state = + kcalloc(dev->num_slaves, + sizeof(struct mlx4_slave_state), + GFP_KERNEL); + if (!priv->mfunc.master.slave_state) + goto err_comm; + + priv->mfunc.master.vf_admin = + kcalloc(dev->num_slaves, + sizeof(struct mlx4_vf_admin_state), + GFP_KERNEL); + if (!priv->mfunc.master.vf_admin) + goto err_comm_admin; + + priv->mfunc.master.vf_oper = + kcalloc(dev->num_slaves, + sizeof(struct mlx4_vf_oper_state), + GFP_KERNEL); + if (!priv->mfunc.master.vf_oper) + goto err_comm_oper; + + priv->mfunc.master.next_slave = 0; + + for (i = 0; i < dev->num_slaves; ++i) { + vf_admin = &priv->mfunc.master.vf_admin[i]; + vf_oper = &priv->mfunc.master.vf_oper[i]; + s_state = &priv->mfunc.master.slave_state[i]; + s_state->last_cmd = MLX4_COMM_CMD_RESET; + s_state->vst_qinq_supported = false; + mutex_init(&priv->mfunc.master.gen_eqe_mutex[i]); + for (j = 0; j < MLX4_EVENT_TYPES_NUM; ++j) + s_state->event_eq[j].eqn = -1; + __raw_writel((__force u32) 0, + &priv->mfunc.comm[i].slave_write); + __raw_writel((__force u32) 0, + &priv->mfunc.comm[i].slave_read); + for (port = 1; port <= MLX4_MAX_PORTS; port++) { + struct mlx4_vport_state *admin_vport; + struct mlx4_vport_state *oper_vport; + + s_state->vlan_filter[port] = + kzalloc(sizeof(struct mlx4_vlan_fltr), + GFP_KERNEL); + if (!s_state->vlan_filter[port]) { + if (--port) + kfree(s_state->vlan_filter[port]); + goto err_slaves; + } + + admin_vport = &vf_admin->vport[port]; + oper_vport = &vf_oper->vport[port].state; + INIT_LIST_HEAD(&s_state->mcast_filters[port]); + admin_vport->default_vlan = MLX4_VGT; + oper_vport->default_vlan = MLX4_VGT; + admin_vport->qos_vport = + MLX4_VPP_DEFAULT_VPORT; + oper_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT; + admin_vport->vlan_proto = htons(ETH_P_8021Q); + oper_vport->vlan_proto = htons(ETH_P_8021Q); + vf_oper->vport[port].vlan_idx = NO_INDX; + vf_oper->vport[port].mac_idx = NO_INDX; + mlx4_set_random_admin_guid(dev, i, port); + } + spin_lock_init(&s_state->lock); + } + + if (dev->caps.flags2 & 
MLX4_DEV_CAP_FLAG2_QOS_VPP) { + for (port = 1; port <= dev->caps.num_ports; port++) { + if (mlx4_is_eth(dev, port)) { + mlx4_set_default_port_qos(dev, port); + mlx4_allocate_port_vpps(dev, port); + } + } + } + + memset(&priv->mfunc.master.cmd_eqe, 0, sizeof(struct mlx4_eqe)); + priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD; + INIT_WORK(&priv->mfunc.master.comm_work, + mlx4_master_comm_channel); + INIT_WORK(&priv->mfunc.master.slave_event_work, + mlx4_gen_slave_eqe); + INIT_WORK(&priv->mfunc.master.slave_flr_event_work, + mlx4_master_handle_slave_flr); + spin_lock_init(&priv->mfunc.master.slave_state_lock); + spin_lock_init(&priv->mfunc.master.slave_eq.event_lock); + priv->mfunc.master.comm_wq = + create_singlethread_workqueue("mlx4_comm"); + if (!priv->mfunc.master.comm_wq) + goto err_slaves; + + if (mlx4_init_resource_tracker(dev)) + goto err_thread; + + } else { + err = sync_toggles(dev); + if (err) { + mlx4_err(dev, "Couldn't sync toggles\n"); + goto err_comm; + } + } + return 0; + +err_thread: + destroy_workqueue(priv->mfunc.master.comm_wq); +err_slaves: + while (i--) { + for (port = 1; port <= MLX4_MAX_PORTS; port++) + kfree(priv->mfunc.master.slave_state[i].vlan_filter[port]); + } + kfree(priv->mfunc.master.vf_oper); +err_comm_oper: + kfree(priv->mfunc.master.vf_admin); +err_comm_admin: + kfree(priv->mfunc.master.slave_state); +err_comm: + iounmap(priv->mfunc.comm); + priv->mfunc.comm = NULL; +err_vhcr: + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, + priv->mfunc.vhcr, + priv->mfunc.vhcr_dma); + priv->mfunc.vhcr = NULL; + return -ENOMEM; +} + +int mlx4_cmd_init(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int flags = 0; + + if (!priv->cmd.initialized) { + init_rwsem(&priv->cmd.switch_sem); + mutex_init(&priv->cmd.slave_cmd_mutex); + sema_init(&priv->cmd.poll_sem, 1); + priv->cmd.use_events = 0; + priv->cmd.toggle = 1; + priv->cmd.initialized = 1; + flags |= MLX4_CMD_CLEANUP_STRUCT; + } + + if (!mlx4_is_slave(dev) && !priv->cmd.hcr) { + priv->cmd.hcr = ioremap(pci_resource_start(dev->persist->pdev, + 0) + MLX4_HCR_BASE, MLX4_HCR_SIZE); + if (!priv->cmd.hcr) { + mlx4_err(dev, "Couldn't map command register\n"); + goto err; + } + flags |= MLX4_CMD_CLEANUP_HCR; + } + + if (mlx4_is_mfunc(dev) && !priv->mfunc.vhcr) { + priv->mfunc.vhcr = dma_alloc_coherent(&dev->persist->pdev->dev, + PAGE_SIZE, + &priv->mfunc.vhcr_dma, + GFP_KERNEL); + if (!priv->mfunc.vhcr) + goto err; + + flags |= MLX4_CMD_CLEANUP_VHCR; + } + + if (!priv->cmd.pool) { + priv->cmd.pool = dma_pool_create("mlx4_cmd", + &dev->persist->pdev->dev, + MLX4_MAILBOX_SIZE, + MLX4_MAILBOX_SIZE, 0); + if (!priv->cmd.pool) + goto err; + + flags |= MLX4_CMD_CLEANUP_POOL; + } + + return 0; + +err: + mlx4_cmd_cleanup(dev, flags); + return -ENOMEM; +} + +void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int slave; + u32 slave_read; + + /* If the comm channel has not yet been initialized, + * skip reporting the internal error event to all + * the communication channels. + */ + if (!priv->mfunc.comm) + return; + + /* Report an internal error event to all + * communication channels. 
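(For illustration only: mlx4_cmd_init() above tears down only what it managed to set up -- each successfully initialized piece ORs a MLX4_CMD_CLEANUP_* bit into "flags", and the error path hands that mask to mlx4_cmd_cleanup(), which frees resources selectively. The same pattern in miniature, with hypothetical resources; not driver code.)

/* Sketch: flag-tracked partial init with selective cleanup on failure. */
#include <stdlib.h>

#define CLEANUP_A (1 << 0)
#define CLEANUP_B (1 << 1)

struct toy_res { void *a, *b; };

static void toy_cleanup(struct toy_res *r, int mask)
{
	if (mask & CLEANUP_B) { free(r->b); r->b = NULL; }
	if (mask & CLEANUP_A) { free(r->a); r->a = NULL; }
}

static int toy_init(struct toy_res *r)
{
	int flags = 0;

	if (!r->a) {
		r->a = malloc(64);
		if (!r->a)
			goto err;
		flags |= CLEANUP_A;
	}
	if (!r->b) {
		r->b = malloc(64);
		if (!r->b)
			goto err;
		flags |= CLEANUP_B;
	}
	return 0;

err:
	toy_cleanup(r, flags);              /* undo only what this call created */
	return -1;
}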
+ */ + for (slave = 0; slave < dev->num_slaves; slave++) { + slave_read = swab32(readl(&priv->mfunc.comm[slave].slave_read)); + slave_read |= (u32)COMM_CHAN_EVENT_INTERNAL_ERR; + __raw_writel((__force u32)cpu_to_be32(slave_read), + &priv->mfunc.comm[slave].slave_read); + } +} + +void mlx4_multi_func_cleanup(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int i, port; + + if (mlx4_is_master(dev)) { + destroy_workqueue(priv->mfunc.master.comm_wq); + for (i = 0; i < dev->num_slaves; i++) { + for (port = 1; port <= MLX4_MAX_PORTS; port++) + kfree(priv->mfunc.master.slave_state[i].vlan_filter[port]); + } + kfree(priv->mfunc.master.slave_state); + kfree(priv->mfunc.master.vf_admin); + kfree(priv->mfunc.master.vf_oper); + dev->num_slaves = 0; + } + + iounmap(priv->mfunc.comm); + priv->mfunc.comm = NULL; +} + +void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + if (priv->cmd.pool && (cleanup_mask & MLX4_CMD_CLEANUP_POOL)) { + dma_pool_destroy(priv->cmd.pool); + priv->cmd.pool = NULL; + } + + if (!mlx4_is_slave(dev) && priv->cmd.hcr && + (cleanup_mask & MLX4_CMD_CLEANUP_HCR)) { + iounmap(priv->cmd.hcr); + priv->cmd.hcr = NULL; + } + if (mlx4_is_mfunc(dev) && priv->mfunc.vhcr && + (cleanup_mask & MLX4_CMD_CLEANUP_VHCR)) { + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, + priv->mfunc.vhcr, priv->mfunc.vhcr_dma); + priv->mfunc.vhcr = NULL; + } + if (priv->cmd.initialized && (cleanup_mask & MLX4_CMD_CLEANUP_STRUCT)) + priv->cmd.initialized = 0; +} + +/* + * Switch to using events to issue FW commands (can only be called + * after event queue for command events has been initialized). + */ +int mlx4_cmd_use_events(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int i; + int err = 0; + + priv->cmd.context = kmalloc_array(priv->cmd.max_cmds, + sizeof(struct mlx4_cmd_context), + GFP_KERNEL); + if (!priv->cmd.context) + return -ENOMEM; + + if (mlx4_is_mfunc(dev)) + mutex_lock(&priv->cmd.slave_cmd_mutex); + down_write(&priv->cmd.switch_sem); + for (i = 0; i < priv->cmd.max_cmds; ++i) { + priv->cmd.context[i].token = i; + priv->cmd.context[i].next = i + 1; + /* To support fatal error flow, initialize all + * cmd contexts to allow simulating completions + * with complete() at any time. 
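(For illustration only: the loop just below derives token_mask as the smallest all-ones value that can index max_cmds contexts, e.g. max_cmds = 10 yields a mask of 0xf. An equivalent standalone helper, with a hypothetical name; not driver code.)

/* Sketch: smallest (2^k - 1) mask covering n command contexts. */
static unsigned int toy_token_mask(unsigned int max_cmds)
{
	unsigned int mask = 1;

	while (mask < max_cmds)
		mask <<= 1;                 /* smallest power of two >= max_cmds */
	return mask - 1;                    /* e.g. max_cmds = 10 -> 0xf         */
}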
+ */ + init_completion(&priv->cmd.context[i].done); + } + + priv->cmd.context[priv->cmd.max_cmds - 1].next = -1; + priv->cmd.free_head = 0; + + sema_init(&priv->cmd.event_sem, priv->cmd.max_cmds); + + for (priv->cmd.token_mask = 1; + priv->cmd.token_mask < priv->cmd.max_cmds; + priv->cmd.token_mask <<= 1) + ; /* nothing */ + --priv->cmd.token_mask; + + down(&priv->cmd.poll_sem); + priv->cmd.use_events = 1; + up_write(&priv->cmd.switch_sem); + if (mlx4_is_mfunc(dev)) + mutex_unlock(&priv->cmd.slave_cmd_mutex); + + return err; +} + +/* + * Switch back to polling (used when shutting down the device) + */ +void mlx4_cmd_use_polling(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int i; + + if (mlx4_is_mfunc(dev)) + mutex_lock(&priv->cmd.slave_cmd_mutex); + down_write(&priv->cmd.switch_sem); + priv->cmd.use_events = 0; + + for (i = 0; i < priv->cmd.max_cmds; ++i) + down(&priv->cmd.event_sem); + + kfree(priv->cmd.context); + priv->cmd.context = NULL; + + up(&priv->cmd.poll_sem); + up_write(&priv->cmd.switch_sem); + if (mlx4_is_mfunc(dev)) + mutex_unlock(&priv->cmd.slave_cmd_mutex); +} + +struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev) +{ + struct mlx4_cmd_mailbox *mailbox; + + mailbox = kmalloc(sizeof(*mailbox), GFP_KERNEL); + if (!mailbox) + return ERR_PTR(-ENOMEM); + + mailbox->buf = dma_pool_zalloc(mlx4_priv(dev)->cmd.pool, GFP_KERNEL, + &mailbox->dma); + if (!mailbox->buf) { + kfree(mailbox); + return ERR_PTR(-ENOMEM); + } + + return mailbox; +} +EXPORT_SYMBOL_GPL(mlx4_alloc_cmd_mailbox); + +void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, + struct mlx4_cmd_mailbox *mailbox) +{ + if (!mailbox) + return; + + dma_pool_free(mlx4_priv(dev)->cmd.pool, mailbox->buf, mailbox->dma); + kfree(mailbox); +} +EXPORT_SYMBOL_GPL(mlx4_free_cmd_mailbox); + +u32 mlx4_comm_get_version(void) +{ + return ((u32) CMD_CHAN_IF_REV << 8) | (u32) CMD_CHAN_VER; +} + +static int mlx4_get_slave_indx(struct mlx4_dev *dev, int vf) +{ + if ((vf < 0) || (vf >= dev->persist->num_vfs)) { + mlx4_err(dev, "Bad vf number:%d (number of activated vf: %d)\n", + vf, dev->persist->num_vfs); + return -EINVAL; + } + + return vf+1; +} + +int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave) +{ + if (slave < 1 || slave > dev->persist->num_vfs) { + mlx4_err(dev, + "Bad slave number:%d (number of activated slaves: %lu)\n", + slave, dev->num_slaves); + return -EINVAL; + } + return slave - 1; +} + +void mlx4_cmd_wake_completions(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cmd_context *context; + int i; + + spin_lock(&priv->cmd.context_lock); + if (priv->cmd.context) { + for (i = 0; i < priv->cmd.max_cmds; ++i) { + context = &priv->cmd.context[i]; + context->fw_status = CMD_STAT_INTERNAL_ERR; + context->result = + mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + complete(&context->done); + } + } + spin_unlock(&priv->cmd.context_lock); +} + +struct mlx4_active_ports mlx4_get_active_ports(struct mlx4_dev *dev, int slave) +{ + struct mlx4_active_ports actv_ports; + int vf; + + bitmap_zero(actv_ports.ports, MLX4_MAX_PORTS); + + if (slave == 0) { + bitmap_fill(actv_ports.ports, dev->caps.num_ports); + return actv_ports; + } + + vf = mlx4_get_vf_indx(dev, slave); + if (vf < 0) + return actv_ports; + + bitmap_set(actv_ports.ports, dev->dev_vfs[vf].min_port - 1, + min((int)dev->dev_vfs[mlx4_get_vf_indx(dev, slave)].n_ports, + dev->caps.num_ports)); + + return actv_ports; +} +EXPORT_SYMBOL_GPL(mlx4_get_active_ports); + +int mlx4_slave_convert_port(struct mlx4_dev *dev, int 
slave, int port) +{ + unsigned n; + struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); + unsigned m = bitmap_weight(actv_ports.ports, dev->caps.num_ports); + + if (port <= 0 || port > m) + return -EINVAL; + + n = find_first_bit(actv_ports.ports, dev->caps.num_ports); + if (port <= n) + port = n + 1; + + return port; +} +EXPORT_SYMBOL_GPL(mlx4_slave_convert_port); + +int mlx4_phys_to_slave_port(struct mlx4_dev *dev, int slave, int port) +{ + struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); + if (test_bit(port - 1, actv_ports.ports)) + return port - + find_first_bit(actv_ports.ports, dev->caps.num_ports); + + return -1; +} +EXPORT_SYMBOL_GPL(mlx4_phys_to_slave_port); + +struct mlx4_slaves_pport mlx4_phys_to_slaves_pport(struct mlx4_dev *dev, + int port) +{ + unsigned i; + struct mlx4_slaves_pport slaves_pport; + + bitmap_zero(slaves_pport.slaves, MLX4_MFUNC_MAX); + + if (port <= 0 || port > dev->caps.num_ports) + return slaves_pport; + + for (i = 0; i < dev->persist->num_vfs + 1; i++) { + struct mlx4_active_ports actv_ports = + mlx4_get_active_ports(dev, i); + if (test_bit(port - 1, actv_ports.ports)) + set_bit(i, slaves_pport.slaves); + } + + return slaves_pport; +} +EXPORT_SYMBOL_GPL(mlx4_phys_to_slaves_pport); + +struct mlx4_slaves_pport mlx4_phys_to_slaves_pport_actv( + struct mlx4_dev *dev, + const struct mlx4_active_ports *crit_ports) +{ + unsigned i; + struct mlx4_slaves_pport slaves_pport; + + bitmap_zero(slaves_pport.slaves, MLX4_MFUNC_MAX); + + for (i = 0; i < dev->persist->num_vfs + 1; i++) { + struct mlx4_active_ports actv_ports = + mlx4_get_active_ports(dev, i); + if (bitmap_equal(crit_ports->ports, actv_ports.ports, + dev->caps.num_ports)) + set_bit(i, slaves_pport.slaves); + } + + return slaves_pport; +} +EXPORT_SYMBOL_GPL(mlx4_phys_to_slaves_pport_actv); + +static int mlx4_slaves_closest_port(struct mlx4_dev *dev, int slave, int port) +{ + struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); + int min_port = find_first_bit(actv_ports.ports, dev->caps.num_ports) + + 1; + int max_port = min_port + + bitmap_weight(actv_ports.ports, dev->caps.num_ports); + + if (port < min_port) + port = min_port; + else if (port >= max_port) + port = max_port - 1; + + return port; +} + +static int mlx4_set_vport_qos(struct mlx4_priv *priv, int slave, int port, + int max_tx_rate) +{ + int i; + int err; + struct mlx4_qos_manager *port_qos; + struct mlx4_dev *dev = &priv->dev; + struct mlx4_vport_qos_param vpp_qos[MLX4_NUM_UP]; + + port_qos = &priv->mfunc.master.qos_ctl[port]; + memset(vpp_qos, 0, sizeof(struct mlx4_vport_qos_param) * MLX4_NUM_UP); + + if (slave > port_qos->num_of_qos_vfs) { + mlx4_info(dev, "No available VPP resources for this VF\n"); + return -EINVAL; + } + + /* Query for default QoS values from Vport 0 is needed */ + err = mlx4_SET_VPORT_QOS_get(dev, port, 0, vpp_qos); + if (err) { + mlx4_info(dev, "Failed to query Vport 0 QoS values\n"); + return err; + } + + for (i = 0; i < MLX4_NUM_UP; i++) { + if (test_bit(i, port_qos->priority_bm) && max_tx_rate) { + vpp_qos[i].max_avg_bw = max_tx_rate; + vpp_qos[i].enable = 1; + } else { + /* if user supplied tx_rate == 0, meaning no rate limit + * configuration is required. so we are leaving the + * value of max_avg_bw as queried from Vport 0. 
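(For illustration only: mlx4_set_vport_qos() above applies the requested rate per user priority -- only priorities set in the port's bitmap get the limit enabled, and a rate of 0 leaves the values queried from vport 0 untouched. A reduced sketch of that fill loop follows, with hypothetical types; not driver code.)

/* Sketch: per-priority rate fill gated on the port's priority bitmap. */
#include <stdbool.h>
#include <stdint.h>

#define TOY_NUM_UP 8

struct toy_vpp_qos {
	uint32_t max_avg_bw;
	bool enable;
};

static void toy_fill_vport_qos(struct toy_vpp_qos qos[TOY_NUM_UP],
			       uint8_t prio_bitmap, uint32_t max_tx_rate)
{
	int i;

	for (i = 0; i < TOY_NUM_UP; i++) {
		if ((prio_bitmap & (1u << i)) && max_tx_rate) {
			qos[i].max_avg_bw = max_tx_rate;
			qos[i].enable = true;
		} else {
			qos[i].enable = false;   /* keep the vport 0 defaults */
		}
	}
}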
+ */ + vpp_qos[i].enable = 0; + } + } + + err = mlx4_SET_VPORT_QOS_set(dev, port, slave, vpp_qos); + if (err) { + mlx4_info(dev, "Failed to set Vport %d QoS values\n", slave); + return err; + } + + return 0; +} + +static bool mlx4_is_vf_vst_and_prio_qos(struct mlx4_dev *dev, int port, + struct mlx4_vport_state *vf_admin) +{ + struct mlx4_qos_manager *info; + struct mlx4_priv *priv = mlx4_priv(dev); + + if (!mlx4_is_master(dev) || + !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QOS_VPP)) + return false; + + info = &priv->mfunc.master.qos_ctl[port]; + + if (vf_admin->default_vlan != MLX4_VGT && + test_bit(vf_admin->default_qos, info->priority_bm)) + return true; + + return false; +} + +static bool mlx4_valid_vf_state_change(struct mlx4_dev *dev, int port, + struct mlx4_vport_state *vf_admin, + int vlan, int qos) +{ + struct mlx4_vport_state dummy_admin = {0}; + + if (!mlx4_is_vf_vst_and_prio_qos(dev, port, vf_admin) || + !vf_admin->tx_rate) + return true; + + dummy_admin.default_qos = qos; + dummy_admin.default_vlan = vlan; + + /* VF wants to move to other VST state which is valid with current + * rate limit. Either differnt default vlan in VST or other + * supported QoS priority. Otherwise we don't allow this change when + * the TX rate is still configured. + */ + if (mlx4_is_vf_vst_and_prio_qos(dev, port, &dummy_admin)) + return true; + + mlx4_info(dev, "Cannot change VF state to %s while rate is set\n", + (vlan == MLX4_VGT) ? "VGT" : "VST"); + + if (vlan != MLX4_VGT) + mlx4_info(dev, "VST priority %d not supported for QoS\n", qos); + + mlx4_info(dev, "Please set rate to 0 prior to this VF state change\n"); + + return false; +} + +int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u8 *mac) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_vport_state *s_info; + int slave; + + if (!mlx4_is_master(dev)) + return -EPROTONOSUPPORT; + + if (is_multicast_ether_addr(mac)) + return -EINVAL; + + slave = mlx4_get_slave_indx(dev, vf); + if (slave < 0) + return -EINVAL; + + port = mlx4_slaves_closest_port(dev, slave, port); + s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; + + if (s_info->spoofchk && is_zero_ether_addr(mac)) { + mlx4_info(dev, "MAC invalidation is not allowed when spoofchk is on\n"); + return -EPERM; + } + + s_info->mac = ether_addr_to_u64(mac); + mlx4_info(dev, "default mac on vf %d port %d to %llX will take effect only after vf restart\n", + vf, port, s_info->mac); + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_set_vf_mac); + + +int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos, + __be16 proto) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_vport_state *vf_admin; + struct mlx4_slave_state *slave_state; + struct mlx4_vport_oper_state *vf_oper; + int slave; + + if ((!mlx4_is_master(dev)) || + !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_VLAN_CONTROL)) + return -EPROTONOSUPPORT; + + if ((vlan > 4095) || (qos > 7)) + return -EINVAL; + + if (proto == htons(ETH_P_8021AD) && + !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP)) + return -EPROTONOSUPPORT; + + if (proto != htons(ETH_P_8021Q) && + proto != htons(ETH_P_8021AD)) + return -EINVAL; + + if ((proto == htons(ETH_P_8021AD)) && + ((vlan == 0) || (vlan == MLX4_VGT))) + return -EINVAL; + + slave = mlx4_get_slave_indx(dev, vf); + if (slave < 0) + return -EINVAL; + + slave_state = &priv->mfunc.master.slave_state[slave]; + if ((proto == htons(ETH_P_8021AD)) && (slave_state->active) && + (!slave_state->vst_qinq_supported)) { + mlx4_err(dev, "vf %d does not support VST QinQ mode\n", vf); + 
return -EPROTONOSUPPORT; + } + port = mlx4_slaves_closest_port(dev, slave, port); + vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + vf_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + + if (!mlx4_valid_vf_state_change(dev, port, vf_admin, vlan, qos)) + return -EPERM; + + if ((0 == vlan) && (0 == qos)) + vf_admin->default_vlan = MLX4_VGT; + else + vf_admin->default_vlan = vlan; + vf_admin->default_qos = qos; + vf_admin->vlan_proto = proto; + + /* If rate was configured prior to VST, we saved the configured rate + * in vf_admin->rate and now, if priority supported we enforce the QoS + */ + if (mlx4_is_vf_vst_and_prio_qos(dev, port, vf_admin) && + vf_admin->tx_rate) + vf_admin->qos_vport = slave; + + /* Try to activate new vf state without restart, + * this option is not supported while moving to VST QinQ mode. + */ + if ((proto == htons(ETH_P_8021AD) && + vf_oper->state.vlan_proto != proto) || + mlx4_master_immediate_activate_vlan_qos(priv, slave, port)) + mlx4_info(dev, + "updating vf %d port %d config will take effect on next VF restart\n", + vf, port); + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_set_vf_vlan); + +int mlx4_set_vf_rate(struct mlx4_dev *dev, int port, int vf, int min_tx_rate, + int max_tx_rate) +{ + int err; + int slave; + struct mlx4_vport_state *vf_admin; + struct mlx4_priv *priv = mlx4_priv(dev); + + if (!mlx4_is_master(dev) || + !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QOS_VPP)) + return -EPROTONOSUPPORT; + + if (min_tx_rate) { + mlx4_info(dev, "Minimum BW share not supported\n"); + return -EPROTONOSUPPORT; + } + + slave = mlx4_get_slave_indx(dev, vf); + if (slave < 0) + return -EINVAL; + + port = mlx4_slaves_closest_port(dev, slave, port); + vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + + err = mlx4_set_vport_qos(priv, slave, port, max_tx_rate); + if (err) { + mlx4_info(dev, "vf %d failed to set rate %d\n", vf, + max_tx_rate); + return err; + } + + vf_admin->tx_rate = max_tx_rate; + /* if VF is not in supported mode (VST with supported prio), + * we do not change vport configuration for its QPs, but save + * the rate, so it will be enforced when it moves to supported + * mode next time. + */ + if (!mlx4_is_vf_vst_and_prio_qos(dev, port, vf_admin)) { + mlx4_info(dev, + "rate set for VF %d when not in valid state\n", vf); + + if (vf_admin->default_vlan != MLX4_VGT) + mlx4_info(dev, "VST priority not supported by QoS\n"); + else + mlx4_info(dev, "VF in VGT mode (needed VST)\n"); + + mlx4_info(dev, + "rate %d take affect when VF moves to valid state\n", + max_tx_rate); + return 0; + } + + /* If user sets rate 0 assigning default vport for its QPs */ + vf_admin->qos_vport = max_tx_rate ? 
slave : MLX4_VPP_DEFAULT_VPORT; + + if (priv->mfunc.master.slave_state[slave].active && + dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP) + mlx4_master_immediate_activate_vlan_qos(priv, slave, port); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_set_vf_rate); + + /* mlx4_get_slave_default_vlan - + * return true if VST ( default vlan) + * if VST, will return vlan & qos (if not NULL) + */ +bool mlx4_get_slave_default_vlan(struct mlx4_dev *dev, int port, int slave, + u16 *vlan, u8 *qos) +{ + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_priv *priv; + + priv = mlx4_priv(dev); + port = mlx4_slaves_closest_port(dev, slave, port); + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + + if (MLX4_VGT != vp_oper->state.default_vlan) { + if (vlan) + *vlan = vp_oper->state.default_vlan; + if (qos) + *qos = vp_oper->state.default_qos; + return true; + } + return false; +} +EXPORT_SYMBOL_GPL(mlx4_get_slave_default_vlan); + +int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_vport_state *s_info; + int slave; + u8 mac[ETH_ALEN]; + + if ((!mlx4_is_master(dev)) || + !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FSM)) + return -EPROTONOSUPPORT; + + slave = mlx4_get_slave_indx(dev, vf); + if (slave < 0) + return -EINVAL; + + port = mlx4_slaves_closest_port(dev, slave, port); + s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; + + u64_to_ether_addr(s_info->mac, mac); + if (setting && !is_valid_ether_addr(mac)) { + mlx4_info(dev, "Illegal MAC with spoofchk\n"); + return -EPERM; + } + + s_info->spoofchk = setting; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_set_vf_spoofchk); + +int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_info *ivf) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_vport_state *s_info; + int slave; + + if (!mlx4_is_master(dev)) + return -EPROTONOSUPPORT; + + slave = mlx4_get_slave_indx(dev, vf); + if (slave < 0) + return -EINVAL; + + s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; + ivf->vf = vf; + + /* need to convert it to a func */ + ivf->mac[0] = ((s_info->mac >> (5*8)) & 0xff); + ivf->mac[1] = ((s_info->mac >> (4*8)) & 0xff); + ivf->mac[2] = ((s_info->mac >> (3*8)) & 0xff); + ivf->mac[3] = ((s_info->mac >> (2*8)) & 0xff); + ivf->mac[4] = ((s_info->mac >> (1*8)) & 0xff); + ivf->mac[5] = ((s_info->mac) & 0xff); + + ivf->vlan = s_info->default_vlan; + ivf->qos = s_info->default_qos; + ivf->vlan_proto = s_info->vlan_proto; + + if (mlx4_is_vf_vst_and_prio_qos(dev, port, s_info)) + ivf->max_tx_rate = s_info->tx_rate; + else + ivf->max_tx_rate = 0; + + ivf->min_tx_rate = 0; + ivf->spoofchk = s_info->spoofchk; + ivf->linkstate = s_info->link_state; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_get_vf_config); + +int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_vport_state *s_info; + int slave; + u8 link_stat_event; + + slave = mlx4_get_slave_indx(dev, vf); + if (slave < 0) + return -EINVAL; + + port = mlx4_slaves_closest_port(dev, slave, port); + switch (link_state) { + case IFLA_VF_LINK_STATE_AUTO: + /* get current link state */ + if (!priv->sense.do_sense_port[port]) + link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_ACTIVE; + else + link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_DOWN; + break; + + case IFLA_VF_LINK_STATE_ENABLE: + link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_ACTIVE; + break; + + case IFLA_VF_LINK_STATE_DISABLE: + link_stat_event = 
MLX4_PORT_CHANGE_SUBTYPE_DOWN; + break; + + default: + mlx4_warn(dev, "unknown value for link_state %02x on slave %d port %d\n", + link_state, slave, port); + return -EINVAL; + } + s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; + s_info->link_state = link_state; + + /* send event */ + mlx4_gen_port_state_change_eqe(dev, slave, port, link_stat_event); + + if (mlx4_master_immediate_activate_vlan_qos(priv, slave, port)) + mlx4_dbg(dev, + "updating vf %d port %d no link state HW enforcement\n", + vf, port); + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_set_vf_link_state); + +int mlx4_get_counter_stats(struct mlx4_dev *dev, int counter_index, + struct mlx4_counter *counter_stats, int reset) +{ + struct mlx4_cmd_mailbox *mailbox = NULL; + struct mlx4_counter *tmp_counter; + int err; + u32 if_stat_in_mod; + + if (!counter_stats) + return -EINVAL; + + if (counter_index == MLX4_SINK_COUNTER_INDEX(dev)) + return 0; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memset(mailbox->buf, 0, sizeof(struct mlx4_counter)); + if_stat_in_mod = counter_index; + if (reset) + if_stat_in_mod |= MLX4_QUERY_IF_STAT_RESET; + err = mlx4_cmd_box(dev, 0, mailbox->dma, + if_stat_in_mod, 0, + MLX4_CMD_QUERY_IF_STAT, + MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); + if (err) { + mlx4_dbg(dev, "%s: failed to read statistics for counter index %d\n", + __func__, counter_index); + goto if_stat_out; + } + tmp_counter = (struct mlx4_counter *)mailbox->buf; + counter_stats->counter_mode = tmp_counter->counter_mode; + if (counter_stats->counter_mode == 0) { + counter_stats->rx_frames = + cpu_to_be64(be64_to_cpu(counter_stats->rx_frames) + + be64_to_cpu(tmp_counter->rx_frames)); + counter_stats->tx_frames = + cpu_to_be64(be64_to_cpu(counter_stats->tx_frames) + + be64_to_cpu(tmp_counter->tx_frames)); + counter_stats->rx_bytes = + cpu_to_be64(be64_to_cpu(counter_stats->rx_bytes) + + be64_to_cpu(tmp_counter->rx_bytes)); + counter_stats->tx_bytes = + cpu_to_be64(be64_to_cpu(counter_stats->tx_bytes) + + be64_to_cpu(tmp_counter->tx_bytes)); + } + +if_stat_out: + mlx4_free_cmd_mailbox(dev, mailbox); + + return err; +} +EXPORT_SYMBOL_GPL(mlx4_get_counter_stats); + +int mlx4_get_vf_stats(struct mlx4_dev *dev, int port, int vf_idx, + struct ifla_vf_stats *vf_stats) +{ + struct mlx4_counter tmp_vf_stats; + int slave; + int err = 0; + + if (!vf_stats) + return -EINVAL; + + if (!mlx4_is_master(dev)) + return -EPROTONOSUPPORT; + + slave = mlx4_get_slave_indx(dev, vf_idx); + if (slave < 0) + return -EINVAL; + + port = mlx4_slaves_closest_port(dev, slave, port); + err = mlx4_calc_vf_counters(dev, slave, port, &tmp_vf_stats); + if (!err && tmp_vf_stats.counter_mode == 0) { + vf_stats->rx_packets = be64_to_cpu(tmp_vf_stats.rx_frames); + vf_stats->tx_packets = be64_to_cpu(tmp_vf_stats.tx_frames); + vf_stats->rx_bytes = be64_to_cpu(tmp_vf_stats.rx_bytes); + vf_stats->tx_bytes = be64_to_cpu(tmp_vf_stats.tx_bytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(mlx4_get_vf_stats); + +int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + if (slave < 1 || slave >= dev->num_slaves || + port < 1 || port > MLX4_MAX_PORTS) + return 0; + + return priv->mfunc.master.vf_oper[slave].smi_enabled[port] == + MLX4_VF_SMI_ENABLED; +} +EXPORT_SYMBOL_GPL(mlx4_vf_smi_enabled); + +int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + if (slave == mlx4_master_func_num(dev)) + return 1; + + if 
(slave < 1 || slave >= dev->num_slaves || + port < 1 || port > MLX4_MAX_PORTS) + return 0; + + return priv->mfunc.master.vf_admin[slave].enable_smi[port] == + MLX4_VF_SMI_ENABLED; +} +EXPORT_SYMBOL_GPL(mlx4_vf_get_enable_smi_admin); + +int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port, + int enabled) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_active_ports actv_ports = mlx4_get_active_ports( + &priv->dev, slave); + int min_port = find_first_bit(actv_ports.ports, + priv->dev.caps.num_ports) + 1; + int max_port = min_port - 1 + + bitmap_weight(actv_ports.ports, priv->dev.caps.num_ports); + + if (slave == mlx4_master_func_num(dev)) + return 0; + + if (slave < 1 || slave >= dev->num_slaves || + port < 1 || port > MLX4_MAX_PORTS || + enabled < 0 || enabled > 1) + return -EINVAL; + + if (min_port == max_port && dev->caps.num_ports > 1) { + mlx4_info(dev, "SMI access disallowed for single ported VFs\n"); + return -EPROTONOSUPPORT; + } + + priv->mfunc.master.vf_admin[slave].enable_smi[port] = enabled; + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_vf_set_enable_smi_admin); diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c new file mode 100644 index 000000000..4d4f9cf9f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -0,0 +1,482 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/hardirq.h> +#include <linux/export.h> + +#include <linux/mlx4/cmd.h> +#include <linux/mlx4/cq.h> + +#include "mlx4.h" +#include "icm.h" + +#define MLX4_CQ_STATUS_OK ( 0 << 28) +#define MLX4_CQ_STATUS_OVERFLOW ( 9 << 28) +#define MLX4_CQ_STATUS_WRITE_FAIL (10 << 28) +#define MLX4_CQ_FLAG_CC ( 1 << 18) +#define MLX4_CQ_FLAG_OI ( 1 << 17) +#define MLX4_CQ_STATE_ARMED ( 9 << 8) +#define MLX4_CQ_STATE_ARMED_SOL ( 6 << 8) +#define MLX4_EQ_STATE_FIRED (10 << 8) + +#define TASKLET_MAX_TIME 2 +#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME) + +void mlx4_cq_tasklet_cb(struct tasklet_struct *t) +{ + unsigned long flags; + unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES; + struct mlx4_eq_tasklet *ctx = from_tasklet(ctx, t, task); + struct mlx4_cq *mcq, *temp; + + spin_lock_irqsave(&ctx->lock, flags); + list_splice_tail_init(&ctx->list, &ctx->process_list); + spin_unlock_irqrestore(&ctx->lock, flags); + + list_for_each_entry_safe(mcq, temp, &ctx->process_list, tasklet_ctx.list) { + list_del_init(&mcq->tasklet_ctx.list); + mcq->tasklet_ctx.comp(mcq); + if (refcount_dec_and_test(&mcq->refcount)) + complete(&mcq->free); + if (time_after(jiffies, end)) + break; + } + + if (!list_empty(&ctx->process_list)) + tasklet_schedule(&ctx->task); +} + +static void mlx4_add_cq_to_tasklet(struct mlx4_cq *cq) +{ + struct mlx4_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv; + unsigned long flags; + bool kick; + + spin_lock_irqsave(&tasklet_ctx->lock, flags); + /* When migrating CQs between EQs will be implemented, please note + * that you need to sync this point. It is possible that + * while migrating a CQ, completions on the old EQs could + * still arrive. + */ + if (list_empty_careful(&cq->tasklet_ctx.list)) { + refcount_inc(&cq->refcount); + kick = list_empty(&tasklet_ctx->list); + list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list); + if (kick) + tasklet_schedule(&tasklet_ctx->task); + } + spin_unlock_irqrestore(&tasklet_ctx->lock, flags); +} + +void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn) +{ + struct mlx4_cq *cq; + + rcu_read_lock(); + cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree, + cqn & (dev->caps.num_cqs - 1)); + rcu_read_unlock(); + + if (!cq) { + mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn); + return; + } + + /* Acessing the CQ outside of rcu_read_lock is safe, because + * the CQ is freed only after interrupt handling is completed. + */ + ++cq->arm_sn; + + cq->comp(cq); +} + +void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type) +{ + struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table; + struct mlx4_cq *cq; + + rcu_read_lock(); + cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1)); + rcu_read_unlock(); + + if (!cq) { + mlx4_dbg(dev, "Async event for bogus CQ %08x\n", cqn); + return; + } + + /* Acessing the CQ outside of rcu_read_lock is safe, because + * the CQ is freed only after interrupt handling is completed. 
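+	 * (mlx4_cq_free() waits for the CQ refcount to drop to zero and
+	 * synchronizes the relevant IRQs before the CQN is released.)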
+ */ + cq->event(cq, event_type); +} + +static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, + int cq_num, u8 opmod) +{ + return mlx4_cmd(dev, mailbox->dma, cq_num, opmod, + MLX4_CMD_SW2HW_CQ, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_WRAPPED); +} + +static int mlx4_MODIFY_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, + int cq_num, u32 opmod) +{ + return mlx4_cmd(dev, mailbox->dma, cq_num, opmod, MLX4_CMD_MODIFY_CQ, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); +} + +static int mlx4_HW2SW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, + int cq_num) +{ + return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, + cq_num, mailbox ? 0 : 1, MLX4_CMD_HW2SW_CQ, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); +} + +int mlx4_cq_modify(struct mlx4_dev *dev, struct mlx4_cq *cq, + u16 count, u16 period) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_cq_context *cq_context; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + cq_context = mailbox->buf; + cq_context->cq_max_count = cpu_to_be16(count); + cq_context->cq_period = cpu_to_be16(period); + + err = mlx4_MODIFY_CQ(dev, mailbox, cq->cqn, 1); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_cq_modify); + +int mlx4_cq_resize(struct mlx4_dev *dev, struct mlx4_cq *cq, + int entries, struct mlx4_mtt *mtt) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_cq_context *cq_context; + u64 mtt_addr; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + cq_context = mailbox->buf; + cq_context->logsize_usrpage = cpu_to_be32(ilog2(entries) << 24); + cq_context->log_page_size = mtt->page_shift - 12; + mtt_addr = mlx4_mtt_addr(dev, mtt); + cq_context->mtt_base_addr_h = mtt_addr >> 32; + cq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff); + + err = mlx4_MODIFY_CQ(dev, mailbox, cq->cqn, 0); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_cq_resize); + +int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cq_table *cq_table = &priv->cq_table; + int err; + + *cqn = mlx4_bitmap_alloc(&cq_table->bitmap); + if (*cqn == -1) + return -ENOMEM; + + err = mlx4_table_get(dev, &cq_table->table, *cqn); + if (err) + goto err_out; + + err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn); + if (err) + goto err_put; + return 0; + +err_put: + mlx4_table_put(dev, &cq_table->table, *cqn); + +err_out: + mlx4_bitmap_free(&cq_table->bitmap, *cqn, MLX4_NO_RR); + return err; +} + +static int mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn, u8 usage) +{ + u32 in_modifier = RES_CQ | (((u32)usage & 3) << 30); + u64 out_param; + int err; + + if (mlx4_is_mfunc(dev)) { + err = mlx4_cmd_imm(dev, 0, &out_param, in_modifier, + RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (err) + return err; + else { + *cqn = get_param_l(&out_param); + return 0; + } + } + return __mlx4_cq_alloc_icm(dev, cqn); +} + +void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cq_table *cq_table = &priv->cq_table; + + mlx4_table_put(dev, &cq_table->cmpt_table, cqn); + mlx4_table_put(dev, &cq_table->table, cqn); + mlx4_bitmap_free(&cq_table->bitmap, cqn, MLX4_NO_RR); +} + +static void mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn) +{ + u64 in_param = 0; + int err; + + if (mlx4_is_mfunc(dev)) { + set_param_l(&in_param, 
cqn); + err = mlx4_cmd(dev, in_param, RES_CQ, RES_OP_RESERVE_AND_MAP, + MLX4_CMD_FREE_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (err) + mlx4_warn(dev, "Failed freeing cq:%d\n", cqn); + } else + __mlx4_cq_free_icm(dev, cqn); +} + +static int mlx4_init_user_cqes(void *buf, int entries, int cqe_size) +{ + int entries_per_copy = PAGE_SIZE / cqe_size; + void *init_ents; + int err = 0; + int i; + + init_ents = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!init_ents) + return -ENOMEM; + + /* Populate a list of CQ entries to reduce the number of + * copy_to_user calls. 0xcc is the initialization value + * required by the FW. + */ + memset(init_ents, 0xcc, PAGE_SIZE); + + if (entries_per_copy < entries) { + for (i = 0; i < entries / entries_per_copy; i++) { + err = copy_to_user((void __user *)buf, init_ents, PAGE_SIZE) ? + -EFAULT : 0; + if (err) + goto out; + + buf += PAGE_SIZE; + } + } else { + err = copy_to_user((void __user *)buf, init_ents, + array_size(entries, cqe_size)) ? + -EFAULT : 0; + } + +out: + kfree(init_ents); + + return err; +} + +static void mlx4_init_kernel_cqes(struct mlx4_buf *buf, + int entries, + int cqe_size) +{ + int i; + + if (buf->nbufs == 1) + memset(buf->direct.buf, 0xcc, entries * cqe_size); + else + for (i = 0; i < buf->npages; i++) + memset(buf->page_list[i].buf, 0xcc, + 1UL << buf->page_shift); +} + +int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, + struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec, + struct mlx4_cq *cq, unsigned vector, int collapsed, + int timestamp_en, void *buf_addr, bool user_cq) +{ + bool sw_cq_init = dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SW_CQ_INIT; + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cq_table *cq_table = &priv->cq_table; + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_cq_context *cq_context; + u64 mtt_addr; + int err; + + if (vector >= dev->caps.num_comp_vectors) + return -EINVAL; + + cq->vector = vector; + + err = mlx4_cq_alloc_icm(dev, &cq->cqn, cq->usage); + if (err) + return err; + + spin_lock(&cq_table->lock); + err = radix_tree_insert(&cq_table->tree, cq->cqn, cq); + spin_unlock(&cq_table->lock); + if (err) + goto err_icm; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto err_radix; + } + + cq_context = mailbox->buf; + cq_context->flags = cpu_to_be32(!!collapsed << 18); + if (timestamp_en) + cq_context->flags |= cpu_to_be32(1 << 19); + + cq_context->logsize_usrpage = + cpu_to_be32((ilog2(nent) << 24) | + mlx4_to_hw_uar_index(dev, uar->index)); + cq_context->comp_eqn = priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].eqn; + cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; + + mtt_addr = mlx4_mtt_addr(dev, mtt); + cq_context->mtt_base_addr_h = mtt_addr >> 32; + cq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff); + cq_context->db_rec_addr = cpu_to_be64(db_rec); + + if (sw_cq_init) { + if (user_cq) { + err = mlx4_init_user_cqes(buf_addr, nent, + dev->caps.cqe_size); + if (err) + sw_cq_init = false; + } else { + mlx4_init_kernel_cqes(buf_addr, nent, + dev->caps.cqe_size); + } + } + + err = mlx4_SW2HW_CQ(dev, mailbox, cq->cqn, sw_cq_init); + + mlx4_free_cmd_mailbox(dev, mailbox); + if (err) + goto err_radix; + + cq->cons_index = 0; + cq->arm_sn = 1; + cq->uar = uar; + refcount_set(&cq->refcount, 1); + init_completion(&cq->free); + cq->comp = mlx4_add_cq_to_tasklet; + cq->tasklet_ctx.priv = + &priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].tasklet_ctx; + INIT_LIST_HEAD(&cq->tasklet_ctx.list); + + + cq->irq = 
priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].irq; + return 0; + +err_radix: + spin_lock(&cq_table->lock); + radix_tree_delete(&cq_table->tree, cq->cqn); + spin_unlock(&cq_table->lock); + +err_icm: + mlx4_cq_free_icm(dev, cq->cqn); + + return err; +} +EXPORT_SYMBOL_GPL(mlx4_cq_alloc); + +void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cq_table *cq_table = &priv->cq_table; + int err; + + err = mlx4_HW2SW_CQ(dev, NULL, cq->cqn); + if (err) + mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn); + + spin_lock(&cq_table->lock); + radix_tree_delete(&cq_table->tree, cq->cqn); + spin_unlock(&cq_table->lock); + + synchronize_irq(priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq); + if (priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq != + priv->eq_table.eq[MLX4_EQ_ASYNC].irq) + synchronize_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq); + + if (refcount_dec_and_test(&cq->refcount)) + complete(&cq->free); + wait_for_completion(&cq->free); + + mlx4_cq_free_icm(dev, cq->cqn); +} +EXPORT_SYMBOL_GPL(mlx4_cq_free); + +int mlx4_init_cq_table(struct mlx4_dev *dev) +{ + struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table; + + spin_lock_init(&cq_table->lock); + INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); + if (mlx4_is_slave(dev)) + return 0; + + return mlx4_bitmap_init(&cq_table->bitmap, dev->caps.num_cqs, + dev->caps.num_cqs - 1, dev->caps.reserved_cqs, 0); +} + +void mlx4_cleanup_cq_table(struct mlx4_dev *dev) +{ + if (mlx4_is_slave(dev)) + return; + /* Nothing to do to clean up radix_tree */ + mlx4_bitmap_cleanup(&mlx4_priv(dev)->cq_table.bitmap); +} diff --git a/drivers/net/ethernet/mellanox/mlx4/crdump.c b/drivers/net/ethernet/mellanox/mlx4/crdump.c new file mode 100644 index 000000000..82a07a31c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/crdump.c @@ -0,0 +1,258 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "mlx4.h" + +#define BAD_ACCESS 0xBADACCE5 +#define HEALTH_BUFFER_SIZE 0x40 +#define CR_ENABLE_BIT swab32(BIT(6)) +#define CR_ENABLE_BIT_OFFSET 0xF3F04 +#define MAX_NUM_OF_DUMPS_TO_STORE (8) + +#define REGION_CR_SPACE "cr-space" +#define REGION_FW_HEALTH "fw-health" + +static const char * const region_cr_space_str = REGION_CR_SPACE; +static const char * const region_fw_health_str = REGION_FW_HEALTH; + +static const struct devlink_region_ops region_cr_space_ops = { + .name = REGION_CR_SPACE, + .destructor = &kvfree, +}; + +static const struct devlink_region_ops region_fw_health_ops = { + .name = REGION_FW_HEALTH, + .destructor = &kvfree, +}; + +/* Set to true in case cr enable bit was set to true before crdump */ +static bool crdump_enbale_bit_set; + +static void crdump_enable_crspace_access(struct mlx4_dev *dev, + u8 __iomem *cr_space) +{ + /* Get current enable bit value */ + crdump_enbale_bit_set = + readl(cr_space + CR_ENABLE_BIT_OFFSET) & CR_ENABLE_BIT; + + /* Enable FW CR filter (set bit6 to 0) */ + if (crdump_enbale_bit_set) + writel(readl(cr_space + CR_ENABLE_BIT_OFFSET) & ~CR_ENABLE_BIT, + cr_space + CR_ENABLE_BIT_OFFSET); + + /* Enable block volatile crspace accesses */ + writel(swab32(1), cr_space + dev->caps.health_buffer_addrs + + HEALTH_BUFFER_SIZE); +} + +static void crdump_disable_crspace_access(struct mlx4_dev *dev, + u8 __iomem *cr_space) +{ + /* Disable block volatile crspace accesses */ + writel(0, cr_space + dev->caps.health_buffer_addrs + + HEALTH_BUFFER_SIZE); + + /* Restore FW CR filter value (set bit6 to original value) */ + if (crdump_enbale_bit_set) + writel(readl(cr_space + CR_ENABLE_BIT_OFFSET) | CR_ENABLE_BIT, + cr_space + CR_ENABLE_BIT_OFFSET); +} + +static void mlx4_crdump_collect_crspace(struct mlx4_dev *dev, + u8 __iomem *cr_space, + u32 id) +{ + struct mlx4_fw_crdump *crdump = &dev->persist->crdump; + struct pci_dev *pdev = dev->persist->pdev; + unsigned long cr_res_size; + u8 *crspace_data; + int offset; + int err; + + if (!crdump->region_crspace) { + mlx4_err(dev, "crdump: cr-space region is NULL\n"); + return; + } + + /* Try to collect CR space */ + cr_res_size = pci_resource_len(pdev, 0); + crspace_data = kvmalloc(cr_res_size, GFP_KERNEL); + if (crspace_data) { + for (offset = 0; offset < cr_res_size; offset += 4) + *(u32 *)(crspace_data + offset) = + readl(cr_space + offset); + + err = devlink_region_snapshot_create(crdump->region_crspace, + crspace_data, id); + if (err) { + kvfree(crspace_data); + mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n", + region_cr_space_str, id, err); + } else { + mlx4_info(dev, "crdump: added snapshot %d to devlink region %s\n", + id, region_cr_space_str); + } + } else { + mlx4_err(dev, "crdump: Failed to allocate crspace buffer\n"); + } +} + +static void mlx4_crdump_collect_fw_health(struct mlx4_dev *dev, + u8 __iomem *cr_space, + u32 id) +{ + struct mlx4_fw_crdump *crdump = &dev->persist->crdump; + u8 *health_data; + int offset; + int err; + + if (!crdump->region_fw_health) { + mlx4_err(dev, "crdump: fw-health region is NULL\n"); + return; + } + + /* Try to collect health buffer */ + health_data = kvmalloc(HEALTH_BUFFER_SIZE, GFP_KERNEL); + if (health_data) { + u8 __iomem *health_buf_start = + cr_space + dev->caps.health_buffer_addrs; + + for (offset = 0; offset < HEALTH_BUFFER_SIZE; offset += 4) + *(u32 *)(health_data + offset) = + readl(health_buf_start + offset); + + err = devlink_region_snapshot_create(crdump->region_fw_health, + health_data, id); + if (err) { + kvfree(health_data); 
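+			/* devlink did not take ownership of the buffer because
+			 * snapshot creation failed, so it was freed just above;
+			 * on success it is released later through the region's
+			 * .destructor (kvfree).
+			 */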
+ mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n", + region_fw_health_str, id, err); + } else { + mlx4_info(dev, "crdump: added snapshot %d to devlink region %s\n", + id, region_fw_health_str); + } + } else { + mlx4_err(dev, "crdump: Failed to allocate health buffer\n"); + } +} + +int mlx4_crdump_collect(struct mlx4_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(mlx4_priv(dev)); + struct mlx4_fw_crdump *crdump = &dev->persist->crdump; + struct pci_dev *pdev = dev->persist->pdev; + unsigned long cr_res_size; + u8 __iomem *cr_space; + int err; + u32 id; + + if (!dev->caps.health_buffer_addrs) { + mlx4_info(dev, "crdump: FW doesn't support health buffer access, skipping\n"); + return 0; + } + + if (!crdump->snapshot_enable) { + mlx4_info(dev, "crdump: devlink snapshot disabled, skipping\n"); + return 0; + } + + cr_res_size = pci_resource_len(pdev, 0); + + cr_space = ioremap(pci_resource_start(pdev, 0), cr_res_size); + if (!cr_space) { + mlx4_err(dev, "crdump: Failed to map pci cr region\n"); + return -ENODEV; + } + + /* Get the available snapshot ID for the dumps */ + err = devlink_region_snapshot_id_get(devlink, &id); + if (err) { + mlx4_err(dev, "crdump: devlink get snapshot id err %d\n", err); + iounmap(cr_space); + return err; + } + + crdump_enable_crspace_access(dev, cr_space); + + /* Try to capture dumps */ + mlx4_crdump_collect_crspace(dev, cr_space, id); + mlx4_crdump_collect_fw_health(dev, cr_space, id); + + /* Release reference on the snapshot id */ + devlink_region_snapshot_id_put(devlink, id); + + crdump_disable_crspace_access(dev, cr_space); + + iounmap(cr_space); + return 0; +} + +int mlx4_crdump_init(struct mlx4_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(mlx4_priv(dev)); + struct mlx4_fw_crdump *crdump = &dev->persist->crdump; + struct pci_dev *pdev = dev->persist->pdev; + + crdump->snapshot_enable = false; + + /* Create cr-space region */ + crdump->region_crspace = + devl_region_create(devlink, + ®ion_cr_space_ops, + MAX_NUM_OF_DUMPS_TO_STORE, + pci_resource_len(pdev, 0)); + if (IS_ERR(crdump->region_crspace)) + mlx4_warn(dev, "crdump: create devlink region %s err %ld\n", + region_cr_space_str, + PTR_ERR(crdump->region_crspace)); + + /* Create fw-health region */ + crdump->region_fw_health = + devl_region_create(devlink, + ®ion_fw_health_ops, + MAX_NUM_OF_DUMPS_TO_STORE, + HEALTH_BUFFER_SIZE); + if (IS_ERR(crdump->region_fw_health)) + mlx4_warn(dev, "crdump: create devlink region %s err %ld\n", + region_fw_health_str, + PTR_ERR(crdump->region_fw_health)); + + return 0; +} + +void mlx4_crdump_end(struct mlx4_dev *dev) +{ + struct mlx4_fw_crdump *crdump = &dev->persist->crdump; + + devl_region_destroy(crdump->region_fw_health); + devl_region_destroy(crdump->region_crspace); +} diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c new file mode 100644 index 000000000..024788549 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -0,0 +1,303 @@ +/* + * Copyright (c) 2012 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/mlx4/device.h> +#include <linux/clocksource.h> + +#include "mlx4_en.h" + +/* mlx4_en_read_clock - read raw cycle counter (to be used by time counter) + */ +static u64 mlx4_en_read_clock(const struct cyclecounter *tc) +{ + struct mlx4_en_dev *mdev = + container_of(tc, struct mlx4_en_dev, cycles); + struct mlx4_dev *dev = mdev->dev; + + return mlx4_read_clock(dev) & tc->mask; +} + +u64 mlx4_en_get_cqe_ts(struct mlx4_cqe *cqe) +{ + u64 hi, lo; + struct mlx4_ts_cqe *ts_cqe = (struct mlx4_ts_cqe *)cqe; + + lo = (u64)be16_to_cpu(ts_cqe->timestamp_lo); + hi = ((u64)be32_to_cpu(ts_cqe->timestamp_hi) + !lo) << 16; + + return hi | lo; +} + +void mlx4_en_fill_hwtstamps(struct mlx4_en_dev *mdev, + struct skb_shared_hwtstamps *hwts, + u64 timestamp) +{ + unsigned int seq; + u64 nsec; + + do { + seq = read_seqbegin(&mdev->clock_lock); + nsec = timecounter_cyc2time(&mdev->clock, timestamp); + } while (read_seqretry(&mdev->clock_lock, seq)); + + memset(hwts, 0, sizeof(struct skb_shared_hwtstamps)); + hwts->hwtstamp = ns_to_ktime(nsec); +} + +/** + * mlx4_en_remove_timestamp - disable PTP device + * @mdev: board private structure + * + * Stop the PTP support. + **/ +void mlx4_en_remove_timestamp(struct mlx4_en_dev *mdev) +{ + if (mdev->ptp_clock) { + ptp_clock_unregister(mdev->ptp_clock); + mdev->ptp_clock = NULL; + mlx4_info(mdev, "removed PHC\n"); + } +} + +#define MLX4_EN_WRAP_AROUND_SEC 10UL +/* By scheduling the overflow check every 5 seconds, we have a reasonably + * good chance we wont miss a wrap around. + * TOTO: Use a timer instead of a work queue to increase the guarantee. 
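+ * For example, with MLX4_EN_WRAP_AROUND_SEC = 10 and HZ = 250 (HZ is
+ * config dependent), MLX4_EN_OVERFLOW_PERIOD below evaluates to
+ * 10 * 250 / 2 = 1250 jiffies, i.e. the timecounter is refreshed about
+ * every 5 seconds, half of the wrap-around budget.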
+ */ +#define MLX4_EN_OVERFLOW_PERIOD (MLX4_EN_WRAP_AROUND_SEC * HZ / 2) + +void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev) +{ + bool timeout = time_is_before_jiffies(mdev->last_overflow_check + + MLX4_EN_OVERFLOW_PERIOD); + unsigned long flags; + + if (timeout) { + write_seqlock_irqsave(&mdev->clock_lock, flags); + timecounter_read(&mdev->clock); + write_sequnlock_irqrestore(&mdev->clock_lock, flags); + mdev->last_overflow_check = jiffies; + } +} + +/** + * mlx4_en_phc_adjfreq - adjust the frequency of the hardware clock + * @ptp: ptp clock structure + * @delta: Desired frequency change in parts per billion + * + * Adjust the frequency of the PHC cycle counter by the indicated delta from + * the base frequency. + **/ +static int mlx4_en_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta) +{ + u64 adj; + u32 diff, mult; + int neg_adj = 0; + unsigned long flags; + struct mlx4_en_dev *mdev = container_of(ptp, struct mlx4_en_dev, + ptp_clock_info); + + if (delta < 0) { + neg_adj = 1; + delta = -delta; + } + mult = mdev->nominal_c_mult; + adj = mult; + adj *= delta; + diff = div_u64(adj, 1000000000ULL); + + write_seqlock_irqsave(&mdev->clock_lock, flags); + timecounter_read(&mdev->clock); + mdev->cycles.mult = neg_adj ? mult - diff : mult + diff; + write_sequnlock_irqrestore(&mdev->clock_lock, flags); + + return 0; +} + +/** + * mlx4_en_phc_adjtime - Shift the time of the hardware clock + * @ptp: ptp clock structure + * @delta: Desired change in nanoseconds + * + * Adjust the timer by resetting the timecounter structure. + **/ +static int mlx4_en_phc_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct mlx4_en_dev *mdev = container_of(ptp, struct mlx4_en_dev, + ptp_clock_info); + unsigned long flags; + + write_seqlock_irqsave(&mdev->clock_lock, flags); + timecounter_adjtime(&mdev->clock, delta); + write_sequnlock_irqrestore(&mdev->clock_lock, flags); + + return 0; +} + +/** + * mlx4_en_phc_gettime - Reads the current time from the hardware clock + * @ptp: ptp clock structure + * @ts: timespec structure to hold the current time value + * + * Read the timecounter and return the correct value in ns after converting + * it into a struct timespec. + **/ +static int mlx4_en_phc_gettime(struct ptp_clock_info *ptp, + struct timespec64 *ts) +{ + struct mlx4_en_dev *mdev = container_of(ptp, struct mlx4_en_dev, + ptp_clock_info); + unsigned long flags; + u64 ns; + + write_seqlock_irqsave(&mdev->clock_lock, flags); + ns = timecounter_read(&mdev->clock); + write_sequnlock_irqrestore(&mdev->clock_lock, flags); + + *ts = ns_to_timespec64(ns); + + return 0; +} + +/** + * mlx4_en_phc_settime - Set the current time on the hardware clock + * @ptp: ptp clock structure + * @ts: timespec containing the new time for the cycle counter + * + * Reset the timecounter to use a new base value instead of the kernel + * wall timer value. 
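+ *
+ * This is normally driven from userspace, for instance by a PTP
+ * management tool such as phc_ctl from linuxptp (phc_ctl /dev/ptpN set).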
+ **/ +static int mlx4_en_phc_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct mlx4_en_dev *mdev = container_of(ptp, struct mlx4_en_dev, + ptp_clock_info); + u64 ns = timespec64_to_ns(ts); + unsigned long flags; + + /* reset the timecounter */ + write_seqlock_irqsave(&mdev->clock_lock, flags); + timecounter_init(&mdev->clock, &mdev->cycles, ns); + write_sequnlock_irqrestore(&mdev->clock_lock, flags); + + return 0; +} + +/** + * mlx4_en_phc_enable - enable or disable an ancillary feature + * @ptp: ptp clock structure + * @request: Desired resource to enable or disable + * @on: Caller passes one to enable or zero to disable + * + * Enable (or disable) ancillary features of the PHC subsystem. + * Currently, no ancillary features are supported. + **/ +static int mlx4_en_phc_enable(struct ptp_clock_info __always_unused *ptp, + struct ptp_clock_request __always_unused *request, + int __always_unused on) +{ + return -EOPNOTSUPP; +} + +static const struct ptp_clock_info mlx4_en_ptp_clock_info = { + .owner = THIS_MODULE, + .max_adj = 100000000, + .n_alarm = 0, + .n_ext_ts = 0, + .n_per_out = 0, + .n_pins = 0, + .pps = 0, + .adjfreq = mlx4_en_phc_adjfreq, + .adjtime = mlx4_en_phc_adjtime, + .gettime64 = mlx4_en_phc_gettime, + .settime64 = mlx4_en_phc_settime, + .enable = mlx4_en_phc_enable, +}; + + +/* This function calculates the max shift that enables the user range + * of MLX4_EN_WRAP_AROUND_SEC values in the cycles register. + */ +static u32 freq_to_shift(u16 freq) +{ + u32 freq_khz = freq * 1000; + u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC; + u64 max_val_cycles_rounded = 1ULL << fls64(max_val_cycles - 1); + /* calculate max possible multiplier in order to fit in 64bit */ + u64 max_mul = div64_u64(ULLONG_MAX, max_val_cycles_rounded); + + /* This comes from the reverse of clocksource_khz2mult */ + return ilog2(div_u64(max_mul * freq_khz, 1000000)); +} + +void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) +{ + struct mlx4_dev *dev = mdev->dev; + unsigned long flags; + + /* mlx4_en_init_timestamp is called for each netdev. + * mdev->ptp_clock is common for all ports, skip initialization if + * was done for other port. + */ + if (mdev->ptp_clock) + return; + + seqlock_init(&mdev->clock_lock); + + memset(&mdev->cycles, 0, sizeof(mdev->cycles)); + mdev->cycles.read = mlx4_en_read_clock; + mdev->cycles.mask = CLOCKSOURCE_MASK(48); + mdev->cycles.shift = freq_to_shift(dev->caps.hca_core_clock); + mdev->cycles.mult = + clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift); + mdev->nominal_c_mult = mdev->cycles.mult; + + write_seqlock_irqsave(&mdev->clock_lock, flags); + timecounter_init(&mdev->clock, &mdev->cycles, + ktime_to_ns(ktime_get_real())); + write_sequnlock_irqrestore(&mdev->clock_lock, flags); + + /* Configure the PHC */ + mdev->ptp_clock_info = mlx4_en_ptp_clock_info; + snprintf(mdev->ptp_clock_info.name, 16, "mlx4 ptp"); + + mdev->ptp_clock = ptp_clock_register(&mdev->ptp_clock_info, + &mdev->pdev->dev); + if (IS_ERR(mdev->ptp_clock)) { + mdev->ptp_clock = NULL; + mlx4_err(mdev, "ptp_clock_register failed\n"); + } else if (mdev->ptp_clock) { + mlx4_info(mdev, "registered PHC clock\n"); + } + +} diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c new file mode 100644 index 000000000..1184ac575 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/mlx4/cq.h> +#include <linux/mlx4/qp.h> +#include <linux/mlx4/cmd.h> + +#include "mlx4_en.h" + +static void mlx4_en_cq_event(struct mlx4_cq *cq, enum mlx4_event event) +{ + return; +} + + +int mlx4_en_create_cq(struct mlx4_en_priv *priv, + struct mlx4_en_cq **pcq, + int entries, int ring, enum cq_type mode, + int node) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_cq *cq; + int err; + + cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, node); + if (!cq) { + en_err(priv, "Failed to allocate CQ structure\n"); + return -ENOMEM; + } + + cq->size = entries; + cq->buf_size = cq->size * mdev->dev->caps.cqe_size; + + cq->ring = ring; + cq->type = mode; + cq->vector = mdev->dev->caps.num_comp_vectors; + + /* Allocate HW buffers on provided NUMA node. + * dev->numa_node is used in mtt range allocation flow. 
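+	 * The PCI device's node is therefore pointed at the requested node
+	 * only for the duration of mlx4_alloc_hwq_res() and then restored
+	 * to dev->numa_node.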
+ */ + set_dev_node(&mdev->dev->persist->pdev->dev, node); + err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres, + cq->buf_size); + set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); + if (err) + goto err_cq; + + cq->buf = (struct mlx4_cqe *)cq->wqres.buf.direct.buf; + *pcq = cq; + + return 0; + +err_cq: + kfree(cq); + *pcq = NULL; + return err; +} + +int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, + int cq_idx) +{ + struct mlx4_en_dev *mdev = priv->mdev; + int irq, err = 0; + int timestamp_en = 0; + bool assigned_eq = false; + + cq->dev = mdev->pndev[priv->port]; + cq->mcq.set_ci_db = cq->wqres.db.db; + cq->mcq.arm_db = cq->wqres.db.db + 1; + *cq->mcq.set_ci_db = 0; + *cq->mcq.arm_db = 0; + memset(cq->buf, 0, cq->buf_size); + + if (cq->type == RX) { + if (!mlx4_is_eq_vector_valid(mdev->dev, priv->port, + cq->vector)) { + cq->vector = cpumask_first(priv->rx_ring[cq->ring]->affinity_mask); + + err = mlx4_assign_eq(mdev->dev, priv->port, + &cq->vector); + if (err) { + mlx4_err(mdev, "Failed assigning an EQ to CQ vector %d\n", + cq->vector); + goto free_eq; + } + + assigned_eq = true; + } + irq = mlx4_eq_get_irq(mdev->dev, cq->vector); + cq->aff_mask = irq_get_effective_affinity_mask(irq); + } else { + /* For TX we use the same irq per + ring we assigned for the RX */ + struct mlx4_en_cq *rx_cq; + + cq_idx = cq_idx % priv->rx_ring_num; + rx_cq = priv->rx_cq[cq_idx]; + cq->vector = rx_cq->vector; + } + + if (cq->type == RX) + cq->size = priv->rx_ring[cq->ring]->actual_size; + + if ((cq->type != RX && priv->hwtstamp_config.tx_type) || + (cq->type == RX && priv->hwtstamp_config.rx_filter)) + timestamp_en = 1; + + cq->mcq.usage = MLX4_RES_USAGE_DRIVER; + err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, + &mdev->priv_uar, cq->wqres.db.dma, &cq->mcq, + cq->vector, 0, timestamp_en, &cq->wqres.buf, false); + if (err) + goto free_eq; + + cq->mcq.event = mlx4_en_cq_event; + + switch (cq->type) { + case TX: + cq->mcq.comp = mlx4_en_tx_irq; + netif_napi_add_tx(cq->dev, &cq->napi, mlx4_en_poll_tx_cq); + napi_enable(&cq->napi); + break; + case RX: + cq->mcq.comp = mlx4_en_rx_irq; + netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq); + napi_enable(&cq->napi); + break; + case TX_XDP: + /* nothing regarding napi, it's shared with rx ring */ + cq->xdp_busy = false; + break; + } + + return 0; + +free_eq: + if (assigned_eq) + mlx4_release_eq(mdev->dev, cq->vector); + cq->vector = mdev->dev->caps.num_comp_vectors; + return err; +} + +void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_cq *cq = *pcq; + + mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size); + if (mlx4_is_eq_vector_valid(mdev->dev, priv->port, cq->vector) && + cq->type == RX) + mlx4_release_eq(priv->mdev->dev, cq->vector); + cq->vector = 0; + cq->buf_size = 0; + cq->buf = NULL; + kfree(cq); + *pcq = NULL; +} + +void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) +{ + if (cq->type != TX_XDP) { + napi_disable(&cq->napi); + netif_napi_del(&cq->napi); + } + + mlx4_cq_free(priv->mdev->dev, &cq->mcq); +} + +/* Set rx cq moderation parameters */ +int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) +{ + return mlx4_cq_modify(priv->mdev->dev, &cq->mcq, + cq->moder_cnt, cq->moder_time); +} + +void mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) +{ + mlx4_cq_arm(&cq->mcq, MLX4_CQ_DB_REQ_NOT, priv->mdev->uar_map, + &priv->mdev->uar_lock); +} + + diff --git 
a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c new file mode 100644 index 000000000..752a72499 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c @@ -0,0 +1,753 @@ +/* + * Copyright (c) 2011 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/dcbnl.h> +#include <linux/math64.h> + +#include "mlx4_en.h" +#include "fw_qos.h" + +enum { + MLX4_CEE_STATE_DOWN = 0, + MLX4_CEE_STATE_UP = 1, +}; + +/* Definitions for QCN + */ + +struct mlx4_congestion_control_mb_prio_802_1_qau_params { + __be32 modify_enable_high; + __be32 modify_enable_low; + __be32 reserved1; + __be32 extended_enable; + __be32 rppp_max_rps; + __be32 rpg_time_reset; + __be32 rpg_byte_reset; + __be32 rpg_threshold; + __be32 rpg_max_rate; + __be32 rpg_ai_rate; + __be32 rpg_hai_rate; + __be32 rpg_gd; + __be32 rpg_min_dec_fac; + __be32 rpg_min_rate; + __be32 max_time_rise; + __be32 max_byte_rise; + __be32 max_qdelta; + __be32 min_qoffset; + __be32 gd_coefficient; + __be32 reserved2[5]; + __be32 cp_sample_base; + __be32 reserved3[39]; +}; + +struct mlx4_congestion_control_mb_prio_802_1_qau_statistics { + __be64 rppp_rp_centiseconds; + __be32 reserved1; + __be32 ignored_cnm; + __be32 rppp_created_rps; + __be32 estimated_total_rate; + __be32 max_active_rate_limiter_index; + __be32 dropped_cnms_busy_fw; + __be32 reserved2; + __be32 cnms_handled_successfully; + __be32 min_total_limiters_rate; + __be32 max_total_limiters_rate; + __be32 reserved3[4]; +}; + +static u8 mlx4_en_dcbnl_getcap(struct net_device *dev, int capid, u8 *cap) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + switch (capid) { + case DCB_CAP_ATTR_PFC: + *cap = true; + break; + case DCB_CAP_ATTR_DCBX: + *cap = priv->dcbx_cap; + break; + case DCB_CAP_ATTR_PFC_TCS: + *cap = 1 << mlx4_max_tc(priv->mdev->dev); + break; + default: + *cap = false; + break; + } + + return 0; +} + +static u8 mlx4_en_dcbnl_getpfcstate(struct net_device *netdev) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + + return priv->cee_config.pfc_state; +} + +static void mlx4_en_dcbnl_setpfcstate(struct net_device *netdev, u8 state) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + + 
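+	/* Only cache the requested CEE PFC state here; it is programmed into
+	 * the device by mlx4_en_dcbnl_set_all() via mlx4_SET_PORT_general().
+	 */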
priv->cee_config.pfc_state = state; +} + +static void mlx4_en_dcbnl_get_pfc_cfg(struct net_device *netdev, int priority, + u8 *setting) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + + *setting = priv->cee_config.dcb_pfc[priority]; +} + +static void mlx4_en_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority, + u8 setting) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + + priv->cee_config.dcb_pfc[priority] = setting; + priv->cee_config.pfc_state = true; +} + +static int mlx4_en_dcbnl_getnumtcs(struct net_device *netdev, int tcid, u8 *num) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + + if (!(priv->flags & MLX4_EN_FLAG_DCB_ENABLED)) + return -EINVAL; + + if (tcid == DCB_NUMTCS_ATTR_PFC) + *num = mlx4_max_tc(priv->mdev->dev); + else + *num = 0; + + return 0; +} + +static u8 mlx4_en_dcbnl_set_all(struct net_device *netdev) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + struct mlx4_en_port_profile *prof = priv->prof; + struct mlx4_en_dev *mdev = priv->mdev; + u8 tx_pause, tx_ppp, rx_pause, rx_ppp; + + if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return 1; + + if (priv->cee_config.pfc_state) { + int tc; + rx_ppp = prof->rx_ppp; + tx_ppp = prof->tx_ppp; + + for (tc = 0; tc < CEE_DCBX_MAX_PRIO; tc++) { + u8 tc_mask = 1 << tc; + + switch (priv->cee_config.dcb_pfc[tc]) { + case pfc_disabled: + tx_ppp &= ~tc_mask; + rx_ppp &= ~tc_mask; + break; + case pfc_enabled_full: + tx_ppp |= tc_mask; + rx_ppp |= tc_mask; + break; + case pfc_enabled_tx: + tx_ppp |= tc_mask; + rx_ppp &= ~tc_mask; + break; + case pfc_enabled_rx: + tx_ppp &= ~tc_mask; + rx_ppp |= tc_mask; + break; + default: + break; + } + } + rx_pause = !!(rx_ppp || tx_ppp) ? 0 : prof->rx_pause; + tx_pause = !!(rx_ppp || tx_ppp) ? 0 : prof->tx_pause; + } else { + rx_ppp = 0; + tx_ppp = 0; + rx_pause = prof->rx_pause; + tx_pause = prof->tx_pause; + } + + if (mlx4_SET_PORT_general(mdev->dev, priv->port, + priv->rx_skb_size + ETH_FCS_LEN, + tx_pause, tx_ppp, rx_pause, rx_ppp)) { + en_err(priv, "Failed setting pause params\n"); + return 1; + } + + prof->tx_ppp = tx_ppp; + prof->rx_ppp = rx_ppp; + prof->tx_pause = tx_pause; + prof->rx_pause = rx_pause; + + return 0; +} + +static u8 mlx4_en_dcbnl_get_state(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + if (priv->flags & MLX4_EN_FLAG_DCB_ENABLED) + return MLX4_CEE_STATE_UP; + + return MLX4_CEE_STATE_DOWN; +} + +static u8 mlx4_en_dcbnl_set_state(struct net_device *dev, u8 state) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int num_tcs = 0; + + if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return 1; + + if (!!(state) == !!(priv->flags & MLX4_EN_FLAG_DCB_ENABLED)) + return 0; + + if (state) { + priv->flags |= MLX4_EN_FLAG_DCB_ENABLED; + num_tcs = IEEE_8021QAZ_MAX_TCS; + } else { + priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED; + } + + if (mlx4_en_alloc_tx_queue_per_tc(dev, num_tcs)) + return 1; + + return 0; +} + +/* On success returns a non-zero 802.1p user priority bitmap + * otherwise returns 0 as the invalid user priority bitmap to + * indicate an error. 
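+ * The lookup also returns 0 when the CEE flavour of DCBX is not enabled
+ * in priv->dcbx_cap.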
+ */ +static int mlx4_en_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + struct dcb_app app = { + .selector = idtype, + .protocol = id, + }; + if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return 0; + + return dcb_getapp(netdev, &app); +} + +static int mlx4_en_dcbnl_setapp(struct net_device *netdev, u8 idtype, + u16 id, u8 up) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + struct dcb_app app; + + if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return -EINVAL; + + memset(&app, 0, sizeof(struct dcb_app)); + app.selector = idtype; + app.protocol = id; + app.priority = up; + + return dcb_setapp(netdev, &app); +} + +static int mlx4_en_dcbnl_ieee_getets(struct net_device *dev, + struct ieee_ets *ets) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct ieee_ets *my_ets = &priv->ets; + + if (!my_ets) + return -EINVAL; + + ets->ets_cap = IEEE_8021QAZ_MAX_TCS; + ets->cbs = my_ets->cbs; + memcpy(ets->tc_tx_bw, my_ets->tc_tx_bw, sizeof(ets->tc_tx_bw)); + memcpy(ets->tc_tsa, my_ets->tc_tsa, sizeof(ets->tc_tsa)); + memcpy(ets->prio_tc, my_ets->prio_tc, sizeof(ets->prio_tc)); + + return 0; +} + +static int mlx4_en_ets_validate(struct mlx4_en_priv *priv, struct ieee_ets *ets) +{ + int i; + int total_ets_bw = 0; + int has_ets_tc = 0; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (ets->prio_tc[i] >= MLX4_EN_NUM_UP_HIGH) { + en_err(priv, "Bad priority in UP <=> TC mapping. TC: %d, UP: %d\n", + i, ets->prio_tc[i]); + return -EINVAL; + } + + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_VENDOR: + case IEEE_8021QAZ_TSA_STRICT: + break; + case IEEE_8021QAZ_TSA_ETS: + has_ets_tc = 1; + total_ets_bw += ets->tc_tx_bw[i]; + break; + default: + en_err(priv, "TC[%d]: Not supported TSA: %d\n", + i, ets->tc_tsa[i]); + return -EOPNOTSUPP; + } + } + + if (has_ets_tc && total_ets_bw != MLX4_EN_BW_MAX) { + en_err(priv, "Bad ETS BW sum: %d. 
Should be exactly 100%%\n", + total_ets_bw); + return -EINVAL; + } + + return 0; +} + +static int mlx4_en_config_port_scheduler(struct mlx4_en_priv *priv, + struct ieee_ets *ets, u16 *ratelimit) +{ + struct mlx4_en_dev *mdev = priv->mdev; + int num_strict = 0; + int i; + __u8 tc_tx_bw[IEEE_8021QAZ_MAX_TCS] = { 0 }; + __u8 pg[IEEE_8021QAZ_MAX_TCS] = { 0 }; + + ets = ets ?: &priv->ets; + ratelimit = ratelimit ?: priv->maxrate; + + /* higher TC means higher priority => lower pg */ + for (i = IEEE_8021QAZ_MAX_TCS - 1; i >= 0; i--) { + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_VENDOR: + pg[i] = MLX4_EN_TC_VENDOR; + tc_tx_bw[i] = MLX4_EN_BW_MAX; + break; + case IEEE_8021QAZ_TSA_STRICT: + pg[i] = num_strict++; + tc_tx_bw[i] = MLX4_EN_BW_MAX; + break; + case IEEE_8021QAZ_TSA_ETS: + pg[i] = MLX4_EN_TC_ETS; + tc_tx_bw[i] = ets->tc_tx_bw[i] ?: MLX4_EN_BW_MIN; + break; + } + } + + return mlx4_SET_PORT_SCHEDULER(mdev->dev, priv->port, tc_tx_bw, pg, + ratelimit); +} + +static int +mlx4_en_dcbnl_ieee_setets(struct net_device *dev, struct ieee_ets *ets) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int err; + + err = mlx4_en_ets_validate(priv, ets); + if (err) + return err; + + err = mlx4_SET_PORT_PRIO2TC(mdev->dev, priv->port, ets->prio_tc); + if (err) + return err; + + err = mlx4_en_config_port_scheduler(priv, ets, NULL); + if (err) + return err; + + memcpy(&priv->ets, ets, sizeof(priv->ets)); + + return 0; +} + +static int mlx4_en_dcbnl_ieee_getpfc(struct net_device *dev, + struct ieee_pfc *pfc) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS; + pfc->pfc_en = priv->prof->tx_ppp; + + return 0; +} + +static int mlx4_en_dcbnl_ieee_setpfc(struct net_device *dev, + struct ieee_pfc *pfc) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_port_profile *prof = priv->prof; + struct mlx4_en_dev *mdev = priv->mdev; + u32 tx_pause, tx_ppp, rx_pause, rx_ppp; + int err; + + en_dbg(DRV, priv, "cap: 0x%x en: 0x%x mbc: 0x%x delay: %d\n", + pfc->pfc_cap, + pfc->pfc_en, + pfc->mbc, + pfc->delay); + + rx_pause = prof->rx_pause && !pfc->pfc_en; + tx_pause = prof->tx_pause && !pfc->pfc_en; + rx_ppp = pfc->pfc_en; + tx_ppp = pfc->pfc_en; + + err = mlx4_SET_PORT_general(mdev->dev, priv->port, + priv->rx_skb_size + ETH_FCS_LEN, + tx_pause, tx_ppp, rx_pause, rx_ppp); + if (err) { + en_err(priv, "Failed setting pause params\n"); + return err; + } + + mlx4_en_update_pfc_stats_bitmap(mdev->dev, &priv->stats_bitmap, + rx_ppp, rx_pause, tx_ppp, tx_pause); + + prof->tx_ppp = tx_ppp; + prof->rx_ppp = rx_ppp; + prof->rx_pause = rx_pause; + prof->tx_pause = tx_pause; + + return err; +} + +static u8 mlx4_en_dcbnl_getdcbx(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + return priv->dcbx_cap; +} + +static u8 mlx4_en_dcbnl_setdcbx(struct net_device *dev, u8 mode) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct ieee_ets ets = {0}; + struct ieee_pfc pfc = {0}; + + if (mode == priv->dcbx_cap) + return 0; + + if ((mode & DCB_CAP_DCBX_LLD_MANAGED) || + ((mode & DCB_CAP_DCBX_VER_IEEE) && + (mode & DCB_CAP_DCBX_VER_CEE)) || + !(mode & DCB_CAP_DCBX_HOST)) + goto err; + + priv->dcbx_cap = mode; + + ets.ets_cap = IEEE_8021QAZ_MAX_TCS; + pfc.pfc_cap = IEEE_8021QAZ_MAX_TCS; + + if (mode & DCB_CAP_DCBX_VER_IEEE) { + if (mlx4_en_dcbnl_ieee_setets(dev, &ets)) + goto err; + if (mlx4_en_dcbnl_ieee_setpfc(dev, &pfc)) + goto err; + } else if (mode & DCB_CAP_DCBX_VER_CEE) { + if 
(mlx4_en_dcbnl_set_all(dev)) + goto err; + } else { + if (mlx4_en_dcbnl_ieee_setets(dev, &ets)) + goto err; + if (mlx4_en_dcbnl_ieee_setpfc(dev, &pfc)) + goto err; + if (mlx4_en_alloc_tx_queue_per_tc(dev, 0)) + goto err; + } + + return 0; +err: + return 1; +} + +#define MLX4_RATELIMIT_UNITS_IN_KB 100000 /* rate-limit HW unit in Kbps */ +static int mlx4_en_dcbnl_ieee_getmaxrate(struct net_device *dev, + struct ieee_maxrate *maxrate) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int i; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + maxrate->tc_maxrate[i] = + priv->maxrate[i] * MLX4_RATELIMIT_UNITS_IN_KB; + + return 0; +} + +static int mlx4_en_dcbnl_ieee_setmaxrate(struct net_device *dev, + struct ieee_maxrate *maxrate) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + u16 tmp[IEEE_8021QAZ_MAX_TCS]; + int i, err; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + /* Convert from Kbps into HW units, rounding result up. + * Setting to 0, means unlimited BW. + */ + tmp[i] = div_u64(maxrate->tc_maxrate[i] + + MLX4_RATELIMIT_UNITS_IN_KB - 1, + MLX4_RATELIMIT_UNITS_IN_KB); + } + + err = mlx4_en_config_port_scheduler(priv, NULL, tmp); + if (err) + return err; + + memcpy(priv->maxrate, tmp, sizeof(priv->maxrate)); + + return 0; +} + +#define RPG_ENABLE_BIT 31 +#define CN_TAG_BIT 30 + +static int mlx4_en_dcbnl_ieee_getqcn(struct net_device *dev, + struct ieee_qcn *qcn) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_congestion_control_mb_prio_802_1_qau_params *hw_qcn; + struct mlx4_cmd_mailbox *mailbox_out = NULL; + u64 mailbox_in_dma = 0; + u32 inmod = 0; + int i, err; + + if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QCN)) + return -EOPNOTSUPP; + + mailbox_out = mlx4_alloc_cmd_mailbox(priv->mdev->dev); + if (IS_ERR(mailbox_out)) + return -ENOMEM; + hw_qcn = + (struct mlx4_congestion_control_mb_prio_802_1_qau_params *) + mailbox_out->buf; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + inmod = priv->port | ((1 << i) << 8) | + (MLX4_CTRL_ALGO_802_1_QAU_REACTION_POINT << 16); + err = mlx4_cmd_box(priv->mdev->dev, mailbox_in_dma, + mailbox_out->dma, + inmod, MLX4_CONGESTION_CONTROL_GET_PARAMS, + MLX4_CMD_CONGESTION_CTRL_OPCODE, + MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); + if (err) { + mlx4_free_cmd_mailbox(priv->mdev->dev, mailbox_out); + return err; + } + + qcn->rpg_enable[i] = + be32_to_cpu(hw_qcn->extended_enable) >> RPG_ENABLE_BIT; + qcn->rppp_max_rps[i] = + be32_to_cpu(hw_qcn->rppp_max_rps); + qcn->rpg_time_reset[i] = + be32_to_cpu(hw_qcn->rpg_time_reset); + qcn->rpg_byte_reset[i] = + be32_to_cpu(hw_qcn->rpg_byte_reset); + qcn->rpg_threshold[i] = + be32_to_cpu(hw_qcn->rpg_threshold); + qcn->rpg_max_rate[i] = + be32_to_cpu(hw_qcn->rpg_max_rate); + qcn->rpg_ai_rate[i] = + be32_to_cpu(hw_qcn->rpg_ai_rate); + qcn->rpg_hai_rate[i] = + be32_to_cpu(hw_qcn->rpg_hai_rate); + qcn->rpg_gd[i] = + be32_to_cpu(hw_qcn->rpg_gd); + qcn->rpg_min_dec_fac[i] = + be32_to_cpu(hw_qcn->rpg_min_dec_fac); + qcn->rpg_min_rate[i] = + be32_to_cpu(hw_qcn->rpg_min_rate); + qcn->cndd_state_machine[i] = + priv->cndd_state[i]; + } + mlx4_free_cmd_mailbox(priv->mdev->dev, mailbox_out); + return 0; +} + +static int mlx4_en_dcbnl_ieee_setqcn(struct net_device *dev, + struct ieee_qcn *qcn) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_congestion_control_mb_prio_802_1_qau_params *hw_qcn; + struct mlx4_cmd_mailbox *mailbox_in = NULL; + u64 mailbox_in_dma = 0; + u32 inmod = 0; + int i, err; +#define MODIFY_ENABLE_HIGH_MASK 0xc0000000 +#define MODIFY_ENABLE_LOW_MASK 0xffc00000 
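+ /* Descriptive note on the two masks above (based on the existing
+  * "modify enable bit" comment in the loop below): they are the
+  * modify-enable words of the QCN parameter mailbox. The driver writes
+  * both words in full before each CONGESTION_CONTROL_SET_PARAMS command
+  * so that the firmware applies every parameter field filled in by the
+  * loop body, rather than ignoring fields whose enable bit is clear.
+  */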
+ + if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QCN)) + return -EOPNOTSUPP; + + mailbox_in = mlx4_alloc_cmd_mailbox(priv->mdev->dev); + if (IS_ERR(mailbox_in)) + return -ENOMEM; + + mailbox_in_dma = mailbox_in->dma; + hw_qcn = + (struct mlx4_congestion_control_mb_prio_802_1_qau_params *)mailbox_in->buf; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + inmod = priv->port | ((1 << i) << 8) | + (MLX4_CTRL_ALGO_802_1_QAU_REACTION_POINT << 16); + + /* Before updating QCN parameter, + * need to set it's modify enable bit to 1 + */ + + hw_qcn->modify_enable_high = cpu_to_be32( + MODIFY_ENABLE_HIGH_MASK); + hw_qcn->modify_enable_low = cpu_to_be32(MODIFY_ENABLE_LOW_MASK); + + hw_qcn->extended_enable = cpu_to_be32(qcn->rpg_enable[i] << RPG_ENABLE_BIT); + hw_qcn->rppp_max_rps = cpu_to_be32(qcn->rppp_max_rps[i]); + hw_qcn->rpg_time_reset = cpu_to_be32(qcn->rpg_time_reset[i]); + hw_qcn->rpg_byte_reset = cpu_to_be32(qcn->rpg_byte_reset[i]); + hw_qcn->rpg_threshold = cpu_to_be32(qcn->rpg_threshold[i]); + hw_qcn->rpg_max_rate = cpu_to_be32(qcn->rpg_max_rate[i]); + hw_qcn->rpg_ai_rate = cpu_to_be32(qcn->rpg_ai_rate[i]); + hw_qcn->rpg_hai_rate = cpu_to_be32(qcn->rpg_hai_rate[i]); + hw_qcn->rpg_gd = cpu_to_be32(qcn->rpg_gd[i]); + hw_qcn->rpg_min_dec_fac = cpu_to_be32(qcn->rpg_min_dec_fac[i]); + hw_qcn->rpg_min_rate = cpu_to_be32(qcn->rpg_min_rate[i]); + priv->cndd_state[i] = qcn->cndd_state_machine[i]; + if (qcn->cndd_state_machine[i] == DCB_CNDD_INTERIOR_READY) + hw_qcn->extended_enable |= cpu_to_be32(1 << CN_TAG_BIT); + + err = mlx4_cmd(priv->mdev->dev, mailbox_in_dma, inmod, + MLX4_CONGESTION_CONTROL_SET_PARAMS, + MLX4_CMD_CONGESTION_CTRL_OPCODE, + MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); + if (err) { + mlx4_free_cmd_mailbox(priv->mdev->dev, mailbox_in); + return err; + } + } + mlx4_free_cmd_mailbox(priv->mdev->dev, mailbox_in); + return 0; +} + +static int mlx4_en_dcbnl_ieee_getqcnstats(struct net_device *dev, + struct ieee_qcn_stats *qcn_stats) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_congestion_control_mb_prio_802_1_qau_statistics *hw_qcn_stats; + struct mlx4_cmd_mailbox *mailbox_out = NULL; + u64 mailbox_in_dma = 0; + u32 inmod = 0; + int i, err; + + if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QCN)) + return -EOPNOTSUPP; + + mailbox_out = mlx4_alloc_cmd_mailbox(priv->mdev->dev); + if (IS_ERR(mailbox_out)) + return -ENOMEM; + + hw_qcn_stats = + (struct mlx4_congestion_control_mb_prio_802_1_qau_statistics *) + mailbox_out->buf; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + inmod = priv->port | ((1 << i) << 8) | + (MLX4_CTRL_ALGO_802_1_QAU_REACTION_POINT << 16); + err = mlx4_cmd_box(priv->mdev->dev, mailbox_in_dma, + mailbox_out->dma, inmod, + MLX4_CONGESTION_CONTROL_GET_STATISTICS, + MLX4_CMD_CONGESTION_CTRL_OPCODE, + MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); + if (err) { + mlx4_free_cmd_mailbox(priv->mdev->dev, mailbox_out); + return err; + } + qcn_stats->rppp_rp_centiseconds[i] = + be64_to_cpu(hw_qcn_stats->rppp_rp_centiseconds); + qcn_stats->rppp_created_rps[i] = + be32_to_cpu(hw_qcn_stats->rppp_created_rps); + } + mlx4_free_cmd_mailbox(priv->mdev->dev, mailbox_out); + return 0; +} + +const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops = { + .ieee_getets = mlx4_en_dcbnl_ieee_getets, + .ieee_setets = mlx4_en_dcbnl_ieee_setets, + .ieee_getmaxrate = mlx4_en_dcbnl_ieee_getmaxrate, + .ieee_setmaxrate = mlx4_en_dcbnl_ieee_setmaxrate, + .ieee_getqcn = mlx4_en_dcbnl_ieee_getqcn, + .ieee_setqcn = mlx4_en_dcbnl_ieee_setqcn, + .ieee_getqcnstats = 
mlx4_en_dcbnl_ieee_getqcnstats, + .ieee_getpfc = mlx4_en_dcbnl_ieee_getpfc, + .ieee_setpfc = mlx4_en_dcbnl_ieee_setpfc, + + .getstate = mlx4_en_dcbnl_get_state, + .setstate = mlx4_en_dcbnl_set_state, + .getpfccfg = mlx4_en_dcbnl_get_pfc_cfg, + .setpfccfg = mlx4_en_dcbnl_set_pfc_cfg, + .setall = mlx4_en_dcbnl_set_all, + .getcap = mlx4_en_dcbnl_getcap, + .getnumtcs = mlx4_en_dcbnl_getnumtcs, + .getpfcstate = mlx4_en_dcbnl_getpfcstate, + .setpfcstate = mlx4_en_dcbnl_setpfcstate, + .getapp = mlx4_en_dcbnl_getapp, + .setapp = mlx4_en_dcbnl_setapp, + + .getdcbx = mlx4_en_dcbnl_getdcbx, + .setdcbx = mlx4_en_dcbnl_setdcbx, +}; + +const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops = { + .ieee_getpfc = mlx4_en_dcbnl_ieee_getpfc, + .ieee_setpfc = mlx4_en_dcbnl_ieee_setpfc, + + .setstate = mlx4_en_dcbnl_set_state, + .getpfccfg = mlx4_en_dcbnl_get_pfc_cfg, + .setpfccfg = mlx4_en_dcbnl_set_pfc_cfg, + .setall = mlx4_en_dcbnl_set_all, + .getnumtcs = mlx4_en_dcbnl_getnumtcs, + .getpfcstate = mlx4_en_dcbnl_getpfcstate, + .setpfcstate = mlx4_en_dcbnl_setpfcstate, + .getapp = mlx4_en_dcbnl_getapp, + .setapp = mlx4_en_dcbnl_setapp, + + .getdcbx = mlx4_en_dcbnl_getdcbx, + .setdcbx = mlx4_en_dcbnl_setdcbx, +}; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c new file mode 100644 index 000000000..7d45f1d55 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -0,0 +1,2193 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#include <linux/kernel.h> +#include <linux/ethtool.h> +#include <linux/netdevice.h> +#include <linux/mlx4/driver.h> +#include <linux/mlx4/device.h> +#include <linux/in.h> +#include <net/ip.h> +#include <linux/bitmap.h> +#include <linux/mii.h> + +#include "mlx4_en.h" +#include "en_port.h" + +#define EN_ETHTOOL_QP_ATTACH (1ull << 63) +#define EN_ETHTOOL_SHORT_MASK cpu_to_be16(0xffff) +#define EN_ETHTOOL_WORD_MASK cpu_to_be32(0xffffffff) + +int mlx4_en_moderation_update(struct mlx4_en_priv *priv) +{ + int i, t; + int err = 0; + + for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) { + for (i = 0; i < priv->tx_ring_num[t]; i++) { + priv->tx_cq[t][i]->moder_cnt = priv->tx_frames; + priv->tx_cq[t][i]->moder_time = priv->tx_usecs; + if (priv->port_up) { + err = mlx4_en_set_cq_moder(priv, + priv->tx_cq[t][i]); + if (err) + return err; + } + } + } + + if (priv->adaptive_rx_coal) + return 0; + + for (i = 0; i < priv->rx_ring_num; i++) { + priv->rx_cq[i]->moder_cnt = priv->rx_frames; + priv->rx_cq[i]->moder_time = priv->rx_usecs; + priv->last_moder_time[i] = MLX4_EN_AUTO_CONF; + if (priv->port_up) { + err = mlx4_en_set_cq_moder(priv, priv->rx_cq[i]); + if (err) + return err; + } + } + + return err; +} + +static void +mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + + strscpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver)); + strscpy(drvinfo->version, DRV_VERSION, + sizeof(drvinfo->version)); + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%d", + (u16) (mdev->dev->caps.fw_ver >> 32), + (u16) ((mdev->dev->caps.fw_ver >> 16) & 0xffff), + (u16) (mdev->dev->caps.fw_ver & 0xffff)); + strscpy(drvinfo->bus_info, pci_name(mdev->dev->persist->pdev), + sizeof(drvinfo->bus_info)); +} + +static const char mlx4_en_priv_flags[][ETH_GSTRING_LEN] = { + "blueflame", + "phv-bit" +}; + +static const char main_strings[][ETH_GSTRING_LEN] = { + /* main statistics */ + "rx_packets", "tx_packets", "rx_bytes", "tx_bytes", "rx_errors", + "tx_errors", "rx_dropped", "tx_dropped", "multicast", "collisions", + "rx_length_errors", "rx_over_errors", "rx_crc_errors", + "rx_frame_errors", "rx_fifo_errors", "rx_missed_errors", + "tx_aborted_errors", "tx_carrier_errors", "tx_fifo_errors", + "tx_heartbeat_errors", "tx_window_errors", + + /* port statistics */ + "tso_packets", + "xmit_more", + "queue_stopped", "wake_queue", "tx_timeout", "rx_alloc_pages", + "rx_csum_good", "rx_csum_none", "rx_csum_complete", "tx_chksum_offload", + + /* pf statistics */ + "pf_rx_packets", + "pf_rx_bytes", + "pf_tx_packets", + "pf_tx_bytes", + + /* priority flow control statistics rx */ + "rx_pause_prio_0", "rx_pause_duration_prio_0", + "rx_pause_transition_prio_0", + "rx_pause_prio_1", "rx_pause_duration_prio_1", + "rx_pause_transition_prio_1", + "rx_pause_prio_2", "rx_pause_duration_prio_2", + "rx_pause_transition_prio_2", + "rx_pause_prio_3", "rx_pause_duration_prio_3", + "rx_pause_transition_prio_3", + "rx_pause_prio_4", "rx_pause_duration_prio_4", + "rx_pause_transition_prio_4", + "rx_pause_prio_5", "rx_pause_duration_prio_5", + "rx_pause_transition_prio_5", + "rx_pause_prio_6", "rx_pause_duration_prio_6", + "rx_pause_transition_prio_6", + "rx_pause_prio_7", "rx_pause_duration_prio_7", + "rx_pause_transition_prio_7", + + /* flow control statistics rx */ + "rx_pause", "rx_pause_duration", "rx_pause_transition", + + /* priority flow control statistics tx */ + "tx_pause_prio_0", "tx_pause_duration_prio_0", 
+ "tx_pause_transition_prio_0", + "tx_pause_prio_1", "tx_pause_duration_prio_1", + "tx_pause_transition_prio_1", + "tx_pause_prio_2", "tx_pause_duration_prio_2", + "tx_pause_transition_prio_2", + "tx_pause_prio_3", "tx_pause_duration_prio_3", + "tx_pause_transition_prio_3", + "tx_pause_prio_4", "tx_pause_duration_prio_4", + "tx_pause_transition_prio_4", + "tx_pause_prio_5", "tx_pause_duration_prio_5", + "tx_pause_transition_prio_5", + "tx_pause_prio_6", "tx_pause_duration_prio_6", + "tx_pause_transition_prio_6", + "tx_pause_prio_7", "tx_pause_duration_prio_7", + "tx_pause_transition_prio_7", + + /* flow control statistics tx */ + "tx_pause", "tx_pause_duration", "tx_pause_transition", + + /* packet statistics */ + "rx_multicast_packets", + "rx_broadcast_packets", + "rx_jabbers", + "rx_in_range_length_error", + "rx_out_range_length_error", + "tx_multicast_packets", + "tx_broadcast_packets", + "rx_prio_0_packets", "rx_prio_0_bytes", + "rx_prio_1_packets", "rx_prio_1_bytes", + "rx_prio_2_packets", "rx_prio_2_bytes", + "rx_prio_3_packets", "rx_prio_3_bytes", + "rx_prio_4_packets", "rx_prio_4_bytes", + "rx_prio_5_packets", "rx_prio_5_bytes", + "rx_prio_6_packets", "rx_prio_6_bytes", + "rx_prio_7_packets", "rx_prio_7_bytes", + "rx_novlan_packets", "rx_novlan_bytes", + "tx_prio_0_packets", "tx_prio_0_bytes", + "tx_prio_1_packets", "tx_prio_1_bytes", + "tx_prio_2_packets", "tx_prio_2_bytes", + "tx_prio_3_packets", "tx_prio_3_bytes", + "tx_prio_4_packets", "tx_prio_4_bytes", + "tx_prio_5_packets", "tx_prio_5_bytes", + "tx_prio_6_packets", "tx_prio_6_bytes", + "tx_prio_7_packets", "tx_prio_7_bytes", + "tx_novlan_packets", "tx_novlan_bytes", + + /* xdp statistics */ + "rx_xdp_drop", + "rx_xdp_redirect", + "rx_xdp_redirect_fail", + "rx_xdp_tx", + "rx_xdp_tx_full", + + /* phy statistics */ + "rx_packets_phy", "rx_bytes_phy", + "tx_packets_phy", "tx_bytes_phy", +}; + +static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= { + "Interrupt Test", + "Link Test", + "Speed Test", + "Register Test", + "Loopback Test", +}; + +static u32 mlx4_en_get_msglevel(struct net_device *dev) +{ + return ((struct mlx4_en_priv *) netdev_priv(dev))->msg_enable; +} + +static void mlx4_en_set_msglevel(struct net_device *dev, u32 val) +{ + ((struct mlx4_en_priv *) netdev_priv(dev))->msg_enable = val; +} + +static void mlx4_en_get_wol(struct net_device *netdev, + struct ethtool_wolinfo *wol) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + struct mlx4_caps *caps = &priv->mdev->dev->caps; + int err = 0; + u64 config = 0; + u64 mask; + + if ((priv->port < 1) || (priv->port > 2)) { + en_err(priv, "Failed to get WoL information\n"); + return; + } + + mask = (priv->port == 1) ? MLX4_DEV_CAP_FLAG_WOL_PORT1 : + MLX4_DEV_CAP_FLAG_WOL_PORT2; + + if (!(caps->flags & mask)) { + wol->supported = 0; + wol->wolopts = 0; + return; + } + + if (caps->wol_port[priv->port]) + wol->supported = WAKE_MAGIC; + else + wol->supported = 0; + + err = mlx4_wol_read(priv->mdev->dev, &config, priv->port); + if (err) { + en_err(priv, "Failed to get WoL information\n"); + return; + } + + if ((config & MLX4_EN_WOL_ENABLED) && (config & MLX4_EN_WOL_MAGIC)) + wol->wolopts = WAKE_MAGIC; + else + wol->wolopts = 0; +} + +static int mlx4_en_set_wol(struct net_device *netdev, + struct ethtool_wolinfo *wol) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + u64 config = 0; + int err = 0; + u64 mask; + + if ((priv->port < 1) || (priv->port > 2)) + return -EOPNOTSUPP; + + mask = (priv->port == 1) ? 
MLX4_DEV_CAP_FLAG_WOL_PORT1 : + MLX4_DEV_CAP_FLAG_WOL_PORT2; + + if (!(priv->mdev->dev->caps.flags & mask)) + return -EOPNOTSUPP; + + if (wol->supported & ~WAKE_MAGIC) + return -EINVAL; + + err = mlx4_wol_read(priv->mdev->dev, &config, priv->port); + if (err) { + en_err(priv, "Failed to get WoL info, unable to modify\n"); + return err; + } + + if (wol->wolopts & WAKE_MAGIC) { + config |= MLX4_EN_WOL_DO_MODIFY | MLX4_EN_WOL_ENABLED | + MLX4_EN_WOL_MAGIC; + } else { + config &= ~(MLX4_EN_WOL_ENABLED | MLX4_EN_WOL_MAGIC); + config |= MLX4_EN_WOL_DO_MODIFY; + } + + err = mlx4_wol_write(priv->mdev->dev, config, priv->port); + if (err) + en_err(priv, "Failed to set WoL information\n"); + + return err; +} + +struct bitmap_iterator { + unsigned long *stats_bitmap; + unsigned int count; + unsigned int iterator; + bool advance_array; /* if set, force no increments */ +}; + +static inline void bitmap_iterator_init(struct bitmap_iterator *h, + unsigned long *stats_bitmap, + int count) +{ + h->iterator = 0; + h->advance_array = !bitmap_empty(stats_bitmap, count); + h->count = h->advance_array ? bitmap_weight(stats_bitmap, count) + : count; + h->stats_bitmap = stats_bitmap; +} + +static inline int bitmap_iterator_test(struct bitmap_iterator *h) +{ + return !h->advance_array ? 1 : test_bit(h->iterator, h->stats_bitmap); +} + +static inline int bitmap_iterator_inc(struct bitmap_iterator *h) +{ + return h->iterator++; +} + +static inline unsigned int +bitmap_iterator_count(struct bitmap_iterator *h) +{ + return h->count; +} + +static int mlx4_en_get_sset_count(struct net_device *dev, int sset) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct bitmap_iterator it; + + bitmap_iterator_init(&it, priv->stats_bitmap.bitmap, NUM_ALL_STATS); + + switch (sset) { + case ETH_SS_STATS: + return bitmap_iterator_count(&it) + + (priv->tx_ring_num[TX] * 2) + + (priv->rx_ring_num * (3 + NUM_XDP_STATS)); + case ETH_SS_TEST: + return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.flags + & MLX4_DEV_CAP_FLAG_UC_LOOPBACK) * 2; + case ETH_SS_PRIV_FLAGS: + return ARRAY_SIZE(mlx4_en_priv_flags); + default: + return -EOPNOTSUPP; + } +} + +static void mlx4_en_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, uint64_t *data) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int index = 0; + int i; + struct bitmap_iterator it; + + bitmap_iterator_init(&it, priv->stats_bitmap.bitmap, NUM_ALL_STATS); + + spin_lock_bh(&priv->stats_lock); + + mlx4_en_fold_software_stats(dev); + + for (i = 0; i < NUM_MAIN_STATS; i++, bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + data[index++] = ((unsigned long *)&dev->stats)[i]; + + for (i = 0; i < NUM_PORT_STATS; i++, bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + data[index++] = ((unsigned long *)&priv->port_stats)[i]; + + for (i = 0; i < NUM_PF_STATS; i++, bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + data[index++] = + ((unsigned long *)&priv->pf_stats)[i]; + + for (i = 0; i < NUM_FLOW_PRIORITY_STATS_RX; + i++, bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + data[index++] = + ((u64 *)&priv->rx_priority_flowstats)[i]; + + for (i = 0; i < NUM_FLOW_STATS_RX; i++, bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + data[index++] = ((u64 *)&priv->rx_flowstats)[i]; + + for (i = 0; i < NUM_FLOW_PRIORITY_STATS_TX; + i++, bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + data[index++] = + ((u64 *)&priv->tx_priority_flowstats)[i]; + + for (i = 0; i < NUM_FLOW_STATS_TX; i++, bitmap_iterator_inc(&it)) 
+ if (bitmap_iterator_test(&it)) + data[index++] = ((u64 *)&priv->tx_flowstats)[i]; + + for (i = 0; i < NUM_PKT_STATS; i++, bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + data[index++] = ((unsigned long *)&priv->pkstats)[i]; + + for (i = 0; i < NUM_XDP_STATS; i++, bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + data[index++] = ((unsigned long *)&priv->xdp_stats)[i]; + + for (i = 0; i < NUM_PHY_STATS; i++, bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + data[index++] = ((unsigned long *)&priv->phy_stats)[i]; + + for (i = 0; i < priv->tx_ring_num[TX]; i++) { + data[index++] = priv->tx_ring[TX][i]->packets; + data[index++] = priv->tx_ring[TX][i]->bytes; + } + for (i = 0; i < priv->rx_ring_num; i++) { + data[index++] = priv->rx_ring[i]->packets; + data[index++] = priv->rx_ring[i]->bytes; + data[index++] = priv->rx_ring[i]->dropped; + data[index++] = priv->rx_ring[i]->xdp_drop; + data[index++] = priv->rx_ring[i]->xdp_redirect; + data[index++] = priv->rx_ring[i]->xdp_redirect_fail; + data[index++] = priv->rx_ring[i]->xdp_tx; + data[index++] = priv->rx_ring[i]->xdp_tx_full; + } + spin_unlock_bh(&priv->stats_lock); + +} + +static void mlx4_en_self_test(struct net_device *dev, + struct ethtool_test *etest, u64 *buf) +{ + mlx4_en_ex_selftest(dev, &etest->flags, buf); +} + +static void mlx4_en_get_strings(struct net_device *dev, + uint32_t stringset, uint8_t *data) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int index = 0; + int i, strings = 0; + struct bitmap_iterator it; + + bitmap_iterator_init(&it, priv->stats_bitmap.bitmap, NUM_ALL_STATS); + + switch (stringset) { + case ETH_SS_TEST: + for (i = 0; i < MLX4_EN_NUM_SELF_TEST - 2; i++) + strcpy(data + i * ETH_GSTRING_LEN, mlx4_en_test_names[i]); + if (priv->mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UC_LOOPBACK) + for (; i < MLX4_EN_NUM_SELF_TEST; i++) + strcpy(data + i * ETH_GSTRING_LEN, mlx4_en_test_names[i]); + break; + + case ETH_SS_STATS: + /* Add main counters */ + for (i = 0; i < NUM_MAIN_STATS; i++, strings++, + bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + strcpy(data + (index++) * ETH_GSTRING_LEN, + main_strings[strings]); + + for (i = 0; i < NUM_PORT_STATS; i++, strings++, + bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + strcpy(data + (index++) * ETH_GSTRING_LEN, + main_strings[strings]); + + for (i = 0; i < NUM_PF_STATS; i++, strings++, + bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + strcpy(data + (index++) * ETH_GSTRING_LEN, + main_strings[strings]); + + for (i = 0; i < NUM_FLOW_STATS; i++, strings++, + bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + strcpy(data + (index++) * ETH_GSTRING_LEN, + main_strings[strings]); + + for (i = 0; i < NUM_PKT_STATS; i++, strings++, + bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + strcpy(data + (index++) * ETH_GSTRING_LEN, + main_strings[strings]); + + for (i = 0; i < NUM_XDP_STATS; i++, strings++, + bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + strcpy(data + (index++) * ETH_GSTRING_LEN, + main_strings[strings]); + + for (i = 0; i < NUM_PHY_STATS; i++, strings++, + bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + strcpy(data + (index++) * ETH_GSTRING_LEN, + main_strings[strings]); + + for (i = 0; i < priv->tx_ring_num[TX]; i++) { + sprintf(data + (index++) * ETH_GSTRING_LEN, + "tx%d_packets", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "tx%d_bytes", i); + } + for (i = 0; i < priv->rx_ring_num; i++) { + sprintf(data + (index++) * ETH_GSTRING_LEN, + 
"rx%d_packets", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_bytes", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_dropped", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_xdp_drop", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_xdp_redirect", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_xdp_redirect_fail", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_xdp_tx", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_xdp_tx_full", i); + } + break; + case ETH_SS_PRIV_FLAGS: + for (i = 0; i < ARRAY_SIZE(mlx4_en_priv_flags); i++) + strcpy(data + i * ETH_GSTRING_LEN, + mlx4_en_priv_flags[i]); + break; + + } +} + +static u32 mlx4_en_autoneg_get(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + u32 autoneg = AUTONEG_DISABLE; + + if ((mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP) && + (priv->port_state.flags & MLX4_EN_PORT_ANE)) + autoneg = AUTONEG_ENABLE; + + return autoneg; +} + +static void ptys2ethtool_update_supported_port(unsigned long *mask, + struct mlx4_ptys_reg *ptys_reg) +{ + u32 eth_proto = be32_to_cpu(ptys_reg->eth_proto_cap); + + if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_T) + | MLX4_PROT_MASK(MLX4_1000BASE_T) + | MLX4_PROT_MASK(MLX4_100BASE_TX))) { + __set_bit(ETHTOOL_LINK_MODE_TP_BIT, mask); + } else if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_CR) + | MLX4_PROT_MASK(MLX4_10GBASE_SR) + | MLX4_PROT_MASK(MLX4_56GBASE_SR4) + | MLX4_PROT_MASK(MLX4_40GBASE_CR4) + | MLX4_PROT_MASK(MLX4_40GBASE_SR4) + | MLX4_PROT_MASK(MLX4_1000BASE_CX_SGMII))) { + __set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, mask); + } else if (eth_proto & (MLX4_PROT_MASK(MLX4_56GBASE_KR4) + | MLX4_PROT_MASK(MLX4_40GBASE_KR4) + | MLX4_PROT_MASK(MLX4_20GBASE_KR2) + | MLX4_PROT_MASK(MLX4_10GBASE_KR) + | MLX4_PROT_MASK(MLX4_10GBASE_KX4) + | MLX4_PROT_MASK(MLX4_1000BASE_KX))) { + __set_bit(ETHTOOL_LINK_MODE_Backplane_BIT, mask); + } +} + +static u32 ptys_get_active_port(struct mlx4_ptys_reg *ptys_reg) +{ + u32 eth_proto = be32_to_cpu(ptys_reg->eth_proto_oper); + + if (!eth_proto) /* link down */ + eth_proto = be32_to_cpu(ptys_reg->eth_proto_cap); + + if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_T) + | MLX4_PROT_MASK(MLX4_1000BASE_T) + | MLX4_PROT_MASK(MLX4_100BASE_TX))) { + return PORT_TP; + } + + if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_SR) + | MLX4_PROT_MASK(MLX4_56GBASE_SR4) + | MLX4_PROT_MASK(MLX4_40GBASE_SR4) + | MLX4_PROT_MASK(MLX4_1000BASE_CX_SGMII))) { + return PORT_FIBRE; + } + + if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_CR) + | MLX4_PROT_MASK(MLX4_56GBASE_CR4) + | MLX4_PROT_MASK(MLX4_40GBASE_CR4))) { + return PORT_DA; + } + + if (eth_proto & (MLX4_PROT_MASK(MLX4_56GBASE_KR4) + | MLX4_PROT_MASK(MLX4_40GBASE_KR4) + | MLX4_PROT_MASK(MLX4_20GBASE_KR2) + | MLX4_PROT_MASK(MLX4_10GBASE_KR) + | MLX4_PROT_MASK(MLX4_10GBASE_KX4) + | MLX4_PROT_MASK(MLX4_1000BASE_KX))) { + return PORT_NONE; + } + return PORT_OTHER; +} + +#define MLX4_LINK_MODES_SZ \ + (sizeof_field(struct mlx4_ptys_reg, eth_proto_cap) * 8) + +enum ethtool_report { + SUPPORTED = 0, + ADVERTISED = 1, +}; + +struct ptys2ethtool_config { + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised); + u32 speed; +}; + +static unsigned long *ptys2ethtool_link_mode(struct ptys2ethtool_config *cfg, + enum ethtool_report report) +{ + switch (report) { + case SUPPORTED: + return cfg->supported; + case ADVERTISED: + return cfg->advertised; + } + return NULL; +} + +#define 
MLX4_BUILD_PTYS2ETHTOOL_CONFIG(reg_, speed_, ...) \ + ({ \ + struct ptys2ethtool_config *cfg; \ + static const unsigned int modes[] = { __VA_ARGS__ }; \ + unsigned int i; \ + cfg = &ptys2ethtool_map[reg_]; \ + cfg->speed = speed_; \ + linkmode_zero(cfg->supported); \ + linkmode_zero(cfg->advertised); \ + for (i = 0 ; i < ARRAY_SIZE(modes) ; ++i) { \ + __set_bit(modes[i], cfg->supported); \ + __set_bit(modes[i], cfg->advertised); \ + } \ + }) + +/* Translates mlx4 link mode to equivalent ethtool Link modes/speed */ +static struct ptys2ethtool_config ptys2ethtool_map[MLX4_LINK_MODES_SZ]; + +void __init mlx4_en_init_ptys2ethtool_map(void) +{ + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_100BASE_TX, SPEED_100, + ETHTOOL_LINK_MODE_100baseT_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_T, SPEED_1000, + ETHTOOL_LINK_MODE_1000baseT_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_CX_SGMII, SPEED_1000, + ETHTOOL_LINK_MODE_1000baseX_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_KX, SPEED_1000, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_T, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseT_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_CX4, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_KX4, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_KR, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_CR, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseCR_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_SR, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseSR_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_20GBASE_KR2, SPEED_20000, + ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT, + ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_40GBASE_CR4, SPEED_40000, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_40GBASE_KR4, SPEED_40000, + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_40GBASE_SR4, SPEED_40000, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_56GBASE_KR4, SPEED_56000, + ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_56GBASE_CR4, SPEED_56000, + ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_56GBASE_SR4, SPEED_56000, + ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT); +}; + +static void ptys2ethtool_update_link_modes(unsigned long *link_modes, + u32 eth_proto, + enum ethtool_report report) +{ + int i; + for (i = 0; i < MLX4_LINK_MODES_SZ; i++) { + if (eth_proto & MLX4_PROT_MASK(i)) + linkmode_or(link_modes, link_modes, + ptys2ethtool_link_mode(&ptys2ethtool_map[i], report)); + } +} + +static u32 ethtool2ptys_link_modes(const unsigned long *link_modes, + enum ethtool_report report) +{ + int i; + u32 ptys_modes = 0; + + for (i = 0; i < MLX4_LINK_MODES_SZ; i++) { + ulong *map_mode = ptys2ethtool_link_mode(&ptys2ethtool_map[i], + report); + if (linkmode_intersects(map_mode, link_modes)) + ptys_modes |= 1 << i; + } + return ptys_modes; +} + +/* Convert actual speed (SPEED_XXX) to ptys link modes */ +static u32 speed2ptys_link_modes(u32 speed) +{ + int i; + u32 ptys_modes = 0; + + for (i = 0; i < MLX4_LINK_MODES_SZ; i++) { + if (ptys2ethtool_map[i].speed == speed) + ptys_modes |= 1 << i; + } + return ptys_modes; +} + +static int +ethtool_get_ptys_link_ksettings(struct net_device *dev, + struct 
ethtool_link_ksettings *link_ksettings) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_ptys_reg ptys_reg; + u32 eth_proto; + int ret; + + memset(&ptys_reg, 0, sizeof(ptys_reg)); + ptys_reg.local_port = priv->port; + ptys_reg.proto_mask = MLX4_PTYS_EN; + ret = mlx4_ACCESS_PTYS_REG(priv->mdev->dev, + MLX4_ACCESS_REG_QUERY, &ptys_reg); + if (ret) { + en_warn(priv, "Failed to run mlx4_ACCESS_PTYS_REG status(%x)", + ret); + return ret; + } + en_dbg(DRV, priv, "ptys_reg.proto_mask %x\n", + ptys_reg.proto_mask); + en_dbg(DRV, priv, "ptys_reg.eth_proto_cap %x\n", + be32_to_cpu(ptys_reg.eth_proto_cap)); + en_dbg(DRV, priv, "ptys_reg.eth_proto_admin %x\n", + be32_to_cpu(ptys_reg.eth_proto_admin)); + en_dbg(DRV, priv, "ptys_reg.eth_proto_oper %x\n", + be32_to_cpu(ptys_reg.eth_proto_oper)); + en_dbg(DRV, priv, "ptys_reg.eth_proto_lp_adv %x\n", + be32_to_cpu(ptys_reg.eth_proto_lp_adv)); + + /* reset supported/advertising masks */ + ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); + ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); + + ptys2ethtool_update_supported_port(link_ksettings->link_modes.supported, + &ptys_reg); + + eth_proto = be32_to_cpu(ptys_reg.eth_proto_cap); + ptys2ethtool_update_link_modes(link_ksettings->link_modes.supported, + eth_proto, SUPPORTED); + + eth_proto = be32_to_cpu(ptys_reg.eth_proto_admin); + ptys2ethtool_update_link_modes(link_ksettings->link_modes.advertising, + eth_proto, ADVERTISED); + + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, + Pause); + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, + Asym_Pause); + + if (priv->prof->tx_pause) + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, Pause); + if (priv->prof->tx_pause ^ priv->prof->rx_pause) + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, Asym_Pause); + + link_ksettings->base.port = ptys_get_active_port(&ptys_reg); + + if (mlx4_en_autoneg_get(dev)) { + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, Autoneg); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, Autoneg); + } + + link_ksettings->base.autoneg + = (priv->port_state.flags & MLX4_EN_PORT_ANC) ? 
+ AUTONEG_ENABLE : AUTONEG_DISABLE; + + eth_proto = be32_to_cpu(ptys_reg.eth_proto_lp_adv); + + ethtool_link_ksettings_zero_link_mode(link_ksettings, lp_advertising); + ptys2ethtool_update_link_modes( + link_ksettings->link_modes.lp_advertising, + eth_proto, ADVERTISED); + if (priv->port_state.flags & MLX4_EN_PORT_ANC) + ethtool_link_ksettings_add_link_mode(link_ksettings, + lp_advertising, Autoneg); + + link_ksettings->base.phy_address = 0; + link_ksettings->base.mdio_support = 0; + link_ksettings->base.eth_tp_mdix = ETH_TP_MDI_INVALID; + link_ksettings->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; + + return ret; +} + +static void +ethtool_get_default_link_ksettings( + struct net_device *dev, struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int trans_type; + + link_ksettings->base.autoneg = AUTONEG_DISABLE; + + ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, + 10000baseT_Full); + + ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); + ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, + 10000baseT_Full); + + trans_type = priv->port_state.transceiver; + if (trans_type > 0 && trans_type <= 0xC) { + link_ksettings->base.port = PORT_FIBRE; + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, FIBRE); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, FIBRE); + } else if (trans_type == 0x80 || trans_type == 0) { + link_ksettings->base.port = PORT_TP; + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, TP); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, TP); + } else { + link_ksettings->base.port = -1; + } +} + +static int +mlx4_en_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int ret = -EINVAL; + + if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) + return -ENOMEM; + + en_dbg(DRV, priv, "query port state.flags ANC(%x) ANE(%x)\n", + priv->port_state.flags & MLX4_EN_PORT_ANC, + priv->port_state.flags & MLX4_EN_PORT_ANE); + + if (priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL) + ret = ethtool_get_ptys_link_ksettings(dev, link_ksettings); + if (ret) /* ETH PROT CRTL is not supported or PTYS CMD failed */ + ethtool_get_default_link_ksettings(dev, link_ksettings); + + if (netif_carrier_ok(dev)) { + link_ksettings->base.speed = priv->port_state.link_speed; + link_ksettings->base.duplex = DUPLEX_FULL; + } else { + link_ksettings->base.speed = SPEED_UNKNOWN; + link_ksettings->base.duplex = DUPLEX_UNKNOWN; + } + return 0; +} + +/* Calculate PTYS admin according ethtool speed (SPEED_XXX) */ +static __be32 speed_set_ptys_admin(struct mlx4_en_priv *priv, u32 speed, + __be32 proto_cap) +{ + __be32 proto_admin = 0; + + if (!speed) { /* Speed = 0 ==> Reset Link modes */ + proto_admin = proto_cap; + en_info(priv, "Speed was set to 0, Reset advertised Link Modes to default (%x)\n", + be32_to_cpu(proto_cap)); + } else { + u32 ptys_link_modes = speed2ptys_link_modes(speed); + + proto_admin = cpu_to_be32(ptys_link_modes) & proto_cap; + en_info(priv, "Setting Speed to %d\n", speed); + } + return proto_admin; +} + +static int +mlx4_en_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_ptys_reg ptys_reg; + __be32 proto_admin; + u8 cur_autoneg; + int ret; + + u32 
ptys_adv = ethtool2ptys_link_modes( + link_ksettings->link_modes.advertising, ADVERTISED); + const int speed = link_ksettings->base.speed; + + en_dbg(DRV, priv, + "Set Speed=%d adv={%*pbl} autoneg=%d duplex=%d\n", + speed, __ETHTOOL_LINK_MODE_MASK_NBITS, + link_ksettings->link_modes.advertising, + link_ksettings->base.autoneg, + link_ksettings->base.duplex); + + if (!(priv->mdev->dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL) || + (link_ksettings->base.duplex == DUPLEX_HALF)) + return -EINVAL; + + memset(&ptys_reg, 0, sizeof(ptys_reg)); + ptys_reg.local_port = priv->port; + ptys_reg.proto_mask = MLX4_PTYS_EN; + ret = mlx4_ACCESS_PTYS_REG(priv->mdev->dev, + MLX4_ACCESS_REG_QUERY, &ptys_reg); + if (ret) { + en_warn(priv, "Failed to QUERY mlx4_ACCESS_PTYS_REG status(%x)\n", + ret); + return 0; + } + + cur_autoneg = ptys_reg.flags & MLX4_PTYS_AN_DISABLE_ADMIN ? + AUTONEG_DISABLE : AUTONEG_ENABLE; + + if (link_ksettings->base.autoneg == AUTONEG_DISABLE) { + proto_admin = speed_set_ptys_admin(priv, speed, + ptys_reg.eth_proto_cap); + if ((be32_to_cpu(proto_admin) & + (MLX4_PROT_MASK(MLX4_1000BASE_CX_SGMII) | + MLX4_PROT_MASK(MLX4_1000BASE_KX))) && + (ptys_reg.flags & MLX4_PTYS_AN_DISABLE_CAP)) + ptys_reg.flags |= MLX4_PTYS_AN_DISABLE_ADMIN; + } else { + proto_admin = cpu_to_be32(ptys_adv); + ptys_reg.flags &= ~MLX4_PTYS_AN_DISABLE_ADMIN; + } + + proto_admin &= ptys_reg.eth_proto_cap; + if (!proto_admin) { + en_warn(priv, "Not supported link mode(s) requested, check supported link modes.\n"); + return -EINVAL; /* nothing to change due to bad input */ + } + + if ((proto_admin == ptys_reg.eth_proto_admin) && + ((ptys_reg.flags & MLX4_PTYS_AN_DISABLE_CAP) && + (link_ksettings->base.autoneg == cur_autoneg))) + return 0; /* Nothing to change */ + + en_dbg(DRV, priv, "mlx4_ACCESS_PTYS_REG SET: ptys_reg.eth_proto_admin = 0x%x\n", + be32_to_cpu(proto_admin)); + + ptys_reg.eth_proto_admin = proto_admin; + ret = mlx4_ACCESS_PTYS_REG(priv->mdev->dev, MLX4_ACCESS_REG_WRITE, + &ptys_reg); + if (ret) { + en_warn(priv, "Failed to write mlx4_ACCESS_PTYS_REG eth_proto_admin(0x%x) status(0x%x)", + be32_to_cpu(ptys_reg.eth_proto_admin), ret); + return ret; + } + + mutex_lock(&priv->mdev->state_lock); + if (priv->port_up) { + en_warn(priv, "Port link mode changed, restarting port...\n"); + mlx4_en_stop_port(dev, 1); + if (mlx4_en_start_port(dev)) + en_err(priv, "Failed restarting port %d\n", priv->port); + } + mutex_unlock(&priv->mdev->state_lock); + return 0; +} + +static int mlx4_en_get_coalesce(struct net_device *dev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + coal->tx_coalesce_usecs = priv->tx_usecs; + coal->tx_max_coalesced_frames = priv->tx_frames; + coal->tx_max_coalesced_frames_irq = priv->tx_work_limit; + + coal->rx_coalesce_usecs = priv->rx_usecs; + coal->rx_max_coalesced_frames = priv->rx_frames; + + coal->pkt_rate_low = priv->pkt_rate_low; + coal->rx_coalesce_usecs_low = priv->rx_usecs_low; + coal->pkt_rate_high = priv->pkt_rate_high; + coal->rx_coalesce_usecs_high = priv->rx_usecs_high; + coal->rate_sample_interval = priv->sample_interval; + coal->use_adaptive_rx_coalesce = priv->adaptive_rx_coal; + + return 0; +} + +static int mlx4_en_set_coalesce(struct net_device *dev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + if 
(!coal->tx_max_coalesced_frames_irq) + return -EINVAL; + + if (coal->tx_coalesce_usecs > MLX4_EN_MAX_COAL_TIME || + coal->rx_coalesce_usecs > MLX4_EN_MAX_COAL_TIME || + coal->rx_coalesce_usecs_low > MLX4_EN_MAX_COAL_TIME || + coal->rx_coalesce_usecs_high > MLX4_EN_MAX_COAL_TIME) { + netdev_info(dev, "%s: maximum coalesce time supported is %d usecs\n", + __func__, MLX4_EN_MAX_COAL_TIME); + return -ERANGE; + } + + if (coal->tx_max_coalesced_frames > MLX4_EN_MAX_COAL_PKTS || + coal->rx_max_coalesced_frames > MLX4_EN_MAX_COAL_PKTS) { + netdev_info(dev, "%s: maximum coalesced frames supported is %d\n", + __func__, MLX4_EN_MAX_COAL_PKTS); + return -ERANGE; + } + + priv->rx_frames = (coal->rx_max_coalesced_frames == + MLX4_EN_AUTO_CONF) ? + MLX4_EN_RX_COAL_TARGET : + coal->rx_max_coalesced_frames; + priv->rx_usecs = (coal->rx_coalesce_usecs == + MLX4_EN_AUTO_CONF) ? + MLX4_EN_RX_COAL_TIME : + coal->rx_coalesce_usecs; + + /* Setting TX coalescing parameters */ + if (coal->tx_coalesce_usecs != priv->tx_usecs || + coal->tx_max_coalesced_frames != priv->tx_frames) { + priv->tx_usecs = coal->tx_coalesce_usecs; + priv->tx_frames = coal->tx_max_coalesced_frames; + } + + /* Set adaptive coalescing params */ + priv->pkt_rate_low = coal->pkt_rate_low; + priv->rx_usecs_low = coal->rx_coalesce_usecs_low; + priv->pkt_rate_high = coal->pkt_rate_high; + priv->rx_usecs_high = coal->rx_coalesce_usecs_high; + priv->sample_interval = coal->rate_sample_interval; + priv->adaptive_rx_coal = coal->use_adaptive_rx_coalesce; + priv->tx_work_limit = coal->tx_max_coalesced_frames_irq; + + return mlx4_en_moderation_update(priv); +} + +static int mlx4_en_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + u8 tx_pause, tx_ppp, rx_pause, rx_ppp; + int err; + + if (pause->autoneg) + return -EINVAL; + + tx_pause = !!(pause->tx_pause); + rx_pause = !!(pause->rx_pause); + rx_ppp = (tx_pause || rx_pause) ? 0 : priv->prof->rx_ppp; + tx_ppp = (tx_pause || rx_pause) ? 
0 : priv->prof->tx_ppp; + + err = mlx4_SET_PORT_general(mdev->dev, priv->port, + priv->rx_skb_size + ETH_FCS_LEN, + tx_pause, tx_ppp, rx_pause, rx_ppp); + if (err) { + en_err(priv, "Failed setting pause params, err = %d\n", err); + return err; + } + + mlx4_en_update_pfc_stats_bitmap(mdev->dev, &priv->stats_bitmap, + rx_ppp, rx_pause, tx_ppp, tx_pause); + + priv->prof->tx_pause = tx_pause; + priv->prof->rx_pause = rx_pause; + priv->prof->tx_ppp = tx_ppp; + priv->prof->rx_ppp = rx_ppp; + + return err; +} + +static void mlx4_en_get_pause_stats(struct net_device *dev, + struct ethtool_pause_stats *stats) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct bitmap_iterator it; + + bitmap_iterator_init(&it, priv->stats_bitmap.bitmap, NUM_ALL_STATS); + + spin_lock_bh(&priv->stats_lock); + if (test_bit(FLOW_PRIORITY_STATS_IDX_TX_FRAMES, + priv->stats_bitmap.bitmap)) + stats->tx_pause_frames = priv->tx_flowstats.tx_pause; + if (test_bit(FLOW_PRIORITY_STATS_IDX_RX_FRAMES, + priv->stats_bitmap.bitmap)) + stats->rx_pause_frames = priv->rx_flowstats.rx_pause; + spin_unlock_bh(&priv->stats_lock); +} + +static void mlx4_en_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + pause->tx_pause = priv->prof->tx_pause; + pause->rx_pause = priv->prof->rx_pause; +} + +static int mlx4_en_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_port_profile new_prof; + struct mlx4_en_priv *tmp; + u32 rx_size, tx_size; + int port_up = 0; + int err = 0; + + if (param->rx_jumbo_pending || param->rx_mini_pending) + return -EINVAL; + + if (param->rx_pending < MLX4_EN_MIN_RX_SIZE) { + en_warn(priv, "%s: rx_pending (%d) < min (%d)\n", + __func__, param->rx_pending, + MLX4_EN_MIN_RX_SIZE); + return -EINVAL; + } + if (param->tx_pending < MLX4_EN_MIN_TX_SIZE) { + en_warn(priv, "%s: tx_pending (%d) < min (%lu)\n", + __func__, param->tx_pending, + MLX4_EN_MIN_TX_SIZE); + return -EINVAL; + } + + rx_size = roundup_pow_of_two(param->rx_pending); + tx_size = roundup_pow_of_two(param->tx_pending); + + if (rx_size == (priv->port_up ? priv->rx_ring[0]->actual_size : + priv->rx_ring[0]->size) && + tx_size == priv->tx_ring[TX][0]->size) + return 0; + + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + mutex_lock(&mdev->state_lock); + memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); + new_prof.tx_ring_size = tx_size; + new_prof.rx_ring_size = rx_size; + err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true); + if (err) + goto out; + + if (priv->port_up) { + port_up = 1; + mlx4_en_stop_port(dev, 1); + } + + mlx4_en_safe_replace_resources(priv, tmp); + + if (port_up) { + err = mlx4_en_start_port(dev); + if (err) + en_err(priv, "Failed starting port\n"); + } + + err = mlx4_en_moderation_update(priv); +out: + kfree(tmp); + mutex_unlock(&mdev->state_lock); + return err; +} + +static void mlx4_en_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + memset(param, 0, sizeof(*param)); + param->rx_max_pending = MLX4_EN_MAX_RX_SIZE; + param->tx_max_pending = MLX4_EN_MAX_TX_SIZE; + param->rx_pending = priv->port_up ? 
+ priv->rx_ring[0]->actual_size : priv->rx_ring[0]->size; + param->tx_pending = priv->tx_ring[TX][0]->size; +} + +static u32 mlx4_en_get_rxfh_indir_size(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + return rounddown_pow_of_two(priv->rx_ring_num); +} + +static u32 mlx4_en_get_rxfh_key_size(struct net_device *netdev) +{ + return MLX4_EN_RSS_KEY_SIZE; +} + +static int mlx4_en_check_rxfh_func(struct net_device *dev, u8 hfunc) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + /* check if requested function is supported by the device */ + if (hfunc == ETH_RSS_HASH_TOP) { + if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP)) + return -EINVAL; + if (!(dev->features & NETIF_F_RXHASH)) + en_warn(priv, "Toeplitz hash function should be used in conjunction with RX hashing for optimal performance\n"); + return 0; + } else if (hfunc == ETH_RSS_HASH_XOR) { + if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_XOR)) + return -EINVAL; + if (dev->features & NETIF_F_RXHASH) + en_warn(priv, "Enabling both XOR Hash function and RX Hashing can limit RPS functionality\n"); + return 0; + } + + return -EINVAL; +} + +static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key, + u8 *hfunc) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + u32 n = mlx4_en_get_rxfh_indir_size(dev); + u32 i, rss_rings; + + rss_rings = priv->prof->rss_rings ?: n; + rss_rings = rounddown_pow_of_two(rss_rings); + + for (i = 0; i < n; i++) { + if (!ring_index) + break; + ring_index[i] = i % rss_rings; + } + if (key) + memcpy(key, priv->rss_key, MLX4_EN_RSS_KEY_SIZE); + if (hfunc) + *hfunc = priv->rss_hash_fn; + return 0; +} + +static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index, + const u8 *key, const u8 hfunc) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + u32 n = mlx4_en_get_rxfh_indir_size(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int port_up = 0; + int err = 0; + int i; + int rss_rings = 0; + + /* Calculate RSS table size and make sure flows are spread evenly + * between rings + */ + for (i = 0; i < n; i++) { + if (!ring_index) + break; + if (i > 0 && !ring_index[i] && !rss_rings) + rss_rings = i; + + if (ring_index[i] != (i % (rss_rings ?: n))) + return -EINVAL; + } + + if (!rss_rings) + rss_rings = n; + + /* RSS table size must be an order of 2 */ + if (!is_power_of_2(rss_rings)) + return -EINVAL; + + if (hfunc != ETH_RSS_HASH_NO_CHANGE) { + err = mlx4_en_check_rxfh_func(dev, hfunc); + if (err) + return err; + } + + mutex_lock(&mdev->state_lock); + if (priv->port_up) { + port_up = 1; + mlx4_en_stop_port(dev, 1); + } + + if (ring_index) + priv->prof->rss_rings = rss_rings; + if (key) + memcpy(priv->rss_key, key, MLX4_EN_RSS_KEY_SIZE); + if (hfunc != ETH_RSS_HASH_NO_CHANGE) + priv->rss_hash_fn = hfunc; + + if (port_up) { + err = mlx4_en_start_port(dev); + if (err) + en_err(priv, "Failed starting port\n"); + } + + mutex_unlock(&mdev->state_lock); + return err; +} + +#define all_zeros_or_all_ones(field) \ + ((field) == 0 || (field) == (__force typeof(field))-1) + +static int mlx4_en_validate_flow(struct net_device *dev, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_usrip4_spec *l3_mask; + struct ethtool_tcpip4_spec *l4_mask; + struct ethhdr *eth_mask; + + if (cmd->fs.location >= MAX_NUM_OF_FS_RULES) + return -EINVAL; + + if (cmd->fs.flow_type & FLOW_MAC_EXT) { + /* dest mac mask must be ff:ff:ff:ff:ff:ff */ + if (!is_broadcast_ether_addr(cmd->fs.m_ext.h_dest)) + return -EINVAL; + } + + switch (cmd->fs.flow_type & 
~(FLOW_EXT | FLOW_MAC_EXT)) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + if (cmd->fs.m_u.tcp_ip4_spec.tos) + return -EINVAL; + l4_mask = &cmd->fs.m_u.tcp_ip4_spec; + /* don't allow mask which isn't all 0 or 1 */ + if (!all_zeros_or_all_ones(l4_mask->ip4src) || + !all_zeros_or_all_ones(l4_mask->ip4dst) || + !all_zeros_or_all_ones(l4_mask->psrc) || + !all_zeros_or_all_ones(l4_mask->pdst)) + return -EINVAL; + break; + case IP_USER_FLOW: + l3_mask = &cmd->fs.m_u.usr_ip4_spec; + if (l3_mask->l4_4_bytes || l3_mask->tos || l3_mask->proto || + cmd->fs.h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4 || + (!l3_mask->ip4src && !l3_mask->ip4dst) || + !all_zeros_or_all_ones(l3_mask->ip4src) || + !all_zeros_or_all_ones(l3_mask->ip4dst)) + return -EINVAL; + break; + case ETHER_FLOW: + eth_mask = &cmd->fs.m_u.ether_spec; + /* source mac mask must not be set */ + if (!is_zero_ether_addr(eth_mask->h_source)) + return -EINVAL; + + /* dest mac mask must be ff:ff:ff:ff:ff:ff */ + if (!is_broadcast_ether_addr(eth_mask->h_dest)) + return -EINVAL; + + if (!all_zeros_or_all_ones(eth_mask->h_proto)) + return -EINVAL; + break; + default: + return -EINVAL; + } + + if ((cmd->fs.flow_type & FLOW_EXT)) { + if (cmd->fs.m_ext.vlan_etype || + !((cmd->fs.m_ext.vlan_tci & cpu_to_be16(VLAN_VID_MASK)) == + 0 || + (cmd->fs.m_ext.vlan_tci & cpu_to_be16(VLAN_VID_MASK)) == + cpu_to_be16(VLAN_VID_MASK))) + return -EINVAL; + + if (cmd->fs.m_ext.vlan_tci) { + if (be16_to_cpu(cmd->fs.h_ext.vlan_tci) >= VLAN_N_VID) + return -EINVAL; + + } + } + + return 0; +} + +static int mlx4_en_ethtool_add_mac_rule(struct ethtool_rxnfc *cmd, + struct list_head *rule_list_h, + struct mlx4_spec_list *spec_l2, + unsigned char *mac) +{ + __be64 mac_msk = cpu_to_be64(MLX4_MAC_MASK << 16); + + spec_l2->id = MLX4_NET_TRANS_RULE_ID_ETH; + memcpy(spec_l2->eth.dst_mac_msk, &mac_msk, ETH_ALEN); + memcpy(spec_l2->eth.dst_mac, mac, ETH_ALEN); + + if ((cmd->fs.flow_type & FLOW_EXT) && + (cmd->fs.m_ext.vlan_tci & cpu_to_be16(VLAN_VID_MASK))) { + spec_l2->eth.vlan_id = cmd->fs.h_ext.vlan_tci; + spec_l2->eth.vlan_id_msk = cpu_to_be16(VLAN_VID_MASK); + } + + list_add_tail(&spec_l2->list, rule_list_h); + + return 0; +} + +static int mlx4_en_ethtool_add_mac_rule_by_ipv4(struct mlx4_en_priv *priv, + struct ethtool_rxnfc *cmd, + struct list_head *rule_list_h, + struct mlx4_spec_list *spec_l2, + __be32 ipv4_dst) +{ +#ifdef CONFIG_INET + unsigned char mac[ETH_ALEN]; + + if (!ipv4_is_multicast(ipv4_dst)) { + if (cmd->fs.flow_type & FLOW_MAC_EXT) + memcpy(&mac, cmd->fs.h_ext.h_dest, ETH_ALEN); + else + memcpy(&mac, priv->dev->dev_addr, ETH_ALEN); + } else { + ip_eth_mc_map(ipv4_dst, mac); + } + + return mlx4_en_ethtool_add_mac_rule(cmd, rule_list_h, spec_l2, &mac[0]); +#else + return -EINVAL; +#endif +} + +static int add_ip_rule(struct mlx4_en_priv *priv, + struct ethtool_rxnfc *cmd, + struct list_head *list_h) +{ + int err; + struct mlx4_spec_list *spec_l2 = NULL; + struct mlx4_spec_list *spec_l3 = NULL; + struct ethtool_usrip4_spec *l3_mask = &cmd->fs.m_u.usr_ip4_spec; + + spec_l3 = kzalloc(sizeof(*spec_l3), GFP_KERNEL); + spec_l2 = kzalloc(sizeof(*spec_l2), GFP_KERNEL); + if (!spec_l2 || !spec_l3) { + err = -ENOMEM; + goto free_spec; + } + + err = mlx4_en_ethtool_add_mac_rule_by_ipv4(priv, cmd, list_h, spec_l2, + cmd->fs.h_u. 
+ usr_ip4_spec.ip4dst); + if (err) + goto free_spec; + spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4; + spec_l3->ipv4.src_ip = cmd->fs.h_u.usr_ip4_spec.ip4src; + if (l3_mask->ip4src) + spec_l3->ipv4.src_ip_msk = EN_ETHTOOL_WORD_MASK; + spec_l3->ipv4.dst_ip = cmd->fs.h_u.usr_ip4_spec.ip4dst; + if (l3_mask->ip4dst) + spec_l3->ipv4.dst_ip_msk = EN_ETHTOOL_WORD_MASK; + list_add_tail(&spec_l3->list, list_h); + + return 0; + +free_spec: + kfree(spec_l2); + kfree(spec_l3); + return err; +} + +static int add_tcp_udp_rule(struct mlx4_en_priv *priv, + struct ethtool_rxnfc *cmd, + struct list_head *list_h, int proto) +{ + int err; + struct mlx4_spec_list *spec_l2 = NULL; + struct mlx4_spec_list *spec_l3 = NULL; + struct mlx4_spec_list *spec_l4 = NULL; + struct ethtool_tcpip4_spec *l4_mask = &cmd->fs.m_u.tcp_ip4_spec; + + spec_l2 = kzalloc(sizeof(*spec_l2), GFP_KERNEL); + spec_l3 = kzalloc(sizeof(*spec_l3), GFP_KERNEL); + spec_l4 = kzalloc(sizeof(*spec_l4), GFP_KERNEL); + if (!spec_l2 || !spec_l3 || !spec_l4) { + err = -ENOMEM; + goto free_spec; + } + + spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4; + + if (proto == TCP_V4_FLOW) { + err = mlx4_en_ethtool_add_mac_rule_by_ipv4(priv, cmd, list_h, + spec_l2, + cmd->fs.h_u. + tcp_ip4_spec.ip4dst); + if (err) + goto free_spec; + spec_l4->id = MLX4_NET_TRANS_RULE_ID_TCP; + spec_l3->ipv4.src_ip = cmd->fs.h_u.tcp_ip4_spec.ip4src; + spec_l3->ipv4.dst_ip = cmd->fs.h_u.tcp_ip4_spec.ip4dst; + spec_l4->tcp_udp.src_port = cmd->fs.h_u.tcp_ip4_spec.psrc; + spec_l4->tcp_udp.dst_port = cmd->fs.h_u.tcp_ip4_spec.pdst; + } else { + err = mlx4_en_ethtool_add_mac_rule_by_ipv4(priv, cmd, list_h, + spec_l2, + cmd->fs.h_u. + udp_ip4_spec.ip4dst); + if (err) + goto free_spec; + spec_l4->id = MLX4_NET_TRANS_RULE_ID_UDP; + spec_l3->ipv4.src_ip = cmd->fs.h_u.udp_ip4_spec.ip4src; + spec_l3->ipv4.dst_ip = cmd->fs.h_u.udp_ip4_spec.ip4dst; + spec_l4->tcp_udp.src_port = cmd->fs.h_u.udp_ip4_spec.psrc; + spec_l4->tcp_udp.dst_port = cmd->fs.h_u.udp_ip4_spec.pdst; + } + + if (l4_mask->ip4src) + spec_l3->ipv4.src_ip_msk = EN_ETHTOOL_WORD_MASK; + if (l4_mask->ip4dst) + spec_l3->ipv4.dst_ip_msk = EN_ETHTOOL_WORD_MASK; + + if (l4_mask->psrc) + spec_l4->tcp_udp.src_port_msk = EN_ETHTOOL_SHORT_MASK; + if (l4_mask->pdst) + spec_l4->tcp_udp.dst_port_msk = EN_ETHTOOL_SHORT_MASK; + + list_add_tail(&spec_l3->list, list_h); + list_add_tail(&spec_l4->list, list_h); + + return 0; + +free_spec: + kfree(spec_l2); + kfree(spec_l3); + kfree(spec_l4); + return err; +} + +static int mlx4_en_ethtool_to_net_trans_rule(struct net_device *dev, + struct ethtool_rxnfc *cmd, + struct list_head *rule_list_h) +{ + int err; + struct ethhdr *eth_spec; + struct mlx4_spec_list *spec_l2; + struct mlx4_en_priv *priv = netdev_priv(dev); + + err = mlx4_en_validate_flow(dev, cmd); + if (err) + return err; + + switch (cmd->fs.flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { + case ETHER_FLOW: + spec_l2 = kzalloc(sizeof(*spec_l2), GFP_KERNEL); + if (!spec_l2) + return -ENOMEM; + + eth_spec = &cmd->fs.h_u.ether_spec; + mlx4_en_ethtool_add_mac_rule(cmd, rule_list_h, spec_l2, + ð_spec->h_dest[0]); + spec_l2->eth.ether_type = eth_spec->h_proto; + if (eth_spec->h_proto) + spec_l2->eth.ether_type_enable = 1; + break; + case IP_USER_FLOW: + err = add_ip_rule(priv, cmd, rule_list_h); + break; + case TCP_V4_FLOW: + err = add_tcp_udp_rule(priv, cmd, rule_list_h, TCP_V4_FLOW); + break; + case UDP_V4_FLOW: + err = add_tcp_udp_rule(priv, cmd, rule_list_h, UDP_V4_FLOW); + break; + } + + return err; +} + +static int mlx4_en_flow_replace(struct net_device 
*dev, + struct ethtool_rxnfc *cmd) +{ + int err; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct ethtool_flow_id *loc_rule; + struct mlx4_spec_list *spec, *tmp_spec; + u32 qpn; + u64 reg_id; + + struct mlx4_net_trans_rule rule = { + .queue_mode = MLX4_NET_TRANS_Q_FIFO, + .exclusive = 0, + .allow_loopback = 1, + .promisc_mode = MLX4_FS_REGULAR, + }; + + rule.port = priv->port; + rule.priority = MLX4_DOMAIN_ETHTOOL | cmd->fs.location; + INIT_LIST_HEAD(&rule.list); + + /* Allow direct QP attaches if the EN_ETHTOOL_QP_ATTACH flag is set */ + if (cmd->fs.ring_cookie == RX_CLS_FLOW_DISC) + qpn = priv->drop_qp.qpn; + else if (cmd->fs.ring_cookie & EN_ETHTOOL_QP_ATTACH) { + qpn = cmd->fs.ring_cookie & (EN_ETHTOOL_QP_ATTACH - 1); + } else { + if (cmd->fs.ring_cookie >= priv->rx_ring_num) { + en_warn(priv, "rxnfc: RX ring (%llu) doesn't exist\n", + cmd->fs.ring_cookie); + return -EINVAL; + } + qpn = priv->rss_map.qps[cmd->fs.ring_cookie].qpn; + if (!qpn) { + en_warn(priv, "rxnfc: RX ring (%llu) is inactive\n", + cmd->fs.ring_cookie); + return -EINVAL; + } + } + rule.qpn = qpn; + err = mlx4_en_ethtool_to_net_trans_rule(dev, cmd, &rule.list); + if (err) + goto out_free_list; + + loc_rule = &priv->ethtool_rules[cmd->fs.location]; + if (loc_rule->id) { + err = mlx4_flow_detach(priv->mdev->dev, loc_rule->id); + if (err) { + en_err(priv, "Fail to detach network rule at location %d. registration id = %llx\n", + cmd->fs.location, loc_rule->id); + goto out_free_list; + } + loc_rule->id = 0; + memset(&loc_rule->flow_spec, 0, + sizeof(struct ethtool_rx_flow_spec)); + list_del(&loc_rule->list); + } + err = mlx4_flow_attach(priv->mdev->dev, &rule, ®_id); + if (err) { + en_err(priv, "Fail to attach network rule at location %d\n", + cmd->fs.location); + goto out_free_list; + } + loc_rule->id = reg_id; + memcpy(&loc_rule->flow_spec, &cmd->fs, + sizeof(struct ethtool_rx_flow_spec)); + list_add_tail(&loc_rule->list, &priv->ethtool_list); + +out_free_list: + list_for_each_entry_safe(spec, tmp_spec, &rule.list, list) { + list_del(&spec->list); + kfree(spec); + } + return err; +} + +static int mlx4_en_flow_detach(struct net_device *dev, + struct ethtool_rxnfc *cmd) +{ + int err = 0; + struct ethtool_flow_id *rule; + struct mlx4_en_priv *priv = netdev_priv(dev); + + if (cmd->fs.location >= MAX_NUM_OF_FS_RULES) + return -EINVAL; + + rule = &priv->ethtool_rules[cmd->fs.location]; + if (!rule->id) { + err = -ENOENT; + goto out; + } + + err = mlx4_flow_detach(priv->mdev->dev, rule->id); + if (err) { + en_err(priv, "Fail to detach network rule at location %d. 
registration id = 0x%llx\n", + cmd->fs.location, rule->id); + goto out; + } + rule->id = 0; + memset(&rule->flow_spec, 0, sizeof(struct ethtool_rx_flow_spec)); + list_del(&rule->list); +out: + return err; + +} + +static int mlx4_en_get_flow(struct net_device *dev, struct ethtool_rxnfc *cmd, + int loc) +{ + int err = 0; + struct ethtool_flow_id *rule; + struct mlx4_en_priv *priv = netdev_priv(dev); + + if (loc < 0 || loc >= MAX_NUM_OF_FS_RULES) + return -EINVAL; + + rule = &priv->ethtool_rules[loc]; + if (rule->id) + memcpy(&cmd->fs, &rule->flow_spec, + sizeof(struct ethtool_rx_flow_spec)); + else + err = -ENOENT; + + return err; +} + +static int mlx4_en_get_num_flows(struct mlx4_en_priv *priv) +{ + + int i, res = 0; + for (i = 0; i < MAX_NUM_OF_FS_RULES; i++) { + if (priv->ethtool_rules[i].id) + res++; + } + return res; + +} + +static int mlx4_en_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int err = 0; + int i = 0, priority = 0; + + if ((cmd->cmd == ETHTOOL_GRXCLSRLCNT || + cmd->cmd == ETHTOOL_GRXCLSRULE || + cmd->cmd == ETHTOOL_GRXCLSRLALL) && + (mdev->dev->caps.steering_mode != + MLX4_STEERING_MODE_DEVICE_MANAGED || !priv->port_up)) + return -EINVAL; + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = priv->rx_ring_num; + break; + case ETHTOOL_GRXCLSRLCNT: + cmd->rule_cnt = mlx4_en_get_num_flows(priv); + break; + case ETHTOOL_GRXCLSRULE: + err = mlx4_en_get_flow(dev, cmd, cmd->fs.location); + break; + case ETHTOOL_GRXCLSRLALL: + cmd->data = MAX_NUM_OF_FS_RULES; + while ((!err || err == -ENOENT) && priority < cmd->rule_cnt) { + err = mlx4_en_get_flow(dev, cmd, i); + if (!err) + rule_locs[priority++] = i; + i++; + } + err = 0; + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int mlx4_en_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + int err = 0; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + + if (mdev->dev->caps.steering_mode != + MLX4_STEERING_MODE_DEVICE_MANAGED || !priv->port_up) + return -EINVAL; + + switch (cmd->cmd) { + case ETHTOOL_SRXCLSRLINS: + err = mlx4_en_flow_replace(dev, cmd); + break; + case ETHTOOL_SRXCLSRLDEL: + err = mlx4_en_flow_detach(dev, cmd); + break; + default: + en_warn(priv, "Unsupported ethtool command. (%d)\n", cmd->cmd); + return -EINVAL; + } + + return err; +} + +static int mlx4_en_get_max_num_rx_rings(struct net_device *dev) +{ + return min_t(int, num_online_cpus(), MAX_RX_RINGS); +} + +static void mlx4_en_get_channels(struct net_device *dev, + struct ethtool_channels *channel) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + channel->max_rx = mlx4_en_get_max_num_rx_rings(dev); + channel->max_tx = priv->mdev->profile.max_num_tx_rings_p_up; + + channel->rx_count = priv->rx_ring_num; + channel->tx_count = priv->tx_ring_num[TX] / + priv->prof->num_up; +} + +static int mlx4_en_set_channels(struct net_device *dev, + struct ethtool_channels *channel) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_port_profile new_prof; + struct mlx4_en_priv *tmp; + int total_tx_count; + int port_up = 0; + int xdp_count; + int err = 0; + u8 up; + + if (!channel->tx_count || !channel->rx_count) + return -EINVAL; + + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + mutex_lock(&mdev->state_lock); + xdp_count = priv->tx_ring_num[TX_XDP] ? 
channel->rx_count : 0; + total_tx_count = channel->tx_count * priv->prof->num_up + xdp_count; + if (total_tx_count > MAX_TX_RINGS) { + err = -EINVAL; + en_err(priv, + "Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n", + total_tx_count, MAX_TX_RINGS); + goto out; + } + + memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); + new_prof.num_tx_rings_p_up = channel->tx_count; + new_prof.tx_ring_num[TX] = channel->tx_count * priv->prof->num_up; + new_prof.tx_ring_num[TX_XDP] = xdp_count; + new_prof.rx_ring_num = channel->rx_count; + + err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true); + if (err) + goto out; + + if (priv->port_up) { + port_up = 1; + mlx4_en_stop_port(dev, 1); + } + + mlx4_en_safe_replace_resources(priv, tmp); + + netif_set_real_num_rx_queues(dev, priv->rx_ring_num); + + up = (priv->prof->num_up == MLX4_EN_NUM_UP_LOW) ? + 0 : priv->prof->num_up; + mlx4_en_setup_tc(dev, up); + + en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num[TX]); + en_warn(priv, "Using %d RX rings\n", priv->rx_ring_num); + + if (port_up) { + err = mlx4_en_start_port(dev); + if (err) + en_err(priv, "Failed starting port\n"); + } + + err = mlx4_en_moderation_update(priv); +out: + mutex_unlock(&mdev->state_lock); + kfree(tmp); + return err; +} + +static int mlx4_en_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int ret; + + ret = ethtool_op_get_ts_info(dev, info); + if (ret) + return ret; + + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) { + info->so_timestamping |= + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->tx_types = + (1 << HWTSTAMP_TX_OFF) | + (1 << HWTSTAMP_TX_ON); + + info->rx_filters = + (1 << HWTSTAMP_FILTER_NONE) | + (1 << HWTSTAMP_FILTER_ALL); + + if (mdev->ptp_clock) + info->phc_index = ptp_clock_index(mdev->ptp_clock); + } + + return ret; +} + +static int mlx4_en_set_priv_flags(struct net_device *dev, u32 flags) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + bool bf_enabled_new = !!(flags & MLX4_EN_PRIV_FLAGS_BLUEFLAME); + bool bf_enabled_old = !!(priv->pflags & MLX4_EN_PRIV_FLAGS_BLUEFLAME); + bool phv_enabled_new = !!(flags & MLX4_EN_PRIV_FLAGS_PHV); + bool phv_enabled_old = !!(priv->pflags & MLX4_EN_PRIV_FLAGS_PHV); + int i; + int ret = 0; + + if (bf_enabled_new != bf_enabled_old) { + int t; + + if (bf_enabled_new) { + bool bf_supported = true; + + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) + for (i = 0; i < priv->tx_ring_num[t]; i++) + bf_supported &= + priv->tx_ring[t][i]->bf_alloced; + + if (!bf_supported) { + en_err(priv, "BlueFlame is not supported\n"); + return -EINVAL; + } + + priv->pflags |= MLX4_EN_PRIV_FLAGS_BLUEFLAME; + } else { + priv->pflags &= ~MLX4_EN_PRIV_FLAGS_BLUEFLAME; + } + + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) + for (i = 0; i < priv->tx_ring_num[t]; i++) + priv->tx_ring[t][i]->bf_enabled = + bf_enabled_new; + + en_info(priv, "BlueFlame %s\n", + bf_enabled_new ? "Enabled" : "Disabled"); + } + + if (phv_enabled_new != phv_enabled_old) { + ret = set_phv_bit(mdev->dev, priv->port, (int)phv_enabled_new); + if (ret) + return ret; + else if (phv_enabled_new) + priv->pflags |= MLX4_EN_PRIV_FLAGS_PHV; + else + priv->pflags &= ~MLX4_EN_PRIV_FLAGS_PHV; + en_info(priv, "PHV bit %s\n", + phv_enabled_new ? 
"Enabled" : "Disabled"); + } + return 0; +} + +static u32 mlx4_en_get_priv_flags(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + return priv->pflags; +} + +static int mlx4_en_get_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, + void *data) +{ + const struct mlx4_en_priv *priv = netdev_priv(dev); + int ret = 0; + + switch (tuna->id) { + case ETHTOOL_TX_COPYBREAK: + *(u32 *)data = priv->prof->inline_thold; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int mlx4_en_set_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, + const void *data) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int val, ret = 0; + + switch (tuna->id) { + case ETHTOOL_TX_COPYBREAK: + val = *(u32 *)data; + if (val < MIN_PKT_LEN || val > MAX_INLINE) + ret = -EINVAL; + else + priv->prof->inline_thold = val; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int mlx4_en_get_module_info(struct net_device *dev, + struct ethtool_modinfo *modinfo) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int ret; + u8 data[4]; + + /* Read first 2 bytes to get Module & REV ID */ + ret = mlx4_get_module_info(mdev->dev, priv->port, + 0/*offset*/, 2/*size*/, data); + if (ret < 2) + return -EIO; + + switch (data[0] /* identifier */) { + case MLX4_MODULE_ID_QSFP: + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + break; + case MLX4_MODULE_ID_QSFP_PLUS: + if (data[1] >= 0x3) { /* revision id */ + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + } + break; + case MLX4_MODULE_ID_QSFP28: + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + break; + case MLX4_MODULE_ID_SFP: + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int mlx4_en_get_module_eeprom(struct net_device *dev, + struct ethtool_eeprom *ee, + u8 *data) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int offset = ee->offset; + int i = 0, ret; + + if (ee->len == 0) + return -EINVAL; + + memset(data, 0, ee->len); + + while (i < ee->len) { + en_dbg(DRV, priv, + "mlx4_get_module_info i(%d) offset(%d) len(%d)\n", + i, offset, ee->len - i); + + ret = mlx4_get_module_info(mdev->dev, priv->port, + offset, ee->len - i, data + i); + + if (!ret) /* Done reading */ + return 0; + + if (ret < 0) { + en_err(priv, + "mlx4_get_module_info i(%d) offset(%d) bytes_to_read(%d) - FAILED (0x%x)\n", + i, offset, ee->len - i, ret); + return ret; + } + + i += ret; + offset += ret; + } + return 0; +} + +static int mlx4_en_set_phys_id(struct net_device *dev, + enum ethtool_phys_id_state state) +{ + int err; + u16 beacon_duration; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + + if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_BEACON)) + return -EOPNOTSUPP; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + beacon_duration = PORT_BEACON_MAX_LIMIT; + break; + case ETHTOOL_ID_INACTIVE: + beacon_duration = 0; + break; + default: + return -EOPNOTSUPP; + } + + err = mlx4_SET_PORT_BEACON(mdev->dev, priv->port, beacon_duration); + return err; +} + +const struct ethtool_ops mlx4_en_ethtool_ops = { + .supported_coalesce_params = 
ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ | + ETHTOOL_COALESCE_PKT_RATE_RX_USECS, + .get_drvinfo = mlx4_en_get_drvinfo, + .get_link_ksettings = mlx4_en_get_link_ksettings, + .set_link_ksettings = mlx4_en_set_link_ksettings, + .get_link = ethtool_op_get_link, + .get_strings = mlx4_en_get_strings, + .get_sset_count = mlx4_en_get_sset_count, + .get_ethtool_stats = mlx4_en_get_ethtool_stats, + .self_test = mlx4_en_self_test, + .set_phys_id = mlx4_en_set_phys_id, + .get_wol = mlx4_en_get_wol, + .set_wol = mlx4_en_set_wol, + .get_msglevel = mlx4_en_get_msglevel, + .set_msglevel = mlx4_en_set_msglevel, + .get_coalesce = mlx4_en_get_coalesce, + .set_coalesce = mlx4_en_set_coalesce, + .get_pause_stats = mlx4_en_get_pause_stats, + .get_pauseparam = mlx4_en_get_pauseparam, + .set_pauseparam = mlx4_en_set_pauseparam, + .get_ringparam = mlx4_en_get_ringparam, + .set_ringparam = mlx4_en_set_ringparam, + .get_rxnfc = mlx4_en_get_rxnfc, + .set_rxnfc = mlx4_en_set_rxnfc, + .get_rxfh_indir_size = mlx4_en_get_rxfh_indir_size, + .get_rxfh_key_size = mlx4_en_get_rxfh_key_size, + .get_rxfh = mlx4_en_get_rxfh, + .set_rxfh = mlx4_en_set_rxfh, + .get_channels = mlx4_en_get_channels, + .set_channels = mlx4_en_set_channels, + .get_ts_info = mlx4_en_get_ts_info, + .set_priv_flags = mlx4_en_set_priv_flags, + .get_priv_flags = mlx4_en_get_priv_flags, + .get_tunable = mlx4_en_get_tunable, + .set_tunable = mlx4_en_set_tunable, + .get_module_info = mlx4_en_get_module_info, + .get_module_eeprom = mlx4_en_get_module_eeprom +}; + + + + + diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c new file mode 100644 index 000000000..f1259bdb1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -0,0 +1,398 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#include <linux/cpumask.h> +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/netdevice.h> +#include <linux/slab.h> + +#include <linux/mlx4/driver.h> +#include <linux/mlx4/device.h> +#include <linux/mlx4/cmd.h> + +#include "mlx4_en.h" + +MODULE_AUTHOR("Liran Liss, Yevgeny Petrilin"); +MODULE_DESCRIPTION("Mellanox ConnectX HCA Ethernet driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRV_VERSION); + +static const char mlx4_en_version[] = + DRV_NAME ": Mellanox ConnectX HCA Ethernet driver v" + DRV_VERSION "\n"; + +#define MLX4_EN_PARM_INT(X, def_val, desc) \ + static unsigned int X = def_val;\ + module_param(X , uint, 0444); \ + MODULE_PARM_DESC(X, desc); + + +/* + * Device scope module parameters + */ + +/* Enable RSS UDP traffic */ +MLX4_EN_PARM_INT(udp_rss, 1, + "Enable RSS for incoming UDP traffic or disabled (0)"); + +/* Priority pausing */ +MLX4_EN_PARM_INT(pfctx, 0, "Priority based Flow Control policy on TX[7:0]." + " Per priority bit mask"); +MLX4_EN_PARM_INT(pfcrx, 0, "Priority based Flow Control policy on RX[7:0]." + " Per priority bit mask"); + +MLX4_EN_PARM_INT(inline_thold, MAX_INLINE, + "Threshold for using inline data (range: 17-104, default: 104)"); + +#define MAX_PFC_TX 0xff +#define MAX_PFC_RX 0xff + +void en_print(const char *level, const struct mlx4_en_priv *priv, + const char *format, ...) +{ + va_list args; + struct va_format vaf; + + va_start(args, format); + + vaf.fmt = format; + vaf.va = &args; + if (priv->registered) + printk("%s%s: %s: %pV", + level, DRV_NAME, priv->dev->name, &vaf); + else + printk("%s%s: %s: Port %d: %pV", + level, DRV_NAME, dev_name(&priv->mdev->pdev->dev), + priv->port, &vaf); + va_end(args); +} + +void mlx4_en_update_loopback_state(struct net_device *dev, + netdev_features_t features) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + if (features & NETIF_F_LOOPBACK) + priv->ctrl_flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); + else + priv->ctrl_flags &= cpu_to_be32(~MLX4_WQE_CTRL_FORCE_LOOPBACK); + + priv->flags &= ~(MLX4_EN_FLAG_RX_FILTER_NEEDED| + MLX4_EN_FLAG_ENABLE_HW_LOOPBACK); + + /* Drop the packet if SRIOV is not enabled + * and not performing the selftest or flb disabled + */ + if (mlx4_is_mfunc(priv->mdev->dev) && + !(features & NETIF_F_LOOPBACK) && !priv->validate_loopback) + priv->flags |= MLX4_EN_FLAG_RX_FILTER_NEEDED; + + /* Set dmac in Tx WQE if we are in SRIOV mode or if loopback selftest + * is requested + */ + if (mlx4_is_mfunc(priv->mdev->dev) || priv->validate_loopback) + priv->flags |= MLX4_EN_FLAG_ENABLE_HW_LOOPBACK; + + mutex_lock(&priv->mdev->state_lock); + if ((priv->mdev->dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB) && + priv->rss_map.indir_qp && priv->rss_map.indir_qp->qpn) { + int i; + int err = 0; + int loopback = !!(features & NETIF_F_LOOPBACK); + + for (i = 0; i < priv->rx_ring_num; i++) { + int ret; + + ret = mlx4_en_change_mcast_lb(priv, + &priv->rss_map.qps[i], + loopback); + if (!err) + err = ret; + } + if (err) + mlx4_warn(priv->mdev, "failed to change mcast loopback\n"); + } + mutex_unlock(&priv->mdev->state_lock); +} + +static void mlx4_en_get_profile(struct mlx4_en_dev *mdev) +{ + struct mlx4_en_profile *params = &mdev->profile; + int i; + + params->udp_rss = udp_rss; + params->max_num_tx_rings_p_up = mlx4_low_memory_profile() ? 
+ MLX4_EN_MIN_TX_RING_P_UP : + min_t(int, num_online_cpus(), MLX4_EN_MAX_TX_RING_P_UP); + + if (params->udp_rss && !(mdev->dev->caps.flags + & MLX4_DEV_CAP_FLAG_UDP_RSS)) { + mlx4_warn(mdev, "UDP RSS is not supported on this device\n"); + params->udp_rss = 0; + } + for (i = 1; i <= MLX4_MAX_PORTS; i++) { + params->prof[i].rx_pause = !(pfcrx || pfctx); + params->prof[i].rx_ppp = pfcrx; + params->prof[i].tx_pause = !(pfcrx || pfctx); + params->prof[i].tx_ppp = pfctx; + if (mlx4_low_memory_profile()) { + params->prof[i].tx_ring_size = MLX4_EN_MIN_TX_SIZE; + params->prof[i].rx_ring_size = MLX4_EN_MIN_RX_SIZE; + } else { + params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE; + params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE; + } + params->prof[i].num_up = MLX4_EN_NUM_UP_LOW; + params->prof[i].num_tx_rings_p_up = params->max_num_tx_rings_p_up; + params->prof[i].tx_ring_num[TX] = params->max_num_tx_rings_p_up * + params->prof[i].num_up; + params->prof[i].rss_rings = 0; + params->prof[i].inline_thold = inline_thold; + } +} + +static void *mlx4_en_get_netdev(struct mlx4_dev *dev, void *ctx, u8 port) +{ + struct mlx4_en_dev *endev = ctx; + + return endev->pndev[port]; +} + +static void mlx4_en_event(struct mlx4_dev *dev, void *endev_ptr, + enum mlx4_dev_event event, unsigned long port) +{ + struct mlx4_en_dev *mdev = (struct mlx4_en_dev *) endev_ptr; + struct mlx4_en_priv *priv; + + switch (event) { + case MLX4_DEV_EVENT_PORT_UP: + case MLX4_DEV_EVENT_PORT_DOWN: + if (!mdev->pndev[port]) + return; + priv = netdev_priv(mdev->pndev[port]); + /* To prevent races, we poll the link state in a separate + task rather than changing it here */ + priv->link_state = event; + queue_work(mdev->workqueue, &priv->linkstate_task); + break; + + case MLX4_DEV_EVENT_CATASTROPHIC_ERROR: + mlx4_err(mdev, "Internal error detected, restarting device\n"); + break; + + case MLX4_DEV_EVENT_SLAVE_INIT: + case MLX4_DEV_EVENT_SLAVE_SHUTDOWN: + break; + default: + if (port < 1 || port > dev->caps.num_ports || + !mdev->pndev[port]) + return; + mlx4_warn(mdev, "Unhandled event %d for port %d\n", event, + (int) port); + } +} + +static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr) +{ + struct mlx4_en_dev *mdev = endev_ptr; + int i; + + mutex_lock(&mdev->state_lock); + mdev->device_up = false; + mutex_unlock(&mdev->state_lock); + + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) + if (mdev->pndev[i]) + mlx4_en_destroy_netdev(mdev->pndev[i]); + + destroy_workqueue(mdev->workqueue); + (void) mlx4_mr_free(dev, &mdev->mr); + iounmap(mdev->uar_map); + mlx4_uar_free(dev, &mdev->priv_uar); + mlx4_pd_free(dev, mdev->priv_pdn); + if (mdev->nb.notifier_call) + unregister_netdevice_notifier(&mdev->nb); + kfree(mdev); +} + +static void mlx4_en_activate(struct mlx4_dev *dev, void *ctx) +{ + int i; + struct mlx4_en_dev *mdev = ctx; + + /* Create a netdev for each port */ + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) { + mlx4_info(mdev, "Activating port:%d\n", i); + if (mlx4_en_init_netdev(mdev, i, &mdev->profile.prof[i])) + mdev->pndev[i] = NULL; + } + + /* register notifier */ + mdev->nb.notifier_call = mlx4_en_netdev_event; + if (register_netdevice_notifier(&mdev->nb)) { + mdev->nb.notifier_call = NULL; + mlx4_err(mdev, "Failed to create notifier\n"); + } +} + +static void *mlx4_en_add(struct mlx4_dev *dev) +{ + struct mlx4_en_dev *mdev; + int i; + + printk_once(KERN_INFO "%s", mlx4_en_version); + + mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); + if (!mdev) + goto err_free_res; + + if (mlx4_pd_alloc(dev, &mdev->priv_pdn)) + 
goto err_free_dev; + + if (mlx4_uar_alloc(dev, &mdev->priv_uar)) + goto err_pd; + + mdev->uar_map = ioremap((phys_addr_t) mdev->priv_uar.pfn << PAGE_SHIFT, + PAGE_SIZE); + if (!mdev->uar_map) + goto err_uar; + spin_lock_init(&mdev->uar_lock); + + mdev->dev = dev; + mdev->dma_device = &dev->persist->pdev->dev; + mdev->pdev = dev->persist->pdev; + mdev->device_up = false; + + mdev->LSO_support = !!(dev->caps.flags & (1 << 15)); + if (!mdev->LSO_support) + mlx4_warn(mdev, "LSO not supported, please upgrade to later FW version to enable LSO\n"); + + if (mlx4_mr_alloc(mdev->dev, mdev->priv_pdn, 0, ~0ull, + MLX4_PERM_LOCAL_WRITE | MLX4_PERM_LOCAL_READ, + 0, 0, &mdev->mr)) { + mlx4_err(mdev, "Failed allocating memory region\n"); + goto err_map; + } + if (mlx4_mr_enable(mdev->dev, &mdev->mr)) { + mlx4_err(mdev, "Failed enabling memory region\n"); + goto err_mr; + } + + /* Build device profile according to supplied module parameters */ + mlx4_en_get_profile(mdev); + + /* Configure which ports to start according to module parameters */ + mdev->port_cnt = 0; + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) + mdev->port_cnt++; + + /* Set default number of RX rings*/ + mlx4_en_set_num_rx_rings(mdev); + + /* Create our own workqueue for reset/multicast tasks + * Note: we cannot use the shared workqueue because of deadlocks caused + * by the rtnl lock */ + mdev->workqueue = create_singlethread_workqueue("mlx4_en"); + if (!mdev->workqueue) + goto err_mr; + + /* At this stage all non-port specific tasks are complete: + * mark the card state as up */ + mutex_init(&mdev->state_lock); + mdev->device_up = true; + + return mdev; + +err_mr: + (void) mlx4_mr_free(dev, &mdev->mr); +err_map: + if (mdev->uar_map) + iounmap(mdev->uar_map); +err_uar: + mlx4_uar_free(dev, &mdev->priv_uar); +err_pd: + mlx4_pd_free(dev, mdev->priv_pdn); +err_free_dev: + kfree(mdev); +err_free_res: + return NULL; +} + +static struct mlx4_interface mlx4_en_interface = { + .add = mlx4_en_add, + .remove = mlx4_en_remove, + .event = mlx4_en_event, + .get_dev = mlx4_en_get_netdev, + .protocol = MLX4_PROT_ETH, + .activate = mlx4_en_activate, +}; + +static void mlx4_en_verify_params(void) +{ + if (pfctx > MAX_PFC_TX) { + pr_warn("mlx4_en: WARNING: illegal module parameter pfctx 0x%x - should be in range 0-0x%x, will be changed to default (0)\n", + pfctx, MAX_PFC_TX); + pfctx = 0; + } + + if (pfcrx > MAX_PFC_RX) { + pr_warn("mlx4_en: WARNING: illegal module parameter pfcrx 0x%x - should be in range 0-0x%x, will be changed to default (0)\n", + pfcrx, MAX_PFC_RX); + pfcrx = 0; + } + + if (inline_thold < MIN_PKT_LEN || inline_thold > MAX_INLINE) { + pr_warn("mlx4_en: WARNING: illegal module parameter inline_thold %d - should be in range %d-%d, will be changed to default (%d)\n", + inline_thold, MIN_PKT_LEN, MAX_INLINE, MAX_INLINE); + inline_thold = MAX_INLINE; + } +} + +static int __init mlx4_en_init(void) +{ + mlx4_en_verify_params(); + mlx4_en_init_ptys2ethtool_map(); + + return mlx4_register_interface(&mlx4_en_interface); +} + +static void __exit mlx4_en_cleanup(void) +{ + mlx4_unregister_interface(&mlx4_en_interface); +} + +module_init(mlx4_en_init); +module_exit(mlx4_en_cleanup); + diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c new file mode 100644 index 000000000..ca4b93a01 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -0,0 +1,3584 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/bpf.h> +#include <linux/etherdevice.h> +#include <linux/filter.h> +#include <linux/tcp.h> +#include <linux/if_vlan.h> +#include <linux/delay.h> +#include <linux/slab.h> +#include <linux/hash.h> +#include <net/ip.h> +#include <net/vxlan.h> +#include <net/devlink.h> + +#include <linux/mlx4/driver.h> +#include <linux/mlx4/device.h> +#include <linux/mlx4/cmd.h> +#include <linux/mlx4/cq.h> + +#include "mlx4_en.h" +#include "en_port.h" + +#define MLX4_EN_MAX_XDP_MTU ((int)(PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN) - \ + XDP_PACKET_HEADROOM - \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))) + +int mlx4_en_setup_tc(struct net_device *dev, u8 up) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int i; + unsigned int offset = 0; + + if (up && up != MLX4_EN_NUM_UP_HIGH) + return -EINVAL; + + netdev_set_num_tc(dev, up); + netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]); + /* Partition Tx queues evenly amongst UP's */ + for (i = 0; i < up; i++) { + netdev_set_tc_queue(dev, i, priv->num_tx_rings_p_up, offset); + offset += priv->num_tx_rings_p_up; + } + +#ifdef CONFIG_MLX4_EN_DCB + if (!mlx4_is_slave(priv->mdev->dev)) { + if (up) { + if (priv->dcbx_cap) + priv->flags |= MLX4_EN_FLAG_DCB_ENABLED; + } else { + priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED; + priv->cee_config.pfc_state = false; + } + } +#endif /* CONFIG_MLX4_EN_DCB */ + + return 0; +} + +int mlx4_en_alloc_tx_queue_per_tc(struct net_device *dev, u8 tc) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_port_profile new_prof; + struct mlx4_en_priv *tmp; + int total_count; + int port_up = 0; + int err = 0; + + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + mutex_lock(&mdev->state_lock); + memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); + new_prof.num_up = (tc == 0) ? 
MLX4_EN_NUM_UP_LOW : + MLX4_EN_NUM_UP_HIGH; + new_prof.tx_ring_num[TX] = new_prof.num_tx_rings_p_up * + new_prof.num_up; + total_count = new_prof.tx_ring_num[TX] + new_prof.tx_ring_num[TX_XDP]; + if (total_count > MAX_TX_RINGS) { + err = -EINVAL; + en_err(priv, + "Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n", + total_count, MAX_TX_RINGS); + goto out; + } + err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true); + if (err) + goto out; + + if (priv->port_up) { + port_up = 1; + mlx4_en_stop_port(dev, 1); + } + + mlx4_en_safe_replace_resources(priv, tmp); + if (port_up) { + err = mlx4_en_start_port(dev); + if (err) { + en_err(priv, "Failed starting port for setup TC\n"); + goto out; + } + } + + err = mlx4_en_setup_tc(dev, tc); +out: + mutex_unlock(&mdev->state_lock); + kfree(tmp); + return err; +} + +static int __mlx4_en_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + struct tc_mqprio_qopt *mqprio = type_data; + + if (type != TC_SETUP_QDISC_MQPRIO) + return -EOPNOTSUPP; + + if (mqprio->num_tc && mqprio->num_tc != MLX4_EN_NUM_UP_HIGH) + return -EINVAL; + + mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; + + return mlx4_en_alloc_tx_queue_per_tc(dev, mqprio->num_tc); +} + +#ifdef CONFIG_RFS_ACCEL + +struct mlx4_en_filter { + struct list_head next; + struct work_struct work; + + u8 ip_proto; + __be32 src_ip; + __be32 dst_ip; + __be16 src_port; + __be16 dst_port; + + int rxq_index; + struct mlx4_en_priv *priv; + u32 flow_id; /* RFS infrastructure id */ + int id; /* mlx4_en driver id */ + u64 reg_id; /* Flow steering API id */ + u8 activated; /* Used to prevent expiry before filter + * is attached + */ + struct hlist_node filter_chain; +}; + +static void mlx4_en_filter_rfs_expire(struct mlx4_en_priv *priv); + +static enum mlx4_net_trans_rule_id mlx4_ip_proto_to_trans_rule_id(u8 ip_proto) +{ + switch (ip_proto) { + case IPPROTO_UDP: + return MLX4_NET_TRANS_RULE_ID_UDP; + case IPPROTO_TCP: + return MLX4_NET_TRANS_RULE_ID_TCP; + default: + return MLX4_NET_TRANS_RULE_NUM; + } +}; + +/* Must not acquire state_lock, as its corresponding work_sync + * is done under it. 
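+ * (The flush in question is the cancel_work_sync() call in
+ * mlx4_en_cleanup_filters() below.)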
+ */ +static void mlx4_en_filter_work(struct work_struct *work) +{ + struct mlx4_en_filter *filter = container_of(work, + struct mlx4_en_filter, + work); + struct mlx4_en_priv *priv = filter->priv; + struct mlx4_spec_list spec_tcp_udp = { + .id = mlx4_ip_proto_to_trans_rule_id(filter->ip_proto), + { + .tcp_udp = { + .dst_port = filter->dst_port, + .dst_port_msk = (__force __be16)-1, + .src_port = filter->src_port, + .src_port_msk = (__force __be16)-1, + }, + }, + }; + struct mlx4_spec_list spec_ip = { + .id = MLX4_NET_TRANS_RULE_ID_IPV4, + { + .ipv4 = { + .dst_ip = filter->dst_ip, + .dst_ip_msk = (__force __be32)-1, + .src_ip = filter->src_ip, + .src_ip_msk = (__force __be32)-1, + }, + }, + }; + struct mlx4_spec_list spec_eth = { + .id = MLX4_NET_TRANS_RULE_ID_ETH, + }; + struct mlx4_net_trans_rule rule = { + .list = LIST_HEAD_INIT(rule.list), + .queue_mode = MLX4_NET_TRANS_Q_LIFO, + .exclusive = 1, + .allow_loopback = 1, + .promisc_mode = MLX4_FS_REGULAR, + .port = priv->port, + .priority = MLX4_DOMAIN_RFS, + }; + int rc; + __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); + + if (spec_tcp_udp.id >= MLX4_NET_TRANS_RULE_NUM) { + en_warn(priv, "RFS: ignoring unsupported ip protocol (%d)\n", + filter->ip_proto); + goto ignore; + } + list_add_tail(&spec_eth.list, &rule.list); + list_add_tail(&spec_ip.list, &rule.list); + list_add_tail(&spec_tcp_udp.list, &rule.list); + + rule.qpn = priv->rss_map.qps[filter->rxq_index].qpn; + memcpy(spec_eth.eth.dst_mac, priv->dev->dev_addr, ETH_ALEN); + memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN); + + filter->activated = 0; + + if (filter->reg_id) { + rc = mlx4_flow_detach(priv->mdev->dev, filter->reg_id); + if (rc && rc != -ENOENT) + en_err(priv, "Error detaching flow. rc = %d\n", rc); + } + + rc = mlx4_flow_attach(priv->mdev->dev, &rule, &filter->reg_id); + if (rc) + en_err(priv, "Error attaching flow. err = %d\n", rc); + +ignore: + mlx4_en_filter_rfs_expire(priv); + + filter->activated = 1; +} + +static inline struct hlist_head * +filter_hash_bucket(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip, + __be16 src_port, __be16 dst_port) +{ + unsigned long l; + int bucket_idx; + + l = (__force unsigned long)src_port | + ((__force unsigned long)dst_port << 2); + l ^= (__force unsigned long)(src_ip ^ dst_ip); + + bucket_idx = hash_long(l, MLX4_EN_FILTER_HASH_SHIFT); + + return &priv->filter_hash[bucket_idx]; +} + +static struct mlx4_en_filter * +mlx4_en_filter_alloc(struct mlx4_en_priv *priv, int rxq_index, __be32 src_ip, + __be32 dst_ip, u8 ip_proto, __be16 src_port, + __be16 dst_port, u32 flow_id) +{ + struct mlx4_en_filter *filter = NULL; + + filter = kzalloc(sizeof(struct mlx4_en_filter), GFP_ATOMIC); + if (!filter) + return NULL; + + filter->priv = priv; + filter->rxq_index = rxq_index; + INIT_WORK(&filter->work, mlx4_en_filter_work); + + filter->src_ip = src_ip; + filter->dst_ip = dst_ip; + filter->ip_proto = ip_proto; + filter->src_port = src_port; + filter->dst_port = dst_port; + + filter->flow_id = flow_id; + + filter->id = priv->last_filter_id++ % RPS_NO_FILTER; + + list_add_tail(&filter->next, &priv->filters); + hlist_add_head(&filter->filter_chain, + filter_hash_bucket(priv, src_ip, dst_ip, src_port, + dst_port)); + + return filter; +} + +static void mlx4_en_filter_free(struct mlx4_en_filter *filter) +{ + struct mlx4_en_priv *priv = filter->priv; + int rc; + + list_del(&filter->next); + + rc = mlx4_flow_detach(priv->mdev->dev, filter->reg_id); + if (rc && rc != -ENOENT) + en_err(priv, "Error detaching flow. 
rc = %d\n", rc); + + kfree(filter); +} + +static inline struct mlx4_en_filter * +mlx4_en_filter_find(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip, + u8 ip_proto, __be16 src_port, __be16 dst_port) +{ + struct mlx4_en_filter *filter; + struct mlx4_en_filter *ret = NULL; + + hlist_for_each_entry(filter, + filter_hash_bucket(priv, src_ip, dst_ip, + src_port, dst_port), + filter_chain) { + if (filter->src_ip == src_ip && + filter->dst_ip == dst_ip && + filter->ip_proto == ip_proto && + filter->src_port == src_port && + filter->dst_port == dst_port) { + ret = filter; + break; + } + } + + return ret; +} + +static int +mlx4_en_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, + u16 rxq_index, u32 flow_id) +{ + struct mlx4_en_priv *priv = netdev_priv(net_dev); + struct mlx4_en_filter *filter; + const struct iphdr *ip; + const __be16 *ports; + u8 ip_proto; + __be32 src_ip; + __be32 dst_ip; + __be16 src_port; + __be16 dst_port; + int nhoff = skb_network_offset(skb); + int ret = 0; + + if (skb->encapsulation) + return -EPROTONOSUPPORT; + + if (skb->protocol != htons(ETH_P_IP)) + return -EPROTONOSUPPORT; + + ip = (const struct iphdr *)(skb->data + nhoff); + if (ip_is_fragment(ip)) + return -EPROTONOSUPPORT; + + if ((ip->protocol != IPPROTO_TCP) && (ip->protocol != IPPROTO_UDP)) + return -EPROTONOSUPPORT; + ports = (const __be16 *)(skb->data + nhoff + 4 * ip->ihl); + + ip_proto = ip->protocol; + src_ip = ip->saddr; + dst_ip = ip->daddr; + src_port = ports[0]; + dst_port = ports[1]; + + spin_lock_bh(&priv->filters_lock); + filter = mlx4_en_filter_find(priv, src_ip, dst_ip, ip_proto, + src_port, dst_port); + if (filter) { + if (filter->rxq_index == rxq_index) + goto out; + + filter->rxq_index = rxq_index; + } else { + filter = mlx4_en_filter_alloc(priv, rxq_index, + src_ip, dst_ip, ip_proto, + src_port, dst_port, flow_id); + if (!filter) { + ret = -ENOMEM; + goto err; + } + } + + queue_work(priv->mdev->workqueue, &filter->work); + +out: + ret = filter->id; +err: + spin_unlock_bh(&priv->filters_lock); + + return ret; +} + +void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv) +{ + struct mlx4_en_filter *filter, *tmp; + LIST_HEAD(del_list); + + spin_lock_bh(&priv->filters_lock); + list_for_each_entry_safe(filter, tmp, &priv->filters, next) { + list_move(&filter->next, &del_list); + hlist_del(&filter->filter_chain); + } + spin_unlock_bh(&priv->filters_lock); + + list_for_each_entry_safe(filter, tmp, &del_list, next) { + cancel_work_sync(&filter->work); + mlx4_en_filter_free(filter); + } +} + +static void mlx4_en_filter_rfs_expire(struct mlx4_en_priv *priv) +{ + struct mlx4_en_filter *filter = NULL, *tmp, *last_filter = NULL; + LIST_HEAD(del_list); + int i = 0; + + spin_lock_bh(&priv->filters_lock); + list_for_each_entry_safe(filter, tmp, &priv->filters, next) { + if (i > MLX4_EN_FILTER_EXPIRY_QUOTA) + break; + + if (filter->activated && + !work_pending(&filter->work) && + rps_may_expire_flow(priv->dev, + filter->rxq_index, filter->flow_id, + filter->id)) { + list_move(&filter->next, &del_list); + hlist_del(&filter->filter_chain); + } else + last_filter = filter; + + i++; + } + + if (last_filter && (&last_filter->next != priv->filters.next)) + list_move(&priv->filters, &last_filter->next); + + spin_unlock_bh(&priv->filters_lock); + + list_for_each_entry_safe(filter, tmp, &del_list, next) + mlx4_en_filter_free(filter); +} +#endif + +static int mlx4_en_vlan_rx_add_vid(struct net_device *dev, + __be16 proto, u16 vid) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct 
mlx4_en_dev *mdev = priv->mdev; + int err; + int idx; + + en_dbg(HW, priv, "adding VLAN:%d\n", vid); + + set_bit(vid, priv->active_vlans); + + /* Add VID to port VLAN filter */ + mutex_lock(&mdev->state_lock); + if (mdev->device_up && priv->port_up) { + err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); + if (err) { + en_err(priv, "Failed configuring VLAN filter\n"); + goto out; + } + } + err = mlx4_register_vlan(mdev->dev, priv->port, vid, &idx); + if (err) + en_dbg(HW, priv, "Failed adding vlan %d\n", vid); + +out: + mutex_unlock(&mdev->state_lock); + return err; +} + +static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, + __be16 proto, u16 vid) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int err = 0; + + en_dbg(HW, priv, "Killing VID:%d\n", vid); + + clear_bit(vid, priv->active_vlans); + + /* Remove VID from port VLAN filter */ + mutex_lock(&mdev->state_lock); + mlx4_unregister_vlan(mdev->dev, priv->port, vid); + + if (mdev->device_up && priv->port_up) { + err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); + if (err) + en_err(priv, "Failed configuring VLAN filter\n"); + } + mutex_unlock(&mdev->state_lock); + + return err; +} + +static void mlx4_en_u64_to_mac(struct net_device *dev, u64 src_mac) +{ + u8 addr[ETH_ALEN]; + + u64_to_ether_addr(src_mac, addr); + eth_hw_addr_set(dev, addr); +} + + +static int mlx4_en_tunnel_steer_add(struct mlx4_en_priv *priv, + const unsigned char *addr, + int qpn, u64 *reg_id) +{ + int err; + + if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN || + priv->mdev->dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) + return 0; /* do nothing */ + + err = mlx4_tunnel_steer_add(priv->mdev->dev, addr, priv->port, qpn, + MLX4_DOMAIN_NIC, reg_id); + if (err) { + en_err(priv, "failed to add vxlan steering rule, err %d\n", err); + return err; + } + en_dbg(DRV, priv, "added vxlan steering rule, mac %pM reg_id %llx\n", addr, *reg_id); + return 0; +} + + +static int mlx4_en_uc_steer_add(struct mlx4_en_priv *priv, + const unsigned char *mac, int *qpn, u64 *reg_id) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_dev *dev = mdev->dev; + int err; + + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_B0: { + struct mlx4_qp qp; + u8 gid[16] = {0}; + + qp.qpn = *qpn; + memcpy(&gid[10], mac, ETH_ALEN); + gid[5] = priv->port; + + err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH); + break; + } + case MLX4_STEERING_MODE_DEVICE_MANAGED: { + struct mlx4_spec_list spec_eth = { {NULL} }; + __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); + + struct mlx4_net_trans_rule rule = { + .queue_mode = MLX4_NET_TRANS_Q_FIFO, + .exclusive = 0, + .allow_loopback = 1, + .promisc_mode = MLX4_FS_REGULAR, + .priority = MLX4_DOMAIN_NIC, + }; + + rule.port = priv->port; + rule.qpn = *qpn; + INIT_LIST_HEAD(&rule.list); + + spec_eth.id = MLX4_NET_TRANS_RULE_ID_ETH; + memcpy(spec_eth.eth.dst_mac, mac, ETH_ALEN); + memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN); + list_add_tail(&spec_eth.list, &rule.list); + + err = mlx4_flow_attach(dev, &rule, reg_id); + break; + } + default: + return -EINVAL; + } + if (err) + en_warn(priv, "Failed Attaching Unicast\n"); + + return err; +} + +static void mlx4_en_uc_steer_release(struct mlx4_en_priv *priv, + const unsigned char *mac, + int qpn, u64 reg_id) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_dev *dev = mdev->dev; + + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_B0: { + struct mlx4_qp qp; + u8 gid[16] = {0}; + + 
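+		/* B0 steering: rebuild the same unicast GID used by
+		 * mlx4_en_uc_steer_add() (MAC in bytes 10..15, port in
+		 * byte 5) and detach it from the QP. */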
qp.qpn = qpn; + memcpy(&gid[10], mac, ETH_ALEN); + gid[5] = priv->port; + + mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH); + break; + } + case MLX4_STEERING_MODE_DEVICE_MANAGED: { + mlx4_flow_detach(dev, reg_id); + break; + } + default: + en_err(priv, "Invalid steering mode.\n"); + } +} + +static int mlx4_en_get_qp(struct mlx4_en_priv *priv) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_dev *dev = mdev->dev; + int index = 0; + int err = 0; + int *qpn = &priv->base_qpn; + u64 mac = ether_addr_to_u64(priv->dev->dev_addr); + + en_dbg(DRV, priv, "Registering MAC: %pM for adding\n", + priv->dev->dev_addr); + index = mlx4_register_mac(dev, priv->port, mac); + if (index < 0) { + err = index; + en_err(priv, "Failed adding MAC: %pM\n", + priv->dev->dev_addr); + return err; + } + + en_info(priv, "Steering Mode %d\n", dev->caps.steering_mode); + + if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) { + int base_qpn = mlx4_get_base_qpn(dev, priv->port); + *qpn = base_qpn + index; + return 0; + } + + err = mlx4_qp_reserve_range(dev, 1, 1, qpn, MLX4_RESERVE_A0_QP, + MLX4_RES_USAGE_DRIVER); + en_dbg(DRV, priv, "Reserved qp %d\n", *qpn); + if (err) { + en_err(priv, "Failed to reserve qp for mac registration\n"); + mlx4_unregister_mac(dev, priv->port, mac); + return err; + } + + return 0; +} + +static void mlx4_en_put_qp(struct mlx4_en_priv *priv) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_dev *dev = mdev->dev; + int qpn = priv->base_qpn; + + if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) { + u64 mac = ether_addr_to_u64(priv->dev->dev_addr); + en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n", + priv->dev->dev_addr); + mlx4_unregister_mac(dev, priv->port, mac); + } else { + en_dbg(DRV, priv, "Releasing qp: port %d, qpn %d\n", + priv->port, qpn); + mlx4_qp_release_range(dev, qpn, 1); + priv->flags &= ~MLX4_EN_FLAG_FORCE_PROMISC; + } +} + +static int mlx4_en_replace_mac(struct mlx4_en_priv *priv, int qpn, + unsigned char *new_mac, unsigned char *prev_mac) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_dev *dev = mdev->dev; + int err = 0; + u64 new_mac_u64 = ether_addr_to_u64(new_mac); + + if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) { + struct hlist_head *bucket; + unsigned int mac_hash; + struct mlx4_mac_entry *entry; + struct hlist_node *tmp; + u64 prev_mac_u64 = ether_addr_to_u64(prev_mac); + + bucket = &priv->mac_hash[prev_mac[MLX4_EN_MAC_HASH_IDX]]; + hlist_for_each_entry_safe(entry, tmp, bucket, hlist) { + if (ether_addr_equal_64bits(entry->mac, prev_mac)) { + mlx4_en_uc_steer_release(priv, entry->mac, + qpn, entry->reg_id); + mlx4_unregister_mac(dev, priv->port, + prev_mac_u64); + hlist_del_rcu(&entry->hlist); + synchronize_rcu(); + memcpy(entry->mac, new_mac, ETH_ALEN); + entry->reg_id = 0; + mac_hash = new_mac[MLX4_EN_MAC_HASH_IDX]; + hlist_add_head_rcu(&entry->hlist, + &priv->mac_hash[mac_hash]); + mlx4_register_mac(dev, priv->port, new_mac_u64); + err = mlx4_en_uc_steer_add(priv, new_mac, + &qpn, + &entry->reg_id); + if (err) + return err; + if (priv->tunnel_reg_id) { + mlx4_flow_detach(priv->mdev->dev, priv->tunnel_reg_id); + priv->tunnel_reg_id = 0; + } + err = mlx4_en_tunnel_steer_add(priv, new_mac, qpn, + &priv->tunnel_reg_id); + return err; + } + } + return -EINVAL; + } + + return __mlx4_replace_mac(dev, priv->port, qpn, new_mac_u64); +} + +static void mlx4_en_update_user_mac(struct mlx4_en_priv *priv, + unsigned char new_mac[ETH_ALEN + 2]) +{ + struct mlx4_en_dev *mdev = priv->mdev; + int err; + + if (!(mdev->dev->caps.flags2 
& MLX4_DEV_CAP_FLAG2_USER_MAC_EN)) + return; + + err = mlx4_SET_PORT_user_mac(mdev->dev, priv->port, new_mac); + if (err) + en_err(priv, "Failed to pass user MAC(%pM) to Firmware for port %d, with error %d\n", + new_mac, priv->port, err); +} + +static int mlx4_en_do_set_mac(struct mlx4_en_priv *priv, + unsigned char new_mac[ETH_ALEN + 2]) +{ + int err = 0; + + if (priv->port_up) { + /* Remove old MAC and insert the new one */ + err = mlx4_en_replace_mac(priv, priv->base_qpn, + new_mac, priv->current_mac); + if (err) + en_err(priv, "Failed changing HW MAC address\n"); + } else + en_dbg(HW, priv, "Port is down while registering mac, exiting...\n"); + + if (!err) + memcpy(priv->current_mac, new_mac, sizeof(priv->current_mac)); + + return err; +} + +static int mlx4_en_set_mac(struct net_device *dev, void *addr) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct sockaddr *saddr = addr; + unsigned char new_mac[ETH_ALEN + 2]; + int err; + + if (!is_valid_ether_addr(saddr->sa_data)) + return -EADDRNOTAVAIL; + + mutex_lock(&mdev->state_lock); + memcpy(new_mac, saddr->sa_data, ETH_ALEN); + err = mlx4_en_do_set_mac(priv, new_mac); + if (err) + goto out; + + eth_hw_addr_set(dev, saddr->sa_data); + mlx4_en_update_user_mac(priv, new_mac); +out: + mutex_unlock(&mdev->state_lock); + + return err; +} + +static void mlx4_en_clear_list(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_mc_list *tmp, *mc_to_del; + + list_for_each_entry_safe(mc_to_del, tmp, &priv->mc_list, list) { + list_del(&mc_to_del->list); + kfree(mc_to_del); + } +} + +static void mlx4_en_cache_mclist(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct netdev_hw_addr *ha; + struct mlx4_en_mc_list *tmp; + + mlx4_en_clear_list(dev); + netdev_for_each_mc_addr(ha, dev) { + tmp = kzalloc(sizeof(struct mlx4_en_mc_list), GFP_ATOMIC); + if (!tmp) { + mlx4_en_clear_list(dev); + return; + } + memcpy(tmp->addr, ha->addr, ETH_ALEN); + list_add_tail(&tmp->list, &priv->mc_list); + } +} + +static void update_mclist_flags(struct mlx4_en_priv *priv, + struct list_head *dst, + struct list_head *src) +{ + struct mlx4_en_mc_list *dst_tmp, *src_tmp, *new_mc; + bool found; + + /* Find all the entries that should be removed from dst, + * These are the entries that are not found in src + */ + list_for_each_entry(dst_tmp, dst, list) { + found = false; + list_for_each_entry(src_tmp, src, list) { + if (ether_addr_equal(dst_tmp->addr, src_tmp->addr)) { + found = true; + break; + } + } + if (!found) + dst_tmp->action = MCLIST_REM; + } + + /* Add entries that exist in src but not in dst + * mark them as need to add + */ + list_for_each_entry(src_tmp, src, list) { + found = false; + list_for_each_entry(dst_tmp, dst, list) { + if (ether_addr_equal(dst_tmp->addr, src_tmp->addr)) { + dst_tmp->action = MCLIST_NONE; + found = true; + break; + } + } + if (!found) { + new_mc = kmemdup(src_tmp, + sizeof(struct mlx4_en_mc_list), + GFP_KERNEL); + if (!new_mc) + return; + + new_mc->action = MCLIST_ADD; + list_add_tail(&new_mc->list, dst); + } + } +} + +static void mlx4_en_set_rx_mode(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + if (!priv->port_up) + return; + + queue_work(priv->mdev->workqueue, &priv->rx_mode_task); +} + +static void mlx4_en_set_promisc_mode(struct mlx4_en_priv *priv, + struct mlx4_en_dev *mdev) +{ + int err = 0; + + if (!(priv->flags & MLX4_EN_FLAG_PROMISC)) { + if (netif_msg_rx_status(priv)) + en_warn(priv, 
"Entering promiscuous mode\n"); + priv->flags |= MLX4_EN_FLAG_PROMISC; + + /* Enable promiscouos mode */ + switch (mdev->dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + err = mlx4_flow_steer_promisc_add(mdev->dev, + priv->port, + priv->base_qpn, + MLX4_FS_ALL_DEFAULT); + if (err) + en_err(priv, "Failed enabling promiscuous mode\n"); + priv->flags |= MLX4_EN_FLAG_MC_PROMISC; + break; + + case MLX4_STEERING_MODE_B0: + err = mlx4_unicast_promisc_add(mdev->dev, + priv->base_qpn, + priv->port); + if (err) + en_err(priv, "Failed enabling unicast promiscuous mode\n"); + + /* Add the default qp number as multicast + * promisc + */ + if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) { + err = mlx4_multicast_promisc_add(mdev->dev, + priv->base_qpn, + priv->port); + if (err) + en_err(priv, "Failed enabling multicast promiscuous mode\n"); + priv->flags |= MLX4_EN_FLAG_MC_PROMISC; + } + break; + + case MLX4_STEERING_MODE_A0: + err = mlx4_SET_PORT_qpn_calc(mdev->dev, + priv->port, + priv->base_qpn, + 1); + if (err) + en_err(priv, "Failed enabling promiscuous mode\n"); + break; + } + + /* Disable port multicast filter (unconditionally) */ + err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, + 0, MLX4_MCAST_DISABLE); + if (err) + en_err(priv, "Failed disabling multicast filter\n"); + } +} + +static void mlx4_en_clear_promisc_mode(struct mlx4_en_priv *priv, + struct mlx4_en_dev *mdev) +{ + int err = 0; + + if (netif_msg_rx_status(priv)) + en_warn(priv, "Leaving promiscuous mode\n"); + priv->flags &= ~MLX4_EN_FLAG_PROMISC; + + /* Disable promiscouos mode */ + switch (mdev->dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + err = mlx4_flow_steer_promisc_remove(mdev->dev, + priv->port, + MLX4_FS_ALL_DEFAULT); + if (err) + en_err(priv, "Failed disabling promiscuous mode\n"); + priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; + break; + + case MLX4_STEERING_MODE_B0: + err = mlx4_unicast_promisc_remove(mdev->dev, + priv->base_qpn, + priv->port); + if (err) + en_err(priv, "Failed disabling unicast promiscuous mode\n"); + /* Disable Multicast promisc */ + if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { + err = mlx4_multicast_promisc_remove(mdev->dev, + priv->base_qpn, + priv->port); + if (err) + en_err(priv, "Failed disabling multicast promiscuous mode\n"); + priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; + } + break; + + case MLX4_STEERING_MODE_A0: + err = mlx4_SET_PORT_qpn_calc(mdev->dev, + priv->port, + priv->base_qpn, 0); + if (err) + en_err(priv, "Failed disabling promiscuous mode\n"); + break; + } +} + +static void mlx4_en_do_multicast(struct mlx4_en_priv *priv, + struct net_device *dev, + struct mlx4_en_dev *mdev) +{ + struct mlx4_en_mc_list *mclist, *tmp; + u64 mcast_addr = 0; + u8 mc_list[16] = {0}; + int err = 0; + + /* Enable/disable the multicast filter according to IFF_ALLMULTI */ + if (dev->flags & IFF_ALLMULTI) { + err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, + 0, MLX4_MCAST_DISABLE); + if (err) + en_err(priv, "Failed disabling multicast filter\n"); + + /* Add the default qp number as multicast promisc */ + if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) { + switch (mdev->dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + err = mlx4_flow_steer_promisc_add(mdev->dev, + priv->port, + priv->base_qpn, + MLX4_FS_MC_DEFAULT); + break; + + case MLX4_STEERING_MODE_B0: + err = mlx4_multicast_promisc_add(mdev->dev, + priv->base_qpn, + priv->port); + break; + + case MLX4_STEERING_MODE_A0: + break; + } + if (err) + en_err(priv, "Failed entering multicast promisc 
mode\n"); + priv->flags |= MLX4_EN_FLAG_MC_PROMISC; + } + } else { + /* Disable Multicast promisc */ + if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { + switch (mdev->dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + err = mlx4_flow_steer_promisc_remove(mdev->dev, + priv->port, + MLX4_FS_MC_DEFAULT); + break; + + case MLX4_STEERING_MODE_B0: + err = mlx4_multicast_promisc_remove(mdev->dev, + priv->base_qpn, + priv->port); + break; + + case MLX4_STEERING_MODE_A0: + break; + } + if (err) + en_err(priv, "Failed disabling multicast promiscuous mode\n"); + priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; + } + + err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, + 0, MLX4_MCAST_DISABLE); + if (err) + en_err(priv, "Failed disabling multicast filter\n"); + + /* Flush mcast filter and init it with broadcast address */ + mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, ETH_BCAST, + 1, MLX4_MCAST_CONFIG); + + /* Update multicast list - we cache all addresses so they won't + * change while HW is updated holding the command semaphor */ + netif_addr_lock_bh(dev); + mlx4_en_cache_mclist(dev); + netif_addr_unlock_bh(dev); + list_for_each_entry(mclist, &priv->mc_list, list) { + mcast_addr = ether_addr_to_u64(mclist->addr); + mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, + mcast_addr, 0, MLX4_MCAST_CONFIG); + } + err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, + 0, MLX4_MCAST_ENABLE); + if (err) + en_err(priv, "Failed enabling multicast filter\n"); + + update_mclist_flags(priv, &priv->curr_list, &priv->mc_list); + list_for_each_entry_safe(mclist, tmp, &priv->curr_list, list) { + if (mclist->action == MCLIST_REM) { + /* detach this address and delete from list */ + memcpy(&mc_list[10], mclist->addr, ETH_ALEN); + mc_list[5] = priv->port; + err = mlx4_multicast_detach(mdev->dev, + priv->rss_map.indir_qp, + mc_list, + MLX4_PROT_ETH, + mclist->reg_id); + if (err) + en_err(priv, "Fail to detach multicast address\n"); + + if (mclist->tunnel_reg_id) { + err = mlx4_flow_detach(priv->mdev->dev, mclist->tunnel_reg_id); + if (err) + en_err(priv, "Failed to detach multicast address\n"); + } + + /* remove from list */ + list_del(&mclist->list); + kfree(mclist); + } else if (mclist->action == MCLIST_ADD) { + /* attach the address */ + memcpy(&mc_list[10], mclist->addr, ETH_ALEN); + /* needed for B0 steering support */ + mc_list[5] = priv->port; + err = mlx4_multicast_attach(mdev->dev, + priv->rss_map.indir_qp, + mc_list, + priv->port, 0, + MLX4_PROT_ETH, + &mclist->reg_id); + if (err) + en_err(priv, "Fail to attach multicast address\n"); + + err = mlx4_en_tunnel_steer_add(priv, &mc_list[10], priv->base_qpn, + &mclist->tunnel_reg_id); + if (err) + en_err(priv, "Failed to attach multicast address\n"); + } + } + } +} + +static void mlx4_en_do_uc_filter(struct mlx4_en_priv *priv, + struct net_device *dev, + struct mlx4_en_dev *mdev) +{ + struct netdev_hw_addr *ha; + struct mlx4_mac_entry *entry; + struct hlist_node *tmp; + bool found; + u64 mac; + int err = 0; + struct hlist_head *bucket; + unsigned int i; + int removed = 0; + u32 prev_flags; + + /* Note that we do not need to protect our mac_hash traversal with rcu, + * since all modification code is protected by mdev->state_lock + */ + + /* find what to remove */ + for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) { + bucket = &priv->mac_hash[i]; + hlist_for_each_entry_safe(entry, tmp, bucket, hlist) { + found = false; + netdev_for_each_uc_addr(ha, dev) { + if (ether_addr_equal_64bits(entry->mac, + ha->addr)) { + found = true; + break; + } + } + + /* MAC address of the port 
is not in uc list */ + if (ether_addr_equal_64bits(entry->mac, + priv->current_mac)) + found = true; + + if (!found) { + mac = ether_addr_to_u64(entry->mac); + mlx4_en_uc_steer_release(priv, entry->mac, + priv->base_qpn, + entry->reg_id); + mlx4_unregister_mac(mdev->dev, priv->port, mac); + + hlist_del_rcu(&entry->hlist); + kfree_rcu(entry, rcu); + en_dbg(DRV, priv, "Removed MAC %pM on port:%d\n", + entry->mac, priv->port); + ++removed; + } + } + } + + /* if we didn't remove anything, there is no use in trying to add + * again once we are in a forced promisc mode state + */ + if ((priv->flags & MLX4_EN_FLAG_FORCE_PROMISC) && 0 == removed) + return; + + prev_flags = priv->flags; + priv->flags &= ~MLX4_EN_FLAG_FORCE_PROMISC; + + /* find what to add */ + netdev_for_each_uc_addr(ha, dev) { + found = false; + bucket = &priv->mac_hash[ha->addr[MLX4_EN_MAC_HASH_IDX]]; + hlist_for_each_entry(entry, bucket, hlist) { + if (ether_addr_equal_64bits(entry->mac, ha->addr)) { + found = true; + break; + } + } + + if (!found) { + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + en_err(priv, "Failed adding MAC %pM on port:%d (out of memory)\n", + ha->addr, priv->port); + priv->flags |= MLX4_EN_FLAG_FORCE_PROMISC; + break; + } + mac = ether_addr_to_u64(ha->addr); + memcpy(entry->mac, ha->addr, ETH_ALEN); + err = mlx4_register_mac(mdev->dev, priv->port, mac); + if (err < 0) { + en_err(priv, "Failed registering MAC %pM on port %d: %d\n", + ha->addr, priv->port, err); + kfree(entry); + priv->flags |= MLX4_EN_FLAG_FORCE_PROMISC; + break; + } + err = mlx4_en_uc_steer_add(priv, ha->addr, + &priv->base_qpn, + &entry->reg_id); + if (err) { + en_err(priv, "Failed adding MAC %pM on port %d: %d\n", + ha->addr, priv->port, err); + mlx4_unregister_mac(mdev->dev, priv->port, mac); + kfree(entry); + priv->flags |= MLX4_EN_FLAG_FORCE_PROMISC; + break; + } else { + unsigned int mac_hash; + en_dbg(DRV, priv, "Added MAC %pM on port:%d\n", + ha->addr, priv->port); + mac_hash = ha->addr[MLX4_EN_MAC_HASH_IDX]; + bucket = &priv->mac_hash[mac_hash]; + hlist_add_head_rcu(&entry->hlist, bucket); + } + } + } + + if (priv->flags & MLX4_EN_FLAG_FORCE_PROMISC) { + en_warn(priv, "Forcing promiscuous mode on port:%d\n", + priv->port); + } else if (prev_flags & MLX4_EN_FLAG_FORCE_PROMISC) { + en_warn(priv, "Stop forcing promiscuous mode on port:%d\n", + priv->port); + } +} + +static void mlx4_en_do_set_rx_mode(struct work_struct *work) +{ + struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, + rx_mode_task); + struct mlx4_en_dev *mdev = priv->mdev; + struct net_device *dev = priv->dev; + + mutex_lock(&mdev->state_lock); + if (!mdev->device_up) { + en_dbg(HW, priv, "Card is not up, ignoring rx mode change.\n"); + goto out; + } + if (!priv->port_up) { + en_dbg(HW, priv, "Port is down, ignoring rx mode change.\n"); + goto out; + } + + if (!netif_carrier_ok(dev)) { + if (!mlx4_en_QUERY_PORT(mdev, priv->port)) { + if (priv->port_state.link_state) { + netif_carrier_on(dev); + en_dbg(LINK, priv, "Link Up\n"); + } + } + } + + if (dev->priv_flags & IFF_UNICAST_FLT) + mlx4_en_do_uc_filter(priv, dev, mdev); + + /* Promsicuous mode: disable all filters */ + if ((dev->flags & IFF_PROMISC) || + (priv->flags & MLX4_EN_FLAG_FORCE_PROMISC)) { + mlx4_en_set_promisc_mode(priv, mdev); + goto out; + } + + /* Not in promiscuous mode */ + if (priv->flags & MLX4_EN_FLAG_PROMISC) + mlx4_en_clear_promisc_mode(priv, mdev); + + mlx4_en_do_multicast(priv, dev, mdev); +out: + mutex_unlock(&mdev->state_lock); +} + +static int 
mlx4_en_set_rss_steer_rules(struct mlx4_en_priv *priv) +{ + u64 reg_id; + int err = 0; + int *qpn = &priv->base_qpn; + struct mlx4_mac_entry *entry; + + err = mlx4_en_uc_steer_add(priv, priv->dev->dev_addr, qpn, &reg_id); + if (err) + return err; + + err = mlx4_en_tunnel_steer_add(priv, priv->dev->dev_addr, *qpn, + &priv->tunnel_reg_id); + if (err) + goto tunnel_err; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + err = -ENOMEM; + goto alloc_err; + } + + memcpy(entry->mac, priv->dev->dev_addr, sizeof(entry->mac)); + memcpy(priv->current_mac, entry->mac, sizeof(priv->current_mac)); + entry->reg_id = reg_id; + hlist_add_head_rcu(&entry->hlist, + &priv->mac_hash[entry->mac[MLX4_EN_MAC_HASH_IDX]]); + + return 0; + +alloc_err: + if (priv->tunnel_reg_id) + mlx4_flow_detach(priv->mdev->dev, priv->tunnel_reg_id); + +tunnel_err: + mlx4_en_uc_steer_release(priv, priv->dev->dev_addr, *qpn, reg_id); + return err; +} + +static void mlx4_en_delete_rss_steer_rules(struct mlx4_en_priv *priv) +{ + u64 mac; + unsigned int i; + int qpn = priv->base_qpn; + struct hlist_head *bucket; + struct hlist_node *tmp; + struct mlx4_mac_entry *entry; + + for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) { + bucket = &priv->mac_hash[i]; + hlist_for_each_entry_safe(entry, tmp, bucket, hlist) { + mac = ether_addr_to_u64(entry->mac); + en_dbg(DRV, priv, "Registering MAC:%pM for deleting\n", + entry->mac); + mlx4_en_uc_steer_release(priv, entry->mac, + qpn, entry->reg_id); + + mlx4_unregister_mac(priv->mdev->dev, priv->port, mac); + hlist_del_rcu(&entry->hlist); + kfree_rcu(entry, rcu); + } + } + + if (priv->tunnel_reg_id) { + mlx4_flow_detach(priv->mdev->dev, priv->tunnel_reg_id); + priv->tunnel_reg_id = 0; + } +} + +static void mlx4_en_tx_timeout(struct net_device *dev, unsigned int txqueue) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX][txqueue]; + + if (netif_msg_timer(priv)) + en_warn(priv, "Tx timeout called on port:%d\n", priv->port); + + en_warn(priv, "TX timeout on queue: %d, QP: 0x%x, CQ: 0x%x, Cons: 0x%x, Prod: 0x%x\n", + txqueue, tx_ring->qpn, tx_ring->sp_cqn, + tx_ring->cons, tx_ring->prod); + + priv->port_stats.tx_timeout++; + if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state)) { + en_dbg(DRV, priv, "Scheduling port restart\n"); + queue_work(mdev->workqueue, &priv->restart_task); + } +} + + +static void +mlx4_en_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + spin_lock_bh(&priv->stats_lock); + mlx4_en_fold_software_stats(dev); + netdev_stats_to_stats64(stats, &dev->stats); + spin_unlock_bh(&priv->stats_lock); +} + +static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) +{ + struct mlx4_en_cq *cq; + int i, t; + + /* If we haven't received a specific coalescing setting + * (module param), we set the moderation parameters as follows: + * - moder_cnt is set to the number of mtu sized packets to + * satisfy our coalescing target. + * - moder_time is set to a fixed value. 
+ */ + priv->rx_frames = MLX4_EN_RX_COAL_TARGET; + priv->rx_usecs = MLX4_EN_RX_COAL_TIME; + priv->tx_frames = MLX4_EN_TX_COAL_PKTS; + priv->tx_usecs = MLX4_EN_TX_COAL_TIME; + en_dbg(INTR, priv, "Default coalescing params for mtu:%d - rx_frames:%d rx_usecs:%d\n", + priv->dev->mtu, priv->rx_frames, priv->rx_usecs); + + /* Setup cq moderation params */ + for (i = 0; i < priv->rx_ring_num; i++) { + cq = priv->rx_cq[i]; + cq->moder_cnt = priv->rx_frames; + cq->moder_time = priv->rx_usecs; + priv->last_moder_time[i] = MLX4_EN_AUTO_CONF; + priv->last_moder_packets[i] = 0; + priv->last_moder_bytes[i] = 0; + } + + for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) { + for (i = 0; i < priv->tx_ring_num[t]; i++) { + cq = priv->tx_cq[t][i]; + cq->moder_cnt = priv->tx_frames; + cq->moder_time = priv->tx_usecs; + } + } + + /* Reset auto-moderation params */ + priv->pkt_rate_low = MLX4_EN_RX_RATE_LOW; + priv->rx_usecs_low = MLX4_EN_RX_COAL_TIME_LOW; + priv->pkt_rate_high = MLX4_EN_RX_RATE_HIGH; + priv->rx_usecs_high = MLX4_EN_RX_COAL_TIME_HIGH; + priv->sample_interval = MLX4_EN_SAMPLE_INTERVAL; + priv->adaptive_rx_coal = 1; + priv->last_moder_jiffies = 0; + priv->last_moder_tx_packets = 0; +} + +static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv) +{ + unsigned long period = (unsigned long) (jiffies - priv->last_moder_jiffies); + u32 pkt_rate_high, pkt_rate_low; + struct mlx4_en_cq *cq; + unsigned long packets; + unsigned long rate; + unsigned long avg_pkt_size; + unsigned long rx_packets; + unsigned long rx_bytes; + unsigned long rx_pkt_diff; + int moder_time; + int ring, err; + + if (!priv->adaptive_rx_coal || period < priv->sample_interval * HZ) + return; + + pkt_rate_low = READ_ONCE(priv->pkt_rate_low); + pkt_rate_high = READ_ONCE(priv->pkt_rate_high); + + for (ring = 0; ring < priv->rx_ring_num; ring++) { + rx_packets = READ_ONCE(priv->rx_ring[ring]->packets); + rx_bytes = READ_ONCE(priv->rx_ring[ring]->bytes); + + rx_pkt_diff = rx_packets - priv->last_moder_packets[ring]; + packets = rx_pkt_diff; + rate = packets * HZ / period; + avg_pkt_size = packets ? 
(rx_bytes - + priv->last_moder_bytes[ring]) / packets : 0; + + /* Apply auto-moderation only when packet rate + * exceeds a rate that it matters */ + if (rate > (MLX4_EN_RX_RATE_THRESH / priv->rx_ring_num) && + avg_pkt_size > MLX4_EN_AVG_PKT_SMALL) { + if (rate <= pkt_rate_low) + moder_time = priv->rx_usecs_low; + else if (rate >= pkt_rate_high) + moder_time = priv->rx_usecs_high; + else + moder_time = (rate - pkt_rate_low) * + (priv->rx_usecs_high - priv->rx_usecs_low) / + (pkt_rate_high - pkt_rate_low) + + priv->rx_usecs_low; + } else { + moder_time = priv->rx_usecs_low; + } + + cq = priv->rx_cq[ring]; + if (moder_time != priv->last_moder_time[ring] || + cq->moder_cnt != priv->rx_frames) { + priv->last_moder_time[ring] = moder_time; + cq->moder_time = moder_time; + cq->moder_cnt = priv->rx_frames; + err = mlx4_en_set_cq_moder(priv, cq); + if (err) + en_err(priv, "Failed modifying moderation for cq:%d\n", + ring); + } + priv->last_moder_packets[ring] = rx_packets; + priv->last_moder_bytes[ring] = rx_bytes; + } + + priv->last_moder_jiffies = jiffies; +} + +static void mlx4_en_do_get_stats(struct work_struct *work) +{ + struct delayed_work *delay = to_delayed_work(work); + struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv, + stats_task); + struct mlx4_en_dev *mdev = priv->mdev; + int err; + + mutex_lock(&mdev->state_lock); + if (mdev->device_up) { + if (priv->port_up) { + err = mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 0); + if (err) + en_dbg(HW, priv, "Could not update stats\n"); + + mlx4_en_auto_moderation(priv); + } + + queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); + } + if (mdev->mac_removed[MLX4_MAX_PORTS + 1 - priv->port]) { + mlx4_en_do_set_mac(priv, priv->current_mac); + mdev->mac_removed[MLX4_MAX_PORTS + 1 - priv->port] = 0; + } + mutex_unlock(&mdev->state_lock); +} + +/* mlx4_en_service_task - Run service task for tasks that needed to be done + * periodically + */ +static void mlx4_en_service_task(struct work_struct *work) +{ + struct delayed_work *delay = to_delayed_work(work); + struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv, + service_task); + struct mlx4_en_dev *mdev = priv->mdev; + + mutex_lock(&mdev->state_lock); + if (mdev->device_up) { + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) + mlx4_en_ptp_overflow_check(mdev); + + mlx4_en_recover_from_oom(priv); + queue_delayed_work(mdev->workqueue, &priv->service_task, + SERVICE_TASK_DELAY); + } + mutex_unlock(&mdev->state_lock); +} + +static void mlx4_en_linkstate(struct mlx4_en_priv *priv) +{ + struct mlx4_en_port_state *port_state = &priv->port_state; + struct mlx4_en_dev *mdev = priv->mdev; + struct net_device *dev = priv->dev; + bool up; + + if (mlx4_en_QUERY_PORT(mdev, priv->port)) + port_state->link_state = MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN; + + up = port_state->link_state == MLX4_PORT_STATE_DEV_EVENT_PORT_UP; + if (up == netif_carrier_ok(dev)) + netif_carrier_event(dev); + if (!up) { + en_info(priv, "Link Down\n"); + netif_carrier_off(dev); + } else { + en_info(priv, "Link Up\n"); + netif_carrier_on(dev); + } +} + +static void mlx4_en_linkstate_work(struct work_struct *work) +{ + struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, + linkstate_task); + struct mlx4_en_dev *mdev = priv->mdev; + + mutex_lock(&mdev->state_lock); + mlx4_en_linkstate(priv); + mutex_unlock(&mdev->state_lock); +} + +static int mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) +{ + struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx]; + int 
numa_node = priv->mdev->dev->numa_node; + + if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL)) + return -ENOMEM; + + cpumask_set_cpu(cpumask_local_spread(ring_idx, numa_node), + ring->affinity_mask); + return 0; +} + +static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) +{ + free_cpumask_var(priv->rx_ring[ring_idx]->affinity_mask); +} + +static void mlx4_en_init_recycle_ring(struct mlx4_en_priv *priv, + int tx_ring_idx) +{ + struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX_XDP][tx_ring_idx]; + int rr_index = tx_ring_idx; + + tx_ring->free_tx_desc = mlx4_en_recycle_tx_desc; + tx_ring->recycle_ring = priv->rx_ring[rr_index]; + en_dbg(DRV, priv, "Set tx_ring[%d][%d]->recycle_ring = rx_ring[%d]\n", + TX_XDP, tx_ring_idx, rr_index); +} + +int mlx4_en_start_port(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_cq *cq; + struct mlx4_en_tx_ring *tx_ring; + int rx_index = 0; + int err = 0; + int i, t; + int j; + u8 mc_list[16] = {0}; + + if (priv->port_up) { + en_dbg(DRV, priv, "start port called while port already up\n"); + return 0; + } + + INIT_LIST_HEAD(&priv->mc_list); + INIT_LIST_HEAD(&priv->curr_list); + INIT_LIST_HEAD(&priv->ethtool_list); + memset(&priv->ethtool_rules[0], 0, + sizeof(struct ethtool_flow_id) * MAX_NUM_OF_FS_RULES); + + /* Calculate Rx buf size */ + dev->mtu = min(dev->mtu, priv->max_mtu); + mlx4_en_calc_rx_buf(dev); + en_dbg(DRV, priv, "Rx buf size:%d\n", priv->rx_skb_size); + + /* Configure rx cq's and rings */ + err = mlx4_en_activate_rx_rings(priv); + if (err) { + en_err(priv, "Failed to activate RX rings\n"); + return err; + } + for (i = 0; i < priv->rx_ring_num; i++) { + cq = priv->rx_cq[i]; + + err = mlx4_en_init_affinity_hint(priv, i); + if (err) { + en_err(priv, "Failed preparing IRQ affinity hint\n"); + goto cq_err; + } + + err = mlx4_en_activate_cq(priv, cq, i); + if (err) { + en_err(priv, "Failed activating Rx CQ\n"); + mlx4_en_free_affinity_hint(priv, i); + goto cq_err; + } + + for (j = 0; j < cq->size; j++) { + struct mlx4_cqe *cqe = NULL; + + cqe = mlx4_en_get_cqe(cq->buf, j, priv->cqe_size) + + priv->cqe_factor; + cqe->owner_sr_opcode = MLX4_CQE_OWNER_MASK; + } + + err = mlx4_en_set_cq_moder(priv, cq); + if (err) { + en_err(priv, "Failed setting cq moderation parameters\n"); + mlx4_en_deactivate_cq(priv, cq); + mlx4_en_free_affinity_hint(priv, i); + goto cq_err; + } + mlx4_en_arm_cq(priv, cq); + priv->rx_ring[i]->cqn = cq->mcq.cqn; + ++rx_index; + } + + /* Set qp number */ + en_dbg(DRV, priv, "Getting qp number for port %d\n", priv->port); + err = mlx4_en_get_qp(priv); + if (err) { + en_err(priv, "Failed getting eth qp\n"); + goto cq_err; + } + mdev->mac_removed[priv->port] = 0; + + priv->counter_index = + mlx4_get_default_counter_index(mdev->dev, priv->port); + + err = mlx4_en_config_rss_steer(priv); + if (err) { + en_err(priv, "Failed configuring rss steering\n"); + goto mac_err; + } + + err = mlx4_en_create_drop_qp(priv); + if (err) + goto rss_err; + + /* Configure tx cq's and rings */ + for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) { + u8 num_tx_rings_p_up = t == TX ? 
+ priv->num_tx_rings_p_up : priv->tx_ring_num[t]; + + for (i = 0; i < priv->tx_ring_num[t]; i++) { + /* Configure cq */ + cq = priv->tx_cq[t][i]; + err = mlx4_en_activate_cq(priv, cq, i); + if (err) { + en_err(priv, "Failed allocating Tx CQ\n"); + goto tx_err; + } + err = mlx4_en_set_cq_moder(priv, cq); + if (err) { + en_err(priv, "Failed setting cq moderation parameters\n"); + mlx4_en_deactivate_cq(priv, cq); + goto tx_err; + } + en_dbg(DRV, priv, + "Resetting index of collapsed CQ:%d to -1\n", i); + cq->buf->wqe_index = cpu_to_be16(0xffff); + + /* Configure ring */ + tx_ring = priv->tx_ring[t][i]; + err = mlx4_en_activate_tx_ring(priv, tx_ring, + cq->mcq.cqn, + i / num_tx_rings_p_up); + if (err) { + en_err(priv, "Failed allocating Tx ring\n"); + mlx4_en_deactivate_cq(priv, cq); + goto tx_err; + } + clear_bit(MLX4_EN_TX_RING_STATE_RECOVERING, &tx_ring->state); + if (t != TX_XDP) { + tx_ring->tx_queue = netdev_get_tx_queue(dev, i); + tx_ring->recycle_ring = NULL; + + /* Arm CQ for TX completions */ + mlx4_en_arm_cq(priv, cq); + + } else { + mlx4_en_init_tx_xdp_ring_descs(priv, tx_ring); + mlx4_en_init_recycle_ring(priv, i); + /* XDP TX CQ should never be armed */ + } + + /* Set initial ownership of all Tx TXBBs to SW (1) */ + for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE) + *((u32 *)(tx_ring->buf + j)) = 0xffffffff; + } + } + + /* Configure port */ + err = mlx4_SET_PORT_general(mdev->dev, priv->port, + priv->rx_skb_size + ETH_FCS_LEN, + priv->prof->tx_pause, + priv->prof->tx_ppp, + priv->prof->rx_pause, + priv->prof->rx_ppp); + if (err) { + en_err(priv, "Failed setting port general configurations for port %d, with error %d\n", + priv->port, err); + goto tx_err; + } + + err = mlx4_SET_PORT_user_mtu(mdev->dev, priv->port, dev->mtu); + if (err) { + en_err(priv, "Failed to pass user MTU(%d) to Firmware for port %d, with error %d\n", + dev->mtu, priv->port, err); + goto tx_err; + } + + /* Set default qp number */ + err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 0); + if (err) { + en_err(priv, "Failed setting default qp numbers\n"); + goto tx_err; + } + + if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { + err = mlx4_SET_PORT_VXLAN(mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC, 1); + if (err) { + en_err(priv, "Failed setting port L2 tunnel configuration, err %d\n", + err); + goto tx_err; + } + } + + /* Init port */ + en_dbg(HW, priv, "Initializing port\n"); + err = mlx4_INIT_PORT(mdev->dev, priv->port); + if (err) { + en_err(priv, "Failed Initializing port\n"); + goto tx_err; + } + + /* Set Unicast and VXLAN steering rules */ + if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_A0 && + mlx4_en_set_rss_steer_rules(priv)) + mlx4_warn(mdev, "Failed setting steering rules\n"); + + /* Attach rx QP to broadcast address */ + eth_broadcast_addr(&mc_list[10]); + mc_list[5] = priv->port; /* needed for B0 steering support */ + if (mlx4_multicast_attach(mdev->dev, priv->rss_map.indir_qp, mc_list, + priv->port, 0, MLX4_PROT_ETH, + &priv->broadcast_id)) + mlx4_warn(mdev, "Failed Attaching Broadcast\n"); + + /* Must redo promiscuous mode setup. 
*/ + priv->flags &= ~(MLX4_EN_FLAG_PROMISC | MLX4_EN_FLAG_MC_PROMISC); + + /* Schedule multicast task to populate multicast list */ + queue_work(mdev->workqueue, &priv->rx_mode_task); + + if (priv->mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) + udp_tunnel_nic_reset_ntf(dev); + + priv->port_up = true; + + /* Process all completions if exist to prevent + * the queues freezing if they are full + */ + for (i = 0; i < priv->rx_ring_num; i++) { + local_bh_disable(); + napi_schedule(&priv->rx_cq[i]->napi); + local_bh_enable(); + } + + clear_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state); + netif_tx_start_all_queues(dev); + netif_device_attach(dev); + + return 0; + +tx_err: + if (t == MLX4_EN_NUM_TX_TYPES) { + t--; + i = priv->tx_ring_num[t]; + } + while (t >= 0) { + while (i--) { + mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[t][i]); + mlx4_en_deactivate_cq(priv, priv->tx_cq[t][i]); + } + if (!t--) + break; + i = priv->tx_ring_num[t]; + } + mlx4_en_destroy_drop_qp(priv); +rss_err: + mlx4_en_release_rss_steer(priv); +mac_err: + mlx4_en_put_qp(priv); +cq_err: + while (rx_index--) { + mlx4_en_deactivate_cq(priv, priv->rx_cq[rx_index]); + mlx4_en_free_affinity_hint(priv, rx_index); + } + for (i = 0; i < priv->rx_ring_num; i++) + mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); + + return err; /* need to close devices */ +} + + +void mlx4_en_stop_port(struct net_device *dev, int detach) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_mc_list *mclist, *tmp; + struct ethtool_flow_id *flow, *tmp_flow; + int i, t; + u8 mc_list[16] = {0}; + + if (!priv->port_up) { + en_dbg(DRV, priv, "stop port called while port already down\n"); + return; + } + + /* close port*/ + mlx4_CLOSE_PORT(mdev->dev, priv->port); + + /* Synchronize with tx routine */ + netif_tx_lock_bh(dev); + if (detach) + netif_device_detach(dev); + netif_tx_stop_all_queues(dev); + netif_tx_unlock_bh(dev); + + netif_tx_disable(dev); + + spin_lock_bh(&priv->stats_lock); + mlx4_en_fold_software_stats(dev); + /* Set port as not active */ + priv->port_up = false; + spin_unlock_bh(&priv->stats_lock); + + priv->counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev); + + /* Promiscuous mode */ + if (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + priv->flags &= ~(MLX4_EN_FLAG_PROMISC | + MLX4_EN_FLAG_MC_PROMISC); + mlx4_flow_steer_promisc_remove(mdev->dev, + priv->port, + MLX4_FS_ALL_DEFAULT); + mlx4_flow_steer_promisc_remove(mdev->dev, + priv->port, + MLX4_FS_MC_DEFAULT); + } else if (priv->flags & MLX4_EN_FLAG_PROMISC) { + priv->flags &= ~MLX4_EN_FLAG_PROMISC; + + /* Disable promiscuous mode */ + mlx4_unicast_promisc_remove(mdev->dev, priv->base_qpn, + priv->port); + + /* Disable Multicast promisc */ + if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { + mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn, + priv->port); + priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; + } + } + + /* Detach All multicasts */ + eth_broadcast_addr(&mc_list[10]); + mc_list[5] = priv->port; /* needed for B0 steering support */ + mlx4_multicast_detach(mdev->dev, priv->rss_map.indir_qp, mc_list, + MLX4_PROT_ETH, priv->broadcast_id); + list_for_each_entry(mclist, &priv->curr_list, list) { + memcpy(&mc_list[10], mclist->addr, ETH_ALEN); + mc_list[5] = priv->port; + mlx4_multicast_detach(mdev->dev, priv->rss_map.indir_qp, + mc_list, MLX4_PROT_ETH, mclist->reg_id); + if (mclist->tunnel_reg_id) + mlx4_flow_detach(mdev->dev, mclist->tunnel_reg_id); + } + 
mlx4_en_clear_list(dev); + list_for_each_entry_safe(mclist, tmp, &priv->curr_list, list) { + list_del(&mclist->list); + kfree(mclist); + } + + /* Flush multicast filter */ + mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 1, MLX4_MCAST_CONFIG); + + /* Remove flow steering rules for the port*/ + if (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + ASSERT_RTNL(); + list_for_each_entry_safe(flow, tmp_flow, + &priv->ethtool_list, list) { + mlx4_flow_detach(mdev->dev, flow->id); + list_del(&flow->list); + } + } + + mlx4_en_destroy_drop_qp(priv); + + /* Free TX Rings */ + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) { + for (i = 0; i < priv->tx_ring_num[t]; i++) { + mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[t][i]); + mlx4_en_deactivate_cq(priv, priv->tx_cq[t][i]); + } + } + msleep(10); + + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) + for (i = 0; i < priv->tx_ring_num[t]; i++) + mlx4_en_free_tx_buf(dev, priv->tx_ring[t][i]); + + if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_A0) + mlx4_en_delete_rss_steer_rules(priv); + + /* Free RSS qps */ + mlx4_en_release_rss_steer(priv); + + /* Unregister Mac address for the port */ + mlx4_en_put_qp(priv); + if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN)) + mdev->mac_removed[priv->port] = 1; + + /* Free RX Rings */ + for (i = 0; i < priv->rx_ring_num; i++) { + struct mlx4_en_cq *cq = priv->rx_cq[i]; + + napi_synchronize(&cq->napi); + mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); + mlx4_en_deactivate_cq(priv, cq); + + mlx4_en_free_affinity_hint(priv, i); + } +} + +static void mlx4_en_restart(struct work_struct *work) +{ + struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, + restart_task); + struct mlx4_en_dev *mdev = priv->mdev; + struct net_device *dev = priv->dev; + + en_dbg(DRV, priv, "Watchdog task called for port %d\n", priv->port); + + rtnl_lock(); + mutex_lock(&mdev->state_lock); + if (priv->port_up) { + mlx4_en_stop_port(dev, 1); + if (mlx4_en_start_port(dev)) + en_err(priv, "Failed restarting port %d\n", priv->port); + } + mutex_unlock(&mdev->state_lock); + rtnl_unlock(); +} + +static void mlx4_en_clear_stats(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_tx_ring **tx_ring; + int i; + + if (!mlx4_is_slave(mdev->dev)) + if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1)) + en_dbg(HW, priv, "Failed dumping statistics\n"); + + memset(&priv->pkstats, 0, sizeof(priv->pkstats)); + memset(&priv->port_stats, 0, sizeof(priv->port_stats)); + memset(&priv->rx_flowstats, 0, sizeof(priv->rx_flowstats)); + memset(&priv->tx_flowstats, 0, sizeof(priv->tx_flowstats)); + memset(&priv->rx_priority_flowstats, 0, + sizeof(priv->rx_priority_flowstats)); + memset(&priv->tx_priority_flowstats, 0, + sizeof(priv->tx_priority_flowstats)); + memset(&priv->pf_stats, 0, sizeof(priv->pf_stats)); + + tx_ring = priv->tx_ring[TX]; + for (i = 0; i < priv->tx_ring_num[TX]; i++) { + tx_ring[i]->bytes = 0; + tx_ring[i]->packets = 0; + tx_ring[i]->tx_csum = 0; + tx_ring[i]->tx_dropped = 0; + tx_ring[i]->queue_stopped = 0; + tx_ring[i]->wake_queue = 0; + tx_ring[i]->tso_packets = 0; + tx_ring[i]->xmit_more = 0; + } + for (i = 0; i < priv->rx_ring_num; i++) { + priv->rx_ring[i]->bytes = 0; + priv->rx_ring[i]->packets = 0; + priv->rx_ring[i]->csum_ok = 0; + priv->rx_ring[i]->csum_none = 0; + priv->rx_ring[i]->csum_complete = 0; + } +} + +static int mlx4_en_open(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); 
+ struct mlx4_en_dev *mdev = priv->mdev; + int err = 0; + + mutex_lock(&mdev->state_lock); + + if (!mdev->device_up) { + en_err(priv, "Cannot open - device down/disabled\n"); + err = -EBUSY; + goto out; + } + + /* Reset HW statistics and SW counters */ + mlx4_en_clear_stats(dev); + + err = mlx4_en_start_port(dev); + if (err) { + en_err(priv, "Failed starting port:%d\n", priv->port); + goto out; + } + mlx4_en_linkstate(priv); +out: + mutex_unlock(&mdev->state_lock); + return err; +} + + +static int mlx4_en_close(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + + en_dbg(IFDOWN, priv, "Close port called\n"); + + mutex_lock(&mdev->state_lock); + + mlx4_en_stop_port(dev, 0); + netif_carrier_off(dev); + + mutex_unlock(&mdev->state_lock); + return 0; +} + +static void mlx4_en_free_resources(struct mlx4_en_priv *priv) +{ + int i, t; + +#ifdef CONFIG_RFS_ACCEL + priv->dev->rx_cpu_rmap = NULL; +#endif + + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) { + for (i = 0; i < priv->tx_ring_num[t]; i++) { + if (priv->tx_ring[t] && priv->tx_ring[t][i]) + mlx4_en_destroy_tx_ring(priv, + &priv->tx_ring[t][i]); + if (priv->tx_cq[t] && priv->tx_cq[t][i]) + mlx4_en_destroy_cq(priv, &priv->tx_cq[t][i]); + } + kfree(priv->tx_ring[t]); + kfree(priv->tx_cq[t]); + } + + for (i = 0; i < priv->rx_ring_num; i++) { + if (priv->rx_ring[i]) + mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i], + priv->prof->rx_ring_size, priv->stride); + if (priv->rx_cq[i]) + mlx4_en_destroy_cq(priv, &priv->rx_cq[i]); + } + +} + +static int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) +{ + struct mlx4_en_port_profile *prof = priv->prof; + int i, t; + int node; + + /* Create tx Rings */ + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) { + for (i = 0; i < priv->tx_ring_num[t]; i++) { + node = cpu_to_node(i % num_online_cpus()); + if (mlx4_en_create_cq(priv, &priv->tx_cq[t][i], + prof->tx_ring_size, i, t, node)) + goto err; + + if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[t][i], + prof->tx_ring_size, + TXBB_SIZE, node, i)) + goto err; + } + } + + /* Create rx Rings */ + for (i = 0; i < priv->rx_ring_num; i++) { + node = cpu_to_node(i % num_online_cpus()); + if (mlx4_en_create_cq(priv, &priv->rx_cq[i], + prof->rx_ring_size, i, RX, node)) + goto err; + + if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i], + prof->rx_ring_size, priv->stride, + node, i)) + goto err; + + } + +#ifdef CONFIG_RFS_ACCEL + priv->dev->rx_cpu_rmap = mlx4_get_cpu_rmap(priv->mdev->dev, priv->port); +#endif + + return 0; + +err: + en_err(priv, "Failed to allocate NIC resources\n"); + for (i = 0; i < priv->rx_ring_num; i++) { + if (priv->rx_ring[i]) + mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i], + prof->rx_ring_size, + priv->stride); + if (priv->rx_cq[i]) + mlx4_en_destroy_cq(priv, &priv->rx_cq[i]); + } + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) { + for (i = 0; i < priv->tx_ring_num[t]; i++) { + if (priv->tx_ring[t][i]) + mlx4_en_destroy_tx_ring(priv, + &priv->tx_ring[t][i]); + if (priv->tx_cq[t][i]) + mlx4_en_destroy_cq(priv, &priv->tx_cq[t][i]); + } + } + return -ENOMEM; +} + + +static int mlx4_en_copy_priv(struct mlx4_en_priv *dst, + struct mlx4_en_priv *src, + struct mlx4_en_port_profile *prof) +{ + int t; + + memcpy(&dst->hwtstamp_config, &prof->hwtstamp_config, + sizeof(dst->hwtstamp_config)); + dst->num_tx_rings_p_up = prof->num_tx_rings_p_up; + dst->rx_ring_num = prof->rx_ring_num; + dst->flags = prof->flags; + dst->mdev = src->mdev; + dst->port = src->port; + dst->dev = src->dev; + dst->prof = 
prof; + dst->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + + DS_SIZE * MLX4_EN_MAX_RX_FRAGS); + + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) { + dst->tx_ring_num[t] = prof->tx_ring_num[t]; + if (!dst->tx_ring_num[t]) + continue; + + dst->tx_ring[t] = kcalloc(MAX_TX_RINGS, + sizeof(struct mlx4_en_tx_ring *), + GFP_KERNEL); + if (!dst->tx_ring[t]) + goto err_free_tx; + + dst->tx_cq[t] = kcalloc(MAX_TX_RINGS, + sizeof(struct mlx4_en_cq *), + GFP_KERNEL); + if (!dst->tx_cq[t]) { + kfree(dst->tx_ring[t]); + goto err_free_tx; + } + } + + return 0; + +err_free_tx: + while (t--) { + kfree(dst->tx_ring[t]); + kfree(dst->tx_cq[t]); + } + return -ENOMEM; +} + +static void mlx4_en_update_priv(struct mlx4_en_priv *dst, + struct mlx4_en_priv *src) +{ + int t; + memcpy(dst->rx_ring, src->rx_ring, + sizeof(struct mlx4_en_rx_ring *) * src->rx_ring_num); + memcpy(dst->rx_cq, src->rx_cq, + sizeof(struct mlx4_en_cq *) * src->rx_ring_num); + memcpy(&dst->hwtstamp_config, &src->hwtstamp_config, + sizeof(dst->hwtstamp_config)); + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) { + dst->tx_ring_num[t] = src->tx_ring_num[t]; + dst->tx_ring[t] = src->tx_ring[t]; + dst->tx_cq[t] = src->tx_cq[t]; + } + dst->num_tx_rings_p_up = src->num_tx_rings_p_up; + dst->rx_ring_num = src->rx_ring_num; + memcpy(dst->prof, src->prof, sizeof(struct mlx4_en_port_profile)); +} + +int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv, + struct mlx4_en_priv *tmp, + struct mlx4_en_port_profile *prof, + bool carry_xdp_prog) +{ + struct bpf_prog *xdp_prog; + int i, t, ret; + + ret = mlx4_en_copy_priv(tmp, priv, prof); + if (ret) { + en_warn(priv, "%s: mlx4_en_copy_priv() failed, return\n", + __func__); + return ret; + } + + if (mlx4_en_alloc_resources(tmp)) { + en_warn(priv, + "%s: Resource allocation failed, using previous configuration\n", + __func__); + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) { + kfree(tmp->tx_ring[t]); + kfree(tmp->tx_cq[t]); + } + return -ENOMEM; + } + + /* All rx_rings has the same xdp_prog. Pick the first one. 
*/ + xdp_prog = rcu_dereference_protected( + priv->rx_ring[0]->xdp_prog, + lockdep_is_held(&priv->mdev->state_lock)); + + if (xdp_prog && carry_xdp_prog) { + bpf_prog_add(xdp_prog, tmp->rx_ring_num); + for (i = 0; i < tmp->rx_ring_num; i++) + rcu_assign_pointer(tmp->rx_ring[i]->xdp_prog, + xdp_prog); + } + + return 0; +} + +void mlx4_en_safe_replace_resources(struct mlx4_en_priv *priv, + struct mlx4_en_priv *tmp) +{ + mlx4_en_free_resources(priv); + mlx4_en_update_priv(priv, tmp); +} + +void mlx4_en_destroy_netdev(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + + en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port); + + /* Unregister device - this will close the port if it was up */ + if (priv->registered) { + devlink_port_type_clear(mlx4_get_devlink_port(mdev->dev, + priv->port)); + unregister_netdev(dev); + } + + if (priv->allocated) + mlx4_free_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE); + + cancel_delayed_work(&priv->stats_task); + cancel_delayed_work(&priv->service_task); + /* flush any pending task for this netdev */ + flush_workqueue(mdev->workqueue); + + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) + mlx4_en_remove_timestamp(mdev); + + /* Detach the netdev so tasks would not attempt to access it */ + mutex_lock(&mdev->state_lock); + mdev->pndev[priv->port] = NULL; + mdev->upper[priv->port] = NULL; + +#ifdef CONFIG_RFS_ACCEL + mlx4_en_cleanup_filters(priv); +#endif + + mlx4_en_free_resources(priv); + mutex_unlock(&mdev->state_lock); + + free_netdev(dev); +} + +static bool mlx4_en_check_xdp_mtu(struct net_device *dev, int mtu) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + if (mtu > MLX4_EN_MAX_XDP_MTU) { + en_err(priv, "mtu:%d > max:%d when XDP prog is attached\n", + mtu, MLX4_EN_MAX_XDP_MTU); + return false; + } + + return true; +} + +static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int err = 0; + + en_dbg(DRV, priv, "Change MTU called - current:%d new:%d\n", + dev->mtu, new_mtu); + + if (priv->tx_ring_num[TX_XDP] && + !mlx4_en_check_xdp_mtu(dev, new_mtu)) + return -EOPNOTSUPP; + + dev->mtu = new_mtu; + + if (netif_running(dev)) { + mutex_lock(&mdev->state_lock); + if (!mdev->device_up) { + /* NIC is probably restarting - let restart task reset + * the port */ + en_dbg(DRV, priv, "Change MTU called with card down!?\n"); + } else { + mlx4_en_stop_port(dev, 1); + err = mlx4_en_start_port(dev); + if (err) { + en_err(priv, "Failed restarting port:%d\n", + priv->port); + if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, + &priv->state)) + queue_work(mdev->workqueue, &priv->restart_task); + } + } + mutex_unlock(&mdev->state_lock); + } + return 0; +} + +static int mlx4_en_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct hwtstamp_config config; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + /* device doesn't support time stamping */ + if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS)) + return -EINVAL; + + /* TX HW timestamp */ + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + case HWTSTAMP_TX_ON: + break; + default: + return -ERANGE; + } + + /* RX HW timestamp */ + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case 
HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_NTP_ALL: + config.rx_filter = HWTSTAMP_FILTER_ALL; + break; + default: + return -ERANGE; + } + + if (mlx4_en_reset_config(dev, config, dev->features)) { + config.tx_type = HWTSTAMP_TX_OFF; + config.rx_filter = HWTSTAMP_FILTER_NONE; + } + + return copy_to_user(ifr->ifr_data, &config, + sizeof(config)) ? -EFAULT : 0; +} + +static int mlx4_en_hwtstamp_get(struct net_device *dev, struct ifreq *ifr) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + return copy_to_user(ifr->ifr_data, &priv->hwtstamp_config, + sizeof(priv->hwtstamp_config)) ? -EFAULT : 0; +} + +static int mlx4_en_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + switch (cmd) { + case SIOCSHWTSTAMP: + return mlx4_en_hwtstamp_set(dev, ifr); + case SIOCGHWTSTAMP: + return mlx4_en_hwtstamp_get(dev, ifr); + default: + return -EOPNOTSUPP; + } +} + +static netdev_features_t mlx4_en_fix_features(struct net_device *netdev, + netdev_features_t features) +{ + struct mlx4_en_priv *en_priv = netdev_priv(netdev); + struct mlx4_en_dev *mdev = en_priv->mdev; + + /* Since there is no support for separate RX C-TAG/S-TAG vlan accel + * enable/disable make sure S-TAG flag is always in same state as + * C-TAG. + */ + if (features & NETIF_F_HW_VLAN_CTAG_RX && + !(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN)) + features |= NETIF_F_HW_VLAN_STAG_RX; + else + features &= ~NETIF_F_HW_VLAN_STAG_RX; + + return features; +} + +static int mlx4_en_set_features(struct net_device *netdev, + netdev_features_t features) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + bool reset = false; + int ret = 0; + + if (DEV_FEATURE_CHANGED(netdev, features, NETIF_F_RXFCS)) { + en_info(priv, "Turn %s RX-FCS\n", + (features & NETIF_F_RXFCS) ? "ON" : "OFF"); + reset = true; + } + + if (DEV_FEATURE_CHANGED(netdev, features, NETIF_F_RXALL)) { + u8 ignore_fcs_value = (features & NETIF_F_RXALL) ? 1 : 0; + + en_info(priv, "Turn %s RX-ALL\n", + ignore_fcs_value ? "ON" : "OFF"); + ret = mlx4_SET_PORT_fcs_check(priv->mdev->dev, + priv->port, ignore_fcs_value); + if (ret) + return ret; + } + + if (DEV_FEATURE_CHANGED(netdev, features, NETIF_F_HW_VLAN_CTAG_RX)) { + en_info(priv, "Turn %s RX vlan strip offload\n", + (features & NETIF_F_HW_VLAN_CTAG_RX) ? "ON" : "OFF"); + reset = true; + } + + if (DEV_FEATURE_CHANGED(netdev, features, NETIF_F_HW_VLAN_CTAG_TX)) + en_info(priv, "Turn %s TX vlan strip offload\n", + (features & NETIF_F_HW_VLAN_CTAG_TX) ? "ON" : "OFF"); + + if (DEV_FEATURE_CHANGED(netdev, features, NETIF_F_HW_VLAN_STAG_TX)) + en_info(priv, "Turn %s TX S-VLAN strip offload\n", + (features & NETIF_F_HW_VLAN_STAG_TX) ? "ON" : "OFF"); + + if (DEV_FEATURE_CHANGED(netdev, features, NETIF_F_LOOPBACK)) { + en_info(priv, "Turn %s loopback\n", + (features & NETIF_F_LOOPBACK) ? 
"ON" : "OFF"); + mlx4_en_update_loopback_state(netdev, features); + } + + if (reset) { + ret = mlx4_en_reset_config(netdev, priv->hwtstamp_config, + features); + if (ret) + return ret; + } + + return 0; +} + +static int mlx4_en_set_vf_mac(struct net_device *dev, int queue, u8 *mac) +{ + struct mlx4_en_priv *en_priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = en_priv->mdev; + + return mlx4_set_vf_mac(mdev->dev, en_priv->port, queue, mac); +} + +static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) +{ + struct mlx4_en_priv *en_priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = en_priv->mdev; + + return mlx4_set_vf_vlan(mdev->dev, en_priv->port, vf, vlan, qos, + vlan_proto); +} + +static int mlx4_en_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, + int max_tx_rate) +{ + struct mlx4_en_priv *en_priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = en_priv->mdev; + + return mlx4_set_vf_rate(mdev->dev, en_priv->port, vf, min_tx_rate, + max_tx_rate); +} + +static int mlx4_en_set_vf_spoofchk(struct net_device *dev, int vf, bool setting) +{ + struct mlx4_en_priv *en_priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = en_priv->mdev; + + return mlx4_set_vf_spoofchk(mdev->dev, en_priv->port, vf, setting); +} + +static int mlx4_en_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivf) +{ + struct mlx4_en_priv *en_priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = en_priv->mdev; + + return mlx4_get_vf_config(mdev->dev, en_priv->port, vf, ivf); +} + +static int mlx4_en_set_vf_link_state(struct net_device *dev, int vf, int link_state) +{ + struct mlx4_en_priv *en_priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = en_priv->mdev; + + return mlx4_set_vf_link_state(mdev->dev, en_priv->port, vf, link_state); +} + +static int mlx4_en_get_vf_stats(struct net_device *dev, int vf, + struct ifla_vf_stats *vf_stats) +{ + struct mlx4_en_priv *en_priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = en_priv->mdev; + + return mlx4_get_vf_stats(mdev->dev, en_priv->port, vf, vf_stats); +} + +#define PORT_ID_BYTE_LEN 8 +static int mlx4_en_get_phys_port_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_dev *mdev = priv->mdev->dev; + int i; + u64 phys_port_id = mdev->caps.phys_port_id[priv->port]; + + if (!phys_port_id) + return -EOPNOTSUPP; + + ppid->id_len = sizeof(phys_port_id); + for (i = PORT_ID_BYTE_LEN - 1; i >= 0; --i) { + ppid->id[i] = phys_port_id & 0xff; + phys_port_id >>= 8; + } + return 0; +} + +static int mlx4_udp_tunnel_sync(struct net_device *dev, unsigned int table) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct udp_tunnel_info ti; + int ret; + + udp_tunnel_nic_get_port(dev, table, 0, &ti); + priv->vxlan_port = ti.port; + + ret = mlx4_config_vxlan_port(priv->mdev->dev, priv->vxlan_port); + if (ret) + return ret; + + return mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port, + VXLAN_STEER_BY_OUTER_MAC, + !!priv->vxlan_port); +} + +static const struct udp_tunnel_nic_info mlx4_udp_tunnels = { + .sync_table = mlx4_udp_tunnel_sync, + .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP | + UDP_TUNNEL_NIC_INFO_IPV4_ONLY, + .tables = { + { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, + }, +}; + +static netdev_features_t mlx4_en_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + features = vlan_features_check(skb, features); + features = vxlan_features_check(skb, features); + + /* The 
ConnectX-3 doesn't support outer IPv6 checksums but it does + * support inner IPv6 checksums and segmentation so we need to + * strip that feature if this is an IPv6 encapsulated frame. + */ + if (skb->encapsulation && + (skb->ip_summed == CHECKSUM_PARTIAL)) { + struct mlx4_en_priv *priv = netdev_priv(dev); + + if (!priv->vxlan_port || + (ip_hdr(skb)->version != 4) || + (udp_hdr(skb)->dest != priv->vxlan_port)) + features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + } + + return features; +} + +static int mlx4_en_set_tx_maxrate(struct net_device *dev, int queue_index, u32 maxrate) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX][queue_index]; + struct mlx4_update_qp_params params; + int err; + + if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT)) + return -EOPNOTSUPP; + + /* rate provided to us in Mbs, check if it fits into 12 bits, if not use Gbs */ + if (maxrate >> 12) { + params.rate_unit = MLX4_QP_RATE_LIMIT_GBS; + params.rate_val = maxrate / 1000; + } else if (maxrate) { + params.rate_unit = MLX4_QP_RATE_LIMIT_MBS; + params.rate_val = maxrate; + } else { /* zero serves to revoke the QP rate-limitation */ + params.rate_unit = 0; + params.rate_val = 0; + } + + err = mlx4_update_qp(priv->mdev->dev, tx_ring->qpn, MLX4_UPDATE_QP_RATE_LIMIT, + &params); + return err; +} + +static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_port_profile new_prof; + struct bpf_prog *old_prog; + struct mlx4_en_priv *tmp; + int tx_changed = 0; + int xdp_ring_num; + int port_up = 0; + int err; + int i; + + xdp_ring_num = prog ? priv->rx_ring_num : 0; + + /* No need to reconfigure buffers when simply swapping the + * program for a new one. 
+ */ + if (priv->tx_ring_num[TX_XDP] == xdp_ring_num) { + if (prog) + bpf_prog_add(prog, priv->rx_ring_num - 1); + + mutex_lock(&mdev->state_lock); + for (i = 0; i < priv->rx_ring_num; i++) { + old_prog = rcu_dereference_protected( + priv->rx_ring[i]->xdp_prog, + lockdep_is_held(&mdev->state_lock)); + rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + } + mutex_unlock(&mdev->state_lock); + return 0; + } + + if (!mlx4_en_check_xdp_mtu(dev, dev->mtu)) + return -EOPNOTSUPP; + + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + if (prog) + bpf_prog_add(prog, priv->rx_ring_num - 1); + + mutex_lock(&mdev->state_lock); + memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); + new_prof.tx_ring_num[TX_XDP] = xdp_ring_num; + + if (priv->tx_ring_num[TX] + xdp_ring_num > MAX_TX_RINGS) { + tx_changed = 1; + new_prof.tx_ring_num[TX] = + MAX_TX_RINGS - ALIGN(xdp_ring_num, priv->prof->num_up); + en_warn(priv, "Reducing the number of TX rings, to not exceed the max total rings number.\n"); + } + + err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, false); + if (err) { + if (prog) + bpf_prog_sub(prog, priv->rx_ring_num - 1); + goto unlock_out; + } + + if (priv->port_up) { + port_up = 1; + mlx4_en_stop_port(dev, 1); + } + + mlx4_en_safe_replace_resources(priv, tmp); + if (tx_changed) + netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]); + + for (i = 0; i < priv->rx_ring_num; i++) { + old_prog = rcu_dereference_protected( + priv->rx_ring[i]->xdp_prog, + lockdep_is_held(&mdev->state_lock)); + rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + } + + if (port_up) { + err = mlx4_en_start_port(dev); + if (err) { + en_err(priv, "Failed starting port %d for XDP change\n", + priv->port); + if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state)) + queue_work(mdev->workqueue, &priv->restart_task); + } + } + +unlock_out: + mutex_unlock(&mdev->state_lock); + kfree(tmp); + return err; +} + +static int mlx4_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return mlx4_xdp_set(dev, xdp->prog); + default: + return -EINVAL; + } +} + +static const struct net_device_ops mlx4_netdev_ops = { + .ndo_open = mlx4_en_open, + .ndo_stop = mlx4_en_close, + .ndo_start_xmit = mlx4_en_xmit, + .ndo_select_queue = mlx4_en_select_queue, + .ndo_get_stats64 = mlx4_en_get_stats64, + .ndo_set_rx_mode = mlx4_en_set_rx_mode, + .ndo_set_mac_address = mlx4_en_set_mac, + .ndo_validate_addr = eth_validate_addr, + .ndo_change_mtu = mlx4_en_change_mtu, + .ndo_eth_ioctl = mlx4_en_ioctl, + .ndo_tx_timeout = mlx4_en_tx_timeout, + .ndo_vlan_rx_add_vid = mlx4_en_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = mlx4_en_vlan_rx_kill_vid, + .ndo_set_features = mlx4_en_set_features, + .ndo_fix_features = mlx4_en_fix_features, + .ndo_setup_tc = __mlx4_en_setup_tc, +#ifdef CONFIG_RFS_ACCEL + .ndo_rx_flow_steer = mlx4_en_filter_rfs, +#endif + .ndo_get_phys_port_id = mlx4_en_get_phys_port_id, + .ndo_features_check = mlx4_en_features_check, + .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate, + .ndo_bpf = mlx4_xdp, +}; + +static const struct net_device_ops mlx4_netdev_ops_master = { + .ndo_open = mlx4_en_open, + .ndo_stop = mlx4_en_close, + .ndo_start_xmit = mlx4_en_xmit, + .ndo_select_queue = mlx4_en_select_queue, + .ndo_get_stats64 = mlx4_en_get_stats64, + .ndo_set_rx_mode = mlx4_en_set_rx_mode, + .ndo_set_mac_address = mlx4_en_set_mac, + .ndo_validate_addr = 
eth_validate_addr, + .ndo_change_mtu = mlx4_en_change_mtu, + .ndo_tx_timeout = mlx4_en_tx_timeout, + .ndo_vlan_rx_add_vid = mlx4_en_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = mlx4_en_vlan_rx_kill_vid, + .ndo_set_vf_mac = mlx4_en_set_vf_mac, + .ndo_set_vf_vlan = mlx4_en_set_vf_vlan, + .ndo_set_vf_rate = mlx4_en_set_vf_rate, + .ndo_set_vf_spoofchk = mlx4_en_set_vf_spoofchk, + .ndo_set_vf_link_state = mlx4_en_set_vf_link_state, + .ndo_get_vf_stats = mlx4_en_get_vf_stats, + .ndo_get_vf_config = mlx4_en_get_vf_config, + .ndo_set_features = mlx4_en_set_features, + .ndo_fix_features = mlx4_en_fix_features, + .ndo_setup_tc = __mlx4_en_setup_tc, +#ifdef CONFIG_RFS_ACCEL + .ndo_rx_flow_steer = mlx4_en_filter_rfs, +#endif + .ndo_get_phys_port_id = mlx4_en_get_phys_port_id, + .ndo_features_check = mlx4_en_features_check, + .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate, + .ndo_bpf = mlx4_xdp, +}; + +struct mlx4_en_bond { + struct work_struct work; + struct mlx4_en_priv *priv; + int is_bonded; + struct mlx4_port_map port_map; +}; + +static void mlx4_en_bond_work(struct work_struct *work) +{ + struct mlx4_en_bond *bond = container_of(work, + struct mlx4_en_bond, + work); + int err = 0; + struct mlx4_dev *dev = bond->priv->mdev->dev; + + if (bond->is_bonded) { + if (!mlx4_is_bonded(dev)) { + err = mlx4_bond(dev); + if (err) + en_err(bond->priv, "Fail to bond device\n"); + } + if (!err) { + err = mlx4_port_map_set(dev, &bond->port_map); + if (err) + en_err(bond->priv, "Fail to set port map [%d][%d]: %d\n", + bond->port_map.port1, + bond->port_map.port2, + err); + } + } else if (mlx4_is_bonded(dev)) { + err = mlx4_unbond(dev); + if (err) + en_err(bond->priv, "Fail to unbond device\n"); + } + dev_put(bond->priv->dev); + kfree(bond); +} + +static int mlx4_en_queue_bond_work(struct mlx4_en_priv *priv, int is_bonded, + u8 v2p_p1, u8 v2p_p2) +{ + struct mlx4_en_bond *bond = NULL; + + bond = kzalloc(sizeof(*bond), GFP_ATOMIC); + if (!bond) + return -ENOMEM; + + INIT_WORK(&bond->work, mlx4_en_bond_work); + bond->priv = priv; + bond->is_bonded = is_bonded; + bond->port_map.port1 = v2p_p1; + bond->port_map.port2 = v2p_p2; + dev_hold(priv->dev); + queue_work(priv->mdev->workqueue, &bond->work); + return 0; +} + +int mlx4_en_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + u8 port = 0; + struct mlx4_en_dev *mdev; + struct mlx4_dev *dev; + int i, num_eth_ports = 0; + bool do_bond = true; + struct mlx4_en_priv *priv; + u8 v2p_port1 = 0; + u8 v2p_port2 = 0; + + if (!net_eq(dev_net(ndev), &init_net)) + return NOTIFY_DONE; + + mdev = container_of(this, struct mlx4_en_dev, nb); + dev = mdev->dev; + + /* Go into this mode only when two network devices set on two ports + * of the same mlx4 device are slaves of the same bonding master + */ + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) { + ++num_eth_ports; + if (!port && (mdev->pndev[i] == ndev)) + port = i; + mdev->upper[i] = mdev->pndev[i] ? + netdev_master_upper_dev_get(mdev->pndev[i]) : NULL; + /* condition not met: network device is a slave */ + if (!mdev->upper[i]) + do_bond = false; + if (num_eth_ports < 2) + continue; + /* condition not met: same master */ + if (mdev->upper[i] != mdev->upper[i-1]) + do_bond = false; + } + /* condition not met: 2 slaves */ + do_bond = (num_eth_ports == 2) ? 
do_bond : false; + + /* handle only events that come with enough info */ + if ((do_bond && (event != NETDEV_BONDING_INFO)) || !port) + return NOTIFY_DONE; + + priv = netdev_priv(ndev); + if (do_bond) { + struct netdev_notifier_bonding_info *notifier_info = ptr; + struct netdev_bonding_info *bonding_info = + &notifier_info->bonding_info; + + /* required mode 1, 2 or 4 */ + if ((bonding_info->master.bond_mode != BOND_MODE_ACTIVEBACKUP) && + (bonding_info->master.bond_mode != BOND_MODE_XOR) && + (bonding_info->master.bond_mode != BOND_MODE_8023AD)) + do_bond = false; + + /* require exactly 2 slaves */ + if (bonding_info->master.num_slaves != 2) + do_bond = false; + + /* calc v2p */ + if (do_bond) { + if (bonding_info->master.bond_mode == + BOND_MODE_ACTIVEBACKUP) { + /* in active-backup mode virtual ports are + * mapped to the physical port of the active + * slave */ + if (bonding_info->slave.state == + BOND_STATE_BACKUP) { + if (port == 1) { + v2p_port1 = 2; + v2p_port2 = 2; + } else { + v2p_port1 = 1; + v2p_port2 = 1; + } + } else { /* BOND_STATE_ACTIVE */ + if (port == 1) { + v2p_port1 = 1; + v2p_port2 = 1; + } else { + v2p_port1 = 2; + v2p_port2 = 2; + } + } + } else { /* Active-Active */ + /* in active-active mode a virtual port is + * mapped to the native physical port if and only + * if the physical port is up */ + __s8 link = bonding_info->slave.link; + + if (port == 1) + v2p_port2 = 2; + else + v2p_port1 = 1; + if ((link == BOND_LINK_UP) || + (link == BOND_LINK_FAIL)) { + if (port == 1) + v2p_port1 = 1; + else + v2p_port2 = 2; + } else { /* BOND_LINK_DOWN || BOND_LINK_BACK */ + if (port == 1) + v2p_port1 = 2; + else + v2p_port2 = 1; + } + } + } + } + + mlx4_en_queue_bond_work(priv, do_bond, + v2p_port1, v2p_port2); + + return NOTIFY_DONE; +} + +void mlx4_en_update_pfc_stats_bitmap(struct mlx4_dev *dev, + struct mlx4_en_stats_bitmap *stats_bitmap, + u8 rx_ppp, u8 rx_pause, + u8 tx_ppp, u8 tx_pause) +{ + int last_i = NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PF_STATS; + + if (!mlx4_is_slave(dev) && + (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN)) { + mutex_lock(&stats_bitmap->mutex); + bitmap_clear(stats_bitmap->bitmap, last_i, NUM_FLOW_STATS); + + if (rx_ppp) + bitmap_set(stats_bitmap->bitmap, last_i, + NUM_FLOW_PRIORITY_STATS_RX); + last_i += NUM_FLOW_PRIORITY_STATS_RX; + + if (rx_pause && !(rx_ppp)) + bitmap_set(stats_bitmap->bitmap, last_i, + NUM_FLOW_STATS_RX); + last_i += NUM_FLOW_STATS_RX; + + if (tx_ppp) + bitmap_set(stats_bitmap->bitmap, last_i, + NUM_FLOW_PRIORITY_STATS_TX); + last_i += NUM_FLOW_PRIORITY_STATS_TX; + + if (tx_pause && !(tx_ppp)) + bitmap_set(stats_bitmap->bitmap, last_i, + NUM_FLOW_STATS_TX); + last_i += NUM_FLOW_STATS_TX; + + mutex_unlock(&stats_bitmap->mutex); + } +} + +void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev, + struct mlx4_en_stats_bitmap *stats_bitmap, + u8 rx_ppp, u8 rx_pause, + u8 tx_ppp, u8 tx_pause) +{ + int last_i = 0; + + mutex_init(&stats_bitmap->mutex); + bitmap_zero(stats_bitmap->bitmap, NUM_ALL_STATS); + + if (mlx4_is_slave(dev)) { + bitmap_set(stats_bitmap->bitmap, last_i + + MLX4_FIND_NETDEV_STAT(rx_packets), 1); + bitmap_set(stats_bitmap->bitmap, last_i + + MLX4_FIND_NETDEV_STAT(tx_packets), 1); + bitmap_set(stats_bitmap->bitmap, last_i + + MLX4_FIND_NETDEV_STAT(rx_bytes), 1); + bitmap_set(stats_bitmap->bitmap, last_i + + MLX4_FIND_NETDEV_STAT(tx_bytes), 1); + bitmap_set(stats_bitmap->bitmap, last_i + + MLX4_FIND_NETDEV_STAT(rx_dropped), 1); + bitmap_set(stats_bitmap->bitmap, last_i + + MLX4_FIND_NETDEV_STAT(tx_dropped), 1); + } 
else { + bitmap_set(stats_bitmap->bitmap, last_i, NUM_MAIN_STATS); + } + last_i += NUM_MAIN_STATS; + + bitmap_set(stats_bitmap->bitmap, last_i, NUM_PORT_STATS); + last_i += NUM_PORT_STATS; + + if (mlx4_is_master(dev)) + bitmap_set(stats_bitmap->bitmap, last_i, + NUM_PF_STATS); + last_i += NUM_PF_STATS; + + mlx4_en_update_pfc_stats_bitmap(dev, stats_bitmap, + rx_ppp, rx_pause, + tx_ppp, tx_pause); + last_i += NUM_FLOW_STATS; + + if (!mlx4_is_slave(dev)) + bitmap_set(stats_bitmap->bitmap, last_i, NUM_PKT_STATS); + last_i += NUM_PKT_STATS; + + bitmap_set(stats_bitmap->bitmap, last_i, NUM_XDP_STATS); + last_i += NUM_XDP_STATS; + + if (!mlx4_is_slave(dev)) + bitmap_set(stats_bitmap->bitmap, last_i, NUM_PHY_STATS); + last_i += NUM_PHY_STATS; +} + +int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, + struct mlx4_en_port_profile *prof) +{ + struct net_device *dev; + struct mlx4_en_priv *priv; + int i, t; + int err; + + dev = alloc_etherdev_mqs(sizeof(struct mlx4_en_priv), + MAX_TX_RINGS, MAX_RX_RINGS); + if (dev == NULL) + return -ENOMEM; + + netif_set_real_num_tx_queues(dev, prof->tx_ring_num[TX]); + netif_set_real_num_rx_queues(dev, prof->rx_ring_num); + + SET_NETDEV_DEV(dev, &mdev->dev->persist->pdev->dev); + dev->dev_port = port - 1; + + /* + * Initialize driver private data + */ + + priv = netdev_priv(dev); + memset(priv, 0, sizeof(struct mlx4_en_priv)); + priv->counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev); + spin_lock_init(&priv->stats_lock); + INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode); + INIT_WORK(&priv->restart_task, mlx4_en_restart); + INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate_work); + INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); + INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task); +#ifdef CONFIG_RFS_ACCEL + INIT_LIST_HEAD(&priv->filters); + spin_lock_init(&priv->filters_lock); +#endif + + priv->dev = dev; + priv->mdev = mdev; + priv->ddev = &mdev->pdev->dev; + priv->prof = prof; + priv->port = port; + priv->port_up = false; + priv->flags = prof->flags; + priv->pflags = MLX4_EN_PRIV_FLAGS_BLUEFLAME; + priv->ctrl_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE | + MLX4_WQE_CTRL_SOLICITED); + priv->num_tx_rings_p_up = mdev->profile.max_num_tx_rings_p_up; + priv->tx_work_limit = MLX4_EN_DEFAULT_TX_WORK; + netdev_rss_key_fill(priv->rss_key, sizeof(priv->rss_key)); + + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) { + priv->tx_ring_num[t] = prof->tx_ring_num[t]; + if (!priv->tx_ring_num[t]) + continue; + + priv->tx_ring[t] = kcalloc(MAX_TX_RINGS, + sizeof(struct mlx4_en_tx_ring *), + GFP_KERNEL); + if (!priv->tx_ring[t]) { + err = -ENOMEM; + goto out; + } + priv->tx_cq[t] = kcalloc(MAX_TX_RINGS, + sizeof(struct mlx4_en_cq *), + GFP_KERNEL); + if (!priv->tx_cq[t]) { + err = -ENOMEM; + goto out; + } + } + priv->rx_ring_num = prof->rx_ring_num; + priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 
1 : 0; + priv->cqe_size = mdev->dev->caps.cqe_size; + priv->mac_index = -1; + priv->msg_enable = MLX4_EN_MSG_LEVEL; +#ifdef CONFIG_MLX4_EN_DCB + if (!mlx4_is_slave(priv->mdev->dev)) { + u8 prio; + + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; ++prio) { + priv->ets.prio_tc[prio] = prio; + priv->ets.tc_tsa[prio] = IEEE_8021QAZ_TSA_VENDOR; + } + + priv->dcbx_cap = DCB_CAP_DCBX_VER_CEE | DCB_CAP_DCBX_HOST | + DCB_CAP_DCBX_VER_IEEE; + priv->flags |= MLX4_EN_DCB_ENABLED; + priv->cee_config.pfc_state = false; + + for (i = 0; i < MLX4_EN_NUM_UP_HIGH; i++) + priv->cee_config.dcb_pfc[i] = pfc_disabled; + + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) { + dev->dcbnl_ops = &mlx4_en_dcbnl_ops; + } else { + en_info(priv, "enabling only PFC DCB ops\n"); + dev->dcbnl_ops = &mlx4_en_dcbnl_pfc_ops; + } + } +#endif + + for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) + INIT_HLIST_HEAD(&priv->mac_hash[i]); + + /* Query for default mac and max mtu */ + priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port]; + + if (mdev->dev->caps.rx_checksum_flags_port[priv->port] & + MLX4_RX_CSUM_MODE_VAL_NON_TCP_UDP) + priv->flags |= MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP; + + /* Set default MAC */ + dev->addr_len = ETH_ALEN; + mlx4_en_u64_to_mac(dev, mdev->dev->caps.def_mac[priv->port]); + if (!is_valid_ether_addr(dev->dev_addr)) { + en_err(priv, "Port: %d, invalid mac burned: %pM, quitting\n", + priv->port, dev->dev_addr); + err = -EINVAL; + goto out; + } else if (mlx4_is_slave(priv->mdev->dev) && + (priv->mdev->dev->port_random_macs & 1 << priv->port)) { + /* Random MAC was assigned in mlx4_slave_cap + * in mlx4_core module + */ + dev->addr_assign_type |= NET_ADDR_RANDOM; + en_warn(priv, "Assigned random MAC address %pM\n", dev->dev_addr); + } + + memcpy(priv->current_mac, dev->dev_addr, sizeof(priv->current_mac)); + + priv->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + + DS_SIZE * MLX4_EN_MAX_RX_FRAGS); + err = mlx4_en_alloc_resources(priv); + if (err) + goto out; + + /* Initialize time stamping config */ + priv->hwtstamp_config.flags = 0; + priv->hwtstamp_config.tx_type = HWTSTAMP_TX_OFF; + priv->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; + + /* Allocate page for receive rings */ + err = mlx4_alloc_hwq_res(mdev->dev, &priv->res, + MLX4_EN_PAGE_SIZE); + if (err) { + en_err(priv, "Failed to allocate page for rx qps\n"); + goto out; + } + priv->allocated = 1; + + /* + * Initialize netdev entry points + */ + if (mlx4_is_master(priv->mdev->dev)) + dev->netdev_ops = &mlx4_netdev_ops_master; + else + dev->netdev_ops = &mlx4_netdev_ops; + dev->watchdog_timeo = MLX4_EN_WATCHDOG_TIMEOUT; + netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]); + netif_set_real_num_rx_queues(dev, priv->rx_ring_num); + + dev->ethtool_ops = &mlx4_en_ethtool_ops; + + /* + * Set driver features + */ + dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + if (mdev->LSO_support) + dev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; + + if (mdev->dev->caps.tunnel_offload_mode == + MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { + dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL; + dev->features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL; + dev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; + dev->hw_enc_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_RXCSUM | + NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL; + + dev->udp_tunnel_nic_info = &mlx4_udp_tunnels; + } 
+ + dev->vlan_features = dev->hw_features; + + dev->hw_features |= NETIF_F_RXCSUM | NETIF_F_RXHASH; + dev->features = dev->hw_features | NETIF_F_HIGHDMA | + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER; + dev->hw_features |= NETIF_F_LOOPBACK | + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; + + if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN)) { + dev->features |= NETIF_F_HW_VLAN_STAG_RX | + NETIF_F_HW_VLAN_STAG_FILTER; + dev->hw_features |= NETIF_F_HW_VLAN_STAG_RX; + } + + if (mlx4_is_slave(mdev->dev)) { + bool vlan_offload_disabled; + int phv; + + err = get_phv_bit(mdev->dev, port, &phv); + if (!err && phv) { + dev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; + priv->pflags |= MLX4_EN_PRIV_FLAGS_PHV; + } + err = mlx4_get_is_vlan_offload_disabled(mdev->dev, port, + &vlan_offload_disabled); + if (!err && vlan_offload_disabled) { + dev->hw_features &= ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_STAG_TX | + NETIF_F_HW_VLAN_STAG_RX); + dev->features &= ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_STAG_TX | + NETIF_F_HW_VLAN_STAG_RX); + } + } else { + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN && + !(mdev->dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN)) + dev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; + } + + if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP) + dev->hw_features |= NETIF_F_RXFCS; + + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_IGNORE_FCS) + dev->hw_features |= NETIF_F_RXALL; + + if (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED && + mdev->dev->caps.dmfs_high_steer_mode != MLX4_STEERING_DMFS_A0_STATIC) + dev->hw_features |= NETIF_F_NTUPLE; + + if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_A0) + dev->priv_flags |= IFF_UNICAST_FLT; + + /* Setting a default hash function value */ + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP) { + priv->rss_hash_fn = ETH_RSS_HASH_TOP; + } else if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_XOR) { + priv->rss_hash_fn = ETH_RSS_HASH_XOR; + } else { + en_warn(priv, + "No RSS hash capabilities exposed, using Toeplitz\n"); + priv->rss_hash_fn = ETH_RSS_HASH_TOP; + } + + /* MTU range: 68 - hw-specific max */ + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = priv->max_mtu; + + /* supports LSOv2 packets. 
*/ + netif_set_tso_max_size(dev, GSO_MAX_SIZE); + + mdev->pndev[port] = dev; + mdev->upper[port] = NULL; + + netif_carrier_off(dev); + mlx4_en_set_default_moderation(priv); + + en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num[TX]); + en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); + + mlx4_en_update_loopback_state(priv->dev, priv->dev->features); + + /* Configure port */ + mlx4_en_calc_rx_buf(dev); + err = mlx4_SET_PORT_general(mdev->dev, priv->port, + priv->rx_skb_size + ETH_FCS_LEN, + prof->tx_pause, prof->tx_ppp, + prof->rx_pause, prof->rx_ppp); + if (err) { + en_err(priv, "Failed setting port general configurations for port %d, with error %d\n", + priv->port, err); + goto out; + } + + if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { + err = mlx4_SET_PORT_VXLAN(mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC, 1); + if (err) { + en_err(priv, "Failed setting port L2 tunnel configuration, err %d\n", + err); + goto out; + } + } + + /* Init port */ + en_warn(priv, "Initializing port\n"); + err = mlx4_INIT_PORT(mdev->dev, priv->port); + if (err) { + en_err(priv, "Failed Initializing port\n"); + goto out; + } + queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); + + /* Initialize time stamp mechanism */ + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) + mlx4_en_init_timestamp(mdev); + + queue_delayed_work(mdev->workqueue, &priv->service_task, + SERVICE_TASK_DELAY); + + mlx4_en_set_stats_bitmap(mdev->dev, &priv->stats_bitmap, + mdev->profile.prof[priv->port].rx_ppp, + mdev->profile.prof[priv->port].rx_pause, + mdev->profile.prof[priv->port].tx_ppp, + mdev->profile.prof[priv->port].tx_pause); + + err = register_netdev(dev); + if (err) { + en_err(priv, "Netdev registration failed for port %d\n", port); + goto out; + } + + priv->registered = 1; + devlink_port_type_eth_set(mlx4_get_devlink_port(mdev->dev, priv->port), + dev); + + return 0; + +out: + mlx4_en_destroy_netdev(dev); + return err; +} + +int mlx4_en_reset_config(struct net_device *dev, + struct hwtstamp_config ts_config, + netdev_features_t features) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_port_profile new_prof; + struct mlx4_en_priv *tmp; + int port_up = 0; + int err = 0; + + if (priv->hwtstamp_config.tx_type == ts_config.tx_type && + priv->hwtstamp_config.rx_filter == ts_config.rx_filter && + !DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX) && + !DEV_FEATURE_CHANGED(dev, features, NETIF_F_RXFCS)) + return 0; /* Nothing to change */ + + if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX) && + (features & NETIF_F_HW_VLAN_CTAG_RX) && + (priv->hwtstamp_config.rx_filter != HWTSTAMP_FILTER_NONE)) { + en_warn(priv, "Can't turn ON rx vlan offload while time-stamping rx filter is ON\n"); + return -EINVAL; + } + + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + mutex_lock(&mdev->state_lock); + + memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); + memcpy(&new_prof.hwtstamp_config, &ts_config, sizeof(ts_config)); + + err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true); + if (err) + goto out; + + if (priv->port_up) { + port_up = 1; + mlx4_en_stop_port(dev, 1); + } + + mlx4_en_safe_replace_resources(priv, tmp); + + if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX)) { + if (features & NETIF_F_HW_VLAN_CTAG_RX) + dev->features |= NETIF_F_HW_VLAN_CTAG_RX; + else + dev->features &= ~NETIF_F_HW_VLAN_CTAG_RX; + } else if 
(ts_config.rx_filter == HWTSTAMP_FILTER_NONE) { + /* RX time-stamping is OFF, update the RX vlan offload + * to the latest wanted state + */ + if (dev->wanted_features & NETIF_F_HW_VLAN_CTAG_RX) + dev->features |= NETIF_F_HW_VLAN_CTAG_RX; + else + dev->features &= ~NETIF_F_HW_VLAN_CTAG_RX; + } + + if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_RXFCS)) { + if (features & NETIF_F_RXFCS) + dev->features |= NETIF_F_RXFCS; + else + dev->features &= ~NETIF_F_RXFCS; + } + + /* RX vlan offload and RX time-stamping can't co-exist ! + * Regardless of the caller's choice, + * Turn Off RX vlan offload in case of time-stamping is ON + */ + if (ts_config.rx_filter != HWTSTAMP_FILTER_NONE) { + if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) + en_warn(priv, "Turning off RX vlan offload since RX time-stamping is ON\n"); + dev->features &= ~NETIF_F_HW_VLAN_CTAG_RX; + } + + if (port_up) { + err = mlx4_en_start_port(dev); + if (err) + en_err(priv, "Failed starting port\n"); + } + + if (!err) + err = mlx4_en_moderation_update(priv); +out: + mutex_unlock(&mdev->state_lock); + kfree(tmp); + if (!err) + netdev_features_change(dev); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c new file mode 100644 index 000000000..532997eba --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c @@ -0,0 +1,436 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + + +#include <linux/if_vlan.h> + +#include <linux/mlx4/device.h> +#include <linux/mlx4/cmd.h> + +#include "en_port.h" +#include "mlx4_en.h" + + +int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_vlan_fltr_mbox *filter; + int i; + int j; + int index = 0; + u32 entry; + int err = 0; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + filter = mailbox->buf; + for (i = VLAN_FLTR_SIZE - 1; i >= 0; i--) { + entry = 0; + for (j = 0; j < 32; j++) + if (test_bit(index++, priv->active_vlans)) + entry |= 1 << j; + filter->entry[i] = cpu_to_be32(entry); + } + err = mlx4_cmd(dev, mailbox->dma, priv->port, 0, MLX4_CMD_SET_VLAN_FLTR, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port) +{ + struct mlx4_en_query_port_context *qport_context; + struct mlx4_en_priv *priv = netdev_priv(mdev->pndev[port]); + struct mlx4_en_port_state *state = &priv->port_state; + struct mlx4_cmd_mailbox *mailbox; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0, + MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + if (err) + goto out; + qport_context = mailbox->buf; + + /* This command is always accessed from Ethtool context + * already synchronized, no need in locking */ + state->link_state = !!(qport_context->link_up & MLX4_EN_LINK_UP_MASK); + switch (qport_context->link_speed & MLX4_EN_SPEED_MASK) { + case MLX4_EN_100M_SPEED: + state->link_speed = SPEED_100; + break; + case MLX4_EN_1G_SPEED: + state->link_speed = SPEED_1000; + break; + case MLX4_EN_10G_SPEED_XAUI: + case MLX4_EN_10G_SPEED_XFI: + state->link_speed = SPEED_10000; + break; + case MLX4_EN_20G_SPEED: + state->link_speed = SPEED_20000; + break; + case MLX4_EN_40G_SPEED: + state->link_speed = SPEED_40000; + break; + case MLX4_EN_56G_SPEED: + state->link_speed = SPEED_56000; + break; + default: + state->link_speed = -1; + break; + } + + state->transceiver = qport_context->transceiver; + + state->flags = 0; /* Reset and recalculate the port flags */ + state->flags |= (qport_context->link_up & MLX4_EN_ANC_MASK) ? + MLX4_EN_PORT_ANC : 0; + state->flags |= (qport_context->autoneg & MLX4_EN_AUTONEG_MASK) ? + MLX4_EN_PORT_ANE : 0; + +out: + mlx4_free_cmd_mailbox(mdev->dev, mailbox); + return err; +} + +/* Each counter set is located in struct mlx4_en_stat_out_mbox + * with a const offset between its prio components. + * This function runs over a counter set and sum all of it's prio components. 
+ */ +static unsigned long en_stats_adder(__be64 *start, __be64 *next, int num) +{ + __be64 *curr = start; + unsigned long ret = 0; + int i; + int offset = next - start; + + for (i = 0; i < num; i++) { + ret += be64_to_cpu(*curr); + curr += offset; + } + + return ret; +} + +void mlx4_en_fold_software_stats(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + unsigned long packets, bytes; + int i; + + if (!priv->port_up || mlx4_is_master(mdev->dev)) + return; + + packets = 0; + bytes = 0; + for (i = 0; i < priv->rx_ring_num; i++) { + const struct mlx4_en_rx_ring *ring = priv->rx_ring[i]; + + packets += READ_ONCE(ring->packets); + bytes += READ_ONCE(ring->bytes); + } + dev->stats.rx_packets = packets; + dev->stats.rx_bytes = bytes; + + packets = 0; + bytes = 0; + for (i = 0; i < priv->tx_ring_num[TX]; i++) { + const struct mlx4_en_tx_ring *ring = priv->tx_ring[TX][i]; + + packets += READ_ONCE(ring->packets); + bytes += READ_ONCE(ring->bytes); + } + dev->stats.tx_packets = packets; + dev->stats.tx_bytes = bytes; +} + +int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) +{ + struct mlx4_counter tmp_counter_stats; + struct mlx4_en_stat_out_mbox *mlx4_en_stats; + struct mlx4_en_stat_out_flow_control_mbox *flowstats; + struct net_device *dev = mdev->pndev[port]; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct net_device_stats *stats = &dev->stats; + struct mlx4_cmd_mailbox *mailbox, *mailbox_priority; + u64 in_mod = reset << 8 | port; + int err; + int i, counter_index; + unsigned long sw_tx_dropped = 0; + unsigned long sw_rx_dropped = 0; + + mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + mailbox_priority = mlx4_alloc_cmd_mailbox(mdev->dev); + if (IS_ERR(mailbox_priority)) { + mlx4_free_cmd_mailbox(mdev->dev, mailbox); + return PTR_ERR(mailbox_priority); + } + + err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, in_mod, 0, + MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); + if (err) + goto out; + + mlx4_en_stats = mailbox->buf; + + memset(&tmp_counter_stats, 0, sizeof(tmp_counter_stats)); + counter_index = mlx4_get_default_counter_index(mdev->dev, port); + err = mlx4_get_counter_stats(mdev->dev, counter_index, + &tmp_counter_stats, reset); + + /* 0xffs indicates invalid value */ + memset(mailbox_priority->buf, 0xff, + sizeof(*flowstats) * MLX4_NUM_PRIORITIES); + + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) { + memset(mailbox_priority->buf, 0, + sizeof(*flowstats) * MLX4_NUM_PRIORITIES); + err = mlx4_cmd_box(mdev->dev, 0, mailbox_priority->dma, + in_mod | MLX4_DUMP_ETH_STATS_FLOW_CONTROL, + 0, MLX4_CMD_DUMP_ETH_STATS, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + if (err) + goto out; + } + + flowstats = mailbox_priority->buf; + + spin_lock_bh(&priv->stats_lock); + + mlx4_en_fold_software_stats(dev); + + priv->port_stats.rx_chksum_good = 0; + priv->port_stats.rx_chksum_none = 0; + priv->port_stats.rx_chksum_complete = 0; + priv->port_stats.rx_alloc_pages = 0; + priv->xdp_stats.rx_xdp_drop = 0; + priv->xdp_stats.rx_xdp_redirect = 0; + priv->xdp_stats.rx_xdp_redirect_fail = 0; + priv->xdp_stats.rx_xdp_tx = 0; + priv->xdp_stats.rx_xdp_tx_full = 0; + for (i = 0; i < priv->rx_ring_num; i++) { + const struct mlx4_en_rx_ring *ring = priv->rx_ring[i]; + + sw_rx_dropped += READ_ONCE(ring->dropped); + priv->port_stats.rx_chksum_good += READ_ONCE(ring->csum_ok); + priv->port_stats.rx_chksum_none += READ_ONCE(ring->csum_none); + 
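
(Aside on en_stats_adder(), defined earlier in this hunk: the per-priority counters in the DUMP_ETH_STATS mailbox sit at a constant stride, so the pointer difference between the prio_0 and prio_1 fields gives the step in __be64 elements. A standalone sketch with made-up field names, endianness conversion omitted:)

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical counter block: each priority carries three 64-bit fields,
 * so consecutive "total" counters are three elements apart. */
struct demo_mbox {
	uint64_t total_prio_0; uint64_t tagged_prio_0; uint64_t rsvd_0;
	uint64_t total_prio_1; uint64_t tagged_prio_1; uint64_t rsvd_1;
	uint64_t total_prio_2; uint64_t tagged_prio_2; uint64_t rsvd_2;
};

static uint64_t demo_stats_adder(const uint64_t *start, const uint64_t *next, int num)
{
	ptrdiff_t stride = next - start;	/* element stride, here 3 */
	uint64_t sum = 0;
	int i;

	for (i = 0; i < num; i++, start += stride)
		sum += *start;
	return sum;
}

int main(void)
{
	struct demo_mbox m = { .total_prio_0 = 10, .total_prio_1 = 20, .total_prio_2 = 30 };

	/* Walks total_prio_0/1/2 and prints 60 */
	printf("%llu\n", (unsigned long long)
	       demo_stats_adder(&m.total_prio_0, &m.total_prio_1, 3));
	return 0;
}
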
priv->port_stats.rx_chksum_complete += READ_ONCE(ring->csum_complete); + priv->port_stats.rx_alloc_pages += READ_ONCE(ring->rx_alloc_pages); + priv->xdp_stats.rx_xdp_drop += READ_ONCE(ring->xdp_drop); + priv->xdp_stats.rx_xdp_redirect += READ_ONCE(ring->xdp_redirect); + priv->xdp_stats.rx_xdp_redirect_fail += READ_ONCE(ring->xdp_redirect_fail); + priv->xdp_stats.rx_xdp_tx += READ_ONCE(ring->xdp_tx); + priv->xdp_stats.rx_xdp_tx_full += READ_ONCE(ring->xdp_tx_full); + } + priv->port_stats.tx_chksum_offload = 0; + priv->port_stats.queue_stopped = 0; + priv->port_stats.wake_queue = 0; + priv->port_stats.tso_packets = 0; + priv->port_stats.xmit_more = 0; + + for (i = 0; i < priv->tx_ring_num[TX]; i++) { + const struct mlx4_en_tx_ring *ring = priv->tx_ring[TX][i]; + + sw_tx_dropped += READ_ONCE(ring->tx_dropped); + priv->port_stats.tx_chksum_offload += READ_ONCE(ring->tx_csum); + priv->port_stats.queue_stopped += READ_ONCE(ring->queue_stopped); + priv->port_stats.wake_queue += READ_ONCE(ring->wake_queue); + priv->port_stats.tso_packets += READ_ONCE(ring->tso_packets); + priv->port_stats.xmit_more += READ_ONCE(ring->xmit_more); + } + + if (!mlx4_is_slave(mdev->dev)) { + struct mlx4_en_phy_stats *p_stats = &priv->phy_stats; + + p_stats->rx_packets_phy = + en_stats_adder(&mlx4_en_stats->RTOT_prio_0, + &mlx4_en_stats->RTOT_prio_1, + NUM_PRIORITIES); + p_stats->tx_packets_phy = + en_stats_adder(&mlx4_en_stats->TTOT_prio_0, + &mlx4_en_stats->TTOT_prio_1, + NUM_PRIORITIES); + p_stats->rx_bytes_phy = + en_stats_adder(&mlx4_en_stats->ROCT_prio_0, + &mlx4_en_stats->ROCT_prio_1, + NUM_PRIORITIES); + p_stats->tx_bytes_phy = + en_stats_adder(&mlx4_en_stats->TOCT_prio_0, + &mlx4_en_stats->TOCT_prio_1, + NUM_PRIORITIES); + if (mlx4_is_master(mdev->dev)) { + stats->rx_packets = p_stats->rx_packets_phy; + stats->tx_packets = p_stats->tx_packets_phy; + stats->rx_bytes = p_stats->rx_bytes_phy; + stats->tx_bytes = p_stats->tx_bytes_phy; + } + } + + /* net device stats */ + stats->rx_errors = be64_to_cpu(mlx4_en_stats->PCS) + + be32_to_cpu(mlx4_en_stats->RJBBR) + + be32_to_cpu(mlx4_en_stats->RCRC) + + be32_to_cpu(mlx4_en_stats->RRUNT) + + be64_to_cpu(mlx4_en_stats->RInRangeLengthErr) + + be64_to_cpu(mlx4_en_stats->ROutRangeLengthErr) + + be32_to_cpu(mlx4_en_stats->RSHORT) + + en_stats_adder(&mlx4_en_stats->RGIANT_prio_0, + &mlx4_en_stats->RGIANT_prio_1, + NUM_PRIORITIES); + stats->tx_errors = en_stats_adder(&mlx4_en_stats->TGIANT_prio_0, + &mlx4_en_stats->TGIANT_prio_1, + NUM_PRIORITIES); + stats->multicast = en_stats_adder(&mlx4_en_stats->MCAST_prio_0, + &mlx4_en_stats->MCAST_prio_1, + NUM_PRIORITIES); + stats->rx_dropped = be32_to_cpu(mlx4_en_stats->RDROP) + + sw_rx_dropped; + stats->rx_length_errors = be32_to_cpu(mlx4_en_stats->RdropLength); + stats->rx_crc_errors = be32_to_cpu(mlx4_en_stats->RCRC); + stats->rx_fifo_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); + stats->tx_dropped = be32_to_cpu(mlx4_en_stats->TDROP) + + sw_tx_dropped; + + /* RX stats */ + priv->pkstats.rx_multicast_packets = stats->multicast; + priv->pkstats.rx_broadcast_packets = + en_stats_adder(&mlx4_en_stats->RBCAST_prio_0, + &mlx4_en_stats->RBCAST_prio_1, + NUM_PRIORITIES); + priv->pkstats.rx_jabbers = be32_to_cpu(mlx4_en_stats->RJBBR); + priv->pkstats.rx_in_range_length_error = + be64_to_cpu(mlx4_en_stats->RInRangeLengthErr); + priv->pkstats.rx_out_range_length_error = + be64_to_cpu(mlx4_en_stats->ROutRangeLengthErr); + + /* Tx stats */ + priv->pkstats.tx_multicast_packets = + en_stats_adder(&mlx4_en_stats->TMCAST_prio_0, + 
&mlx4_en_stats->TMCAST_prio_1, + NUM_PRIORITIES); + priv->pkstats.tx_broadcast_packets = + en_stats_adder(&mlx4_en_stats->TBCAST_prio_0, + &mlx4_en_stats->TBCAST_prio_1, + NUM_PRIORITIES); + + priv->pkstats.rx_prio[0][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_0); + priv->pkstats.rx_prio[0][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_0); + priv->pkstats.rx_prio[1][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_1); + priv->pkstats.rx_prio[1][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_1); + priv->pkstats.rx_prio[2][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_2); + priv->pkstats.rx_prio[2][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_2); + priv->pkstats.rx_prio[3][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_3); + priv->pkstats.rx_prio[3][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_3); + priv->pkstats.rx_prio[4][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_4); + priv->pkstats.rx_prio[4][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_4); + priv->pkstats.rx_prio[5][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_5); + priv->pkstats.rx_prio[5][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_5); + priv->pkstats.rx_prio[6][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_6); + priv->pkstats.rx_prio[6][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_6); + priv->pkstats.rx_prio[7][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_7); + priv->pkstats.rx_prio[7][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_7); + priv->pkstats.rx_prio[8][0] = be64_to_cpu(mlx4_en_stats->RTOT_novlan); + priv->pkstats.rx_prio[8][1] = be64_to_cpu(mlx4_en_stats->ROCT_novlan); + priv->pkstats.tx_prio[0][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_0); + priv->pkstats.tx_prio[0][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_0); + priv->pkstats.tx_prio[1][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_1); + priv->pkstats.tx_prio[1][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_1); + priv->pkstats.tx_prio[2][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_2); + priv->pkstats.tx_prio[2][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_2); + priv->pkstats.tx_prio[3][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_3); + priv->pkstats.tx_prio[3][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_3); + priv->pkstats.tx_prio[4][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_4); + priv->pkstats.tx_prio[4][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_4); + priv->pkstats.tx_prio[5][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_5); + priv->pkstats.tx_prio[5][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_5); + priv->pkstats.tx_prio[6][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_6); + priv->pkstats.tx_prio[6][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_6); + priv->pkstats.tx_prio[7][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_7); + priv->pkstats.tx_prio[7][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_7); + priv->pkstats.tx_prio[8][0] = be64_to_cpu(mlx4_en_stats->TTOT_novlan); + priv->pkstats.tx_prio[8][1] = be64_to_cpu(mlx4_en_stats->TOCT_novlan); + + if (tmp_counter_stats.counter_mode == 0) { + priv->pf_stats.rx_bytes = be64_to_cpu(tmp_counter_stats.rx_bytes); + priv->pf_stats.tx_bytes = be64_to_cpu(tmp_counter_stats.tx_bytes); + priv->pf_stats.rx_packets = be64_to_cpu(tmp_counter_stats.rx_frames); + priv->pf_stats.tx_packets = be64_to_cpu(tmp_counter_stats.tx_frames); + } + + for (i = 0; i < MLX4_NUM_PRIORITIES; i++) { + priv->rx_priority_flowstats[i].rx_pause = + be64_to_cpu(flowstats[i].rx_pause); + priv->rx_priority_flowstats[i].rx_pause_duration = + be64_to_cpu(flowstats[i].rx_pause_duration); + priv->rx_priority_flowstats[i].rx_pause_transition = + be64_to_cpu(flowstats[i].rx_pause_transition); + priv->tx_priority_flowstats[i].tx_pause = + 
be64_to_cpu(flowstats[i].tx_pause); + priv->tx_priority_flowstats[i].tx_pause_duration = + be64_to_cpu(flowstats[i].tx_pause_duration); + priv->tx_priority_flowstats[i].tx_pause_transition = + be64_to_cpu(flowstats[i].tx_pause_transition); + } + + /* if pfc is not in use, all priorities counters have the same value */ + priv->rx_flowstats.rx_pause = + be64_to_cpu(flowstats[0].rx_pause); + priv->rx_flowstats.rx_pause_duration = + be64_to_cpu(flowstats[0].rx_pause_duration); + priv->rx_flowstats.rx_pause_transition = + be64_to_cpu(flowstats[0].rx_pause_transition); + priv->tx_flowstats.tx_pause = + be64_to_cpu(flowstats[0].tx_pause); + priv->tx_flowstats.tx_pause_duration = + be64_to_cpu(flowstats[0].tx_pause_duration); + priv->tx_flowstats.tx_pause_transition = + be64_to_cpu(flowstats[0].tx_pause_transition); + + spin_unlock_bh(&priv->stats_lock); + +out: + mlx4_free_cmd_mailbox(mdev->dev, mailbox); + mlx4_free_cmd_mailbox(mdev->dev, mailbox_priority); + return err; +} + diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.h b/drivers/net/ethernet/mellanox/mlx4/en_port.h new file mode 100644 index 000000000..930f961fe --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.h @@ -0,0 +1,586 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef _MLX4_EN_PORT_H_ +#define _MLX4_EN_PORT_H_ + + +#define SET_PORT_PROMISC_SHIFT 31 +#define SET_PORT_MC_PROMISC_SHIFT 30 + +#define MLX4_EN_NUM_TC 8 + +#define VLAN_FLTR_SIZE 128 +struct mlx4_set_vlan_fltr_mbox { + __be32 entry[VLAN_FLTR_SIZE]; +}; + + +enum { + MLX4_MCAST_CONFIG = 0, + MLX4_MCAST_DISABLE = 1, + MLX4_MCAST_ENABLE = 2, +}; + +enum mlx4_link_mode { + MLX4_1000BASE_CX_SGMII = 0, + MLX4_1000BASE_KX = 1, + MLX4_10GBASE_CX4 = 2, + MLX4_10GBASE_KX4 = 3, + MLX4_10GBASE_KR = 4, + MLX4_20GBASE_KR2 = 5, + MLX4_40GBASE_CR4 = 6, + MLX4_40GBASE_KR4 = 7, + MLX4_56GBASE_KR4 = 8, + MLX4_10GBASE_CR = 12, + MLX4_10GBASE_SR = 13, + MLX4_40GBASE_SR4 = 15, + MLX4_56GBASE_CR4 = 17, + MLX4_56GBASE_SR4 = 18, + MLX4_100BASE_TX = 24, + MLX4_1000BASE_T = 25, + MLX4_10GBASE_T = 26, +}; + +#define MLX4_PROT_MASK(link_mode) (1<<link_mode) + +enum { + MLX4_EN_100M_SPEED = 0x04, + MLX4_EN_10G_SPEED_XAUI = 0x00, + MLX4_EN_10G_SPEED_XFI = 0x01, + MLX4_EN_1G_SPEED = 0x02, + MLX4_EN_20G_SPEED = 0x08, + MLX4_EN_40G_SPEED = 0x40, + MLX4_EN_56G_SPEED = 0x20, + MLX4_EN_OTHER_SPEED = 0x0f, +}; + +struct mlx4_en_query_port_context { + u8 link_up; +#define MLX4_EN_LINK_UP_MASK 0x80 +#define MLX4_EN_ANC_MASK 0x40 + u8 autoneg; +#define MLX4_EN_AUTONEG_MASK 0x80 + __be16 mtu; + u8 reserved2; + u8 link_speed; +#define MLX4_EN_SPEED_MASK 0x6f + u16 reserved3[5]; + __be64 mac; + u8 transceiver; +}; + + +struct mlx4_en_stat_out_mbox { + /* Received frames with a length of 64 octets */ + __be64 R64_prio_0; + __be64 R64_prio_1; + __be64 R64_prio_2; + __be64 R64_prio_3; + __be64 R64_prio_4; + __be64 R64_prio_5; + __be64 R64_prio_6; + __be64 R64_prio_7; + __be64 R64_novlan; + /* Received frames with a length of 127 octets */ + __be64 R127_prio_0; + __be64 R127_prio_1; + __be64 R127_prio_2; + __be64 R127_prio_3; + __be64 R127_prio_4; + __be64 R127_prio_5; + __be64 R127_prio_6; + __be64 R127_prio_7; + __be64 R127_novlan; + /* Received frames with a length of 255 octets */ + __be64 R255_prio_0; + __be64 R255_prio_1; + __be64 R255_prio_2; + __be64 R255_prio_3; + __be64 R255_prio_4; + __be64 R255_prio_5; + __be64 R255_prio_6; + __be64 R255_prio_7; + __be64 R255_novlan; + /* Received frames with a length of 511 octets */ + __be64 R511_prio_0; + __be64 R511_prio_1; + __be64 R511_prio_2; + __be64 R511_prio_3; + __be64 R511_prio_4; + __be64 R511_prio_5; + __be64 R511_prio_6; + __be64 R511_prio_7; + __be64 R511_novlan; + /* Received frames with a length of 1023 octets */ + __be64 R1023_prio_0; + __be64 R1023_prio_1; + __be64 R1023_prio_2; + __be64 R1023_prio_3; + __be64 R1023_prio_4; + __be64 R1023_prio_5; + __be64 R1023_prio_6; + __be64 R1023_prio_7; + __be64 R1023_novlan; + /* Received frames with a length of 1518 octets */ + __be64 R1518_prio_0; + __be64 R1518_prio_1; + __be64 R1518_prio_2; + __be64 R1518_prio_3; + __be64 R1518_prio_4; + __be64 R1518_prio_5; + __be64 R1518_prio_6; + __be64 R1518_prio_7; + __be64 R1518_novlan; + /* Received frames with a length of 1522 octets */ + __be64 R1522_prio_0; + __be64 R1522_prio_1; + __be64 R1522_prio_2; + __be64 R1522_prio_3; + __be64 R1522_prio_4; + __be64 R1522_prio_5; + __be64 R1522_prio_6; + __be64 R1522_prio_7; + __be64 R1522_novlan; + /* Received frames with a length of 1548 octets */ + __be64 R1548_prio_0; + __be64 R1548_prio_1; + __be64 R1548_prio_2; + __be64 R1548_prio_3; + __be64 R1548_prio_4; + __be64 R1548_prio_5; + __be64 R1548_prio_6; + __be64 R1548_prio_7; + __be64 R1548_novlan; + /* Received frames with a length of 1548 < octets < MTU */ + __be64 
R2MTU_prio_0; + __be64 R2MTU_prio_1; + __be64 R2MTU_prio_2; + __be64 R2MTU_prio_3; + __be64 R2MTU_prio_4; + __be64 R2MTU_prio_5; + __be64 R2MTU_prio_6; + __be64 R2MTU_prio_7; + __be64 R2MTU_novlan; + /* Received frames with a length of MTU< octets and good CRC */ + __be64 RGIANT_prio_0; + __be64 RGIANT_prio_1; + __be64 RGIANT_prio_2; + __be64 RGIANT_prio_3; + __be64 RGIANT_prio_4; + __be64 RGIANT_prio_5; + __be64 RGIANT_prio_6; + __be64 RGIANT_prio_7; + __be64 RGIANT_novlan; + /* Received broadcast frames with good CRC */ + __be64 RBCAST_prio_0; + __be64 RBCAST_prio_1; + __be64 RBCAST_prio_2; + __be64 RBCAST_prio_3; + __be64 RBCAST_prio_4; + __be64 RBCAST_prio_5; + __be64 RBCAST_prio_6; + __be64 RBCAST_prio_7; + __be64 RBCAST_novlan; + /* Received multicast frames with good CRC */ + __be64 MCAST_prio_0; + __be64 MCAST_prio_1; + __be64 MCAST_prio_2; + __be64 MCAST_prio_3; + __be64 MCAST_prio_4; + __be64 MCAST_prio_5; + __be64 MCAST_prio_6; + __be64 MCAST_prio_7; + __be64 MCAST_novlan; + /* Received unicast not short or GIANT frames with good CRC */ + __be64 RTOTG_prio_0; + __be64 RTOTG_prio_1; + __be64 RTOTG_prio_2; + __be64 RTOTG_prio_3; + __be64 RTOTG_prio_4; + __be64 RTOTG_prio_5; + __be64 RTOTG_prio_6; + __be64 RTOTG_prio_7; + __be64 RTOTG_novlan; + + /* Count of total octets of received frames, includes framing characters */ + __be64 RTTLOCT_prio_0; + /* Count of total octets of received frames, not including framing + characters */ + __be64 RTTLOCT_NOFRM_prio_0; + /* Count of Total number of octets received + (only for frames without errors) */ + __be64 ROCT_prio_0; + + __be64 RTTLOCT_prio_1; + __be64 RTTLOCT_NOFRM_prio_1; + __be64 ROCT_prio_1; + + __be64 RTTLOCT_prio_2; + __be64 RTTLOCT_NOFRM_prio_2; + __be64 ROCT_prio_2; + + __be64 RTTLOCT_prio_3; + __be64 RTTLOCT_NOFRM_prio_3; + __be64 ROCT_prio_3; + + __be64 RTTLOCT_prio_4; + __be64 RTTLOCT_NOFRM_prio_4; + __be64 ROCT_prio_4; + + __be64 RTTLOCT_prio_5; + __be64 RTTLOCT_NOFRM_prio_5; + __be64 ROCT_prio_5; + + __be64 RTTLOCT_prio_6; + __be64 RTTLOCT_NOFRM_prio_6; + __be64 ROCT_prio_6; + + __be64 RTTLOCT_prio_7; + __be64 RTTLOCT_NOFRM_prio_7; + __be64 ROCT_prio_7; + + __be64 RTTLOCT_novlan; + __be64 RTTLOCT_NOFRM_novlan; + __be64 ROCT_novlan; + + /* Count of Total received frames including bad frames */ + __be64 RTOT_prio_0; + /* Count of Total number of received frames with 802.1Q encapsulation */ + __be64 R1Q_prio_0; + __be64 reserved1; + + __be64 RTOT_prio_1; + __be64 R1Q_prio_1; + __be64 reserved2; + + __be64 RTOT_prio_2; + __be64 R1Q_prio_2; + __be64 reserved3; + + __be64 RTOT_prio_3; + __be64 R1Q_prio_3; + __be64 reserved4; + + __be64 RTOT_prio_4; + __be64 R1Q_prio_4; + __be64 reserved5; + + __be64 RTOT_prio_5; + __be64 R1Q_prio_5; + __be64 reserved6; + + __be64 RTOT_prio_6; + __be64 R1Q_prio_6; + __be64 reserved7; + + __be64 RTOT_prio_7; + __be64 R1Q_prio_7; + __be64 reserved8; + + __be64 RTOT_novlan; + __be64 R1Q_novlan; + __be64 reserved9; + + /* Total number of Successfully Received Control Frames */ + __be64 RCNTL; + __be64 reserved10; + __be64 reserved11; + __be64 reserved12; + /* Count of received frames with a length/type field value between 46 + (42 for VLANtagged frames) and 1500 (also 1500 for VLAN-tagged frames), + inclusive */ + __be64 RInRangeLengthErr; + /* Count of received frames with length/type field between 1501 and 1535 + decimal, inclusive */ + __be64 ROutRangeLengthErr; + /* Count of received frames that are longer than max allowed size for + 802.3 frames (1518/1522) */ + __be64 RFrmTooLong; + /* Count 
frames received with PCS error */ + __be64 PCS; + + /* Transmit frames with a length of 64 octets */ + __be64 T64_prio_0; + __be64 T64_prio_1; + __be64 T64_prio_2; + __be64 T64_prio_3; + __be64 T64_prio_4; + __be64 T64_prio_5; + __be64 T64_prio_6; + __be64 T64_prio_7; + __be64 T64_novlan; + __be64 T64_loopbk; + /* Transmit frames with a length of 65 to 127 octets. */ + __be64 T127_prio_0; + __be64 T127_prio_1; + __be64 T127_prio_2; + __be64 T127_prio_3; + __be64 T127_prio_4; + __be64 T127_prio_5; + __be64 T127_prio_6; + __be64 T127_prio_7; + __be64 T127_novlan; + __be64 T127_loopbk; + /* Transmit frames with a length of 128 to 255 octets */ + __be64 T255_prio_0; + __be64 T255_prio_1; + __be64 T255_prio_2; + __be64 T255_prio_3; + __be64 T255_prio_4; + __be64 T255_prio_5; + __be64 T255_prio_6; + __be64 T255_prio_7; + __be64 T255_novlan; + __be64 T255_loopbk; + /* Transmit frames with a length of 256 to 511 octets */ + __be64 T511_prio_0; + __be64 T511_prio_1; + __be64 T511_prio_2; + __be64 T511_prio_3; + __be64 T511_prio_4; + __be64 T511_prio_5; + __be64 T511_prio_6; + __be64 T511_prio_7; + __be64 T511_novlan; + __be64 T511_loopbk; + /* Transmit frames with a length of 512 to 1023 octets */ + __be64 T1023_prio_0; + __be64 T1023_prio_1; + __be64 T1023_prio_2; + __be64 T1023_prio_3; + __be64 T1023_prio_4; + __be64 T1023_prio_5; + __be64 T1023_prio_6; + __be64 T1023_prio_7; + __be64 T1023_novlan; + __be64 T1023_loopbk; + /* Transmit frames with a length of 1024 to 1518 octets */ + __be64 T1518_prio_0; + __be64 T1518_prio_1; + __be64 T1518_prio_2; + __be64 T1518_prio_3; + __be64 T1518_prio_4; + __be64 T1518_prio_5; + __be64 T1518_prio_6; + __be64 T1518_prio_7; + __be64 T1518_novlan; + __be64 T1518_loopbk; + /* Counts transmit frames with a length of 1519 to 1522 bytes */ + __be64 T1522_prio_0; + __be64 T1522_prio_1; + __be64 T1522_prio_2; + __be64 T1522_prio_3; + __be64 T1522_prio_4; + __be64 T1522_prio_5; + __be64 T1522_prio_6; + __be64 T1522_prio_7; + __be64 T1522_novlan; + __be64 T1522_loopbk; + /* Transmit frames with a length of 1523 to 1548 octets */ + __be64 T1548_prio_0; + __be64 T1548_prio_1; + __be64 T1548_prio_2; + __be64 T1548_prio_3; + __be64 T1548_prio_4; + __be64 T1548_prio_5; + __be64 T1548_prio_6; + __be64 T1548_prio_7; + __be64 T1548_novlan; + __be64 T1548_loopbk; + /* Counts transmit frames with a length of 1549 to MTU bytes */ + __be64 T2MTU_prio_0; + __be64 T2MTU_prio_1; + __be64 T2MTU_prio_2; + __be64 T2MTU_prio_3; + __be64 T2MTU_prio_4; + __be64 T2MTU_prio_5; + __be64 T2MTU_prio_6; + __be64 T2MTU_prio_7; + __be64 T2MTU_novlan; + __be64 T2MTU_loopbk; + /* Transmit frames with a length greater than MTU octets and a good CRC. 
*/ + __be64 TGIANT_prio_0; + __be64 TGIANT_prio_1; + __be64 TGIANT_prio_2; + __be64 TGIANT_prio_3; + __be64 TGIANT_prio_4; + __be64 TGIANT_prio_5; + __be64 TGIANT_prio_6; + __be64 TGIANT_prio_7; + __be64 TGIANT_novlan; + __be64 TGIANT_loopbk; + /* Transmit broadcast frames with a good CRC */ + __be64 TBCAST_prio_0; + __be64 TBCAST_prio_1; + __be64 TBCAST_prio_2; + __be64 TBCAST_prio_3; + __be64 TBCAST_prio_4; + __be64 TBCAST_prio_5; + __be64 TBCAST_prio_6; + __be64 TBCAST_prio_7; + __be64 TBCAST_novlan; + __be64 TBCAST_loopbk; + /* Transmit multicast frames with a good CRC */ + __be64 TMCAST_prio_0; + __be64 TMCAST_prio_1; + __be64 TMCAST_prio_2; + __be64 TMCAST_prio_3; + __be64 TMCAST_prio_4; + __be64 TMCAST_prio_5; + __be64 TMCAST_prio_6; + __be64 TMCAST_prio_7; + __be64 TMCAST_novlan; + __be64 TMCAST_loopbk; + /* Transmit good frames that are neither broadcast nor multicast */ + __be64 TTOTG_prio_0; + __be64 TTOTG_prio_1; + __be64 TTOTG_prio_2; + __be64 TTOTG_prio_3; + __be64 TTOTG_prio_4; + __be64 TTOTG_prio_5; + __be64 TTOTG_prio_6; + __be64 TTOTG_prio_7; + __be64 TTOTG_novlan; + __be64 TTOTG_loopbk; + + /* total octets of transmitted frames, including framing characters */ + __be64 TTTLOCT_prio_0; + /* total octets of transmitted frames, not including framing characters */ + __be64 TTTLOCT_NOFRM_prio_0; + /* ifOutOctets */ + __be64 TOCT_prio_0; + + __be64 TTTLOCT_prio_1; + __be64 TTTLOCT_NOFRM_prio_1; + __be64 TOCT_prio_1; + + __be64 TTTLOCT_prio_2; + __be64 TTTLOCT_NOFRM_prio_2; + __be64 TOCT_prio_2; + + __be64 TTTLOCT_prio_3; + __be64 TTTLOCT_NOFRM_prio_3; + __be64 TOCT_prio_3; + + __be64 TTTLOCT_prio_4; + __be64 TTTLOCT_NOFRM_prio_4; + __be64 TOCT_prio_4; + + __be64 TTTLOCT_prio_5; + __be64 TTTLOCT_NOFRM_prio_5; + __be64 TOCT_prio_5; + + __be64 TTTLOCT_prio_6; + __be64 TTTLOCT_NOFRM_prio_6; + __be64 TOCT_prio_6; + + __be64 TTTLOCT_prio_7; + __be64 TTTLOCT_NOFRM_prio_7; + __be64 TOCT_prio_7; + + __be64 TTTLOCT_novlan; + __be64 TTTLOCT_NOFRM_novlan; + __be64 TOCT_novlan; + + __be64 TTTLOCT_loopbk; + __be64 TTTLOCT_NOFRM_loopbk; + __be64 TOCT_loopbk; + + /* Total frames transmitted with a good CRC that are not aborted */ + __be64 TTOT_prio_0; + /* Total number of frames transmitted with 802.1Q encapsulation */ + __be64 T1Q_prio_0; + __be64 reserved13; + + __be64 TTOT_prio_1; + __be64 T1Q_prio_1; + __be64 reserved14; + + __be64 TTOT_prio_2; + __be64 T1Q_prio_2; + __be64 reserved15; + + __be64 TTOT_prio_3; + __be64 T1Q_prio_3; + __be64 reserved16; + + __be64 TTOT_prio_4; + __be64 T1Q_prio_4; + __be64 reserved17; + + __be64 TTOT_prio_5; + __be64 T1Q_prio_5; + __be64 reserved18; + + __be64 TTOT_prio_6; + __be64 T1Q_prio_6; + __be64 reserved19; + + __be64 TTOT_prio_7; + __be64 T1Q_prio_7; + __be64 reserved20; + + __be64 TTOT_novlan; + __be64 T1Q_novlan; + __be64 reserved21; + + __be64 TTOT_loopbk; + __be64 T1Q_loopbk; + __be64 reserved22; + + /* Received frames with a length greater than MTU octets and a bad CRC */ + __be32 RJBBR; + /* Received frames with a bad CRC that are not runts, jabbers, + or alignment errors */ + __be32 RCRC; + /* Received frames with SFD with a length of less than 64 octets and a + bad CRC */ + __be32 RRUNT; + /* Received frames with a length less than 64 octets and a good CRC */ + __be32 RSHORT; + /* Total Number of Received Packets Dropped */ + __be32 RDROP; + /* Drop due to overflow */ + __be32 RdropOvflw; + /* Drop due to overflow */ + __be32 RdropLength; + /* Total of good frames. 
Does not include frames received with + frame-too-long, FCS, or length errors */ + __be32 RTOTFRMS; + /* Total dropped Xmited packets */ + __be32 TDROP; +}; + + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c new file mode 100644 index 000000000..6883ac75d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/mlx4/qp.h> + +#include "mlx4_en.h" + +void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, + int is_tx, int rss, int qpn, int cqn, + int user_prio, struct mlx4_qp_context *context) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct net_device *dev = priv->dev; + + memset(context, 0, sizeof(*context)); + context->flags = cpu_to_be32(7 << 16 | rss << MLX4_RSS_QPC_FLAG_OFFSET); + context->pd = cpu_to_be32(mdev->priv_pdn); + context->mtu_msgmax = 0xff; + if (!is_tx && !rss) + context->rq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4); + if (is_tx) { + context->sq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4); + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP) + context->params2 |= cpu_to_be32(MLX4_QP_BIT_FPP); + + } else { + context->sq_size_stride = ilog2(TXBB_SIZE) - 4; + } + context->usr_page = cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev, + mdev->priv_uar.index)); + context->local_qpn = cpu_to_be32(qpn); + context->pri_path.ackto = 1 & 0x07; + context->pri_path.sched_queue = 0x83 | (priv->port - 1) << 6; + /* force user priority per tx ring */ + if (user_prio >= 0 && priv->prof->num_up == MLX4_EN_NUM_UP_HIGH) { + context->pri_path.sched_queue |= user_prio << 3; + context->pri_path.feup = MLX4_FEUP_FORCE_ETH_UP; + } + context->pri_path.counter_index = priv->counter_index; + context->cqn_send = cpu_to_be32(cqn); + context->cqn_recv = cpu_to_be32(cqn); + if (!rss && + (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK) && + context->pri_path.counter_index != + MLX4_SINK_COUNTER_INDEX(mdev->dev)) { + /* disable multicast loopback to qp with same counter */ + if (!(dev->features & NETIF_F_LOOPBACK)) + context->pri_path.fl |= MLX4_FL_ETH_SRC_CHECK_MC_LB; + context->pri_path.control |= MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER; + } + context->db_rec_addr = cpu_to_be64(priv->res.db.dma << 2); + if (!(dev->features & NETIF_F_HW_VLAN_CTAG_RX)) + context->param3 |= cpu_to_be32(1 << 30); + + if (!is_tx && !rss && + (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)) { + en_dbg(HW, priv, "Setting RX qp %x tunnel mode to RX tunneled & non-tunneled\n", qpn); + context->srqn = cpu_to_be32(7 << 28); /* this fills bits 30:28 */ + } +} + +int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp, + int loopback) +{ + int ret; + struct mlx4_update_qp_params qp_params; + + memset(&qp_params, 0, sizeof(qp_params)); + if (!loopback) + qp_params.flags = MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB; + + ret = mlx4_update_qp(priv->mdev->dev, qp->qpn, + MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB, + &qp_params); + + return ret; +} + +void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event) +{ + return; +} + diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c new file mode 100644 index 000000000..8f762fc17 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -0,0 +1,1285 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/bpf.h> +#include <linux/bpf_trace.h> +#include <linux/mlx4/cq.h> +#include <linux/slab.h> +#include <linux/mlx4/qp.h> +#include <linux/skbuff.h> +#include <linux/rculist.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/vmalloc.h> +#include <linux/irq.h> + +#include <net/ip.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ip6_checksum.h> +#endif + +#include "mlx4_en.h" + +static int mlx4_alloc_page(struct mlx4_en_priv *priv, + struct mlx4_en_rx_alloc *frag, + gfp_t gfp) +{ + struct page *page; + dma_addr_t dma; + + page = alloc_page(gfp); + if (unlikely(!page)) + return -ENOMEM; + dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE, priv->dma_dir); + if (unlikely(dma_mapping_error(priv->ddev, dma))) { + __free_page(page); + return -ENOMEM; + } + frag->page = page; + frag->dma = dma; + frag->page_offset = priv->rx_headroom; + return 0; +} + +static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring, + struct mlx4_en_rx_desc *rx_desc, + struct mlx4_en_rx_alloc *frags, + gfp_t gfp) +{ + int i; + + for (i = 0; i < priv->num_frags; i++, frags++) { + if (!frags->page) { + if (mlx4_alloc_page(priv, frags, gfp)) + return -ENOMEM; + ring->rx_alloc_pages++; + } + rx_desc->data[i].addr = cpu_to_be64(frags->dma + + frags->page_offset); + } + return 0; +} + +static void mlx4_en_free_frag(const struct mlx4_en_priv *priv, + struct mlx4_en_rx_alloc *frag) +{ + if (frag->page) { + dma_unmap_page(priv->ddev, frag->dma, + PAGE_SIZE, priv->dma_dir); + __free_page(frag->page); + } + /* We need to clear all fields, otherwise a change of priv->log_rx_info + * could lead to see garbage later in frag->page. 
+ */ + memset(frag, 0, sizeof(*frag)); +} + +static void mlx4_en_init_rx_desc(const struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring, int index) +{ + struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index; + int possible_frags; + int i; + + /* Set size and memtype fields */ + for (i = 0; i < priv->num_frags; i++) { + rx_desc->data[i].byte_count = + cpu_to_be32(priv->frag_info[i].frag_size); + rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key); + } + + /* If the number of used fragments does not fill up the ring stride, + * remaining (unused) fragments must be padded with null address/size + * and a special memory key */ + possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE; + for (i = priv->num_frags; i < possible_frags; i++) { + rx_desc->data[i].byte_count = 0; + rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD); + rx_desc->data[i].addr = 0; + } +} + +static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring, int index, + gfp_t gfp) +{ + struct mlx4_en_rx_desc *rx_desc = ring->buf + + (index << ring->log_stride); + struct mlx4_en_rx_alloc *frags = ring->rx_info + + (index << priv->log_rx_info); + if (likely(ring->page_cache.index > 0)) { + /* XDP uses a single page per frame */ + if (!frags->page) { + ring->page_cache.index--; + frags->page = ring->page_cache.buf[ring->page_cache.index].page; + frags->dma = ring->page_cache.buf[ring->page_cache.index].dma; + } + frags->page_offset = XDP_PACKET_HEADROOM; + rx_desc->data[0].addr = cpu_to_be64(frags->dma + + XDP_PACKET_HEADROOM); + return 0; + } + + return mlx4_en_alloc_frags(priv, ring, rx_desc, frags, gfp); +} + +static bool mlx4_en_is_ring_empty(const struct mlx4_en_rx_ring *ring) +{ + return ring->prod == ring->cons; +} + +static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring) +{ + *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff); +} + +/* slow path */ +static void mlx4_en_free_rx_desc(const struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring, + int index) +{ + struct mlx4_en_rx_alloc *frags; + int nr; + + frags = ring->rx_info + (index << priv->log_rx_info); + for (nr = 0; nr < priv->num_frags; nr++) { + en_dbg(DRV, priv, "Freeing fragment:%d\n", nr); + mlx4_en_free_frag(priv, frags + nr); + } +} + +/* Function not in fast-path */ +static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv) +{ + struct mlx4_en_rx_ring *ring; + int ring_ind; + int buf_ind; + int new_size; + + for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) { + for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { + ring = priv->rx_ring[ring_ind]; + + if (mlx4_en_prepare_rx_desc(priv, ring, + ring->actual_size, + GFP_KERNEL)) { + if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) { + en_err(priv, "Failed to allocate enough rx buffers\n"); + return -ENOMEM; + } else { + new_size = rounddown_pow_of_two(ring->actual_size); + en_warn(priv, "Only %d buffers allocated reducing ring size to %d\n", + ring->actual_size, new_size); + goto reduce_rings; + } + } + ring->actual_size++; + ring->prod++; + } + } + return 0; + +reduce_rings: + for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { + ring = priv->rx_ring[ring_ind]; + while (ring->actual_size > new_size) { + ring->actual_size--; + ring->prod--; + mlx4_en_free_rx_desc(priv, ring, ring->actual_size); + } + } + + return 0; +} + +static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring) +{ + int index; + + en_dbg(DRV, priv, "Freeing Rx buf - 
cons:%d prod:%d\n", + ring->cons, ring->prod); + + /* Unmap and free Rx buffers */ + for (index = 0; index < ring->size; index++) { + en_dbg(DRV, priv, "Processing descriptor:%d\n", index); + mlx4_en_free_rx_desc(priv, ring, index); + } + ring->cons = 0; + ring->prod = 0; +} + +void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev) +{ + int i; + int num_of_eqs; + int num_rx_rings; + struct mlx4_dev *dev = mdev->dev; + + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) { + num_of_eqs = max_t(int, MIN_RX_RINGS, + min_t(int, + mlx4_get_eqs_per_port(mdev->dev, i), + DEF_RX_RINGS)); + + num_rx_rings = mlx4_low_memory_profile() ? MIN_RX_RINGS : + min_t(int, num_of_eqs, num_online_cpus()); + mdev->profile.prof[i].rx_ring_num = + rounddown_pow_of_two(num_rx_rings); + } +} + +int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring **pring, + u32 size, u16 stride, int node, int queue_index) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_rx_ring *ring; + int err = -ENOMEM; + int tmp; + + ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node); + if (!ring) { + en_err(priv, "Failed to allocate RX ring structure\n"); + return -ENOMEM; + } + + ring->prod = 0; + ring->cons = 0; + ring->size = size; + ring->size_mask = size - 1; + ring->stride = stride; + ring->log_stride = ffs(ring->stride) - 1; + ring->buf_size = ring->size * ring->stride + TXBB_SIZE; + + if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index, 0) < 0) + goto err_ring; + + tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS * + sizeof(struct mlx4_en_rx_alloc)); + ring->rx_info = kvzalloc_node(tmp, GFP_KERNEL, node); + if (!ring->rx_info) { + err = -ENOMEM; + goto err_xdp_info; + } + + en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n", + ring->rx_info, tmp); + + /* Allocate HW buffers on provided NUMA node */ + set_dev_node(&mdev->dev->persist->pdev->dev, node); + err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); + set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); + if (err) + goto err_info; + + ring->buf = ring->wqres.buf.direct.buf; + + ring->hwtstamp_rx_filter = priv->hwtstamp_config.rx_filter; + + *pring = ring; + return 0; + +err_info: + kvfree(ring->rx_info); + ring->rx_info = NULL; +err_xdp_info: + xdp_rxq_info_unreg(&ring->xdp_rxq); +err_ring: + kfree(ring); + *pring = NULL; + + return err; +} + +int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) +{ + struct mlx4_en_rx_ring *ring; + int i; + int ring_ind; + int err; + int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + + DS_SIZE * priv->num_frags); + + for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { + ring = priv->rx_ring[ring_ind]; + + ring->prod = 0; + ring->cons = 0; + ring->actual_size = 0; + ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn; + + ring->stride = stride; + if (ring->stride <= TXBB_SIZE) { + /* Stamp first unused send wqe */ + __be32 *ptr = (__be32 *)ring->buf; + __be32 stamp = cpu_to_be32(1 << STAMP_SHIFT); + *ptr = stamp; + /* Move pointer to start of rx section */ + ring->buf += TXBB_SIZE; + } + + ring->log_stride = ffs(ring->stride) - 1; + ring->buf_size = ring->size * ring->stride; + + memset(ring->buf, 0, ring->buf_size); + mlx4_en_update_rx_prod_db(ring); + + /* Initialize all descriptors */ + for (i = 0; i < ring->size; i++) + mlx4_en_init_rx_desc(priv, ring, i); + } + err = mlx4_en_fill_rx_buffers(priv); + if (err) + goto err_buffers; + + for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { + ring = priv->rx_ring[ring_ind]; + + 
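
(Aside on the ring indexing used throughout this file: the ring size is a power of two, prod and cons are free-running counters, slots are addressed with prod & size_mask, and the fill level is prod - cons even after the counters wrap. A standalone sketch, not driver code:)

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define RING_SIZE	8u		/* must be a power of two */
#define RING_MASK	(RING_SIZE - 1)

int main(void)
{
	/* Start near the 32-bit wrap point to show the arithmetic still holds */
	uint32_t prod = UINT32_MAX - 2;
	uint32_t cons = UINT32_MAX - 5;

	assert((uint32_t)(prod - cons) == 3);	/* three descriptors outstanding */

	prod += 4;				/* producer wraps past zero */
	assert((uint32_t)(prod - cons) == 7);	/* fill level is still correct */

	printf("next slot index: %u\n", prod & RING_MASK);
	return 0;
}
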
ring->size_mask = ring->actual_size - 1; + mlx4_en_update_rx_prod_db(ring); + } + + return 0; + +err_buffers: + for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) + mlx4_en_free_rx_buf(priv, priv->rx_ring[ring_ind]); + + ring_ind = priv->rx_ring_num - 1; + while (ring_ind >= 0) { + if (priv->rx_ring[ring_ind]->stride <= TXBB_SIZE) + priv->rx_ring[ring_ind]->buf -= TXBB_SIZE; + ring_ind--; + } + return err; +} + +/* We recover from out of memory by scheduling our napi poll + * function (mlx4_en_process_cq), which tries to allocate + * all missing RX buffers (call to mlx4_en_refill_rx_buffers). + */ +void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv) +{ + int ring; + + if (!priv->port_up) + return; + + for (ring = 0; ring < priv->rx_ring_num; ring++) { + if (mlx4_en_is_ring_empty(priv->rx_ring[ring])) { + local_bh_disable(); + napi_reschedule(&priv->rx_cq[ring]->napi); + local_bh_enable(); + } + } +} + +/* When the rx ring is running in page-per-packet mode, a released frame can go + * directly into a small cache, to avoid unmapping or touching the page + * allocator. In bpf prog performance scenarios, buffers are either forwarded + * or dropped, never converted to skbs, so every page can come directly from + * this cache when it is sized to be a multiple of the napi budget. + */ +bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, + struct mlx4_en_rx_alloc *frame) +{ + struct mlx4_en_page_cache *cache = &ring->page_cache; + + if (cache->index >= MLX4_EN_CACHE_SIZE) + return false; + + cache->buf[cache->index].page = frame->page; + cache->buf[cache->index].dma = frame->dma; + cache->index++; + return true; +} + +void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring **pring, + u32 size, u16 stride) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_rx_ring *ring = *pring; + struct bpf_prog *old_prog; + + old_prog = rcu_dereference_protected( + ring->xdp_prog, + lockdep_is_held(&mdev->state_lock)); + if (old_prog) + bpf_prog_put(old_prog); + xdp_rxq_info_unreg(&ring->xdp_rxq); + mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE); + kvfree(ring->rx_info); + ring->rx_info = NULL; + kfree(ring); + *pring = NULL; +} + +void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring) +{ + int i; + + for (i = 0; i < ring->page_cache.index; i++) { + dma_unmap_page(priv->ddev, ring->page_cache.buf[i].dma, + PAGE_SIZE, priv->dma_dir); + put_page(ring->page_cache.buf[i].page); + } + ring->page_cache.index = 0; + mlx4_en_free_rx_buf(priv, ring); + if (ring->stride <= TXBB_SIZE) + ring->buf -= TXBB_SIZE; +} + + +static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_rx_alloc *frags, + struct sk_buff *skb, + int length) +{ + const struct mlx4_en_frag_info *frag_info = priv->frag_info; + unsigned int truesize = 0; + bool release = true; + int nr, frag_size; + struct page *page; + dma_addr_t dma; + + /* Collect used fragments while replacing them in the HW descriptors */ + for (nr = 0;; frags++) { + frag_size = min_t(int, length, frag_info->frag_size); + + page = frags->page; + if (unlikely(!page)) + goto fail; + + dma = frags->dma; + dma_sync_single_range_for_cpu(priv->ddev, dma, frags->page_offset, + frag_size, priv->dma_dir); + + __skb_fill_page_desc(skb, nr, page, frags->page_offset, + frag_size); + + truesize += frag_info->frag_stride; + if (frag_info->frag_stride == PAGE_SIZE / 2) { + frags->page_offset ^= PAGE_SIZE / 2; + release = page_count(page) != 1 || + 
page_is_pfmemalloc(page) || + page_to_nid(page) != numa_mem_id(); + } else if (!priv->rx_headroom) { + /* rx_headroom for non XDP setup is always 0. + * When XDP is set, the above condition will + * guarantee page is always released. + */ + u32 sz_align = ALIGN(frag_size, SMP_CACHE_BYTES); + + frags->page_offset += sz_align; + release = frags->page_offset + frag_info->frag_size > PAGE_SIZE; + } + if (release) { + dma_unmap_page(priv->ddev, dma, PAGE_SIZE, priv->dma_dir); + frags->page = NULL; + } else { + page_ref_inc(page); + } + + nr++; + length -= frag_size; + if (!length) + break; + frag_info++; + } + skb->truesize += truesize; + return nr; + +fail: + while (nr > 0) { + nr--; + __skb_frag_unref(skb_shinfo(skb)->frags + nr, false); + } + return 0; +} + +static void validate_loopback(struct mlx4_en_priv *priv, void *va) +{ + const unsigned char *data = va + ETH_HLEN; + int i; + + for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++) { + if (data[i] != (unsigned char)i) + return; + } + /* Loopback found */ + priv->loopback_ok = 1; +} + +static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring) +{ + u32 missing = ring->actual_size - (ring->prod - ring->cons); + + /* Try to batch allocations, but not too much. */ + if (missing < 8) + return; + do { + if (mlx4_en_prepare_rx_desc(priv, ring, + ring->prod & ring->size_mask, + GFP_ATOMIC | __GFP_MEMALLOC)) + break; + ring->prod++; + } while (likely(--missing)); + + mlx4_en_update_rx_prod_db(ring); +} + +/* When hardware doesn't strip the vlan, we need to calculate the checksum + * over it and add it to the hardware's checksum calculation + */ +static inline __wsum get_fixed_vlan_csum(__wsum hw_checksum, + struct vlan_hdr *vlanh) +{ + return csum_add(hw_checksum, *(__wsum *)vlanh); +} + +/* Although the stack expects checksum which doesn't include the pseudo + * header, the HW adds it. To address that, we are subtracting the pseudo + * header checksum from the checksum value provided by the HW. + */ +static int get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb, + struct iphdr *iph) +{ + __u16 length_for_csum = 0; + __wsum csum_pseudo_header = 0; + __u8 ipproto = iph->protocol; + + if (unlikely(ipproto == IPPROTO_SCTP)) + return -1; + + length_for_csum = (be16_to_cpu(iph->tot_len) - (iph->ihl << 2)); + csum_pseudo_header = csum_tcpudp_nofold(iph->saddr, iph->daddr, + length_for_csum, ipproto, 0); + skb->csum = csum_sub(hw_checksum, csum_pseudo_header); + return 0; +} + +#if IS_ENABLED(CONFIG_IPV6) +/* In IPv6 packets, hw_checksum lacks 6 bytes from IPv6 header: + * 4 first bytes : priority, version, flow_lbl + * and 2 additional bytes : nexthdr, hop_limit. + */ +static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, + struct ipv6hdr *ipv6h) +{ + __u8 nexthdr = ipv6h->nexthdr; + __wsum temp; + + if (unlikely(nexthdr == IPPROTO_FRAGMENT || + nexthdr == IPPROTO_HOPOPTS || + nexthdr == IPPROTO_SCTP)) + return -1; + + /* priority, version, flow_lbl */ + temp = csum_add(hw_checksum, *(__wsum *)ipv6h); + /* nexthdr and hop_limit */ + skb->csum = csum_add(temp, (__force __wsum)*(__be16 *)&ipv6h->nexthdr); + return 0; +} +#endif + +#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN) + +/* We reach this function only after checking that any of + * the (IPv4 | IPv6) bits are set in cqe->status. 
+ */ +static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, + netdev_features_t dev_features) +{ + __wsum hw_checksum = 0; + void *hdr; + + /* CQE csum doesn't cover padding octets in short ethernet + * frames. And the pad field is appended prior to calculating + * and appending the FCS field. + * + * Detecting these padded frames requires to verify and parse + * IP headers, so we simply force all those small frames to skip + * checksum complete. + */ + if (short_frame(skb->len)) + return -EINVAL; + + hdr = (u8 *)va + sizeof(struct ethhdr); + hw_checksum = csum_unfold((__force __sum16)cqe->checksum); + + if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK) && + !(dev_features & NETIF_F_HW_VLAN_CTAG_RX)) { + hw_checksum = get_fixed_vlan_csum(hw_checksum, hdr); + hdr += sizeof(struct vlan_hdr); + } + +#if IS_ENABLED(CONFIG_IPV6) + if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) + return get_fixed_ipv6_csum(hw_checksum, skb, hdr); +#endif + return get_fixed_ipv4_csum(hw_checksum, skb, hdr); +} + +#if IS_ENABLED(CONFIG_IPV6) +#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV6) +#else +#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4) +#endif + +int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int factor = priv->cqe_factor; + struct mlx4_en_rx_ring *ring; + struct bpf_prog *xdp_prog; + int cq_ring = cq->ring; + bool doorbell_pending; + bool xdp_redir_flush; + struct mlx4_cqe *cqe; + struct xdp_buff xdp; + int polled = 0; + int index; + + if (unlikely(!priv->port_up || budget <= 0)) + return 0; + + ring = priv->rx_ring[cq_ring]; + + xdp_prog = rcu_dereference_bh(ring->xdp_prog); + xdp_init_buff(&xdp, priv->frag_info[0].frag_stride, &ring->xdp_rxq); + doorbell_pending = false; + xdp_redir_flush = false; + + /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx + * descriptor offset can be deduced from the CQE index instead of + * reading 'cqe->index' */ + index = cq->mcq.cons_index & ring->size_mask; + cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor; + + /* Process all completed CQEs */ + while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, + cq->mcq.cons_index & cq->size)) { + struct mlx4_en_rx_alloc *frags; + enum pkt_hash_types hash_type; + struct sk_buff *skb; + unsigned int length; + int ip_summed; + void *va; + int nr; + + frags = ring->rx_info + (index << priv->log_rx_info); + va = page_address(frags[0].page) + frags[0].page_offset; + net_prefetchw(va); + /* + * make sure we read the CQE after we read the ownership bit + */ + dma_rmb(); + + /* Drop packet on bad receive or bad checksum */ + if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == + MLX4_CQE_OPCODE_ERROR)) { + en_err(priv, "CQE completed in error - vendor syndrom:%d syndrom:%d\n", + ((struct mlx4_err_cqe *)cqe)->vendor_err_syndrome, + ((struct mlx4_err_cqe *)cqe)->syndrome); + goto next; + } + if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) { + en_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n"); + goto next; + } + + /* Check if we need to drop the packet if SRIOV is not enabled + * and not performing the selftest or flb disabled + */ + if (priv->flags & MLX4_EN_FLAG_RX_FILTER_NEEDED) { + const struct ethhdr *ethh = va; + dma_addr_t dma; + /* Get pointer to first fragment since we haven't + * skb yet and cast it to ethhdr struct + */ + dma = frags[0].dma + frags[0].page_offset; + dma_sync_single_for_cpu(priv->ddev, dma, 
sizeof(*ethh), + DMA_FROM_DEVICE); + + if (is_multicast_ether_addr(ethh->h_dest)) { + struct mlx4_mac_entry *entry; + struct hlist_head *bucket; + unsigned int mac_hash; + + /* Drop the packet, since HW loopback-ed it */ + mac_hash = ethh->h_source[MLX4_EN_MAC_HASH_IDX]; + bucket = &priv->mac_hash[mac_hash]; + hlist_for_each_entry_rcu_bh(entry, bucket, hlist) { + if (ether_addr_equal_64bits(entry->mac, + ethh->h_source)) + goto next; + } + } + } + + if (unlikely(priv->validate_loopback)) { + validate_loopback(priv, va); + goto next; + } + + /* + * Packet is OK - process it. + */ + length = be32_to_cpu(cqe->byte_cnt); + length -= ring->fcs_del; + + /* A bpf program gets first chance to drop the packet. It may + * read bytes but not past the end of the frag. + */ + if (xdp_prog) { + dma_addr_t dma; + void *orig_data; + u32 act; + + dma = frags[0].dma + frags[0].page_offset; + dma_sync_single_for_cpu(priv->ddev, dma, + priv->frag_info[0].frag_size, + DMA_FROM_DEVICE); + + xdp_prepare_buff(&xdp, va - frags[0].page_offset, + frags[0].page_offset, length, false); + orig_data = xdp.data; + + act = bpf_prog_run_xdp(xdp_prog, &xdp); + + length = xdp.data_end - xdp.data; + if (xdp.data != orig_data) { + frags[0].page_offset = xdp.data - + xdp.data_hard_start; + va = xdp.data; + } + + switch (act) { + case XDP_PASS: + break; + case XDP_REDIRECT: + if (likely(!xdp_do_redirect(dev, &xdp, xdp_prog))) { + ring->xdp_redirect++; + xdp_redir_flush = true; + frags[0].page = NULL; + goto next; + } + ring->xdp_redirect_fail++; + trace_xdp_exception(dev, xdp_prog, act); + goto xdp_drop_no_cnt; + case XDP_TX: + if (likely(!mlx4_en_xmit_frame(ring, frags, priv, + length, cq_ring, + &doorbell_pending))) { + frags[0].page = NULL; + goto next; + } + trace_xdp_exception(dev, xdp_prog, act); + goto xdp_drop_no_cnt; /* Drop on xmit failure */ + default: + bpf_warn_invalid_xdp_action(dev, xdp_prog, act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(dev, xdp_prog, act); + fallthrough; + case XDP_DROP: + ring->xdp_drop++; +xdp_drop_no_cnt: + goto next; + } + } + + ring->bytes += length; + ring->packets++; + + skb = napi_get_frags(&cq->napi); + if (unlikely(!skb)) + goto next; + + if (unlikely(ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL)) { + u64 timestamp = mlx4_en_get_cqe_ts(cqe); + + mlx4_en_fill_hwtstamps(priv->mdev, skb_hwtstamps(skb), + timestamp); + } + skb_record_rx_queue(skb, cq_ring); + + if (likely(dev->features & NETIF_F_RXCSUM)) { + /* TODO: For IP non TCP/UDP packets when csum complete is + * not an option (not supported or any other reason) we can + * actually check cqe IPOK status bit and report + * CHECKSUM_UNNECESSARY rather than CHECKSUM_NONE + */ + if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | + MLX4_CQE_STATUS_UDP)) && + (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && + cqe->checksum == cpu_to_be16(0xffff)) { + bool l2_tunnel; + + l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) && + (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL)); + ip_summed = CHECKSUM_UNNECESSARY; + hash_type = PKT_HASH_TYPE_L4; + if (l2_tunnel) + skb->csum_level = 1; + ring->csum_ok++; + } else { + if (!(priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP && + (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IP_ANY)))) + goto csum_none; + if (check_csum(cqe, skb, va, dev->features)) + goto csum_none; + ip_summed = CHECKSUM_COMPLETE; + hash_type = PKT_HASH_TYPE_L3; + ring->csum_complete++; + } + } else { +csum_none: + ip_summed = CHECKSUM_NONE; + hash_type = PKT_HASH_TYPE_L3; + ring->csum_none++; + } + 
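+		/* Propagate the checksum verdict, RSS hash and any VLAN tag to
+		 * the skb before attaching the fragments and handing it to GRO.
+		 */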
skb->ip_summed = ip_summed; + if (dev->features & NETIF_F_RXHASH) + skb_set_hash(skb, + be32_to_cpu(cqe->immed_rss_invalid), + hash_type); + + if ((cqe->vlan_my_qpn & + cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK)) && + (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + be16_to_cpu(cqe->sl_vid)); + else if ((cqe->vlan_my_qpn & + cpu_to_be32(MLX4_CQE_SVLAN_PRESENT_MASK)) && + (dev->features & NETIF_F_HW_VLAN_STAG_RX)) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021AD), + be16_to_cpu(cqe->sl_vid)); + + nr = mlx4_en_complete_rx_desc(priv, frags, skb, length); + if (likely(nr)) { + skb_shinfo(skb)->nr_frags = nr; + skb->len = length; + skb->data_len = length; + napi_gro_frags(&cq->napi); + } else { + __vlan_hwaccel_clear_tag(skb); + skb_clear_hash(skb); + } +next: + ++cq->mcq.cons_index; + index = (cq->mcq.cons_index) & ring->size_mask; + cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor; + if (unlikely(++polled == budget)) + break; + } + + if (xdp_redir_flush) + xdp_do_flush(); + + if (likely(polled)) { + if (doorbell_pending) { + priv->tx_cq[TX_XDP][cq_ring]->xdp_busy = true; + mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq_ring]); + } + + mlx4_cq_set_ci(&cq->mcq); + wmb(); /* ensure HW sees CQ consumer before we post new buffers */ + ring->cons = cq->mcq.cons_index; + } + + mlx4_en_refill_rx_buffers(priv, ring); + + return polled; +} + + +void mlx4_en_rx_irq(struct mlx4_cq *mcq) +{ + struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq); + struct mlx4_en_priv *priv = netdev_priv(cq->dev); + + if (likely(priv->port_up)) + napi_schedule_irqoff(&cq->napi); + else + mlx4_en_arm_cq(priv, cq); +} + +/* Rx CQ polling - called by NAPI */ +int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) +{ + struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi); + struct net_device *dev = cq->dev; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_cq *xdp_tx_cq = NULL; + bool clean_complete = true; + int done; + + if (!budget) + return 0; + + if (priv->tx_ring_num[TX_XDP]) { + xdp_tx_cq = priv->tx_cq[TX_XDP][cq->ring]; + if (xdp_tx_cq->xdp_busy) { + clean_complete = mlx4_en_process_tx_cq(dev, xdp_tx_cq, + budget) < budget; + xdp_tx_cq->xdp_busy = !clean_complete; + } + } + + done = mlx4_en_process_rx_cq(dev, cq, budget); + + /* If we used up all the quota - we're probably not done yet... */ + if (done == budget || !clean_complete) { + int cpu_curr; + + /* in case we got here because of !clean_complete */ + done = budget; + + cpu_curr = smp_processor_id(); + + if (likely(cpumask_test_cpu(cpu_curr, cq->aff_mask))) + return budget; + + /* Current cpu is not according to smp_irq_affinity - + * probably affinity changed. Need to stop this NAPI + * poll, and restart it on the right CPU. + * Try to avoid returning a too small value (like 0), + * to not fool net_rx_action() and its netdev_budget + */ + if (done) + done--; + } + /* Done for now */ + if (likely(napi_complete_done(napi, done))) + mlx4_en_arm_cq(priv, cq); + return done; +} + +void mlx4_en_calc_rx_buf(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int eff_mtu = MLX4_EN_EFF_MTU(dev->mtu); + int i = 0; + + /* bpf requires buffers to be set up as 1 packet per page. + * This only works when num_frags == 1. 
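+	 * With XDP enabled the single fragment therefore gets a PAGE_SIZE
+	 * stride and XDP_PACKET_HEADROOM of headroom, as set up below.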
+ */ + if (priv->tx_ring_num[TX_XDP]) { + priv->frag_info[0].frag_size = eff_mtu; + /* This will gain efficient xdp frame recycling at the + * expense of more costly truesize accounting + */ + priv->frag_info[0].frag_stride = PAGE_SIZE; + priv->dma_dir = DMA_BIDIRECTIONAL; + priv->rx_headroom = XDP_PACKET_HEADROOM; + i = 1; + } else { + int frag_size_max = 2048, buf_size = 0; + + /* should not happen, right ? */ + if (eff_mtu > PAGE_SIZE + (MLX4_EN_MAX_RX_FRAGS - 1) * 2048) + frag_size_max = PAGE_SIZE; + + while (buf_size < eff_mtu) { + int frag_stride, frag_size = eff_mtu - buf_size; + int pad, nb; + + if (i < MLX4_EN_MAX_RX_FRAGS - 1) + frag_size = min(frag_size, frag_size_max); + + priv->frag_info[i].frag_size = frag_size; + frag_stride = ALIGN(frag_size, SMP_CACHE_BYTES); + /* We can only pack 2 1536-bytes frames in on 4K page + * Therefore, each frame would consume more bytes (truesize) + */ + nb = PAGE_SIZE / frag_stride; + pad = (PAGE_SIZE - nb * frag_stride) / nb; + pad &= ~(SMP_CACHE_BYTES - 1); + priv->frag_info[i].frag_stride = frag_stride + pad; + + buf_size += frag_size; + i++; + } + priv->dma_dir = DMA_FROM_DEVICE; + priv->rx_headroom = 0; + } + + priv->num_frags = i; + priv->rx_skb_size = eff_mtu; + priv->log_rx_info = ROUNDUP_LOG2(i * sizeof(struct mlx4_en_rx_alloc)); + + en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d num_frags:%d):\n", + eff_mtu, priv->num_frags); + for (i = 0; i < priv->num_frags; i++) { + en_dbg(DRV, + priv, + " frag:%d - size:%d stride:%d\n", + i, + priv->frag_info[i].frag_size, + priv->frag_info[i].frag_stride); + } +} + +/* RSS related functions */ + +static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn, + struct mlx4_en_rx_ring *ring, + enum mlx4_qp_state *state, + struct mlx4_qp *qp) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_qp_context *context; + int err = 0; + + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return -ENOMEM; + + err = mlx4_qp_alloc(mdev->dev, qpn, qp); + if (err) { + en_err(priv, "Failed to allocate qp #%x\n", qpn); + goto out; + } + qp->event = mlx4_en_sqp_event; + + mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0, + qpn, ring->cqn, -1, context); + context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma); + + /* Cancel FCS removal if FW allows */ + if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP) { + context->param3 |= cpu_to_be32(1 << 29); + if (priv->dev->features & NETIF_F_RXFCS) + ring->fcs_del = 0; + else + ring->fcs_del = ETH_FCS_LEN; + } else + ring->fcs_del = 0; + + err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, context, qp, state); + if (err) { + mlx4_qp_remove(mdev->dev, qp); + mlx4_qp_free(mdev->dev, qp); + } + mlx4_en_update_rx_prod_db(ring); +out: + kfree(context); + return err; +} + +int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv) +{ + int err; + u32 qpn; + + err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn, + MLX4_RESERVE_A0_QP, + MLX4_RES_USAGE_DRIVER); + if (err) { + en_err(priv, "Failed reserving drop qpn\n"); + return err; + } + err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp); + if (err) { + en_err(priv, "Failed allocating drop qp\n"); + mlx4_qp_release_range(priv->mdev->dev, qpn, 1); + return err; + } + + return 0; +} + +void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv) +{ + u32 qpn; + + qpn = priv->drop_qp.qpn; + mlx4_qp_remove(priv->mdev->dev, &priv->drop_qp); + mlx4_qp_free(priv->mdev->dev, &priv->drop_qp); + mlx4_qp_release_range(priv->mdev->dev, qpn, 1); +} + +/* Allocate rx qp's and 
configure them according to rss map */ +int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_rss_map *rss_map = &priv->rss_map; + struct mlx4_qp_context context; + struct mlx4_rss_context *rss_context; + int rss_rings; + void *ptr; + u8 rss_mask = (MLX4_RSS_IPV4 | MLX4_RSS_TCP_IPV4 | MLX4_RSS_IPV6 | + MLX4_RSS_TCP_IPV6); + int i, qpn; + int err = 0; + int good_qps = 0; + u8 flags; + + en_dbg(DRV, priv, "Configuring rss steering\n"); + + flags = priv->rx_ring_num == 1 ? MLX4_RESERVE_A0_QP : 0; + err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num, + priv->rx_ring_num, + &rss_map->base_qpn, flags, + MLX4_RES_USAGE_DRIVER); + if (err) { + en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num); + return err; + } + + for (i = 0; i < priv->rx_ring_num; i++) { + qpn = rss_map->base_qpn + i; + err = mlx4_en_config_rss_qp(priv, qpn, priv->rx_ring[i], + &rss_map->state[i], + &rss_map->qps[i]); + if (err) + goto rss_err; + + ++good_qps; + } + + if (priv->rx_ring_num == 1) { + rss_map->indir_qp = &rss_map->qps[0]; + priv->base_qpn = rss_map->indir_qp->qpn; + en_info(priv, "Optimized Non-RSS steering\n"); + return 0; + } + + rss_map->indir_qp = kzalloc(sizeof(*rss_map->indir_qp), GFP_KERNEL); + if (!rss_map->indir_qp) { + err = -ENOMEM; + goto rss_err; + } + + /* Configure RSS indirection qp */ + err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, rss_map->indir_qp); + if (err) { + en_err(priv, "Failed to allocate RSS indirection QP\n"); + goto qp_alloc_err; + } + + rss_map->indir_qp->event = mlx4_en_sqp_event; + mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn, + priv->rx_ring[0]->cqn, -1, &context); + + if (!priv->prof->rss_rings || priv->prof->rss_rings > priv->rx_ring_num) + rss_rings = priv->rx_ring_num; + else + rss_rings = priv->prof->rss_rings; + + ptr = ((void *) &context) + offsetof(struct mlx4_qp_context, pri_path) + + MLX4_RSS_OFFSET_IN_QPC_PRI_PATH; + rss_context = ptr; + rss_context->base_qpn = cpu_to_be32(ilog2(rss_rings) << 24 | + (rss_map->base_qpn)); + rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn); + if (priv->mdev->profile.udp_rss) { + rss_mask |= MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6; + rss_context->base_qpn_udp = rss_context->default_qpn; + } + + if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { + en_info(priv, "Setting RSS context tunnel type to RSS on inner headers\n"); + rss_mask |= MLX4_RSS_BY_INNER_HEADERS; + } + + rss_context->flags = rss_mask; + rss_context->hash_fn = MLX4_RSS_HASH_TOP; + if (priv->rss_hash_fn == ETH_RSS_HASH_XOR) { + rss_context->hash_fn = MLX4_RSS_HASH_XOR; + } else if (priv->rss_hash_fn == ETH_RSS_HASH_TOP) { + rss_context->hash_fn = MLX4_RSS_HASH_TOP; + memcpy(rss_context->rss_key, priv->rss_key, + MLX4_EN_RSS_KEY_SIZE); + } else { + en_err(priv, "Unknown RSS hash function requested\n"); + err = -EINVAL; + goto indir_err; + } + + err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context, + rss_map->indir_qp, &rss_map->indir_state); + if (err) + goto indir_err; + + return 0; + +indir_err: + mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state, + MLX4_QP_STATE_RST, NULL, 0, 0, rss_map->indir_qp); + mlx4_qp_remove(mdev->dev, rss_map->indir_qp); + mlx4_qp_free(mdev->dev, rss_map->indir_qp); +qp_alloc_err: + kfree(rss_map->indir_qp); + rss_map->indir_qp = NULL; +rss_err: + for (i = 0; i < good_qps; i++) { + mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i], + MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]); + mlx4_qp_remove(mdev->dev, 
&rss_map->qps[i]); + mlx4_qp_free(mdev->dev, &rss_map->qps[i]); + } + mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num); + return err; +} + +void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_rss_map *rss_map = &priv->rss_map; + int i; + + if (priv->rx_ring_num > 1) { + mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state, + MLX4_QP_STATE_RST, NULL, 0, 0, + rss_map->indir_qp); + mlx4_qp_remove(mdev->dev, rss_map->indir_qp); + mlx4_qp_free(mdev->dev, rss_map->indir_qp); + kfree(rss_map->indir_qp); + rss_map->indir_qp = NULL; + } + + for (i = 0; i < priv->rx_ring_num; i++) { + mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i], + MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]); + mlx4_qp_remove(mdev->dev, &rss_map->qps[i]); + mlx4_qp_free(mdev->dev, &rss_map->qps[i]); + } + mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num); +} diff --git a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c new file mode 100644 index 000000000..946d9db7c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#include <linux/kernel.h> +#include <linux/ethtool.h> +#include <linux/netdevice.h> +#include <linux/delay.h> +#include <linux/mlx4/driver.h> + +#include "mlx4_en.h" + + +static int mlx4_en_test_registers(struct mlx4_en_priv *priv) +{ + return mlx4_cmd(priv->mdev->dev, 0, 0, 0, MLX4_CMD_HW_HEALTH_CHECK, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); +} + +static int mlx4_en_test_loopback_xmit(struct mlx4_en_priv *priv) +{ + struct sk_buff *skb; + struct ethhdr *ethh; + unsigned char *packet; + unsigned int packet_size = MLX4_LOOPBACK_TEST_PAYLOAD; + unsigned int i; + int err; + + + /* build the pkt before xmit */ + skb = netdev_alloc_skb(priv->dev, MLX4_LOOPBACK_TEST_PAYLOAD + ETH_HLEN + NET_IP_ALIGN); + if (!skb) + return -ENOMEM; + + skb_reserve(skb, NET_IP_ALIGN); + + ethh = skb_put(skb, sizeof(struct ethhdr)); + packet = skb_put(skb, packet_size); + memcpy(ethh->h_dest, priv->dev->dev_addr, ETH_ALEN); + eth_zero_addr(ethh->h_source); + ethh->h_proto = htons(ETH_P_ARP); + skb_reset_mac_header(skb); + for (i = 0; i < packet_size; ++i) /* fill our packet */ + packet[i] = (unsigned char)(i & 0xff); + + /* xmit the pkt */ + err = mlx4_en_xmit(skb, priv->dev); + return err; +} + +static int mlx4_en_test_loopback(struct mlx4_en_priv *priv) +{ + u32 loopback_ok = 0; + int i; + + priv->loopback_ok = 0; + priv->validate_loopback = 1; + + mlx4_en_update_loopback_state(priv->dev, priv->dev->features); + + /* xmit */ + if (mlx4_en_test_loopback_xmit(priv)) { + en_err(priv, "Transmitting loopback packet failed\n"); + goto mlx4_en_test_loopback_exit; + } + + /* polling for result */ + for (i = 0; i < MLX4_EN_LOOPBACK_RETRIES; ++i) { + msleep(MLX4_EN_LOOPBACK_TIMEOUT); + if (priv->loopback_ok) { + loopback_ok = 1; + break; + } + } + if (!loopback_ok) + en_err(priv, "Loopback packet didn't arrive\n"); + +mlx4_en_test_loopback_exit: + + priv->validate_loopback = 0; + + mlx4_en_update_loopback_state(priv->dev, priv->dev->features); + return !loopback_ok; +} + +static int mlx4_en_test_interrupts(struct mlx4_en_priv *priv) +{ + struct mlx4_en_dev *mdev = priv->mdev; + int err = 0; + int i = 0; + + err = mlx4_test_async(mdev->dev); + /* When not in MSI_X or slave, test only async */ + if (!(mdev->dev->flags & MLX4_FLAG_MSI_X) || mlx4_is_slave(mdev->dev)) + return err; + + /* A loop over all completion vectors of current port, + * for each vector check whether it works by mapping command + * completions to that vector and performing a NOP command + */ + for (i = 0; i < priv->rx_ring_num; i++) { + err = mlx4_test_interrupt(mdev->dev, priv->rx_cq[i]->vector); + if (err) + break; + } + + return err; +} + +static int mlx4_en_test_link(struct mlx4_en_priv *priv) +{ + if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) + return -ENOMEM; + if (priv->port_state.link_state == 1) + return 0; + else + return 1; +} + +static int mlx4_en_test_speed(struct mlx4_en_priv *priv) +{ + + if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) + return -ENOMEM; + + /* The device supports 100M, 1G, 10G, 20G, 40G and 56G speed */ + if (priv->port_state.link_speed != SPEED_100 && + priv->port_state.link_speed != SPEED_1000 && + priv->port_state.link_speed != SPEED_10000 && + priv->port_state.link_speed != SPEED_20000 && + priv->port_state.link_speed != SPEED_40000 && + priv->port_state.link_speed != SPEED_56000) + return priv->port_state.link_speed; + + return 0; +} + + +void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int i, carrier_ok; + + memset(buf, 0, 
sizeof(u64) * MLX4_EN_NUM_SELF_TEST); + + if (*flags & ETH_TEST_FL_OFFLINE) { + /* disable the interface */ + carrier_ok = netif_carrier_ok(dev); + + netif_carrier_off(dev); + /* Wait until all tx queues are empty. + * there should not be any additional incoming traffic + * since we turned the carrier off */ + msleep(200); + + if (priv->mdev->dev->caps.flags & + MLX4_DEV_CAP_FLAG_UC_LOOPBACK) { + buf[3] = mlx4_en_test_registers(priv); + if (priv->port_up && dev->mtu >= MLX4_SELFTEST_LB_MIN_MTU) + buf[4] = mlx4_en_test_loopback(priv); + } + + if (carrier_ok) + netif_carrier_on(dev); + + } + buf[0] = mlx4_en_test_interrupts(priv); + buf[1] = mlx4_en_test_link(priv); + buf[2] = mlx4_en_test_speed(priv); + + for (i = 0; i < MLX4_EN_NUM_SELF_TEST; i++) { + if (buf[i]) + *flags |= ETH_TEST_FL_FAILED; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c new file mode 100644 index 000000000..7fccf1a79 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -0,0 +1,1233 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#include <asm/page.h> +#include <linux/mlx4/cq.h> +#include <linux/slab.h> +#include <linux/mlx4/qp.h> +#include <linux/skbuff.h> +#include <linux/if_vlan.h> +#include <linux/prefetch.h> +#include <linux/vmalloc.h> +#include <linux/tcp.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/indirect_call_wrapper.h> +#include <net/ipv6.h> + +#include "mlx4_en.h" + +int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring **pring, u32 size, + u16 stride, int node, int queue_index) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_tx_ring *ring; + int tmp; + int err; + + ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node); + if (!ring) { + en_err(priv, "Failed allocating TX ring\n"); + return -ENOMEM; + } + + ring->size = size; + ring->size_mask = size - 1; + ring->sp_stride = stride; + ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS; + + tmp = size * sizeof(struct mlx4_en_tx_info); + ring->tx_info = kvmalloc_node(tmp, GFP_KERNEL, node); + if (!ring->tx_info) { + err = -ENOMEM; + goto err_ring; + } + + en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n", + ring->tx_info, tmp); + + ring->bounce_buf = kmalloc_node(MAX_DESC_SIZE, GFP_KERNEL, node); + if (!ring->bounce_buf) { + ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL); + if (!ring->bounce_buf) { + err = -ENOMEM; + goto err_info; + } + } + ring->buf_size = ALIGN(size * ring->sp_stride, MLX4_EN_PAGE_SIZE); + + /* Allocate HW buffers on provided NUMA node */ + set_dev_node(&mdev->dev->persist->pdev->dev, node); + err = mlx4_alloc_hwq_res(mdev->dev, &ring->sp_wqres, ring->buf_size); + set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); + if (err) { + en_err(priv, "Failed allocating hwq resources\n"); + goto err_bounce; + } + + ring->buf = ring->sp_wqres.buf.direct.buf; + + en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d buf_size:%d dma:%llx\n", + ring, ring->buf, ring->size, ring->buf_size, + (unsigned long long) ring->sp_wqres.buf.direct.map); + + err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn, + MLX4_RESERVE_ETH_BF_QP, + MLX4_RES_USAGE_DRIVER); + if (err) { + en_err(priv, "failed reserving qp for TX ring\n"); + goto err_hwq_res; + } + + err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->sp_qp); + if (err) { + en_err(priv, "Failed allocating qp %d\n", ring->qpn); + goto err_reserve; + } + ring->sp_qp.event = mlx4_en_sqp_event; + + err = mlx4_bf_alloc(mdev->dev, &ring->bf, node); + if (err) { + en_dbg(DRV, priv, "working without blueflame (%d)\n", err); + ring->bf.uar = &mdev->priv_uar; + ring->bf.uar->map = mdev->uar_map; + ring->bf_enabled = false; + ring->bf_alloced = false; + priv->pflags &= ~MLX4_EN_PRIV_FLAGS_BLUEFLAME; + } else { + ring->bf_alloced = true; + ring->bf_enabled = !!(priv->pflags & + MLX4_EN_PRIV_FLAGS_BLUEFLAME); + } + ring->doorbell_address = ring->bf.uar->map + MLX4_SEND_DOORBELL; + + ring->hwtstamp_tx_type = priv->hwtstamp_config.tx_type; + ring->queue_index = queue_index; + + if (queue_index < priv->num_tx_rings_p_up) + cpumask_set_cpu(cpumask_local_spread(queue_index, + priv->mdev->dev->numa_node), + &ring->sp_affinity_mask); + + *pring = ring; + return 0; + +err_reserve: + mlx4_qp_release_range(mdev->dev, ring->qpn, 1); +err_hwq_res: + mlx4_free_hwq_res(mdev->dev, &ring->sp_wqres, ring->buf_size); +err_bounce: + kfree(ring->bounce_buf); + ring->bounce_buf = NULL; +err_info: + kvfree(ring->tx_info); + ring->tx_info = NULL; +err_ring: + kfree(ring); + *pring = NULL; + return err; +} + +void 
mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring **pring) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_tx_ring *ring = *pring; + en_dbg(DRV, priv, "Destroying tx ring, qpn: %d\n", ring->qpn); + + if (ring->bf_alloced) + mlx4_bf_free(mdev->dev, &ring->bf); + mlx4_qp_remove(mdev->dev, &ring->sp_qp); + mlx4_qp_free(mdev->dev, &ring->sp_qp); + mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1); + mlx4_free_hwq_res(mdev->dev, &ring->sp_wqres, ring->buf_size); + kfree(ring->bounce_buf); + ring->bounce_buf = NULL; + kvfree(ring->tx_info); + ring->tx_info = NULL; + kfree(ring); + *pring = NULL; +} + +int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int cq, int user_prio) +{ + struct mlx4_en_dev *mdev = priv->mdev; + int err; + + ring->sp_cqn = cq; + ring->prod = 0; + ring->cons = 0xffffffff; + ring->last_nr_txbb = 1; + memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info)); + memset(ring->buf, 0, ring->buf_size); + ring->free_tx_desc = mlx4_en_free_tx_desc; + + ring->sp_qp_state = MLX4_QP_STATE_RST; + ring->doorbell_qpn = cpu_to_be32(ring->sp_qp.qpn << 8); + ring->mr_key = cpu_to_be32(mdev->mr.key); + + mlx4_en_fill_qp_context(priv, ring->size, ring->sp_stride, 1, 0, ring->qpn, + ring->sp_cqn, user_prio, &ring->sp_context); + if (ring->bf_alloced) + ring->sp_context.usr_page = + cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev, + ring->bf.uar->index)); + + err = mlx4_qp_to_ready(mdev->dev, &ring->sp_wqres.mtt, &ring->sp_context, + &ring->sp_qp, &ring->sp_qp_state); + if (!cpumask_empty(&ring->sp_affinity_mask)) + netif_set_xps_queue(priv->dev, &ring->sp_affinity_mask, + ring->queue_index); + + return err; +} + +void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring) +{ + struct mlx4_en_dev *mdev = priv->mdev; + + mlx4_qp_modify(mdev->dev, NULL, ring->sp_qp_state, + MLX4_QP_STATE_RST, NULL, 0, 0, &ring->sp_qp); +} + +static inline bool mlx4_en_is_tx_ring_full(struct mlx4_en_tx_ring *ring) +{ + return ring->prod - ring->cons > ring->full_size; +} + +static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, int index, + u8 owner) +{ + __be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT)); + struct mlx4_en_tx_desc *tx_desc = ring->buf + (index << LOG_TXBB_SIZE); + struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; + void *end = ring->buf + ring->buf_size; + __be32 *ptr = (__be32 *)tx_desc; + int i; + + /* Optimize the common case when there are no wraparounds */ + if (likely((void *)tx_desc + + (tx_info->nr_txbb << LOG_TXBB_SIZE) <= end)) { + /* Stamp the freed descriptor */ + for (i = 0; i < tx_info->nr_txbb << LOG_TXBB_SIZE; + i += STAMP_STRIDE) { + *ptr = stamp; + ptr += STAMP_DWORDS; + } + } else { + /* Stamp the freed descriptor */ + for (i = 0; i < tx_info->nr_txbb << LOG_TXBB_SIZE; + i += STAMP_STRIDE) { + *ptr = stamp; + ptr += STAMP_DWORDS; + if ((void *)ptr >= end) { + ptr = ring->buf; + stamp ^= cpu_to_be32(0x80000000); + } + } + } +} + +INDIRECT_CALLABLE_DECLARE(u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u64 timestamp, + int napi_mode)); + +u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u64 timestamp, + int napi_mode) +{ + struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; + struct mlx4_en_tx_desc *tx_desc = ring->buf + (index << LOG_TXBB_SIZE); + struct mlx4_wqe_data_seg *data = (void *) tx_desc + 
tx_info->data_offset; + void *end = ring->buf + ring->buf_size; + struct sk_buff *skb = tx_info->skb; + int nr_maps = tx_info->nr_maps; + int i; + + /* We do not touch skb here, so prefetch skb->users location + * to speedup consume_skb() + */ + prefetchw(&skb->users); + + if (unlikely(timestamp)) { + struct skb_shared_hwtstamps hwts; + + mlx4_en_fill_hwtstamps(priv->mdev, &hwts, timestamp); + skb_tstamp_tx(skb, &hwts); + } + + if (!tx_info->inl) { + if (tx_info->linear) + dma_unmap_single(priv->ddev, + tx_info->map0_dma, + tx_info->map0_byte_count, + DMA_TO_DEVICE); + else + dma_unmap_page(priv->ddev, + tx_info->map0_dma, + tx_info->map0_byte_count, + DMA_TO_DEVICE); + /* Optimize the common case when there are no wraparounds */ + if (likely((void *)tx_desc + + (tx_info->nr_txbb << LOG_TXBB_SIZE) <= end)) { + for (i = 1; i < nr_maps; i++) { + data++; + dma_unmap_page(priv->ddev, + (dma_addr_t)be64_to_cpu(data->addr), + be32_to_cpu(data->byte_count), + DMA_TO_DEVICE); + } + } else { + if ((void *)data >= end) + data = ring->buf + ((void *)data - end); + + for (i = 1; i < nr_maps; i++) { + data++; + /* Check for wraparound before unmapping */ + if ((void *) data >= end) + data = ring->buf; + dma_unmap_page(priv->ddev, + (dma_addr_t)be64_to_cpu(data->addr), + be32_to_cpu(data->byte_count), + DMA_TO_DEVICE); + } + } + } + napi_consume_skb(skb, napi_mode); + + return tx_info->nr_txbb; +} + +INDIRECT_CALLABLE_DECLARE(u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u64 timestamp, + int napi_mode)); + +u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u64 timestamp, + int napi_mode) +{ + struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; + struct mlx4_en_rx_alloc frame = { + .page = tx_info->page, + .dma = tx_info->map0_dma, + }; + + if (!napi_mode || !mlx4_en_rx_recycle(ring->recycle_ring, &frame)) { + dma_unmap_page(priv->ddev, tx_info->map0_dma, + PAGE_SIZE, priv->dma_dir); + put_page(tx_info->page); + } + + return tx_info->nr_txbb; +} + +int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int cnt = 0; + + /* Skip last polled descriptor */ + ring->cons += ring->last_nr_txbb; + en_dbg(DRV, priv, "Freeing Tx buf - cons:0x%x prod:0x%x\n", + ring->cons, ring->prod); + + if ((u32) (ring->prod - ring->cons) > ring->size) { + if (netif_msg_tx_err(priv)) + en_warn(priv, "Tx consumer passed producer!\n"); + return 0; + } + + while (ring->cons != ring->prod) { + ring->last_nr_txbb = ring->free_tx_desc(priv, ring, + ring->cons & ring->size_mask, + 0, 0 /* Non-NAPI caller */); + ring->cons += ring->last_nr_txbb; + cnt++; + } + + if (ring->tx_queue) + netdev_tx_reset_queue(ring->tx_queue); + + if (cnt) + en_dbg(DRV, priv, "Freed %d uncompleted tx descriptors\n", cnt); + + return cnt; +} + +static void mlx4_en_handle_err_cqe(struct mlx4_en_priv *priv, struct mlx4_err_cqe *err_cqe, + u16 cqe_index, struct mlx4_en_tx_ring *ring) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_tx_info *tx_info; + struct mlx4_en_tx_desc *tx_desc; + u16 wqe_index; + int desc_size; + + en_err(priv, "CQE error - cqn 0x%x, ci 0x%x, vendor syndrome: 0x%x syndrome: 0x%x\n", + ring->sp_cqn, cqe_index, err_cqe->vendor_err_syndrome, err_cqe->syndrome); + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, err_cqe, sizeof(*err_cqe), + false); + + wqe_index = be16_to_cpu(err_cqe->wqe_index) & ring->size_mask; + tx_info = 
&ring->tx_info[wqe_index]; + desc_size = tx_info->nr_txbb << LOG_TXBB_SIZE; + en_err(priv, "Related WQE - qpn 0x%x, wqe index 0x%x, wqe size 0x%x\n", ring->qpn, + wqe_index, desc_size); + tx_desc = ring->buf + (wqe_index << LOG_TXBB_SIZE); + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, tx_desc, desc_size, false); + + if (test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state)) + return; + + en_err(priv, "Scheduling port restart\n"); + queue_work(mdev->workqueue, &priv->restart_task); +} + +int mlx4_en_process_tx_cq(struct net_device *dev, + struct mlx4_en_cq *cq, int napi_budget) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_cq *mcq = &cq->mcq; + struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->type][cq->ring]; + struct mlx4_cqe *cqe; + u16 index, ring_index, stamp_index; + u32 txbbs_skipped = 0; + u32 txbbs_stamp = 0; + u32 cons_index = mcq->cons_index; + int size = cq->size; + u32 size_mask = ring->size_mask; + struct mlx4_cqe *buf = cq->buf; + u32 packets = 0; + u32 bytes = 0; + int factor = priv->cqe_factor; + int done = 0; + int budget = priv->tx_work_limit; + u32 last_nr_txbb; + u32 ring_cons; + + if (unlikely(!priv->port_up)) + return 0; + + netdev_txq_bql_complete_prefetchw(ring->tx_queue); + + index = cons_index & size_mask; + cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor; + last_nr_txbb = READ_ONCE(ring->last_nr_txbb); + ring_cons = READ_ONCE(ring->cons); + ring_index = ring_cons & size_mask; + stamp_index = ring_index; + + /* Process all completed CQEs */ + while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, + cons_index & size) && (done < budget)) { + u16 new_index; + + /* + * make sure we read the CQE after we read the + * ownership bit + */ + dma_rmb(); + + if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == + MLX4_CQE_OPCODE_ERROR)) + if (!test_and_set_bit(MLX4_EN_TX_RING_STATE_RECOVERING, &ring->state)) + mlx4_en_handle_err_cqe(priv, (struct mlx4_err_cqe *)cqe, index, + ring); + + /* Skip over last polled CQE */ + new_index = be16_to_cpu(cqe->wqe_index) & size_mask; + + do { + u64 timestamp = 0; + + txbbs_skipped += last_nr_txbb; + ring_index = (ring_index + last_nr_txbb) & size_mask; + + if (unlikely(ring->tx_info[ring_index].ts_requested)) + timestamp = mlx4_en_get_cqe_ts(cqe); + + /* free next descriptor */ + last_nr_txbb = INDIRECT_CALL_2(ring->free_tx_desc, + mlx4_en_free_tx_desc, + mlx4_en_recycle_tx_desc, + priv, ring, ring_index, + timestamp, napi_budget); + + mlx4_en_stamp_wqe(priv, ring, stamp_index, + !!((ring_cons + txbbs_stamp) & + ring->size)); + stamp_index = ring_index; + txbbs_stamp = txbbs_skipped; + packets++; + bytes += ring->tx_info[ring_index].nr_bytes; + } while ((++done < budget) && (ring_index != new_index)); + + ++cons_index; + index = cons_index & size_mask; + cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor; + } + + /* + * To prevent CQ overflow we first update CQ consumer and only then + * the ring consumer. + */ + mcq->cons_index = cons_index; + mlx4_cq_set_ci(mcq); + wmb(); + + /* we want to dirty this cache line once */ + WRITE_ONCE(ring->last_nr_txbb, last_nr_txbb); + WRITE_ONCE(ring->cons, ring_cons + txbbs_skipped); + + if (cq->type == TX_XDP) + return done; + + netdev_tx_completed_queue(ring->tx_queue, packets, bytes); + + /* Wakeup Tx queue if this stopped, and ring is not full. 
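+	 * The queue was stopped by mlx4_en_xmit() when it ran out of free
+	 * TXBBs; now that completions released some, transmission can resume.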
+ */ + if (netif_tx_queue_stopped(ring->tx_queue) && + !mlx4_en_is_tx_ring_full(ring)) { + netif_tx_wake_queue(ring->tx_queue); + ring->wake_queue++; + } + + return done; +} + +void mlx4_en_tx_irq(struct mlx4_cq *mcq) +{ + struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq); + struct mlx4_en_priv *priv = netdev_priv(cq->dev); + + if (likely(priv->port_up)) + napi_schedule_irqoff(&cq->napi); + else + mlx4_en_arm_cq(priv, cq); +} + +/* TX CQ polling - called by NAPI */ +int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget) +{ + struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi); + struct net_device *dev = cq->dev; + struct mlx4_en_priv *priv = netdev_priv(dev); + int work_done; + + work_done = mlx4_en_process_tx_cq(dev, cq, budget); + if (work_done >= budget) + return budget; + + if (napi_complete_done(napi, work_done)) + mlx4_en_arm_cq(priv, cq); + + return 0; +} + +static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + u32 index, + unsigned int desc_size) +{ + u32 copy = (ring->size - index) << LOG_TXBB_SIZE; + int i; + + for (i = desc_size - copy - 4; i >= 0; i -= 4) { + if ((i & (TXBB_SIZE - 1)) == 0) + wmb(); + + *((u32 *) (ring->buf + i)) = + *((u32 *) (ring->bounce_buf + copy + i)); + } + + for (i = copy - 4; i >= 4 ; i -= 4) { + if ((i & (TXBB_SIZE - 1)) == 0) + wmb(); + + *((u32 *)(ring->buf + (index << LOG_TXBB_SIZE) + i)) = + *((u32 *) (ring->bounce_buf + i)); + } + + /* Return real descriptor location */ + return ring->buf + (index << LOG_TXBB_SIZE); +} + +/* Decide if skb can be inlined in tx descriptor to avoid dma mapping + * + * It seems strange we do not simply use skb_copy_bits(). + * This would allow to inline all skbs iff skb->len <= inline_thold + * + * Note that caller already checked skb was not a gso packet + */ +static bool is_inline(int inline_thold, const struct sk_buff *skb, + const struct skb_shared_info *shinfo, + void **pfrag) +{ + void *ptr; + + if (skb->len > inline_thold || !inline_thold) + return false; + + if (shinfo->nr_frags == 1) { + ptr = skb_frag_address_safe(&shinfo->frags[0]); + if (unlikely(!ptr)) + return false; + *pfrag = ptr; + return true; + } + if (shinfo->nr_frags) + return false; + return true; +} + +static int inline_size(const struct sk_buff *skb) +{ + if (skb->len + CTRL_SIZE + sizeof(struct mlx4_wqe_inline_seg) + <= MLX4_INLINE_ALIGN) + return ALIGN(skb->len + CTRL_SIZE + + sizeof(struct mlx4_wqe_inline_seg), 16); + else + return ALIGN(skb->len + CTRL_SIZE + 2 * + sizeof(struct mlx4_wqe_inline_seg), 16); +} + +static int get_real_size(const struct sk_buff *skb, + const struct skb_shared_info *shinfo, + struct net_device *dev, + int *lso_header_size, + bool *inline_ok, + void **pfrag, + int *hopbyhop) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int real_size; + + if (shinfo->gso_size) { + *inline_ok = false; + *hopbyhop = 0; + if (skb->encapsulation) { + *lso_header_size = skb_inner_tcp_all_headers(skb); + } else { + /* Detects large IPV6 TCP packets and prepares for removal of + * HBH header that has been pushed by ip6_xmit(), + * mainly so that tcpdump can dissect them. 
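+			 * The header itself is stripped later in mlx4_en_xmit()
+			 * when the LSO prefix is copied; here we only account
+			 * for its size.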
+ */ + if (ipv6_has_hopopt_jumbo(skb)) + *hopbyhop = sizeof(struct hop_jumbo_hdr); + *lso_header_size = skb_tcp_all_headers(skb); + } + real_size = CTRL_SIZE + shinfo->nr_frags * DS_SIZE + + ALIGN(*lso_header_size - *hopbyhop + 4, DS_SIZE); + if (unlikely(*lso_header_size != skb_headlen(skb))) { + /* We add a segment for the skb linear buffer only if + * it contains data */ + if (*lso_header_size < skb_headlen(skb)) + real_size += DS_SIZE; + else { + if (netif_msg_tx_err(priv)) + en_warn(priv, "Non-linear headers\n"); + return 0; + } + } + } else { + *lso_header_size = 0; + *inline_ok = is_inline(priv->prof->inline_thold, skb, + shinfo, pfrag); + + if (*inline_ok) + real_size = inline_size(skb); + else + real_size = CTRL_SIZE + + (shinfo->nr_frags + 1) * DS_SIZE; + } + + return real_size; +} + +static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, + const struct sk_buff *skb, + const struct skb_shared_info *shinfo, + void *fragptr) +{ + struct mlx4_wqe_inline_seg *inl = &tx_desc->inl; + int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof(*inl); + unsigned int hlen = skb_headlen(skb); + + if (skb->len <= spc) { + if (likely(skb->len >= MIN_PKT_LEN)) { + inl->byte_count = cpu_to_be32(1 << 31 | skb->len); + } else { + inl->byte_count = cpu_to_be32(1 << 31 | MIN_PKT_LEN); + memset(inl->data + skb->len, 0, + MIN_PKT_LEN - skb->len); + } + skb_copy_from_linear_data(skb, inl->data, hlen); + if (shinfo->nr_frags) + memcpy(inl->data + hlen, fragptr, + skb_frag_size(&shinfo->frags[0])); + + } else { + inl->byte_count = cpu_to_be32(1 << 31 | spc); + if (hlen <= spc) { + skb_copy_from_linear_data(skb, inl->data, hlen); + if (hlen < spc) { + memcpy(inl->data + hlen, + fragptr, spc - hlen); + fragptr += spc - hlen; + } + inl = (void *)inl->data + spc; + memcpy(inl->data, fragptr, skb->len - spc); + } else { + skb_copy_from_linear_data(skb, inl->data, spc); + inl = (void *)inl->data + spc; + skb_copy_from_linear_data_offset(skb, spc, inl->data, + hlen - spc); + if (shinfo->nr_frags) + memcpy(inl->data + hlen - spc, + fragptr, + skb_frag_size(&shinfo->frags[0])); + } + + dma_wmb(); + inl->byte_count = cpu_to_be32(1 << 31 | (skb->len - spc)); + } +} + +u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + u16 rings_p_up = priv->num_tx_rings_p_up; + + if (netdev_get_num_tc(dev)) + return netdev_pick_tx(dev, skb, NULL); + + return netdev_pick_tx(dev, skb, NULL) % rings_p_up; +} + +static void mlx4_bf_copy(void __iomem *dst, const void *src, + unsigned int bytecnt) +{ + __iowrite64_copy(dst, src, bytecnt / 8); +} + +void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring) +{ + wmb(); + /* Since there is no iowrite*_native() that writes the + * value as is, without byteswapping - using the one + * the doesn't do byteswapping in the relevant arch + * endianness. 
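+	 * doorbell_qpn was stored with cpu_to_be32(), so it must reach the
+	 * device exactly as it sits in memory, without a second swab.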
+ */ +#if defined(__LITTLE_ENDIAN) + iowrite32( +#else + iowrite32be( +#endif + (__force u32)ring->doorbell_qpn, ring->doorbell_address); +} + +static void mlx4_en_tx_write_desc(struct mlx4_en_tx_ring *ring, + struct mlx4_en_tx_desc *tx_desc, + union mlx4_wqe_qpn_vlan qpn_vlan, + int desc_size, int bf_index, + __be32 op_own, bool bf_ok, + bool send_doorbell) +{ + tx_desc->ctrl.qpn_vlan = qpn_vlan; + + if (bf_ok) { + op_own |= htonl((bf_index & 0xffff) << 8); + /* Ensure new descriptor hits memory + * before setting ownership of this descriptor to HW + */ + dma_wmb(); + tx_desc->ctrl.owner_opcode = op_own; + + wmb(); + + mlx4_bf_copy(ring->bf.reg + ring->bf.offset, &tx_desc->ctrl, + desc_size); + + wmb(); + + ring->bf.offset ^= ring->bf.buf_size; + } else { + /* Ensure new descriptor hits memory + * before setting ownership of this descriptor to HW + */ + dma_wmb(); + tx_desc->ctrl.owner_opcode = op_own; + if (send_doorbell) + mlx4_en_xmit_doorbell(ring); + else + ring->xmit_more++; + } +} + +static bool mlx4_en_build_dma_wqe(struct mlx4_en_priv *priv, + struct skb_shared_info *shinfo, + struct mlx4_wqe_data_seg *data, + struct sk_buff *skb, + int lso_header_size, + __be32 mr_key, + struct mlx4_en_tx_info *tx_info) +{ + struct device *ddev = priv->ddev; + dma_addr_t dma = 0; + u32 byte_count = 0; + int i_frag; + + /* Map fragments if any */ + for (i_frag = shinfo->nr_frags - 1; i_frag >= 0; i_frag--) { + const skb_frag_t *frag = &shinfo->frags[i_frag]; + byte_count = skb_frag_size(frag); + dma = skb_frag_dma_map(ddev, frag, + 0, byte_count, + DMA_TO_DEVICE); + if (dma_mapping_error(ddev, dma)) + goto tx_drop_unmap; + + data->addr = cpu_to_be64(dma); + data->lkey = mr_key; + dma_wmb(); + data->byte_count = cpu_to_be32(byte_count); + --data; + } + + /* Map linear part if needed */ + if (tx_info->linear) { + byte_count = skb_headlen(skb) - lso_header_size; + + dma = dma_map_single(ddev, skb->data + + lso_header_size, byte_count, + DMA_TO_DEVICE); + if (dma_mapping_error(ddev, dma)) + goto tx_drop_unmap; + + data->addr = cpu_to_be64(dma); + data->lkey = mr_key; + dma_wmb(); + data->byte_count = cpu_to_be32(byte_count); + } + /* tx completion can avoid cache line miss for common cases */ + tx_info->map0_dma = dma; + tx_info->map0_byte_count = byte_count; + + return true; + +tx_drop_unmap: + en_err(priv, "DMA mapping error\n"); + + while (++i_frag < shinfo->nr_frags) { + ++data; + dma_unmap_page(ddev, (dma_addr_t)be64_to_cpu(data->addr), + be32_to_cpu(data->byte_count), + DMA_TO_DEVICE); + } + + return false; +} + +netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + struct mlx4_en_priv *priv = netdev_priv(dev); + union mlx4_wqe_qpn_vlan qpn_vlan = {}; + struct mlx4_en_tx_ring *ring; + struct mlx4_en_tx_desc *tx_desc; + struct mlx4_wqe_data_seg *data; + struct mlx4_en_tx_info *tx_info; + u32 __maybe_unused ring_cons; + int tx_ind; + int nr_txbb; + int desc_size; + int real_size; + u32 index, bf_index; + struct ipv6hdr *h6; + __be32 op_own; + int lso_header_size; + void *fragptr = NULL; + bool bounce = false; + bool send_doorbell; + bool stop_queue; + bool inline_ok; + u8 data_offset; + int hopbyhop; + bool bf_ok; + + tx_ind = skb_get_queue_mapping(skb); + ring = priv->tx_ring[TX][tx_ind]; + + if (unlikely(!priv->port_up)) + goto tx_drop; + + real_size = get_real_size(skb, shinfo, dev, &lso_header_size, + &inline_ok, &fragptr, &hopbyhop); + if (unlikely(!real_size)) + goto tx_drop_count; + + /* Align descriptor to TXBB size 
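+	 * (a TXBB is 64 bytes, so nr_txbb below is the descriptor size
+	 * expressed in 64-byte basic blocks)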
*/ + desc_size = ALIGN(real_size, TXBB_SIZE); + nr_txbb = desc_size >> LOG_TXBB_SIZE; + if (unlikely(nr_txbb > MAX_DESC_TXBBS)) { + if (netif_msg_tx_err(priv)) + en_warn(priv, "Oversized header or SG list\n"); + goto tx_drop_count; + } + + bf_ok = ring->bf_enabled; + if (skb_vlan_tag_present(skb)) { + u16 vlan_proto; + + qpn_vlan.vlan_tag = cpu_to_be16(skb_vlan_tag_get(skb)); + vlan_proto = be16_to_cpu(skb->vlan_proto); + if (vlan_proto == ETH_P_8021AD) + qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_SVLAN; + else if (vlan_proto == ETH_P_8021Q) + qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN; + else + qpn_vlan.ins_vlan = 0; + bf_ok = false; + } + + netdev_txq_bql_enqueue_prefetchw(ring->tx_queue); + + /* Packet is good - grab an index and transmit it */ + index = ring->prod & ring->size_mask; + bf_index = ring->prod; + + /* See if we have enough space for whole descriptor TXBB for setting + * SW ownership on next descriptor; if not, use a bounce buffer. */ + if (likely(index + nr_txbb <= ring->size)) + tx_desc = ring->buf + (index << LOG_TXBB_SIZE); + else { + tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf; + bounce = true; + bf_ok = false; + } + + /* Save skb in tx_info ring */ + tx_info = &ring->tx_info[index]; + tx_info->skb = skb; + tx_info->nr_txbb = nr_txbb; + + if (!lso_header_size) { + data = &tx_desc->data; + data_offset = offsetof(struct mlx4_en_tx_desc, data); + } else { + int lso_align = ALIGN(lso_header_size - hopbyhop + 4, DS_SIZE); + + data = (void *)&tx_desc->lso + lso_align; + data_offset = offsetof(struct mlx4_en_tx_desc, lso) + lso_align; + } + + /* valid only for none inline segments */ + tx_info->data_offset = data_offset; + + tx_info->inl = inline_ok; + + tx_info->linear = lso_header_size < skb_headlen(skb) && !inline_ok; + + tx_info->nr_maps = shinfo->nr_frags + tx_info->linear; + data += tx_info->nr_maps - 1; + + if (!tx_info->inl) + if (!mlx4_en_build_dma_wqe(priv, shinfo, data, skb, + lso_header_size, ring->mr_key, + tx_info)) + goto tx_drop_count; + + /* + * For timestamping add flag to skb_shinfo and + * set flag for further reference + */ + tx_info->ts_requested = 0; + if (unlikely(ring->hwtstamp_tx_type == HWTSTAMP_TX_ON && + shinfo->tx_flags & SKBTX_HW_TSTAMP)) { + shinfo->tx_flags |= SKBTX_IN_PROGRESS; + tx_info->ts_requested = 1; + } + + /* Prepare ctrl segement apart opcode+ownership, which depends on + * whether LSO is used */ + tx_desc->ctrl.srcrb_flags = priv->ctrl_flags; + if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + if (!skb->encapsulation) + tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM | + MLX4_WQE_CTRL_TCP_UDP_CSUM); + else + tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM); + ring->tx_csum++; + } + + if (priv->flags & MLX4_EN_FLAG_ENABLE_HW_LOOPBACK) { + struct ethhdr *ethh; + + /* Copy dst mac address to wqe. This allows loopback in eSwitch, + * so that VFs and PF can communicate with each other + */ + ethh = (struct ethhdr *)skb->data; + tx_desc->ctrl.srcrb_flags16[0] = get_unaligned((__be16 *)ethh->h_dest); + tx_desc->ctrl.imm = get_unaligned((__be32 *)(ethh->h_dest + 2)); + } + + /* Handle LSO (TSO) packets */ + if (lso_header_size) { + int i; + + /* Mark opcode as LSO */ + op_own = cpu_to_be32(MLX4_OPCODE_LSO | (1 << 6)) | + ((ring->prod & ring->size) ? + cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0); + + lso_header_size -= hopbyhop; + /* Fill in the LSO prefix */ + tx_desc->lso.mss_hdr_size = cpu_to_be32( + shinfo->gso_size << 16 | lso_header_size); + + + if (unlikely(hopbyhop)) { + /* remove the HBH header. 
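+			 * (this is the IPv6 jumbogram hop-by-hop option pushed
+			 * by ip6_xmit() for BIG TCP packets)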
+ * Layout: [Ethernet header][IPv6 header][HBH][TCP header] + */ + memcpy(tx_desc->lso.header, skb->data, ETH_HLEN + sizeof(*h6)); + h6 = (struct ipv6hdr *)((char *)tx_desc->lso.header + ETH_HLEN); + h6->nexthdr = IPPROTO_TCP; + /* Copy the TCP header after the IPv6 one */ + memcpy(h6 + 1, + skb->data + ETH_HLEN + sizeof(*h6) + + sizeof(struct hop_jumbo_hdr), + tcp_hdrlen(skb)); + /* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */ + } else { + /* Copy headers; + * note that we already verified that it is linear + */ + memcpy(tx_desc->lso.header, skb->data, lso_header_size); + } + ring->tso_packets++; + + i = shinfo->gso_segs; + tx_info->nr_bytes = skb->len + (i - 1) * lso_header_size; + ring->packets += i; + } else { + /* Normal (Non LSO) packet */ + op_own = cpu_to_be32(MLX4_OPCODE_SEND) | + ((ring->prod & ring->size) ? + cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0); + tx_info->nr_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); + ring->packets++; + } + ring->bytes += tx_info->nr_bytes; + + if (tx_info->inl) + build_inline_wqe(tx_desc, skb, shinfo, fragptr); + + if (skb->encapsulation) { + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + u8 proto; + + ip.hdr = skb_inner_network_header(skb); + proto = (ip.v4->version == 4) ? ip.v4->protocol : + ip.v6->nexthdr; + + if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) + op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP | MLX4_WQE_CTRL_ILP); + else + op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP); + } + + ring->prod += nr_txbb; + + /* If we used a bounce buffer then copy descriptor back into place */ + if (unlikely(bounce)) + tx_desc = mlx4_en_bounce_to_desc(priv, ring, index, desc_size); + + skb_tx_timestamp(skb); + + /* Check available TXBBs And 2K spare for prefetch */ + stop_queue = mlx4_en_is_tx_ring_full(ring); + if (unlikely(stop_queue)) { + netif_tx_stop_queue(ring->tx_queue); + ring->queue_stopped++; + } + + send_doorbell = __netdev_tx_sent_queue(ring->tx_queue, + tx_info->nr_bytes, + netdev_xmit_more()); + + real_size = (real_size / 16) & 0x3f; + + bf_ok &= desc_size <= MAX_BF && send_doorbell; + + if (bf_ok) + qpn_vlan.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size); + else + qpn_vlan.fence_size = real_size; + + mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, desc_size, bf_index, + op_own, bf_ok, send_doorbell); + + if (unlikely(stop_queue)) { + /* If queue was emptied after the if (stop_queue) , and before + * the netif_tx_stop_queue() - need to wake the queue, + * or else it will remain stopped forever. + * Need a memory barrier to make sure ring->cons was not + * updated before queue was stopped. 
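+		 * The ring->cons re-read below through mlx4_en_is_tx_ring_full()
+		 * is the value published by the TX completion path
+		 * (mlx4_en_process_tx_cq()).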
+ */ + smp_rmb(); + + if (unlikely(!mlx4_en_is_tx_ring_full(ring))) { + netif_tx_wake_queue(ring->tx_queue); + ring->wake_queue++; + } + } + return NETDEV_TX_OK; + +tx_drop_count: + ring->tx_dropped++; +tx_drop: + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +#define MLX4_EN_XDP_TX_NRTXBB 1 +#define MLX4_EN_XDP_TX_REAL_SZ (((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) \ + / 16) & 0x3f) + +void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring) +{ + int i; + + for (i = 0; i < ring->size; i++) { + struct mlx4_en_tx_info *tx_info = &ring->tx_info[i]; + struct mlx4_en_tx_desc *tx_desc = ring->buf + + (i << LOG_TXBB_SIZE); + + tx_info->map0_byte_count = PAGE_SIZE; + tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB; + tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data); + tx_info->ts_requested = 0; + tx_info->nr_maps = 1; + tx_info->linear = 1; + tx_info->inl = 0; + + tx_desc->data.lkey = ring->mr_key; + tx_desc->ctrl.qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ; + tx_desc->ctrl.srcrb_flags = priv->ctrl_flags; + } +} + +netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, + struct mlx4_en_rx_alloc *frame, + struct mlx4_en_priv *priv, unsigned int length, + int tx_ind, bool *doorbell_pending) +{ + struct mlx4_en_tx_desc *tx_desc; + struct mlx4_en_tx_info *tx_info; + struct mlx4_wqe_data_seg *data; + struct mlx4_en_tx_ring *ring; + dma_addr_t dma; + __be32 op_own; + int index; + + if (unlikely(!priv->port_up)) + goto tx_drop; + + ring = priv->tx_ring[TX_XDP][tx_ind]; + + if (unlikely(mlx4_en_is_tx_ring_full(ring))) + goto tx_drop_count; + + index = ring->prod & ring->size_mask; + tx_info = &ring->tx_info[index]; + + tx_desc = ring->buf + (index << LOG_TXBB_SIZE); + data = &tx_desc->data; + + dma = frame->dma; + + tx_info->page = frame->page; + frame->page = NULL; + tx_info->map0_dma = dma; + tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN); + + dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset, + length, DMA_TO_DEVICE); + + data->addr = cpu_to_be64(dma + frame->page_offset); + dma_wmb(); + data->byte_count = cpu_to_be32(length); + + /* tx completion can avoid cache line miss for common cases */ + + op_own = cpu_to_be32(MLX4_OPCODE_SEND) | + ((ring->prod & ring->size) ? + cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0); + + rx_ring->xdp_tx++; + + ring->prod += MLX4_EN_XDP_TX_NRTXBB; + + /* Ensure new descriptor hits memory + * before setting ownership of this descriptor to HW + */ + dma_wmb(); + tx_desc->ctrl.owner_opcode = op_own; + ring->xmit_more++; + + *doorbell_pending = true; + + return NETDEV_TX_OK; + +tx_drop_count: + rx_ring->xdp_tx_full++; + *doorbell_pending = true; +tx_drop: + return NETDEV_TX_BUSY; +} diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c new file mode 100644 index 000000000..414e390e6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -0,0 +1,1563 @@ +/* + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <linux/mm.h> +#include <linux/dma-mapping.h> + +#include <linux/mlx4/cmd.h> +#include <linux/cpu_rmap.h> + +#include "mlx4.h" +#include "fw.h" + +enum { + MLX4_IRQNAME_SIZE = 32 +}; + +enum { + MLX4_NUM_ASYNC_EQE = 0x100, + MLX4_NUM_SPARE_EQE = 0x80, + MLX4_EQ_ENTRY_SIZE = 0x20 +}; + +#define MLX4_EQ_STATUS_OK ( 0 << 28) +#define MLX4_EQ_STATUS_WRITE_FAIL (10 << 28) +#define MLX4_EQ_OWNER_SW ( 0 << 24) +#define MLX4_EQ_OWNER_HW ( 1 << 24) +#define MLX4_EQ_FLAG_EC ( 1 << 18) +#define MLX4_EQ_FLAG_OI ( 1 << 17) +#define MLX4_EQ_STATE_ARMED ( 9 << 8) +#define MLX4_EQ_STATE_FIRED (10 << 8) +#define MLX4_EQ_STATE_ALWAYS_ARMED (11 << 8) + +#define MLX4_ASYNC_EVENT_MASK ((1ull << MLX4_EVENT_TYPE_PATH_MIG) | \ + (1ull << MLX4_EVENT_TYPE_COMM_EST) | \ + (1ull << MLX4_EVENT_TYPE_SQ_DRAINED) | \ + (1ull << MLX4_EVENT_TYPE_CQ_ERROR) | \ + (1ull << MLX4_EVENT_TYPE_WQ_CATAS_ERROR) | \ + (1ull << MLX4_EVENT_TYPE_EEC_CATAS_ERROR) | \ + (1ull << MLX4_EVENT_TYPE_PATH_MIG_FAILED) | \ + (1ull << MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \ + (1ull << MLX4_EVENT_TYPE_WQ_ACCESS_ERROR) | \ + (1ull << MLX4_EVENT_TYPE_PORT_CHANGE) | \ + (1ull << MLX4_EVENT_TYPE_ECC_DETECT) | \ + (1ull << MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) | \ + (1ull << MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE) | \ + (1ull << MLX4_EVENT_TYPE_SRQ_LIMIT) | \ + (1ull << MLX4_EVENT_TYPE_CMD) | \ + (1ull << MLX4_EVENT_TYPE_OP_REQUIRED) | \ + (1ull << MLX4_EVENT_TYPE_COMM_CHANNEL) | \ + (1ull << MLX4_EVENT_TYPE_FLR_EVENT) | \ + (1ull << MLX4_EVENT_TYPE_FATAL_WARNING)) + +static u64 get_async_ev_mask(struct mlx4_dev *dev) +{ + u64 async_ev_mask = MLX4_ASYNC_EVENT_MASK; + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV) + async_ev_mask |= (1ull << MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT); + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT) + async_ev_mask |= (1ull << MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT); + + return async_ev_mask; +} + +static void eq_set_ci(struct mlx4_eq *eq, int req_not) +{ + __raw_writel((__force u32) cpu_to_be32((eq->cons_index & 0xffffff) | + req_not << 31), + eq->doorbell); + /* We still want ordering, just not swabbing, so add a barrier */ + wmb(); +} + +static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, 
u32 entry, u8 eqe_factor, + u8 eqe_size) +{ + /* (entry & (eq->nent - 1)) gives us a cyclic array */ + unsigned long offset = (entry & (eq->nent - 1)) * eqe_size; + /* CX3 is capable of extending the EQE from 32 to 64 bytes with + * strides of 64B,128B and 256B. + * When 64B EQE is used, the first (in the lower addresses) + * 32 bytes in the 64 byte EQE are reserved and the next 32 bytes + * contain the legacy EQE information. + * In all other cases, the first 32B contains the legacy EQE info. + */ + return eq->page_list[offset / PAGE_SIZE].buf + (offset + (eqe_factor ? MLX4_EQ_ENTRY_SIZE : 0)) % PAGE_SIZE; +} + +static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor, u8 size) +{ + struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor, size); + return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe; +} + +static struct mlx4_eqe *next_slave_event_eqe(struct mlx4_slave_event_eq *slave_eq) +{ + struct mlx4_eqe *eqe = + &slave_eq->event_eqe[slave_eq->cons & (SLAVE_EVENT_EQ_SIZE - 1)]; + return (!!(eqe->owner & 0x80) ^ + !!(slave_eq->cons & SLAVE_EVENT_EQ_SIZE)) ? + eqe : NULL; +} + +void mlx4_gen_slave_eqe(struct work_struct *work) +{ + struct mlx4_mfunc_master_ctx *master = + container_of(work, struct mlx4_mfunc_master_ctx, + slave_event_work); + struct mlx4_mfunc *mfunc = + container_of(master, struct mlx4_mfunc, master); + struct mlx4_priv *priv = container_of(mfunc, struct mlx4_priv, mfunc); + struct mlx4_dev *dev = &priv->dev; + struct mlx4_slave_event_eq *slave_eq = &mfunc->master.slave_eq; + struct mlx4_eqe *eqe; + u8 slave; + int i, phys_port, slave_port; + + for (eqe = next_slave_event_eqe(slave_eq); eqe; + eqe = next_slave_event_eqe(slave_eq)) { + slave = eqe->slave_id; + + if (eqe->type == MLX4_EVENT_TYPE_PORT_CHANGE && + eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN && + mlx4_is_bonded(dev)) { + struct mlx4_port_cap port_cap; + + if (!mlx4_QUERY_PORT(dev, 1, &port_cap) && port_cap.link_state) + goto consume; + + if (!mlx4_QUERY_PORT(dev, 2, &port_cap) && port_cap.link_state) + goto consume; + } + /* All active slaves need to receive the event */ + if (slave == ALL_SLAVES) { + for (i = 0; i <= dev->persist->num_vfs; i++) { + phys_port = 0; + if (eqe->type == MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT && + eqe->subtype == MLX4_DEV_PMC_SUBTYPE_PORT_INFO) { + phys_port = eqe->event.port_mgmt_change.port; + slave_port = mlx4_phys_to_slave_port(dev, i, phys_port); + if (slave_port < 0) /* VF doesn't have this port */ + continue; + eqe->event.port_mgmt_change.port = slave_port; + } + if (mlx4_GEN_EQE(dev, i, eqe)) + mlx4_warn(dev, "Failed to generate event for slave %d\n", + i); + if (phys_port) + eqe->event.port_mgmt_change.port = phys_port; + } + } else { + if (mlx4_GEN_EQE(dev, slave, eqe)) + mlx4_warn(dev, "Failed to generate event for slave %d\n", + slave); + } +consume: + ++slave_eq->cons; + } +} + + +static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_event_eq *slave_eq = &priv->mfunc.master.slave_eq; + struct mlx4_eqe *s_eqe; + unsigned long flags; + + spin_lock_irqsave(&slave_eq->event_lock, flags); + s_eqe = &slave_eq->event_eqe[slave_eq->prod & (SLAVE_EVENT_EQ_SIZE - 1)]; + if ((!!(s_eqe->owner & 0x80)) ^ + (!!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE))) { + mlx4_warn(dev, "Master failed to generate an EQE for slave: %d. 
No free EQE on slave events queue\n", + slave); + spin_unlock_irqrestore(&slave_eq->event_lock, flags); + return; + } + + memcpy(s_eqe, eqe, sizeof(struct mlx4_eqe) - 1); + s_eqe->slave_id = slave; + /* ensure all information is written before setting the ownersip bit */ + dma_wmb(); + s_eqe->owner = !!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE) ? 0x0 : 0x80; + ++slave_eq->prod; + + queue_work(priv->mfunc.master.comm_wq, + &priv->mfunc.master.slave_event_work); + spin_unlock_irqrestore(&slave_eq->event_lock, flags); +} + +static void mlx4_slave_event(struct mlx4_dev *dev, int slave, + struct mlx4_eqe *eqe) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + if (slave < 0 || slave > dev->persist->num_vfs || + slave == dev->caps.function || + !priv->mfunc.master.slave_state[slave].active) + return; + + slave_event(dev, slave, eqe); +} + +#if defined(CONFIG_SMP) +static void mlx4_set_eq_affinity_hint(struct mlx4_priv *priv, int vec) +{ + int hint_err; + struct mlx4_dev *dev = &priv->dev; + struct mlx4_eq *eq = &priv->eq_table.eq[vec]; + + if (!cpumask_available(eq->affinity_mask) || + cpumask_empty(eq->affinity_mask)) + return; + + hint_err = irq_update_affinity_hint(eq->irq, eq->affinity_mask); + if (hint_err) + mlx4_warn(dev, "irq_update_affinity_hint failed, err %d\n", hint_err); +} +#endif + +int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port) +{ + struct mlx4_eqe eqe; + + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *s_slave = &priv->mfunc.master.slave_state[slave]; + + if (!s_slave->active) + return 0; + + memset(&eqe, 0, sizeof(eqe)); + + eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; + eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE; + eqe.event.port_mgmt_change.port = mlx4_phys_to_slave_port(dev, slave, port); + + return mlx4_GEN_EQE(dev, slave, &eqe); +} +EXPORT_SYMBOL(mlx4_gen_pkey_eqe); + +int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port) +{ + struct mlx4_eqe eqe; + + /*don't send if we don't have the that slave */ + if (dev->persist->num_vfs < slave) + return 0; + memset(&eqe, 0, sizeof(eqe)); + + eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; + eqe.subtype = MLX4_DEV_PMC_SUBTYPE_GUID_INFO; + eqe.event.port_mgmt_change.port = mlx4_phys_to_slave_port(dev, slave, port); + + return mlx4_GEN_EQE(dev, slave, &eqe); +} +EXPORT_SYMBOL(mlx4_gen_guid_change_eqe); + +int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, + u8 port_subtype_change) +{ + struct mlx4_eqe eqe; + u8 slave_port = mlx4_phys_to_slave_port(dev, slave, port); + + /*don't send if we don't have the that slave */ + if (dev->persist->num_vfs < slave) + return 0; + memset(&eqe, 0, sizeof(eqe)); + + eqe.type = MLX4_EVENT_TYPE_PORT_CHANGE; + eqe.subtype = port_subtype_change; + eqe.event.port_change.port = cpu_to_be32(slave_port << 28); + + mlx4_dbg(dev, "%s: sending: %d to slave: %d on port: %d\n", __func__, + port_subtype_change, slave, port); + return mlx4_GEN_EQE(dev, slave, &eqe); +} +EXPORT_SYMBOL(mlx4_gen_port_state_change_eqe); + +enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state; + struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); + + if (slave >= dev->num_slaves || port > dev->caps.num_ports || + port <= 0 || !test_bit(port - 1, actv_ports.ports)) { + pr_err("%s: Error: asking for slave:%d, port:%d\n", + __func__, slave, port); + return SLAVE_PORT_DOWN; + } + return 
s_state[slave].port_state[port]; +} +EXPORT_SYMBOL(mlx4_get_slave_port_state); + +static int mlx4_set_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, + enum slave_port_state state) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state; + struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); + + if (slave >= dev->num_slaves || port > dev->caps.num_ports || + port <= 0 || !test_bit(port - 1, actv_ports.ports)) { + pr_err("%s: Error: asking for slave:%d, port:%d\n", + __func__, slave, port); + return -1; + } + s_state[slave].port_state[port] = state; + + return 0; +} + +static void set_all_slave_state(struct mlx4_dev *dev, u8 port, int event) +{ + int i; + enum slave_port_gen_event gen_event; + struct mlx4_slaves_pport slaves_pport = mlx4_phys_to_slaves_pport(dev, + port); + + for (i = 0; i < dev->persist->num_vfs + 1; i++) + if (test_bit(i, slaves_pport.slaves)) + set_and_calc_slave_port_state(dev, i, port, + event, &gen_event); +} +/************************************************************************** + The function get as input the new event to that port, + and according to the prev state change the slave's port state. + The events are: + MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN, + MLX4_PORT_STATE_DEV_EVENT_PORT_UP + MLX4_PORT_STATE_IB_EVENT_GID_VALID + MLX4_PORT_STATE_IB_EVENT_GID_INVALID +***************************************************************************/ +int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, + u8 port, int event, + enum slave_port_gen_event *gen_event) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *ctx = NULL; + unsigned long flags; + int ret = -1; + struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); + enum slave_port_state cur_state = + mlx4_get_slave_port_state(dev, slave, port); + + *gen_event = SLAVE_PORT_GEN_EVENT_NONE; + + if (slave >= dev->num_slaves || port > dev->caps.num_ports || + port <= 0 || !test_bit(port - 1, actv_ports.ports)) { + pr_err("%s: Error: asking for slave:%d, port:%d\n", + __func__, slave, port); + return ret; + } + + ctx = &priv->mfunc.master.slave_state[slave]; + spin_lock_irqsave(&ctx->lock, flags); + + switch (cur_state) { + case SLAVE_PORT_DOWN: + if (MLX4_PORT_STATE_DEV_EVENT_PORT_UP == event) + mlx4_set_slave_port_state(dev, slave, port, + SLAVE_PENDING_UP); + break; + case SLAVE_PENDING_UP: + if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event) + mlx4_set_slave_port_state(dev, slave, port, + SLAVE_PORT_DOWN); + else if (MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID == event) { + mlx4_set_slave_port_state(dev, slave, port, + SLAVE_PORT_UP); + *gen_event = SLAVE_PORT_GEN_EVENT_UP; + } + break; + case SLAVE_PORT_UP: + if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event) { + mlx4_set_slave_port_state(dev, slave, port, + SLAVE_PORT_DOWN); + *gen_event = SLAVE_PORT_GEN_EVENT_DOWN; + } else if (MLX4_PORT_STATE_IB_EVENT_GID_INVALID == + event) { + mlx4_set_slave_port_state(dev, slave, port, + SLAVE_PENDING_UP); + *gen_event = SLAVE_PORT_GEN_EVENT_DOWN; + } + break; + default: + pr_err("%s: BUG!!! 
UNKNOWN state: slave:%d, port:%d\n", + __func__, slave, port); + goto out; + } + ret = mlx4_get_slave_port_state(dev, slave, port); + +out: + spin_unlock_irqrestore(&ctx->lock, flags); + return ret; +} + +EXPORT_SYMBOL(set_and_calc_slave_port_state); + +int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr) +{ + struct mlx4_eqe eqe; + + memset(&eqe, 0, sizeof(eqe)); + + eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; + eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PORT_INFO; + eqe.event.port_mgmt_change.port = port; + eqe.event.port_mgmt_change.params.port_info.changed_attr = + cpu_to_be32((u32) attr); + + slave_event(dev, ALL_SLAVES, &eqe); + return 0; +} +EXPORT_SYMBOL(mlx4_gen_slaves_port_mgt_ev); + +void mlx4_master_handle_slave_flr(struct work_struct *work) +{ + struct mlx4_mfunc_master_ctx *master = + container_of(work, struct mlx4_mfunc_master_ctx, + slave_flr_event_work); + struct mlx4_mfunc *mfunc = + container_of(master, struct mlx4_mfunc, master); + struct mlx4_priv *priv = + container_of(mfunc, struct mlx4_priv, mfunc); + struct mlx4_dev *dev = &priv->dev; + struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state; + int i; + int err; + unsigned long flags; + + mlx4_dbg(dev, "mlx4_handle_slave_flr\n"); + + for (i = 0 ; i < dev->num_slaves; i++) { + + if (MLX4_COMM_CMD_FLR == slave_state[i].last_cmd) { + mlx4_dbg(dev, "mlx4_handle_slave_flr: clean slave: %d\n", + i); + /* In case of 'Reset flow' FLR can be generated for + * a slave before mlx4_load_one is done. + * make sure interface is up before trying to delete + * slave resources which weren't allocated yet. + */ + if (dev->persist->interface_state & + MLX4_INTERFACE_STATE_UP) + mlx4_delete_all_resources_for_slave(dev, i); + /*return the slave to running mode*/ + spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); + slave_state[i].last_cmd = MLX4_COMM_CMD_RESET; + slave_state[i].is_slave_going_down = 0; + spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags); + /*notify the FW:*/ + err = mlx4_cmd(dev, 0, i, 0, MLX4_CMD_INFORM_FLR_DONE, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (err) + mlx4_warn(dev, "Failed to notify FW on FLR done (slave:%d)\n", + i); + } + } +} + +static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_eqe *eqe; + int cqn; + int eqes_found = 0; + int set_ci = 0; + int port; + int slave = 0; + int ret; + u32 flr_slave; + u8 update_slave_state; + int i; + enum slave_port_gen_event gen_event; + unsigned long flags; + struct mlx4_vport_state *s_info; + int eqe_size = dev->caps.eqe_size; + + while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor, eqe_size))) { + /* + * Make sure we read EQ entry contents after we've + * checked the ownership bit. 
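+ * (The ownership check in next_eqe_sw() only tells us that the
+ * hardware has finished writing this EQE; the dma_rmb() below
+ * keeps the loads of the EQE payload in the switch statement
+ * from being reordered before that check.)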
+ */ + dma_rmb(); + + switch (eqe->type) { + case MLX4_EVENT_TYPE_COMP: + cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff; + mlx4_cq_completion(dev, cqn); + break; + + case MLX4_EVENT_TYPE_PATH_MIG: + case MLX4_EVENT_TYPE_COMM_EST: + case MLX4_EVENT_TYPE_SQ_DRAINED: + case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE: + case MLX4_EVENT_TYPE_WQ_CATAS_ERROR: + case MLX4_EVENT_TYPE_PATH_MIG_FAILED: + case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR: + mlx4_dbg(dev, "event %d arrived\n", eqe->type); + if (mlx4_is_master(dev)) { + /* forward only to slave owning the QP */ + ret = mlx4_get_slave_from_resource_id(dev, + RES_QP, + be32_to_cpu(eqe->event.qp.qpn) + & 0xffffff, &slave); + if (ret && ret != -ENOENT) { + mlx4_dbg(dev, "QP event %02x(%02x) on EQ %d at index %u: could not get slave id (%d)\n", + eqe->type, eqe->subtype, + eq->eqn, eq->cons_index, ret); + break; + } + + if (!ret && slave != dev->caps.function) { + mlx4_slave_event(dev, slave, eqe); + break; + } + + } + mlx4_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & + 0xffffff, eqe->type); + break; + + case MLX4_EVENT_TYPE_SRQ_LIMIT: + mlx4_dbg(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT. srq_no=0x%x, eq 0x%x\n", + __func__, be32_to_cpu(eqe->event.srq.srqn), + eq->eqn); + fallthrough; + case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR: + if (mlx4_is_master(dev)) { + /* forward only to slave owning the SRQ */ + ret = mlx4_get_slave_from_resource_id(dev, + RES_SRQ, + be32_to_cpu(eqe->event.srq.srqn) + & 0xffffff, + &slave); + if (ret && ret != -ENOENT) { + mlx4_warn(dev, "SRQ event %02x(%02x) on EQ %d at index %u: could not get slave id (%d)\n", + eqe->type, eqe->subtype, + eq->eqn, eq->cons_index, ret); + break; + } + if (eqe->type == + MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) + mlx4_warn(dev, "%s: slave:%d, srq_no:0x%x, event: %02x(%02x)\n", + __func__, slave, + be32_to_cpu(eqe->event.srq.srqn), + eqe->type, eqe->subtype); + + if (!ret && slave != dev->caps.function) { + if (eqe->type == + MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) + mlx4_warn(dev, "%s: sending event %02x(%02x) to slave:%d\n", + __func__, eqe->type, + eqe->subtype, slave); + mlx4_slave_event(dev, slave, eqe); + break; + } + } + mlx4_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & + 0xffffff, eqe->type); + break; + + case MLX4_EVENT_TYPE_CMD: + mlx4_cmd_event(dev, + be16_to_cpu(eqe->event.cmd.token), + eqe->event.cmd.status, + be64_to_cpu(eqe->event.cmd.out_param)); + break; + + case MLX4_EVENT_TYPE_PORT_CHANGE: { + struct mlx4_slaves_pport slaves_port; + port = be32_to_cpu(eqe->event.port_change.port) >> 28; + slaves_port = mlx4_phys_to_slaves_pport(dev, port); + if (eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN) { + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_DOWN, + port); + mlx4_priv(dev)->sense.do_sense_port[port] = 1; + if (!mlx4_is_master(dev)) + break; + for (i = 0; i < dev->persist->num_vfs + 1; + i++) { + int reported_port = mlx4_is_bonded(dev) ? 
1 : mlx4_phys_to_slave_port(dev, i, port); + + if (!test_bit(i, slaves_port.slaves) && !mlx4_is_bonded(dev)) + continue; + if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) { + if (i == mlx4_master_func_num(dev)) + continue; + mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN to slave: %d, port:%d\n", + __func__, i, port); + s_info = &priv->mfunc.master.vf_oper[i].vport[port].state; + if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) { + eqe->event.port_change.port = + cpu_to_be32( + (be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF) + | (reported_port << 28)); + mlx4_slave_event(dev, i, eqe); + } + } else { /* IB port */ + set_and_calc_slave_port_state(dev, i, port, + MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN, + &gen_event); + /*we can be in pending state, then do not send port_down event*/ + if (SLAVE_PORT_GEN_EVENT_DOWN == gen_event) { + if (i == mlx4_master_func_num(dev)) + continue; + eqe->event.port_change.port = + cpu_to_be32( + (be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF) + | (mlx4_phys_to_slave_port(dev, i, port) << 28)); + mlx4_slave_event(dev, i, eqe); + } + } + } + } else { + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_UP, port); + + mlx4_priv(dev)->sense.do_sense_port[port] = 0; + + if (!mlx4_is_master(dev)) + break; + if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) + for (i = 0; + i < dev->persist->num_vfs + 1; + i++) { + int reported_port = mlx4_is_bonded(dev) ? 1 : mlx4_phys_to_slave_port(dev, i, port); + + if (!test_bit(i, slaves_port.slaves) && !mlx4_is_bonded(dev)) + continue; + if (i == mlx4_master_func_num(dev)) + continue; + s_info = &priv->mfunc.master.vf_oper[i].vport[port].state; + if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) { + eqe->event.port_change.port = + cpu_to_be32( + (be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF) + | (reported_port << 28)); + mlx4_slave_event(dev, i, eqe); + } + } + else /* IB port */ + /* port-up event will be sent to a slave when the + * slave's alias-guid is set. This is done in alias_GUID.c + */ + set_all_slave_state(dev, port, MLX4_DEV_EVENT_PORT_UP); + } + break; + } + + case MLX4_EVENT_TYPE_CQ_ERROR: + mlx4_warn(dev, "CQ %s on CQN %06x\n", + eqe->event.cq_err.syndrome == 1 ? 
+ "overrun" : "access violation", + be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff); + if (mlx4_is_master(dev)) { + ret = mlx4_get_slave_from_resource_id(dev, + RES_CQ, + be32_to_cpu(eqe->event.cq_err.cqn) + & 0xffffff, &slave); + if (ret && ret != -ENOENT) { + mlx4_dbg(dev, "CQ event %02x(%02x) on EQ %d at index %u: could not get slave id (%d)\n", + eqe->type, eqe->subtype, + eq->eqn, eq->cons_index, ret); + break; + } + + if (!ret && slave != dev->caps.function) { + mlx4_slave_event(dev, slave, eqe); + break; + } + } + mlx4_cq_event(dev, + be32_to_cpu(eqe->event.cq_err.cqn) + & 0xffffff, + eqe->type); + break; + + case MLX4_EVENT_TYPE_EQ_OVERFLOW: + mlx4_warn(dev, "EQ overrun on EQN %d\n", eq->eqn); + break; + + case MLX4_EVENT_TYPE_OP_REQUIRED: + atomic_inc(&priv->opreq_count); + /* FW commands can't be executed from interrupt context + * working in deferred task + */ + queue_work(mlx4_wq, &priv->opreq_task); + break; + + case MLX4_EVENT_TYPE_COMM_CHANNEL: + if (!mlx4_is_master(dev)) { + mlx4_warn(dev, "Received comm channel event for non master device\n"); + break; + } + memcpy(&priv->mfunc.master.comm_arm_bit_vector, + eqe->event.comm_channel_arm.bit_vec, + sizeof(eqe->event.comm_channel_arm.bit_vec)); + queue_work(priv->mfunc.master.comm_wq, + &priv->mfunc.master.comm_work); + break; + + case MLX4_EVENT_TYPE_FLR_EVENT: + flr_slave = be32_to_cpu(eqe->event.flr_event.slave_id); + if (!mlx4_is_master(dev)) { + mlx4_warn(dev, "Non-master function received FLR event\n"); + break; + } + + mlx4_dbg(dev, "FLR event for slave: %d\n", flr_slave); + + if (flr_slave >= dev->num_slaves) { + mlx4_warn(dev, + "Got FLR for unknown function: %d\n", + flr_slave); + update_slave_state = 0; + } else + update_slave_state = 1; + + spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); + if (update_slave_state) { + priv->mfunc.master.slave_state[flr_slave].active = false; + priv->mfunc.master.slave_state[flr_slave].last_cmd = MLX4_COMM_CMD_FLR; + priv->mfunc.master.slave_state[flr_slave].is_slave_going_down = 1; + } + spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags); + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN, + flr_slave); + queue_work(priv->mfunc.master.comm_wq, + &priv->mfunc.master.slave_flr_event_work); + break; + + case MLX4_EVENT_TYPE_FATAL_WARNING: + if (eqe->subtype == MLX4_FATAL_WARNING_SUBTYPE_WARMING) { + if (mlx4_is_master(dev)) + for (i = 0; i < dev->num_slaves; i++) { + mlx4_dbg(dev, "%s: Sending MLX4_FATAL_WARNING_SUBTYPE_WARMING to slave: %d\n", + __func__, i); + if (i == dev->caps.function) + continue; + mlx4_slave_event(dev, i, eqe); + } + mlx4_err(dev, "Temperature Threshold was reached! Threshold: %d celsius degrees; Current Temperature: %d\n", + be16_to_cpu(eqe->event.warming.warning_threshold), + be16_to_cpu(eqe->event.warming.current_temperature)); + } else + mlx4_warn(dev, "Unhandled event FATAL WARNING (%02x), subtype %02x on EQ %d at index %u. owner=%x, nent=0x%x, slave=%x, ownership=%s\n", + eqe->type, eqe->subtype, eq->eqn, + eq->cons_index, eqe->owner, eq->nent, + eqe->slave_id, + !!(eqe->owner & 0x80) ^ + !!(eq->cons_index & eq->nent) ? 
"HW" : "SW"); + + break; + + case MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT: + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_MGMT_CHANGE, + (unsigned long) eqe); + break; + + case MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT: + switch (eqe->subtype) { + case MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_BAD_CABLE: + mlx4_warn(dev, "Bad cable detected on port %u\n", + eqe->event.bad_cable.port); + break; + case MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_UNSUPPORTED_CABLE: + mlx4_warn(dev, "Unsupported cable detected\n"); + break; + default: + mlx4_dbg(dev, + "Unhandled recoverable error event detected: %02x(%02x) on EQ %d at index %u. owner=%x, nent=0x%x, ownership=%s\n", + eqe->type, eqe->subtype, eq->eqn, + eq->cons_index, eqe->owner, eq->nent, + !!(eqe->owner & 0x80) ^ + !!(eq->cons_index & eq->nent) ? "HW" : "SW"); + break; + } + break; + + case MLX4_EVENT_TYPE_EEC_CATAS_ERROR: + case MLX4_EVENT_TYPE_ECC_DETECT: + default: + mlx4_warn(dev, "Unhandled event %02x(%02x) on EQ %d at index %u. owner=%x, nent=0x%x, slave=%x, ownership=%s\n", + eqe->type, eqe->subtype, eq->eqn, + eq->cons_index, eqe->owner, eq->nent, + eqe->slave_id, + !!(eqe->owner & 0x80) ^ + !!(eq->cons_index & eq->nent) ? "HW" : "SW"); + break; + } + + ++eq->cons_index; + eqes_found = 1; + ++set_ci; + + /* + * The HCA will think the queue has overflowed if we + * don't tell it we've been processing events. We + * create our EQs with MLX4_NUM_SPARE_EQE extra + * entries, so we must update our consumer index at + * least that often. + */ + if (unlikely(set_ci >= MLX4_NUM_SPARE_EQE)) { + eq_set_ci(eq, 0); + set_ci = 0; + } + } + + eq_set_ci(eq, 1); + + return eqes_found; +} + +static irqreturn_t mlx4_interrupt(int irq, void *dev_ptr) +{ + struct mlx4_dev *dev = dev_ptr; + struct mlx4_priv *priv = mlx4_priv(dev); + int work = 0; + int i; + + writel(priv->eq_table.clr_mask, priv->eq_table.clr_int); + + for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) + work |= mlx4_eq_int(dev, &priv->eq_table.eq[i]); + + return IRQ_RETVAL(work); +} + +static irqreturn_t mlx4_msi_x_interrupt(int irq, void *eq_ptr) +{ + struct mlx4_eq *eq = eq_ptr; + struct mlx4_dev *dev = eq->dev; + + mlx4_eq_int(dev, eq); + + /* MSI-X vectors always belong to us */ + return IRQ_HANDLED; +} + +int mlx4_MAP_EQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_event_eq_info *event_eq = + priv->mfunc.master.slave_state[slave].event_eq; + u32 in_modifier = vhcr->in_modifier; + u32 eqn = in_modifier & 0x3FF; + u64 in_param = vhcr->in_param; + int err = 0; + int i; + + if (slave == dev->caps.function) + err = mlx4_cmd(dev, in_param, (in_modifier & 0x80000000) | eqn, + 0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); + if (!err) + for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i) + if (in_param & (1LL << i)) + event_eq[i].eqn = in_modifier >> 31 ? 
-1 : eqn; + + return err; +} + +static int mlx4_MAP_EQ(struct mlx4_dev *dev, u64 event_mask, int unmap, + int eq_num) +{ + return mlx4_cmd(dev, event_mask, (unmap << 31) | eq_num, + 0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); +} + +static int mlx4_SW2HW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, + int eq_num) +{ + return mlx4_cmd(dev, mailbox->dma, eq_num, 0, + MLX4_CMD_SW2HW_EQ, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_WRAPPED); +} + +static int mlx4_HW2SW_EQ(struct mlx4_dev *dev, int eq_num) +{ + return mlx4_cmd(dev, 0, eq_num, 1, MLX4_CMD_HW2SW_EQ, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); +} + +static int mlx4_num_eq_uar(struct mlx4_dev *dev) +{ + /* + * Each UAR holds 4 EQ doorbells. To figure out how many UARs + * we need to map, take the difference of highest index and + * the lowest index we'll use and add 1. + */ + return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs) / 4 - + dev->caps.reserved_eqs / 4 + 1; +} + +static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int index; + + index = eq->eqn / 4 - dev->caps.reserved_eqs / 4; + + if (!priv->eq_table.uar_map[index]) { + priv->eq_table.uar_map[index] = + ioremap( + pci_resource_start(dev->persist->pdev, 2) + + ((eq->eqn / 4) << (dev->uar_page_shift)), + (1 << (dev->uar_page_shift))); + if (!priv->eq_table.uar_map[index]) { + mlx4_err(dev, "Couldn't map EQ doorbell for EQN 0x%06x\n", + eq->eqn); + return NULL; + } + } + + return priv->eq_table.uar_map[index] + 0x800 + 8 * (eq->eqn % 4); +} + +static void mlx4_unmap_uar(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int i; + + for (i = 0; i < mlx4_num_eq_uar(dev); ++i) + if (priv->eq_table.uar_map[i]) { + iounmap(priv->eq_table.uar_map[i]); + priv->eq_table.uar_map[i] = NULL; + } +} + +static int mlx4_create_eq(struct mlx4_dev *dev, int nent, + u8 intr, struct mlx4_eq *eq) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_eq_context *eq_context; + int npages; + u64 *dma_list = NULL; + dma_addr_t t; + u64 mtt_addr; + int err = -ENOMEM; + int i; + + eq->dev = dev; + eq->nent = roundup_pow_of_two(max(nent, 2)); + /* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes, with + * strides of 64B,128B and 256B. 
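+ * A worked example of the sizing below, assuming 4 KiB pages: a
+ * 1024-entry EQ with 32-byte EQEs needs PAGE_ALIGN(1024 * 32) /
+ * PAGE_SIZE = 8 pages, and 16 pages with 64-byte EQEs; eq->nent
+ * itself was already rounded up to a power of two just above.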
+ */ + npages = PAGE_ALIGN(eq->nent * dev->caps.eqe_size) / PAGE_SIZE; + + eq->page_list = kmalloc_array(npages, sizeof(*eq->page_list), + GFP_KERNEL); + if (!eq->page_list) + goto err_out; + + for (i = 0; i < npages; ++i) + eq->page_list[i].buf = NULL; + + dma_list = kmalloc_array(npages, sizeof(*dma_list), GFP_KERNEL); + if (!dma_list) + goto err_out_free; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + goto err_out_free; + eq_context = mailbox->buf; + + for (i = 0; i < npages; ++i) { + eq->page_list[i].buf = dma_alloc_coherent(&dev->persist-> + pdev->dev, + PAGE_SIZE, &t, + GFP_KERNEL); + if (!eq->page_list[i].buf) + goto err_out_free_pages; + + dma_list[i] = t; + eq->page_list[i].map = t; + } + + eq->eqn = mlx4_bitmap_alloc(&priv->eq_table.bitmap); + if (eq->eqn == -1) + goto err_out_free_pages; + + eq->doorbell = mlx4_get_eq_uar(dev, eq); + if (!eq->doorbell) { + err = -ENOMEM; + goto err_out_free_eq; + } + + err = mlx4_mtt_init(dev, npages, PAGE_SHIFT, &eq->mtt); + if (err) + goto err_out_free_eq; + + err = mlx4_write_mtt(dev, &eq->mtt, 0, npages, dma_list); + if (err) + goto err_out_free_mtt; + + eq_context->flags = cpu_to_be32(MLX4_EQ_STATUS_OK | + MLX4_EQ_STATE_ARMED); + eq_context->log_eq_size = ilog2(eq->nent); + eq_context->intr = intr; + eq_context->log_page_size = PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT; + + mtt_addr = mlx4_mtt_addr(dev, &eq->mtt); + eq_context->mtt_base_addr_h = mtt_addr >> 32; + eq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff); + + err = mlx4_SW2HW_EQ(dev, mailbox, eq->eqn); + if (err) { + mlx4_warn(dev, "SW2HW_EQ failed (%d)\n", err); + goto err_out_free_mtt; + } + + kfree(dma_list); + mlx4_free_cmd_mailbox(dev, mailbox); + + eq->cons_index = 0; + + INIT_LIST_HEAD(&eq->tasklet_ctx.list); + INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); + spin_lock_init(&eq->tasklet_ctx.lock); + tasklet_setup(&eq->tasklet_ctx.task, mlx4_cq_tasklet_cb); + + return err; + +err_out_free_mtt: + mlx4_mtt_cleanup(dev, &eq->mtt); + +err_out_free_eq: + mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR); + +err_out_free_pages: + for (i = 0; i < npages; ++i) + if (eq->page_list[i].buf) + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, + eq->page_list[i].buf, + eq->page_list[i].map); + + mlx4_free_cmd_mailbox(dev, mailbox); + +err_out_free: + kfree(eq->page_list); + kfree(dma_list); + +err_out: + return err; +} + +static void mlx4_free_eq(struct mlx4_dev *dev, + struct mlx4_eq *eq) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int err; + int i; + /* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes, with + * strides of 64B,128B and 256B + */ + int npages = PAGE_ALIGN(dev->caps.eqe_size * eq->nent) / PAGE_SIZE; + + err = mlx4_HW2SW_EQ(dev, eq->eqn); + if (err) + mlx4_warn(dev, "HW2SW_EQ failed (%d)\n", err); + + synchronize_irq(eq->irq); + tasklet_disable(&eq->tasklet_ctx.task); + + mlx4_mtt_cleanup(dev, &eq->mtt); + for (i = 0; i < npages; ++i) + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, + eq->page_list[i].buf, + eq->page_list[i].map); + + kfree(eq->page_list); + mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR); +} + +static void mlx4_free_irqs(struct mlx4_dev *dev) +{ + struct mlx4_eq_table *eq_table = &mlx4_priv(dev)->eq_table; + int i; + + if (eq_table->have_irq) + free_irq(dev->persist->pdev->irq, dev); + + for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) + if (eq_table->eq[i].have_irq) { + free_cpumask_var(eq_table->eq[i].affinity_mask); + irq_update_affinity_hint(eq_table->eq[i].irq, 
NULL); + free_irq(eq_table->eq[i].irq, eq_table->eq + i); + eq_table->eq[i].have_irq = 0; + } + + kfree(eq_table->irq_names); +} + +static int mlx4_map_clr_int(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + priv->clr_base = ioremap(pci_resource_start(dev->persist->pdev, + priv->fw.clr_int_bar) + + priv->fw.clr_int_base, MLX4_CLR_INT_SIZE); + if (!priv->clr_base) { + mlx4_err(dev, "Couldn't map interrupt clear register, aborting\n"); + return -ENOMEM; + } + + return 0; +} + +static void mlx4_unmap_clr_int(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + iounmap(priv->clr_base); +} + +int mlx4_alloc_eq_table(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + priv->eq_table.eq = kcalloc(dev->caps.num_eqs - dev->caps.reserved_eqs, + sizeof(*priv->eq_table.eq), GFP_KERNEL); + if (!priv->eq_table.eq) + return -ENOMEM; + + return 0; +} + +void mlx4_free_eq_table(struct mlx4_dev *dev) +{ + kfree(mlx4_priv(dev)->eq_table.eq); +} + +int mlx4_init_eq_table(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int err; + int i; + + priv->eq_table.uar_map = kcalloc(mlx4_num_eq_uar(dev), + sizeof(*priv->eq_table.uar_map), + GFP_KERNEL); + if (!priv->eq_table.uar_map) { + err = -ENOMEM; + goto err_out_free; + } + + err = mlx4_bitmap_init(&priv->eq_table.bitmap, + roundup_pow_of_two(dev->caps.num_eqs), + dev->caps.num_eqs - 1, + dev->caps.reserved_eqs, + roundup_pow_of_two(dev->caps.num_eqs) - + dev->caps.num_eqs); + if (err) + goto err_out_free; + + for (i = 0; i < mlx4_num_eq_uar(dev); ++i) + priv->eq_table.uar_map[i] = NULL; + + if (!mlx4_is_slave(dev)) { + err = mlx4_map_clr_int(dev); + if (err) + goto err_out_bitmap; + + priv->eq_table.clr_mask = + swab32(1 << (priv->eq_table.inta_pin & 31)); + priv->eq_table.clr_int = priv->clr_base + + (priv->eq_table.inta_pin < 32 ? 4 : 0); + } + + priv->eq_table.irq_names = + kmalloc_array(MLX4_IRQNAME_SIZE, + (dev->caps.num_comp_vectors + 1), + GFP_KERNEL); + if (!priv->eq_table.irq_names) { + err = -ENOMEM; + goto err_out_clr_int; + } + + for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) { + if (i == MLX4_EQ_ASYNC) { + err = mlx4_create_eq(dev, + MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE, + 0, &priv->eq_table.eq[MLX4_EQ_ASYNC]); + } else { + struct mlx4_eq *eq = &priv->eq_table.eq[i]; +#ifdef CONFIG_RFS_ACCEL + int port = find_first_bit(eq->actv_ports.ports, + dev->caps.num_ports) + 1; + + if (port <= dev->caps.num_ports) { + struct mlx4_port_info *info = + &mlx4_priv(dev)->port[port]; + + if (!info->rmap) { + info->rmap = alloc_irq_cpu_rmap( + mlx4_get_eqs_per_port(dev, port)); + if (!info->rmap) { + mlx4_warn(dev, "Failed to allocate cpu rmap\n"); + err = -ENOMEM; + goto err_out_unmap; + } + } + + err = irq_cpu_rmap_add( + info->rmap, eq->irq); + if (err) + mlx4_warn(dev, "Failed adding irq rmap\n"); + } +#endif + err = mlx4_create_eq(dev, dev->quotas.cq + + MLX4_NUM_SPARE_EQE, + (dev->flags & MLX4_FLAG_MSI_X) ? 
+ i + 1 - !!(i > MLX4_EQ_ASYNC) : 0, + eq); + } + if (err) + goto err_out_unmap; + } + + if (dev->flags & MLX4_FLAG_MSI_X) { + const char *eq_name; + + snprintf(priv->eq_table.irq_names + + MLX4_EQ_ASYNC * MLX4_IRQNAME_SIZE, + MLX4_IRQNAME_SIZE, + "mlx4-async@pci:%s", + pci_name(dev->persist->pdev)); + eq_name = priv->eq_table.irq_names + + MLX4_EQ_ASYNC * MLX4_IRQNAME_SIZE; + + err = request_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq, + mlx4_msi_x_interrupt, 0, eq_name, + priv->eq_table.eq + MLX4_EQ_ASYNC); + if (err) + goto err_out_unmap; + + priv->eq_table.eq[MLX4_EQ_ASYNC].have_irq = 1; + } else { + snprintf(priv->eq_table.irq_names, + MLX4_IRQNAME_SIZE, + DRV_NAME "@pci:%s", + pci_name(dev->persist->pdev)); + err = request_irq(dev->persist->pdev->irq, mlx4_interrupt, + IRQF_SHARED, priv->eq_table.irq_names, dev); + if (err) + goto err_out_unmap; + + priv->eq_table.have_irq = 1; + } + + err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, + priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); + if (err) + mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n", + priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err); + + /* arm ASYNC eq */ + eq_set_ci(&priv->eq_table.eq[MLX4_EQ_ASYNC], 1); + + return 0; + +err_out_unmap: + while (i > 0) + mlx4_free_eq(dev, &priv->eq_table.eq[--i]); +#ifdef CONFIG_RFS_ACCEL + for (i = 1; i <= dev->caps.num_ports; i++) { + if (mlx4_priv(dev)->port[i].rmap) { + free_irq_cpu_rmap(mlx4_priv(dev)->port[i].rmap); + mlx4_priv(dev)->port[i].rmap = NULL; + } + } +#endif + mlx4_free_irqs(dev); + +err_out_clr_int: + if (!mlx4_is_slave(dev)) + mlx4_unmap_clr_int(dev); + +err_out_bitmap: + mlx4_unmap_uar(dev); + mlx4_bitmap_cleanup(&priv->eq_table.bitmap); + +err_out_free: + kfree(priv->eq_table.uar_map); + + return err; +} + +void mlx4_cleanup_eq_table(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int i; + + mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 1, + priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); + +#ifdef CONFIG_RFS_ACCEL + for (i = 1; i <= dev->caps.num_ports; i++) { + if (mlx4_priv(dev)->port[i].rmap) { + free_irq_cpu_rmap(mlx4_priv(dev)->port[i].rmap); + mlx4_priv(dev)->port[i].rmap = NULL; + } + } +#endif + mlx4_free_irqs(dev); + + for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) + mlx4_free_eq(dev, &priv->eq_table.eq[i]); + + if (!mlx4_is_slave(dev)) + mlx4_unmap_clr_int(dev); + + mlx4_unmap_uar(dev); + mlx4_bitmap_cleanup(&priv->eq_table.bitmap); + + kfree(priv->eq_table.uar_map); +} + +/* A test that verifies that we can accept interrupts + * on the vector allocated for asynchronous events + */ +int mlx4_test_async(struct mlx4_dev *dev) +{ + return mlx4_NOP(dev); +} +EXPORT_SYMBOL(mlx4_test_async); + +/* A test that verifies that we can accept interrupts + * on the given irq vector of the tested port. + * Interrupts are checked using the NOP command. 
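+ * In outline: the async event mask is remapped onto the EQ behind
+ * the tested vector (commands run in polling mode while the async
+ * EQ mapping is being moved), command completions are switched
+ * back to event mode, and a NOP is issued; its completion event
+ * must then arrive through the tested vector for the command to
+ * complete. The original mapping and event mode are restored on
+ * the way out, whatever the result.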
+ */ +int mlx4_test_interrupt(struct mlx4_dev *dev, int vector) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int err; + + /* Temporary use polling for command completions */ + mlx4_cmd_use_polling(dev); + + /* Map the new eq to handle all asynchronous events */ + err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, + priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].eqn); + if (err) { + mlx4_warn(dev, "Failed mapping eq for interrupt test\n"); + goto out; + } + + /* Go back to using events */ + mlx4_cmd_use_events(dev); + err = mlx4_NOP(dev); + + /* Return to default */ + mlx4_cmd_use_polling(dev); +out: + mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, + priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); + mlx4_cmd_use_events(dev); + + return err; +} +EXPORT_SYMBOL(mlx4_test_interrupt); + +bool mlx4_is_eq_vector_valid(struct mlx4_dev *dev, u8 port, int vector) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + vector = MLX4_CQ_TO_EQ_VECTOR(vector); + if (vector < 0 || (vector >= dev->caps.num_comp_vectors + 1) || + (vector == MLX4_EQ_ASYNC)) + return false; + + return test_bit(port - 1, priv->eq_table.eq[vector].actv_ports.ports); +} +EXPORT_SYMBOL(mlx4_is_eq_vector_valid); + +u32 mlx4_get_eqs_per_port(struct mlx4_dev *dev, u8 port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + unsigned int i; + unsigned int sum = 0; + + for (i = 0; i < dev->caps.num_comp_vectors + 1; i++) + sum += !!test_bit(port - 1, + priv->eq_table.eq[i].actv_ports.ports); + + return sum; +} +EXPORT_SYMBOL(mlx4_get_eqs_per_port); + +int mlx4_is_eq_shared(struct mlx4_dev *dev, int vector) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + vector = MLX4_CQ_TO_EQ_VECTOR(vector); + if (vector <= 0 || (vector >= dev->caps.num_comp_vectors + 1)) + return -EINVAL; + + return !!(bitmap_weight(priv->eq_table.eq[vector].actv_ports.ports, + dev->caps.num_ports) > 1); +} +EXPORT_SYMBOL(mlx4_is_eq_shared); + +struct cpu_rmap *mlx4_get_cpu_rmap(struct mlx4_dev *dev, int port) +{ + return mlx4_priv(dev)->port[port].rmap; +} +EXPORT_SYMBOL(mlx4_get_cpu_rmap); + +int mlx4_assign_eq(struct mlx4_dev *dev, u8 port, int *vector) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int err = 0, i = 0; + u32 min_ref_count_val = (u32)-1; + int requested_vector = MLX4_CQ_TO_EQ_VECTOR(*vector); + int *prequested_vector = NULL; + + + mutex_lock(&priv->msix_ctl.pool_lock); + if (requested_vector < (dev->caps.num_comp_vectors + 1) && + (requested_vector >= 0) && + (requested_vector != MLX4_EQ_ASYNC)) { + if (test_bit(port - 1, + priv->eq_table.eq[requested_vector].actv_ports.ports)) { + prequested_vector = &requested_vector; + } else { + struct mlx4_eq *eq; + + for (i = 1; i < port; + requested_vector += mlx4_get_eqs_per_port(dev, i++)) + ; + + eq = &priv->eq_table.eq[requested_vector]; + if (requested_vector < dev->caps.num_comp_vectors + 1 && + test_bit(port - 1, eq->actv_ports.ports)) { + prequested_vector = &requested_vector; + } + } + } + + if (!prequested_vector) { + requested_vector = -1; + for (i = 0; min_ref_count_val && i < dev->caps.num_comp_vectors + 1; + i++) { + struct mlx4_eq *eq = &priv->eq_table.eq[i]; + + if (min_ref_count_val > eq->ref_count && + test_bit(port - 1, eq->actv_ports.ports)) { + min_ref_count_val = eq->ref_count; + requested_vector = i; + } + } + + if (requested_vector < 0) { + err = -ENOSPC; + goto err_unlock; + } + + prequested_vector = &requested_vector; + } + + if (!test_bit(*prequested_vector, priv->msix_ctl.pool_bm) && + dev->flags & MLX4_FLAG_MSI_X) { + set_bit(*prequested_vector, priv->msix_ctl.pool_bm); + 
snprintf(priv->eq_table.irq_names + + *prequested_vector * MLX4_IRQNAME_SIZE, + MLX4_IRQNAME_SIZE, "mlx4-%d@%s", + *prequested_vector, dev_name(&dev->persist->pdev->dev)); + + err = request_irq(priv->eq_table.eq[*prequested_vector].irq, + mlx4_msi_x_interrupt, 0, + &priv->eq_table.irq_names[*prequested_vector << 5], + priv->eq_table.eq + *prequested_vector); + + if (err) { + clear_bit(*prequested_vector, priv->msix_ctl.pool_bm); + *prequested_vector = -1; + } else { +#if defined(CONFIG_SMP) + mlx4_set_eq_affinity_hint(priv, *prequested_vector); +#endif + eq_set_ci(&priv->eq_table.eq[*prequested_vector], 1); + priv->eq_table.eq[*prequested_vector].have_irq = 1; + } + } + + if (!err && *prequested_vector >= 0) + priv->eq_table.eq[*prequested_vector].ref_count++; + +err_unlock: + mutex_unlock(&priv->msix_ctl.pool_lock); + + if (!err && *prequested_vector >= 0) + *vector = MLX4_EQ_TO_CQ_VECTOR(*prequested_vector); + else + *vector = 0; + + return err; +} +EXPORT_SYMBOL(mlx4_assign_eq); + +int mlx4_eq_get_irq(struct mlx4_dev *dev, int cq_vec) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + return priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq_vec)].irq; +} +EXPORT_SYMBOL(mlx4_eq_get_irq); + +void mlx4_release_eq(struct mlx4_dev *dev, int vec) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int eq_vec = MLX4_CQ_TO_EQ_VECTOR(vec); + + mutex_lock(&priv->msix_ctl.pool_lock); + priv->eq_table.eq[eq_vec].ref_count--; + + /* once we allocated EQ, we don't release it because it might be binded + * to cpu_rmap. + */ + mutex_unlock(&priv->msix_ctl.pool_lock); +} +EXPORT_SYMBOL(mlx4_release_eq); + diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c new file mode 100644 index 000000000..fe48d20d6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -0,0 +1,3111 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/etherdevice.h> +#include <linux/mlx4/cmd.h> +#include <linux/module.h> +#include <linux/cache.h> +#include <linux/kernel.h> +#include <uapi/rdma/mlx4-abi.h> + +#include "fw.h" +#include "icm.h" + +enum { + MLX4_COMMAND_INTERFACE_MIN_REV = 2, + MLX4_COMMAND_INTERFACE_MAX_REV = 3, + MLX4_COMMAND_INTERFACE_NEW_PORT_CMDS = 3, +}; + +extern void __buggy_use_of_MLX4_GET(void); +extern void __buggy_use_of_MLX4_PUT(void); + +static bool enable_qos; +module_param(enable_qos, bool, 0444); +MODULE_PARM_DESC(enable_qos, "Enable Enhanced QoS support (default: off)"); + +#define MLX4_GET(dest, source, offset) \ + do { \ + void *__p = (char *) (source) + (offset); \ + __be64 val; \ + switch (sizeof(dest)) { \ + case 1: (dest) = *(u8 *) __p; break; \ + case 2: (dest) = be16_to_cpup(__p); break; \ + case 4: (dest) = be32_to_cpup(__p); break; \ + case 8: val = get_unaligned((__be64 *)__p); \ + (dest) = be64_to_cpu(val); break; \ + default: __buggy_use_of_MLX4_GET(); \ + } \ + } while (0) + +#define MLX4_PUT(dest, source, offset) \ + do { \ + void *__d = ((char *) (dest) + (offset)); \ + switch (sizeof(source)) { \ + case 1: *(u8 *) __d = (source); break; \ + case 2: *(__be16 *) __d = cpu_to_be16(source); break; \ + case 4: *(__be32 *) __d = cpu_to_be32(source); break; \ + case 8: *(__be64 *) __d = cpu_to_be64(source); break; \ + default: __buggy_use_of_MLX4_PUT(); \ + } \ + } while (0) + +static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags) +{ + static const char *fname[] = { + [ 0] = "RC transport", + [ 1] = "UC transport", + [ 2] = "UD transport", + [ 3] = "XRC transport", + [ 6] = "SRQ support", + [ 7] = "IPoIB checksum offload", + [ 8] = "P_Key violation counter", + [ 9] = "Q_Key violation counter", + [12] = "Dual Port Different Protocol (DPDP) support", + [15] = "Big LSO headers", + [16] = "MW support", + [17] = "APM support", + [18] = "Atomic ops support", + [19] = "Raw multicast support", + [20] = "Address vector port checking support", + [21] = "UD multicast support", + [30] = "IBoE support", + [32] = "Unicast loopback support", + [34] = "FCS header control", + [37] = "Wake On LAN (port1) support", + [38] = "Wake On LAN (port2) support", + [40] = "UDP RSS support", + [41] = "Unicast VEP steering support", + [42] = "Multicast VEP steering support", + [48] = "Counters support", + [52] = "RSS IP fragments support", + [53] = "Port ETS Scheduler support", + [55] = "Port link type sensing support", + [59] = "Port management change event support", + [61] = "64 byte EQE support", + [62] = "64 byte CQE support", + }; + int i; + + mlx4_dbg(dev, "DEV_CAP flags:\n"); + for (i = 0; i < ARRAY_SIZE(fname); ++i) + if (fname[i] && (flags & (1LL << i))) + mlx4_dbg(dev, " %s\n", fname[i]); +} + +static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) +{ + static const char * const fname[] = { + [0] = "RSS support", + [1] = "RSS Toeplitz Hash Function support", + [2] = "RSS XOR Hash Function support", + [3] = "Device managed flow steering support", + [4] = "Automatic MAC reassignment support", + [5] = "Time stamping support", + [6] = "VST (control vlan insertion/stripping) support", + [7] = "FSM (MAC anti-spoofing) support", + [8] = "Dynamic QP updates support", + [9] = "Device managed flow steering IPoIB support", + [10] = "TCP/IP offloads/flow-steering for VXLAN support", + [11] = "MAD DEMUX (Secure-Host) support", + [12] = "Large cache line (>64B) CQE stride support", + [13] = "Large cache line (>64B) EQE stride support", + [14] = "Ethernet protocol control support", + 
[15] = "Ethernet Backplane autoneg support", + [16] = "CONFIG DEV support", + [17] = "Asymmetric EQs support", + [18] = "More than 80 VFs support", + [19] = "Performance optimized for limited rule configuration flow steering support", + [20] = "Recoverable error events support", + [21] = "Port Remap support", + [22] = "QCN support", + [23] = "QP rate limiting support", + [24] = "Ethernet Flow control statistics support", + [25] = "Granular QoS per VF support", + [26] = "Port ETS Scheduler support", + [27] = "Port beacon support", + [28] = "RX-ALL support", + [29] = "802.1ad offload support", + [31] = "Modifying loopback source checks using UPDATE_QP support", + [32] = "Loopback source checks support", + [33] = "RoCEv2 support", + [34] = "DMFS Sniffer support (UC & MC)", + [35] = "Diag counters per port", + [36] = "QinQ VST mode support", + [37] = "sl to vl mapping table change event support", + [38] = "user MAC support", + [39] = "Report driver version to FW support", + [40] = "SW CQ initialization support", + }; + int i; + + for (i = 0; i < ARRAY_SIZE(fname); ++i) + if (fname[i] && (flags & (1LL << i))) + mlx4_dbg(dev, " %s\n", fname[i]); +} + +int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *inbox; + int err = 0; + +#define MOD_STAT_CFG_IN_SIZE 0x100 + +#define MOD_STAT_CFG_PG_SZ_M_OFFSET 0x002 +#define MOD_STAT_CFG_PG_SZ_OFFSET 0x003 + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; + + MLX4_PUT(inbox, cfg->log_pg_sz, MOD_STAT_CFG_PG_SZ_OFFSET); + MLX4_PUT(inbox, cfg->log_pg_sz_m, MOD_STAT_CFG_PG_SZ_M_OFFSET); + + err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_MOD_STAT_CFG, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +int mlx4_QUERY_FUNC(struct mlx4_dev *dev, struct mlx4_func *func, int slave) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *outbox; + u8 in_modifier; + u8 field; + u16 field16; + int err; + +#define QUERY_FUNC_BUS_OFFSET 0x00 +#define QUERY_FUNC_DEVICE_OFFSET 0x01 +#define QUERY_FUNC_FUNCTION_OFFSET 0x01 +#define QUERY_FUNC_PHYSICAL_FUNCTION_OFFSET 0x03 +#define QUERY_FUNC_RSVD_EQS_OFFSET 0x04 +#define QUERY_FUNC_MAX_EQ_OFFSET 0x06 +#define QUERY_FUNC_RSVD_UARS_OFFSET 0x0b + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + in_modifier = slave; + + err = mlx4_cmd_box(dev, 0, mailbox->dma, in_modifier, 0, + MLX4_CMD_QUERY_FUNC, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) + goto out; + + MLX4_GET(field, outbox, QUERY_FUNC_BUS_OFFSET); + func->bus = field & 0xf; + MLX4_GET(field, outbox, QUERY_FUNC_DEVICE_OFFSET); + func->device = field & 0xf1; + MLX4_GET(field, outbox, QUERY_FUNC_FUNCTION_OFFSET); + func->function = field & 0x7; + MLX4_GET(field, outbox, QUERY_FUNC_PHYSICAL_FUNCTION_OFFSET); + func->physical_function = field & 0xf; + MLX4_GET(field16, outbox, QUERY_FUNC_RSVD_EQS_OFFSET); + func->rsvd_eqs = field16 & 0xffff; + MLX4_GET(field16, outbox, QUERY_FUNC_MAX_EQ_OFFSET); + func->max_eq = field16 & 0xffff; + MLX4_GET(field, outbox, QUERY_FUNC_RSVD_UARS_OFFSET); + func->rsvd_uars = field & 0x0f; + + mlx4_dbg(dev, "Bus: %d, Device: %d, Function: %d, Physical function: %d, Max EQs: %d, Reserved EQs: %d, Reserved UARs: %d\n", + func->bus, func->device, func->function, func->physical_function, + func->max_eq, func->rsvd_eqs, func->rsvd_uars); + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return 
err; +} + +static int mlx4_activate_vst_qinq(struct mlx4_priv *priv, int slave, int port) +{ + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_vport_state *vp_admin; + int err; + + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + + if (vp_admin->default_vlan != vp_oper->state.default_vlan) { + err = __mlx4_register_vlan(&priv->dev, port, + vp_admin->default_vlan, + &vp_oper->vlan_idx); + if (err) { + vp_oper->vlan_idx = NO_INDX; + mlx4_warn(&priv->dev, + "No vlan resources slave %d, port %d\n", + slave, port); + return err; + } + mlx4_dbg(&priv->dev, "alloc vlan %d idx %d slave %d port %d\n", + (int)(vp_oper->state.default_vlan), + vp_oper->vlan_idx, slave, port); + } + vp_oper->state.vlan_proto = vp_admin->vlan_proto; + vp_oper->state.default_vlan = vp_admin->default_vlan; + vp_oper->state.default_qos = vp_admin->default_qos; + + return 0; +} + +static int mlx4_handle_vst_qinq(struct mlx4_priv *priv, int slave, int port) +{ + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_slave_state *slave_state; + struct mlx4_vport_state *vp_admin; + int err; + + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + slave_state = &priv->mfunc.master.slave_state[slave]; + + if ((vp_admin->vlan_proto != htons(ETH_P_8021AD)) || + (!slave_state->active)) + return 0; + + if (vp_oper->state.vlan_proto == vp_admin->vlan_proto && + vp_oper->state.default_vlan == vp_admin->default_vlan && + vp_oper->state.default_qos == vp_admin->default_qos) + return 0; + + if (!slave_state->vst_qinq_supported) { + /* Warn and revert the request to set vst QinQ mode */ + vp_admin->vlan_proto = vp_oper->state.vlan_proto; + vp_admin->default_vlan = vp_oper->state.default_vlan; + vp_admin->default_qos = vp_oper->state.default_qos; + + mlx4_warn(&priv->dev, + "Slave %d does not support VST QinQ mode\n", slave); + return 0; + } + + err = mlx4_activate_vst_qinq(priv, slave, port); + return err; +} + +int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + u8 field, port; + u32 size, proxy_qp, qkey; + int err = 0; + struct mlx4_func func; + +#define QUERY_FUNC_CAP_FLAGS_OFFSET 0x0 +#define QUERY_FUNC_CAP_NUM_PORTS_OFFSET 0x1 +#define QUERY_FUNC_CAP_PF_BHVR_OFFSET 0x4 +#define QUERY_FUNC_CAP_FMR_OFFSET 0x8 +#define QUERY_FUNC_CAP_QP_QUOTA_OFFSET_DEP 0x10 +#define QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP 0x14 +#define QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET_DEP 0x18 +#define QUERY_FUNC_CAP_MPT_QUOTA_OFFSET_DEP 0x20 +#define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET_DEP 0x24 +#define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP 0x28 +#define QUERY_FUNC_CAP_MAX_EQ_OFFSET 0x2c +#define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0x30 +#define QUERY_FUNC_CAP_QP_RESD_LKEY_OFFSET 0x48 + +#define QUERY_FUNC_CAP_QP_QUOTA_OFFSET 0x50 +#define QUERY_FUNC_CAP_CQ_QUOTA_OFFSET 0x54 +#define QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET 0x58 +#define QUERY_FUNC_CAP_MPT_QUOTA_OFFSET 0x60 +#define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET 0x64 +#define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET 0x68 + +#define QUERY_FUNC_CAP_EXTRA_FLAGS_OFFSET 0x6c + +#define QUERY_FUNC_CAP_FMR_FLAG 0x80 +#define QUERY_FUNC_CAP_FLAG_RDMA 0x40 +#define QUERY_FUNC_CAP_FLAG_ETH 0x80 +#define QUERY_FUNC_CAP_FLAG_QUOTAS 0x10 +#define QUERY_FUNC_CAP_FLAG_RESD_LKEY 0x08 +#define QUERY_FUNC_CAP_FLAG_VALID_MAILBOX 0x04 + 
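+/* Bits placed in the extra-flags word at
+ * QUERY_FUNC_CAP_EXTRA_FLAGS_OFFSET when the mailbox is filled in
+ * further below.
+ */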
+#define QUERY_FUNC_CAP_EXTRA_FLAGS_BF_QP_ALLOC_FLAG (1UL << 31) +#define QUERY_FUNC_CAP_EXTRA_FLAGS_A0_QP_ALLOC_FLAG (1UL << 30) + +/* when opcode modifier = 1 */ +#define QUERY_FUNC_CAP_PHYS_PORT_OFFSET 0x3 +#define QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET 0x4 +#define QUERY_FUNC_CAP_FLAGS0_OFFSET 0x8 +#define QUERY_FUNC_CAP_FLAGS1_OFFSET 0xc + +#define QUERY_FUNC_CAP_QP0_TUNNEL 0x10 +#define QUERY_FUNC_CAP_QP0_PROXY 0x14 +#define QUERY_FUNC_CAP_QP1_TUNNEL 0x18 +#define QUERY_FUNC_CAP_QP1_PROXY 0x1c +#define QUERY_FUNC_CAP_PHYS_PORT_ID 0x28 + +#define QUERY_FUNC_CAP_FLAGS1_FORCE_MAC 0x40 +#define QUERY_FUNC_CAP_FLAGS1_FORCE_VLAN 0x80 +#define QUERY_FUNC_CAP_FLAGS1_NIC_INFO 0x10 +#define QUERY_FUNC_CAP_VF_ENABLE_QP0 0x08 + +#define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80 +#define QUERY_FUNC_CAP_PHV_BIT 0x40 +#define QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE 0x20 + +#define QUERY_FUNC_CAP_SUPPORTS_VST_QINQ BIT(30) +#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS BIT(31) + + if (vhcr->op_modifier == 1) { + struct mlx4_active_ports actv_ports = + mlx4_get_active_ports(dev, slave); + int converted_port = mlx4_slave_convert_port( + dev, slave, vhcr->in_modifier); + struct mlx4_vport_oper_state *vp_oper; + + if (converted_port < 0) + return -EINVAL; + + vhcr->in_modifier = converted_port; + /* phys-port = logical-port */ + field = vhcr->in_modifier - + find_first_bit(actv_ports.ports, dev->caps.num_ports); + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); + + port = vhcr->in_modifier; + proxy_qp = dev->phys_caps.base_proxy_sqpn + 8 * slave + port - 1; + + /* Set nic_info bit to mark new fields support */ + field = QUERY_FUNC_CAP_FLAGS1_NIC_INFO; + + if (mlx4_vf_smi_enabled(dev, slave, port) && + !mlx4_get_parav_qkey(dev, proxy_qp, &qkey)) { + field |= QUERY_FUNC_CAP_VF_ENABLE_QP0; + MLX4_PUT(outbox->buf, qkey, + QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET); + } + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET); + + /* size is now the QP number */ + size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + port - 1; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL); + + size += 2; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_TUNNEL); + + MLX4_PUT(outbox->buf, proxy_qp, QUERY_FUNC_CAP_QP0_PROXY); + proxy_qp += 2; + MLX4_PUT(outbox->buf, proxy_qp, QUERY_FUNC_CAP_QP1_PROXY); + + MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier], + QUERY_FUNC_CAP_PHYS_PORT_ID); + + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + err = mlx4_handle_vst_qinq(priv, slave, port); + if (err) + return err; + + field = 0; + if (dev->caps.phv_bit[port]) + field |= QUERY_FUNC_CAP_PHV_BIT; + if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD)) + field |= QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE; + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS0_OFFSET); + + } else if (vhcr->op_modifier == 0) { + struct mlx4_active_ports actv_ports = + mlx4_get_active_ports(dev, slave); + struct mlx4_slave_state *slave_state = + &priv->mfunc.master.slave_state[slave]; + + /* enable rdma and ethernet interfaces, new quota locations, + * and reserved lkey + */ + field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA | + QUERY_FUNC_CAP_FLAG_QUOTAS | QUERY_FUNC_CAP_FLAG_VALID_MAILBOX | + QUERY_FUNC_CAP_FLAG_RESD_LKEY); + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS_OFFSET); + + field = min( + bitmap_weight(actv_ports.ports, dev->caps.num_ports), + (unsigned int) dev->caps.num_ports); + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); + + size = dev->caps.function_caps; /* set 
PF behaviours */ + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_PF_BHVR_OFFSET); + + field = 0; /* protected FMR support not available as yet */ + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FMR_OFFSET); + + size = priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[slave]; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); + size = dev->caps.num_qps; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_QUOTA_OFFSET_DEP); + + size = priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[slave]; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); + size = dev->caps.num_srqs; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET_DEP); + + size = priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[slave]; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); + size = dev->caps.num_cqs; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP); + + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) || + mlx4_QUERY_FUNC(dev, &func, slave)) { + size = vhcr->in_modifier & + QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS ? + dev->caps.num_eqs : + rounddown_pow_of_two(dev->caps.num_eqs); + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET); + size = dev->caps.reserved_eqs; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); + } else { + size = vhcr->in_modifier & + QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS ? + func.max_eq : + rounddown_pow_of_two(func.max_eq); + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET); + size = func.rsvd_eqs; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); + } + + size = priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[slave]; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); + size = dev->caps.num_mpts; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET_DEP); + + size = priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[slave]; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); + size = dev->caps.num_mtts; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET_DEP); + + size = dev->caps.num_mgms + dev->caps.num_amgms; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP); + + size = QUERY_FUNC_CAP_EXTRA_FLAGS_BF_QP_ALLOC_FLAG | + QUERY_FUNC_CAP_EXTRA_FLAGS_A0_QP_ALLOC_FLAG; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_EXTRA_FLAGS_OFFSET); + + size = dev->caps.reserved_lkey + ((slave << 8) & 0xFF00); + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_RESD_LKEY_OFFSET); + + if (vhcr->in_modifier & QUERY_FUNC_CAP_SUPPORTS_VST_QINQ) + slave_state->vst_qinq_supported = true; + + } else + err = -EINVAL; + + return err; +} + +int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, + struct mlx4_func_cap *func_cap) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *outbox; + u8 field, op_modifier; + u32 size, qkey; + int err = 0, quotas = 0; + u32 in_modifier; + u32 slave_caps; + + op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */ + slave_caps = QUERY_FUNC_CAP_SUPPORTS_VST_QINQ | + QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS; + in_modifier = op_modifier ? 
gen_or_port : slave_caps; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + err = mlx4_cmd_box(dev, 0, mailbox->dma, in_modifier, op_modifier, + MLX4_CMD_QUERY_FUNC_CAP, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (err) + goto out; + + outbox = mailbox->buf; + + if (!op_modifier) { + MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS_OFFSET); + if (!(field & (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA))) { + mlx4_err(dev, "The host supports neither eth nor rdma interfaces\n"); + err = -EPROTONOSUPPORT; + goto out; + } + func_cap->flags = field; + quotas = !!(func_cap->flags & QUERY_FUNC_CAP_FLAG_QUOTAS); + + MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); + func_cap->num_ports = field; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET); + func_cap->pf_context_behaviour = size; + + if (quotas) { + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); + func_cap->qp_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); + func_cap->srq_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); + func_cap->cq_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); + func_cap->mpt_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); + func_cap->mtt_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); + func_cap->mcg_quota = size & 0xFFFFFF; + + } else { + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET_DEP); + func_cap->qp_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET_DEP); + func_cap->srq_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP); + func_cap->cq_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET_DEP); + func_cap->mpt_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET_DEP); + func_cap->mtt_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP); + func_cap->mcg_quota = size & 0xFFFFFF; + } + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET); + func_cap->max_eq = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); + func_cap->reserved_eq = size & 0xFFFFFF; + + if (func_cap->flags & QUERY_FUNC_CAP_FLAG_RESD_LKEY) { + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_RESD_LKEY_OFFSET); + func_cap->reserved_lkey = size; + } else { + func_cap->reserved_lkey = 0; + } + + func_cap->extra_flags = 0; + + /* Mailbox data from 0x6c and onward should only be treated if + * QUERY_FUNC_CAP_FLAG_VALID_MAILBOX is set in func_cap->flags + */ + if (func_cap->flags & QUERY_FUNC_CAP_FLAG_VALID_MAILBOX) { + MLX4_GET(size, outbox, QUERY_FUNC_CAP_EXTRA_FLAGS_OFFSET); + if (size & QUERY_FUNC_CAP_EXTRA_FLAGS_BF_QP_ALLOC_FLAG) + func_cap->extra_flags |= MLX4_QUERY_FUNC_FLAGS_BF_RES_QP; + if (size & QUERY_FUNC_CAP_EXTRA_FLAGS_A0_QP_ALLOC_FLAG) + func_cap->extra_flags |= MLX4_QUERY_FUNC_FLAGS_A0_RES_QP; + } + + goto out; + } + + /* logical port query */ + if (gen_or_port > dev->caps.num_ports) { + err = -EINVAL; + goto out; + } + + MLX4_GET(func_cap->flags1, outbox, QUERY_FUNC_CAP_FLAGS1_OFFSET); + if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_ETH) { + if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_FORCE_VLAN) { + mlx4_err(dev, "VLAN is enforced on this port\n"); + err = -EPROTONOSUPPORT; + goto out; + } + + if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_FORCE_MAC) { + mlx4_err(dev, 
"Force mac is enabled on this port\n"); + err = -EPROTONOSUPPORT; + goto out; + } + } else if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_IB) { + MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET); + if (field & QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID) { + mlx4_err(dev, "phy_wqe_gid is enforced on this ib port\n"); + err = -EPROTONOSUPPORT; + goto out; + } + } + + MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); + func_cap->physical_port = field; + if (func_cap->physical_port != gen_or_port) { + err = -EINVAL; + goto out; + } + + if (func_cap->flags1 & QUERY_FUNC_CAP_VF_ENABLE_QP0) { + MLX4_GET(qkey, outbox, QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET); + func_cap->spec_qps.qp0_qkey = qkey; + } else { + func_cap->spec_qps.qp0_qkey = 0; + } + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL); + func_cap->spec_qps.qp0_tunnel = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_PROXY); + func_cap->spec_qps.qp0_proxy = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_TUNNEL); + func_cap->spec_qps.qp1_tunnel = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_PROXY); + func_cap->spec_qps.qp1_proxy = size & 0xFFFFFF; + + if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_NIC_INFO) + MLX4_GET(func_cap->phys_port_id, outbox, + QUERY_FUNC_CAP_PHYS_PORT_ID); + + MLX4_GET(func_cap->flags0, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET); + + /* All other resources are allocated by the master, but we still report + * 'num' and 'reserved' capabilities as follows: + * - num remains the maximum resource index + * - 'num - reserved' is the total available objects of a resource, but + * resource indices may be less than 'reserved' + * TODO: set per-resource quotas */ + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + + return err; +} + +static void disable_unsupported_roce_caps(void *buf); + +int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *outbox; + u8 field; + u32 field32, flags, ext_flags; + u16 size; + u16 stat_rate; + int err; + int i; + +#define QUERY_DEV_CAP_OUT_SIZE 0x100 +#define QUERY_DEV_CAP_MAX_SRQ_SZ_OFFSET 0x10 +#define QUERY_DEV_CAP_MAX_QP_SZ_OFFSET 0x11 +#define QUERY_DEV_CAP_RSVD_QP_OFFSET 0x12 +#define QUERY_DEV_CAP_MAX_QP_OFFSET 0x13 +#define QUERY_DEV_CAP_RSVD_SRQ_OFFSET 0x14 +#define QUERY_DEV_CAP_MAX_SRQ_OFFSET 0x15 +#define QUERY_DEV_CAP_RSVD_EEC_OFFSET 0x16 +#define QUERY_DEV_CAP_MAX_EEC_OFFSET 0x17 +#define QUERY_DEV_CAP_MAX_CQ_SZ_OFFSET 0x19 +#define QUERY_DEV_CAP_RSVD_CQ_OFFSET 0x1a +#define QUERY_DEV_CAP_MAX_CQ_OFFSET 0x1b +#define QUERY_DEV_CAP_MAX_MPT_OFFSET 0x1d +#define QUERY_DEV_CAP_RSVD_EQ_OFFSET 0x1e +#define QUERY_DEV_CAP_MAX_EQ_OFFSET 0x1f +#define QUERY_DEV_CAP_RSVD_MTT_OFFSET 0x20 +#define QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET 0x21 +#define QUERY_DEV_CAP_RSVD_MRW_OFFSET 0x22 +#define QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET 0x23 +#define QUERY_DEV_CAP_NUM_SYS_EQ_OFFSET 0x26 +#define QUERY_DEV_CAP_MAX_AV_OFFSET 0x27 +#define QUERY_DEV_CAP_MAX_REQ_QP_OFFSET 0x29 +#define QUERY_DEV_CAP_MAX_RES_QP_OFFSET 0x2b +#define QUERY_DEV_CAP_MAX_GSO_OFFSET 0x2d +#define QUERY_DEV_CAP_RSS_OFFSET 0x2e +#define QUERY_DEV_CAP_MAX_RDMA_OFFSET 0x2f +#define QUERY_DEV_CAP_RSZ_SRQ_OFFSET 0x33 +#define QUERY_DEV_CAP_PORT_BEACON_OFFSET 0x34 +#define QUERY_DEV_CAP_ACK_DELAY_OFFSET 0x35 +#define QUERY_DEV_CAP_MTU_WIDTH_OFFSET 0x36 +#define QUERY_DEV_CAP_VL_PORT_OFFSET 0x37 +#define QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET 0x38 +#define QUERY_DEV_CAP_MAX_GID_OFFSET 0x3b +#define QUERY_DEV_CAP_RATE_SUPPORT_OFFSET 
0x3c +#define QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET 0x3e +#define QUERY_DEV_CAP_MAX_PKEY_OFFSET 0x3f +#define QUERY_DEV_CAP_EXT_FLAGS_OFFSET 0x40 +#define QUERY_DEV_CAP_WOL_OFFSET 0x43 +#define QUERY_DEV_CAP_FLAGS_OFFSET 0x44 +#define QUERY_DEV_CAP_RSVD_UAR_OFFSET 0x48 +#define QUERY_DEV_CAP_UAR_SZ_OFFSET 0x49 +#define QUERY_DEV_CAP_PAGE_SZ_OFFSET 0x4b +#define QUERY_DEV_CAP_BF_OFFSET 0x4c +#define QUERY_DEV_CAP_LOG_BF_REG_SZ_OFFSET 0x4d +#define QUERY_DEV_CAP_LOG_MAX_BF_REGS_PER_PAGE_OFFSET 0x4e +#define QUERY_DEV_CAP_LOG_MAX_BF_PAGES_OFFSET 0x4f +#define QUERY_DEV_CAP_MAX_SG_SQ_OFFSET 0x51 +#define QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET 0x52 +#define QUERY_DEV_CAP_MAX_SG_RQ_OFFSET 0x55 +#define QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET 0x56 +#define QUERY_DEV_CAP_USER_MAC_EN_OFFSET 0x5C +#define QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET 0x5D +#define QUERY_DEV_CAP_MAX_QP_MCG_OFFSET 0x61 +#define QUERY_DEV_CAP_RSVD_MCG_OFFSET 0x62 +#define QUERY_DEV_CAP_MAX_MCG_OFFSET 0x63 +#define QUERY_DEV_CAP_RSVD_PD_OFFSET 0x64 +#define QUERY_DEV_CAP_MAX_PD_OFFSET 0x65 +#define QUERY_DEV_CAP_RSVD_XRC_OFFSET 0x66 +#define QUERY_DEV_CAP_MAX_XRC_OFFSET 0x67 +#define QUERY_DEV_CAP_MAX_COUNTERS_OFFSET 0x68 +#define QUERY_DEV_CAP_PORT_FLOWSTATS_COUNTERS_OFFSET 0x70 +#define QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET 0x70 +#define QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET 0x74 +#define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76 +#define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET 0x77 +#define QUERY_DEV_CAP_SL2VL_EVENT_OFFSET 0x78 +#define QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE 0x7a +#define QUERY_DEV_CAP_ECN_QCN_VER_OFFSET 0x7b +#define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80 +#define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET 0x82 +#define QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET 0x84 +#define QUERY_DEV_CAP_ALTC_ENTRY_SZ_OFFSET 0x86 +#define QUERY_DEV_CAP_EQC_ENTRY_SZ_OFFSET 0x88 +#define QUERY_DEV_CAP_CQC_ENTRY_SZ_OFFSET 0x8a +#define QUERY_DEV_CAP_SRQ_ENTRY_SZ_OFFSET 0x8c +#define QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET 0x8e +#define QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET 0x90 +#define QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET 0x92 +#define QUERY_DEV_CAP_BMME_FLAGS_OFFSET 0x94 +#define QUERY_DEV_CAP_CONFIG_DEV_OFFSET 0x94 +#define QUERY_DEV_CAP_PHV_EN_OFFSET 0x96 +#define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98 +#define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 +#define QUERY_DEV_CAP_ETH_BACKPL_OFFSET 0x9c +#define QUERY_DEV_CAP_DIAG_RPRT_PER_PORT 0x9c +#define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d +#define QUERY_DEV_CAP_VXLAN 0x9e +#define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0 +#define QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_BASE_OFFSET 0xa8 +#define QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_RANGE_OFFSET 0xac +#define QUERY_DEV_CAP_MAP_CLOCK_TO_USER 0xc1 +#define QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET 0xcc +#define QUERY_DEV_CAP_QP_RATE_LIMIT_MAX_OFFSET 0xd0 +#define QUERY_DEV_CAP_QP_RATE_LIMIT_MIN_OFFSET 0xd2 +#define QUERY_DEV_CAP_HEALTH_BUFFER_ADDRESS_OFFSET 0xe4 + + dev_cap->flags2 = 0; + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + if (err) + goto out; + + if (mlx4_is_mfunc(dev)) + disable_unsupported_roce_caps(outbox); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAP_CLOCK_TO_USER); + dev_cap->map_clock_to_user = field & 0x80; + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_QP_OFFSET); + dev_cap->reserved_qps = 1 << (field & 0xf); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_OFFSET); + dev_cap->max_qps = 
1 << (field & 0x1f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_SRQ_OFFSET); + dev_cap->reserved_srqs = 1 << (field >> 4); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SRQ_OFFSET); + dev_cap->max_srqs = 1 << (field & 0x1f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_CQ_SZ_OFFSET); + dev_cap->max_cq_sz = 1 << field; + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_CQ_OFFSET); + dev_cap->reserved_cqs = 1 << (field & 0xf); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_CQ_OFFSET); + dev_cap->max_cqs = 1 << (field & 0x1f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MPT_OFFSET); + dev_cap->max_mpts = 1 << (field & 0x3f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET); + dev_cap->reserved_eqs = 1 << (field & 0xf); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET); + dev_cap->max_eqs = 1 << (field & 0xf); + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET); + dev_cap->reserved_mtts = 1 << (field >> 4); + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MRW_OFFSET); + dev_cap->reserved_mrws = 1 << (field & 0xf); + MLX4_GET(size, outbox, QUERY_DEV_CAP_NUM_SYS_EQ_OFFSET); + dev_cap->num_sys_eqs = size & 0xfff; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_REQ_QP_OFFSET); + dev_cap->max_requester_per_qp = 1 << (field & 0x3f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RES_QP_OFFSET); + dev_cap->max_responder_per_qp = 1 << (field & 0x3f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GSO_OFFSET); + field &= 0x1f; + if (!field) + dev_cap->max_gso_sz = 0; + else + dev_cap->max_gso_sz = 1 << field; + + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSS_OFFSET); + if (field & 0x20) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS_XOR; + if (field & 0x10) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS_TOP; + field &= 0xf; + if (field) { + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS; + dev_cap->max_rss_tbl_sz = 1 << field; + } else + dev_cap->max_rss_tbl_sz = 0; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RDMA_OFFSET); + dev_cap->max_rdma_global = 1 << (field & 0x3f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_ACK_DELAY_OFFSET); + dev_cap->local_ca_ack_delay = field & 0x1f; + MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET); + dev_cap->num_ports = field & 0xf; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET); + dev_cap->max_msg_sz = 1 << (field & 0x1f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_PORT_FLOWSTATS_COUNTERS_OFFSET); + if (field & 0x10) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN; + MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET); + if (field & 0x80) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN; + dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f; + if (field & 0x20) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER; + MLX4_GET(field, outbox, QUERY_DEV_CAP_PORT_BEACON_OFFSET); + if (field & 0x80) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_BEACON; + MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); + if (field & 0x80) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_IPOIB; + MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET); + dev_cap->fs_max_num_qp_per_entry = field; + MLX4_GET(field, outbox, QUERY_DEV_CAP_SL2VL_EVENT_OFFSET); + if (field & (1 << 5)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT; + MLX4_GET(field, outbox, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET); + if (field & 0x1) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_QCN; + MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET); + dev_cap->stat_rate_support = stat_rate; + MLX4_GET(field, outbox, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET); + if (field & 0x80) + 
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_TS; + MLX4_GET(ext_flags, outbox, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); + MLX4_GET(flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET); + dev_cap->flags = flags | (u64)ext_flags << 32; + MLX4_GET(field, outbox, QUERY_DEV_CAP_WOL_OFFSET); + dev_cap->wol_port[1] = !!(field & 0x20); + dev_cap->wol_port[2] = !!(field & 0x40); + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_UAR_OFFSET); + dev_cap->reserved_uars = field >> 4; + MLX4_GET(field, outbox, QUERY_DEV_CAP_UAR_SZ_OFFSET); + dev_cap->uar_size = 1 << ((field & 0x3f) + 20); + MLX4_GET(field, outbox, QUERY_DEV_CAP_PAGE_SZ_OFFSET); + dev_cap->min_page_sz = 1 << field; + + MLX4_GET(field, outbox, QUERY_DEV_CAP_BF_OFFSET); + if (field & 0x80) { + MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_BF_REG_SZ_OFFSET); + dev_cap->bf_reg_size = 1 << (field & 0x1f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_MAX_BF_REGS_PER_PAGE_OFFSET); + if ((1 << (field & 0x3f)) > (PAGE_SIZE / dev_cap->bf_reg_size)) + field = 3; + dev_cap->bf_regs_per_page = 1 << (field & 0x3f); + } else { + dev_cap->bf_reg_size = 0; + } + + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SG_SQ_OFFSET); + dev_cap->max_sq_sg = field; + MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET); + dev_cap->max_sq_desc_sz = size; + + MLX4_GET(field, outbox, QUERY_DEV_CAP_USER_MAC_EN_OFFSET); + if (field & (1 << 2)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_USER_MAC_EN; + MLX4_GET(field, outbox, QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET); + if (field & 0x1) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_MCG_OFFSET); + dev_cap->max_qp_per_mcg = 1 << field; + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MCG_OFFSET); + dev_cap->reserved_mgms = field & 0xf; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MCG_OFFSET); + dev_cap->max_mcgs = 1 << field; + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_PD_OFFSET); + dev_cap->reserved_pds = field >> 4; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PD_OFFSET); + dev_cap->max_pds = 1 << (field & 0x3f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_XRC_OFFSET); + dev_cap->reserved_xrcds = field >> 4; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_XRC_OFFSET); + dev_cap->max_xrcds = 1 << (field & 0x1f); + + MLX4_GET(size, outbox, QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET); + dev_cap->rdmarc_entry_sz = size; + MLX4_GET(size, outbox, QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET); + dev_cap->qpc_entry_sz = size; + MLX4_GET(size, outbox, QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET); + dev_cap->aux_entry_sz = size; + MLX4_GET(size, outbox, QUERY_DEV_CAP_ALTC_ENTRY_SZ_OFFSET); + dev_cap->altc_entry_sz = size; + MLX4_GET(size, outbox, QUERY_DEV_CAP_EQC_ENTRY_SZ_OFFSET); + dev_cap->eqc_entry_sz = size; + MLX4_GET(size, outbox, QUERY_DEV_CAP_CQC_ENTRY_SZ_OFFSET); + dev_cap->cqc_entry_sz = size; + MLX4_GET(size, outbox, QUERY_DEV_CAP_SRQ_ENTRY_SZ_OFFSET); + dev_cap->srq_entry_sz = size; + MLX4_GET(size, outbox, QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET); + dev_cap->cmpt_entry_sz = size; + MLX4_GET(size, outbox, QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET); + dev_cap->mtt_entry_sz = size; + MLX4_GET(size, outbox, QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET); + dev_cap->dmpt_entry_sz = size; + + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SRQ_SZ_OFFSET); + dev_cap->max_srq_sz = 1 << field; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_SZ_OFFSET); + dev_cap->max_qp_sz = 1 << field; + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSZ_SRQ_OFFSET); + dev_cap->resize_srq = field & 1; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SG_RQ_OFFSET); + dev_cap->max_rq_sg = field; + MLX4_GET(size, 
outbox, QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET); + dev_cap->max_rq_desc_sz = size; + MLX4_GET(field, outbox, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE); + if (field & (1 << 4)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_QOS_VPP; + if (field & (1 << 5)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL; + if (field & (1 << 6)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CQE_STRIDE; + if (field & (1 << 7)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE; + MLX4_GET(dev_cap->bmme_flags, outbox, + QUERY_DEV_CAP_BMME_FLAGS_OFFSET); + if (dev_cap->bmme_flags & MLX4_FLAG_ROCE_V1_V2) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ROCE_V1_V2; + if (dev_cap->bmme_flags & MLX4_FLAG_PORT_REMAP) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_REMAP; + MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET); + if (field & 0x20) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CONFIG_DEV; + if (field & (1 << 2)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_IGNORE_FCS; + MLX4_GET(field, outbox, QUERY_DEV_CAP_PHV_EN_OFFSET); + if (field & 0x80) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PHV_EN; + if (field & 0x40) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN; + + MLX4_GET(dev_cap->reserved_lkey, outbox, + QUERY_DEV_CAP_RSVD_LKEY_OFFSET); + MLX4_GET(field32, outbox, QUERY_DEV_CAP_ETH_BACKPL_OFFSET); + if (field32 & (1 << 0)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP; + if (field32 & (1 << 7)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT; + if (field32 & (1 << 8)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DRIVER_VERSION_TO_FW; + MLX4_GET(field32, outbox, QUERY_DEV_CAP_DIAG_RPRT_PER_PORT); + if (field32 & (1 << 17)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT; + MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC); + if (field & 1<<6) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN; + MLX4_GET(field, outbox, QUERY_DEV_CAP_VXLAN); + if (field & 1<<3) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS; + if (field & (1 << 5)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETS_CFG; + MLX4_GET(dev_cap->max_icm_sz, outbox, + QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET); + if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS) + MLX4_GET(dev_cap->max_counters, outbox, + QUERY_DEV_CAP_MAX_COUNTERS_OFFSET); + + MLX4_GET(field32, outbox, + QUERY_DEV_CAP_MAD_DEMUX_OFFSET); + if (field32 & (1 << 0)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_MAD_DEMUX; + + MLX4_GET(dev_cap->dmfs_high_rate_qpn_base, outbox, + QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_BASE_OFFSET); + dev_cap->dmfs_high_rate_qpn_base &= MGM_QPN_MASK; + MLX4_GET(dev_cap->dmfs_high_rate_qpn_range, outbox, + QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_RANGE_OFFSET); + dev_cap->dmfs_high_rate_qpn_range &= MGM_QPN_MASK; + + MLX4_GET(size, outbox, QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET); + dev_cap->rl_caps.num_rates = size; + if (dev_cap->rl_caps.num_rates) { + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT; + MLX4_GET(size, outbox, QUERY_DEV_CAP_QP_RATE_LIMIT_MAX_OFFSET); + dev_cap->rl_caps.max_val = size & 0xfff; + dev_cap->rl_caps.max_unit = size >> 14; + MLX4_GET(size, outbox, QUERY_DEV_CAP_QP_RATE_LIMIT_MIN_OFFSET); + dev_cap->rl_caps.min_val = size & 0xfff; + dev_cap->rl_caps.min_unit = size >> 14; + } + + MLX4_GET(dev_cap->health_buffer_addrs, outbox, + QUERY_DEV_CAP_HEALTH_BUFFER_ADDRESS_OFFSET); + + MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); + if (field32 & (1 << 16)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP; + if (field32 & (1 << 18)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB; + if (field32 & (1 << 19)) + 
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_LB_SRC_CHK; + if (field32 & (1 << 26)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VLAN_CONTROL; + if (field32 & (1 << 20)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FSM; + if (field32 & (1 << 21)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_80_VFS; + if (field32 & (1 << 23)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SW_CQ_INIT; + + for (i = 1; i <= dev_cap->num_ports; i++) { + err = mlx4_QUERY_PORT(dev, i, dev_cap->port_cap + i); + if (err) + goto out; + } + + /* + * Each UAR has 4 EQ doorbells; so if a UAR is reserved, then + * we can't use any EQs whose doorbell falls on that page, + * even if the EQ itself isn't reserved. + */ + if (dev_cap->num_sys_eqs == 0) + dev_cap->reserved_eqs = max(dev_cap->reserved_uars * 4, + dev_cap->reserved_eqs); + else + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SYS_EQS; + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +void mlx4_dev_cap_dump(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) +{ + if (dev_cap->bf_reg_size > 0) + mlx4_dbg(dev, "BlueFlame available (reg size %d, regs/page %d)\n", + dev_cap->bf_reg_size, dev_cap->bf_regs_per_page); + else + mlx4_dbg(dev, "BlueFlame not available\n"); + + mlx4_dbg(dev, "Base MM extensions: flags %08x, rsvd L_Key %08x\n", + dev_cap->bmme_flags, dev_cap->reserved_lkey); + mlx4_dbg(dev, "Max ICM size %lld MB\n", + (unsigned long long) dev_cap->max_icm_sz >> 20); + mlx4_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n", + dev_cap->max_qps, dev_cap->reserved_qps, dev_cap->qpc_entry_sz); + mlx4_dbg(dev, "Max SRQs: %d, reserved SRQs: %d, entry size: %d\n", + dev_cap->max_srqs, dev_cap->reserved_srqs, dev_cap->srq_entry_sz); + mlx4_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n", + dev_cap->max_cqs, dev_cap->reserved_cqs, dev_cap->cqc_entry_sz); + mlx4_dbg(dev, "Num sys EQs: %d, max EQs: %d, reserved EQs: %d, entry size: %d\n", + dev_cap->num_sys_eqs, dev_cap->max_eqs, dev_cap->reserved_eqs, + dev_cap->eqc_entry_sz); + mlx4_dbg(dev, "reserved MPTs: %d, reserved MTTs: %d\n", + dev_cap->reserved_mrws, dev_cap->reserved_mtts); + mlx4_dbg(dev, "Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n", + dev_cap->max_pds, dev_cap->reserved_pds, dev_cap->reserved_uars); + mlx4_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n", + dev_cap->max_pds, dev_cap->reserved_mgms); + mlx4_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n", + dev_cap->max_cq_sz, dev_cap->max_qp_sz, dev_cap->max_srq_sz); + mlx4_dbg(dev, "Local CA ACK delay: %d, max MTU: %d, port width cap: %d\n", + dev_cap->local_ca_ack_delay, 128 << dev_cap->port_cap[1].ib_mtu, + dev_cap->port_cap[1].max_port_width); + mlx4_dbg(dev, "Max SQ desc size: %d, max SQ S/G: %d\n", + dev_cap->max_sq_desc_sz, dev_cap->max_sq_sg); + mlx4_dbg(dev, "Max RQ desc size: %d, max RQ S/G: %d\n", + dev_cap->max_rq_desc_sz, dev_cap->max_rq_sg); + mlx4_dbg(dev, "Max GSO size: %d\n", dev_cap->max_gso_sz); + mlx4_dbg(dev, "Max counters: %d\n", dev_cap->max_counters); + mlx4_dbg(dev, "Max RSS Table size: %d\n", dev_cap->max_rss_tbl_sz); + mlx4_dbg(dev, "DMFS high rate steer QPn base: %d\n", + dev_cap->dmfs_high_rate_qpn_base); + mlx4_dbg(dev, "DMFS high rate steer QPn range: %d\n", + dev_cap->dmfs_high_rate_qpn_range); + + if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT) { + struct mlx4_rate_limit_caps *rl_caps = &dev_cap->rl_caps; + + mlx4_dbg(dev, "QP Rate-Limit: #rates %d, unit/val max %d/%d, min %d/%d\n", + rl_caps->num_rates, rl_caps->max_unit, rl_caps->max_val, + rl_caps->min_unit, rl_caps->min_val); + } + + 
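/* Log the human-readable names of the set capability bits. */ +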
dump_dev_cap_flags(dev, dev_cap->flags); + dump_dev_cap_flags2(dev, dev_cap->flags2); +} + +int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_cap) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *outbox; + u8 field; + u32 field32; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { + err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + + if (err) + goto out; + + MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET); + port_cap->max_vl = field >> 4; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MTU_WIDTH_OFFSET); + port_cap->ib_mtu = field >> 4; + port_cap->max_port_width = field & 0xf; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GID_OFFSET); + port_cap->max_gids = 1 << (field & 0xf); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PKEY_OFFSET); + port_cap->max_pkeys = 1 << (field & 0xf); + } else { +#define QUERY_PORT_SUPPORTED_TYPE_OFFSET 0x00 +#define QUERY_PORT_MTU_OFFSET 0x01 +#define QUERY_PORT_ETH_MTU_OFFSET 0x02 +#define QUERY_PORT_WIDTH_OFFSET 0x06 +#define QUERY_PORT_MAX_GID_PKEY_OFFSET 0x07 +#define QUERY_PORT_MAX_MACVLAN_OFFSET 0x0a +#define QUERY_PORT_MAX_VL_OFFSET 0x0b +#define QUERY_PORT_MAC_OFFSET 0x10 +#define QUERY_PORT_TRANS_VENDOR_OFFSET 0x18 +#define QUERY_PORT_WAVELENGTH_OFFSET 0x1c +#define QUERY_PORT_TRANS_CODE_OFFSET 0x20 + + err = mlx4_cmd_box(dev, 0, mailbox->dma, port, 0, MLX4_CMD_QUERY_PORT, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + if (err) + goto out; + + MLX4_GET(field, outbox, QUERY_PORT_SUPPORTED_TYPE_OFFSET); + port_cap->link_state = (field & 0x80) >> 7; + port_cap->supported_port_types = field & 3; + port_cap->suggested_type = (field >> 3) & 1; + port_cap->default_sense = (field >> 4) & 1; + port_cap->dmfs_optimized_state = (field >> 5) & 1; + MLX4_GET(field, outbox, QUERY_PORT_MTU_OFFSET); + port_cap->ib_mtu = field & 0xf; + MLX4_GET(field, outbox, QUERY_PORT_WIDTH_OFFSET); + port_cap->max_port_width = field & 0xf; + MLX4_GET(field, outbox, QUERY_PORT_MAX_GID_PKEY_OFFSET); + port_cap->max_gids = 1 << (field >> 4); + port_cap->max_pkeys = 1 << (field & 0xf); + MLX4_GET(field, outbox, QUERY_PORT_MAX_VL_OFFSET); + port_cap->max_vl = field & 0xf; + port_cap->max_tc_eth = field >> 4; + MLX4_GET(field, outbox, QUERY_PORT_MAX_MACVLAN_OFFSET); + port_cap->log_max_macs = field & 0xf; + port_cap->log_max_vlans = field >> 4; + MLX4_GET(port_cap->eth_mtu, outbox, QUERY_PORT_ETH_MTU_OFFSET); + MLX4_GET(port_cap->def_mac, outbox, QUERY_PORT_MAC_OFFSET); + MLX4_GET(field32, outbox, QUERY_PORT_TRANS_VENDOR_OFFSET); + port_cap->trans_type = field32 >> 24; + port_cap->vendor_oui = field32 & 0xffffff; + MLX4_GET(port_cap->wavelength, outbox, QUERY_PORT_WAVELENGTH_OFFSET); + MLX4_GET(port_cap->trans_code, outbox, QUERY_PORT_TRANS_CODE_OFFSET); + } + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +#define DEV_CAP_EXT_2_FLAG_PFC_COUNTERS (1 << 28) +#define DEV_CAP_EXT_2_FLAG_VLAN_CONTROL (1 << 26) +#define DEV_CAP_EXT_2_FLAG_80_VFS (1 << 21) +#define DEV_CAP_EXT_2_FLAG_FSM (1 << 20) + +int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + u64 flags; + int err = 0; + u8 field; + u16 field16; + u32 bmme_flags, field32; + int real_port; + int slave_port; + int first_port; + struct mlx4_active_ports actv_ports; + + err = 
mlx4_cmd_box(dev, 0, outbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + if (err) + return err; + + disable_unsupported_roce_caps(outbox->buf); + /* add port mng change event capability and disable mw type 1 + * unconditionally to slaves + */ + MLX4_GET(flags, outbox->buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); + flags |= MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV; + flags &= ~MLX4_DEV_CAP_FLAG_MEM_WINDOW; + actv_ports = mlx4_get_active_ports(dev, slave); + first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports); + for (slave_port = 0, real_port = first_port; + real_port < first_port + + bitmap_weight(actv_ports.ports, dev->caps.num_ports); + ++real_port, ++slave_port) { + if (flags & (MLX4_DEV_CAP_FLAG_WOL_PORT1 << real_port)) + flags |= MLX4_DEV_CAP_FLAG_WOL_PORT1 << slave_port; + else + flags &= ~(MLX4_DEV_CAP_FLAG_WOL_PORT1 << slave_port); + } + for (; slave_port < dev->caps.num_ports; ++slave_port) + flags &= ~(MLX4_DEV_CAP_FLAG_WOL_PORT1 << slave_port); + + /* Not exposing RSS IP fragments to guests */ + flags &= ~MLX4_DEV_CAP_FLAG_RSS_IP_FRAG; + MLX4_PUT(outbox->buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); + + MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_VL_PORT_OFFSET); + field &= ~0x0F; + field |= bitmap_weight(actv_ports.ports, dev->caps.num_ports) & 0x0F; + MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_VL_PORT_OFFSET); + + /* For guests, disable timestamp */ + MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET); + field &= 0x7f; + MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET); + + /* For guests, disable vxlan tunneling and QoS support */ + MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_VXLAN); + field &= 0xd7; + MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_VXLAN); + + /* For guests, disable port BEACON */ + MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_PORT_BEACON_OFFSET); + field &= 0x7f; + MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_PORT_BEACON_OFFSET); + + /* For guests, report Blueflame disabled */ + MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_BF_OFFSET); + field &= 0x7f; + MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_BF_OFFSET); + + /* For guests, disable mw type 2 and port remap*/ + MLX4_GET(bmme_flags, outbox->buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); + bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN; + bmme_flags &= ~MLX4_FLAG_PORT_REMAP; + MLX4_PUT(outbox->buf, bmme_flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); + + /* turn off device-managed steering capability if not enabled */ + if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) { + MLX4_GET(field, outbox->buf, + QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET); + field &= 0x7f; + MLX4_PUT(outbox->buf, field, + QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET); + } + + /* turn off ipoib managed steering for guests */ + MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); + field &= ~0x80; + MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); + + /* turn off host side virt features (VST, FSM, etc) for guests */ + MLX4_GET(field32, outbox->buf, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); + field32 &= ~(DEV_CAP_EXT_2_FLAG_VLAN_CONTROL | DEV_CAP_EXT_2_FLAG_80_VFS | + DEV_CAP_EXT_2_FLAG_FSM | DEV_CAP_EXT_2_FLAG_PFC_COUNTERS); + MLX4_PUT(outbox->buf, field32, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); + + /* turn off QCN for guests */ + MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET); + field &= 0xfe; + MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET); + + /* turn off QP max-rate limiting for guests */ + field16 = 0; + 
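/* a zero rate count keeps mlx4_QUERY_DEV_CAP from reporting MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT to the slave */ +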
MLX4_PUT(outbox->buf, field16, QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET); + + /* turn off QoS per VF support for guests */ + MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE); + field &= 0xef; + MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE); + + /* turn off ignore FCS feature for guests */ + MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_CONFIG_DEV_OFFSET); + field &= 0xfb; + MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_CONFIG_DEV_OFFSET); + + return 0; +} + +static void disable_unsupported_roce_caps(void *buf) +{ + u32 flags; + + MLX4_GET(flags, buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); + flags &= ~(1UL << 31); + MLX4_PUT(buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); + MLX4_GET(flags, buf, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); + flags &= ~(1UL << 24); + MLX4_PUT(buf, flags, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); + MLX4_GET(flags, buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); + flags &= ~(MLX4_FLAG_ROCE_V1_V2); + MLX4_PUT(buf, flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); +} + +int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + u64 def_mac; + u8 port_type; + u16 short_field; + int err; + int admin_link_state; + int port = mlx4_slave_convert_port(dev, slave, + vhcr->in_modifier & 0xFF); + +#define MLX4_VF_PORT_NO_LINK_SENSE_MASK 0xE0 +#define MLX4_PORT_LINK_UP_MASK 0x80 +#define QUERY_PORT_CUR_MAX_PKEY_OFFSET 0x0c +#define QUERY_PORT_CUR_MAX_GID_OFFSET 0x0e + + if (port < 0) + return -EINVAL; + + /* Protect against untrusted guests: enforce that this is the + * QUERY_PORT general query. + */ + if (vhcr->op_modifier || vhcr->in_modifier & ~0xFF) + return -EINVAL; + + vhcr->in_modifier = port; + + err = mlx4_cmd_box(dev, 0, outbox->dma, vhcr->in_modifier, 0, + MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); + + if (!err && dev->caps.function != slave) { + def_mac = priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.mac; + MLX4_PUT(outbox->buf, def_mac, QUERY_PORT_MAC_OFFSET); + + /* get port type - currently only eth is enabled */ + MLX4_GET(port_type, outbox->buf, + QUERY_PORT_SUPPORTED_TYPE_OFFSET); + + /* No link sensing allowed */ + port_type &= MLX4_VF_PORT_NO_LINK_SENSE_MASK; + /* set port type to currently operating port type */ + port_type |= (dev->caps.port_type[vhcr->in_modifier] & 0x3); + + admin_link_state = priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.link_state; + if (IFLA_VF_LINK_STATE_ENABLE == admin_link_state) + port_type |= MLX4_PORT_LINK_UP_MASK; + else if (IFLA_VF_LINK_STATE_DISABLE == admin_link_state) + port_type &= ~MLX4_PORT_LINK_UP_MASK; + else if (IFLA_VF_LINK_STATE_AUTO == admin_link_state && mlx4_is_bonded(dev)) { + int other_port = (port == 1) ? 
2 : 1; + struct mlx4_port_cap port_cap; + + err = mlx4_QUERY_PORT(dev, other_port, &port_cap); + if (err) + goto out; + port_type |= (port_cap.link_state << 7); + } + + MLX4_PUT(outbox->buf, port_type, + QUERY_PORT_SUPPORTED_TYPE_OFFSET); + + if (dev->caps.port_type[vhcr->in_modifier] == MLX4_PORT_TYPE_ETH) + short_field = mlx4_get_slave_num_gids(dev, slave, port); + else + short_field = 1; /* slave max gids */ + MLX4_PUT(outbox->buf, short_field, + QUERY_PORT_CUR_MAX_GID_OFFSET); + + short_field = dev->caps.pkey_table_len[vhcr->in_modifier]; + MLX4_PUT(outbox->buf, short_field, + QUERY_PORT_CUR_MAX_PKEY_OFFSET); + } +out: + return err; +} + +int mlx4_get_slave_pkey_gid_tbl_len(struct mlx4_dev *dev, u8 port, + int *gid_tbl_len, int *pkey_tbl_len) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *outbox; + u16 field; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + err = mlx4_cmd_box(dev, 0, mailbox->dma, port, 0, + MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + if (err) + goto out; + + outbox = mailbox->buf; + + MLX4_GET(field, outbox, QUERY_PORT_CUR_MAX_GID_OFFSET); + *gid_tbl_len = field; + + MLX4_GET(field, outbox, QUERY_PORT_CUR_MAX_PKEY_OFFSET); + *pkey_tbl_len = field; + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_get_slave_pkey_gid_tbl_len); + +int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_icm_iter iter; + __be64 *pages; + int lg; + int nent = 0; + int i; + int err = 0; + int ts = 0, tc = 0; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + pages = mailbox->buf; + + for (mlx4_icm_first(icm, &iter); + !mlx4_icm_last(&iter); + mlx4_icm_next(&iter)) { + /* + * We have to pass pages that are aligned to their + * size, so find the least significant 1 in the + * address or size and use that as our log2 size. 
+ */ + lg = ffs(mlx4_icm_addr(&iter) | mlx4_icm_size(&iter)) - 1; + if (lg < MLX4_ICM_PAGE_SHIFT) { + mlx4_warn(dev, "Got FW area not aligned to %d (%llx/%lx)\n", + MLX4_ICM_PAGE_SIZE, + (unsigned long long) mlx4_icm_addr(&iter), + mlx4_icm_size(&iter)); + err = -EINVAL; + goto out; + } + + for (i = 0; i < mlx4_icm_size(&iter) >> lg; ++i) { + if (virt != -1) { + pages[nent * 2] = cpu_to_be64(virt); + virt += 1ULL << lg; + } + + pages[nent * 2 + 1] = + cpu_to_be64((mlx4_icm_addr(&iter) + (i << lg)) | + (lg - MLX4_ICM_PAGE_SHIFT)); + ts += 1 << (lg - 10); + ++tc; + + if (++nent == MLX4_MAILBOX_SIZE / 16) { + err = mlx4_cmd(dev, mailbox->dma, nent, 0, op, + MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); + if (err) + goto out; + nent = 0; + } + } + } + + if (nent) + err = mlx4_cmd(dev, mailbox->dma, nent, 0, op, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + if (err) + goto out; + + switch (op) { + case MLX4_CMD_MAP_FA: + mlx4_dbg(dev, "Mapped %d chunks/%d KB for FW\n", tc, ts); + break; + case MLX4_CMD_MAP_ICM_AUX: + mlx4_dbg(dev, "Mapped %d chunks/%d KB for ICM aux\n", tc, ts); + break; + case MLX4_CMD_MAP_ICM: + mlx4_dbg(dev, "Mapped %d chunks/%d KB at %llx for ICM\n", + tc, ts, (unsigned long long) virt - (ts << 10)); + break; + } + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +int mlx4_MAP_FA(struct mlx4_dev *dev, struct mlx4_icm *icm) +{ + return mlx4_map_cmd(dev, MLX4_CMD_MAP_FA, icm, -1); +} + +int mlx4_UNMAP_FA(struct mlx4_dev *dev) +{ + return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_FA, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); +} + + +int mlx4_RUN_FW(struct mlx4_dev *dev) +{ + return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_RUN_FW, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); +} + +int mlx4_QUERY_FW(struct mlx4_dev *dev) +{ + struct mlx4_fw *fw = &mlx4_priv(dev)->fw; + struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; + struct mlx4_cmd_mailbox *mailbox; + u32 *outbox; + int err = 0; + u64 fw_ver; + u16 cmd_if_rev; + u8 lg; + +#define QUERY_FW_OUT_SIZE 0x100 +#define QUERY_FW_VER_OFFSET 0x00 +#define QUERY_FW_PPF_ID 0x09 +#define QUERY_FW_CMD_IF_REV_OFFSET 0x0a +#define QUERY_FW_MAX_CMD_OFFSET 0x0f +#define QUERY_FW_ERR_START_OFFSET 0x30 +#define QUERY_FW_ERR_SIZE_OFFSET 0x38 +#define QUERY_FW_ERR_BAR_OFFSET 0x3c + +#define QUERY_FW_SIZE_OFFSET 0x00 +#define QUERY_FW_CLR_INT_BASE_OFFSET 0x20 +#define QUERY_FW_CLR_INT_BAR_OFFSET 0x28 + +#define QUERY_FW_COMM_BASE_OFFSET 0x40 +#define QUERY_FW_COMM_BAR_OFFSET 0x48 + +#define QUERY_FW_CLOCK_OFFSET 0x50 +#define QUERY_FW_CLOCK_BAR 0x58 + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_FW, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + if (err) + goto out; + + MLX4_GET(fw_ver, outbox, QUERY_FW_VER_OFFSET); + /* + * FW subminor version is at more significant bits than minor + * version, so swap here. 
+ */ + dev->caps.fw_ver = (fw_ver & 0xffff00000000ull) | + ((fw_ver & 0xffff0000ull) >> 16) | + ((fw_ver & 0x0000ffffull) << 16); + + MLX4_GET(lg, outbox, QUERY_FW_PPF_ID); + dev->caps.function = lg; + + if (mlx4_is_slave(dev)) + goto out; + + + MLX4_GET(cmd_if_rev, outbox, QUERY_FW_CMD_IF_REV_OFFSET); + if (cmd_if_rev < MLX4_COMMAND_INTERFACE_MIN_REV || + cmd_if_rev > MLX4_COMMAND_INTERFACE_MAX_REV) { + mlx4_err(dev, "Installed FW has unsupported command interface revision %d\n", + cmd_if_rev); + mlx4_err(dev, "(Installed FW version is %d.%d.%03d)\n", + (int) (dev->caps.fw_ver >> 32), + (int) (dev->caps.fw_ver >> 16) & 0xffff, + (int) dev->caps.fw_ver & 0xffff); + mlx4_err(dev, "This driver version supports only revisions %d to %d\n", + MLX4_COMMAND_INTERFACE_MIN_REV, MLX4_COMMAND_INTERFACE_MAX_REV); + err = -ENODEV; + goto out; + } + + if (cmd_if_rev < MLX4_COMMAND_INTERFACE_NEW_PORT_CMDS) + dev->flags |= MLX4_FLAG_OLD_PORT_CMDS; + + MLX4_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET); + cmd->max_cmds = 1 << lg; + + mlx4_dbg(dev, "FW version %d.%d.%03d (cmd intf rev %d), max commands %d\n", + (int) (dev->caps.fw_ver >> 32), + (int) (dev->caps.fw_ver >> 16) & 0xffff, + (int) dev->caps.fw_ver & 0xffff, + cmd_if_rev, cmd->max_cmds); + + MLX4_GET(fw->catas_offset, outbox, QUERY_FW_ERR_START_OFFSET); + MLX4_GET(fw->catas_size, outbox, QUERY_FW_ERR_SIZE_OFFSET); + MLX4_GET(fw->catas_bar, outbox, QUERY_FW_ERR_BAR_OFFSET); + fw->catas_bar = (fw->catas_bar >> 6) * 2; + + mlx4_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x, BAR %d\n", + (unsigned long long) fw->catas_offset, fw->catas_size, fw->catas_bar); + + MLX4_GET(fw->fw_pages, outbox, QUERY_FW_SIZE_OFFSET); + MLX4_GET(fw->clr_int_base, outbox, QUERY_FW_CLR_INT_BASE_OFFSET); + MLX4_GET(fw->clr_int_bar, outbox, QUERY_FW_CLR_INT_BAR_OFFSET); + fw->clr_int_bar = (fw->clr_int_bar >> 6) * 2; + + MLX4_GET(fw->comm_base, outbox, QUERY_FW_COMM_BASE_OFFSET); + MLX4_GET(fw->comm_bar, outbox, QUERY_FW_COMM_BAR_OFFSET); + fw->comm_bar = (fw->comm_bar >> 6) * 2; + mlx4_dbg(dev, "Communication vector bar:%d offset:0x%llx\n", + fw->comm_bar, fw->comm_base); + mlx4_dbg(dev, "FW size %d KB\n", fw->fw_pages >> 2); + + MLX4_GET(fw->clock_offset, outbox, QUERY_FW_CLOCK_OFFSET); + MLX4_GET(fw->clock_bar, outbox, QUERY_FW_CLOCK_BAR); + fw->clock_bar = (fw->clock_bar >> 6) * 2; + mlx4_dbg(dev, "Internal clock bar:%d offset:0x%llx\n", + fw->clock_bar, fw->clock_offset); + + /* + * Round up number of system pages needed in case + * MLX4_ICM_PAGE_SIZE < PAGE_SIZE. 
+ */ + fw->fw_pages = + ALIGN(fw->fw_pages, PAGE_SIZE / MLX4_ICM_PAGE_SIZE) >> + (PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT); + + mlx4_dbg(dev, "Clear int @ %llx, BAR %d\n", + (unsigned long long) fw->clr_int_base, fw->clr_int_bar); + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +int mlx4_QUERY_FW_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + u8 *outbuf; + int err; + + outbuf = outbox->buf; + err = mlx4_cmd_box(dev, 0, outbox->dma, 0, 0, MLX4_CMD_QUERY_FW, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + if (err) + return err; + + /* for slaves, set pci PPF ID to invalid and zero out everything + * else except FW version */ + outbuf[0] = outbuf[1] = 0; + memset(&outbuf[8], 0, QUERY_FW_OUT_SIZE - 8); + outbuf[QUERY_FW_PPF_ID] = MLX4_INVALID_SLAVE_ID; + + return 0; +} + +static void get_board_id(void *vsd, char *board_id) +{ + int i; + +#define VSD_OFFSET_SIG1 0x00 +#define VSD_OFFSET_SIG2 0xde +#define VSD_OFFSET_MLX_BOARD_ID 0xd0 +#define VSD_OFFSET_TS_BOARD_ID 0x20 + +#define VSD_SIGNATURE_TOPSPIN 0x5ad + + memset(board_id, 0, MLX4_BOARD_ID_LEN); + + if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN && + be16_to_cpup(vsd + VSD_OFFSET_SIG2) == VSD_SIGNATURE_TOPSPIN) { + strscpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MLX4_BOARD_ID_LEN); + } else { + /* + * The board ID is a string but the firmware byte + * swaps each 4-byte word before passing it back to + * us. Therefore we need to swab it before printing. + */ + u32 *bid_u32 = (u32 *)board_id; + + for (i = 0; i < 4; ++i) { + u32 *addr; + u32 val; + + addr = (u32 *) (vsd + VSD_OFFSET_MLX_BOARD_ID + i * 4); + val = get_unaligned(addr); + val = swab32(val); + put_unaligned(val, &bid_u32[i]); + } + } +} + +int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *outbox; + int err; + +#define QUERY_ADAPTER_OUT_SIZE 0x100 +#define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10 +#define QUERY_ADAPTER_VSD_OFFSET 0x20 + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_ADAPTER, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + if (err) + goto out; + + MLX4_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET); + + get_board_id(outbox + QUERY_ADAPTER_VSD_OFFSET / 4, + adapter->board_id); + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) +{ + struct mlx4_cmd_mailbox *mailbox; + __be32 *inbox; + int err; + static const u8 a0_dmfs_hw_steering[] = { + [MLX4_STEERING_DMFS_A0_DEFAULT] = 0, + [MLX4_STEERING_DMFS_A0_DYNAMIC] = 1, + [MLX4_STEERING_DMFS_A0_STATIC] = 2, + [MLX4_STEERING_DMFS_A0_DISABLE] = 3 + }; + +#define INIT_HCA_IN_SIZE 0x200 +#define INIT_HCA_VERSION_OFFSET 0x000 +#define INIT_HCA_VERSION 2 +#define INIT_HCA_VXLAN_OFFSET 0x0c +#define INIT_HCA_CACHELINE_SZ_OFFSET 0x0e +#define INIT_HCA_FLAGS_OFFSET 0x014 +#define INIT_HCA_RECOVERABLE_ERROR_EVENT_OFFSET 0x018 +#define INIT_HCA_QPC_OFFSET 0x020 +#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10) +#define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17) +#define INIT_HCA_SRQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x28) +#define INIT_HCA_LOG_SRQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x2f) +#define INIT_HCA_CQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x30) +#define 
INIT_HCA_LOG_CQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x37) +#define INIT_HCA_EQE_CQE_OFFSETS (INIT_HCA_QPC_OFFSET + 0x38) +#define INIT_HCA_EQE_CQE_STRIDE_OFFSET (INIT_HCA_QPC_OFFSET + 0x3b) +#define INIT_HCA_ALTC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x40) +#define INIT_HCA_AUXC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x50) +#define INIT_HCA_EQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x60) +#define INIT_HCA_LOG_EQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x67) +#define INIT_HCA_NUM_SYS_EQS_OFFSET (INIT_HCA_QPC_OFFSET + 0x6a) +#define INIT_HCA_RDMARC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x70) +#define INIT_HCA_LOG_RD_OFFSET (INIT_HCA_QPC_OFFSET + 0x77) +#define INIT_HCA_MCAST_OFFSET 0x0c0 +#define INIT_HCA_MC_BASE_OFFSET (INIT_HCA_MCAST_OFFSET + 0x00) +#define INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x13) +#define INIT_HCA_LOG_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x17) +#define INIT_HCA_UC_STEERING_OFFSET (INIT_HCA_MCAST_OFFSET + 0x18) +#define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b) +#define INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN 0x6 +#define INIT_HCA_DRIVER_VERSION_OFFSET 0x140 +#define INIT_HCA_DRIVER_VERSION_SZ 0x40 +#define INIT_HCA_FS_PARAM_OFFSET 0x1d0 +#define INIT_HCA_FS_BASE_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x00) +#define INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x13) +#define INIT_HCA_FS_A0_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x18) +#define INIT_HCA_FS_LOG_TABLE_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x1b) +#define INIT_HCA_FS_ETH_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x21) +#define INIT_HCA_FS_ETH_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x22) +#define INIT_HCA_FS_IB_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x25) +#define INIT_HCA_FS_IB_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x26) +#define INIT_HCA_TPT_OFFSET 0x0f0 +#define INIT_HCA_DMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x00) +#define INIT_HCA_TPT_MW_OFFSET (INIT_HCA_TPT_OFFSET + 0x08) +#define INIT_HCA_LOG_MPT_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x0b) +#define INIT_HCA_MTT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x10) +#define INIT_HCA_CMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x18) +#define INIT_HCA_UAR_OFFSET 0x120 +#define INIT_HCA_LOG_UAR_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0a) +#define INIT_HCA_UAR_PAGE_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0b) + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; + + *((u8 *) mailbox->buf + INIT_HCA_VERSION_OFFSET) = INIT_HCA_VERSION; + + *((u8 *) mailbox->buf + INIT_HCA_CACHELINE_SZ_OFFSET) = + ((ilog2(cache_line_size()) - 4) << 5) | (1 << 4); + +#if defined(__LITTLE_ENDIAN) + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1); +#elif defined(__BIG_ENDIAN) + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 1); +#else +#error Host endianness not defined +#endif + /* Check port for UD address vector: */ + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1); + + /* Enable IPoIB checksumming if we can: */ + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 3); + + /* Enable QoS support if module parameter set */ + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG && enable_qos) + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 2); + + /* enable counters */ + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS) + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 4); + + /* Enable RSS spread to fragmented IP packets when supported */ + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_RSS_IP_FRAG) + *(inbox + 
INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 13); + + /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */ + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_EQE) { + *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 29); + dev->caps.eqe_size = 64; + dev->caps.eqe_factor = 1; + } else { + dev->caps.eqe_size = 32; + dev->caps.eqe_factor = 0; + } + + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE) { + *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30); + dev->caps.cqe_size = 64; + dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE; + } else { + dev->caps.cqe_size = 32; + } + + /* CX3 is capable of extending CQEs\EQEs to strides larger than 64B */ + if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) && + (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE)) { + dev->caps.eqe_size = cache_line_size(); + dev->caps.cqe_size = cache_line_size(); + dev->caps.eqe_factor = 0; + MLX4_PUT(inbox, (u8)((ilog2(dev->caps.eqe_size) - 5) << 4 | + (ilog2(dev->caps.eqe_size) - 5)), + INIT_HCA_EQE_CQE_STRIDE_OFFSET); + + /* User still need to know to support CQE > 32B */ + dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE; + } + + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT) + *(inbox + INIT_HCA_RECOVERABLE_ERROR_EVENT_OFFSET / 4) |= cpu_to_be32(1 << 31); + + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DRIVER_VERSION_TO_FW) { + u8 *dst = (u8 *)(inbox + INIT_HCA_DRIVER_VERSION_OFFSET / 4); + + strncpy(dst, DRV_NAME_FOR_FW, INIT_HCA_DRIVER_VERSION_SZ - 1); + mlx4_dbg(dev, "Reporting Driver Version to FW: %s\n", dst); + } + + /* QPC/EEC/CQC/EQC/RDMARC attributes */ + + MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET); + MLX4_PUT(inbox, param->log_num_qps, INIT_HCA_LOG_QP_OFFSET); + MLX4_PUT(inbox, param->srqc_base, INIT_HCA_SRQC_BASE_OFFSET); + MLX4_PUT(inbox, param->log_num_srqs, INIT_HCA_LOG_SRQ_OFFSET); + MLX4_PUT(inbox, param->cqc_base, INIT_HCA_CQC_BASE_OFFSET); + MLX4_PUT(inbox, param->log_num_cqs, INIT_HCA_LOG_CQ_OFFSET); + MLX4_PUT(inbox, param->altc_base, INIT_HCA_ALTC_BASE_OFFSET); + MLX4_PUT(inbox, param->auxc_base, INIT_HCA_AUXC_BASE_OFFSET); + MLX4_PUT(inbox, param->eqc_base, INIT_HCA_EQC_BASE_OFFSET); + MLX4_PUT(inbox, param->log_num_eqs, INIT_HCA_LOG_EQ_OFFSET); + MLX4_PUT(inbox, param->num_sys_eqs, INIT_HCA_NUM_SYS_EQS_OFFSET); + MLX4_PUT(inbox, param->rdmarc_base, INIT_HCA_RDMARC_BASE_OFFSET); + MLX4_PUT(inbox, param->log_rd_per_qp, INIT_HCA_LOG_RD_OFFSET); + + /* steering attributes */ + if (dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= + cpu_to_be32(1 << + INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN); + + MLX4_PUT(inbox, param->mc_base, INIT_HCA_FS_BASE_OFFSET); + MLX4_PUT(inbox, param->log_mc_entry_sz, + INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); + MLX4_PUT(inbox, param->log_mc_table_sz, + INIT_HCA_FS_LOG_TABLE_SZ_OFFSET); + /* Enable Ethernet flow steering + * with udp unicast and tcp unicast + */ + if (dev->caps.dmfs_high_steer_mode != + MLX4_STEERING_DMFS_A0_STATIC) + MLX4_PUT(inbox, + (u8)(MLX4_FS_UDP_UC_EN | MLX4_FS_TCP_UC_EN), + INIT_HCA_FS_ETH_BITS_OFFSET); + MLX4_PUT(inbox, (u16) MLX4_FS_NUM_OF_L2_ADDR, + INIT_HCA_FS_ETH_NUM_ADDRS_OFFSET); + /* Enable IPoIB flow steering + * with udp unicast and tcp unicast + */ + MLX4_PUT(inbox, (u8) (MLX4_FS_UDP_UC_EN | MLX4_FS_TCP_UC_EN), + INIT_HCA_FS_IB_BITS_OFFSET); + MLX4_PUT(inbox, (u16) MLX4_FS_NUM_OF_L2_ADDR, + INIT_HCA_FS_IB_NUM_ADDRS_OFFSET); + + if (dev->caps.dmfs_high_steer_mode != + 
MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) + MLX4_PUT(inbox, + ((u8)(a0_dmfs_hw_steering[dev->caps.dmfs_high_steer_mode] + << 6)), + INIT_HCA_FS_A0_OFFSET); + } else { + MLX4_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET); + MLX4_PUT(inbox, param->log_mc_entry_sz, + INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); + MLX4_PUT(inbox, param->log_mc_hash_sz, + INIT_HCA_LOG_MC_HASH_SZ_OFFSET); + MLX4_PUT(inbox, param->log_mc_table_sz, + INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); + if (dev->caps.steering_mode == MLX4_STEERING_MODE_B0) + MLX4_PUT(inbox, (u8) (1 << 3), + INIT_HCA_UC_STEERING_OFFSET); + } + + /* TPT attributes */ + + MLX4_PUT(inbox, param->dmpt_base, INIT_HCA_DMPT_BASE_OFFSET); + MLX4_PUT(inbox, param->mw_enabled, INIT_HCA_TPT_MW_OFFSET); + MLX4_PUT(inbox, param->log_mpt_sz, INIT_HCA_LOG_MPT_SZ_OFFSET); + MLX4_PUT(inbox, param->mtt_base, INIT_HCA_MTT_BASE_OFFSET); + MLX4_PUT(inbox, param->cmpt_base, INIT_HCA_CMPT_BASE_OFFSET); + + /* UAR attributes */ + + MLX4_PUT(inbox, param->uar_page_sz, INIT_HCA_UAR_PAGE_SZ_OFFSET); + MLX4_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET); + + /* set parser VXLAN attributes */ + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS) { + u8 parser_params = 0; + MLX4_PUT(inbox, parser_params, INIT_HCA_VXLAN_OFFSET); + } + + err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, + MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); + + if (err) + mlx4_err(dev, "INIT_HCA returns %d\n", err); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +int mlx4_QUERY_HCA(struct mlx4_dev *dev, + struct mlx4_init_hca_param *param) +{ + struct mlx4_cmd_mailbox *mailbox; + __be32 *outbox; + u64 qword_field; + u32 dword_field; + u16 word_field; + u8 byte_field; + int err; + static const u8 a0_dmfs_query_hw_steering[] = { + [0] = MLX4_STEERING_DMFS_A0_DEFAULT, + [1] = MLX4_STEERING_DMFS_A0_DYNAMIC, + [2] = MLX4_STEERING_DMFS_A0_STATIC, + [3] = MLX4_STEERING_DMFS_A0_DISABLE + }; + +#define QUERY_HCA_GLOBAL_CAPS_OFFSET 0x04 +#define QUERY_HCA_CORE_CLOCK_OFFSET 0x0c + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, + MLX4_CMD_QUERY_HCA, + MLX4_CMD_TIME_CLASS_B, + !mlx4_is_slave(dev)); + if (err) + goto out; + + MLX4_GET(param->global_caps, outbox, QUERY_HCA_GLOBAL_CAPS_OFFSET); + MLX4_GET(param->hca_core_clock, outbox, QUERY_HCA_CORE_CLOCK_OFFSET); + + /* QPC/EEC/CQC/EQC/RDMARC attributes */ + + MLX4_GET(qword_field, outbox, INIT_HCA_QPC_BASE_OFFSET); + param->qpc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_QP_OFFSET); + param->log_num_qps = byte_field & 0x1f; + MLX4_GET(qword_field, outbox, INIT_HCA_SRQC_BASE_OFFSET); + param->srqc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_SRQ_OFFSET); + param->log_num_srqs = byte_field & 0x1f; + MLX4_GET(qword_field, outbox, INIT_HCA_CQC_BASE_OFFSET); + param->cqc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_CQ_OFFSET); + param->log_num_cqs = byte_field & 0x1f; + MLX4_GET(qword_field, outbox, INIT_HCA_ALTC_BASE_OFFSET); + param->altc_base = qword_field; + MLX4_GET(qword_field, outbox, INIT_HCA_AUXC_BASE_OFFSET); + param->auxc_base = qword_field; + MLX4_GET(qword_field, outbox, INIT_HCA_EQC_BASE_OFFSET); + param->eqc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_EQ_OFFSET); + param->log_num_eqs = byte_field & 0x1f; + MLX4_GET(word_field, outbox, INIT_HCA_NUM_SYS_EQS_OFFSET); + 
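+	/* the number of system EQs occupies the low 12 bits of this word */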
param->num_sys_eqs = word_field & 0xfff; + MLX4_GET(qword_field, outbox, INIT_HCA_RDMARC_BASE_OFFSET); + param->rdmarc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_RD_OFFSET); + param->log_rd_per_qp = byte_field & 0x7; + + MLX4_GET(dword_field, outbox, INIT_HCA_FLAGS_OFFSET); + if (dword_field & (1 << INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN)) { + param->steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; + } else { + MLX4_GET(byte_field, outbox, INIT_HCA_UC_STEERING_OFFSET); + if (byte_field & 0x8) + param->steering_mode = MLX4_STEERING_MODE_B0; + else + param->steering_mode = MLX4_STEERING_MODE_A0; + } + + if (dword_field & (1 << 13)) + param->rss_ip_frags = 1; + + /* steering attributes */ + if (param->steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { + MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET); + MLX4_GET(byte_field, outbox, INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); + param->log_mc_entry_sz = byte_field & 0x1f; + MLX4_GET(byte_field, outbox, INIT_HCA_FS_LOG_TABLE_SZ_OFFSET); + param->log_mc_table_sz = byte_field & 0x1f; + MLX4_GET(byte_field, outbox, INIT_HCA_FS_A0_OFFSET); + param->dmfs_high_steer_mode = + a0_dmfs_query_hw_steering[(byte_field >> 6) & 3]; + } else { + MLX4_GET(param->mc_base, outbox, INIT_HCA_MC_BASE_OFFSET); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); + param->log_mc_entry_sz = byte_field & 0x1f; + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_HASH_SZ_OFFSET); + param->log_mc_hash_sz = byte_field & 0x1f; + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); + param->log_mc_table_sz = byte_field & 0x1f; + } + + /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */ + MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_OFFSETS); + if (byte_field & 0x20) /* 64-bytes eqe enabled */ + param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED; + if (byte_field & 0x40) /* 64-bytes cqe enabled */ + param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED; + + /* CX3 is capable of extending CQEs\EQEs to strides larger than 64B */ + MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_STRIDE_OFFSET); + if (byte_field) { + param->dev_cap_enabled |= MLX4_DEV_CAP_EQE_STRIDE_ENABLED; + param->dev_cap_enabled |= MLX4_DEV_CAP_CQE_STRIDE_ENABLED; + param->cqe_size = 1 << ((byte_field & + MLX4_CQE_SIZE_MASK_STRIDE) + 5); + param->eqe_size = 1 << (((byte_field & + MLX4_EQE_SIZE_MASK_STRIDE) >> 4) + 5); + } + + /* TPT attributes */ + + MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET); + MLX4_GET(byte_field, outbox, INIT_HCA_TPT_MW_OFFSET); + param->mw_enabled = byte_field >> 7; + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET); + param->log_mpt_sz = byte_field & 0x3f; + MLX4_GET(param->mtt_base, outbox, INIT_HCA_MTT_BASE_OFFSET); + MLX4_GET(param->cmpt_base, outbox, INIT_HCA_CMPT_BASE_OFFSET); + + /* UAR attributes */ + + MLX4_GET(param->uar_page_sz, outbox, INIT_HCA_UAR_PAGE_SZ_OFFSET); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET); + param->log_uar_sz = byte_field & 0xf; + + /* phv_check enable */ + MLX4_GET(byte_field, outbox, INIT_HCA_CACHELINE_SZ_OFFSET); + if (byte_field & 0x2) + param->phv_check_en = 1; +out: + mlx4_free_cmd_mailbox(dev, mailbox); + + return err; +} + +static int mlx4_hca_core_clock_update(struct mlx4_dev *dev) +{ + struct mlx4_cmd_mailbox *mailbox; + __be32 *outbox; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + mlx4_warn(dev, "hca_core_clock mailbox allocation failed\n"); + return PTR_ERR(mailbox); + } + outbox = mailbox->buf; 
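+	/* re-run QUERY_HCA here solely to refresh the cached hca_core_clock value */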
+ + err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, + MLX4_CMD_QUERY_HCA, + MLX4_CMD_TIME_CLASS_B, + !mlx4_is_slave(dev)); + if (err) { + mlx4_warn(dev, "hca_core_clock update failed\n"); + goto out; + } + + MLX4_GET(dev->caps.hca_core_clock, outbox, QUERY_HCA_CORE_CLOCK_OFFSET); + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + + return err; +} + +/* for IB-type ports only in SRIOV mode. Checks that both proxy QP0 + * and real QP0 are active, so that the paravirtualized QP0 is ready + * to operate */ +static int check_qp0_state(struct mlx4_dev *dev, int function, int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + /* irrelevant if not infiniband */ + if (priv->mfunc.master.qp0_state[port].proxy_qp0_active && + priv->mfunc.master.qp0_state[port].qp0_active) + return 1; + return 0; +} + +int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int port = mlx4_slave_convert_port(dev, slave, vhcr->in_modifier); + int err; + + if (port < 0) + return -EINVAL; + + if (priv->mfunc.master.slave_state[slave].init_port_mask & (1 << port)) + return 0; + + if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) { + /* Enable port only if it was previously disabled */ + if (!priv->mfunc.master.init_port_ref[port]) { + err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + if (err) + return err; + } + priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); + } else { + if (slave == mlx4_master_func_num(dev)) { + if (check_qp0_state(dev, slave, port) && + !priv->mfunc.master.qp0_state[port].port_active) { + err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + if (err) + return err; + priv->mfunc.master.qp0_state[port].port_active = 1; + priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); + } + } else + priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); + } + ++priv->mfunc.master.init_port_ref[port]; + return 0; +} + +int mlx4_INIT_PORT(struct mlx4_dev *dev, int port) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *inbox; + int err; + u32 flags; + u16 field; + + if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { +#define INIT_PORT_IN_SIZE 256 +#define INIT_PORT_FLAGS_OFFSET 0x00 +#define INIT_PORT_FLAG_SIG (1 << 18) +#define INIT_PORT_FLAG_NG (1 << 17) +#define INIT_PORT_FLAG_G0 (1 << 16) +#define INIT_PORT_VL_SHIFT 4 +#define INIT_PORT_PORT_WIDTH_SHIFT 8 +#define INIT_PORT_MTU_OFFSET 0x04 +#define INIT_PORT_MAX_GID_OFFSET 0x06 +#define INIT_PORT_MAX_PKEY_OFFSET 0x0a +#define INIT_PORT_GUID0_OFFSET 0x10 +#define INIT_PORT_NODE_GUID_OFFSET 0x18 +#define INIT_PORT_SI_GUID_OFFSET 0x20 + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; + + flags = 0; + flags |= (dev->caps.vl_cap[port] & 0xf) << INIT_PORT_VL_SHIFT; + flags |= (dev->caps.port_width_cap[port] & 0xf) << INIT_PORT_PORT_WIDTH_SHIFT; + MLX4_PUT(inbox, flags, INIT_PORT_FLAGS_OFFSET); + + field = 128 << dev->caps.ib_mtu_cap[port]; + MLX4_PUT(inbox, field, INIT_PORT_MTU_OFFSET); + field = dev->caps.gid_table_len[port]; + MLX4_PUT(inbox, field, INIT_PORT_MAX_GID_OFFSET); + field = dev->caps.pkey_table_len[port]; + MLX4_PUT(inbox, field, INIT_PORT_MAX_PKEY_OFFSET); + + err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_INIT_PORT, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + + 
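+		/* the mailbox carries only command input, so it can be freed whether or not INIT_PORT succeeded */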
mlx4_free_cmd_mailbox(dev, mailbox); + } else + err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + + if (!err) + mlx4_hca_core_clock_update(dev); + + return err; +} +EXPORT_SYMBOL_GPL(mlx4_INIT_PORT); + +int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int port = mlx4_slave_convert_port(dev, slave, vhcr->in_modifier); + int err; + + if (port < 0) + return -EINVAL; + + if (!(priv->mfunc.master.slave_state[slave].init_port_mask & + (1 << port))) + return 0; + + if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) { + if (priv->mfunc.master.init_port_ref[port] == 1) { + err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + if (err) + return err; + } + priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); + } else { + /* infiniband port */ + if (slave == mlx4_master_func_num(dev)) { + if (!priv->mfunc.master.qp0_state[port].qp0_active && + priv->mfunc.master.qp0_state[port].port_active) { + err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + if (err) + return err; + priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); + priv->mfunc.master.qp0_state[port].port_active = 0; + } + } else + priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); + } + --priv->mfunc.master.init_port_ref[port]; + return 0; +} + +int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port) +{ + return mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); +} +EXPORT_SYMBOL_GPL(mlx4_CLOSE_PORT); + +int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic) +{ + return mlx4_cmd(dev, 0, 0, panic, MLX4_CMD_CLOSE_HCA, + MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); +} + +struct mlx4_config_dev { + __be32 update_flags; + __be32 rsvd1[3]; + __be16 vxlan_udp_dport; + __be16 rsvd2; + __be16 roce_v2_entropy; + __be16 roce_v2_udp_dport; + __be32 roce_flags; + __be32 rsvd4[25]; + __be16 rsvd5; + u8 rsvd6; + u8 rx_checksum_val; +}; + +#define MLX4_VXLAN_UDP_DPORT (1 << 0) +#define MLX4_ROCE_V2_UDP_DPORT BIT(3) +#define MLX4_DISABLE_RX_PORT BIT(18) + +static int mlx4_CONFIG_DEV_set(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev) +{ + int err; + struct mlx4_cmd_mailbox *mailbox; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memcpy(mailbox->buf, config_dev, sizeof(*config_dev)); + + err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_CONFIG_DEV, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +static int mlx4_CONFIG_DEV_get(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev) +{ + int err; + struct mlx4_cmd_mailbox *mailbox; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 1, MLX4_CMD_CONFIG_DEV, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + + if (!err) + memcpy(config_dev, mailbox->buf, sizeof(*config_dev)); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +/* Conversion between the HW values and the actual functionality. + * The value represented by the array index, + * and the functionality determined by the flags. 
+ */ +static const u8 config_dev_csum_flags[] = { + [0] = 0, + [1] = MLX4_RX_CSUM_MODE_VAL_NON_TCP_UDP, + [2] = MLX4_RX_CSUM_MODE_VAL_NON_TCP_UDP | + MLX4_RX_CSUM_MODE_L4, + [3] = MLX4_RX_CSUM_MODE_L4 | + MLX4_RX_CSUM_MODE_IP_OK_IP_NON_TCP_UDP | + MLX4_RX_CSUM_MODE_MULTI_VLAN +}; + +int mlx4_config_dev_retrieval(struct mlx4_dev *dev, + struct mlx4_config_dev_params *params) +{ + struct mlx4_config_dev config_dev = {0}; + int err; + u8 csum_mask; + +#define CONFIG_DEV_RX_CSUM_MODE_MASK 0x7 +#define CONFIG_DEV_RX_CSUM_MODE_PORT1_BIT_OFFSET 0 +#define CONFIG_DEV_RX_CSUM_MODE_PORT2_BIT_OFFSET 4 + + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_CONFIG_DEV)) + return -EOPNOTSUPP; + + err = mlx4_CONFIG_DEV_get(dev, &config_dev); + if (err) + return err; + + csum_mask = (config_dev.rx_checksum_val >> CONFIG_DEV_RX_CSUM_MODE_PORT1_BIT_OFFSET) & + CONFIG_DEV_RX_CSUM_MODE_MASK; + + if (csum_mask >= ARRAY_SIZE(config_dev_csum_flags)) + return -EINVAL; + params->rx_csum_flags_port_1 = config_dev_csum_flags[csum_mask]; + + csum_mask = (config_dev.rx_checksum_val >> CONFIG_DEV_RX_CSUM_MODE_PORT2_BIT_OFFSET) & + CONFIG_DEV_RX_CSUM_MODE_MASK; + + if (csum_mask >= ARRAY_SIZE(config_dev_csum_flags)) + return -EINVAL; + params->rx_csum_flags_port_2 = config_dev_csum_flags[csum_mask]; + + params->vxlan_udp_dport = be16_to_cpu(config_dev.vxlan_udp_dport); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_config_dev_retrieval); + +int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port) +{ + struct mlx4_config_dev config_dev; + + memset(&config_dev, 0, sizeof(config_dev)); + config_dev.update_flags = cpu_to_be32(MLX4_VXLAN_UDP_DPORT); + config_dev.vxlan_udp_dport = udp_port; + + return mlx4_CONFIG_DEV_set(dev, &config_dev); +} +EXPORT_SYMBOL_GPL(mlx4_config_vxlan_port); + +#define CONFIG_DISABLE_RX_PORT BIT(15) +int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis) +{ + struct mlx4_config_dev config_dev; + + memset(&config_dev, 0, sizeof(config_dev)); + config_dev.update_flags = cpu_to_be32(MLX4_DISABLE_RX_PORT); + if (dis) + config_dev.roce_flags = + cpu_to_be32(CONFIG_DISABLE_RX_PORT); + + return mlx4_CONFIG_DEV_set(dev, &config_dev); +} + +int mlx4_config_roce_v2_port(struct mlx4_dev *dev, u16 udp_port) +{ + struct mlx4_config_dev config_dev; + + memset(&config_dev, 0, sizeof(config_dev)); + config_dev.update_flags = cpu_to_be32(MLX4_ROCE_V2_UDP_DPORT); + config_dev.roce_v2_udp_dport = cpu_to_be16(udp_port); + + return mlx4_CONFIG_DEV_set(dev, &config_dev); +} +EXPORT_SYMBOL_GPL(mlx4_config_roce_v2_port); + +int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2) +{ + struct mlx4_cmd_mailbox *mailbox; + struct { + __be32 v_port1; + __be32 v_port2; + } *v2p; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return -ENOMEM; + + v2p = mailbox->buf; + v2p->v_port1 = cpu_to_be32(port1); + v2p->v_port2 = cpu_to_be32(port2); + + err = mlx4_cmd(dev, mailbox->dma, 0, + MLX4_SET_PORT_VIRT2PHY, MLX4_CMD_VIRT_PORT_MAP, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + + +int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages) +{ + int ret = mlx4_cmd_imm(dev, icm_size, aux_pages, 0, 0, + MLX4_CMD_SET_ICM_SIZE, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + if (ret) + return ret; + + /* + * Round up number of system pages needed in case + * MLX4_ICM_PAGE_SIZE < PAGE_SIZE. 
+ */ + *aux_pages = ALIGN(*aux_pages, PAGE_SIZE / MLX4_ICM_PAGE_SIZE) >> + (PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT); + + return 0; +} + +int mlx4_NOP(struct mlx4_dev *dev) +{ + /* Input modifier of 0x1f means "finish as soon as possible." */ + return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); +} + +int mlx4_query_diag_counters(struct mlx4_dev *dev, u8 op_modifier, + const u32 offset[], + u32 value[], size_t array_len, u8 port) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *outbox; + size_t i; + int ret; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + outbox = mailbox->buf; + + ret = mlx4_cmd_box(dev, 0, mailbox->dma, port, op_modifier, + MLX4_CMD_DIAG_RPRT, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (ret) + goto out; + + for (i = 0; i < array_len; i++) { + if (offset[i] > MLX4_MAILBOX_SIZE) { + ret = -EINVAL; + goto out; + } + + MLX4_GET(value[i], outbox, offset[i]); + } + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return ret; +} +EXPORT_SYMBOL(mlx4_query_diag_counters); + +int mlx4_get_phys_port_id(struct mlx4_dev *dev) +{ + u8 port; + u32 *outbox; + struct mlx4_cmd_mailbox *mailbox; + u32 in_mod; + u32 guid_hi, guid_lo; + int err, ret = 0; +#define MOD_STAT_CFG_PORT_OFFSET 8 +#define MOD_STAT_CFG_GUID_H 0X14 +#define MOD_STAT_CFG_GUID_L 0X1c + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + for (port = 1; port <= dev->caps.num_ports; port++) { + in_mod = port << MOD_STAT_CFG_PORT_OFFSET; + err = mlx4_cmd_box(dev, 0, mailbox->dma, in_mod, 0x2, + MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) { + mlx4_err(dev, "Fail to get port %d uplink guid\n", + port); + ret = err; + } else { + MLX4_GET(guid_hi, outbox, MOD_STAT_CFG_GUID_H); + MLX4_GET(guid_lo, outbox, MOD_STAT_CFG_GUID_L); + dev->caps.phys_port_id[port] = (u64)guid_lo | + (u64)guid_hi << 32; + } + } + mlx4_free_cmd_mailbox(dev, mailbox); + return ret; +} + +#define MLX4_WOL_SETUP_MODE (5 << 28) +int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port) +{ + u32 in_mod = MLX4_WOL_SETUP_MODE | port << 8; + + return mlx4_cmd_imm(dev, 0, config, in_mod, 0x3, + MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); +} +EXPORT_SYMBOL_GPL(mlx4_wol_read); + +int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port) +{ + u32 in_mod = MLX4_WOL_SETUP_MODE | port << 8; + + return mlx4_cmd(dev, config, in_mod, 0x1, MLX4_CMD_MOD_STAT_CFG, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); +} +EXPORT_SYMBOL_GPL(mlx4_wol_write); + +enum { + ADD_TO_MCG = 0x26, +}; + + +void mlx4_opreq_action(struct work_struct *work) +{ + struct mlx4_priv *priv = container_of(work, struct mlx4_priv, + opreq_task); + struct mlx4_dev *dev = &priv->dev; + int num_tasks = atomic_read(&priv->opreq_count); + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_mgm *mgm; + u32 *outbox; + u32 modifier; + u16 token; + u16 type; + int err; + u32 num_qps; + struct mlx4_qp qp; + int i; + u8 rem_mcg; + u8 prot; + +#define GET_OP_REQ_MODIFIER_OFFSET 0x08 +#define GET_OP_REQ_TOKEN_OFFSET 0x14 +#define GET_OP_REQ_TYPE_OFFSET 0x1a +#define GET_OP_REQ_DATA_OFFSET 0x20 + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + mlx4_err(dev, "Failed to allocate mailbox for GET_OP_REQ\n"); + return; + } + outbox = mailbox->buf; + + while (num_tasks) { + err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, + MLX4_CMD_GET_OP_REQ, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) { + 
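+			/* could not fetch the next outstanding request from firmware; stop processing */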
mlx4_err(dev, "Failed to retrieve required operation: %d\n", + err); + goto out; + } + MLX4_GET(modifier, outbox, GET_OP_REQ_MODIFIER_OFFSET); + MLX4_GET(token, outbox, GET_OP_REQ_TOKEN_OFFSET); + MLX4_GET(type, outbox, GET_OP_REQ_TYPE_OFFSET); + type &= 0xfff; + + switch (type) { + case ADD_TO_MCG: + if (dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + mlx4_warn(dev, "ADD MCG operation is not supported in DEVICE_MANAGED steering mode\n"); + err = EPERM; + break; + } + mgm = (struct mlx4_mgm *)((u8 *)(outbox) + + GET_OP_REQ_DATA_OFFSET); + num_qps = be32_to_cpu(mgm->members_count) & + MGM_QPN_MASK; + rem_mcg = ((u8 *)(&mgm->members_count))[0] & 1; + prot = ((u8 *)(&mgm->members_count))[0] >> 6; + + for (i = 0; i < num_qps; i++) { + qp.qpn = be32_to_cpu(mgm->qp[i]); + if (rem_mcg) + err = mlx4_multicast_detach(dev, &qp, + mgm->gid, + prot, 0); + else + err = mlx4_multicast_attach(dev, &qp, + mgm->gid, + mgm->gid[5] + , 0, prot, + NULL); + if (err) + break; + } + break; + default: + mlx4_warn(dev, "Bad type for required operation\n"); + err = EINVAL; + break; + } + err = mlx4_cmd(dev, 0, ((u32) err | + (__force u32)cpu_to_be32(token) << 16), + 1, MLX4_CMD_GET_OP_REQ, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) { + mlx4_err(dev, "Failed to acknowledge required request: %d\n", + err); + goto out; + } + memset(outbox, 0, 0xffc); + num_tasks = atomic_dec_return(&priv->opreq_count); + } + +out: + mlx4_free_cmd_mailbox(dev, mailbox); +} + +static int mlx4_check_smp_firewall_active(struct mlx4_dev *dev, + struct mlx4_cmd_mailbox *mailbox) +{ +#define MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET 0x10 +#define MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET 0x20 +#define MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET 0x40 +#define MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET 0x70 + + u32 set_attr_mask, getresp_attr_mask; + u32 trap_attr_mask, traprepress_attr_mask; + + MLX4_GET(set_attr_mask, mailbox->buf, + MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET); + mlx4_dbg(dev, "SMP firewall set_attribute_mask = 0x%x\n", + set_attr_mask); + + MLX4_GET(getresp_attr_mask, mailbox->buf, + MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET); + mlx4_dbg(dev, "SMP firewall getresp_attribute_mask = 0x%x\n", + getresp_attr_mask); + + MLX4_GET(trap_attr_mask, mailbox->buf, + MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET); + mlx4_dbg(dev, "SMP firewall trap_attribute_mask = 0x%x\n", + trap_attr_mask); + + MLX4_GET(traprepress_attr_mask, mailbox->buf, + MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET); + mlx4_dbg(dev, "SMP firewall traprepress_attribute_mask = 0x%x\n", + traprepress_attr_mask); + + if (set_attr_mask && getresp_attr_mask && trap_attr_mask && + traprepress_attr_mask) + return 1; + + return 0; +} + +int mlx4_config_mad_demux(struct mlx4_dev *dev) +{ + struct mlx4_cmd_mailbox *mailbox; + int err; + + /* Check if mad_demux is supported */ + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_MAD_DEMUX)) + return 0; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + mlx4_warn(dev, "Failed to allocate mailbox for cmd MAD_DEMUX"); + return -ENOMEM; + } + + /* Query mad_demux to find out which MADs are handled by internal sma */ + err = mlx4_cmd_box(dev, 0, mailbox->dma, 0x01 /* subn mgmt class */, + MLX4_CMD_MAD_DEMUX_QUERY_RESTR, MLX4_CMD_MAD_DEMUX, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + if (err) { + mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: query restrictions failed (%d)\n", + err); + goto out; + } + + if (mlx4_check_smp_firewall_active(dev, mailbox)) + dev->flags |= MLX4_FLAG_SECURE_HOST; + + /* Config mad_demux to handle all MADs 
returned by the query above */ + err = mlx4_cmd(dev, mailbox->dma, 0x01 /* subn mgmt class */, + MLX4_CMD_MAD_DEMUX_CONFIG, MLX4_CMD_MAD_DEMUX, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + if (err) { + mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: configure failed (%d)\n", err); + goto out; + } + + if (dev->flags & MLX4_FLAG_SECURE_HOST) + mlx4_warn(dev, "HCA operating in secure-host mode. SMP firewall activated.\n"); +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +/* Access Reg commands */ +enum mlx4_access_reg_masks { + MLX4_ACCESS_REG_STATUS_MASK = 0x7f, + MLX4_ACCESS_REG_METHOD_MASK = 0x7f, + MLX4_ACCESS_REG_LEN_MASK = 0x7ff +}; + +struct mlx4_access_reg { + __be16 constant1; + u8 status; + u8 resrvd1; + __be16 reg_id; + u8 method; + u8 constant2; + __be32 resrvd2[2]; + __be16 len_const; + __be16 resrvd3; +#define MLX4_ACCESS_REG_HEADER_SIZE (20) + u8 reg_data[MLX4_MAILBOX_SIZE-MLX4_ACCESS_REG_HEADER_SIZE]; +} __attribute__((__packed__)); + +/** + * mlx4_ACCESS_REG - Generic access reg command. + * @dev: mlx4_dev. + * @reg_id: register ID to access. + * @method: Access method Read/Write. + * @reg_len: register length to Read/Write in bytes. + * @reg_data: reg_data pointer to Read/Write From/To. + * + * Access ConnectX registers FW command. + * Returns 0 on success and copies outbox mlx4_access_reg data + * field into reg_data or a negative error code. + */ +static int mlx4_ACCESS_REG(struct mlx4_dev *dev, u16 reg_id, + enum mlx4_access_reg_method method, + u16 reg_len, void *reg_data) +{ + struct mlx4_cmd_mailbox *inbox, *outbox; + struct mlx4_access_reg *inbuf, *outbuf; + int err; + + inbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(inbox)) + return PTR_ERR(inbox); + + outbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(outbox)) { + mlx4_free_cmd_mailbox(dev, inbox); + return PTR_ERR(outbox); + } + + inbuf = inbox->buf; + outbuf = outbox->buf; + + inbuf->constant1 = cpu_to_be16(0x1<<11 | 0x4); + inbuf->constant2 = 0x1; + inbuf->reg_id = cpu_to_be16(reg_id); + inbuf->method = method & MLX4_ACCESS_REG_METHOD_MASK; + + reg_len = min(reg_len, (u16)(sizeof(inbuf->reg_data))); + inbuf->len_const = + cpu_to_be16(((reg_len/4 + 1) & MLX4_ACCESS_REG_LEN_MASK) | + ((0x3) << 12)); + + memcpy(inbuf->reg_data, reg_data, reg_len); + err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, 0, 0, + MLX4_CMD_ACCESS_REG, MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_WRAPPED); + if (err) + goto out; + + if (outbuf->status & MLX4_ACCESS_REG_STATUS_MASK) { + err = outbuf->status & MLX4_ACCESS_REG_STATUS_MASK; + mlx4_err(dev, + "MLX4_CMD_ACCESS_REG(%x) returned REG status (%x)\n", + reg_id, err); + goto out; + } + + memcpy(reg_data, outbuf->reg_data, reg_len); +out: + mlx4_free_cmd_mailbox(dev, inbox); + mlx4_free_cmd_mailbox(dev, outbox); + return err; +} + +/* ConnectX registers IDs */ +enum mlx4_reg_id { + MLX4_REG_ID_PTYS = 0x5004, +}; + +/** + * mlx4_ACCESS_PTYS_REG - Access PTYs (Port Type and Speed) + * register + * @dev: mlx4_dev. + * @method: Access method Read/Write. + * @ptys_reg: PTYS register data pointer. + * + * Access ConnectX PTYS register, to Read/Write Port Type/Speed + * configuration + * Returns 0 on success or a negative error code. 
+ */ +int mlx4_ACCESS_PTYS_REG(struct mlx4_dev *dev, + enum mlx4_access_reg_method method, + struct mlx4_ptys_reg *ptys_reg) +{ + return mlx4_ACCESS_REG(dev, MLX4_REG_ID_PTYS, + method, sizeof(*ptys_reg), ptys_reg); +} +EXPORT_SYMBOL_GPL(mlx4_ACCESS_PTYS_REG); + +int mlx4_ACCESS_REG_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + struct mlx4_access_reg *inbuf = inbox->buf; + u8 method = inbuf->method & MLX4_ACCESS_REG_METHOD_MASK; + u16 reg_id = be16_to_cpu(inbuf->reg_id); + + if (slave != mlx4_master_func_num(dev) && + method == MLX4_ACCESS_REG_WRITE) + return -EPERM; + + if (reg_id == MLX4_REG_ID_PTYS) { + struct mlx4_ptys_reg *ptys_reg = + (struct mlx4_ptys_reg *)inbuf->reg_data; + + ptys_reg->local_port = + mlx4_slave_convert_port(dev, slave, + ptys_reg->local_port); + } + + return mlx4_cmd_box(dev, inbox->dma, outbox->dma, vhcr->in_modifier, + 0, MLX4_CMD_ACCESS_REG, MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); +} + +static int mlx4_SET_PORT_phv_bit(struct mlx4_dev *dev, u8 port, u8 phv_bit) +{ +#define SET_PORT_GEN_PHV_VALID 0x10 +#define SET_PORT_GEN_PHV_EN 0x80 + + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_port_general_context *context; + u32 in_mod; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + context = mailbox->buf; + + context->flags2 |= SET_PORT_GEN_PHV_VALID; + if (phv_bit) + context->phv_en |= SET_PORT_GEN_PHV_EN; + + in_mod = MLX4_SET_PORT_GENERAL << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv) +{ + int err; + struct mlx4_func_cap func_cap; + + memset(&func_cap, 0, sizeof(func_cap)); + err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap); + if (!err) + *phv = func_cap.flags0 & QUERY_FUNC_CAP_PHV_BIT; + return err; +} +EXPORT_SYMBOL(get_phv_bit); + +int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val) +{ + int ret; + + if (mlx4_is_slave(dev)) + return -EPERM; + + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN && + !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN)) { + ret = mlx4_SET_PORT_phv_bit(dev, port, new_val); + if (!ret) + dev->caps.phv_bit[port] = new_val; + return ret; + } + + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(set_phv_bit); + +int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, + bool *vlan_offload_disabled) +{ + struct mlx4_func_cap func_cap; + int err; + + memset(&func_cap, 0, sizeof(func_cap)); + err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap); + if (!err) + *vlan_offload_disabled = + !!(func_cap.flags0 & + QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE); + return err; +} +EXPORT_SYMBOL(mlx4_get_is_vlan_offload_disabled); + +void mlx4_replace_zero_macs(struct mlx4_dev *dev) +{ + int i; + u8 mac_addr[ETH_ALEN]; + + dev->port_random_macs = 0; + for (i = 1; i <= dev->caps.num_ports; ++i) + if (!dev->caps.def_mac[i] && + dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) { + eth_random_addr(mac_addr); + dev->port_random_macs |= 1 << i; + dev->caps.def_mac[i] = ether_addr_to_u64(mac_addr); + } +} +EXPORT_SYMBOL_GPL(mlx4_replace_zero_macs); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h new file mode 100644 index 000000000..cf64e54ee --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -0,0 +1,257 
@@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX4_FW_H +#define MLX4_FW_H + +#include "mlx4.h" +#include "icm.h" + +struct mlx4_mod_stat_cfg { + u8 log_pg_sz; + u8 log_pg_sz_m; +}; + +struct mlx4_port_cap { + u8 link_state; + u8 supported_port_types; + u8 suggested_type; + u8 default_sense; + u8 log_max_macs; + u8 log_max_vlans; + int ib_mtu; + int max_port_width; + int max_vl; + int max_tc_eth; + int max_gids; + int max_pkeys; + u64 def_mac; + u16 eth_mtu; + int trans_type; + int vendor_oui; + u16 wavelength; + u64 trans_code; + u8 dmfs_optimized_state; +}; + +struct mlx4_dev_cap { + int max_srq_sz; + int max_qp_sz; + int reserved_qps; + int max_qps; + int reserved_srqs; + int max_srqs; + int max_cq_sz; + int reserved_cqs; + int max_cqs; + int max_mpts; + int reserved_eqs; + int max_eqs; + int num_sys_eqs; + int reserved_mtts; + int reserved_mrws; + int max_requester_per_qp; + int max_responder_per_qp; + int max_rdma_global; + int local_ca_ack_delay; + int num_ports; + u32 max_msg_sz; + u16 stat_rate_support; + int fs_log_max_ucast_qp_range_size; + int fs_max_num_qp_per_entry; + u64 flags; + u64 flags2; + int reserved_uars; + int uar_size; + int min_page_sz; + int bf_reg_size; + int bf_regs_per_page; + int max_sq_sg; + int max_sq_desc_sz; + int max_rq_sg; + int max_rq_desc_sz; + int max_qp_per_mcg; + int reserved_mgms; + int max_mcgs; + int reserved_pds; + int max_pds; + int reserved_xrcds; + int max_xrcds; + int qpc_entry_sz; + int rdmarc_entry_sz; + int altc_entry_sz; + int aux_entry_sz; + int srq_entry_sz; + int cqc_entry_sz; + int eqc_entry_sz; + int dmpt_entry_sz; + int cmpt_entry_sz; + int mtt_entry_sz; + int resize_srq; + u32 bmme_flags; + u32 reserved_lkey; + u64 max_icm_sz; + int max_gso_sz; + int max_rss_tbl_sz; + u32 max_counters; + u32 dmfs_high_rate_qpn_base; + u32 dmfs_high_rate_qpn_range; + struct mlx4_rate_limit_caps rl_caps; + u32 health_buffer_addrs; + struct mlx4_port_cap port_cap[MLX4_MAX_PORTS + 1]; + bool wol_port[MLX4_MAX_PORTS + 1]; + bool map_clock_to_user; +}; 
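+/* Per-function capabilities, as returned by the QUERY_FUNC_CAP firmware command */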
+ +struct mlx4_func_cap { + u8 num_ports; + u8 flags; + u32 pf_context_behaviour; + int qp_quota; + int cq_quota; + int srq_quota; + int mpt_quota; + int mtt_quota; + int max_eq; + int reserved_eq; + int mcg_quota; + struct mlx4_spec_qps spec_qps; + u32 reserved_lkey; + u8 physical_port; + u8 flags0; + u8 flags1; + u64 phys_port_id; + u32 extra_flags; +}; + +struct mlx4_func { + int bus; + int device; + int function; + int physical_function; + int rsvd_eqs; + int max_eq; + int rsvd_uars; +}; + +struct mlx4_adapter { + char board_id[MLX4_BOARD_ID_LEN]; + u8 inta_pin; +}; + +struct mlx4_init_hca_param { + u64 qpc_base; + u64 rdmarc_base; + u64 auxc_base; + u64 altc_base; + u64 srqc_base; + u64 cqc_base; + u64 eqc_base; + u64 mc_base; + u64 dmpt_base; + u64 cmpt_base; + u64 mtt_base; + u64 global_caps; + u8 log_mc_entry_sz; + u8 log_mc_hash_sz; + u16 hca_core_clock; /* Internal Clock Frequency (in MHz) */ + u8 log_num_qps; + u8 log_num_srqs; + u8 log_num_cqs; + u8 log_num_eqs; + u16 num_sys_eqs; + u8 log_rd_per_qp; + u8 log_mc_table_sz; + u8 log_mpt_sz; + u8 log_uar_sz; + u8 mw_enabled; /* Enable memory windows */ + u8 uar_page_sz; /* log pg sz in 4k chunks */ + u8 steering_mode; /* for QUERY_HCA */ + u8 dmfs_high_steer_mode; /* for QUERY_HCA */ + u64 dev_cap_enabled; + u16 cqe_size; /* For use only when CQE stride feature enabled */ + u16 eqe_size; /* For use only when EQE stride feature enabled */ + u8 rss_ip_frags; + u8 phv_check_en; /* for QUERY_HCA */ +}; + +struct mlx4_init_ib_param { + int port_width; + int vl_cap; + int mtu_cap; + u16 gid_cap; + u16 pkey_cap; + int set_guid0; + u64 guid0; + int set_node_guid; + u64 node_guid; + int set_si_guid; + u64 si_guid; +}; + +struct mlx4_set_ib_param { + int set_si_guid; + int reset_qkey_viol; + u64 si_guid; + u32 cap_mask; +}; + +void mlx4_dev_cap_dump(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap); +int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap); +int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_cap); +int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, + struct mlx4_func_cap *func_cap); +int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_QUERY_FUNC(struct mlx4_dev *dev, struct mlx4_func *func, int slave); +int mlx4_MAP_FA(struct mlx4_dev *dev, struct mlx4_icm *icm); +int mlx4_UNMAP_FA(struct mlx4_dev *dev); +int mlx4_RUN_FW(struct mlx4_dev *dev); +int mlx4_QUERY_FW(struct mlx4_dev *dev); +int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter); +int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param); +int mlx4_QUERY_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param); +int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic); +int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt); +int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages); +int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm); +int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev); +int mlx4_NOP(struct mlx4_dev *dev); +int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg); +void mlx4_opreq_action(struct work_struct *work); + +#endif /* MLX4_FW_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.c b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c new file mode 100644 index 000000000..3a09d7122 --- /dev/null +++ 
b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. + * All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/export.h> +#include "fw_qos.h" +#include "fw.h" + +enum { + /* allocate vpp opcode modifiers */ + MLX4_ALLOCATE_VPP_ALLOCATE = 0x0, + MLX4_ALLOCATE_VPP_QUERY = 0x1 +}; + +enum { + /* set vport qos opcode modifiers */ + MLX4_SET_VPORT_QOS_SET = 0x0, + MLX4_SET_VPORT_QOS_QUERY = 0x1 +}; + +struct mlx4_set_port_prio2tc_context { + u8 prio2tc[4]; +}; + +struct mlx4_port_scheduler_tc_cfg_be { + __be16 pg; + __be16 bw_precentage; + __be16 max_bw_units; /* 3-100Mbps, 4-1Gbps, other values - reserved */ + __be16 max_bw_value; +}; + +struct mlx4_set_port_scheduler_context { + struct mlx4_port_scheduler_tc_cfg_be tc[MLX4_NUM_TC]; +}; + +/* Granular Qos (per VF) section */ +struct mlx4_alloc_vpp_param { + __be32 available_vpp; + __be32 vpp_p_up[MLX4_NUM_UP]; +}; + +struct mlx4_prio_qos_param { + __be32 bw_share; + __be32 max_avg_bw; + __be32 reserved; + __be32 enable; + __be32 reserved1[4]; +}; + +struct mlx4_set_vport_context { + __be32 reserved[8]; + struct mlx4_prio_qos_param qos_p_up[MLX4_NUM_UP]; +}; + +int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_port_prio2tc_context *context; + int err; + u32 in_mod; + int i; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + context = mailbox->buf; + + for (i = 0; i < MLX4_NUM_UP; i += 2) + context->prio2tc[i >> 1] = prio2tc[i] << 4 | prio2tc[i + 1]; + + in_mod = MLX4_SET_PORT_PRIO2TC << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_SET_PORT_PRIO2TC); + +int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, + u8 *pg, u16 *ratelimit) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_port_scheduler_context *context; + int err; + u32 in_mod; + int i; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if 
(IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + context = mailbox->buf; + + for (i = 0; i < MLX4_NUM_TC; i++) { + struct mlx4_port_scheduler_tc_cfg_be *tc = &context->tc[i]; + u16 r; + + if (ratelimit && ratelimit[i]) { + if (ratelimit[i] <= MLX4_MAX_100M_UNITS_VAL) { + r = ratelimit[i]; + tc->max_bw_units = + htons(MLX4_RATELIMIT_100M_UNITS); + } else { + r = ratelimit[i] / 10; + tc->max_bw_units = + htons(MLX4_RATELIMIT_1G_UNITS); + } + tc->max_bw_value = htons(r); + } else { + tc->max_bw_value = htons(MLX4_RATELIMIT_DEFAULT); + tc->max_bw_units = htons(MLX4_RATELIMIT_1G_UNITS); + } + + tc->pg = htons(pg[i]); + tc->bw_precentage = htons(tc_tx_bw[i]); + } + + in_mod = MLX4_SET_PORT_SCHEDULER << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_SET_PORT_SCHEDULER); + +int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port, + u16 *available_vpp, u8 *vpp_p_up) +{ + int i; + int err; + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_alloc_vpp_param *out_param; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + out_param = mailbox->buf; + + err = mlx4_cmd_box(dev, 0, mailbox->dma, port, + MLX4_ALLOCATE_VPP_QUERY, + MLX4_CMD_ALLOCATE_VPP, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) + goto out; + + /* Total number of supported VPPs */ + *available_vpp = (u16)be32_to_cpu(out_param->available_vpp); + + for (i = 0; i < MLX4_NUM_UP; i++) + vpp_p_up[i] = (u8)be32_to_cpu(out_param->vpp_p_up[i]); + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + + return err; +} +EXPORT_SYMBOL(mlx4_ALLOCATE_VPP_get); + +int mlx4_ALLOCATE_VPP_set(struct mlx4_dev *dev, u8 port, u8 *vpp_p_up) +{ + int i; + int err; + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_alloc_vpp_param *in_param; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + in_param = mailbox->buf; + + for (i = 0; i < MLX4_NUM_UP; i++) + in_param->vpp_p_up[i] = cpu_to_be32(vpp_p_up[i]); + + err = mlx4_cmd(dev, mailbox->dma, port, + MLX4_ALLOCATE_VPP_ALLOCATE, + MLX4_CMD_ALLOCATE_VPP, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_ALLOCATE_VPP_set); + +int mlx4_SET_VPORT_QOS_get(struct mlx4_dev *dev, u8 port, u8 vport, + struct mlx4_vport_qos_param *out_param) +{ + int i; + int err; + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_vport_context *ctx; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + ctx = mailbox->buf; + + err = mlx4_cmd_box(dev, 0, mailbox->dma, (vport << 8) | port, + MLX4_SET_VPORT_QOS_QUERY, + MLX4_CMD_SET_VPORT_QOS, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) + goto out; + + for (i = 0; i < MLX4_NUM_UP; i++) { + out_param[i].bw_share = be32_to_cpu(ctx->qos_p_up[i].bw_share); + out_param[i].max_avg_bw = + be32_to_cpu(ctx->qos_p_up[i].max_avg_bw); + out_param[i].enable = + !!(be32_to_cpu(ctx->qos_p_up[i].enable) & 31); + } + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + + return err; +} +EXPORT_SYMBOL(mlx4_SET_VPORT_QOS_get); + +int mlx4_SET_VPORT_QOS_set(struct mlx4_dev *dev, u8 port, u8 vport, + struct mlx4_vport_qos_param *in_param) +{ + int i; + int err; + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_vport_context *ctx; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + ctx = mailbox->buf; + + 
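+	/* pack the caller's per-priority QoS parameters into the big-endian vport context */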
for (i = 0; i < MLX4_NUM_UP; i++) { + ctx->qos_p_up[i].bw_share = cpu_to_be32(in_param[i].bw_share); + ctx->qos_p_up[i].max_avg_bw = + cpu_to_be32(in_param[i].max_avg_bw); + ctx->qos_p_up[i].enable = + cpu_to_be32(in_param[i].enable << 31); + } + + err = mlx4_cmd(dev, mailbox->dma, (vport << 8) | port, + MLX4_SET_VPORT_QOS_SET, + MLX4_CMD_SET_VPORT_QOS, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_SET_VPORT_QOS_set); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h new file mode 100644 index 000000000..954b86faa --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. + * All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX4_FW_QOS_H +#define MLX4_FW_QOS_H + +#include <linux/mlx4/cmd.h> +#include <linux/mlx4/device.h> + +#define MLX4_NUM_UP 8 +#define MLX4_NUM_TC 8 + +/* Default supported priorities for VPP allocation */ +#define MLX4_DEFAULT_QOS_PRIO (0) + +/* Derived from FW feature definition, 0 is the default vport fo all QPs */ +#define MLX4_VPP_DEFAULT_VPORT (0) + +struct mlx4_vport_qos_param { + u32 bw_share; + u32 max_avg_bw; + u8 enable; +}; + +/** + * mlx4_SET_PORT_PRIO2TC - This routine maps user priorities to traffic + * classes of a given port and device. + * + * @dev: mlx4_dev. + * @port: Physical port number. + * @prio2tc: Array of TC associated with each priorities. + * + * Returns 0 on success or a negative mlx4_core errno code. + **/ +int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc); + +/** + * mlx4_SET_PORT_SCHEDULER - This routine configures the arbitration between + * traffic classes (ETS) and configured rate limit for traffic classes. + * tc_tx_bw, pg and ratelimit are arrays where each index represents a TC. + * The description for those parameters below refers to a single TC. + * + * @dev: mlx4_dev. + * @port: Physical port number. + * @tc_tx_bw: The percentage of the bandwidth allocated for traffic class + * within a TC group. 
The sum of the bw_percentage of all the traffic + * classes within a TC group must equal 100% for correct operation. + * @pg: The TC group the traffic class is associated with. + * @ratelimit: The maximal bandwidth allowed for the use by this traffic class. + * + * Returns 0 on success or a negative mlx4_core errno code. + **/ +int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, + u8 *pg, u16 *ratelimit); +/** + * mlx4_ALLOCATE_VPP_get - Query port VPP available resources and allocation. + * Before distribution of VPPs to priorities, only available_vpp is returned. + * After initialization it returns the distribution of VPPs among priorities. + * + * @dev: mlx4_dev. + * @port: Physical port number. + * @available_vpp: Pointer to variable where number of available VPPs is stored + * @vpp_p_up: Distribution of VPPs to priorities is stored in this array + * + * Returns 0 on success or a negative mlx4_core errno code. + **/ +int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port, + u16 *available_vpp, u8 *vpp_p_up); +/** + * mlx4_ALLOCATE_VPP_set - Distribution of VPPs among differnt priorities. + * The total number of VPPs assigned to all for a port must not exceed + * the value reported by available_vpp in mlx4_ALLOCATE_VPP_get. + * VPP allocation is allowed only after the port type has been set, + * and while no QPs are open for this port. + * + * @dev: mlx4_dev. + * @port: Physical port number. + * @vpp_p_up: Allocation of VPPs to different priorities. + * + * Returns 0 on success or a negative mlx4_core errno code. + **/ +int mlx4_ALLOCATE_VPP_set(struct mlx4_dev *dev, u8 port, u8 *vpp_p_up); + +/** + * mlx4_SET_VPORT_QOS_get - Query QoS proporties of a Vport. + * Each priority allowed for the Vport is assigned with a share of the BW, + * and a BW limitation. This commands query the current QoS values. + * + * @dev: mlx4_dev. + * @port: Physical port number. + * @vport: Vport id. + * @out_param: Array of mlx4_vport_qos_param that will contain the values. + * + * Returns 0 on success or a negative mlx4_core errno code. + **/ +int mlx4_SET_VPORT_QOS_get(struct mlx4_dev *dev, u8 port, u8 vport, + struct mlx4_vport_qos_param *out_param); + +/** + * mlx4_SET_VPORT_QOS_set - Set QoS proporties of a Vport. + * QoS parameters can be modified at any time, but must be initialized + * before any QP is associated with the VPort. + * + * @dev: mlx4_dev. + * @port: Physical port number. + * @vport: Vport id. + * @in_param: Array of mlx4_vport_qos_param which holds the requested values. + * + * Returns 0 on success or a negative mlx4_core errno code. + **/ +int mlx4_SET_VPORT_QOS_set(struct mlx4_dev *dev, u8 port, u8 vport, + struct mlx4_vport_qos_param *in_param); + +#endif /* MLX4_FW_QOS_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c new file mode 100644 index 000000000..59b8b3c73 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -0,0 +1,493 @@ +/* + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/scatterlist.h> +#include <linux/slab.h> + +#include <linux/mlx4/cmd.h> + +#include "mlx4.h" +#include "icm.h" +#include "fw.h" + +/* + * We allocate in as big chunks as we can, up to a maximum of 256 KB + * per chunk. Note that the chunks are not necessarily in contiguous + * physical memory. + */ +enum { + MLX4_ICM_ALLOC_SIZE = 1 << 18, + MLX4_TABLE_CHUNK_SIZE = 1 << 18, +}; + +static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk) +{ + int i; + + if (chunk->nsg > 0) + dma_unmap_sg(&dev->persist->pdev->dev, chunk->sg, chunk->npages, + DMA_BIDIRECTIONAL); + + for (i = 0; i < chunk->npages; ++i) + __free_pages(sg_page(&chunk->sg[i]), + get_order(chunk->sg[i].length)); +} + +static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk) +{ + int i; + + for (i = 0; i < chunk->npages; ++i) + dma_free_coherent(&dev->persist->pdev->dev, + chunk->buf[i].size, + chunk->buf[i].addr, + chunk->buf[i].dma_addr); +} + +void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent) +{ + struct mlx4_icm_chunk *chunk, *tmp; + + if (!icm) + return; + + list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) { + if (coherent) + mlx4_free_icm_coherent(dev, chunk); + else + mlx4_free_icm_pages(dev, chunk); + + kfree(chunk); + } + + kfree(icm); +} + +static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order, + gfp_t gfp_mask, int node) +{ + struct page *page; + + page = alloc_pages_node(node, gfp_mask, order); + if (!page) { + page = alloc_pages(gfp_mask, order); + if (!page) + return -ENOMEM; + } + + sg_set_page(mem, page, PAGE_SIZE << order, 0); + return 0; +} + +static int mlx4_alloc_icm_coherent(struct device *dev, struct mlx4_icm_buf *buf, + int order, gfp_t gfp_mask) +{ + buf->addr = dma_alloc_coherent(dev, PAGE_SIZE << order, + &buf->dma_addr, gfp_mask); + if (!buf->addr) + return -ENOMEM; + + if (offset_in_page(buf->addr)) { + dma_free_coherent(dev, PAGE_SIZE << order, buf->addr, + buf->dma_addr); + return -ENOMEM; + } + + buf->size = PAGE_SIZE << order; + return 0; +} + +struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, + gfp_t gfp_mask, int coherent) +{ + struct mlx4_icm *icm; + struct mlx4_icm_chunk *chunk = 
NULL; + int cur_order; + gfp_t mask; + int ret; + + /* We use sg_set_buf for coherent allocs, which assumes low memory */ + BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM)); + + icm = kmalloc_node(sizeof(*icm), + gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN), + dev->numa_node); + if (!icm) { + icm = kmalloc(sizeof(*icm), + gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); + if (!icm) + return NULL; + } + + icm->refcount = 0; + INIT_LIST_HEAD(&icm->chunk_list); + + cur_order = get_order(MLX4_ICM_ALLOC_SIZE); + + while (npages > 0) { + if (!chunk) { + chunk = kzalloc_node(sizeof(*chunk), + gfp_mask & ~(__GFP_HIGHMEM | + __GFP_NOWARN), + dev->numa_node); + if (!chunk) { + chunk = kzalloc(sizeof(*chunk), + gfp_mask & ~(__GFP_HIGHMEM | + __GFP_NOWARN)); + if (!chunk) + goto fail; + } + chunk->coherent = coherent; + + if (!coherent) + sg_init_table(chunk->sg, MLX4_ICM_CHUNK_LEN); + list_add_tail(&chunk->list, &icm->chunk_list); + } + + while (1 << cur_order > npages) + --cur_order; + + mask = gfp_mask; + if (cur_order) + mask &= ~__GFP_DIRECT_RECLAIM; + + if (coherent) + ret = mlx4_alloc_icm_coherent(&dev->persist->pdev->dev, + &chunk->buf[chunk->npages], + cur_order, mask); + else + ret = mlx4_alloc_icm_pages(&chunk->sg[chunk->npages], + cur_order, mask, + dev->numa_node); + + if (ret) { + if (--cur_order < 0) + goto fail; + else + continue; + } + + ++chunk->npages; + + if (coherent) + ++chunk->nsg; + else if (chunk->npages == MLX4_ICM_CHUNK_LEN) { + chunk->nsg = dma_map_sg(&dev->persist->pdev->dev, + chunk->sg, chunk->npages, + DMA_BIDIRECTIONAL); + + if (!chunk->nsg) + goto fail; + } + + if (chunk->npages == MLX4_ICM_CHUNK_LEN) + chunk = NULL; + + npages -= 1 << cur_order; + } + + if (!coherent && chunk) { + chunk->nsg = dma_map_sg(&dev->persist->pdev->dev, chunk->sg, + chunk->npages, DMA_BIDIRECTIONAL); + + if (!chunk->nsg) + goto fail; + } + + return icm; + +fail: + mlx4_free_icm(dev, icm, coherent); + return NULL; +} + +static int mlx4_MAP_ICM(struct mlx4_dev *dev, struct mlx4_icm *icm, u64 virt) +{ + return mlx4_map_cmd(dev, MLX4_CMD_MAP_ICM, icm, virt); +} + +static int mlx4_UNMAP_ICM(struct mlx4_dev *dev, u64 virt, u32 page_count) +{ + return mlx4_cmd(dev, virt, page_count, 0, MLX4_CMD_UNMAP_ICM, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); +} + +int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm) +{ + return mlx4_map_cmd(dev, MLX4_CMD_MAP_ICM_AUX, icm, -1); +} + +int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev) +{ + return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_ICM_AUX, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); +} + +int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj) +{ + u32 i = (obj & (table->num_obj - 1)) / + (MLX4_TABLE_CHUNK_SIZE / table->obj_size); + int ret = 0; + + mutex_lock(&table->mutex); + + if (table->icm[i]) { + ++table->icm[i]->refcount; + goto out; + } + + table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT, + (table->lowmem ? 
GFP_KERNEL : GFP_HIGHUSER) | + __GFP_NOWARN, table->coherent); + if (!table->icm[i]) { + ret = -ENOMEM; + goto out; + } + + if (mlx4_MAP_ICM(dev, table->icm[i], table->virt + + (u64) i * MLX4_TABLE_CHUNK_SIZE)) { + mlx4_free_icm(dev, table->icm[i], table->coherent); + table->icm[i] = NULL; + ret = -ENOMEM; + goto out; + } + + ++table->icm[i]->refcount; + +out: + mutex_unlock(&table->mutex); + return ret; +} + +void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj) +{ + u32 i; + u64 offset; + + i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size); + + mutex_lock(&table->mutex); + + if (--table->icm[i]->refcount == 0) { + offset = (u64) i * MLX4_TABLE_CHUNK_SIZE; + mlx4_UNMAP_ICM(dev, table->virt + offset, + MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE); + mlx4_free_icm(dev, table->icm[i], table->coherent); + table->icm[i] = NULL; + } + + mutex_unlock(&table->mutex); +} + +void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj, + dma_addr_t *dma_handle) +{ + int offset, dma_offset, i; + u64 idx; + struct mlx4_icm_chunk *chunk; + struct mlx4_icm *icm; + void *addr = NULL; + + if (!table->lowmem) + return NULL; + + mutex_lock(&table->mutex); + + idx = (u64) (obj & (table->num_obj - 1)) * table->obj_size; + icm = table->icm[idx / MLX4_TABLE_CHUNK_SIZE]; + dma_offset = offset = idx % MLX4_TABLE_CHUNK_SIZE; + + if (!icm) + goto out; + + list_for_each_entry(chunk, &icm->chunk_list, list) { + for (i = 0; i < chunk->npages; ++i) { + dma_addr_t dma_addr; + size_t len; + + if (table->coherent) { + len = chunk->buf[i].size; + dma_addr = chunk->buf[i].dma_addr; + addr = chunk->buf[i].addr; + } else { + struct page *page; + + len = sg_dma_len(&chunk->sg[i]); + dma_addr = sg_dma_address(&chunk->sg[i]); + + /* XXX: we should never do this for highmem + * allocation. This function either needs + * to be split, or the kernel virtual address + * return needs to be made optional. + */ + page = sg_page(&chunk->sg[i]); + addr = lowmem_page_address(page); + } + + if (dma_handle && dma_offset >= 0) { + if (len > dma_offset) + *dma_handle = dma_addr + dma_offset; + dma_offset -= len; + } + + /* + * DMA mapping can merge pages but not split them, + * so if we found the page, dma_handle has already + * been assigned to. + */ + if (len > offset) + goto out; + offset -= len; + } + } + + addr = NULL; +out: + mutex_unlock(&table->mutex); + return addr ? 
addr + offset : NULL; +} + +int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, + u32 start, u32 end) +{ + int inc = MLX4_TABLE_CHUNK_SIZE / table->obj_size; + int err; + u32 i; + + for (i = start; i <= end; i += inc) { + err = mlx4_table_get(dev, table, i); + if (err) + goto fail; + } + + return 0; + +fail: + while (i > start) { + i -= inc; + mlx4_table_put(dev, table, i); + } + + return err; +} + +void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, + u32 start, u32 end) +{ + u32 i; + + for (i = start; i <= end; i += MLX4_TABLE_CHUNK_SIZE / table->obj_size) + mlx4_table_put(dev, table, i); +} + +int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, + u64 virt, int obj_size, u32 nobj, int reserved, + int use_lowmem, int use_coherent) +{ + int obj_per_chunk; + int num_icm; + unsigned chunk_size; + int i; + u64 size; + + obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size; + if (WARN_ON(!obj_per_chunk)) + return -EINVAL; + num_icm = DIV_ROUND_UP(nobj, obj_per_chunk); + + table->icm = kvcalloc(num_icm, sizeof(*table->icm), GFP_KERNEL); + if (!table->icm) + return -ENOMEM; + table->virt = virt; + table->num_icm = num_icm; + table->num_obj = nobj; + table->obj_size = obj_size; + table->lowmem = use_lowmem; + table->coherent = use_coherent; + mutex_init(&table->mutex); + + size = (u64) nobj * obj_size; + for (i = 0; i * MLX4_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) { + chunk_size = MLX4_TABLE_CHUNK_SIZE; + if ((i + 1) * MLX4_TABLE_CHUNK_SIZE > size) + chunk_size = PAGE_ALIGN(size - + i * MLX4_TABLE_CHUNK_SIZE); + + table->icm[i] = mlx4_alloc_icm(dev, chunk_size >> PAGE_SHIFT, + (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) | + __GFP_NOWARN, use_coherent); + if (!table->icm[i]) + goto err; + if (mlx4_MAP_ICM(dev, table->icm[i], virt + i * MLX4_TABLE_CHUNK_SIZE)) { + mlx4_free_icm(dev, table->icm[i], use_coherent); + table->icm[i] = NULL; + goto err; + } + + /* + * Add a reference to this ICM chunk so that it never + * gets freed (since it contains reserved firmware objects). + */ + ++table->icm[i]->refcount; + } + + return 0; + +err: + for (i = 0; i < num_icm; ++i) + if (table->icm[i]) { + mlx4_UNMAP_ICM(dev, virt + i * MLX4_TABLE_CHUNK_SIZE, + MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE); + mlx4_free_icm(dev, table->icm[i], use_coherent); + } + + kvfree(table->icm); + + return -ENOMEM; +} + +void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table) +{ + int i; + + for (i = 0; i < table->num_icm; ++i) + if (table->icm[i]) { + mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE, + MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE); + mlx4_free_icm(dev, table->icm[i], table->coherent); + } + + kvfree(table->icm); +} diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h new file mode 100644 index 000000000..d199874b1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/icm.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX4_ICM_H +#define MLX4_ICM_H + +#include <linux/list.h> +#include <linux/pci.h> +#include <linux/mutex.h> + +#define MLX4_ICM_CHUNK_LEN \ + ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \ + (sizeof(struct scatterlist))) + +enum { + MLX4_ICM_PAGE_SHIFT = 12, + MLX4_ICM_PAGE_SIZE = 1 << MLX4_ICM_PAGE_SHIFT, +}; + +struct mlx4_icm_buf { + void *addr; + size_t size; + dma_addr_t dma_addr; +}; + +struct mlx4_icm_chunk { + struct list_head list; + int npages; + int nsg; + bool coherent; + union { + struct scatterlist sg[MLX4_ICM_CHUNK_LEN]; + struct mlx4_icm_buf buf[MLX4_ICM_CHUNK_LEN]; + }; +}; + +struct mlx4_icm { + struct list_head chunk_list; + int refcount; +}; + +struct mlx4_icm_iter { + struct mlx4_icm *icm; + struct mlx4_icm_chunk *chunk; + int page_idx; +}; + +struct mlx4_dev; + +struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, + gfp_t gfp_mask, int coherent); +void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent); + +int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj); +void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj); +int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, + u32 start, u32 end); +void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, + u32 start, u32 end); +int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, + u64 virt, int obj_size, u32 nobj, int reserved, + int use_lowmem, int use_coherent); +void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table); +void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj, dma_addr_t *dma_handle); + +static inline void mlx4_icm_first(struct mlx4_icm *icm, + struct mlx4_icm_iter *iter) +{ + iter->icm = icm; + iter->chunk = list_empty(&icm->chunk_list) ? 
+ NULL : list_entry(icm->chunk_list.next, + struct mlx4_icm_chunk, list); + iter->page_idx = 0; +} + +static inline int mlx4_icm_last(struct mlx4_icm_iter *iter) +{ + return !iter->chunk; +} + +static inline void mlx4_icm_next(struct mlx4_icm_iter *iter) +{ + if (++iter->page_idx >= iter->chunk->nsg) { + if (iter->chunk->list.next == &iter->icm->chunk_list) { + iter->chunk = NULL; + return; + } + + iter->chunk = list_entry(iter->chunk->list.next, + struct mlx4_icm_chunk, list); + iter->page_idx = 0; + } +} + +static inline dma_addr_t mlx4_icm_addr(struct mlx4_icm_iter *iter) +{ + if (iter->chunk->coherent) + return iter->chunk->buf[iter->page_idx].dma_addr; + else + return sg_dma_address(&iter->chunk->sg[iter->page_idx]); +} + +static inline unsigned long mlx4_icm_size(struct mlx4_icm_iter *iter) +{ + if (iter->chunk->coherent) + return iter->chunk->buf[iter->page_idx].size; + else + return sg_dma_len(&iter->chunk->sg[iter->page_idx]); +} + +int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm); +int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev); + +#endif /* MLX4_ICM_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c new file mode 100644 index 000000000..65482f004 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/slab.h> +#include <linux/export.h> +#include <linux/errno.h> +#include <net/devlink.h> + +#include "mlx4.h" + +struct mlx4_device_context { + struct list_head list; + struct list_head bond_list; + struct mlx4_interface *intf; + void *context; +}; + +static LIST_HEAD(intf_list); +static LIST_HEAD(dev_list); +static DEFINE_MUTEX(intf_mutex); + +static void mlx4_add_device(struct mlx4_interface *intf, struct mlx4_priv *priv) +{ + struct mlx4_device_context *dev_ctx; + + dev_ctx = kmalloc(sizeof(*dev_ctx), GFP_KERNEL); + if (!dev_ctx) + return; + + dev_ctx->intf = intf; + dev_ctx->context = intf->add(&priv->dev); + + if (dev_ctx->context) { + spin_lock_irq(&priv->ctx_lock); + list_add_tail(&dev_ctx->list, &priv->ctx_list); + spin_unlock_irq(&priv->ctx_lock); + if (intf->activate) + intf->activate(&priv->dev, dev_ctx->context); + } else + kfree(dev_ctx); + +} + +static void mlx4_remove_device(struct mlx4_interface *intf, struct mlx4_priv *priv) +{ + struct mlx4_device_context *dev_ctx; + + list_for_each_entry(dev_ctx, &priv->ctx_list, list) + if (dev_ctx->intf == intf) { + spin_lock_irq(&priv->ctx_lock); + list_del(&dev_ctx->list); + spin_unlock_irq(&priv->ctx_lock); + + intf->remove(&priv->dev, dev_ctx->context); + kfree(dev_ctx); + return; + } +} + +int mlx4_register_interface(struct mlx4_interface *intf) +{ + struct mlx4_priv *priv; + + if (!intf->add || !intf->remove) + return -EINVAL; + + mutex_lock(&intf_mutex); + + list_add_tail(&intf->list, &intf_list); + list_for_each_entry(priv, &dev_list, dev_list) { + if (mlx4_is_mfunc(&priv->dev) && (intf->flags & MLX4_INTFF_BONDING)) { + mlx4_dbg(&priv->dev, + "SRIOV, disabling HA mode for intf proto %d\n", intf->protocol); + intf->flags &= ~MLX4_INTFF_BONDING; + } + mlx4_add_device(intf, priv); + } + + mutex_unlock(&intf_mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_register_interface); + +void mlx4_unregister_interface(struct mlx4_interface *intf) +{ + struct mlx4_priv *priv; + + mutex_lock(&intf_mutex); + + list_for_each_entry(priv, &dev_list, dev_list) + mlx4_remove_device(intf, priv); + + list_del(&intf->list); + + mutex_unlock(&intf_mutex); +} +EXPORT_SYMBOL_GPL(mlx4_unregister_interface); + +int mlx4_do_bond(struct mlx4_dev *dev, bool enable) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_device_context *dev_ctx = NULL, *temp_dev_ctx; + unsigned long flags; + int ret; + LIST_HEAD(bond_list); + + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP)) + return -EOPNOTSUPP; + + ret = mlx4_disable_rx_port_check(dev, enable); + if (ret) { + mlx4_err(dev, "Fail to %s rx port check\n", + enable ? 
"enable" : "disable"); + return ret; + } + if (enable) { + dev->flags |= MLX4_FLAG_BONDED; + } else { + ret = mlx4_virt2phy_port_map(dev, 1, 2); + if (ret) { + mlx4_err(dev, "Fail to reset port map\n"); + return ret; + } + dev->flags &= ~MLX4_FLAG_BONDED; + } + + spin_lock_irqsave(&priv->ctx_lock, flags); + list_for_each_entry_safe(dev_ctx, temp_dev_ctx, &priv->ctx_list, list) { + if (dev_ctx->intf->flags & MLX4_INTFF_BONDING) { + list_add_tail(&dev_ctx->bond_list, &bond_list); + list_del(&dev_ctx->list); + } + } + spin_unlock_irqrestore(&priv->ctx_lock, flags); + + list_for_each_entry(dev_ctx, &bond_list, bond_list) { + dev_ctx->intf->remove(dev, dev_ctx->context); + dev_ctx->context = dev_ctx->intf->add(dev); + + spin_lock_irqsave(&priv->ctx_lock, flags); + list_add_tail(&dev_ctx->list, &priv->ctx_list); + spin_unlock_irqrestore(&priv->ctx_lock, flags); + + mlx4_dbg(dev, "Interface for protocol %d restarted with bonded mode %s\n", + dev_ctx->intf->protocol, enable ? + "enabled" : "disabled"); + } + return 0; +} + +void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, + unsigned long param) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_device_context *dev_ctx; + unsigned long flags; + + spin_lock_irqsave(&priv->ctx_lock, flags); + + list_for_each_entry(dev_ctx, &priv->ctx_list, list) + if (dev_ctx->intf->event) + dev_ctx->intf->event(dev, dev_ctx->context, type, param); + + spin_unlock_irqrestore(&priv->ctx_lock, flags); +} + +int mlx4_register_device(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_interface *intf; + + mutex_lock(&intf_mutex); + + dev->persist->interface_state |= MLX4_INTERFACE_STATE_UP; + list_add_tail(&priv->dev_list, &dev_list); + list_for_each_entry(intf, &intf_list, list) + mlx4_add_device(intf, priv); + + mutex_unlock(&intf_mutex); + mlx4_start_catas_poll(dev); + + return 0; +} + +void mlx4_unregister_device(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_interface *intf; + + if (!(dev->persist->interface_state & MLX4_INTERFACE_STATE_UP)) + return; + + mlx4_stop_catas_poll(dev); + if (dev->persist->interface_state & MLX4_INTERFACE_STATE_DELETION && + mlx4_is_slave(dev)) { + /* In mlx4_remove_one on a VF */ + u32 slave_read = + swab32(readl(&mlx4_priv(dev)->mfunc.comm->slave_read)); + + if (mlx4_comm_internal_err(slave_read)) { + mlx4_dbg(dev, "%s: comm channel is down, entering error state.\n", + __func__); + mlx4_enter_error_state(dev->persist); + } + } + mutex_lock(&intf_mutex); + + list_for_each_entry(intf, &intf_list, list) + mlx4_remove_device(intf, priv); + + list_del(&priv->dev_list); + dev->persist->interface_state &= ~MLX4_INTERFACE_STATE_UP; + + mutex_unlock(&intf_mutex); +} + +void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_device_context *dev_ctx; + unsigned long flags; + void *result = NULL; + + spin_lock_irqsave(&priv->ctx_lock, flags); + + list_for_each_entry(dev_ctx, &priv->ctx_list, list) + if (dev_ctx->intf->protocol == proto && dev_ctx->intf->get_dev) { + result = dev_ctx->intf->get_dev(dev, dev_ctx->context, port); + break; + } + + spin_unlock_irqrestore(&priv->ctx_lock, flags); + + return result; +} +EXPORT_SYMBOL_GPL(mlx4_get_protocol_dev); + +struct devlink_port *mlx4_get_devlink_port(struct mlx4_dev *dev, int port) +{ + struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; + + return &info->devlink_port; +} 
+EXPORT_SYMBOL_GPL(mlx4_get_devlink_port); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c new file mode 100644 index 000000000..d3fc86cd3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -0,0 +1,4544 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <linux/io-mapping.h> +#include <linux/delay.h> +#include <linux/kmod.h> +#include <linux/etherdevice.h> +#include <net/devlink.h> + +#include <uapi/rdma/mlx4-abi.h> +#include <linux/mlx4/device.h> +#include <linux/mlx4/doorbell.h> + +#include "mlx4.h" +#include "fw.h" +#include "icm.h" + +MODULE_AUTHOR("Roland Dreier"); +MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRV_VERSION); + +struct workqueue_struct *mlx4_wq; + +#ifdef CONFIG_MLX4_DEBUG + +int mlx4_debug_level; /* 0 by default */ +module_param_named(debug_level, mlx4_debug_level, int, 0644); +MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); + +#endif /* CONFIG_MLX4_DEBUG */ + +#ifdef CONFIG_PCI_MSI + +static int msi_x = 1; +module_param(msi_x, int, 0444); +MODULE_PARM_DESC(msi_x, "0 - don't use MSI-X, 1 - use MSI-X, >1 - limit number of MSI-X irqs to msi_x"); + +#else /* CONFIG_PCI_MSI */ + +#define msi_x (0) + +#endif /* CONFIG_PCI_MSI */ + +static uint8_t num_vfs[3] = {0, 0, 0}; +static int num_vfs_argc; +module_param_array(num_vfs, byte, &num_vfs_argc, 0444); +MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0\n" + "num_vfs=port1,port2,port1+2"); + +static uint8_t probe_vf[3] = {0, 0, 0}; +static int probe_vfs_argc; +module_param_array(probe_vf, byte, &probe_vfs_argc, 0444); +MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)\n" + "probe_vf=port1,port2,port1+2"); + +static int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; +module_param_named(log_num_mgm_entry_size, + mlx4_log_num_mgm_entry_size, int, 0444); +MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" + " of qp per mcg, for example:" + " 10 gives 248.range: 7 <=" + " log_num_mgm_entry_size <= 12." + " To activate device managed" + " flow steering when available, set to -1"); + +static bool enable_64b_cqe_eqe = true; +module_param(enable_64b_cqe_eqe, bool, 0444); +MODULE_PARM_DESC(enable_64b_cqe_eqe, + "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)"); + +static bool enable_4k_uar; +module_param(enable_4k_uar, bool, 0444); +MODULE_PARM_DESC(enable_4k_uar, + "Enable using 4K UAR. 
Should not be enabled if have VFs which do not support 4K UARs (default: false)"); + +#define PF_CONTEXT_BEHAVIOUR_MASK (MLX4_FUNC_CAP_64B_EQE_CQE | \ + MLX4_FUNC_CAP_EQE_CQE_STRIDE | \ + MLX4_FUNC_CAP_DMFS_A0_STATIC) + +#define RESET_PERSIST_MASK_FLAGS (MLX4_FLAG_SRIOV) + +static char mlx4_version[] = + DRV_NAME ": Mellanox ConnectX core driver v" + DRV_VERSION "\n"; + +static const struct mlx4_profile default_profile = { + .num_qp = 1 << 18, + .num_srq = 1 << 16, + .rdmarc_per_qp = 1 << 4, + .num_cq = 1 << 16, + .num_mcg = 1 << 13, + .num_mpt = 1 << 19, + .num_mtt = 1 << 20, /* It is really num mtt segements */ +}; + +static const struct mlx4_profile low_mem_profile = { + .num_qp = 1 << 17, + .num_srq = 1 << 6, + .rdmarc_per_qp = 1 << 4, + .num_cq = 1 << 8, + .num_mcg = 1 << 8, + .num_mpt = 1 << 9, + .num_mtt = 1 << 7, +}; + +static int log_num_mac = 7; +module_param_named(log_num_mac, log_num_mac, int, 0444); +MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)"); + +static int log_num_vlan; +module_param_named(log_num_vlan, log_num_vlan, int, 0444); +MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)"); +/* Log2 max number of VLANs per ETH port (0-7) */ +#define MLX4_LOG_NUM_VLANS 7 +#define MLX4_MIN_LOG_NUM_VLANS 0 +#define MLX4_MIN_LOG_NUM_MAC 1 + +static bool use_prio; +module_param_named(use_prio, use_prio, bool, 0444); +MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports (deprecated)"); + +int log_mtts_per_seg = ilog2(1); +module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); +MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment " + "(0-7) (default: 0)"); + +static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE}; +static int arr_argc = 2; +module_param_array(port_type_array, int, &arr_argc, 0444); +MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default " + "1 for IB, 2 for Ethernet"); + +struct mlx4_port_config { + struct list_head list; + enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1]; + struct pci_dev *pdev; +}; + +static atomic_t pf_loading = ATOMIC_INIT(0); + +static int mlx4_devlink_ierr_reset_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + ctx->val.vbool = !!mlx4_internal_err_reset; + return 0; +} + +static int mlx4_devlink_ierr_reset_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + mlx4_internal_err_reset = ctx->val.vbool; + return 0; +} + +static int mlx4_devlink_crdump_snapshot_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx4_priv *priv = devlink_priv(devlink); + struct mlx4_dev *dev = &priv->dev; + + ctx->val.vbool = dev->persist->crdump.snapshot_enable; + return 0; +} + +static int mlx4_devlink_crdump_snapshot_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx4_priv *priv = devlink_priv(devlink); + struct mlx4_dev *dev = &priv->dev; + + dev->persist->crdump.snapshot_enable = ctx->val.vbool; + return 0; +} + +static int +mlx4_devlink_max_macs_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + u32 value = val.vu32; + + if (value < 1 || value > 128) + return -ERANGE; + + if (!is_power_of_2(value)) { + NL_SET_ERR_MSG_MOD(extack, "max_macs supported must be power of 2"); + return -EINVAL; + } + + return 0; +} + +enum mlx4_devlink_param_id { + MLX4_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + 
MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE, + MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR, +}; + +static const struct devlink_param mlx4_devlink_params[] = { + DEVLINK_PARAM_GENERIC(INT_ERR_RESET, + BIT(DEVLINK_PARAM_CMODE_RUNTIME) | + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + mlx4_devlink_ierr_reset_get, + mlx4_devlink_ierr_reset_set, NULL), + DEVLINK_PARAM_GENERIC(MAX_MACS, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx4_devlink_max_macs_validate), + DEVLINK_PARAM_GENERIC(REGION_SNAPSHOT, + BIT(DEVLINK_PARAM_CMODE_RUNTIME) | + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + mlx4_devlink_crdump_snapshot_get, + mlx4_devlink_crdump_snapshot_set, NULL), + DEVLINK_PARAM_DRIVER(MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE, + "enable_64b_cqe_eqe", DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, NULL), + DEVLINK_PARAM_DRIVER(MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR, + "enable_4k_uar", DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, NULL), +}; + +static void mlx4_devlink_set_params_init_values(struct devlink *devlink) +{ + union devlink_param_value value; + + value.vbool = !!mlx4_internal_err_reset; + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET, + value); + + value.vu32 = 1UL << log_num_mac; + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + value); + + value.vbool = enable_64b_cqe_eqe; + devlink_param_driverinit_value_set(devlink, + MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE, + value); + + value.vbool = enable_4k_uar; + devlink_param_driverinit_value_set(devlink, + MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR, + value); + + value.vbool = false; + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT, + value); +} + +static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev, + struct mlx4_dev_cap *dev_cap) +{ + /* The reserved_uars is calculated by system page size unit. 
+ * Therefore, adjustment is added when the uar page size is less + * than the system page size + */ + dev->caps.reserved_uars = + max_t(int, + mlx4_get_num_reserved_uar(dev), + dev_cap->reserved_uars / + (1 << (PAGE_SHIFT - dev->uar_page_shift))); +} + +int mlx4_check_port_params(struct mlx4_dev *dev, + enum mlx4_port_type *port_type) +{ + int i; + + if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) { + for (i = 0; i < dev->caps.num_ports - 1; i++) { + if (port_type[i] != port_type[i + 1]) { + mlx4_err(dev, "Only same port types supported on this HCA, aborting\n"); + return -EOPNOTSUPP; + } + } + } + + for (i = 0; i < dev->caps.num_ports; i++) { + if (!(port_type[i] & dev->caps.supported_type[i+1])) { + mlx4_err(dev, "Requested port type for port %d is not supported on this HCA\n", + i + 1); + return -EOPNOTSUPP; + } + } + return 0; +} + +static void mlx4_set_port_mask(struct mlx4_dev *dev) +{ + int i; + + for (i = 1; i <= dev->caps.num_ports; ++i) + dev->caps.port_mask[i] = dev->caps.port_type[i]; +} + +enum { + MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0, +}; + +static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) +{ + int err = 0; + struct mlx4_func func; + + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) { + err = mlx4_QUERY_FUNC(dev, &func, 0); + if (err) { + mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); + return err; + } + dev_cap->max_eqs = func.max_eq; + dev_cap->reserved_eqs = func.rsvd_eqs; + dev_cap->reserved_uars = func.rsvd_uars; + err |= MLX4_QUERY_FUNC_NUM_SYS_EQS; + } + return err; +} + +static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev) +{ + struct mlx4_caps *dev_cap = &dev->caps; + + /* FW not supporting or cancelled by user */ + if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) || + !(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE)) + return; + + /* Must have 64B CQE_EQE enabled by FW to use bigger stride + * When FW has NCSI it may decide not to report 64B CQE/EQEs + */ + if (!(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_EQE) || + !(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_CQE)) { + dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE; + dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE; + return; + } + + if (cache_line_size() == 128 || cache_line_size() == 256) { + mlx4_dbg(dev, "Enabling CQE stride cacheLine supported\n"); + /* Changing the real data inside CQE size to 32B */ + dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE; + dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE; + + if (mlx4_is_master(dev)) + dev_cap->function_caps |= MLX4_FUNC_CAP_EQE_CQE_STRIDE; + } else { + if (cache_line_size() != 32 && cache_line_size() != 64) + mlx4_dbg(dev, "Disabling CQE stride, cacheLine size unsupported\n"); + dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE; + dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE; + } +} + +static int _mlx4_dev_port(struct mlx4_dev *dev, int port, + struct mlx4_port_cap *port_cap) +{ + dev->caps.vl_cap[port] = port_cap->max_vl; + dev->caps.ib_mtu_cap[port] = port_cap->ib_mtu; + dev->phys_caps.gid_phys_table_len[port] = port_cap->max_gids; + dev->phys_caps.pkey_phys_table_len[port] = port_cap->max_pkeys; + /* set gid and pkey table operating lengths by default + * to non-sriov values + */ + dev->caps.gid_table_len[port] = port_cap->max_gids; + dev->caps.pkey_table_len[port] = port_cap->max_pkeys; + dev->caps.port_width_cap[port] = port_cap->max_port_width; + dev->caps.eth_mtu_cap[port] = port_cap->eth_mtu; + dev->caps.max_tc_eth = port_cap->max_tc_eth; + dev->caps.def_mac[port] = port_cap->def_mac; + 
dev->caps.supported_type[port] = port_cap->supported_port_types; + dev->caps.suggested_type[port] = port_cap->suggested_type; + dev->caps.default_sense[port] = port_cap->default_sense; + dev->caps.trans_type[port] = port_cap->trans_type; + dev->caps.vendor_oui[port] = port_cap->vendor_oui; + dev->caps.wavelength[port] = port_cap->wavelength; + dev->caps.trans_code[port] = port_cap->trans_code; + + return 0; +} + +static int mlx4_dev_port(struct mlx4_dev *dev, int port, + struct mlx4_port_cap *port_cap) +{ + int err = 0; + + err = mlx4_QUERY_PORT(dev, port, port_cap); + + if (err) + mlx4_err(dev, "QUERY_PORT command failed.\n"); + + return err; +} + +static inline void mlx4_enable_ignore_fcs(struct mlx4_dev *dev) +{ + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_IGNORE_FCS)) + return; + + if (mlx4_is_mfunc(dev)) { + mlx4_dbg(dev, "SRIOV mode - Disabling Ignore FCS"); + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS; + return; + } + + if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)) { + mlx4_dbg(dev, + "Keep FCS is not supported - Disabling Ignore FCS"); + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS; + return; + } +} + +#define MLX4_A0_STEERING_TABLE_SIZE 256 +static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) +{ + int err; + int i; + + err = mlx4_QUERY_DEV_CAP(dev, dev_cap); + if (err) { + mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n"); + return err; + } + mlx4_dev_cap_dump(dev, dev_cap); + + if (dev_cap->min_page_sz > PAGE_SIZE) { + mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n", + dev_cap->min_page_sz, PAGE_SIZE); + return -ENODEV; + } + if (dev_cap->num_ports > MLX4_MAX_PORTS) { + mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n", + dev_cap->num_ports, MLX4_MAX_PORTS); + return -ENODEV; + } + + if (dev_cap->uar_size > pci_resource_len(dev->persist->pdev, 2)) { + mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n", + dev_cap->uar_size, + (unsigned long long) + pci_resource_len(dev->persist->pdev, 2)); + return -ENODEV; + } + + dev->caps.num_ports = dev_cap->num_ports; + dev->caps.num_sys_eqs = dev_cap->num_sys_eqs; + dev->phys_caps.num_phys_eqs = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ? + dev->caps.num_sys_eqs : + MLX4_MAX_EQ_NUM; + for (i = 1; i <= dev->caps.num_ports; ++i) { + err = _mlx4_dev_port(dev, i, dev_cap->port_cap + i); + if (err) { + mlx4_err(dev, "QUERY_PORT command failed, aborting\n"); + return err; + } + } + + dev->caps.map_clock_to_user = dev_cap->map_clock_to_user; + dev->caps.uar_page_size = PAGE_SIZE; + dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE; + dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay; + dev->caps.bf_reg_size = dev_cap->bf_reg_size; + dev->caps.bf_regs_per_page = dev_cap->bf_regs_per_page; + dev->caps.max_sq_sg = dev_cap->max_sq_sg; + dev->caps.max_rq_sg = dev_cap->max_rq_sg; + dev->caps.max_wqes = dev_cap->max_qp_sz; + dev->caps.max_qp_init_rdma = dev_cap->max_requester_per_qp; + dev->caps.max_srq_wqes = dev_cap->max_srq_sz; + dev->caps.max_srq_sge = dev_cap->max_rq_sg - 1; + dev->caps.reserved_srqs = dev_cap->reserved_srqs; + dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz; + dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz; + /* + * Subtract 1 from the limit because we need to allocate a + * spare CQE to enable resizing the CQ. 
+ */ + dev->caps.max_cqes = dev_cap->max_cq_sz - 1; + dev->caps.reserved_cqs = dev_cap->reserved_cqs; + dev->caps.reserved_eqs = dev_cap->reserved_eqs; + dev->caps.reserved_mtts = dev_cap->reserved_mtts; + dev->caps.reserved_mrws = dev_cap->reserved_mrws; + + dev->caps.reserved_pds = dev_cap->reserved_pds; + dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? + dev_cap->reserved_xrcds : 0; + dev->caps.max_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? + dev_cap->max_xrcds : 0; + dev->caps.mtt_entry_sz = dev_cap->mtt_entry_sz; + + dev->caps.max_msg_sz = dev_cap->max_msg_sz; + dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1); + dev->caps.flags = dev_cap->flags; + dev->caps.flags2 = dev_cap->flags2; + dev->caps.bmme_flags = dev_cap->bmme_flags; + dev->caps.reserved_lkey = dev_cap->reserved_lkey; + dev->caps.stat_rate_support = dev_cap->stat_rate_support; + dev->caps.max_gso_sz = dev_cap->max_gso_sz; + dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; + dev->caps.wol_port[1] = dev_cap->wol_port[1]; + dev->caps.wol_port[2] = dev_cap->wol_port[2]; + dev->caps.health_buffer_addrs = dev_cap->health_buffer_addrs; + + /* Save uar page shift */ + if (!mlx4_is_slave(dev)) { + /* Virtual PCI function needs to determine UAR page size from + * firmware. Only master PCI function can set the uar page size + */ + if (enable_4k_uar || !dev->persist->num_vfs) + dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT; + else + dev->uar_page_shift = PAGE_SHIFT; + + mlx4_set_num_reserved_uars(dev, dev_cap); + } + + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN) { + struct mlx4_init_hca_param hca_param; + + memset(&hca_param, 0, sizeof(hca_param)); + err = mlx4_QUERY_HCA(dev, &hca_param); + /* Turn off PHV_EN flag in case phv_check_en is set. + * phv_check_en is a HW check that parse the packet and verify + * phv bit was reported correctly in the wqe. To allow QinQ + * PHV_EN flag should be set and phv_check_en must be cleared + * otherwise QinQ packets will be drop by the HW. + */ + if (err || hca_param.phv_check_en) + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_PHV_EN; + } + + /* Sense port always allowed on supported devices for ConnectX-1 and -2 */ + if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT) + dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; + /* Don't do sense port on multifunction devices (for now at least) */ + if (mlx4_is_mfunc(dev)) + dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; + + if (mlx4_low_memory_profile()) { + dev->caps.log_num_macs = MLX4_MIN_LOG_NUM_MAC; + dev->caps.log_num_vlans = MLX4_MIN_LOG_NUM_VLANS; + } else { + dev->caps.log_num_macs = log_num_mac; + dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS; + } + + for (i = 1; i <= dev->caps.num_ports; ++i) { + dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE; + if (dev->caps.supported_type[i]) { + /* if only ETH is supported - assign ETH */ + if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH) + dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH; + /* if only IB is supported, assign IB */ + else if (dev->caps.supported_type[i] == + MLX4_PORT_TYPE_IB) + dev->caps.port_type[i] = MLX4_PORT_TYPE_IB; + else { + /* if IB and ETH are supported, we set the port + * type according to user selection of port type; + * if user selected none, take the FW hint */ + if (port_type_array[i - 1] == MLX4_PORT_TYPE_NONE) + dev->caps.port_type[i] = dev->caps.suggested_type[i] ? 
+ MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB; + else + dev->caps.port_type[i] = port_type_array[i - 1]; + } + } + /* + * Link sensing is allowed on the port if 3 conditions are true: + * 1. Both protocols are supported on the port. + * 2. Different types are supported on the port + * 3. FW declared that it supports link sensing + */ + mlx4_priv(dev)->sense.sense_allowed[i] = + ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) && + (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && + (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)); + + /* + * If "default_sense" bit is set, we move the port to "AUTO" mode + * and perform sense_port FW command to try and set the correct + * port type from beginning + */ + if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) { + enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE; + dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO; + mlx4_SENSE_PORT(dev, i, &sensed_port); + if (sensed_port != MLX4_PORT_TYPE_NONE) + dev->caps.port_type[i] = sensed_port; + } else { + dev->caps.possible_type[i] = dev->caps.port_type[i]; + } + + if (dev->caps.log_num_macs > dev_cap->port_cap[i].log_max_macs) { + dev->caps.log_num_macs = dev_cap->port_cap[i].log_max_macs; + mlx4_warn(dev, "Requested number of MACs is too much for port %d, reducing to %d\n", + i, 1 << dev->caps.log_num_macs); + } + if (dev->caps.log_num_vlans > dev_cap->port_cap[i].log_max_vlans) { + dev->caps.log_num_vlans = dev_cap->port_cap[i].log_max_vlans; + mlx4_warn(dev, "Requested number of VLANs is too much for port %d, reducing to %d\n", + i, 1 << dev->caps.log_num_vlans); + } + } + + if (mlx4_is_master(dev) && (dev->caps.num_ports == 2) && + (port_type_array[0] == MLX4_PORT_TYPE_IB) && + (port_type_array[1] == MLX4_PORT_TYPE_ETH)) { + mlx4_warn(dev, + "Granular QoS per VF not supported with IB/Eth configuration\n"); + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_QOS_VPP; + } + + dev->caps.max_counters = dev_cap->max_counters; + + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps; + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] = + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] = + (1 << dev->caps.log_num_macs) * + (1 << dev->caps.log_num_vlans) * + dev->caps.num_ports; + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH; + + if (dev_cap->dmfs_high_rate_qpn_base > 0 && + dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) + dev->caps.dmfs_high_rate_qpn_base = dev_cap->dmfs_high_rate_qpn_base; + else + dev->caps.dmfs_high_rate_qpn_base = + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW]; + + if (dev_cap->dmfs_high_rate_qpn_range > 0 && + dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) { + dev->caps.dmfs_high_rate_qpn_range = dev_cap->dmfs_high_rate_qpn_range; + dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DEFAULT; + dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_FS_A0; + } else { + dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_NOT_SUPPORTED; + dev->caps.dmfs_high_rate_qpn_base = + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW]; + dev->caps.dmfs_high_rate_qpn_range = MLX4_A0_STEERING_TABLE_SIZE; + } + + dev->caps.rl_caps = dev_cap->rl_caps; + + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_RSS_RAW_ETH] = + dev->caps.dmfs_high_rate_qpn_range; + + dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] + + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] + + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] + + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]; + + dev->caps.sqp_demux = (mlx4_is_master(dev)) ? 
MLX4_MAX_NUM_SLAVES : 0; + + if (!enable_64b_cqe_eqe && !mlx4_is_slave(dev)) { + if (dev_cap->flags & + (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) { + mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n"); + dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE; + dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE; + } + + if (dev_cap->flags2 & + (MLX4_DEV_CAP_FLAG2_CQE_STRIDE | + MLX4_DEV_CAP_FLAG2_EQE_STRIDE)) { + mlx4_warn(dev, "Disabling EQE/CQE stride per user request\n"); + dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE; + dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE; + } + } + + if ((dev->caps.flags & + (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) && + mlx4_is_master(dev)) + dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE; + + if (!mlx4_is_slave(dev)) { + mlx4_enable_cqe_eqe_stride(dev); + dev->caps.alloc_res_qp_mask = + (dev->caps.bf_reg_size ? MLX4_RESERVE_ETH_BF_QP : 0) | + MLX4_RESERVE_A0_QP; + + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) && + dev->caps.flags & MLX4_DEV_CAP_FLAG_SET_ETH_SCHED) { + mlx4_warn(dev, "Old device ETS support detected\n"); + mlx4_warn(dev, "Consider upgrading device FW.\n"); + dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_ETS_CFG; + } + + } else { + dev->caps.alloc_res_qp_mask = 0; + } + + mlx4_enable_ignore_fcs(dev); + + return 0; +} + +/*The function checks if there are live vf, return the num of them*/ +static int mlx4_how_many_lives_vf(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *s_state; + int i; + int ret = 0; + + for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) { + s_state = &priv->mfunc.master.slave_state[i]; + if (s_state->active && s_state->last_cmd != + MLX4_COMM_CMD_RESET) { + mlx4_warn(dev, "%s: slave: %d is still active\n", + __func__, i); + ret++; + } + } + return ret; +} + +int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey) +{ + u32 qk = MLX4_RESERVED_QKEY_BASE; + + if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX || + qpn < dev->phys_caps.base_proxy_sqpn) + return -EINVAL; + + if (qpn >= dev->phys_caps.base_tunnel_sqpn) + /* tunnel qp */ + qk += qpn - dev->phys_caps.base_tunnel_sqpn; + else + qk += qpn - dev->phys_caps.base_proxy_sqpn; + *qkey = qk; + return 0; +} +EXPORT_SYMBOL(mlx4_get_parav_qkey); + +void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val) +{ + struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); + + if (!mlx4_is_master(dev)) + return; + + priv->virt2phys_pkey[slave][port - 1][i] = val; +} +EXPORT_SYMBOL(mlx4_sync_pkey_table); + +void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid) +{ + struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); + + if (!mlx4_is_master(dev)) + return; + + priv->slave_node_guids[slave] = guid; +} +EXPORT_SYMBOL(mlx4_put_slave_node_guid); + +__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave) +{ + struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); + + if (!mlx4_is_master(dev)) + return 0; + + return priv->slave_node_guids[slave]; +} +EXPORT_SYMBOL(mlx4_get_slave_node_guid); + +int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *s_slave; + + if (!mlx4_is_master(dev)) + return 0; + + s_slave = &priv->mfunc.master.slave_state[slave]; + return !!s_slave->active; +} +EXPORT_SYMBOL(mlx4_is_slave_active); + +void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl 
*ctrl, + struct _rule_hw *eth_header) +{ + if (is_multicast_ether_addr(eth_header->eth.dst_mac) || + is_broadcast_ether_addr(eth_header->eth.dst_mac)) { + struct mlx4_net_trans_rule_hw_eth *eth = + (struct mlx4_net_trans_rule_hw_eth *)eth_header; + struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1); + bool last_rule = next_rule->size == 0 && next_rule->id == 0 && + next_rule->rsvd == 0; + + if (last_rule) + ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC); + } +} +EXPORT_SYMBOL(mlx4_handle_eth_header_mcast_prio); + +static void slave_adjust_steering_mode(struct mlx4_dev *dev, + struct mlx4_dev_cap *dev_cap, + struct mlx4_init_hca_param *hca_param) +{ + dev->caps.steering_mode = hca_param->steering_mode; + if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { + dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; + dev->caps.fs_log_max_ucast_qp_range_size = + dev_cap->fs_log_max_ucast_qp_range_size; + } else + dev->caps.num_qp_per_mgm = + 4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2); + + mlx4_dbg(dev, "Steering mode is: %s\n", + mlx4_steering_mode_str(dev->caps.steering_mode)); +} + +static void mlx4_slave_destroy_special_qp_cap(struct mlx4_dev *dev) +{ + kfree(dev->caps.spec_qps); + dev->caps.spec_qps = NULL; +} + +static int mlx4_slave_special_qp_cap(struct mlx4_dev *dev) +{ + struct mlx4_func_cap *func_cap = NULL; + struct mlx4_caps *caps = &dev->caps; + int i, err = 0; + + func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL); + caps->spec_qps = kcalloc(caps->num_ports, sizeof(*caps->spec_qps), GFP_KERNEL); + + if (!func_cap || !caps->spec_qps) { + mlx4_err(dev, "Failed to allocate memory for special qps cap\n"); + err = -ENOMEM; + goto err_mem; + } + + for (i = 1; i <= caps->num_ports; ++i) { + err = mlx4_QUERY_FUNC_CAP(dev, i, func_cap); + if (err) { + mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n", + i, err); + goto err_mem; + } + caps->spec_qps[i - 1] = func_cap->spec_qps; + caps->port_mask[i] = caps->port_type[i]; + caps->phys_port_id[i] = func_cap->phys_port_id; + err = mlx4_get_slave_pkey_gid_tbl_len(dev, i, + &caps->gid_table_len[i], + &caps->pkey_table_len[i]); + if (err) { + mlx4_err(dev, "QUERY_PORT command failed for port %d, aborting (%d)\n", + i, err); + goto err_mem; + } + } + +err_mem: + if (err) + mlx4_slave_destroy_special_qp_cap(dev); + kfree(func_cap); + return err; +} + +static int mlx4_slave_cap(struct mlx4_dev *dev) +{ + int err; + u32 page_size; + struct mlx4_dev_cap *dev_cap = NULL; + struct mlx4_func_cap *func_cap = NULL; + struct mlx4_init_hca_param *hca_param = NULL; + + hca_param = kzalloc(sizeof(*hca_param), GFP_KERNEL); + func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL); + dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL); + if (!hca_param || !func_cap || !dev_cap) { + mlx4_err(dev, "Failed to allocate memory for slave_cap\n"); + err = -ENOMEM; + goto free_mem; + } + + err = mlx4_QUERY_HCA(dev, hca_param); + if (err) { + mlx4_err(dev, "QUERY_HCA command failed, aborting\n"); + goto free_mem; + } + + /* fail if the hca has an unknown global capability + * at this time global_caps should be always zeroed + */ + if (hca_param->global_caps) { + mlx4_err(dev, "Unknown hca global capabilities\n"); + err = -EINVAL; + goto free_mem; + } + + dev->caps.hca_core_clock = hca_param->hca_core_clock; + + dev->caps.max_qp_dest_rdma = 1 << hca_param->log_rd_per_qp; + err = mlx4_dev_cap(dev, dev_cap); + if (err) { + mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n"); + goto free_mem; + } + + err = 
mlx4_QUERY_FW(dev); + if (err) + mlx4_err(dev, "QUERY_FW command failed: could not get FW version\n"); + + page_size = ~dev->caps.page_size_cap + 1; + mlx4_warn(dev, "HCA minimum page size:%d\n", page_size); + if (page_size > PAGE_SIZE) { + mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n", + page_size, PAGE_SIZE); + err = -ENODEV; + goto free_mem; + } + + /* Set uar_page_shift for VF */ + dev->uar_page_shift = hca_param->uar_page_sz + 12; + + /* Make sure the master uar page size is valid */ + if (dev->uar_page_shift > PAGE_SHIFT) { + mlx4_err(dev, + "Invalid configuration: uar page size is larger than system page size\n"); + err = -ENODEV; + goto free_mem; + } + + /* Set reserved_uars based on the uar_page_shift */ + mlx4_set_num_reserved_uars(dev, dev_cap); + + /* Although uar page size in FW differs from system page size, + * upper software layers (mlx4_ib, mlx4_en and part of mlx4_core) + * still works with assumption that uar page size == system page size + */ + dev->caps.uar_page_size = PAGE_SIZE; + + err = mlx4_QUERY_FUNC_CAP(dev, 0, func_cap); + if (err) { + mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d)\n", + err); + goto free_mem; + } + + if ((func_cap->pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) != + PF_CONTEXT_BEHAVIOUR_MASK) { + mlx4_err(dev, "Unknown pf context behaviour %x known flags %x\n", + func_cap->pf_context_behaviour, + PF_CONTEXT_BEHAVIOUR_MASK); + err = -EINVAL; + goto free_mem; + } + + dev->caps.num_ports = func_cap->num_ports; + dev->quotas.qp = func_cap->qp_quota; + dev->quotas.srq = func_cap->srq_quota; + dev->quotas.cq = func_cap->cq_quota; + dev->quotas.mpt = func_cap->mpt_quota; + dev->quotas.mtt = func_cap->mtt_quota; + dev->caps.num_qps = 1 << hca_param->log_num_qps; + dev->caps.num_srqs = 1 << hca_param->log_num_srqs; + dev->caps.num_cqs = 1 << hca_param->log_num_cqs; + dev->caps.num_mpts = 1 << hca_param->log_mpt_sz; + dev->caps.num_eqs = func_cap->max_eq; + dev->caps.reserved_eqs = func_cap->reserved_eq; + dev->caps.reserved_lkey = func_cap->reserved_lkey; + dev->caps.num_pds = MLX4_NUM_PDS; + dev->caps.num_mgms = 0; + dev->caps.num_amgms = 0; + + if (dev->caps.num_ports > MLX4_MAX_PORTS) { + mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n", + dev->caps.num_ports, MLX4_MAX_PORTS); + err = -ENODEV; + goto free_mem; + } + + mlx4_replace_zero_macs(dev); + + err = mlx4_slave_special_qp_cap(dev); + if (err) { + mlx4_err(dev, "Set special QP caps failed. 
aborting\n"); + goto free_mem; + } + + if (dev->caps.uar_page_size * (dev->caps.num_uars - + dev->caps.reserved_uars) > + pci_resource_len(dev->persist->pdev, + 2)) { + mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n", + dev->caps.uar_page_size * dev->caps.num_uars, + (unsigned long long) + pci_resource_len(dev->persist->pdev, 2)); + err = -ENOMEM; + goto err_mem; + } + + if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) { + dev->caps.eqe_size = 64; + dev->caps.eqe_factor = 1; + } else { + dev->caps.eqe_size = 32; + dev->caps.eqe_factor = 0; + } + + if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) { + dev->caps.cqe_size = 64; + dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE; + } else { + dev->caps.cqe_size = 32; + } + + if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) { + dev->caps.eqe_size = hca_param->eqe_size; + dev->caps.eqe_factor = 0; + } + + if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) { + dev->caps.cqe_size = hca_param->cqe_size; + /* User still need to know when CQE > 32B */ + dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE; + } + + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; + mlx4_warn(dev, "Timestamping is not supported in slave mode\n"); + + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_USER_MAC_EN; + mlx4_dbg(dev, "User MAC FW update is not supported in slave mode\n"); + + slave_adjust_steering_mode(dev, dev_cap, hca_param); + mlx4_dbg(dev, "RSS support for IP fragments is %s\n", + hca_param->rss_ip_frags ? "on" : "off"); + + if (func_cap->extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP && + dev->caps.bf_reg_size) + dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP; + + if (func_cap->extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP) + dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_A0_QP; + +err_mem: + if (err) + mlx4_slave_destroy_special_qp_cap(dev); +free_mem: + kfree(hca_param); + kfree(func_cap); + kfree(dev_cap); + return err; +} + +static void mlx4_request_modules(struct mlx4_dev *dev) +{ + int port; + int has_ib_port = false; + int has_eth_port = false; +#define EN_DRV_NAME "mlx4_en" +#define IB_DRV_NAME "mlx4_ib" + + for (port = 1; port <= dev->caps.num_ports; port++) { + if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) + has_ib_port = true; + else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) + has_eth_port = true; + } + + if (has_eth_port) + request_module_nowait(EN_DRV_NAME); + if (has_ib_port || (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) + request_module_nowait(IB_DRV_NAME); +} + +/* + * Change the port configuration of the device. + * Every user of this function must hold the port mutex. 
+ */ +int mlx4_change_port_types(struct mlx4_dev *dev, + enum mlx4_port_type *port_types) +{ + int err = 0; + int change = 0; + int port; + + for (port = 0; port < dev->caps.num_ports; port++) { + /* Change the port type only if the new type is different + * from the current, and not set to Auto */ + if (port_types[port] != dev->caps.port_type[port + 1]) + change = 1; + } + if (change) { + mlx4_unregister_device(dev); + for (port = 1; port <= dev->caps.num_ports; port++) { + mlx4_CLOSE_PORT(dev, port); + dev->caps.port_type[port] = port_types[port - 1]; + err = mlx4_SET_PORT(dev, port, -1); + if (err) { + mlx4_err(dev, "Failed to set port %d, aborting\n", + port); + goto out; + } + } + mlx4_set_port_mask(dev); + err = mlx4_register_device(dev); + if (err) { + mlx4_err(dev, "Failed to register device\n"); + goto out; + } + mlx4_request_modules(dev); + } + +out: + return err; +} + +static ssize_t show_port_type(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, + port_attr); + struct mlx4_dev *mdev = info->dev; + char type[8]; + + sprintf(type, "%s", + (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ? + "ib" : "eth"); + if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO) + sprintf(buf, "auto (%s)\n", type); + else + sprintf(buf, "%s\n", type); + + return strlen(buf); +} + +static int __set_port_type(struct mlx4_port_info *info, + enum mlx4_port_type port_type) +{ + struct mlx4_dev *mdev = info->dev; + struct mlx4_priv *priv = mlx4_priv(mdev); + enum mlx4_port_type types[MLX4_MAX_PORTS]; + enum mlx4_port_type new_types[MLX4_MAX_PORTS]; + int i; + int err = 0; + + if ((port_type & mdev->caps.supported_type[info->port]) != port_type) { + mlx4_err(mdev, + "Requested port type for port %d is not supported on this HCA\n", + info->port); + return -EOPNOTSUPP; + } + + mlx4_stop_sense(mdev); + mutex_lock(&priv->port_mutex); + info->tmp_type = port_type; + + /* Possible type is always the one that was delivered */ + mdev->caps.possible_type[info->port] = info->tmp_type; + + for (i = 0; i < mdev->caps.num_ports; i++) { + types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type : + mdev->caps.possible_type[i+1]; + if (types[i] == MLX4_PORT_TYPE_AUTO) + types[i] = mdev->caps.port_type[i+1]; + } + + if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && + !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) { + for (i = 1; i <= mdev->caps.num_ports; i++) { + if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) { + mdev->caps.possible_type[i] = mdev->caps.port_type[i]; + err = -EOPNOTSUPP; + } + } + } + if (err) { + mlx4_err(mdev, "Auto sensing is not supported on this HCA. 
Set only 'eth' or 'ib' for both ports (should be the same)\n"); + goto out; + } + + mlx4_do_sense_ports(mdev, new_types, types); + + err = mlx4_check_port_params(mdev, new_types); + if (err) + goto out; + + /* We are about to apply the changes after the configuration + * was verified, no need to remember the temporary types + * any more */ + for (i = 0; i < mdev->caps.num_ports; i++) + priv->port[i + 1].tmp_type = 0; + + err = mlx4_change_port_types(mdev, new_types); + +out: + mlx4_start_sense(mdev); + mutex_unlock(&priv->port_mutex); + + return err; +} + +static ssize_t set_port_type(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, + port_attr); + struct mlx4_dev *mdev = info->dev; + enum mlx4_port_type port_type; + static DEFINE_MUTEX(set_port_type_mutex); + int err; + + mutex_lock(&set_port_type_mutex); + + if (!strcmp(buf, "ib\n")) { + port_type = MLX4_PORT_TYPE_IB; + } else if (!strcmp(buf, "eth\n")) { + port_type = MLX4_PORT_TYPE_ETH; + } else if (!strcmp(buf, "auto\n")) { + port_type = MLX4_PORT_TYPE_AUTO; + } else { + mlx4_err(mdev, "%s is not supported port type\n", buf); + err = -EINVAL; + goto err_out; + } + + err = __set_port_type(info, port_type); + +err_out: + mutex_unlock(&set_port_type_mutex); + + return err ? err : count; +} + +enum ibta_mtu { + IB_MTU_256 = 1, + IB_MTU_512 = 2, + IB_MTU_1024 = 3, + IB_MTU_2048 = 4, + IB_MTU_4096 = 5 +}; + +static inline int int_to_ibta_mtu(int mtu) +{ + switch (mtu) { + case 256: return IB_MTU_256; + case 512: return IB_MTU_512; + case 1024: return IB_MTU_1024; + case 2048: return IB_MTU_2048; + case 4096: return IB_MTU_4096; + default: return -1; + } +} + +static inline int ibta_mtu_to_int(enum ibta_mtu mtu) +{ + switch (mtu) { + case IB_MTU_256: return 256; + case IB_MTU_512: return 512; + case IB_MTU_1024: return 1024; + case IB_MTU_2048: return 2048; + case IB_MTU_4096: return 4096; + default: return -1; + } +} + +static ssize_t show_port_ib_mtu(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, + port_mtu_attr); + struct mlx4_dev *mdev = info->dev; + + if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) + mlx4_warn(mdev, "port level mtu is only used for IB ports\n"); + + sprintf(buf, "%d\n", + ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port])); + return strlen(buf); +} + +static ssize_t set_port_ib_mtu(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, + port_mtu_attr); + struct mlx4_dev *mdev = info->dev; + struct mlx4_priv *priv = mlx4_priv(mdev); + int err, port, mtu, ibta_mtu = -1; + + if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) { + mlx4_warn(mdev, "port level mtu is only used for IB ports\n"); + return -EINVAL; + } + + err = kstrtoint(buf, 0, &mtu); + if (!err) + ibta_mtu = int_to_ibta_mtu(mtu); + + if (err || ibta_mtu < 0) { + mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf); + return -EINVAL; + } + + mdev->caps.port_ib_mtu[info->port] = ibta_mtu; + + mlx4_stop_sense(mdev); + mutex_lock(&priv->port_mutex); + mlx4_unregister_device(mdev); + for (port = 1; port <= mdev->caps.num_ports; port++) { + mlx4_CLOSE_PORT(mdev, port); + err = mlx4_SET_PORT(mdev, port, -1); + if (err) { + mlx4_err(mdev, "Failed to set port %d, aborting\n", + port); + goto err_set_port; + } + } + err = 
mlx4_register_device(mdev); +err_set_port: + mutex_unlock(&priv->port_mutex); + mlx4_start_sense(mdev); + return err ? err : count; +} + +/* bond for multi-function device */ +#define MAX_MF_BOND_ALLOWED_SLAVES 63 +static int mlx4_mf_bond(struct mlx4_dev *dev) +{ + int err = 0; + int nvfs; + struct mlx4_slaves_pport slaves_port1; + struct mlx4_slaves_pport slaves_port2; + DECLARE_BITMAP(slaves_port_1_2, MLX4_MFUNC_MAX); + + slaves_port1 = mlx4_phys_to_slaves_pport(dev, 1); + slaves_port2 = mlx4_phys_to_slaves_pport(dev, 2); + bitmap_and(slaves_port_1_2, + slaves_port1.slaves, slaves_port2.slaves, + dev->persist->num_vfs + 1); + + /* only single port vfs are allowed */ + if (bitmap_weight(slaves_port_1_2, dev->persist->num_vfs + 1) > 1) { + mlx4_warn(dev, "HA mode unsupported for dual ported VFs\n"); + return -EINVAL; + } + + /* number of virtual functions is number of total functions minus one + * physical function for each port. + */ + nvfs = bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) + + bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1) - 2; + + /* limit on maximum allowed VFs */ + if (nvfs > MAX_MF_BOND_ALLOWED_SLAVES) { + mlx4_warn(dev, "HA mode is not supported for %d VFs (max %d are allowed)\n", + nvfs, MAX_MF_BOND_ALLOWED_SLAVES); + return -EINVAL; + } + + if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) { + mlx4_warn(dev, "HA mode unsupported for NON DMFS steering\n"); + return -EINVAL; + } + + err = mlx4_bond_mac_table(dev); + if (err) + return err; + err = mlx4_bond_vlan_table(dev); + if (err) + goto err1; + err = mlx4_bond_fs_rules(dev); + if (err) + goto err2; + + return 0; +err2: + (void)mlx4_unbond_vlan_table(dev); +err1: + (void)mlx4_unbond_mac_table(dev); + return err; +} + +static int mlx4_mf_unbond(struct mlx4_dev *dev) +{ + int ret, ret1; + + ret = mlx4_unbond_fs_rules(dev); + if (ret) + mlx4_warn(dev, "multifunction unbond for flow rules failed (%d)\n", ret); + ret1 = mlx4_unbond_mac_table(dev); + if (ret1) { + mlx4_warn(dev, "multifunction unbond for MAC table failed (%d)\n", ret1); + ret = ret1; + } + ret1 = mlx4_unbond_vlan_table(dev); + if (ret1) { + mlx4_warn(dev, "multifunction unbond for VLAN table failed (%d)\n", ret1); + ret = ret1; + } + return ret; +} + +int mlx4_bond(struct mlx4_dev *dev) +{ + int ret = 0; + struct mlx4_priv *priv = mlx4_priv(dev); + + mutex_lock(&priv->bond_mutex); + + if (!mlx4_is_bonded(dev)) { + ret = mlx4_do_bond(dev, true); + if (ret) + mlx4_err(dev, "Failed to bond device: %d\n", ret); + if (!ret && mlx4_is_master(dev)) { + ret = mlx4_mf_bond(dev); + if (ret) { + mlx4_err(dev, "bond for multifunction failed\n"); + mlx4_do_bond(dev, false); + } + } + } + + mutex_unlock(&priv->bond_mutex); + if (!ret) + mlx4_dbg(dev, "Device is bonded\n"); + + return ret; +} +EXPORT_SYMBOL_GPL(mlx4_bond); + +int mlx4_unbond(struct mlx4_dev *dev) +{ + int ret = 0; + struct mlx4_priv *priv = mlx4_priv(dev); + + mutex_lock(&priv->bond_mutex); + + if (mlx4_is_bonded(dev)) { + int ret2 = 0; + + ret = mlx4_do_bond(dev, false); + if (ret) + mlx4_err(dev, "Failed to unbond device: %d\n", ret); + if (mlx4_is_master(dev)) + ret2 = mlx4_mf_unbond(dev); + if (ret2) { + mlx4_warn(dev, "Failed to unbond device for multifunction (%d)\n", ret2); + ret = ret2; + } + } + + mutex_unlock(&priv->bond_mutex); + if (!ret) + mlx4_dbg(dev, "Device is unbonded\n"); + + return ret; +} +EXPORT_SYMBOL_GPL(mlx4_unbond); + + +int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p) +{ + u8 port1 = v2p->port1; + u8 port2 = 
v2p->port2;
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	int err;
+
+	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP))
+		return -EOPNOTSUPP;
+
+	mutex_lock(&priv->bond_mutex);
+
+	/* zero means keep current mapping for this port */
+	if (port1 == 0)
+		port1 = priv->v2p.port1;
+	if (port2 == 0)
+		port2 = priv->v2p.port2;
+
+	if ((port1 < 1) || (port1 > MLX4_MAX_PORTS) ||
+	    (port2 < 1) || (port2 > MLX4_MAX_PORTS) ||
+	    (port1 == 2 && port2 == 1)) {
+		/* besides boundary checks cross mapping makes
+		 * no sense and therefore not allowed */
+		err = -EINVAL;
+	} else if ((port1 == priv->v2p.port1) &&
+		   (port2 == priv->v2p.port2)) {
+		err = 0;
+	} else {
+		err = mlx4_virt2phy_port_map(dev, port1, port2);
+		if (!err) {
+			mlx4_dbg(dev, "port map changed: [%d][%d]\n",
+				 port1, port2);
+			priv->v2p.port1 = port1;
+			priv->v2p.port2 = port2;
+		} else {
+			mlx4_err(dev, "Failed to change port map: %d\n", err);
+		}
+	}
+
+	mutex_unlock(&priv->bond_mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_port_map_set);
+
+static int mlx4_load_fw(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	int err;
+
+	priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
+					 GFP_HIGHUSER | __GFP_NOWARN, 0);
+	if (!priv->fw.fw_icm) {
+		mlx4_err(dev, "Couldn't allocate FW area, aborting\n");
+		return -ENOMEM;
+	}
+
+	err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
+	if (err) {
+		mlx4_err(dev, "MAP_FA command failed, aborting\n");
+		goto err_free;
+	}
+
+	err = mlx4_RUN_FW(dev);
+	if (err) {
+		mlx4_err(dev, "RUN_FW command failed, aborting\n");
+		goto err_unmap_fa;
+	}
+
+	return 0;
+
+err_unmap_fa:
+	mlx4_UNMAP_FA(dev);
+
+err_free:
+	mlx4_free_icm(dev, priv->fw.fw_icm, 0);
+	return err;
+}
+
+static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
+				int cmpt_entry_sz)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	int err;
+	int num_eqs;
+
+	err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
+				  cmpt_base +
+				  ((u64) (MLX4_CMPT_TYPE_QP *
+					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
+				  cmpt_entry_sz, dev->caps.num_qps,
+				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
+				  0, 0);
+	if (err)
+		goto err;
+
+	err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table,
+				  cmpt_base +
+				  ((u64) (MLX4_CMPT_TYPE_SRQ *
+					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
+				  cmpt_entry_sz, dev->caps.num_srqs,
+				  dev->caps.reserved_srqs, 0, 0);
+	if (err)
+		goto err_qp;
+
+	err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table,
+				  cmpt_base +
+				  ((u64) (MLX4_CMPT_TYPE_CQ *
+					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
+				  cmpt_entry_sz, dev->caps.num_cqs,
+				  dev->caps.reserved_cqs, 0, 0);
+	if (err)
+		goto err_srq;
+
+	num_eqs = dev->phys_caps.num_phys_eqs;
+	err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
+				  cmpt_base +
+				  ((u64) (MLX4_CMPT_TYPE_EQ *
+					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
+				  cmpt_entry_sz, num_eqs, num_eqs, 0, 0);
+	if (err)
+		goto err_cq;
+
+	return 0;
+
+err_cq:
+	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
+
+err_srq:
+	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
+
+err_qp:
+	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
+
+err:
+	return err;
+}
+
+static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
+			 struct mlx4_init_hca_param *init_hca, u64 icm_size)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	u64 aux_pages;
+	int num_eqs;
+	int err;
+
+	err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
+	if (err) {
+		mlx4_err(dev, "SET_ICM_SIZE command failed, aborting\n");
+		return err;
+	}
+
+	mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux 
memory\n", + (unsigned long long) icm_size >> 10, + (unsigned long long) aux_pages << 2); + + priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages, + GFP_HIGHUSER | __GFP_NOWARN, 0); + if (!priv->fw.aux_icm) { + mlx4_err(dev, "Couldn't allocate aux memory, aborting\n"); + return -ENOMEM; + } + + err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm); + if (err) { + mlx4_err(dev, "MAP_ICM_AUX command failed, aborting\n"); + goto err_free_aux; + } + + err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz); + if (err) { + mlx4_err(dev, "Failed to map cMPT context memory, aborting\n"); + goto err_unmap_aux; + } + + + num_eqs = dev->phys_caps.num_phys_eqs; + err = mlx4_init_icm_table(dev, &priv->eq_table.table, + init_hca->eqc_base, dev_cap->eqc_entry_sz, + num_eqs, num_eqs, 0, 0); + if (err) { + mlx4_err(dev, "Failed to map EQ context memory, aborting\n"); + goto err_unmap_cmpt; + } + + /* + * Reserved MTT entries must be aligned up to a cacheline + * boundary, since the FW will write to them, while the driver + * writes to all other MTT entries. (The variable + * dev->caps.mtt_entry_sz below is really the MTT segment + * size, not the raw entry size) + */ + dev->caps.reserved_mtts = + ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz, + dma_get_cache_alignment()) / dev->caps.mtt_entry_sz; + + err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table, + init_hca->mtt_base, + dev->caps.mtt_entry_sz, + dev->caps.num_mtts, + dev->caps.reserved_mtts, 1, 0); + if (err) { + mlx4_err(dev, "Failed to map MTT context memory, aborting\n"); + goto err_unmap_eq; + } + + err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table, + init_hca->dmpt_base, + dev_cap->dmpt_entry_sz, + dev->caps.num_mpts, + dev->caps.reserved_mrws, 1, 1); + if (err) { + mlx4_err(dev, "Failed to map dMPT context memory, aborting\n"); + goto err_unmap_mtt; + } + + err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table, + init_hca->qpc_base, + dev_cap->qpc_entry_sz, + dev->caps.num_qps, + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], + 0, 0); + if (err) { + mlx4_err(dev, "Failed to map QP context memory, aborting\n"); + goto err_unmap_dmpt; + } + + err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table, + init_hca->auxc_base, + dev_cap->aux_entry_sz, + dev->caps.num_qps, + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], + 0, 0); + if (err) { + mlx4_err(dev, "Failed to map AUXC context memory, aborting\n"); + goto err_unmap_qp; + } + + err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table, + init_hca->altc_base, + dev_cap->altc_entry_sz, + dev->caps.num_qps, + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], + 0, 0); + if (err) { + mlx4_err(dev, "Failed to map ALTC context memory, aborting\n"); + goto err_unmap_auxc; + } + + err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table, + init_hca->rdmarc_base, + dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift, + dev->caps.num_qps, + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], + 0, 0); + if (err) { + mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n"); + goto err_unmap_altc; + } + + err = mlx4_init_icm_table(dev, &priv->cq_table.table, + init_hca->cqc_base, + dev_cap->cqc_entry_sz, + dev->caps.num_cqs, + dev->caps.reserved_cqs, 0, 0); + if (err) { + mlx4_err(dev, "Failed to map CQ context memory, aborting\n"); + goto err_unmap_rdmarc; + } + + err = mlx4_init_icm_table(dev, &priv->srq_table.table, + init_hca->srqc_base, + dev_cap->srq_entry_sz, + dev->caps.num_srqs, + dev->caps.reserved_srqs, 0, 0); + if (err) { + mlx4_err(dev, "Failed to map 
SRQ context memory, aborting\n"); + goto err_unmap_cq; + } + + /* + * For flow steering device managed mode it is required to use + * mlx4_init_icm_table. For B0 steering mode it's not strictly + * required, but for simplicity just map the whole multicast + * group table now. The table isn't very big and it's a lot + * easier than trying to track ref counts. + */ + err = mlx4_init_icm_table(dev, &priv->mcg_table.table, + init_hca->mc_base, + mlx4_get_mgm_entry_size(dev), + dev->caps.num_mgms + dev->caps.num_amgms, + dev->caps.num_mgms + dev->caps.num_amgms, + 0, 0); + if (err) { + mlx4_err(dev, "Failed to map MCG context memory, aborting\n"); + goto err_unmap_srq; + } + + return 0; + +err_unmap_srq: + mlx4_cleanup_icm_table(dev, &priv->srq_table.table); + +err_unmap_cq: + mlx4_cleanup_icm_table(dev, &priv->cq_table.table); + +err_unmap_rdmarc: + mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table); + +err_unmap_altc: + mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); + +err_unmap_auxc: + mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); + +err_unmap_qp: + mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); + +err_unmap_dmpt: + mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); + +err_unmap_mtt: + mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); + +err_unmap_eq: + mlx4_cleanup_icm_table(dev, &priv->eq_table.table); + +err_unmap_cmpt: + mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); + mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); + mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); + mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); + +err_unmap_aux: + mlx4_UNMAP_ICM_AUX(dev); + +err_free_aux: + mlx4_free_icm(dev, priv->fw.aux_icm, 0); + + return err; +} + +static void mlx4_free_icms(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + mlx4_cleanup_icm_table(dev, &priv->mcg_table.table); + mlx4_cleanup_icm_table(dev, &priv->srq_table.table); + mlx4_cleanup_icm_table(dev, &priv->cq_table.table); + mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table); + mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); + mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); + mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); + mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); + mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); + mlx4_cleanup_icm_table(dev, &priv->eq_table.table); + mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); + mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); + mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); + mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); + + mlx4_UNMAP_ICM_AUX(dev); + mlx4_free_icm(dev, priv->fw.aux_icm, 0); +} + +static void mlx4_slave_exit(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + mutex_lock(&priv->cmd.slave_cmd_mutex); + if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, + MLX4_COMM_TIME)) + mlx4_warn(dev, "Failed to close slave function\n"); + mutex_unlock(&priv->cmd.slave_cmd_mutex); +} + +static int map_bf_area(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + resource_size_t bf_start; + resource_size_t bf_len; + int err = 0; + + if (!dev->caps.bf_reg_size) + return -ENXIO; + + bf_start = pci_resource_start(dev->persist->pdev, 2) + + (dev->caps.num_uars << PAGE_SHIFT); + bf_len = pci_resource_len(dev->persist->pdev, 2) - + (dev->caps.num_uars << PAGE_SHIFT); + priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len); + if (!priv->bf_mapping) 
+ err = -ENOMEM; + + return err; +} + +static void unmap_bf_area(struct mlx4_dev *dev) +{ + if (mlx4_priv(dev)->bf_mapping) + io_mapping_free(mlx4_priv(dev)->bf_mapping); +} + +u64 mlx4_read_clock(struct mlx4_dev *dev) +{ + u32 clockhi, clocklo, clockhi1; + u64 cycles; + int i; + struct mlx4_priv *priv = mlx4_priv(dev); + + for (i = 0; i < 10; i++) { + clockhi = swab32(readl(priv->clock_mapping)); + clocklo = swab32(readl(priv->clock_mapping + 4)); + clockhi1 = swab32(readl(priv->clock_mapping)); + if (clockhi == clockhi1) + break; + } + + cycles = (u64) clockhi << 32 | (u64) clocklo; + + return cycles; +} +EXPORT_SYMBOL_GPL(mlx4_read_clock); + + +static int map_internal_clock(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + priv->clock_mapping = + ioremap(pci_resource_start(dev->persist->pdev, + priv->fw.clock_bar) + + priv->fw.clock_offset, MLX4_CLOCK_SIZE); + + if (!priv->clock_mapping) + return -ENOMEM; + + return 0; +} + +int mlx4_get_internal_clock_params(struct mlx4_dev *dev, + struct mlx4_clock_params *params) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + if (mlx4_is_slave(dev)) + return -EOPNOTSUPP; + + if (!dev->caps.map_clock_to_user) { + mlx4_dbg(dev, "Map clock to user is not supported.\n"); + return -EOPNOTSUPP; + } + + if (!params) + return -EINVAL; + + params->bar = priv->fw.clock_bar; + params->offset = priv->fw.clock_offset; + params->size = MLX4_CLOCK_SIZE; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params); + +static void unmap_internal_clock(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + if (priv->clock_mapping) + iounmap(priv->clock_mapping); +} + +static void mlx4_close_hca(struct mlx4_dev *dev) +{ + unmap_internal_clock(dev); + unmap_bf_area(dev); + if (mlx4_is_slave(dev)) + mlx4_slave_exit(dev); + else { + mlx4_CLOSE_HCA(dev, 0); + mlx4_free_icms(dev); + } +} + +static void mlx4_close_fw(struct mlx4_dev *dev) +{ + if (!mlx4_is_slave(dev)) { + mlx4_UNMAP_FA(dev); + mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0); + } +} + +static int mlx4_comm_check_offline(struct mlx4_dev *dev) +{ +#define COMM_CHAN_OFFLINE_OFFSET 0x09 + + u32 comm_flags; + u32 offline_bit; + unsigned long end; + struct mlx4_priv *priv = mlx4_priv(dev); + + end = msecs_to_jiffies(MLX4_COMM_OFFLINE_TIME_OUT) + jiffies; + while (time_before(jiffies, end)) { + comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_FLAGS)); + offline_bit = (comm_flags & + (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)); + if (!offline_bit) + return 0; + + /* If device removal has been requested, + * do not continue retrying. + */ + if (dev->persist->interface_state & + MLX4_INTERFACE_STATE_NOWAIT) + break; + + /* There are cases as part of AER/Reset flow that PF needs + * around 100 msec to load. We therefore sleep for 100 msec + * to allow other tasks to make use of that CPU during this + * time interval. 
+ */ + msleep(100); + } + mlx4_err(dev, "Communication channel is offline.\n"); + return -EIO; +} + +static void mlx4_reset_vf_support(struct mlx4_dev *dev) +{ +#define COMM_CHAN_RST_OFFSET 0x1e + + struct mlx4_priv *priv = mlx4_priv(dev); + u32 comm_rst; + u32 comm_caps; + + comm_caps = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_CAPS)); + comm_rst = (comm_caps & (u32)(1 << COMM_CHAN_RST_OFFSET)); + + if (comm_rst) + dev->caps.vf_caps |= MLX4_VF_CAP_FLAG_RESET; +} + +static int mlx4_init_slave(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + u64 dma = (u64) priv->mfunc.vhcr_dma; + int ret_from_reset = 0; + u32 slave_read; + u32 cmd_channel_ver; + + if (atomic_read(&pf_loading)) { + mlx4_warn(dev, "PF is not ready - Deferring probe\n"); + return -EPROBE_DEFER; + } + + mutex_lock(&priv->cmd.slave_cmd_mutex); + priv->cmd.max_cmds = 1; + if (mlx4_comm_check_offline(dev)) { + mlx4_err(dev, "PF is not responsive, skipping initialization\n"); + goto err_offline; + } + + mlx4_reset_vf_support(dev); + mlx4_warn(dev, "Sending reset\n"); + ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME); + /* if we are in the middle of flr the slave will try + * NUM_OF_RESET_RETRIES times before leaving.*/ + if (ret_from_reset) { + if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) { + mlx4_warn(dev, "slave is currently in the middle of FLR - Deferring probe\n"); + mutex_unlock(&priv->cmd.slave_cmd_mutex); + return -EPROBE_DEFER; + } else + goto err; + } + + /* check the driver version - the slave I/F revision + * must match the master's */ + slave_read = swab32(readl(&priv->mfunc.comm->slave_read)); + cmd_channel_ver = mlx4_comm_get_version(); + + if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) != + MLX4_COMM_GET_IF_REV(slave_read)) { + mlx4_err(dev, "slave driver version is not supported by the master\n"); + goto err; + } + + mlx4_warn(dev, "Sending vhcr0\n"); + if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48, + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) + goto err; + if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32, + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) + goto err; + if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16, + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) + goto err; + if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) + goto err; + + mutex_unlock(&priv->cmd.slave_cmd_mutex); + return 0; + +err: + mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, 0); +err_offline: + mutex_unlock(&priv->cmd.slave_cmd_mutex); + return -EIO; +} + +static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) +{ + int i; + + for (i = 1; i <= dev->caps.num_ports; i++) { + if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) + dev->caps.gid_table_len[i] = + mlx4_get_slave_num_gids(dev, 0, i); + else + dev->caps.gid_table_len[i] = 1; + dev->caps.pkey_table_len[i] = + dev->phys_caps.pkey_phys_table_len[i] - 1; + } +} + +static int choose_log_fs_mgm_entry_size(int qp_per_entry) +{ + int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; + + for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE; + i++) { + if (qp_per_entry <= 4 * ((1 << i) / 16 - 2)) + break; + } + + return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? 
i : -1; +} + +static const char *dmfs_high_rate_steering_mode_str(int dmfs_high_steer_mode) +{ + switch (dmfs_high_steer_mode) { + case MLX4_STEERING_DMFS_A0_DEFAULT: + return "default performance"; + + case MLX4_STEERING_DMFS_A0_DYNAMIC: + return "dynamic hybrid mode"; + + case MLX4_STEERING_DMFS_A0_STATIC: + return "performance optimized for limited rule configuration (static)"; + + case MLX4_STEERING_DMFS_A0_DISABLE: + return "disabled performance optimized steering"; + + case MLX4_STEERING_DMFS_A0_NOT_SUPPORTED: + return "performance optimized steering not supported"; + + default: + return "Unrecognized mode"; + } +} + +#define MLX4_DMFS_A0_STEERING (1UL << 2) + +static void choose_steering_mode(struct mlx4_dev *dev, + struct mlx4_dev_cap *dev_cap) +{ + if (mlx4_log_num_mgm_entry_size <= 0) { + if ((-mlx4_log_num_mgm_entry_size) & MLX4_DMFS_A0_STEERING) { + if (dev->caps.dmfs_high_steer_mode == + MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) + mlx4_err(dev, "DMFS high rate mode not supported\n"); + else + dev->caps.dmfs_high_steer_mode = + MLX4_STEERING_DMFS_A0_STATIC; + } + } + + if (mlx4_log_num_mgm_entry_size <= 0 && + dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && + (!mlx4_is_mfunc(dev) || + (dev_cap->fs_max_num_qp_per_entry >= + (dev->persist->num_vfs + 1))) && + choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= + MLX4_MIN_MGM_LOG_ENTRY_SIZE) { + dev->oper_log_mgm_entry_size = + choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry); + dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; + dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; + dev->caps.fs_log_max_ucast_qp_range_size = + dev_cap->fs_log_max_ucast_qp_range_size; + } else { + if (dev->caps.dmfs_high_steer_mode != + MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) + dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DISABLE; + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && + dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) + dev->caps.steering_mode = MLX4_STEERING_MODE_B0; + else { + dev->caps.steering_mode = MLX4_STEERING_MODE_A0; + + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER || + dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) + mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags set to use B0 steering - falling back to A0 steering mode\n"); + } + dev->oper_log_mgm_entry_size = + mlx4_log_num_mgm_entry_size > 0 ? + mlx4_log_num_mgm_entry_size : + MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; + dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev); + } + mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, modparam log_num_mgm_entry_size = %d\n", + mlx4_steering_mode_str(dev->caps.steering_mode), + dev->oper_log_mgm_entry_size, + mlx4_log_num_mgm_entry_size); +} + +static void choose_tunnel_offload_mode(struct mlx4_dev *dev, + struct mlx4_dev_cap *dev_cap) +{ + if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED && + dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS) + dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_VXLAN; + else + dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_NONE; + + mlx4_dbg(dev, "Tunneling offload mode is: %s\n", (dev->caps.tunnel_offload_mode + == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) ? 
"vxlan" : "none"); +} + +static int mlx4_validate_optimized_steering(struct mlx4_dev *dev) +{ + int i; + struct mlx4_port_cap port_cap; + + if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) + return -EINVAL; + + for (i = 1; i <= dev->caps.num_ports; i++) { + if (mlx4_dev_port(dev, i, &port_cap)) { + mlx4_err(dev, + "QUERY_DEV_CAP command failed, can't verify DMFS high rate steering.\n"); + } else if ((dev->caps.dmfs_high_steer_mode != + MLX4_STEERING_DMFS_A0_DEFAULT) && + (port_cap.dmfs_optimized_state == + !!(dev->caps.dmfs_high_steer_mode == + MLX4_STEERING_DMFS_A0_DISABLE))) { + mlx4_err(dev, + "DMFS high rate steer mode differ, driver requested %s but %s in FW.\n", + dmfs_high_rate_steering_mode_str( + dev->caps.dmfs_high_steer_mode), + (port_cap.dmfs_optimized_state ? + "enabled" : "disabled")); + } + } + + return 0; +} + +static int mlx4_init_fw(struct mlx4_dev *dev) +{ + struct mlx4_mod_stat_cfg mlx4_cfg; + int err = 0; + + if (!mlx4_is_slave(dev)) { + err = mlx4_QUERY_FW(dev); + if (err) { + if (err == -EACCES) + mlx4_info(dev, "non-primary physical function, skipping\n"); + else + mlx4_err(dev, "QUERY_FW command failed, aborting\n"); + return err; + } + + err = mlx4_load_fw(dev); + if (err) { + mlx4_err(dev, "Failed to start FW, aborting\n"); + return err; + } + + mlx4_cfg.log_pg_sz_m = 1; + mlx4_cfg.log_pg_sz = 0; + err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg); + if (err) + mlx4_warn(dev, "Failed to override log_pg_sz parameter\n"); + } + + return err; +} + +static int mlx4_init_hca(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_init_hca_param *init_hca = NULL; + struct mlx4_dev_cap *dev_cap = NULL; + struct mlx4_adapter adapter; + struct mlx4_profile profile; + u64 icm_size; + struct mlx4_config_dev_params params; + int err; + + if (!mlx4_is_slave(dev)) { + dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL); + init_hca = kzalloc(sizeof(*init_hca), GFP_KERNEL); + + if (!dev_cap || !init_hca) { + err = -ENOMEM; + goto out_free; + } + + err = mlx4_dev_cap(dev, dev_cap); + if (err) { + mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n"); + goto out_free; + } + + choose_steering_mode(dev, dev_cap); + choose_tunnel_offload_mode(dev, dev_cap); + + if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC && + mlx4_is_master(dev)) + dev->caps.function_caps |= MLX4_FUNC_CAP_DMFS_A0_STATIC; + + err = mlx4_get_phys_port_id(dev); + if (err) + mlx4_err(dev, "Fail to get physical port id\n"); + + if (mlx4_is_master(dev)) + mlx4_parav_master_pf_caps(dev); + + if (mlx4_low_memory_profile()) { + mlx4_info(dev, "Running from within kdump kernel. 
Using low memory profile\n"); + profile = low_mem_profile; + } else { + profile = default_profile; + } + if (dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) + profile.num_mcg = MLX4_FS_NUM_MCG; + + icm_size = mlx4_make_profile(dev, &profile, dev_cap, + init_hca); + if ((long long) icm_size < 0) { + err = icm_size; + goto out_free; + } + + if (enable_4k_uar || !dev->persist->num_vfs) { + init_hca->log_uar_sz = ilog2(dev->caps.num_uars) + + PAGE_SHIFT - DEFAULT_UAR_PAGE_SHIFT; + init_hca->uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12; + } else { + init_hca->log_uar_sz = ilog2(dev->caps.num_uars); + init_hca->uar_page_sz = PAGE_SHIFT - 12; + } + + init_hca->mw_enabled = 0; + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || + dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) + init_hca->mw_enabled = INIT_HCA_TPT_MW_ENABLE; + + err = mlx4_init_icm(dev, dev_cap, init_hca, icm_size); + if (err) + goto out_free; + + err = mlx4_INIT_HCA(dev, init_hca); + if (err) { + mlx4_err(dev, "INIT_HCA command failed, aborting\n"); + goto err_free_icm; + } + + if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) { + err = mlx4_query_func(dev, dev_cap); + if (err < 0) { + mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n"); + goto err_close; + } else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) { + dev->caps.num_eqs = dev_cap->max_eqs; + dev->caps.reserved_eqs = dev_cap->reserved_eqs; + dev->caps.reserved_uars = dev_cap->reserved_uars; + } + } + + /* + * If TS is supported by FW + * read HCA frequency by QUERY_HCA command + */ + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) { + err = mlx4_QUERY_HCA(dev, init_hca); + if (err) { + mlx4_err(dev, "QUERY_HCA command failed, disable timestamp\n"); + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; + } else { + dev->caps.hca_core_clock = + init_hca->hca_core_clock; + } + + /* In case we got HCA frequency 0 - disable timestamping + * to avoid dividing by zero + */ + if (!dev->caps.hca_core_clock) { + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; + mlx4_err(dev, + "HCA frequency is 0 - timestamping is not supported\n"); + } else if (map_internal_clock(dev)) { + /* + * Map internal clock, + * in case of failure disable timestamping + */ + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; + mlx4_err(dev, "Failed to map internal clock. 
Timestamping is not supported\n"); + } + } + + if (dev->caps.dmfs_high_steer_mode != + MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) { + if (mlx4_validate_optimized_steering(dev)) + mlx4_warn(dev, "Optimized steering validation failed\n"); + + if (dev->caps.dmfs_high_steer_mode == + MLX4_STEERING_DMFS_A0_DISABLE) { + dev->caps.dmfs_high_rate_qpn_base = + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW]; + dev->caps.dmfs_high_rate_qpn_range = + MLX4_A0_STEERING_TABLE_SIZE; + } + + mlx4_info(dev, "DMFS high rate steer mode is: %s\n", + dmfs_high_rate_steering_mode_str( + dev->caps.dmfs_high_steer_mode)); + } + } else { + err = mlx4_init_slave(dev); + if (err) { + if (err != -EPROBE_DEFER) + mlx4_err(dev, "Failed to initialize slave\n"); + return err; + } + + err = mlx4_slave_cap(dev); + if (err) { + mlx4_err(dev, "Failed to obtain slave caps\n"); + goto err_close; + } + } + + if (map_bf_area(dev)) + mlx4_dbg(dev, "Failed to map blue flame area\n"); + + /*Only the master set the ports, all the rest got it from it.*/ + if (!mlx4_is_slave(dev)) + mlx4_set_port_mask(dev); + + err = mlx4_QUERY_ADAPTER(dev, &adapter); + if (err) { + mlx4_err(dev, "QUERY_ADAPTER command failed, aborting\n"); + goto unmap_bf; + } + + /* Query CONFIG_DEV parameters */ + err = mlx4_config_dev_retrieval(dev, ¶ms); + if (err && err != -EOPNOTSUPP) { + mlx4_err(dev, "Failed to query CONFIG_DEV parameters\n"); + } else if (!err) { + dev->caps.rx_checksum_flags_port[1] = params.rx_csum_flags_port_1; + dev->caps.rx_checksum_flags_port[2] = params.rx_csum_flags_port_2; + } + priv->eq_table.inta_pin = adapter.inta_pin; + memcpy(dev->board_id, adapter.board_id, sizeof(dev->board_id)); + + err = 0; + goto out_free; + +unmap_bf: + unmap_internal_clock(dev); + unmap_bf_area(dev); + + if (mlx4_is_slave(dev)) + mlx4_slave_destroy_special_qp_cap(dev); + +err_close: + if (mlx4_is_slave(dev)) + mlx4_slave_exit(dev); + else + mlx4_CLOSE_HCA(dev, 0); + +err_free_icm: + if (!mlx4_is_slave(dev)) + mlx4_free_icms(dev); + +out_free: + kfree(dev_cap); + kfree(init_hca); + + return err; +} + +static int mlx4_init_counters_table(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int nent_pow2; + + if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) + return -ENOENT; + + if (!dev->caps.max_counters) + return -ENOSPC; + + nent_pow2 = roundup_pow_of_two(dev->caps.max_counters); + /* reserve last counter index for sink counter */ + return mlx4_bitmap_init(&priv->counters_bitmap, nent_pow2, + nent_pow2 - 1, 0, + nent_pow2 - dev->caps.max_counters + 1); +} + +static void mlx4_cleanup_counters_table(struct mlx4_dev *dev) +{ + if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) + return; + + if (!dev->caps.max_counters) + return; + + mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap); +} + +static void mlx4_cleanup_default_counters(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int port; + + for (port = 0; port < dev->caps.num_ports; port++) + if (priv->def_counter[port] != -1) + mlx4_counter_free(dev, priv->def_counter[port]); +} + +static int mlx4_allocate_default_counters(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int port, err = 0; + u32 idx; + + for (port = 0; port < dev->caps.num_ports; port++) + priv->def_counter[port] = -1; + + for (port = 0; port < dev->caps.num_ports; port++) { + err = mlx4_counter_alloc(dev, &idx, MLX4_RES_USAGE_DRIVER); + + if (!err || err == -ENOSPC) { + priv->def_counter[port] = idx; + err = 0; + } else if (err == -ENOENT) { + err = 0; + continue; + } else if 
(mlx4_is_slave(dev) && err == -EINVAL) { + priv->def_counter[port] = MLX4_SINK_COUNTER_INDEX(dev); + mlx4_warn(dev, "can't allocate counter from old PF driver, using index %d\n", + MLX4_SINK_COUNTER_INDEX(dev)); + err = 0; + } else { + mlx4_err(dev, "%s: failed to allocate default counter port %d err %d\n", + __func__, port + 1, err); + mlx4_cleanup_default_counters(dev); + return err; + } + + mlx4_dbg(dev, "%s: default counter index %d for port %d\n", + __func__, priv->def_counter[port], port + 1); + } + + return err; +} + +int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) + return -ENOENT; + + *idx = mlx4_bitmap_alloc(&priv->counters_bitmap); + if (*idx == -1) { + *idx = MLX4_SINK_COUNTER_INDEX(dev); + return -ENOSPC; + } + + return 0; +} + +int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx, u8 usage) +{ + u32 in_modifier = RES_COUNTER | (((u32)usage & 3) << 30); + u64 out_param; + int err; + + if (mlx4_is_mfunc(dev)) { + err = mlx4_cmd_imm(dev, 0, &out_param, in_modifier, + RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (!err) + *idx = get_param_l(&out_param); + if (WARN_ON(err == -ENOSPC)) + err = -EINVAL; + return err; + } + return __mlx4_counter_alloc(dev, idx); +} +EXPORT_SYMBOL_GPL(mlx4_counter_alloc); + +static int __mlx4_clear_if_stat(struct mlx4_dev *dev, + u8 counter_index) +{ + struct mlx4_cmd_mailbox *if_stat_mailbox; + int err; + u32 if_stat_in_mod = (counter_index & 0xff) | MLX4_QUERY_IF_STAT_RESET; + + if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(if_stat_mailbox)) + return PTR_ERR(if_stat_mailbox); + + err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0, + MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, if_stat_mailbox); + return err; +} + +void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx) +{ + if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) + return; + + if (idx == MLX4_SINK_COUNTER_INDEX(dev)) + return; + + __mlx4_clear_if_stat(dev, idx); + + mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx, MLX4_USE_RR); + return; +} + +void mlx4_counter_free(struct mlx4_dev *dev, u32 idx) +{ + u64 in_param = 0; + + if (mlx4_is_mfunc(dev)) { + set_param_l(&in_param, idx); + mlx4_cmd(dev, in_param, RES_COUNTER, RES_OP_RESERVE, + MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_WRAPPED); + return; + } + __mlx4_counter_free(dev, idx); +} +EXPORT_SYMBOL_GPL(mlx4_counter_free); + +int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + return priv->def_counter[port - 1]; +} +EXPORT_SYMBOL_GPL(mlx4_get_default_counter_index); + +void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + priv->mfunc.master.vf_admin[entry].vport[port].guid = guid; +} +EXPORT_SYMBOL_GPL(mlx4_set_admin_guid); + +__be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + return priv->mfunc.master.vf_admin[entry].vport[port].guid; +} +EXPORT_SYMBOL_GPL(mlx4_get_admin_guid); + +void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + __be64 guid; + + /* hw GUID */ + if (entry == 0) + return; + + get_random_bytes((char *)&guid, sizeof(guid)); + guid &= ~(cpu_to_be64(1ULL << 56)); + guid |= cpu_to_be64(1ULL << 57); 
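+	/* the random GUID generated above has been forced to be unicast
+	 * (EUI-64 group bit cleared) and locally administered (U/L bit set);
+	 * store it as this VF's admin GUID for the given port.
+	 */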
+ priv->mfunc.master.vf_admin[entry].vport[port].guid = guid; +} + +static int mlx4_setup_hca(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int err; + int port; + __be32 ib_port_default_caps; + + err = mlx4_init_uar_table(dev); + if (err) { + mlx4_err(dev, "Failed to initialize user access region table, aborting\n"); + return err; + } + + err = mlx4_uar_alloc(dev, &priv->driver_uar); + if (err) { + mlx4_err(dev, "Failed to allocate driver access region, aborting\n"); + goto err_uar_table_free; + } + + priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); + if (!priv->kar) { + mlx4_err(dev, "Couldn't map kernel access region, aborting\n"); + err = -ENOMEM; + goto err_uar_free; + } + + err = mlx4_init_pd_table(dev); + if (err) { + mlx4_err(dev, "Failed to initialize protection domain table, aborting\n"); + goto err_kar_unmap; + } + + err = mlx4_init_xrcd_table(dev); + if (err) { + mlx4_err(dev, "Failed to initialize reliable connection domain table, aborting\n"); + goto err_pd_table_free; + } + + err = mlx4_init_mr_table(dev); + if (err) { + mlx4_err(dev, "Failed to initialize memory region table, aborting\n"); + goto err_xrcd_table_free; + } + + if (!mlx4_is_slave(dev)) { + err = mlx4_init_mcg_table(dev); + if (err) { + mlx4_err(dev, "Failed to initialize multicast group table, aborting\n"); + goto err_mr_table_free; + } + err = mlx4_config_mad_demux(dev); + if (err) { + mlx4_err(dev, "Failed in config_mad_demux, aborting\n"); + goto err_mcg_table_free; + } + } + + err = mlx4_init_eq_table(dev); + if (err) { + mlx4_err(dev, "Failed to initialize event queue table, aborting\n"); + goto err_mcg_table_free; + } + + err = mlx4_cmd_use_events(dev); + if (err) { + mlx4_err(dev, "Failed to switch to event-driven firmware commands, aborting\n"); + goto err_eq_table_free; + } + + err = mlx4_NOP(dev); + if (err) { + if (dev->flags & MLX4_FLAG_MSI_X) { + mlx4_warn(dev, "NOP command failed to generate MSI-X interrupt IRQ %d)\n", + priv->eq_table.eq[MLX4_EQ_ASYNC].irq); + mlx4_warn(dev, "Trying again without MSI-X\n"); + } else { + mlx4_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting\n", + priv->eq_table.eq[MLX4_EQ_ASYNC].irq); + mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n"); + } + + goto err_cmd_poll; + } + + mlx4_dbg(dev, "NOP command IRQ test passed\n"); + + err = mlx4_init_cq_table(dev); + if (err) { + mlx4_err(dev, "Failed to initialize completion queue table, aborting\n"); + goto err_cmd_poll; + } + + err = mlx4_init_srq_table(dev); + if (err) { + mlx4_err(dev, "Failed to initialize shared receive queue table, aborting\n"); + goto err_cq_table_free; + } + + err = mlx4_init_qp_table(dev); + if (err) { + mlx4_err(dev, "Failed to initialize queue pair table, aborting\n"); + goto err_srq_table_free; + } + + if (!mlx4_is_slave(dev)) { + err = mlx4_init_counters_table(dev); + if (err && err != -ENOENT) { + mlx4_err(dev, "Failed to initialize counters table, aborting\n"); + goto err_qp_table_free; + } + } + + err = mlx4_allocate_default_counters(dev); + if (err) { + mlx4_err(dev, "Failed to allocate default counters, aborting\n"); + goto err_counters_table_free; + } + + if (!mlx4_is_slave(dev)) { + for (port = 1; port <= dev->caps.num_ports; port++) { + ib_port_default_caps = 0; + err = mlx4_get_port_ib_caps(dev, port, + &ib_port_default_caps); + if (err) + mlx4_warn(dev, "failed to get port %d default ib capabilities (%d). 
Continuing with caps = 0\n",
+					  port, err);
+			dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
+
+			/* initialize per-slave default ib port capabilities */
+			if (mlx4_is_master(dev)) {
+				int i;
+				for (i = 0; i < dev->num_slaves; i++) {
+					if (i == mlx4_master_func_num(dev))
+						continue;
+					priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
+						ib_port_default_caps;
+				}
+			}
+
+			if (mlx4_is_mfunc(dev))
+				dev->caps.port_ib_mtu[port] = IB_MTU_2048;
+			else
+				dev->caps.port_ib_mtu[port] = IB_MTU_4096;
+
+			err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ?
+					    dev->caps.pkey_table_len[port] : -1);
+			if (err) {
+				mlx4_err(dev, "Failed to set port %d, aborting\n",
+					 port);
+				goto err_default_counters_free;
+			}
+		}
+	}
+
+	return 0;
+
+err_default_counters_free:
+	mlx4_cleanup_default_counters(dev);
+
+err_counters_table_free:
+	if (!mlx4_is_slave(dev))
+		mlx4_cleanup_counters_table(dev);
+
+err_qp_table_free:
+	mlx4_cleanup_qp_table(dev);
+
+err_srq_table_free:
+	mlx4_cleanup_srq_table(dev);
+
+err_cq_table_free:
+	mlx4_cleanup_cq_table(dev);
+
+err_cmd_poll:
+	mlx4_cmd_use_polling(dev);
+
+err_eq_table_free:
+	mlx4_cleanup_eq_table(dev);
+
+err_mcg_table_free:
+	if (!mlx4_is_slave(dev))
+		mlx4_cleanup_mcg_table(dev);
+
+err_mr_table_free:
+	mlx4_cleanup_mr_table(dev);
+
+err_xrcd_table_free:
+	mlx4_cleanup_xrcd_table(dev);
+
+err_pd_table_free:
+	mlx4_cleanup_pd_table(dev);
+
+err_kar_unmap:
+	iounmap(priv->kar);
+
+err_uar_free:
+	mlx4_uar_free(dev, &priv->driver_uar);
+
+err_uar_table_free:
+	mlx4_cleanup_uar_table(dev);
+	return err;
+}
+
+static int mlx4_init_affinity_hint(struct mlx4_dev *dev, int port, int eqn)
+{
+	int requested_cpu = 0;
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_eq *eq;
+	int off = 0;
+	int i;
+
+	if (eqn > dev->caps.num_comp_vectors)
+		return -EINVAL;
+
+	for (i = 1; i < port; i++)
+		off += mlx4_get_eqs_per_port(dev, i);
+
+	requested_cpu = eqn - off - !!(eqn > MLX4_EQ_ASYNC);
+
+	/* Meaning EQs are shared, and this call comes from the second port */
+	if (requested_cpu < 0)
+		return 0;
+
+	eq = &priv->eq_table.eq[eqn];
+
+	if (!zalloc_cpumask_var(&eq->affinity_mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	cpumask_set_cpu(requested_cpu, eq->affinity_mask);
+
+	return 0;
+}
+
+static void mlx4_enable_msi_x(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct msix_entry *entries;
+	int i;
+	int port = 0;
+
+	if (msi_x) {
+		int nreq = min3(dev->caps.num_ports *
+				(int)num_online_cpus() + 1,
+				dev->caps.num_eqs - dev->caps.reserved_eqs,
+				MAX_MSIX);
+
+		if (msi_x > 1)
+			nreq = min_t(int, nreq, msi_x);
+
+		entries = kcalloc(nreq, sizeof(*entries), GFP_KERNEL);
+		if (!entries)
+			goto no_msi;
+
+		for (i = 0; i < nreq; ++i)
+			entries[i].entry = i;
+
+		nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2,
+					     nreq);
+
+		if (nreq < 0 || nreq < MLX4_EQ_ASYNC) {
+			kfree(entries);
+			goto no_msi;
+		}
+		/* 1 is reserved for events (asynchronous EQ) */
+		dev->caps.num_comp_vectors = nreq - 1;
+
+		priv->eq_table.eq[MLX4_EQ_ASYNC].irq = entries[0].vector;
+		bitmap_zero(priv->eq_table.eq[MLX4_EQ_ASYNC].actv_ports.ports,
+			    dev->caps.num_ports);
+
+		for (i = 0; i < dev->caps.num_comp_vectors + 1; i++) {
+			if (i == MLX4_EQ_ASYNC)
+				continue;
+
+			priv->eq_table.eq[i].irq =
+				entries[i + 1 - !!(i > MLX4_EQ_ASYNC)].vector;
+
+			if (MLX4_IS_LEGACY_EQ_MODE(dev->caps)) {
+				bitmap_fill(priv->eq_table.eq[i].actv_ports.ports,
+					    dev->caps.num_ports);
+				/* We don't set affinity hint when there
+				 * aren't enough EQs
+				 */
+			} else {
+				set_bit(port,
priv->eq_table.eq[i].actv_ports.ports); + if (mlx4_init_affinity_hint(dev, port + 1, i)) + mlx4_warn(dev, "Couldn't init hint cpumask for EQ %d\n", + i); + } + /* We divide the Eqs evenly between the two ports. + * (dev->caps.num_comp_vectors / dev->caps.num_ports) + * refers to the number of Eqs per port + * (i.e eqs_per_port). Theoretically, we would like to + * write something like (i + 1) % eqs_per_port == 0. + * However, since there's an asynchronous Eq, we have + * to skip over it by comparing this condition to + * !!((i + 1) > MLX4_EQ_ASYNC). + */ + if ((dev->caps.num_comp_vectors > dev->caps.num_ports) && + ((i + 1) % + (dev->caps.num_comp_vectors / dev->caps.num_ports)) == + !!((i + 1) > MLX4_EQ_ASYNC)) + /* If dev->caps.num_comp_vectors < dev->caps.num_ports, + * everything is shared anyway. + */ + port++; + } + + dev->flags |= MLX4_FLAG_MSI_X; + + kfree(entries); + return; + } + +no_msi: + dev->caps.num_comp_vectors = 1; + + BUG_ON(MLX4_EQ_ASYNC >= 2); + for (i = 0; i < 2; ++i) { + priv->eq_table.eq[i].irq = dev->persist->pdev->irq; + if (i != MLX4_EQ_ASYNC) { + bitmap_fill(priv->eq_table.eq[i].actv_ports.ports, + dev->caps.num_ports); + } + } +} + +static int mlx4_init_port_info(struct mlx4_dev *dev, int port) +{ + struct devlink *devlink = priv_to_devlink(mlx4_priv(dev)); + struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; + int err; + + err = devl_port_register(devlink, &info->devlink_port, port); + if (err) + return err; + + /* Ethernet and IB drivers will normally set the port type, + * but if they are not built set the type now to prevent + * devlink_port_type_warn() from firing. + */ + if (!IS_ENABLED(CONFIG_MLX4_EN) && + dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) + devlink_port_type_eth_set(&info->devlink_port, NULL); + else if (!IS_ENABLED(CONFIG_MLX4_INFINIBAND) && + dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) + devlink_port_type_ib_set(&info->devlink_port, NULL); + + info->dev = dev; + info->port = port; + if (!mlx4_is_slave(dev)) { + mlx4_init_mac_table(dev, &info->mac_table); + mlx4_init_vlan_table(dev, &info->vlan_table); + mlx4_init_roce_gid_table(dev, &info->gid_table); + info->base_qpn = mlx4_get_base_qpn(dev, port); + } + + sprintf(info->dev_name, "mlx4_port%d", port); + info->port_attr.attr.name = info->dev_name; + if (mlx4_is_mfunc(dev)) { + info->port_attr.attr.mode = 0444; + } else { + info->port_attr.attr.mode = 0644; + info->port_attr.store = set_port_type; + } + info->port_attr.show = show_port_type; + sysfs_attr_init(&info->port_attr.attr); + + err = device_create_file(&dev->persist->pdev->dev, &info->port_attr); + if (err) { + mlx4_err(dev, "Failed to create file for port %d\n", port); + devlink_port_type_clear(&info->devlink_port); + devl_port_unregister(&info->devlink_port); + info->port = -1; + return err; + } + + sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port); + info->port_mtu_attr.attr.name = info->dev_mtu_name; + if (mlx4_is_mfunc(dev)) { + info->port_mtu_attr.attr.mode = 0444; + } else { + info->port_mtu_attr.attr.mode = 0644; + info->port_mtu_attr.store = set_port_ib_mtu; + } + info->port_mtu_attr.show = show_port_ib_mtu; + sysfs_attr_init(&info->port_mtu_attr.attr); + + err = device_create_file(&dev->persist->pdev->dev, + &info->port_mtu_attr); + if (err) { + mlx4_err(dev, "Failed to create mtu file for port %d\n", port); + device_remove_file(&info->dev->persist->pdev->dev, + &info->port_attr); + devlink_port_type_clear(&info->devlink_port); + devl_port_unregister(&info->devlink_port); + info->port = -1; + return err; 
+ } + + return 0; +} + +static void mlx4_cleanup_port_info(struct mlx4_port_info *info) +{ + if (info->port < 0) + return; + + device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr); + device_remove_file(&info->dev->persist->pdev->dev, + &info->port_mtu_attr); + devlink_port_type_clear(&info->devlink_port); + devl_port_unregister(&info->devlink_port); + +#ifdef CONFIG_RFS_ACCEL + free_irq_cpu_rmap(info->rmap); + info->rmap = NULL; +#endif +} + +static int mlx4_init_steering(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int num_entries = dev->caps.num_ports; + int i, j; + + priv->steer = kcalloc(num_entries, sizeof(struct mlx4_steer), + GFP_KERNEL); + if (!priv->steer) + return -ENOMEM; + + for (i = 0; i < num_entries; i++) + for (j = 0; j < MLX4_NUM_STEERS; j++) { + INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]); + INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]); + } + return 0; +} + +static void mlx4_clear_steering(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_steer_index *entry, *tmp_entry; + struct mlx4_promisc_qp *pqp, *tmp_pqp; + int num_entries = dev->caps.num_ports; + int i, j; + + for (i = 0; i < num_entries; i++) { + for (j = 0; j < MLX4_NUM_STEERS; j++) { + list_for_each_entry_safe(pqp, tmp_pqp, + &priv->steer[i].promisc_qps[j], + list) { + list_del(&pqp->list); + kfree(pqp); + } + list_for_each_entry_safe(entry, tmp_entry, + &priv->steer[i].steer_entries[j], + list) { + list_del(&entry->list); + list_for_each_entry_safe(pqp, tmp_pqp, + &entry->duplicates, + list) { + list_del(&pqp->list); + kfree(pqp); + } + kfree(entry); + } + } + } + kfree(priv->steer); +} + +static int extended_func_num(struct pci_dev *pdev) +{ + return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn); +} + +#define MLX4_OWNER_BASE 0x8069c +#define MLX4_OWNER_SIZE 4 + +static int mlx4_get_ownership(struct mlx4_dev *dev) +{ + void __iomem *owner; + u32 ret; + + if (pci_channel_offline(dev->persist->pdev)) + return -EIO; + + owner = ioremap(pci_resource_start(dev->persist->pdev, 0) + + MLX4_OWNER_BASE, + MLX4_OWNER_SIZE); + if (!owner) { + mlx4_err(dev, "Failed to obtain ownership bit\n"); + return -ENOMEM; + } + + ret = readl(owner); + iounmap(owner); + return (int) !!ret; +} + +static void mlx4_free_ownership(struct mlx4_dev *dev) +{ + void __iomem *owner; + + if (pci_channel_offline(dev->persist->pdev)) + return; + + owner = ioremap(pci_resource_start(dev->persist->pdev, 0) + + MLX4_OWNER_BASE, + MLX4_OWNER_SIZE); + if (!owner) { + mlx4_err(dev, "Failed to obtain ownership bit\n"); + return; + } + writel(0, owner); + msleep(1000); + iounmap(owner); +} + +#define SRIOV_VALID_STATE(flags) (!!((flags) & MLX4_FLAG_SRIOV) ==\ + !!((flags) & MLX4_FLAG_MASTER)) + +static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev, + u8 total_vfs, int existing_vfs, int reset_flow) +{ + u64 dev_flags = dev->flags; + int err = 0; + int fw_enabled_sriov_vfs = min(pci_sriov_get_totalvfs(pdev), + MLX4_MAX_NUM_VF); + + if (reset_flow) { + dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs), + GFP_KERNEL); + if (!dev->dev_vfs) + goto free_mem; + return dev_flags; + } + + atomic_inc(&pf_loading); + if (dev->flags & MLX4_FLAG_SRIOV) { + if (existing_vfs != total_vfs) { + mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n", + existing_vfs, total_vfs); + total_vfs = existing_vfs; + } + } + + dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs), GFP_KERNEL); + if (NULL == dev->dev_vfs) { + 
mlx4_err(dev, "Failed to allocate memory for VFs\n"); + goto disable_sriov; + } + + if (!(dev->flags & MLX4_FLAG_SRIOV)) { + if (total_vfs > fw_enabled_sriov_vfs) { + mlx4_err(dev, "requested vfs (%d) > available vfs (%d). Continuing without SR_IOV\n", + total_vfs, fw_enabled_sriov_vfs); + err = -ENOMEM; + goto disable_sriov; + } + mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs); + err = pci_enable_sriov(pdev, total_vfs); + } + if (err) { + mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n", + err); + goto disable_sriov; + } else { + mlx4_warn(dev, "Running in master mode\n"); + dev_flags |= MLX4_FLAG_SRIOV | + MLX4_FLAG_MASTER; + dev_flags &= ~MLX4_FLAG_SLAVE; + dev->persist->num_vfs = total_vfs; + } + return dev_flags; + +disable_sriov: + atomic_dec(&pf_loading); +free_mem: + dev->persist->num_vfs = 0; + kfree(dev->dev_vfs); + dev->dev_vfs = NULL; + return dev_flags & ~MLX4_FLAG_MASTER; +} + +enum { + MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64 = -1, +}; + +static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, + int *nvfs) +{ + int requested_vfs = nvfs[0] + nvfs[1] + nvfs[2]; + /* Checking for 64 VFs as a limitation of CX2 */ + if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_80_VFS) && + requested_vfs >= 64) { + mlx4_err(dev, "Requested %d VFs, but FW does not support more than 64\n", + requested_vfs); + return MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64; + } + return 0; +} + +static int mlx4_pci_enable_device(struct mlx4_dev *dev) +{ + struct pci_dev *pdev = dev->persist->pdev; + int err = 0; + + mutex_lock(&dev->persist->pci_status_mutex); + if (dev->persist->pci_status == MLX4_PCI_STATUS_DISABLED) { + err = pci_enable_device(pdev); + if (!err) + dev->persist->pci_status = MLX4_PCI_STATUS_ENABLED; + } + mutex_unlock(&dev->persist->pci_status_mutex); + + return err; +} + +static void mlx4_pci_disable_device(struct mlx4_dev *dev) +{ + struct pci_dev *pdev = dev->persist->pdev; + + mutex_lock(&dev->persist->pci_status_mutex); + if (dev->persist->pci_status == MLX4_PCI_STATUS_ENABLED) { + pci_disable_device(pdev); + dev->persist->pci_status = MLX4_PCI_STATUS_DISABLED; + } + mutex_unlock(&dev->persist->pci_status_mutex); +} + +static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, + int total_vfs, int *nvfs, struct mlx4_priv *priv, + int reset_flow) +{ + struct devlink *devlink = priv_to_devlink(priv); + struct mlx4_dev *dev; + unsigned sum = 0; + int err; + int port; + int i; + struct mlx4_dev_cap *dev_cap = NULL; + int existing_vfs = 0; + + devl_assert_locked(devlink); + dev = &priv->dev; + + INIT_LIST_HEAD(&priv->ctx_list); + spin_lock_init(&priv->ctx_lock); + + mutex_init(&priv->port_mutex); + mutex_init(&priv->bond_mutex); + + INIT_LIST_HEAD(&priv->pgdir_list); + mutex_init(&priv->pgdir_mutex); + spin_lock_init(&priv->cmd.context_lock); + + INIT_LIST_HEAD(&priv->bf_list); + mutex_init(&priv->bf_mutex); + + dev->rev_id = pdev->revision; + dev->numa_node = dev_to_node(&pdev->dev); + + /* Detect if this device is a virtual function */ + if (pci_dev_data & MLX4_PCI_DEV_IS_VF) { + mlx4_warn(dev, "Detected virtual function - running in slave mode\n"); + dev->flags |= MLX4_FLAG_SLAVE; + } else { + /* We reset the device and enable SRIOV only for physical + * devices. 
Try to claim ownership on the device; + * if already taken, skip -- do not allow multiple PFs */ + err = mlx4_get_ownership(dev); + if (err) { + if (err < 0) + return err; + else { + mlx4_warn(dev, "Multiple PFs not yet supported - Skipping PF\n"); + return -EINVAL; + } + } + + atomic_set(&priv->opreq_count, 0); + INIT_WORK(&priv->opreq_task, mlx4_opreq_action); + + /* + * Now reset the HCA before we touch the PCI capabilities or + * attempt a firmware command, since a boot ROM may have left + * the HCA in an undefined state. + */ + err = mlx4_reset(dev); + if (err) { + mlx4_err(dev, "Failed to reset HCA, aborting\n"); + goto err_sriov; + } + + if (total_vfs) { + dev->flags = MLX4_FLAG_MASTER; + existing_vfs = pci_num_vf(pdev); + if (existing_vfs) + dev->flags |= MLX4_FLAG_SRIOV; + dev->persist->num_vfs = total_vfs; + } + } + + /* on load remove any previous indication of internal error, + * device is up. + */ + dev->persist->state = MLX4_DEVICE_STATE_UP; + +slave_start: + err = mlx4_cmd_init(dev); + if (err) { + mlx4_err(dev, "Failed to init command interface, aborting\n"); + goto err_sriov; + } + + /* In slave functions, the communication channel must be initialized + * before posting commands. Also, init num_slaves before calling + * mlx4_init_hca */ + if (mlx4_is_mfunc(dev)) { + if (mlx4_is_master(dev)) { + dev->num_slaves = MLX4_MAX_NUM_SLAVES; + + } else { + dev->num_slaves = 0; + err = mlx4_multi_func_init(dev); + if (err) { + mlx4_err(dev, "Failed to init slave mfunc interface, aborting\n"); + goto err_cmd; + } + } + } + + err = mlx4_init_fw(dev); + if (err) { + mlx4_err(dev, "Failed to init fw, aborting.\n"); + goto err_mfunc; + } + + if (mlx4_is_master(dev)) { + /* when we hit the goto slave_start below, dev_cap already initialized */ + if (!dev_cap) { + dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL); + + if (!dev_cap) { + err = -ENOMEM; + goto err_fw; + } + + err = mlx4_QUERY_DEV_CAP(dev, dev_cap); + if (err) { + mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); + goto err_fw; + } + + if (mlx4_check_dev_cap(dev, dev_cap, nvfs)) + goto err_fw; + + if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) { + u64 dev_flags = mlx4_enable_sriov(dev, pdev, + total_vfs, + existing_vfs, + reset_flow); + + mlx4_close_fw(dev); + mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); + dev->flags = dev_flags; + if (!SRIOV_VALID_STATE(dev->flags)) { + mlx4_err(dev, "Invalid SRIOV state\n"); + goto err_sriov; + } + err = mlx4_reset(dev); + if (err) { + mlx4_err(dev, "Failed to reset HCA, aborting.\n"); + goto err_sriov; + } + goto slave_start; + } + } else { + /* Legacy mode FW requires SRIOV to be enabled before + * doing QUERY_DEV_CAP, since max_eq's value is different if + * SRIOV is enabled. 
+ */ + memset(dev_cap, 0, sizeof(*dev_cap)); + err = mlx4_QUERY_DEV_CAP(dev, dev_cap); + if (err) { + mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); + goto err_fw; + } + + if (mlx4_check_dev_cap(dev, dev_cap, nvfs)) + goto err_fw; + } + } + + err = mlx4_init_hca(dev); + if (err) { + if (err == -EACCES) { + /* Not primary Physical function + * Running in slave mode */ + mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); + /* We're not a PF */ + if (dev->flags & MLX4_FLAG_SRIOV) { + if (!existing_vfs) + pci_disable_sriov(pdev); + if (mlx4_is_master(dev) && !reset_flow) + atomic_dec(&pf_loading); + dev->flags &= ~MLX4_FLAG_SRIOV; + } + if (!mlx4_is_slave(dev)) + mlx4_free_ownership(dev); + dev->flags |= MLX4_FLAG_SLAVE; + dev->flags &= ~MLX4_FLAG_MASTER; + goto slave_start; + } else + goto err_fw; + } + + if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) { + u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, + existing_vfs, reset_flow); + + if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) { + mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR); + dev->flags = dev_flags; + err = mlx4_cmd_init(dev); + if (err) { + /* Only VHCR is cleaned up, so could still + * send FW commands + */ + mlx4_err(dev, "Failed to init VHCR command interface, aborting\n"); + goto err_close; + } + } else { + dev->flags = dev_flags; + } + + if (!SRIOV_VALID_STATE(dev->flags)) { + mlx4_err(dev, "Invalid SRIOV state\n"); + err = -EINVAL; + goto err_close; + } + } + + /* check if the device is functioning at its maximum possible speed. + * No return code for this call, just warn the user in case of PCI + * express device capabilities are under-satisfied by the bus. + */ + if (!mlx4_is_slave(dev)) + pcie_print_link_status(dev->persist->pdev); + + /* In master functions, the communication channel must be initialized + * after obtaining its address from fw */ + if (mlx4_is_master(dev)) { + if (dev->caps.num_ports < 2 && + num_vfs_argc > 1) { + err = -EINVAL; + mlx4_err(dev, + "Error: Trying to configure VFs on port 2, but HCA has only %d physical ports\n", + dev->caps.num_ports); + goto err_close; + } + memcpy(dev->persist->nvfs, nvfs, sizeof(dev->persist->nvfs)); + + for (i = 0; + i < sizeof(dev->persist->nvfs)/ + sizeof(dev->persist->nvfs[0]); i++) { + unsigned j; + + for (j = 0; j < dev->persist->nvfs[i]; ++sum, ++j) { + dev->dev_vfs[sum].min_port = i < 2 ? i + 1 : 1; + dev->dev_vfs[sum].n_ports = i < 2 ? 
1 : + dev->caps.num_ports; + } + } + + /* In master functions, the communication channel + * must be initialized after obtaining its address from fw + */ + err = mlx4_multi_func_init(dev); + if (err) { + mlx4_err(dev, "Failed to init master mfunc interface, aborting.\n"); + goto err_close; + } + } + + err = mlx4_alloc_eq_table(dev); + if (err) + goto err_master_mfunc; + + bitmap_zero(priv->msix_ctl.pool_bm, MAX_MSIX); + mutex_init(&priv->msix_ctl.pool_lock); + + mlx4_enable_msi_x(dev); + if ((mlx4_is_mfunc(dev)) && + !(dev->flags & MLX4_FLAG_MSI_X)) { + err = -EOPNOTSUPP; + mlx4_err(dev, "INTx is not supported in multi-function mode, aborting\n"); + goto err_free_eq; + } + + if (!mlx4_is_slave(dev)) { + err = mlx4_init_steering(dev); + if (err) + goto err_disable_msix; + } + + mlx4_init_quotas(dev); + + err = mlx4_setup_hca(dev); + if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) && + !mlx4_is_mfunc(dev)) { + dev->flags &= ~MLX4_FLAG_MSI_X; + dev->caps.num_comp_vectors = 1; + pci_disable_msix(pdev); + err = mlx4_setup_hca(dev); + } + + if (err) + goto err_steer; + + /* When PF resources are ready arm its comm channel to enable + * getting commands + */ + if (mlx4_is_master(dev)) { + err = mlx4_ARM_COMM_CHANNEL(dev); + if (err) { + mlx4_err(dev, " Failed to arm comm channel eq: %x\n", + err); + goto err_steer; + } + } + + for (port = 1; port <= dev->caps.num_ports; port++) { + err = mlx4_init_port_info(dev, port); + if (err) + goto err_port; + } + + priv->v2p.port1 = 1; + priv->v2p.port2 = 2; + + err = mlx4_register_device(dev); + if (err) + goto err_port; + + mlx4_request_modules(dev); + + mlx4_sense_init(dev); + mlx4_start_sense(dev); + + priv->removed = 0; + + if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow) + atomic_dec(&pf_loading); + + kfree(dev_cap); + return 0; + +err_port: + for (--port; port >= 1; --port) + mlx4_cleanup_port_info(&priv->port[port]); + + mlx4_cleanup_default_counters(dev); + if (!mlx4_is_slave(dev)) + mlx4_cleanup_counters_table(dev); + mlx4_cleanup_qp_table(dev); + mlx4_cleanup_srq_table(dev); + mlx4_cleanup_cq_table(dev); + mlx4_cmd_use_polling(dev); + mlx4_cleanup_eq_table(dev); + mlx4_cleanup_mcg_table(dev); + mlx4_cleanup_mr_table(dev); + mlx4_cleanup_xrcd_table(dev); + mlx4_cleanup_pd_table(dev); + mlx4_cleanup_uar_table(dev); + +err_steer: + if (!mlx4_is_slave(dev)) + mlx4_clear_steering(dev); + +err_disable_msix: + if (dev->flags & MLX4_FLAG_MSI_X) + pci_disable_msix(pdev); + +err_free_eq: + mlx4_free_eq_table(dev); + +err_master_mfunc: + if (mlx4_is_master(dev)) { + mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY); + mlx4_multi_func_cleanup(dev); + } + + if (mlx4_is_slave(dev)) + mlx4_slave_destroy_special_qp_cap(dev); + +err_close: + mlx4_close_hca(dev); + +err_fw: + mlx4_close_fw(dev); + +err_mfunc: + if (mlx4_is_slave(dev)) + mlx4_multi_func_cleanup(dev); + +err_cmd: + mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); + +err_sriov: + if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) { + pci_disable_sriov(pdev); + dev->flags &= ~MLX4_FLAG_SRIOV; + } + + if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow) + atomic_dec(&pf_loading); + + kfree(priv->dev.dev_vfs); + + if (!mlx4_is_slave(dev)) + mlx4_free_ownership(dev); + + kfree(dev_cap); + return err; +} + +static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, + struct mlx4_priv *priv) +{ + int err; + int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0}; + int prb_vf[MLX4_MAX_PORTS + 1] = {0, 0, 0}; + const int param_map[MLX4_MAX_PORTS + 1][MLX4_MAX_PORTS + 1] = { + 
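+	/* Illustrative mapping, derived from the loops below: the row is
+	 * (num_vfs_argc - 1), the column is the position of the module
+	 * parameter, and the value is the nvfs[]/prb_vf[] slot it fills.
+	 * A single num_vfs value lands in slot 2 (VFs shared across both
+	 * ports); two or three values fill the port 1, port 2 and
+	 * dual-port slots in order, e.g. (hypothetical invocations):
+	 *
+	 *   num_vfs=4      ->  nvfs = {0, 0, 4}
+	 *   num_vfs=2,2    ->  nvfs = {2, 2, 0}
+	 *   num_vfs=2,2,4  ->  nvfs = {2, 2, 4}
+	 */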
{2, 0, 0}, {0, 1, 2}, {0, 1, 2} }; + unsigned total_vfs = 0; + unsigned int i; + + pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev)); + + err = mlx4_pci_enable_device(&priv->dev); + if (err) { + dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n"); + return err; + } + + /* Due to requirement that all VFs and the PF are *guaranteed* 2 MACS + * per port, we must limit the number of VFs to 63 (since their are + * 128 MACs) + */ + for (i = 0; i < ARRAY_SIZE(nvfs) && i < num_vfs_argc; + total_vfs += nvfs[param_map[num_vfs_argc - 1][i]], i++) { + nvfs[param_map[num_vfs_argc - 1][i]] = num_vfs[i]; + if (nvfs[i] < 0) { + dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n"); + err = -EINVAL; + goto err_disable_pdev; + } + } + for (i = 0; i < ARRAY_SIZE(prb_vf) && i < probe_vfs_argc; + i++) { + prb_vf[param_map[probe_vfs_argc - 1][i]] = probe_vf[i]; + if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) { + dev_err(&pdev->dev, "probe_vf module parameter cannot be negative or greater than num_vfs\n"); + err = -EINVAL; + goto err_disable_pdev; + } + } + if (total_vfs > MLX4_MAX_NUM_VF) { + dev_err(&pdev->dev, + "Requested more VF's (%d) than allowed by hw (%d)\n", + total_vfs, MLX4_MAX_NUM_VF); + err = -EINVAL; + goto err_disable_pdev; + } + + for (i = 0; i < MLX4_MAX_PORTS; i++) { + if (nvfs[i] + nvfs[2] > MLX4_MAX_NUM_VF_P_PORT) { + dev_err(&pdev->dev, + "Requested more VF's (%d) for port (%d) than allowed by driver (%d)\n", + nvfs[i] + nvfs[2], i + 1, + MLX4_MAX_NUM_VF_P_PORT); + err = -EINVAL; + goto err_disable_pdev; + } + } + + /* Check for BARs. */ + if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) && + !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { + dev_err(&pdev->dev, "Missing DCS, aborting (driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n", + pci_dev_data, pci_resource_flags(pdev, 0)); + err = -ENODEV; + goto err_disable_pdev; + } + if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) { + dev_err(&pdev->dev, "Missing UAR, aborting\n"); + err = -ENODEV; + goto err_disable_pdev; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n"); + goto err_disable_pdev; + } + + pci_set_master(pdev); + + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n"); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n"); + goto err_release_regions; + } + } + + /* Allow large DMA segments, up to the firmware limit of 1 GB */ + dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024); + /* Detect if this device is a virtual function */ + if (pci_dev_data & MLX4_PCI_DEV_IS_VF) { + /* When acting as pf, we normally skip vfs unless explicitly + * requested to probe them. 
+ */ + if (total_vfs) { + unsigned vfs_offset = 0; + + for (i = 0; i < ARRAY_SIZE(nvfs) && + vfs_offset + nvfs[i] < extended_func_num(pdev); + vfs_offset += nvfs[i], i++) + ; + if (i == ARRAY_SIZE(nvfs)) { + err = -ENODEV; + goto err_release_regions; + } + if ((extended_func_num(pdev) - vfs_offset) + > prb_vf[i]) { + dev_warn(&pdev->dev, "Skipping virtual function:%d\n", + extended_func_num(pdev)); + err = -ENODEV; + goto err_release_regions; + } + } + } + + err = mlx4_crdump_init(&priv->dev); + if (err) + goto err_release_regions; + + err = mlx4_catas_init(&priv->dev); + if (err) + goto err_crdump; + + err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0); + if (err) + goto err_catas; + + return 0; + +err_catas: + mlx4_catas_end(&priv->dev); + +err_crdump: + mlx4_crdump_end(&priv->dev); + +err_release_regions: + pci_release_regions(pdev); + +err_disable_pdev: + mlx4_pci_disable_device(&priv->dev); + return err; +} + +static int mlx4_devlink_port_type_set(struct devlink_port *devlink_port, + enum devlink_port_type port_type) +{ + struct mlx4_port_info *info = container_of(devlink_port, + struct mlx4_port_info, + devlink_port); + enum mlx4_port_type mlx4_port_type; + + switch (port_type) { + case DEVLINK_PORT_TYPE_AUTO: + mlx4_port_type = MLX4_PORT_TYPE_AUTO; + break; + case DEVLINK_PORT_TYPE_ETH: + mlx4_port_type = MLX4_PORT_TYPE_ETH; + break; + case DEVLINK_PORT_TYPE_IB: + mlx4_port_type = MLX4_PORT_TYPE_IB; + break; + default: + return -EOPNOTSUPP; + } + + return __set_port_type(info, mlx4_port_type); +} + +static void mlx4_devlink_param_load_driverinit_values(struct devlink *devlink) +{ + struct mlx4_priv *priv = devlink_priv(devlink); + struct mlx4_dev *dev = &priv->dev; + struct mlx4_fw_crdump *crdump = &dev->persist->crdump; + union devlink_param_value saved_value; + int err; + + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET, + &saved_value); + if (!err && mlx4_internal_err_reset != saved_value.vbool) { + mlx4_internal_err_reset = saved_value.vbool; + /* Notify on value changed on runtime configuration mode */ + devlink_param_value_changed(devlink, + DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET); + } + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + &saved_value); + if (!err) + log_num_mac = order_base_2(saved_value.vu32); + err = devlink_param_driverinit_value_get(devlink, + MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE, + &saved_value); + if (!err) + enable_64b_cqe_eqe = saved_value.vbool; + err = devlink_param_driverinit_value_get(devlink, + MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR, + &saved_value); + if (!err) + enable_4k_uar = saved_value.vbool; + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT, + &saved_value); + if (!err && crdump->snapshot_enable != saved_value.vbool) { + crdump->snapshot_enable = saved_value.vbool; + devlink_param_value_changed(devlink, + DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT); + } +} + +static void mlx4_restart_one_down(struct pci_dev *pdev); +static int mlx4_restart_one_up(struct pci_dev *pdev, bool reload, + struct devlink *devlink); + +static int mlx4_devlink_reload_down(struct devlink *devlink, bool netns_change, + enum devlink_reload_action action, + enum devlink_reload_limit limit, + struct netlink_ext_ack *extack) +{ + struct mlx4_priv *priv = devlink_priv(devlink); + struct mlx4_dev *dev = &priv->dev; + struct mlx4_dev_persistent *persist = dev->persist; + + if (netns_change) { + NL_SET_ERR_MSG_MOD(extack, "Namespace 
change is not supported"); + return -EOPNOTSUPP; + } + if (persist->num_vfs) + mlx4_warn(persist->dev, "Reload performed on PF, will cause reset on operating Virtual Functions\n"); + mlx4_restart_one_down(persist->pdev); + return 0; +} + +static int mlx4_devlink_reload_up(struct devlink *devlink, enum devlink_reload_action action, + enum devlink_reload_limit limit, u32 *actions_performed, + struct netlink_ext_ack *extack) +{ + struct mlx4_priv *priv = devlink_priv(devlink); + struct mlx4_dev *dev = &priv->dev; + struct mlx4_dev_persistent *persist = dev->persist; + int err; + + *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); + err = mlx4_restart_one_up(persist->pdev, true, devlink); + if (err) + mlx4_err(persist->dev, "mlx4_restart_one_up failed, ret=%d\n", + err); + + return err; +} + +static const struct devlink_ops mlx4_devlink_ops = { + .port_type_set = mlx4_devlink_port_type_set, + .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT), + .reload_down = mlx4_devlink_reload_down, + .reload_up = mlx4_devlink_reload_up, +}; + +static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct devlink *devlink; + struct mlx4_priv *priv; + struct mlx4_dev *dev; + int ret; + + printk_once(KERN_INFO "%s", mlx4_version); + + devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv), &pdev->dev); + if (!devlink) + return -ENOMEM; + devl_lock(devlink); + priv = devlink_priv(devlink); + + dev = &priv->dev; + dev->persist = kzalloc(sizeof(*dev->persist), GFP_KERNEL); + if (!dev->persist) { + ret = -ENOMEM; + goto err_devlink_free; + } + dev->persist->pdev = pdev; + dev->persist->dev = dev; + pci_set_drvdata(pdev, dev->persist); + priv->pci_dev_data = id->driver_data; + mutex_init(&dev->persist->device_state_mutex); + mutex_init(&dev->persist->interface_state_mutex); + mutex_init(&dev->persist->pci_status_mutex); + + ret = devlink_params_register(devlink, mlx4_devlink_params, + ARRAY_SIZE(mlx4_devlink_params)); + if (ret) + goto err_devlink_unregister; + mlx4_devlink_set_params_init_values(devlink); + ret = __mlx4_init_one(pdev, id->driver_data, priv); + if (ret) + goto err_params_unregister; + + pci_save_state(pdev); + devlink_set_features(devlink, DEVLINK_F_RELOAD); + devl_unlock(devlink); + devlink_register(devlink); + return 0; + +err_params_unregister: + devlink_params_unregister(devlink, mlx4_devlink_params, + ARRAY_SIZE(mlx4_devlink_params)); +err_devlink_unregister: + kfree(dev->persist); +err_devlink_free: + devl_unlock(devlink); + devlink_free(devlink); + return ret; +} + +static void mlx4_clean_dev(struct mlx4_dev *dev) +{ + struct mlx4_dev_persistent *persist = dev->persist; + struct mlx4_priv *priv = mlx4_priv(dev); + unsigned long flags = (dev->flags & RESET_PERSIST_MASK_FLAGS); + + memset(priv, 0, sizeof(*priv)); + priv->dev.persist = persist; + priv->dev.flags = flags; +} + +static void mlx4_unload_one(struct pci_dev *pdev) +{ + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; + struct mlx4_priv *priv = mlx4_priv(dev); + int pci_dev_data; + struct devlink *devlink; + int p, i; + + devlink = priv_to_devlink(priv); + devl_assert_locked(devlink); + if (priv->removed) + return; + + /* saving current ports type for further use */ + for (i = 0; i < dev->caps.num_ports; i++) { + dev->persist->curr_port_type[i] = dev->caps.port_type[i + 1]; + dev->persist->curr_port_poss_type[i] = dev->caps. 
+ possible_type[i + 1]; + } + + pci_dev_data = priv->pci_dev_data; + + mlx4_stop_sense(dev); + mlx4_unregister_device(dev); + + for (p = 1; p <= dev->caps.num_ports; p++) { + mlx4_cleanup_port_info(&priv->port[p]); + mlx4_CLOSE_PORT(dev, p); + } + + if (mlx4_is_master(dev)) + mlx4_free_resource_tracker(dev, + RES_TR_FREE_SLAVES_ONLY); + + mlx4_cleanup_default_counters(dev); + if (!mlx4_is_slave(dev)) + mlx4_cleanup_counters_table(dev); + mlx4_cleanup_qp_table(dev); + mlx4_cleanup_srq_table(dev); + mlx4_cleanup_cq_table(dev); + mlx4_cmd_use_polling(dev); + mlx4_cleanup_eq_table(dev); + mlx4_cleanup_mcg_table(dev); + mlx4_cleanup_mr_table(dev); + mlx4_cleanup_xrcd_table(dev); + mlx4_cleanup_pd_table(dev); + + if (mlx4_is_master(dev)) + mlx4_free_resource_tracker(dev, + RES_TR_FREE_STRUCTS_ONLY); + + iounmap(priv->kar); + mlx4_uar_free(dev, &priv->driver_uar); + mlx4_cleanup_uar_table(dev); + if (!mlx4_is_slave(dev)) + mlx4_clear_steering(dev); + mlx4_free_eq_table(dev); + if (mlx4_is_master(dev)) + mlx4_multi_func_cleanup(dev); + mlx4_close_hca(dev); + mlx4_close_fw(dev); + if (mlx4_is_slave(dev)) + mlx4_multi_func_cleanup(dev); + mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); + + if (dev->flags & MLX4_FLAG_MSI_X) + pci_disable_msix(pdev); + + if (!mlx4_is_slave(dev)) + mlx4_free_ownership(dev); + + mlx4_slave_destroy_special_qp_cap(dev); + kfree(dev->dev_vfs); + + mlx4_clean_dev(dev); + priv->pci_dev_data = pci_dev_data; + priv->removed = 1; +} + +static void mlx4_remove_one(struct pci_dev *pdev) +{ + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; + struct mlx4_priv *priv = mlx4_priv(dev); + struct devlink *devlink = priv_to_devlink(priv); + int active_vfs = 0; + + devlink_unregister(devlink); + + devl_lock(devlink); + if (mlx4_is_slave(dev)) + persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT; + + mutex_lock(&persist->interface_state_mutex); + persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; + mutex_unlock(&persist->interface_state_mutex); + + /* Disabling SR-IOV is not allowed while there are active vf's */ + if (mlx4_is_master(dev) && dev->flags & MLX4_FLAG_SRIOV) { + active_vfs = mlx4_how_many_lives_vf(dev); + if (active_vfs) { + pr_warn("Removing PF when there are active VF's !!\n"); + pr_warn("Will not disable SR-IOV.\n"); + } + } + + /* device marked to be under deletion running now without the lock + * letting other tasks to be terminated + */ + if (persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_unload_one(pdev); + else + mlx4_info(dev, "%s: interface is down\n", __func__); + mlx4_catas_end(dev); + mlx4_crdump_end(dev); + if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) { + mlx4_warn(dev, "Disabling SR-IOV\n"); + pci_disable_sriov(pdev); + } + + pci_release_regions(pdev); + mlx4_pci_disable_device(dev); + devlink_params_unregister(devlink, mlx4_devlink_params, + ARRAY_SIZE(mlx4_devlink_params)); + kfree(dev->persist); + devl_unlock(devlink); + devlink_free(devlink); +} + +static int restore_current_port_types(struct mlx4_dev *dev, + enum mlx4_port_type *types, + enum mlx4_port_type *poss_types) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int err, i; + + mlx4_stop_sense(dev); + + mutex_lock(&priv->port_mutex); + for (i = 0; i < dev->caps.num_ports; i++) + dev->caps.possible_type[i + 1] = poss_types[i]; + err = mlx4_change_port_types(dev, types); + mlx4_start_sense(dev); + mutex_unlock(&priv->port_mutex); + + return err; +} + +static void mlx4_restart_one_down(struct pci_dev *pdev) +{ + 
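+	/* The restart path is split into a "down" half (unload) and an
+	 * "up" half (reload plus port-type restore) so the devlink
+	 * reload_down/reload_up callbacks above and mlx4_restart_one()
+	 * can share the same code.  As an illustration only, assuming the
+	 * standard iproute2 devlink tool (not part of this driver), a
+	 * reload that exercises this pair would be:
+	 *
+	 *   devlink dev reload pci/0000:07:00.0
+	 */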
mlx4_unload_one(pdev); +} + +static int mlx4_restart_one_up(struct pci_dev *pdev, bool reload, + struct devlink *devlink) +{ + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; + struct mlx4_priv *priv = mlx4_priv(dev); + int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0}; + int pci_dev_data, err, total_vfs; + + pci_dev_data = priv->pci_dev_data; + total_vfs = dev->persist->num_vfs; + memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs)); + + if (reload) + mlx4_devlink_param_load_driverinit_values(devlink); + err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1); + if (err) { + mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n", + __func__, pci_name(pdev), err); + return err; + } + + err = restore_current_port_types(dev, dev->persist->curr_port_type, + dev->persist->curr_port_poss_type); + if (err) + mlx4_err(dev, "could not restore original port types (%d)\n", + err); + + return err; +} + +int mlx4_restart_one(struct pci_dev *pdev) +{ + mlx4_restart_one_down(pdev); + return mlx4_restart_one_up(pdev, false, NULL); +} + +#define MLX_SP(id) { PCI_VDEVICE(MELLANOX, id), MLX4_PCI_DEV_FORCE_SENSE_PORT } +#define MLX_VF(id) { PCI_VDEVICE(MELLANOX, id), MLX4_PCI_DEV_IS_VF } +#define MLX_GN(id) { PCI_VDEVICE(MELLANOX, id), 0 } + +static const struct pci_device_id mlx4_pci_table[] = { +#ifdef CONFIG_MLX4_CORE_GEN2 + /* MT25408 "Hermon" */ + MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_SDR), /* SDR */ + MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_DDR), /* DDR */ + MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_QDR), /* QDR */ + MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2), /* DDR Gen2 */ + MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2), /* QDR Gen2 */ + MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_EN), /* EN 10GigE */ + MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2), /* EN 10GigE Gen2 */ + /* MT25458 ConnectX EN 10GBASE-T */ + MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX_EN), + MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2), /* Gen2 */ + /* MT26468 ConnectX EN 10GigE PCIe Gen2*/ + MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2), + /* MT26438 ConnectX EN 40GigE PCIe Gen2 5GT/s */ + MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2), + /* MT26478 ConnectX2 40GigE PCIe Gen2 */ + MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX2), + /* MT25400 Family [ConnectX-2] */ + MLX_VF(0x1002), /* Virtual Function */ +#endif /* CONFIG_MLX4_CORE_GEN2 */ + /* MT27500 Family [ConnectX-3] */ + MLX_GN(PCI_DEVICE_ID_MELLANOX_CONNECTX3), + MLX_VF(0x1004), /* Virtual Function */ + MLX_GN(0x1005), /* MT27510 Family */ + MLX_GN(0x1006), /* MT27511 Family */ + MLX_GN(PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO), /* MT27520 Family */ + MLX_GN(0x1008), /* MT27521 Family */ + MLX_GN(0x1009), /* MT27530 Family */ + MLX_GN(0x100a), /* MT27531 Family */ + MLX_GN(0x100b), /* MT27540 Family */ + MLX_GN(0x100c), /* MT27541 Family */ + MLX_GN(0x100d), /* MT27550 Family */ + MLX_GN(0x100e), /* MT27551 Family */ + MLX_GN(0x100f), /* MT27560 Family */ + MLX_GN(0x1010), /* MT27561 Family */ + + /* + * See the mellanox_check_broken_intx_masking() quirk when + * adding devices + */ + + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, mlx4_pci_table); + +static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; + struct devlink *devlink; + + mlx4_err(persist->dev, "mlx4_pci_err_detected was called\n"); + mlx4_enter_error_state(persist); + + devlink = priv_to_devlink(mlx4_priv(dev)); + 
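+	/* Sketch of the recovery flow driven by the PCI AER core through
+	 * mlx4_err_handler (the ordering comes from the PCI error-recovery
+	 * framework, not from this driver):
+	 *
+	 *   error_detected() - unload the device, return NEED_RESET
+	 *                      (or DISCONNECT on permanent failure)
+	 *   slot_reset()     - re-enable the function, restore PCI state
+	 *   resume()         - mlx4_load_one() and restore port types
+	 */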
devl_lock(devlink); + mutex_lock(&persist->interface_state_mutex); + if (persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_unload_one(pdev); + + mutex_unlock(&persist->interface_state_mutex); + devl_unlock(devlink); + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + + mlx4_pci_disable_device(persist->dev); + return PCI_ERS_RESULT_NEED_RESET; +} + +static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) +{ + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; + int err; + + mlx4_err(dev, "mlx4_pci_slot_reset was called\n"); + err = mlx4_pci_enable_device(dev); + if (err) { + mlx4_err(dev, "Can not re-enable device, err=%d\n", err); + return PCI_ERS_RESULT_DISCONNECT; + } + + pci_set_master(pdev); + pci_restore_state(pdev); + pci_save_state(pdev); + return PCI_ERS_RESULT_RECOVERED; +} + +static void mlx4_pci_resume(struct pci_dev *pdev) +{ + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; + struct mlx4_priv *priv = mlx4_priv(dev); + int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0}; + struct devlink *devlink; + int total_vfs; + int err; + + mlx4_err(dev, "%s was called\n", __func__); + total_vfs = dev->persist->num_vfs; + memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs)); + + devlink = priv_to_devlink(priv); + devl_lock(devlink); + mutex_lock(&persist->interface_state_mutex); + if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) { + err = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs, + priv, 1); + if (err) { + mlx4_err(dev, "%s: mlx4_load_one failed, err=%d\n", + __func__, err); + goto end; + } + + err = restore_current_port_types(dev, dev->persist-> + curr_port_type, dev->persist-> + curr_port_poss_type); + if (err) + mlx4_err(dev, "could not restore original port types (%d)\n", err); + } +end: + mutex_unlock(&persist->interface_state_mutex); + devl_unlock(devlink); +} + +static void mlx4_shutdown(struct pci_dev *pdev) +{ + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; + struct devlink *devlink; + + mlx4_info(persist->dev, "mlx4_shutdown was called\n"); + devlink = priv_to_devlink(mlx4_priv(dev)); + devl_lock(devlink); + mutex_lock(&persist->interface_state_mutex); + if (persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_unload_one(pdev); + mutex_unlock(&persist->interface_state_mutex); + devl_unlock(devlink); + mlx4_pci_disable_device(dev); +} + +static const struct pci_error_handlers mlx4_err_handler = { + .error_detected = mlx4_pci_err_detected, + .slot_reset = mlx4_pci_slot_reset, + .resume = mlx4_pci_resume, +}; + +static int __maybe_unused mlx4_suspend(struct device *dev_d) +{ + struct pci_dev *pdev = to_pci_dev(dev_d); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; + struct devlink *devlink; + + mlx4_err(dev, "suspend was called\n"); + devlink = priv_to_devlink(mlx4_priv(dev)); + devl_lock(devlink); + mutex_lock(&persist->interface_state_mutex); + if (persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_unload_one(pdev); + mutex_unlock(&persist->interface_state_mutex); + devl_unlock(devlink); + + return 0; +} + +static int __maybe_unused mlx4_resume(struct device *dev_d) +{ + struct pci_dev *pdev = to_pci_dev(dev_d); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; + struct mlx4_priv *priv = mlx4_priv(dev); + int nvfs[MLX4_MAX_PORTS + 1] = {0, 
0, 0}; + struct devlink *devlink; + int total_vfs; + int ret = 0; + + mlx4_err(dev, "resume was called\n"); + total_vfs = dev->persist->num_vfs; + memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs)); + + devlink = priv_to_devlink(priv); + devl_lock(devlink); + mutex_lock(&persist->interface_state_mutex); + if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) { + ret = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, + nvfs, priv, 1); + if (!ret) { + ret = restore_current_port_types(dev, + dev->persist->curr_port_type, + dev->persist->curr_port_poss_type); + if (ret) + mlx4_err(dev, "resume: could not restore original port types (%d)\n", ret); + } + } + mutex_unlock(&persist->interface_state_mutex); + devl_unlock(devlink); + + return ret; +} + +static SIMPLE_DEV_PM_OPS(mlx4_pm_ops, mlx4_suspend, mlx4_resume); + +static struct pci_driver mlx4_driver = { + .name = DRV_NAME, + .id_table = mlx4_pci_table, + .probe = mlx4_init_one, + .shutdown = mlx4_shutdown, + .remove = mlx4_remove_one, + .driver.pm = &mlx4_pm_ops, + .err_handler = &mlx4_err_handler, +}; + +static int __init mlx4_verify_params(void) +{ + if (msi_x < 0) { + pr_warn("mlx4_core: bad msi_x: %d\n", msi_x); + return -1; + } + + if ((log_num_mac < 0) || (log_num_mac > 7)) { + pr_warn("mlx4_core: bad num_mac: %d\n", log_num_mac); + return -1; + } + + if (log_num_vlan != 0) + pr_warn("mlx4_core: log_num_vlan - obsolete module param, using %d\n", + MLX4_LOG_NUM_VLANS); + + if (use_prio != 0) + pr_warn("mlx4_core: use_prio - obsolete module param, ignored\n"); + + if ((log_mtts_per_seg < 0) || (log_mtts_per_seg > 7)) { + pr_warn("mlx4_core: bad log_mtts_per_seg: %d\n", + log_mtts_per_seg); + return -1; + } + + /* Check if module param for ports type has legal combination */ + if (port_type_array[0] == false && port_type_array[1] == true) { + pr_warn("Module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n"); + port_type_array[0] = true; + } + + if (mlx4_log_num_mgm_entry_size < -7 || + (mlx4_log_num_mgm_entry_size > 0 && + (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE || + mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE))) { + pr_warn("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not in legal range (-7..0 or %d..%d)\n", + mlx4_log_num_mgm_entry_size, + MLX4_MIN_MGM_LOG_ENTRY_SIZE, + MLX4_MAX_MGM_LOG_ENTRY_SIZE); + return -1; + } + + return 0; +} + +static int __init mlx4_init(void) +{ + int ret; + + if (mlx4_verify_params()) + return -EINVAL; + + + mlx4_wq = create_singlethread_workqueue("mlx4"); + if (!mlx4_wq) + return -ENOMEM; + + ret = pci_register_driver(&mlx4_driver); + if (ret < 0) + destroy_workqueue(mlx4_wq); + return ret < 0 ? ret : 0; +} + +static void __exit mlx4_cleanup(void) +{ + pci_unregister_driver(&mlx4_driver); + destroy_workqueue(mlx4_wq); +} + +module_init(mlx4_init); +module_exit(mlx4_cleanup); diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c new file mode 100644 index 000000000..f1716a83a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -0,0 +1,1649 @@ +/* + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/string.h> +#include <linux/etherdevice.h> + +#include <linux/mlx4/cmd.h> +#include <linux/mlx4/qp.h> +#include <linux/export.h> + +#include "mlx4.h" + +int mlx4_get_mgm_entry_size(struct mlx4_dev *dev) +{ + return 1 << dev->oper_log_mgm_entry_size; +} + +int mlx4_get_qp_per_mgm(struct mlx4_dev *dev) +{ + return 4 * (mlx4_get_mgm_entry_size(dev) / 16 - 2); +} + +static int mlx4_QP_FLOW_STEERING_ATTACH(struct mlx4_dev *dev, + struct mlx4_cmd_mailbox *mailbox, + u32 size, + u64 *reg_id) +{ + u64 imm; + int err = 0; + + err = mlx4_cmd_imm(dev, mailbox->dma, &imm, size, 0, + MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) + return err; + *reg_id = imm; + + return err; +} + +static int mlx4_QP_FLOW_STEERING_DETACH(struct mlx4_dev *dev, u64 regid) +{ + int err = 0; + + err = mlx4_cmd(dev, regid, 0, 0, + MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + + return err; +} + +static int mlx4_READ_ENTRY(struct mlx4_dev *dev, int index, + struct mlx4_cmd_mailbox *mailbox) +{ + return mlx4_cmd_box(dev, 0, mailbox->dma, index, 0, MLX4_CMD_READ_MCG, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); +} + +static int mlx4_WRITE_ENTRY(struct mlx4_dev *dev, int index, + struct mlx4_cmd_mailbox *mailbox) +{ + return mlx4_cmd(dev, mailbox->dma, index, 0, MLX4_CMD_WRITE_MCG, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); +} + +static int mlx4_WRITE_PROMISC(struct mlx4_dev *dev, u8 port, u8 steer, + struct mlx4_cmd_mailbox *mailbox) +{ + u32 in_mod; + + in_mod = (u32) port << 16 | steer << 1; + return mlx4_cmd(dev, mailbox->dma, in_mod, 0x1, + MLX4_CMD_WRITE_MCG, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); +} + +static int mlx4_GID_HASH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, + u16 *hash, u8 op_mod) +{ + u64 imm; + int err; + + err = mlx4_cmd_imm(dev, mailbox->dma, &imm, 0, op_mod, + MLX4_CMD_MGID_HASH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + + if (!err) + *hash = imm; + + return err; +} + +static struct mlx4_promisc_qp *get_promisc_qp(struct mlx4_dev *dev, u8 port, + enum mlx4_steer_type steer, + u32 qpn) +{ + struct mlx4_steer *s_steer; + struct mlx4_promisc_qp *pqp; + + if (port < 1 || port > dev->caps.num_ports) + return NULL; + + s_steer = &mlx4_priv(dev)->steer[port - 1]; + + 
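+	/* Linear lookup: promiscuous QPs are kept in a short per-port,
+	 * per-steer-type list under priv->steer[], so a plain walk is
+	 * sufficient here.  NULL means the QP is not currently registered
+	 * as promiscuous for this port/steer type.
+	 */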
list_for_each_entry(pqp, &s_steer->promisc_qps[steer], list) { + if (pqp->qpn == qpn) + return pqp; + } + /* not found */ + return NULL; +} + +/* + * Add new entry to steering data structure. + * All promisc QPs should be added as well + */ +static int new_steering_entry(struct mlx4_dev *dev, u8 port, + enum mlx4_steer_type steer, + unsigned int index, u32 qpn) +{ + struct mlx4_steer *s_steer; + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_mgm *mgm; + u32 members_count; + struct mlx4_steer_index *new_entry; + struct mlx4_promisc_qp *pqp; + struct mlx4_promisc_qp *dqp = NULL; + u32 prot; + int err; + + if (port < 1 || port > dev->caps.num_ports) + return -EINVAL; + + s_steer = &mlx4_priv(dev)->steer[port - 1]; + new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL); + if (!new_entry) + return -ENOMEM; + + INIT_LIST_HEAD(&new_entry->duplicates); + new_entry->index = index; + list_add_tail(&new_entry->list, &s_steer->steer_entries[steer]); + + /* If the given qpn is also a promisc qp, + * it should be inserted to duplicates list + */ + pqp = get_promisc_qp(dev, port, steer, qpn); + if (pqp) { + dqp = kmalloc(sizeof(*dqp), GFP_KERNEL); + if (!dqp) { + err = -ENOMEM; + goto out_alloc; + } + dqp->qpn = qpn; + list_add_tail(&dqp->list, &new_entry->duplicates); + } + + /* if no promisc qps for this vep, we are done */ + if (list_empty(&s_steer->promisc_qps[steer])) + return 0; + + /* now need to add all the promisc qps to the new + * steering entry, as they should also receive the packets + * destined to this address */ + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + err = -ENOMEM; + goto out_alloc; + } + mgm = mailbox->buf; + + err = mlx4_READ_ENTRY(dev, index, mailbox); + if (err) + goto out_mailbox; + + members_count = be32_to_cpu(mgm->members_count) & 0xffffff; + prot = be32_to_cpu(mgm->members_count) >> 30; + list_for_each_entry(pqp, &s_steer->promisc_qps[steer], list) { + /* don't add already existing qpn */ + if (pqp->qpn == qpn) + continue; + if (members_count == dev->caps.num_qp_per_mgm) { + /* out of space */ + err = -ENOMEM; + goto out_mailbox; + } + + /* add the qpn */ + mgm->qp[members_count++] = cpu_to_be32(pqp->qpn & MGM_QPN_MASK); + } + /* update the qps count and update the entry with all the promisc qps*/ + mgm->members_count = cpu_to_be32(members_count | (prot << 30)); + err = mlx4_WRITE_ENTRY(dev, index, mailbox); + +out_mailbox: + mlx4_free_cmd_mailbox(dev, mailbox); + if (!err) + return 0; +out_alloc: + if (dqp) { + list_del(&dqp->list); + kfree(dqp); + } + list_del(&new_entry->list); + kfree(new_entry); + return err; +} + +/* update the data structures with existing steering entry */ +static int existing_steering_entry(struct mlx4_dev *dev, u8 port, + enum mlx4_steer_type steer, + unsigned int index, u32 qpn) +{ + struct mlx4_steer *s_steer; + struct mlx4_steer_index *tmp_entry, *entry = NULL; + struct mlx4_promisc_qp *pqp; + struct mlx4_promisc_qp *dqp; + + if (port < 1 || port > dev->caps.num_ports) + return -EINVAL; + + s_steer = &mlx4_priv(dev)->steer[port - 1]; + + pqp = get_promisc_qp(dev, port, steer, qpn); + if (!pqp) + return 0; /* nothing to do */ + + list_for_each_entry(tmp_entry, &s_steer->steer_entries[steer], list) { + if (tmp_entry->index == index) { + entry = tmp_entry; + break; + } + } + if (unlikely(!entry)) { + mlx4_warn(dev, "Steering entry at index %x is not registered\n", index); + return -EINVAL; + } + + /* the given qpn is listed as a promisc qpn + * we need to add it as a duplicate to this entry + * for future references */ + 
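+	/* The duplicates list records QPs attached to this steering entry
+	 * both explicitly and via promiscuous mode, so that tearing down
+	 * one attachment does not strip the QP from the MGM while the
+	 * other still needs it (see check_duplicate_entry() below).
+	 */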
list_for_each_entry(dqp, &entry->duplicates, list) { + if (qpn == dqp->qpn) + return 0; /* qp is already duplicated */ + } + + /* add the qp as a duplicate on this index */ + dqp = kmalloc(sizeof(*dqp), GFP_KERNEL); + if (!dqp) + return -ENOMEM; + dqp->qpn = qpn; + list_add_tail(&dqp->list, &entry->duplicates); + + return 0; +} + +/* Check whether a qpn is a duplicate on steering entry + * If so, it should not be removed from mgm */ +static bool check_duplicate_entry(struct mlx4_dev *dev, u8 port, + enum mlx4_steer_type steer, + unsigned int index, u32 qpn) +{ + struct mlx4_steer *s_steer; + struct mlx4_steer_index *tmp_entry, *entry = NULL; + struct mlx4_promisc_qp *dqp, *tmp_dqp; + + if (port < 1 || port > dev->caps.num_ports) + return NULL; + + s_steer = &mlx4_priv(dev)->steer[port - 1]; + + /* if qp is not promisc, it cannot be duplicated */ + if (!get_promisc_qp(dev, port, steer, qpn)) + return false; + + /* The qp is promisc qp so it is a duplicate on this index + * Find the index entry, and remove the duplicate */ + list_for_each_entry(tmp_entry, &s_steer->steer_entries[steer], list) { + if (tmp_entry->index == index) { + entry = tmp_entry; + break; + } + } + if (unlikely(!entry)) { + mlx4_warn(dev, "Steering entry for index %x is not registered\n", index); + return false; + } + list_for_each_entry_safe(dqp, tmp_dqp, &entry->duplicates, list) { + if (dqp->qpn == qpn) { + list_del(&dqp->list); + kfree(dqp); + } + } + return true; +} + +/* Returns true if all the QPs != tqpn contained in this entry + * are Promisc QPs. Returns false otherwise. + */ +static bool promisc_steering_entry(struct mlx4_dev *dev, u8 port, + enum mlx4_steer_type steer, + unsigned int index, u32 tqpn, + u32 *members_count) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_mgm *mgm; + u32 m_count; + bool ret = false; + int i; + + if (port < 1 || port > dev->caps.num_ports) + return false; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return false; + mgm = mailbox->buf; + + if (mlx4_READ_ENTRY(dev, index, mailbox)) + goto out; + m_count = be32_to_cpu(mgm->members_count) & 0xffffff; + if (members_count) + *members_count = m_count; + + for (i = 0; i < m_count; i++) { + u32 qpn = be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK; + if (!get_promisc_qp(dev, port, steer, qpn) && qpn != tqpn) { + /* the qp is not promisc, the entry can't be removed */ + goto out; + } + } + ret = true; +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return ret; +} + +/* IF a steering entry contains only promisc QPs, it can be removed. */ +static bool can_remove_steering_entry(struct mlx4_dev *dev, u8 port, + enum mlx4_steer_type steer, + unsigned int index, u32 tqpn) +{ + struct mlx4_steer *s_steer; + struct mlx4_steer_index *entry = NULL, *tmp_entry; + u32 members_count; + bool ret = false; + + if (port < 1 || port > dev->caps.num_ports) + return NULL; + + s_steer = &mlx4_priv(dev)->steer[port - 1]; + + if (!promisc_steering_entry(dev, port, steer, index, + tqpn, &members_count)) + goto out; + + /* All the qps currently registered for this entry are promiscuous, + * Checking for duplicates */ + ret = true; + list_for_each_entry_safe(entry, tmp_entry, &s_steer->steer_entries[steer], list) { + if (entry->index == index) { + if (list_empty(&entry->duplicates) || + members_count == 1) { + struct mlx4_promisc_qp *pqp, *tmp_pqp; + /* If there is only 1 entry in duplicates then + * this is the QP we want to delete, going over + * the list and deleting the entry. 
+ */ + list_del(&entry->list); + list_for_each_entry_safe(pqp, tmp_pqp, + &entry->duplicates, + list) { + list_del(&pqp->list); + kfree(pqp); + } + kfree(entry); + } else { + /* This entry contains duplicates so it shouldn't be removed */ + ret = false; + goto out; + } + } + } + +out: + return ret; +} + +static int add_promisc_qp(struct mlx4_dev *dev, u8 port, + enum mlx4_steer_type steer, u32 qpn) +{ + struct mlx4_steer *s_steer; + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_mgm *mgm; + struct mlx4_steer_index *entry; + struct mlx4_promisc_qp *pqp; + struct mlx4_promisc_qp *dqp; + u32 members_count; + u32 prot; + int i; + bool found; + int err; + struct mlx4_priv *priv = mlx4_priv(dev); + + if (port < 1 || port > dev->caps.num_ports) + return -EINVAL; + + s_steer = &mlx4_priv(dev)->steer[port - 1]; + + mutex_lock(&priv->mcg_table.mutex); + + if (get_promisc_qp(dev, port, steer, qpn)) { + err = 0; /* Noting to do, already exists */ + goto out_mutex; + } + + pqp = kmalloc(sizeof(*pqp), GFP_KERNEL); + if (!pqp) { + err = -ENOMEM; + goto out_mutex; + } + pqp->qpn = qpn; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + err = -ENOMEM; + goto out_alloc; + } + mgm = mailbox->buf; + + if (!(mlx4_is_mfunc(dev) && steer == MLX4_UC_STEER)) { + /* The promisc QP needs to be added for each one of the steering + * entries. If it already exists, needs to be added as + * a duplicate for this entry. + */ + list_for_each_entry(entry, + &s_steer->steer_entries[steer], + list) { + err = mlx4_READ_ENTRY(dev, entry->index, mailbox); + if (err) + goto out_mailbox; + + members_count = be32_to_cpu(mgm->members_count) & + 0xffffff; + prot = be32_to_cpu(mgm->members_count) >> 30; + found = false; + for (i = 0; i < members_count; i++) { + if ((be32_to_cpu(mgm->qp[i]) & + MGM_QPN_MASK) == qpn) { + /* Entry already exists. + * Add to duplicates. 
+ */ + dqp = kmalloc(sizeof(*dqp), GFP_KERNEL); + if (!dqp) { + err = -ENOMEM; + goto out_mailbox; + } + dqp->qpn = qpn; + list_add_tail(&dqp->list, + &entry->duplicates); + found = true; + } + } + if (!found) { + /* Need to add the qpn to mgm */ + if (members_count == + dev->caps.num_qp_per_mgm) { + /* entry is full */ + err = -ENOMEM; + goto out_mailbox; + } + mgm->qp[members_count++] = + cpu_to_be32(qpn & MGM_QPN_MASK); + mgm->members_count = + cpu_to_be32(members_count | + (prot << 30)); + err = mlx4_WRITE_ENTRY(dev, entry->index, + mailbox); + if (err) + goto out_mailbox; + } + } + } + + /* add the new qpn to list of promisc qps */ + list_add_tail(&pqp->list, &s_steer->promisc_qps[steer]); + /* now need to add all the promisc qps to default entry */ + memset(mgm, 0, sizeof(*mgm)); + members_count = 0; + list_for_each_entry(dqp, &s_steer->promisc_qps[steer], list) { + if (members_count == dev->caps.num_qp_per_mgm) { + /* entry is full */ + err = -ENOMEM; + goto out_list; + } + mgm->qp[members_count++] = cpu_to_be32(dqp->qpn & MGM_QPN_MASK); + } + mgm->members_count = cpu_to_be32(members_count | MLX4_PROT_ETH << 30); + + err = mlx4_WRITE_PROMISC(dev, port, steer, mailbox); + if (err) + goto out_list; + + mlx4_free_cmd_mailbox(dev, mailbox); + mutex_unlock(&priv->mcg_table.mutex); + return 0; + +out_list: + list_del(&pqp->list); +out_mailbox: + mlx4_free_cmd_mailbox(dev, mailbox); +out_alloc: + kfree(pqp); +out_mutex: + mutex_unlock(&priv->mcg_table.mutex); + return err; +} + +static int remove_promisc_qp(struct mlx4_dev *dev, u8 port, + enum mlx4_steer_type steer, u32 qpn) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_steer *s_steer; + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_mgm *mgm; + struct mlx4_steer_index *entry, *tmp_entry; + struct mlx4_promisc_qp *pqp; + struct mlx4_promisc_qp *dqp; + u32 members_count; + bool found; + bool back_to_list = false; + int i; + int err; + + if (port < 1 || port > dev->caps.num_ports) + return -EINVAL; + + s_steer = &mlx4_priv(dev)->steer[port - 1]; + mutex_lock(&priv->mcg_table.mutex); + + pqp = get_promisc_qp(dev, port, steer, qpn); + if (unlikely(!pqp)) { + mlx4_warn(dev, "QP %x is not promiscuous QP\n", qpn); + /* nothing to do */ + err = 0; + goto out_mutex; + } + + /*remove from list of promisc qps */ + list_del(&pqp->list); + + /* set the default entry not to include the removed one */ + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + err = -ENOMEM; + back_to_list = true; + goto out_list; + } + mgm = mailbox->buf; + members_count = 0; + list_for_each_entry(dqp, &s_steer->promisc_qps[steer], list) + mgm->qp[members_count++] = cpu_to_be32(dqp->qpn & MGM_QPN_MASK); + mgm->members_count = cpu_to_be32(members_count | MLX4_PROT_ETH << 30); + + err = mlx4_WRITE_PROMISC(dev, port, steer, mailbox); + if (err) + goto out_mailbox; + + if (!(mlx4_is_mfunc(dev) && steer == MLX4_UC_STEER)) { + /* Remove the QP from all the steering entries */ + list_for_each_entry_safe(entry, tmp_entry, + &s_steer->steer_entries[steer], + list) { + found = false; + list_for_each_entry(dqp, &entry->duplicates, list) { + if (dqp->qpn == qpn) { + found = true; + break; + } + } + if (found) { + /* A duplicate, no need to change the MGM, + * only update the duplicates list + */ + list_del(&dqp->list); + kfree(dqp); + } else { + int loc = -1; + + err = mlx4_READ_ENTRY(dev, + entry->index, + mailbox); + if (err) + goto out_mailbox; + members_count = + be32_to_cpu(mgm->members_count) & + 0xffffff; + if (!members_count) { + mlx4_warn(dev, "QP 
%06x wasn't found in entry %x mcount=0. deleting entry...\n", + qpn, entry->index); + list_del(&entry->list); + kfree(entry); + continue; + } + + for (i = 0; i < members_count; ++i) + if ((be32_to_cpu(mgm->qp[i]) & + MGM_QPN_MASK) == qpn) { + loc = i; + break; + } + + if (loc < 0) { + mlx4_err(dev, "QP %06x wasn't found in entry %d\n", + qpn, entry->index); + err = -EINVAL; + goto out_mailbox; + } + + /* Copy the last QP in this MGM + * over removed QP + */ + mgm->qp[loc] = mgm->qp[members_count - 1]; + mgm->qp[members_count - 1] = 0; + mgm->members_count = + cpu_to_be32(--members_count | + (MLX4_PROT_ETH << 30)); + + err = mlx4_WRITE_ENTRY(dev, + entry->index, + mailbox); + if (err) + goto out_mailbox; + } + } + } + +out_mailbox: + mlx4_free_cmd_mailbox(dev, mailbox); +out_list: + if (back_to_list) + list_add_tail(&pqp->list, &s_steer->promisc_qps[steer]); + else + kfree(pqp); +out_mutex: + mutex_unlock(&priv->mcg_table.mutex); + return err; +} + +/* + * Caller must hold MCG table semaphore. gid and mgm parameters must + * be properly aligned for command interface. + * + * Returns 0 unless a firmware command error occurs. + * + * If GID is found in MGM or MGM is empty, *index = *hash, *prev = -1 + * and *mgm holds MGM entry. + * + * if GID is found in AMGM, *index = index in AMGM, *prev = index of + * previous entry in hash chain and *mgm holds AMGM entry. + * + * If no AMGM exists for given gid, *index = -1, *prev = index of last + * entry in hash chain and *mgm holds end of hash chain. + */ +static int find_entry(struct mlx4_dev *dev, u8 port, + u8 *gid, enum mlx4_protocol prot, + struct mlx4_cmd_mailbox *mgm_mailbox, + int *prev, int *index) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_mgm *mgm = mgm_mailbox->buf; + u8 *mgid; + int err; + u16 hash; + u8 op_mod = (prot == MLX4_PROT_ETH) ? + !!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) : 0; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return -ENOMEM; + mgid = mailbox->buf; + + memcpy(mgid, gid, 16); + + err = mlx4_GID_HASH(dev, mailbox, &hash, op_mod); + mlx4_free_cmd_mailbox(dev, mailbox); + if (err) + return err; + + if (0) + mlx4_dbg(dev, "Hash for %pI6 is %04x\n", gid, hash); + + *index = hash; + *prev = -1; + + do { + err = mlx4_READ_ENTRY(dev, *index, mgm_mailbox); + if (err) + return err; + + if (!(be32_to_cpu(mgm->members_count) & 0xffffff)) { + if (*index != hash) { + mlx4_err(dev, "Found zero MGID in AMGM\n"); + err = -EINVAL; + } + return err; + } + + if (!memcmp(mgm->gid, gid, 16) && + be32_to_cpu(mgm->members_count) >> 30 == prot) + return err; + + *prev = *index; + *index = be32_to_cpu(mgm->next_gid_index) >> 6; + } while (*index); + + *index = -1; + return err; +} + +static const u8 __promisc_mode[] = { + [MLX4_FS_REGULAR] = 0x0, + [MLX4_FS_ALL_DEFAULT] = 0x1, + [MLX4_FS_MC_DEFAULT] = 0x3, + [MLX4_FS_MIRROR_RX_PORT] = 0x4, + [MLX4_FS_MIRROR_SX_PORT] = 0x5, + [MLX4_FS_UC_SNIFFER] = 0x6, + [MLX4_FS_MC_SNIFFER] = 0x7, +}; + +int mlx4_map_sw_to_hw_steering_mode(struct mlx4_dev *dev, + enum mlx4_net_trans_promisc_mode flow_type) +{ + if (flow_type >= MLX4_FS_MODE_NUM) { + mlx4_err(dev, "Invalid flow type. type = %d\n", flow_type); + return -EINVAL; + } + return __promisc_mode[flow_type]; +} +EXPORT_SYMBOL_GPL(mlx4_map_sw_to_hw_steering_mode); + +static void trans_rule_ctrl_to_hw(struct mlx4_net_trans_rule *ctrl, + struct mlx4_net_trans_rule_hw_ctrl *hw) +{ + u8 flags = 0; + + flags = ctrl->queue_mode == MLX4_NET_TRANS_Q_LIFO ? 1 : 0; + flags |= ctrl->exclusive ? 
(1 << 2) : 0; + flags |= ctrl->allow_loopback ? (1 << 3) : 0; + + hw->flags = flags; + hw->type = __promisc_mode[ctrl->promisc_mode]; + hw->prio = cpu_to_be16(ctrl->priority); + hw->port = ctrl->port; + hw->qpn = cpu_to_be32(ctrl->qpn); +} + +const u16 __sw_id_hw[] = { + [MLX4_NET_TRANS_RULE_ID_ETH] = 0xE001, + [MLX4_NET_TRANS_RULE_ID_IB] = 0xE005, + [MLX4_NET_TRANS_RULE_ID_IPV6] = 0xE003, + [MLX4_NET_TRANS_RULE_ID_IPV4] = 0xE002, + [MLX4_NET_TRANS_RULE_ID_TCP] = 0xE004, + [MLX4_NET_TRANS_RULE_ID_UDP] = 0xE006, + [MLX4_NET_TRANS_RULE_ID_VXLAN] = 0xE008 +}; + +int mlx4_map_sw_to_hw_steering_id(struct mlx4_dev *dev, + enum mlx4_net_trans_rule_id id) +{ + if (id >= MLX4_NET_TRANS_RULE_NUM) { + mlx4_err(dev, "Invalid network rule id. id = %d\n", id); + return -EINVAL; + } + return __sw_id_hw[id]; +} +EXPORT_SYMBOL_GPL(mlx4_map_sw_to_hw_steering_id); + +static const int __rule_hw_sz[] = { + [MLX4_NET_TRANS_RULE_ID_ETH] = + sizeof(struct mlx4_net_trans_rule_hw_eth), + [MLX4_NET_TRANS_RULE_ID_IB] = + sizeof(struct mlx4_net_trans_rule_hw_ib), + [MLX4_NET_TRANS_RULE_ID_IPV6] = 0, + [MLX4_NET_TRANS_RULE_ID_IPV4] = + sizeof(struct mlx4_net_trans_rule_hw_ipv4), + [MLX4_NET_TRANS_RULE_ID_TCP] = + sizeof(struct mlx4_net_trans_rule_hw_tcp_udp), + [MLX4_NET_TRANS_RULE_ID_UDP] = + sizeof(struct mlx4_net_trans_rule_hw_tcp_udp), + [MLX4_NET_TRANS_RULE_ID_VXLAN] = + sizeof(struct mlx4_net_trans_rule_hw_vxlan) +}; + +int mlx4_hw_rule_sz(struct mlx4_dev *dev, + enum mlx4_net_trans_rule_id id) +{ + if (id >= MLX4_NET_TRANS_RULE_NUM) { + mlx4_err(dev, "Invalid network rule id. id = %d\n", id); + return -EINVAL; + } + + return __rule_hw_sz[id]; +} +EXPORT_SYMBOL_GPL(mlx4_hw_rule_sz); + +static int parse_trans_rule(struct mlx4_dev *dev, struct mlx4_spec_list *spec, + struct _rule_hw *rule_hw) +{ + if (mlx4_hw_rule_sz(dev, spec->id) < 0) + return -EINVAL; + memset(rule_hw, 0, mlx4_hw_rule_sz(dev, spec->id)); + rule_hw->id = cpu_to_be16(__sw_id_hw[spec->id]); + rule_hw->size = mlx4_hw_rule_sz(dev, spec->id) >> 2; + + switch (spec->id) { + case MLX4_NET_TRANS_RULE_ID_ETH: + memcpy(rule_hw->eth.dst_mac, spec->eth.dst_mac, ETH_ALEN); + memcpy(rule_hw->eth.dst_mac_msk, spec->eth.dst_mac_msk, + ETH_ALEN); + memcpy(rule_hw->eth.src_mac, spec->eth.src_mac, ETH_ALEN); + memcpy(rule_hw->eth.src_mac_msk, spec->eth.src_mac_msk, + ETH_ALEN); + if (spec->eth.ether_type_enable) { + rule_hw->eth.ether_type_enable = 1; + rule_hw->eth.ether_type = spec->eth.ether_type; + } + rule_hw->eth.vlan_tag = spec->eth.vlan_id; + rule_hw->eth.vlan_tag_msk = spec->eth.vlan_id_msk; + break; + + case MLX4_NET_TRANS_RULE_ID_IB: + rule_hw->ib.l3_qpn = spec->ib.l3_qpn; + rule_hw->ib.qpn_mask = spec->ib.qpn_msk; + memcpy(&rule_hw->ib.dst_gid, &spec->ib.dst_gid, 16); + memcpy(&rule_hw->ib.dst_gid_msk, &spec->ib.dst_gid_msk, 16); + break; + + case MLX4_NET_TRANS_RULE_ID_IPV6: + return -EOPNOTSUPP; + + case MLX4_NET_TRANS_RULE_ID_IPV4: + rule_hw->ipv4.src_ip = spec->ipv4.src_ip; + rule_hw->ipv4.src_ip_msk = spec->ipv4.src_ip_msk; + rule_hw->ipv4.dst_ip = spec->ipv4.dst_ip; + rule_hw->ipv4.dst_ip_msk = spec->ipv4.dst_ip_msk; + break; + + case MLX4_NET_TRANS_RULE_ID_TCP: + case MLX4_NET_TRANS_RULE_ID_UDP: + rule_hw->tcp_udp.dst_port = spec->tcp_udp.dst_port; + rule_hw->tcp_udp.dst_port_msk = spec->tcp_udp.dst_port_msk; + rule_hw->tcp_udp.src_port = spec->tcp_udp.src_port; + rule_hw->tcp_udp.src_port_msk = spec->tcp_udp.src_port_msk; + break; + + case MLX4_NET_TRANS_RULE_ID_VXLAN: + rule_hw->vxlan.vni = + cpu_to_be32(be32_to_cpu(spec->vxlan.vni) << 8); + 
rule_hw->vxlan.vni_mask = + cpu_to_be32(be32_to_cpu(spec->vxlan.vni_mask) << 8); + break; + + default: + return -EINVAL; + } + + return __rule_hw_sz[spec->id]; +} + +static void mlx4_err_rule(struct mlx4_dev *dev, char *str, + struct mlx4_net_trans_rule *rule) +{ +#define BUF_SIZE 256 + struct mlx4_spec_list *cur; + char buf[BUF_SIZE]; + int len = 0; + + mlx4_err(dev, "%s", str); + len += scnprintf(buf + len, BUF_SIZE - len, + "port = %d prio = 0x%x qp = 0x%x ", + rule->port, rule->priority, rule->qpn); + + list_for_each_entry(cur, &rule->list, list) { + switch (cur->id) { + case MLX4_NET_TRANS_RULE_ID_ETH: + len += scnprintf(buf + len, BUF_SIZE - len, + "dmac = %pM ", &cur->eth.dst_mac); + if (cur->eth.ether_type) + len += scnprintf(buf + len, BUF_SIZE - len, + "ethertype = 0x%x ", + be16_to_cpu(cur->eth.ether_type)); + if (cur->eth.vlan_id) + len += scnprintf(buf + len, BUF_SIZE - len, + "vlan-id = %d ", + be16_to_cpu(cur->eth.vlan_id)); + break; + + case MLX4_NET_TRANS_RULE_ID_IPV4: + if (cur->ipv4.src_ip) + len += scnprintf(buf + len, BUF_SIZE - len, + "src-ip = %pI4 ", + &cur->ipv4.src_ip); + if (cur->ipv4.dst_ip) + len += scnprintf(buf + len, BUF_SIZE - len, + "dst-ip = %pI4 ", + &cur->ipv4.dst_ip); + break; + + case MLX4_NET_TRANS_RULE_ID_TCP: + case MLX4_NET_TRANS_RULE_ID_UDP: + if (cur->tcp_udp.src_port) + len += scnprintf(buf + len, BUF_SIZE - len, + "src-port = %d ", + be16_to_cpu(cur->tcp_udp.src_port)); + if (cur->tcp_udp.dst_port) + len += scnprintf(buf + len, BUF_SIZE - len, + "dst-port = %d ", + be16_to_cpu(cur->tcp_udp.dst_port)); + break; + + case MLX4_NET_TRANS_RULE_ID_IB: + len += scnprintf(buf + len, BUF_SIZE - len, + "dst-gid = %pI6\n", cur->ib.dst_gid); + len += scnprintf(buf + len, BUF_SIZE - len, + "dst-gid-mask = %pI6\n", + cur->ib.dst_gid_msk); + break; + + case MLX4_NET_TRANS_RULE_ID_VXLAN: + len += scnprintf(buf + len, BUF_SIZE - len, + "VNID = %d ", be32_to_cpu(cur->vxlan.vni)); + break; + case MLX4_NET_TRANS_RULE_ID_IPV6: + break; + + default: + break; + } + } + len += scnprintf(buf + len, BUF_SIZE - len, "\n"); + mlx4_err(dev, "%s", buf); + + if (len >= BUF_SIZE) + mlx4_err(dev, "Network rule error message was truncated, print buffer is too small\n"); +} + +int mlx4_flow_attach(struct mlx4_dev *dev, + struct mlx4_net_trans_rule *rule, u64 *reg_id) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_spec_list *cur; + u32 size = 0; + int ret; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + if (!mlx4_qp_lookup(dev, rule->qpn)) { + mlx4_err_rule(dev, "QP doesn't exist\n", rule); + ret = -EINVAL; + goto out; + } + + trans_rule_ctrl_to_hw(rule, mailbox->buf); + + size += sizeof(struct mlx4_net_trans_rule_hw_ctrl); + + list_for_each_entry(cur, &rule->list, list) { + ret = parse_trans_rule(dev, cur, mailbox->buf + size); + if (ret < 0) + goto out; + + size += ret; + } + + ret = mlx4_QP_FLOW_STEERING_ATTACH(dev, mailbox, size >> 2, reg_id); + if (ret == -ENOMEM) { + mlx4_err_rule(dev, + "mcg table is full. 
Fail to register network rule\n", + rule); + } else if (ret) { + if (ret == -ENXIO) { + if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) + mlx4_err_rule(dev, + "DMFS is not enabled, " + "failed to register network rule.\n", + rule); + else + mlx4_err_rule(dev, + "Rule exceeds the dmfs_high_rate_mode limitations, " + "failed to register network rule.\n", + rule); + + } else { + mlx4_err_rule(dev, "Fail to register network rule.\n", rule); + } + } + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + + return ret; +} +EXPORT_SYMBOL_GPL(mlx4_flow_attach); + +int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id) +{ + int err; + + err = mlx4_QP_FLOW_STEERING_DETACH(dev, reg_id); + if (err) + mlx4_err(dev, "Fail to detach network rule. registration id = 0x%llx\n", + reg_id); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_flow_detach); + +int mlx4_tunnel_steer_add(struct mlx4_dev *dev, const unsigned char *addr, + int port, int qpn, u16 prio, u64 *reg_id) +{ + int err; + struct mlx4_spec_list spec_eth_outer = { {NULL} }; + struct mlx4_spec_list spec_vxlan = { {NULL} }; + struct mlx4_spec_list spec_eth_inner = { {NULL} }; + + struct mlx4_net_trans_rule rule = { + .queue_mode = MLX4_NET_TRANS_Q_FIFO, + .exclusive = 0, + .allow_loopback = 1, + .promisc_mode = MLX4_FS_REGULAR, + }; + + __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); + + rule.port = port; + rule.qpn = qpn; + rule.priority = prio; + INIT_LIST_HEAD(&rule.list); + + spec_eth_outer.id = MLX4_NET_TRANS_RULE_ID_ETH; + memcpy(spec_eth_outer.eth.dst_mac, addr, ETH_ALEN); + memcpy(spec_eth_outer.eth.dst_mac_msk, &mac_mask, ETH_ALEN); + + spec_vxlan.id = MLX4_NET_TRANS_RULE_ID_VXLAN; /* any vxlan header */ + spec_eth_inner.id = MLX4_NET_TRANS_RULE_ID_ETH; /* any inner eth header */ + + list_add_tail(&spec_eth_outer.list, &rule.list); + list_add_tail(&spec_vxlan.list, &rule.list); + list_add_tail(&spec_eth_inner.list, &rule.list); + + err = mlx4_flow_attach(dev, &rule, reg_id); + return err; +} +EXPORT_SYMBOL(mlx4_tunnel_steer_add); + +int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn, + u32 max_range_qpn) +{ + int err; + u64 in_param; + + in_param = ((u64) min_range_qpn) << 32; + in_param |= ((u64) max_range_qpn) & 0xFFFFFFFF; + + err = mlx4_cmd(dev, in_param, 0, 0, + MLX4_FLOW_STEERING_IB_UC_QP_RANGE, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + + return err; +} +EXPORT_SYMBOL_GPL(mlx4_FLOW_STEERING_IB_UC_QP_RANGE); + +int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], + int block_mcast_loopback, enum mlx4_protocol prot, + enum mlx4_steer_type steer) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_mgm *mgm; + u32 members_count; + int index = -1, prev; + int link = 0; + int i; + int err; + u8 port = gid[5]; + u8 new_entry = 0; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + mgm = mailbox->buf; + + mutex_lock(&priv->mcg_table.mutex); + err = find_entry(dev, port, gid, prot, + mailbox, &prev, &index); + if (err) + goto out; + + if (index != -1) { + if (!(be32_to_cpu(mgm->members_count) & 0xffffff)) { + new_entry = 1; + memcpy(mgm->gid, gid, 16); + } + } else { + link = 1; + + index = mlx4_bitmap_alloc(&priv->mcg_table.bitmap); + if (index == -1) { + mlx4_err(dev, "No AMGM entries left\n"); + err = -ENOMEM; + goto out; + } + index += dev->caps.num_mgms; + + new_entry = 1; + memset(mgm, 0, sizeof(*mgm)); + memcpy(mgm->gid, gid, 16); + } + + members_count = be32_to_cpu(mgm->members_count) 
& 0xffffff; + if (members_count == dev->caps.num_qp_per_mgm) { + mlx4_err(dev, "MGM at index %x is full\n", index); + err = -ENOMEM; + goto out; + } + + for (i = 0; i < members_count; ++i) + if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn) { + mlx4_dbg(dev, "QP %06x already a member of MGM\n", qp->qpn); + err = 0; + goto out; + } + + if (block_mcast_loopback) + mgm->qp[members_count++] = cpu_to_be32((qp->qpn & MGM_QPN_MASK) | + (1U << MGM_BLCK_LB_BIT)); + else + mgm->qp[members_count++] = cpu_to_be32(qp->qpn & MGM_QPN_MASK); + + mgm->members_count = cpu_to_be32(members_count | (u32) prot << 30); + + err = mlx4_WRITE_ENTRY(dev, index, mailbox); + if (err) + goto out; + + if (!link) + goto out; + + err = mlx4_READ_ENTRY(dev, prev, mailbox); + if (err) + goto out; + + mgm->next_gid_index = cpu_to_be32(index << 6); + + err = mlx4_WRITE_ENTRY(dev, prev, mailbox); + if (err) + goto out; + +out: + if (prot == MLX4_PROT_ETH && index != -1) { + /* manage the steering entry for promisc mode */ + if (new_entry) + err = new_steering_entry(dev, port, steer, + index, qp->qpn); + else + err = existing_steering_entry(dev, port, steer, + index, qp->qpn); + } + if (err && link && index != -1) { + if (index < dev->caps.num_mgms) + mlx4_warn(dev, "Got AMGM index %d < %d\n", + index, dev->caps.num_mgms); + else + mlx4_bitmap_free(&priv->mcg_table.bitmap, + index - dev->caps.num_mgms, MLX4_USE_RR); + } + mutex_unlock(&priv->mcg_table.mutex); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], + enum mlx4_protocol prot, enum mlx4_steer_type steer) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_mgm *mgm; + u32 members_count; + int prev, index; + int i, loc = -1; + int err; + u8 port = gid[5]; + bool removed_entry = false; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + mgm = mailbox->buf; + + mutex_lock(&priv->mcg_table.mutex); + + err = find_entry(dev, port, gid, prot, + mailbox, &prev, &index); + if (err) + goto out; + + if (index == -1) { + mlx4_err(dev, "MGID %pI6 not found\n", gid); + err = -EINVAL; + goto out; + } + + /* If this QP is also a promisc QP, it shouldn't be removed only if + * at least one none promisc QP is also attached to this MCG + */ + if (prot == MLX4_PROT_ETH && + check_duplicate_entry(dev, port, steer, index, qp->qpn) && + !promisc_steering_entry(dev, port, steer, index, qp->qpn, NULL)) + goto out; + + members_count = be32_to_cpu(mgm->members_count) & 0xffffff; + for (i = 0; i < members_count; ++i) + if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn) { + loc = i; + break; + } + + if (loc == -1) { + mlx4_err(dev, "QP %06x not found in MGM\n", qp->qpn); + err = -EINVAL; + goto out; + } + + /* copy the last QP in this MGM over removed QP */ + mgm->qp[loc] = mgm->qp[members_count - 1]; + mgm->qp[members_count - 1] = 0; + mgm->members_count = cpu_to_be32(--members_count | (u32) prot << 30); + + if (prot == MLX4_PROT_ETH) + removed_entry = can_remove_steering_entry(dev, port, steer, + index, qp->qpn); + if (members_count && (prot != MLX4_PROT_ETH || !removed_entry)) { + err = mlx4_WRITE_ENTRY(dev, index, mailbox); + goto out; + } + + /* We are going to delete the entry, members count should be 0 */ + mgm->members_count = cpu_to_be32((u32) prot << 30); + + if (prev == -1) { + /* Remove entry from MGM */ + int amgm_index = be32_to_cpu(mgm->next_gid_index) >> 6; + if (amgm_index) { + err = 
mlx4_READ_ENTRY(dev, amgm_index, mailbox); + if (err) + goto out; + } else + memset(mgm->gid, 0, 16); + + err = mlx4_WRITE_ENTRY(dev, index, mailbox); + if (err) + goto out; + + if (amgm_index) { + if (amgm_index < dev->caps.num_mgms) + mlx4_warn(dev, "MGM entry %d had AMGM index %d < %d\n", + index, amgm_index, dev->caps.num_mgms); + else + mlx4_bitmap_free(&priv->mcg_table.bitmap, + amgm_index - dev->caps.num_mgms, MLX4_USE_RR); + } + } else { + /* Remove entry from AMGM */ + int cur_next_index = be32_to_cpu(mgm->next_gid_index) >> 6; + err = mlx4_READ_ENTRY(dev, prev, mailbox); + if (err) + goto out; + + mgm->next_gid_index = cpu_to_be32(cur_next_index << 6); + + err = mlx4_WRITE_ENTRY(dev, prev, mailbox); + if (err) + goto out; + + if (index < dev->caps.num_mgms) + mlx4_warn(dev, "entry %d had next AMGM index %d < %d\n", + prev, index, dev->caps.num_mgms); + else + mlx4_bitmap_free(&priv->mcg_table.bitmap, + index - dev->caps.num_mgms, MLX4_USE_RR); + } + +out: + mutex_unlock(&priv->mcg_table.mutex); + + mlx4_free_cmd_mailbox(dev, mailbox); + if (err && dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + /* In case device is under an error, return success as a closing command */ + err = 0; + return err; +} + +static int mlx4_QP_ATTACH(struct mlx4_dev *dev, struct mlx4_qp *qp, + u8 gid[16], u8 attach, u8 block_loopback, + enum mlx4_protocol prot) +{ + struct mlx4_cmd_mailbox *mailbox; + int err = 0; + int qpn; + + if (!mlx4_is_mfunc(dev)) + return -EBADF; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memcpy(mailbox->buf, gid, 16); + qpn = qp->qpn; + qpn |= (prot << 28); + if (attach && block_loopback) + qpn |= (1 << 31); + + err = mlx4_cmd(dev, mailbox->dma, qpn, attach, + MLX4_CMD_QP_ATTACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_WRAPPED); + + mlx4_free_cmd_mailbox(dev, mailbox); + if (err && !attach && + dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + err = 0; + return err; +} + +int mlx4_trans_to_dmfs_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, + u8 gid[16], u8 port, + int block_mcast_loopback, + enum mlx4_protocol prot, u64 *reg_id) +{ + struct mlx4_spec_list spec = { {NULL} }; + __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); + + struct mlx4_net_trans_rule rule = { + .queue_mode = MLX4_NET_TRANS_Q_FIFO, + .exclusive = 0, + .promisc_mode = MLX4_FS_REGULAR, + .priority = MLX4_DOMAIN_NIC, + }; + + rule.allow_loopback = !block_mcast_loopback; + rule.port = port; + rule.qpn = qp->qpn; + INIT_LIST_HEAD(&rule.list); + + switch (prot) { + case MLX4_PROT_ETH: + spec.id = MLX4_NET_TRANS_RULE_ID_ETH; + memcpy(spec.eth.dst_mac, &gid[10], ETH_ALEN); + memcpy(spec.eth.dst_mac_msk, &mac_mask, ETH_ALEN); + break; + + case MLX4_PROT_IB_IPV6: + spec.id = MLX4_NET_TRANS_RULE_ID_IB; + memcpy(spec.ib.dst_gid, gid, 16); + memset(&spec.ib.dst_gid_msk, 0xff, 16); + break; + default: + return -EINVAL; + } + list_add_tail(&spec.list, &rule.list); + + return mlx4_flow_attach(dev, &rule, reg_id); +} + +int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], + u8 port, int block_mcast_loopback, + enum mlx4_protocol prot, u64 *reg_id) +{ + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_A0: + if (prot == MLX4_PROT_ETH) + return 0; + fallthrough; + + case MLX4_STEERING_MODE_B0: + if (prot == MLX4_PROT_ETH) + gid[7] |= (MLX4_MC_STEER << 1); + + if (mlx4_is_mfunc(dev)) + return mlx4_QP_ATTACH(dev, qp, gid, 1, + block_mcast_loopback, prot); + return mlx4_qp_attach_common(dev, qp, gid, + block_mcast_loopback, 
prot, + MLX4_MC_STEER); + + case MLX4_STEERING_MODE_DEVICE_MANAGED: + return mlx4_trans_to_dmfs_attach(dev, qp, gid, port, + block_mcast_loopback, + prot, reg_id); + default: + return -EINVAL; + } +} +EXPORT_SYMBOL_GPL(mlx4_multicast_attach); + +int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], + enum mlx4_protocol prot, u64 reg_id) +{ + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_A0: + if (prot == MLX4_PROT_ETH) + return 0; + fallthrough; + + case MLX4_STEERING_MODE_B0: + if (prot == MLX4_PROT_ETH) + gid[7] |= (MLX4_MC_STEER << 1); + + if (mlx4_is_mfunc(dev)) + return mlx4_QP_ATTACH(dev, qp, gid, 0, 0, prot); + + return mlx4_qp_detach_common(dev, qp, gid, prot, + MLX4_MC_STEER); + + case MLX4_STEERING_MODE_DEVICE_MANAGED: + return mlx4_flow_detach(dev, reg_id); + + default: + return -EINVAL; + } +} +EXPORT_SYMBOL_GPL(mlx4_multicast_detach); + +int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, + u32 qpn, enum mlx4_net_trans_promisc_mode mode) +{ + struct mlx4_net_trans_rule rule = { + .queue_mode = MLX4_NET_TRANS_Q_FIFO, + .exclusive = 0, + .allow_loopback = 1, + }; + + u64 *regid_p; + + switch (mode) { + case MLX4_FS_ALL_DEFAULT: + regid_p = &dev->regid_promisc_array[port]; + break; + case MLX4_FS_MC_DEFAULT: + regid_p = &dev->regid_allmulti_array[port]; + break; + default: + return -1; + } + + if (*regid_p != 0) + return -1; + + rule.promisc_mode = mode; + rule.port = port; + rule.qpn = qpn; + INIT_LIST_HEAD(&rule.list); + mlx4_info(dev, "going promisc on %x\n", port); + + return mlx4_flow_attach(dev, &rule, regid_p); +} +EXPORT_SYMBOL_GPL(mlx4_flow_steer_promisc_add); + +int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port, + enum mlx4_net_trans_promisc_mode mode) +{ + int ret; + u64 *regid_p; + + switch (mode) { + case MLX4_FS_ALL_DEFAULT: + regid_p = &dev->regid_promisc_array[port]; + break; + case MLX4_FS_MC_DEFAULT: + regid_p = &dev->regid_allmulti_array[port]; + break; + default: + return -1; + } + + if (*regid_p == 0) + return -1; + + ret = mlx4_flow_detach(dev, *regid_p); + if (ret == 0) + *regid_p = 0; + + return ret; +} +EXPORT_SYMBOL_GPL(mlx4_flow_steer_promisc_remove); + +int mlx4_unicast_attach(struct mlx4_dev *dev, + struct mlx4_qp *qp, u8 gid[16], + int block_mcast_loopback, enum mlx4_protocol prot) +{ + if (prot == MLX4_PROT_ETH) + gid[7] |= (MLX4_UC_STEER << 1); + + if (mlx4_is_mfunc(dev)) + return mlx4_QP_ATTACH(dev, qp, gid, 1, + block_mcast_loopback, prot); + + return mlx4_qp_attach_common(dev, qp, gid, block_mcast_loopback, + prot, MLX4_UC_STEER); +} +EXPORT_SYMBOL_GPL(mlx4_unicast_attach); + +int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, + u8 gid[16], enum mlx4_protocol prot) +{ + if (prot == MLX4_PROT_ETH) + gid[7] |= (MLX4_UC_STEER << 1); + + if (mlx4_is_mfunc(dev)) + return mlx4_QP_ATTACH(dev, qp, gid, 0, 0, prot); + + return mlx4_qp_detach_common(dev, qp, gid, prot, MLX4_UC_STEER); +} +EXPORT_SYMBOL_GPL(mlx4_unicast_detach); + +int mlx4_PROMISC_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + u32 qpn = (u32) vhcr->in_param & 0xffffffff; + int port = mlx4_slave_convert_port(dev, slave, vhcr->in_param >> 62); + enum mlx4_steer_type steer = vhcr->in_modifier; + + if (port < 0) + return -EINVAL; + + /* Promiscuous unicast is not allowed in mfunc */ + if (mlx4_is_mfunc(dev) && steer == MLX4_UC_STEER) + return 0; + + if (vhcr->op_modifier) + return 
add_promisc_qp(dev, port, steer, qpn); + else + return remove_promisc_qp(dev, port, steer, qpn); +} + +static int mlx4_PROMISC(struct mlx4_dev *dev, u32 qpn, + enum mlx4_steer_type steer, u8 add, u8 port) +{ + return mlx4_cmd(dev, (u64) qpn | (u64) port << 62, (u32) steer, add, + MLX4_CMD_PROMISC, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_WRAPPED); +} + +int mlx4_multicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port) +{ + if (mlx4_is_mfunc(dev)) + return mlx4_PROMISC(dev, qpn, MLX4_MC_STEER, 1, port); + + return add_promisc_qp(dev, port, MLX4_MC_STEER, qpn); +} +EXPORT_SYMBOL_GPL(mlx4_multicast_promisc_add); + +int mlx4_multicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port) +{ + if (mlx4_is_mfunc(dev)) + return mlx4_PROMISC(dev, qpn, MLX4_MC_STEER, 0, port); + + return remove_promisc_qp(dev, port, MLX4_MC_STEER, qpn); +} +EXPORT_SYMBOL_GPL(mlx4_multicast_promisc_remove); + +int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port) +{ + if (mlx4_is_mfunc(dev)) + return mlx4_PROMISC(dev, qpn, MLX4_UC_STEER, 1, port); + + return add_promisc_qp(dev, port, MLX4_UC_STEER, qpn); +} +EXPORT_SYMBOL_GPL(mlx4_unicast_promisc_add); + +int mlx4_unicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port) +{ + if (mlx4_is_mfunc(dev)) + return mlx4_PROMISC(dev, qpn, MLX4_UC_STEER, 0, port); + + return remove_promisc_qp(dev, port, MLX4_UC_STEER, qpn); +} +EXPORT_SYMBOL_GPL(mlx4_unicast_promisc_remove); + +int mlx4_init_mcg_table(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int err; + + /* No need for mcg_table when fw managed the mcg table*/ + if (dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) + return 0; + err = mlx4_bitmap_init(&priv->mcg_table.bitmap, dev->caps.num_amgms, + dev->caps.num_amgms - 1, 0, 0); + if (err) + return err; + + mutex_init(&priv->mcg_table.mutex); + + return 0; +} + +void mlx4_cleanup_mcg_table(struct mlx4_dev *dev) +{ + if (dev->caps.steering_mode != + MLX4_STEERING_MODE_DEVICE_MANAGED) + mlx4_bitmap_cleanup(&mlx4_priv(dev)->mcg_table.bitmap); +} diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h new file mode 100644 index 000000000..6ccf34066 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -0,0 +1,1486 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX4_H +#define MLX4_H + +#include <linux/mutex.h> +#include <linux/radix-tree.h> +#include <linux/rbtree.h> +#include <linux/timer.h> +#include <linux/semaphore.h> +#include <linux/workqueue.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <net/devlink.h> +#include <linux/rwsem.h> + +#include <linux/mlx4/device.h> +#include <linux/mlx4/driver.h> +#include <linux/mlx4/doorbell.h> +#include <linux/mlx4/cmd.h> +#include "fw_qos.h" + +#define DRV_NAME "mlx4_core" +#define DRV_VERSION "4.0-0" +#define DRV_NAME_FOR_FW "Linux," DRV_NAME "," DRV_VERSION + +#define MLX4_FS_UDP_UC_EN (1 << 1) +#define MLX4_FS_TCP_UC_EN (1 << 2) +#define MLX4_FS_NUM_OF_L2_ADDR 8 +#define MLX4_FS_MGM_LOG_ENTRY_SIZE 7 +#define MLX4_FS_NUM_MCG (1 << 17) + +#define INIT_HCA_TPT_MW_ENABLE (1 << 7) + +#define MLX4_QUERY_IF_STAT_RESET BIT(31) + +enum { + MLX4_HCR_BASE = 0x80680, + MLX4_HCR_SIZE = 0x0001c, + MLX4_CLR_INT_SIZE = 0x00008, + MLX4_SLAVE_COMM_BASE = 0x0, + MLX4_COMM_PAGESIZE = 0x1000, + MLX4_CLOCK_SIZE = 0x00008, + MLX4_COMM_CHAN_CAPS = 0x8, + MLX4_COMM_CHAN_FLAGS = 0xc +}; + +enum { + MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE = 10, + MLX4_MIN_MGM_LOG_ENTRY_SIZE = 7, + MLX4_MAX_MGM_LOG_ENTRY_SIZE = 12, + MLX4_MAX_QP_PER_MGM = 4 * ((1 << MLX4_MAX_MGM_LOG_ENTRY_SIZE) / 16 - 2), +}; + +enum { + MLX4_NUM_PDS = 1 << 15 +}; + +enum { + MLX4_CMPT_TYPE_QP = 0, + MLX4_CMPT_TYPE_SRQ = 1, + MLX4_CMPT_TYPE_CQ = 2, + MLX4_CMPT_TYPE_EQ = 3, + MLX4_CMPT_NUM_TYPE +}; + +enum { + MLX4_CMPT_SHIFT = 24, + MLX4_NUM_CMPTS = MLX4_CMPT_NUM_TYPE << MLX4_CMPT_SHIFT +}; + +enum mlx4_mpt_state { + MLX4_MPT_DISABLED = 0, + MLX4_MPT_EN_HW, + MLX4_MPT_EN_SW +}; + +#define MLX4_COMM_TIME 10000 +#define MLX4_COMM_OFFLINE_TIME_OUT 30000 +#define MLX4_COMM_CMD_NA_OP 0x0 + + +enum { + MLX4_COMM_CMD_RESET, + MLX4_COMM_CMD_VHCR0, + MLX4_COMM_CMD_VHCR1, + MLX4_COMM_CMD_VHCR2, + MLX4_COMM_CMD_VHCR_EN, + MLX4_COMM_CMD_VHCR_POST, + MLX4_COMM_CMD_FLR = 254 +}; + +enum { + MLX4_VF_SMI_DISABLED, + MLX4_VF_SMI_ENABLED +}; + +/*The flag indicates that the slave should delay the RESET cmd*/ +#define MLX4_DELAY_RESET_SLAVE 0xbbbbbbb +/*indicates how many retries will be done if we are in the middle of FLR*/ +#define NUM_OF_RESET_RETRIES 10 +#define SLEEP_TIME_IN_RESET (2 * 1000) +enum mlx4_resource { + RES_QP, + RES_CQ, + RES_SRQ, + RES_XRCD, + RES_MPT, + RES_MTT, + RES_MAC, + RES_VLAN, + RES_NPORT_ID, + RES_COUNTER, + RES_FS_RULE, + RES_EQ, + MLX4_NUM_OF_RESOURCE_TYPE +}; + +enum mlx4_alloc_mode { + RES_OP_RESERVE, + RES_OP_RESERVE_AND_MAP, + RES_OP_MAP_ICM, +}; + +enum mlx4_res_tracker_free_type { + RES_TR_FREE_ALL, + RES_TR_FREE_SLAVES_ONLY, + RES_TR_FREE_STRUCTS_ONLY, +}; + +/* + *Virtual HCR structures. + * mlx4_vhcr is the sw representation, in machine endianness + * + * mlx4_vhcr_cmd is the formalized structure, the one that is passed + * to FW to go through communication channel. 
+ * It is big endian, and has the same structure as the physical HCR + * used by command interface + */ +struct mlx4_vhcr { + u64 in_param; + u64 out_param; + u32 in_modifier; + u32 errno; + u16 op; + u16 token; + u8 op_modifier; + u8 e_bit; +}; + +struct mlx4_vhcr_cmd { + __be64 in_param; + __be32 in_modifier; + u32 reserved1; + __be64 out_param; + __be16 token; + u16 reserved; + u8 status; + u8 flags; + __be16 opcode; +}; + +struct mlx4_cmd_info { + u16 opcode; + bool has_inbox; + bool has_outbox; + bool out_is_imm; + bool encode_slave_id; + int (*verify)(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox); + int (*wrapper)(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +}; + +#ifdef CONFIG_MLX4_DEBUG +extern int mlx4_debug_level; +#else /* CONFIG_MLX4_DEBUG */ +#define mlx4_debug_level (0) +#endif /* CONFIG_MLX4_DEBUG */ + +#define mlx4_dbg(mdev, format, ...) \ +do { \ + if (mlx4_debug_level) \ + dev_printk(KERN_DEBUG, \ + &(mdev)->persist->pdev->dev, format, \ + ##__VA_ARGS__); \ +} while (0) + +#define mlx4_err(mdev, format, ...) \ + dev_err(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__) +#define mlx4_info(mdev, format, ...) \ + dev_info(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__) +#define mlx4_warn(mdev, format, ...) \ + dev_warn(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__) + +extern int log_mtts_per_seg; +extern int mlx4_internal_err_reset; + +#define MLX4_MAX_NUM_SLAVES (min(MLX4_MAX_NUM_PF + MLX4_MAX_NUM_VF, \ + MLX4_MFUNC_MAX)) +#define ALL_SLAVES 0xff + +struct mlx4_bitmap { + u32 last; + u32 top; + u32 max; + u32 reserved_top; + u32 mask; + u32 avail; + u32 effective_len; + spinlock_t lock; + unsigned long *table; +}; + +struct mlx4_buddy { + unsigned long **bits; + unsigned int *num_free; + u32 max_order; + spinlock_t lock; +}; + +struct mlx4_icm; + +struct mlx4_icm_table { + u64 virt; + int num_icm; + u32 num_obj; + int obj_size; + int lowmem; + int coherent; + struct mutex mutex; + struct mlx4_icm **icm; +}; + +#define MLX4_MPT_FLAG_SW_OWNS (0xfUL << 28) +#define MLX4_MPT_FLAG_FREE (0x3UL << 28) +#define MLX4_MPT_FLAG_MIO (1 << 17) +#define MLX4_MPT_FLAG_BIND_ENABLE (1 << 15) +#define MLX4_MPT_FLAG_PHYSICAL (1 << 9) +#define MLX4_MPT_FLAG_REGION (1 << 8) + +#define MLX4_MPT_PD_MASK (0x1FFFFUL) +#define MLX4_MPT_PD_VF_MASK (0xFE0000UL) +#define MLX4_MPT_PD_FLAG_FAST_REG (1 << 27) +#define MLX4_MPT_PD_FLAG_RAE (1 << 28) +#define MLX4_MPT_PD_FLAG_EN_INV (3 << 24) + +#define MLX4_MPT_QP_FLAG_BOUND_QP (1 << 7) + +#define MLX4_MPT_STATUS_SW 0xF0 +#define MLX4_MPT_STATUS_HW 0x00 + +#define MLX4_CQE_SIZE_MASK_STRIDE 0x3 +#define MLX4_EQE_SIZE_MASK_STRIDE 0x30 + +#define MLX4_EQ_ASYNC 0 +#define MLX4_EQ_TO_CQ_VECTOR(vector) ((vector) - \ + !!((int)(vector) >= MLX4_EQ_ASYNC)) +#define MLX4_CQ_TO_EQ_VECTOR(vector) ((vector) + \ + !!((int)(vector) >= MLX4_EQ_ASYNC)) + +/* + * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits. + */ +struct mlx4_mpt_entry { + __be32 flags; + __be32 qpn; + __be32 key; + __be32 pd_flags; + __be64 start; + __be64 length; + __be32 lkey; + __be32 win_cnt; + u8 reserved1[3]; + u8 mtt_rep; + __be64 mtt_addr; + __be32 mtt_sz; + __be32 entity_size; + __be32 first_byte_offset; +} __packed; + +/* + * Must be packed because start is 64 bits but only aligned to 32 bits. 
+ */ +struct mlx4_eq_context { + __be32 flags; + u16 reserved1[3]; + __be16 page_offset; + u8 log_eq_size; + u8 reserved2[4]; + u8 eq_period; + u8 reserved3; + u8 eq_max_count; + u8 reserved4[3]; + u8 intr; + u8 log_page_size; + u8 reserved5[2]; + u8 mtt_base_addr_h; + __be32 mtt_base_addr_l; + u32 reserved6[2]; + __be32 consumer_index; + __be32 producer_index; + u32 reserved7[4]; +}; + +struct mlx4_cq_context { + __be32 flags; + u16 reserved1[3]; + __be16 page_offset; + __be32 logsize_usrpage; + __be16 cq_period; + __be16 cq_max_count; + u8 reserved2[3]; + u8 comp_eqn; + u8 log_page_size; + u8 reserved3[2]; + u8 mtt_base_addr_h; + __be32 mtt_base_addr_l; + __be32 last_notified_index; + __be32 solicit_producer_index; + __be32 consumer_index; + __be32 producer_index; + u32 reserved4[2]; + __be64 db_rec_addr; +}; + +struct mlx4_srq_context { + __be32 state_logsize_srqn; + u8 logstride; + u8 reserved1; + __be16 xrcd; + __be32 pg_offset_cqn; + u32 reserved2; + u8 log_page_size; + u8 reserved3[2]; + u8 mtt_base_addr_h; + __be32 mtt_base_addr_l; + __be32 pd; + __be16 limit_watermark; + __be16 wqe_cnt; + u16 reserved4; + __be16 wqe_counter; + u32 reserved5; + __be64 db_rec_addr; +}; + +struct mlx4_eq_tasklet { + struct list_head list; + struct list_head process_list; + struct tasklet_struct task; + /* lock on completion tasklet list */ + spinlock_t lock; +}; + +struct mlx4_eq { + struct mlx4_dev *dev; + void __iomem *doorbell; + int eqn; + u32 cons_index; + u16 irq; + u16 have_irq; + int nent; + struct mlx4_buf_list *page_list; + struct mlx4_mtt mtt; + struct mlx4_eq_tasklet tasklet_ctx; + struct mlx4_active_ports actv_ports; + u32 ref_count; + cpumask_var_t affinity_mask; +}; + +struct mlx4_slave_eqe { + u8 type; + u8 port; + u32 param; +}; + +struct mlx4_slave_event_eq_info { + int eqn; + u16 token; +}; + +struct mlx4_profile { + int num_qp; + int rdmarc_per_qp; + int num_srq; + int num_cq; + int num_mcg; + int num_mpt; + unsigned num_mtt; +}; + +struct mlx4_fw { + u64 clr_int_base; + u64 catas_offset; + u64 comm_base; + u64 clock_offset; + struct mlx4_icm *fw_icm; + struct mlx4_icm *aux_icm; + u32 catas_size; + u16 fw_pages; + u8 clr_int_bar; + u8 catas_bar; + u8 comm_bar; + u8 clock_bar; +}; + +struct mlx4_comm { + u32 slave_write; + u32 slave_read; +}; + +enum { + MLX4_MCAST_CONFIG = 0, + MLX4_MCAST_DISABLE = 1, + MLX4_MCAST_ENABLE = 2, +}; + +#define VLAN_FLTR_SIZE 128 + +struct mlx4_vlan_fltr { + __be32 entry[VLAN_FLTR_SIZE]; +}; + +struct mlx4_mcast_entry { + struct list_head list; + u64 addr; +}; + +struct mlx4_promisc_qp { + struct list_head list; + u32 qpn; +}; + +struct mlx4_steer_index { + struct list_head list; + unsigned int index; + struct list_head duplicates; +}; + +#define MLX4_EVENT_TYPES_NUM 64 + +struct mlx4_slave_state { + u8 comm_toggle; + u8 last_cmd; + u8 init_port_mask; + bool active; + bool old_vlan_api; + bool vst_qinq_supported; + u8 function; + dma_addr_t vhcr_dma; + u16 user_mtu[MLX4_MAX_PORTS + 1]; + u16 mtu[MLX4_MAX_PORTS + 1]; + __be32 ib_cap_mask[MLX4_MAX_PORTS + 1]; + struct mlx4_slave_eqe eq[MLX4_MFUNC_MAX_EQES]; + struct list_head mcast_filters[MLX4_MAX_PORTS + 1]; + struct mlx4_vlan_fltr *vlan_filter[MLX4_MAX_PORTS + 1]; + /* event type to eq number lookup */ + struct mlx4_slave_event_eq_info event_eq[MLX4_EVENT_TYPES_NUM]; + u16 eq_pi; + u16 eq_ci; + spinlock_t lock; + /*initialized via the kzalloc*/ + u8 is_slave_going_down; + u32 cookie; + enum slave_port_state port_state[MLX4_MAX_PORTS + 1]; +}; + +#define MLX4_VGT 4095 +#define NO_INDX (-1) + +struct 
mlx4_vport_state { + u64 mac; + u16 default_vlan; + u8 default_qos; + __be16 vlan_proto; + u32 tx_rate; + bool spoofchk; + u32 link_state; + u8 qos_vport; + __be64 guid; +}; + +struct mlx4_vf_admin_state { + struct mlx4_vport_state vport[MLX4_MAX_PORTS + 1]; + u8 enable_smi[MLX4_MAX_PORTS + 1]; +}; + +struct mlx4_vport_oper_state { + struct mlx4_vport_state state; + int mac_idx; + int vlan_idx; +}; + +struct mlx4_vf_oper_state { + struct mlx4_vport_oper_state vport[MLX4_MAX_PORTS + 1]; + u8 smi_enabled[MLX4_MAX_PORTS + 1]; +}; + +struct slave_list { + struct mutex mutex; + struct list_head res_list[MLX4_NUM_OF_RESOURCE_TYPE]; +}; + +struct resource_allocator { + spinlock_t alloc_lock; /* protect quotas */ + union { + unsigned int res_reserved; + unsigned int res_port_rsvd[MLX4_MAX_PORTS]; + }; + union { + int res_free; + int res_port_free[MLX4_MAX_PORTS]; + }; + int *quota; + int *allocated; + int *guaranteed; +}; + +struct mlx4_resource_tracker { + spinlock_t lock; + /* tree for each resources */ + struct rb_root res_tree[MLX4_NUM_OF_RESOURCE_TYPE]; + /* num_of_slave's lists, one per slave */ + struct slave_list *slave_list; + struct resource_allocator res_alloc[MLX4_NUM_OF_RESOURCE_TYPE]; +}; + +#define SLAVE_EVENT_EQ_SIZE 128 +struct mlx4_slave_event_eq { + u32 eqn; + u32 cons; + u32 prod; + spinlock_t event_lock; + struct mlx4_eqe event_eqe[SLAVE_EVENT_EQ_SIZE]; +}; + +struct mlx4_qos_manager { + int num_of_qos_vfs; + DECLARE_BITMAP(priority_bm, MLX4_NUM_UP); +}; + +struct mlx4_master_qp0_state { + int proxy_qp0_active; + int qp0_active; + int port_active; +}; + +struct mlx4_mfunc_master_ctx { + struct mlx4_slave_state *slave_state; + struct mlx4_vf_admin_state *vf_admin; + struct mlx4_vf_oper_state *vf_oper; + struct mlx4_master_qp0_state qp0_state[MLX4_MAX_PORTS + 1]; + int init_port_ref[MLX4_MAX_PORTS + 1]; + u16 max_mtu[MLX4_MAX_PORTS + 1]; + u16 max_user_mtu[MLX4_MAX_PORTS + 1]; + u8 pptx; + u8 pprx; + int disable_mcast_ref[MLX4_MAX_PORTS + 1]; + struct mlx4_resource_tracker res_tracker; + struct workqueue_struct *comm_wq; + struct work_struct comm_work; + struct work_struct slave_event_work; + struct work_struct slave_flr_event_work; + spinlock_t slave_state_lock; + __be32 comm_arm_bit_vector[4]; + struct mlx4_eqe cmd_eqe; + struct mlx4_slave_event_eq slave_eq; + struct mutex gen_eqe_mutex[MLX4_MFUNC_MAX]; + struct mlx4_qos_manager qos_ctl[MLX4_MAX_PORTS + 1]; + u32 next_slave; /* mlx4_master_comm_channel */ +}; + +struct mlx4_mfunc { + struct mlx4_comm __iomem *comm; + struct mlx4_vhcr_cmd *vhcr; + dma_addr_t vhcr_dma; + + struct mlx4_mfunc_master_ctx master; +}; + +#define MGM_QPN_MASK 0x00FFFFFF +#define MGM_BLCK_LB_BIT 30 + +struct mlx4_mgm { + __be32 next_gid_index; + __be32 members_count; + u32 reserved[2]; + u8 gid[16]; + __be32 qp[MLX4_MAX_QP_PER_MGM]; +}; + +struct mlx4_cmd { + struct dma_pool *pool; + void __iomem *hcr; + struct mutex slave_cmd_mutex; + struct semaphore poll_sem; + struct semaphore event_sem; + struct rw_semaphore switch_sem; + int max_cmds; + spinlock_t context_lock; + int free_head; + struct mlx4_cmd_context *context; + u16 token_mask; + u8 use_events; + u8 toggle; + u8 comm_toggle; + u8 initialized; +}; + +enum { + MLX4_VF_IMMED_VLAN_FLAG_VLAN = 1 << 0, + MLX4_VF_IMMED_VLAN_FLAG_QOS = 1 << 1, + MLX4_VF_IMMED_VLAN_FLAG_LINK_DISABLE = 1 << 2, +}; +struct mlx4_vf_immed_vlan_work { + struct work_struct work; + struct mlx4_priv *priv; + int flags; + int slave; + int vlan_ix; + int orig_vlan_ix; + u8 port; + u8 qos; + u8 qos_vport; + u16 vlan_id; + u16 
orig_vlan_id; + __be16 vlan_proto; +}; + + +struct mlx4_uar_table { + struct mlx4_bitmap bitmap; +}; + +struct mlx4_mr_table { + struct mlx4_bitmap mpt_bitmap; + struct mlx4_buddy mtt_buddy; + u64 mtt_base; + u64 mpt_base; + struct mlx4_icm_table mtt_table; + struct mlx4_icm_table dmpt_table; +}; + +struct mlx4_cq_table { + struct mlx4_bitmap bitmap; + spinlock_t lock; + struct radix_tree_root tree; + struct mlx4_icm_table table; + struct mlx4_icm_table cmpt_table; +}; + +struct mlx4_eq_table { + struct mlx4_bitmap bitmap; + char *irq_names; + void __iomem *clr_int; + void __iomem **uar_map; + u32 clr_mask; + struct mlx4_eq *eq; + struct mlx4_icm_table table; + struct mlx4_icm_table cmpt_table; + int have_irq; + u8 inta_pin; +}; + +struct mlx4_srq_table { + struct mlx4_bitmap bitmap; + spinlock_t lock; + struct radix_tree_root tree; + struct mlx4_icm_table table; + struct mlx4_icm_table cmpt_table; +}; + +enum mlx4_qp_table_zones { + MLX4_QP_TABLE_ZONE_GENERAL, + MLX4_QP_TABLE_ZONE_RSS, + MLX4_QP_TABLE_ZONE_RAW_ETH, + MLX4_QP_TABLE_ZONE_NUM +}; + +struct mlx4_qp_table { + struct mlx4_bitmap *bitmap_gen; + struct mlx4_zone_allocator *zones; + u32 zones_uids[MLX4_QP_TABLE_ZONE_NUM]; + u32 rdmarc_base; + int rdmarc_shift; + spinlock_t lock; + struct mlx4_icm_table qp_table; + struct mlx4_icm_table auxc_table; + struct mlx4_icm_table altc_table; + struct mlx4_icm_table rdmarc_table; + struct mlx4_icm_table cmpt_table; +}; + +struct mlx4_mcg_table { + struct mutex mutex; + struct mlx4_bitmap bitmap; + struct mlx4_icm_table table; +}; + +struct mlx4_catas_err { + u32 __iomem *map; + struct timer_list timer; + struct list_head list; +}; + +#define MLX4_MAX_MAC_NUM 128 +#define MLX4_MAC_TABLE_SIZE (MLX4_MAX_MAC_NUM << 3) + +struct mlx4_mac_table { + __be64 entries[MLX4_MAX_MAC_NUM]; + int refs[MLX4_MAX_MAC_NUM]; + bool is_dup[MLX4_MAX_MAC_NUM]; + struct mutex mutex; + int total; + int max; +}; + +#define MLX4_ROCE_GID_ENTRY_SIZE 16 + +struct mlx4_roce_gid_entry { + u8 raw[MLX4_ROCE_GID_ENTRY_SIZE]; +}; + +struct mlx4_roce_gid_table { + struct mlx4_roce_gid_entry roce_gids[MLX4_ROCE_MAX_GIDS]; + struct mutex mutex; +}; + +#define MLX4_MAX_VLAN_NUM 128 +#define MLX4_VLAN_TABLE_SIZE (MLX4_MAX_VLAN_NUM << 2) + +struct mlx4_vlan_table { + __be32 entries[MLX4_MAX_VLAN_NUM]; + int refs[MLX4_MAX_VLAN_NUM]; + int is_dup[MLX4_MAX_VLAN_NUM]; + struct mutex mutex; + int total; + int max; +}; + +#define SET_PORT_GEN_ALL_VALID (MLX4_FLAG_V_MTU_MASK | \ + MLX4_FLAG_V_PPRX_MASK | \ + MLX4_FLAG_V_PPTX_MASK) +#define SET_PORT_PROMISC_SHIFT 31 +#define SET_PORT_MC_PROMISC_SHIFT 30 + +enum { + MCAST_DIRECT_ONLY = 0, + MCAST_DIRECT = 1, + MCAST_DEFAULT = 2 +}; + + +struct mlx4_set_port_general_context { + u16 reserved1; + u8 flags2; + u8 flags; + union { + u8 ignore_fcs; + u8 roce_mode; + }; + u8 reserved2; + __be16 mtu; + u8 pptx; + u8 pfctx; + u16 reserved3; + u8 pprx; + u8 pfcrx; + u16 reserved4; + u32 reserved5; + u8 phv_en; + u8 reserved6[5]; + __be16 user_mtu; + u16 reserved7; + u8 user_mac[6]; +}; + +struct mlx4_set_port_rqp_calc_context { + __be32 base_qpn; + u8 rererved; + u8 n_mac; + u8 n_vlan; + u8 n_prio; + u8 reserved2[3]; + u8 mac_miss; + u8 intra_no_vlan; + u8 no_vlan; + u8 intra_vlan_miss; + u8 vlan_miss; + u8 reserved3[3]; + u8 no_vlan_prio; + __be32 promisc; + __be32 mcast; +}; + +struct mlx4_port_info { + struct mlx4_dev *dev; + int port; + char dev_name[16]; + struct device_attribute port_attr; + enum mlx4_port_type tmp_type; + char dev_mtu_name[16]; + struct device_attribute port_mtu_attr; + 
struct mlx4_mac_table mac_table; + struct mlx4_vlan_table vlan_table; + struct mlx4_roce_gid_table gid_table; + int base_qpn; + struct cpu_rmap *rmap; + struct devlink_port devlink_port; +}; + +struct mlx4_sense { + struct mlx4_dev *dev; + u8 do_sense_port[MLX4_MAX_PORTS + 1]; + u8 sense_allowed[MLX4_MAX_PORTS + 1]; + struct delayed_work sense_poll; +}; + +struct mlx4_msix_ctl { + DECLARE_BITMAP(pool_bm, MAX_MSIX); + struct mutex pool_lock; +}; + +struct mlx4_steer { + struct list_head promisc_qps[MLX4_NUM_STEERS]; + struct list_head steer_entries[MLX4_NUM_STEERS]; +}; + +enum { + MLX4_PCI_DEV_IS_VF = 1 << 0, + MLX4_PCI_DEV_FORCE_SENSE_PORT = 1 << 1, +}; + +enum { + MLX4_NO_RR = 0, + MLX4_USE_RR = 1, +}; + +struct mlx4_priv { + struct mlx4_dev dev; + + struct list_head dev_list; + struct list_head ctx_list; + spinlock_t ctx_lock; + + int pci_dev_data; + int removed; + + struct list_head pgdir_list; + struct mutex pgdir_mutex; + + struct mlx4_fw fw; + struct mlx4_cmd cmd; + struct mlx4_mfunc mfunc; + + struct mlx4_bitmap pd_bitmap; + struct mlx4_bitmap xrcd_bitmap; + struct mlx4_uar_table uar_table; + struct mlx4_mr_table mr_table; + struct mlx4_cq_table cq_table; + struct mlx4_eq_table eq_table; + struct mlx4_srq_table srq_table; + struct mlx4_qp_table qp_table; + struct mlx4_mcg_table mcg_table; + struct mlx4_bitmap counters_bitmap; + int def_counter[MLX4_MAX_PORTS]; + + struct mlx4_catas_err catas_err; + + void __iomem *clr_base; + + struct mlx4_uar driver_uar; + void __iomem *kar; + struct mlx4_port_info port[MLX4_MAX_PORTS + 1]; + struct mlx4_sense sense; + struct mutex port_mutex; + struct mlx4_msix_ctl msix_ctl; + struct mlx4_steer *steer; + struct list_head bf_list; + struct mutex bf_mutex; + struct io_mapping *bf_mapping; + void __iomem *clock_mapping; + int reserved_mtts; + int fs_hash_mode; + u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; + struct mlx4_port_map v2p; /* cached port mapping configuration */ + struct mutex bond_mutex; /* for bond mode */ + __be64 slave_node_guids[MLX4_MFUNC_MAX]; + + atomic_t opreq_count; + struct work_struct opreq_task; +}; + +static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev) +{ + return container_of(dev, struct mlx4_priv, dev); +} + +#define MLX4_SENSE_RANGE (HZ * 3) + +extern struct workqueue_struct *mlx4_wq; + +u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap); +void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj, int use_rr); +u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, + int align, u32 skip_mask); +void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt, + int use_rr); +u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap); +int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, + u32 reserved_bot, u32 resetrved_top); +void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap); + +int mlx4_reset(struct mlx4_dev *dev); + +int mlx4_alloc_eq_table(struct mlx4_dev *dev); +void mlx4_free_eq_table(struct mlx4_dev *dev); + +int mlx4_init_pd_table(struct mlx4_dev *dev); +int mlx4_init_xrcd_table(struct mlx4_dev *dev); +int mlx4_init_uar_table(struct mlx4_dev *dev); +int mlx4_init_mr_table(struct mlx4_dev *dev); +int mlx4_init_eq_table(struct mlx4_dev *dev); +int mlx4_init_cq_table(struct mlx4_dev *dev); +int mlx4_init_qp_table(struct mlx4_dev *dev); +int mlx4_init_srq_table(struct mlx4_dev *dev); +int mlx4_init_mcg_table(struct mlx4_dev *dev); + +void mlx4_cleanup_pd_table(struct mlx4_dev *dev); +void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev); +void 
mlx4_cleanup_uar_table(struct mlx4_dev *dev); +void mlx4_cleanup_mr_table(struct mlx4_dev *dev); +void mlx4_cleanup_eq_table(struct mlx4_dev *dev); +void mlx4_cleanup_cq_table(struct mlx4_dev *dev); +void mlx4_cleanup_qp_table(struct mlx4_dev *dev); +void mlx4_cleanup_srq_table(struct mlx4_dev *dev); +void mlx4_cleanup_mcg_table(struct mlx4_dev *dev); +int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn); +void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn); +int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn); +void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn); +int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn); +void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn); +int __mlx4_mpt_reserve(struct mlx4_dev *dev); +void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index); +int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index); +void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index); +u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order); +void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order); + +int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_SYNC_TPT_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_HW2SW_MPT_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_SW2HW_EQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_CONFIG_DEV_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, + int *base, u8 flags); +void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt); +int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac); +void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac); +int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, + int start_index, int npages, u64 *page_list); +int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx); +void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx); +int mlx4_calc_vf_counters(struct mlx4_dev *dev, int slave, int port, + struct mlx4_counter *data); +int __mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn); +void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn); + +void mlx4_start_catas_poll(struct mlx4_dev *dev); +void mlx4_stop_catas_poll(struct mlx4_dev *dev); +int mlx4_catas_init(struct mlx4_dev *dev); +void mlx4_catas_end(struct mlx4_dev *dev); +int mlx4_crdump_init(struct mlx4_dev *dev); +void 
mlx4_crdump_end(struct mlx4_dev *dev); +int mlx4_restart_one(struct pci_dev *pdev); +int mlx4_register_device(struct mlx4_dev *dev); +void mlx4_unregister_device(struct mlx4_dev *dev); +void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, + unsigned long param); + +struct mlx4_dev_cap; +struct mlx4_init_hca_param; + +u64 mlx4_make_profile(struct mlx4_dev *dev, + struct mlx4_profile *request, + struct mlx4_dev_cap *dev_cap, + struct mlx4_init_hca_param *init_hca); +void mlx4_master_comm_channel(struct work_struct *work); +void mlx4_gen_slave_eqe(struct work_struct *work); +void mlx4_master_handle_slave_flr(struct work_struct *work); + +int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_MAP_EQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_COMM_INT_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_HW2SW_EQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_QUERY_EQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_SW2HW_CQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_HW2SW_CQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_QUERY_CQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_MODIFY_CQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_SW2HW_SRQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_HW2SW_SRQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_QUERY_SRQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_ARM_SRQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_GEN_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox 
*inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_2ERR_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_RTS2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_2RST_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_QUERY_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); + +int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe); + +enum { + MLX4_CMD_CLEANUP_STRUCT = 1UL << 0, + MLX4_CMD_CLEANUP_POOL = 1UL << 1, + MLX4_CMD_CLEANUP_HCR = 1UL << 2, + MLX4_CMD_CLEANUP_VHCR = 1UL << 3, + MLX4_CMD_CLEANUP_ALL = (MLX4_CMD_CLEANUP_VHCR << 1) - 1 +}; + +int mlx4_cmd_init(struct mlx4_dev *dev); +void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask); +int mlx4_multi_func_init(struct mlx4_dev *dev); +int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev); +void mlx4_multi_func_cleanup(struct mlx4_dev *dev); +void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param); +int mlx4_cmd_use_events(struct mlx4_dev *dev); +void mlx4_cmd_use_polling(struct mlx4_dev *dev); + +int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param, + u16 op, unsigned long timeout); + +void mlx4_cq_tasklet_cb(struct tasklet_struct *t); +void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn); +void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type); + +void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type); + +void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type); + +void mlx4_enter_error_state(struct mlx4_dev_persistent *persist); +int mlx4_comm_internal_err(u32 slave_read); + +int mlx4_crdump_collect(struct mlx4_dev *dev); + +int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port, + enum mlx4_port_type *type); +void 
mlx4_do_sense_ports(struct mlx4_dev *dev, + enum mlx4_port_type *stype, + enum mlx4_port_type *defaults); +void mlx4_start_sense(struct mlx4_dev *dev); +void mlx4_stop_sense(struct mlx4_dev *dev); +void mlx4_sense_init(struct mlx4_dev *dev); +int mlx4_check_port_params(struct mlx4_dev *dev, + enum mlx4_port_type *port_type); +int mlx4_change_port_types(struct mlx4_dev *dev, + enum mlx4_port_type *port_types); + +void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table); +void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table); +void mlx4_init_roce_gid_table(struct mlx4_dev *dev, + struct mlx4_roce_gid_table *table); +void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan); +int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); +int mlx4_bond_vlan_table(struct mlx4_dev *dev); +int mlx4_unbond_vlan_table(struct mlx4_dev *dev); +int mlx4_bond_mac_table(struct mlx4_dev *dev); +int mlx4_unbond_mac_table(struct mlx4_dev *dev); + +int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz); +/* resource tracker functions*/ +int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev, + enum mlx4_resource resource_type, + u64 resource_id, int *slave); +void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave_id); +void mlx4_reset_roce_gids(struct mlx4_dev *dev, int slave); +int mlx4_init_resource_tracker(struct mlx4_dev *dev); + +void mlx4_free_resource_tracker(struct mlx4_dev *dev, + enum mlx4_res_tracker_free_type type); + +int mlx4_QUERY_FW_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_SET_PORT_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps); + +int mlx4_get_slave_pkey_gid_tbl_len(struct mlx4_dev *dev, u8 port, + int *gid_tbl_len, int *pkey_tbl_len); + +int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); + +int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); + +int mlx4_PROMISC_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], + enum mlx4_protocol prot, enum mlx4_steer_type steer); +int 
mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], + int block_mcast_loopback, enum mlx4_protocol prot, + enum mlx4_steer_type steer); +int mlx4_trans_to_dmfs_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, + u8 gid[16], u8 port, + int block_mcast_loopback, + enum mlx4_protocol prot, u64 *reg_id); +int mlx4_SET_MCAST_FLTR_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_common_set_vlan_fltr(struct mlx4_dev *dev, int function, + int port, void *buf); +int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_PKEY_TABLE_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_ACCESS_REG_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); + +int mlx4_get_mgm_entry_size(struct mlx4_dev *dev); +int mlx4_get_qp_per_mgm(struct mlx4_dev *dev); + +static inline void set_param_l(u64 *arg, u32 val) +{ + *arg = (*arg & 0xffffffff00000000ULL) | (u64) val; +} + +static inline void set_param_h(u64 *arg, u32 val) +{ + *arg = (*arg & 0xffffffff) | ((u64) val << 32); +} + +static inline u32 get_param_l(u64 *arg) +{ + return (u32) (*arg & 0xffffffff); +} + +static inline u32 get_param_h(u64 *arg) +{ + return (u32)(*arg >> 32); +} + +static inline spinlock_t *mlx4_tlock(struct mlx4_dev *dev) +{ + return &mlx4_priv(dev)->mfunc.master.res_tracker.lock; +} + +#define NOT_MASKED_PD_BITS 17 + +void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work); + +void mlx4_init_quotas(struct mlx4_dev *dev); + +/* for VFs, replace zero MACs with randomly-generated MACs at driver start */ +void mlx4_replace_zero_macs(struct mlx4_dev *dev); +int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port); +/* Returns the VF index of slave */ +int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave); +int mlx4_config_mad_demux(struct mlx4_dev *dev); +int mlx4_do_bond(struct mlx4_dev *dev, bool enable); +int mlx4_bond_fs_rules(struct mlx4_dev *dev); +int mlx4_unbond_fs_rules(struct mlx4_dev *dev); + +enum mlx4_zone_flags { + MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO = 1UL << 0, + MLX4_ZONE_ALLOW_ALLOC_FROM_EQ_PRIO = 1UL << 1, + MLX4_ZONE_FALLBACK_TO_HIGHER_PRIO = 1UL << 2, + MLX4_ZONE_USE_RR = 1UL << 3, +}; + +enum mlx4_zone_alloc_flags { + /* No two objects could overlap between zones. UID + * could be left unused. 
If this flag is given and + * two overlapped zones are used, an object will be free'd + * from the smallest possible matching zone. + */ + MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP = 1UL << 0, +}; + +struct mlx4_zone_allocator; + +/* Create a new zone allocator */ +struct mlx4_zone_allocator *mlx4_zone_allocator_create(enum mlx4_zone_alloc_flags flags); + +/* Attach a mlx4_bitmap <bitmap> of priority <priority> to the zone allocator + * <zone_alloc>. Allocating an object from this zone adds an offset <offset>. + * Similarly, when searching for an object to free, this offset it taken into + * account. The use_rr mlx4_ib parameter for allocating objects from this <bitmap> + * is given through the MLX4_ZONE_USE_RR flag in <flags>. + * When an allocation fails, <zone_alloc> tries to allocate from other zones + * according to the policy set by <flags>. <puid> is the unique identifier + * received to this zone. + */ +int mlx4_zone_add_one(struct mlx4_zone_allocator *zone_alloc, + struct mlx4_bitmap *bitmap, + u32 flags, + int priority, + int offset, + u32 *puid); + +/* Remove bitmap indicated by <uid> from <zone_alloc> */ +int mlx4_zone_remove_one(struct mlx4_zone_allocator *zone_alloc, u32 uid); + +/* Delete the zone allocator <zone_alloc. This function doesn't destroy + * the attached bitmaps. + */ +void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc); + +/* Allocate <count> objects with align <align> and skip_mask <skip_mask> + * from the mlx4_bitmap whose uid is <uid>. The bitmap which we actually + * allocated from is returned in <puid>. If the allocation fails, a negative + * number is returned. Otherwise, the offset of the first object is returned. + */ +u32 mlx4_zone_alloc_entries(struct mlx4_zone_allocator *zones, u32 uid, int count, + int align, u32 skip_mask, u32 *puid); + +/* Free <count> objects, start from <obj> of the uid <uid> from zone_allocator + * <zones>. + */ +u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones, + u32 uid, u32 obj, u32 count); + +/* If <zones> was allocated with MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP, instead of + * specifying the uid when freeing an object, zone allocator could figure it by + * itself. Other parameters are similar to mlx4_zone_free. + */ +u32 mlx4_zone_free_entries_unique(struct mlx4_zone_allocator *zones, u32 obj, u32 count); + +/* Returns a pointer to mlx4_bitmap that was attached to <zones> with <uid> */ +struct mlx4_bitmap *mlx4_zone_get_bitmap(struct mlx4_zone_allocator *zones, u32 uid); + +#endif /* MLX4_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h new file mode 100644 index 000000000..e132ff4c8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -0,0 +1,837 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _MLX4_EN_H_ +#define _MLX4_EN_H_ + +#include <linux/bitops.h> +#include <linux/compiler.h> +#include <linux/ethtool.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/netdevice.h> +#include <linux/if_vlan.h> +#include <linux/net_tstamp.h> +#ifdef CONFIG_MLX4_EN_DCB +#include <linux/dcbnl.h> +#endif +#include <linux/cpu_rmap.h> +#include <linux/ptp_clock_kernel.h> +#include <linux/irq.h> +#include <net/xdp.h> + +#include <linux/mlx4/device.h> +#include <linux/mlx4/qp.h> +#include <linux/mlx4/cq.h> +#include <linux/mlx4/srq.h> +#include <linux/mlx4/doorbell.h> +#include <linux/mlx4/cmd.h> + +#include "en_port.h" +#include "mlx4_stats.h" + +#define DRV_NAME "mlx4_en" +#define DRV_VERSION "4.0-0" + +#define MLX4_EN_MSG_LEVEL (NETIF_MSG_LINK | NETIF_MSG_IFDOWN) + +/* + * Device constants + */ + + +#define MLX4_EN_PAGE_SHIFT 12 +#define MLX4_EN_PAGE_SIZE (1 << MLX4_EN_PAGE_SHIFT) +#define DEF_RX_RINGS 16 +#define MAX_RX_RINGS 128 +#define MIN_RX_RINGS 1 +#define LOG_TXBB_SIZE 6 +#define TXBB_SIZE BIT(LOG_TXBB_SIZE) +#define HEADROOM (2048 / TXBB_SIZE + 1) +#define STAMP_STRIDE 64 +#define STAMP_DWORDS (STAMP_STRIDE / 4) +#define STAMP_SHIFT 31 +#define STAMP_VAL 0x7fffffff +#define STATS_DELAY (HZ / 4) +#define SERVICE_TASK_DELAY (HZ / 4) +#define MAX_NUM_OF_FS_RULES 256 + +#define MLX4_EN_FILTER_HASH_SHIFT 4 +#define MLX4_EN_FILTER_EXPIRY_QUOTA 60 + +/* Typical TSO descriptor with 16 gather entries is 352 bytes... 
*/ +#define MAX_DESC_SIZE 512 +#define MAX_DESC_TXBBS (MAX_DESC_SIZE / TXBB_SIZE) + +/* + * OS related constants and tunables + */ + +#define MLX4_EN_PRIV_FLAGS_BLUEFLAME 1 +#define MLX4_EN_PRIV_FLAGS_PHV 2 + +#define MLX4_EN_WATCHDOG_TIMEOUT (15 * HZ) + +/* Use the maximum between 16384 and a single page */ +#define MLX4_EN_ALLOC_SIZE PAGE_ALIGN(16384) + +#define MLX4_EN_MAX_RX_FRAGS 4 + +/* Maximum ring sizes */ +#define MLX4_EN_MAX_TX_SIZE 8192 +#define MLX4_EN_MAX_RX_SIZE 8192 + +/* Minimum ring size for our page-allocation scheme to work */ +#define MLX4_EN_MIN_RX_SIZE (MLX4_EN_ALLOC_SIZE / SMP_CACHE_BYTES) +#define MLX4_EN_MIN_TX_SIZE (4096 / TXBB_SIZE) + +#define MLX4_EN_SMALL_PKT_SIZE 64 +#define MLX4_EN_MIN_TX_RING_P_UP 1 +#define MLX4_EN_MAX_TX_RING_P_UP 32 +#define MLX4_EN_NUM_UP_LOW 1 +#define MLX4_EN_NUM_UP_HIGH 8 +#define MLX4_EN_DEF_RX_RING_SIZE 1024 +#define MLX4_EN_DEF_TX_RING_SIZE MLX4_EN_DEF_RX_RING_SIZE +#define MAX_TX_RINGS (MLX4_EN_MAX_TX_RING_P_UP * \ + MLX4_EN_NUM_UP_HIGH) + +#define MLX4_EN_DEFAULT_TX_WORK 256 + +/* Target number of packets to coalesce with interrupt moderation */ +#define MLX4_EN_RX_COAL_TARGET 44 +#define MLX4_EN_RX_COAL_TIME 0x10 + +#define MLX4_EN_TX_COAL_PKTS 16 +#define MLX4_EN_TX_COAL_TIME 0x10 + +#define MLX4_EN_MAX_COAL_PKTS U16_MAX +#define MLX4_EN_MAX_COAL_TIME U16_MAX + +#define MLX4_EN_RX_RATE_LOW 400000 +#define MLX4_EN_RX_COAL_TIME_LOW 0 +#define MLX4_EN_RX_RATE_HIGH 450000 +#define MLX4_EN_RX_COAL_TIME_HIGH 128 +#define MLX4_EN_RX_SIZE_THRESH 1024 +#define MLX4_EN_RX_RATE_THRESH (1000000 / MLX4_EN_RX_COAL_TIME_HIGH) +#define MLX4_EN_SAMPLE_INTERVAL 0 +#define MLX4_EN_AVG_PKT_SMALL 256 + +#define MLX4_EN_AUTO_CONF 0xffff + +#define MLX4_EN_DEF_RX_PAUSE 1 +#define MLX4_EN_DEF_TX_PAUSE 1 + +/* Interval between successive polls in the Tx routine when polling is used + instead of interrupts (in per-core Tx rings) - should be power of 2 */ +#define MLX4_EN_TX_POLL_MODER 16 +#define MLX4_EN_TX_POLL_TIMEOUT (HZ / 4) + +#define SMALL_PACKET_SIZE (256 - NET_IP_ALIGN) +#define HEADER_COPY_SIZE (128 - NET_IP_ALIGN) +#define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETH_HLEN) +#define PREAMBLE_LEN 8 +#define MLX4_SELFTEST_LB_MIN_MTU (MLX4_LOOPBACK_TEST_PAYLOAD + NET_IP_ALIGN + \ + ETH_HLEN + PREAMBLE_LEN) + +/* VLAN_HLEN is added twice,to support skb vlan tagged with multiple + * headers. (For example: ETH_P_8021Q and ETH_P_8021AD). 
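+ * As a worked example (with the standard ETH_HLEN = 14 and
+ * VLAN_HLEN = 4), a 1500-byte MTU gives
+ * MLX4_EN_EFF_MTU(1500) = 1500 + 14 + 2 * 4 = 1522 bytes per frame.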
+ */ +#define MLX4_EN_EFF_MTU(mtu) ((mtu) + ETH_HLEN + (2 * VLAN_HLEN)) +#define ETH_BCAST 0xffffffffffffULL + +#define MLX4_EN_LOOPBACK_RETRIES 5 +#define MLX4_EN_LOOPBACK_TIMEOUT 100 + +/* Constants for TX flow */ +enum { + MAX_INLINE = 104, /* 128 - 16 - 4 - 4 */ + MAX_BF = 256, + MIN_PKT_LEN = 17, +}; + +/* + * Configurables + */ + +enum cq_type { + /* keep tx types first */ + TX, + TX_XDP, +#define MLX4_EN_NUM_TX_TYPES (TX_XDP + 1) + RX, +}; + + +/* + * Useful macros + */ +#define ROUNDUP_LOG2(x) ilog2(roundup_pow_of_two(x)) +#define XNOR(x, y) (!(x) == !(y)) + + +struct mlx4_en_tx_info { + union { + struct sk_buff *skb; + struct page *page; + }; + dma_addr_t map0_dma; + u32 map0_byte_count; + u32 nr_txbb; + u32 nr_bytes; + u8 linear; + u8 data_offset; + u8 inl; + u8 ts_requested; + u8 nr_maps; +} ____cacheline_aligned_in_smp; + + +#define MLX4_EN_BIT_DESC_OWN 0x80000000 +#define CTRL_SIZE sizeof(struct mlx4_wqe_ctrl_seg) +#define MLX4_EN_MEMTYPE_PAD 0x100 +#define DS_SIZE sizeof(struct mlx4_wqe_data_seg) + + +struct mlx4_en_tx_desc { + struct mlx4_wqe_ctrl_seg ctrl; + union { + struct mlx4_wqe_data_seg data; /* at least one data segment */ + struct mlx4_wqe_lso_seg lso; + struct mlx4_wqe_inline_seg inl; + }; +}; + +#define MLX4_EN_USE_SRQ 0x01000000 + +#define MLX4_EN_CX3_LOW_ID 0x1000 +#define MLX4_EN_CX3_HIGH_ID 0x1005 + +struct mlx4_en_rx_alloc { + struct page *page; + dma_addr_t dma; + u32 page_offset; +}; + +#define MLX4_EN_CACHE_SIZE (2 * NAPI_POLL_WEIGHT) + +struct mlx4_en_page_cache { + u32 index; + struct { + struct page *page; + dma_addr_t dma; + } buf[MLX4_EN_CACHE_SIZE]; +}; + +enum { + MLX4_EN_TX_RING_STATE_RECOVERING, +}; + +struct mlx4_en_priv; + +struct mlx4_en_tx_ring { + /* cache line used and dirtied in tx completion + * (mlx4_en_free_tx_buf()) + */ + u32 last_nr_txbb; + u32 cons; + unsigned long wake_queue; + struct netdev_queue *tx_queue; + u32 (*free_tx_desc)(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, + u64 timestamp, int napi_mode); + struct mlx4_en_rx_ring *recycle_ring; + + /* cache line used and dirtied in mlx4_en_xmit() */ + u32 prod ____cacheline_aligned_in_smp; + unsigned int tx_dropped; + unsigned long bytes; + unsigned long packets; + unsigned long tx_csum; + unsigned long tso_packets; + unsigned long xmit_more; + struct mlx4_bf bf; + + /* Following part should be mostly read */ + void __iomem *doorbell_address; + __be32 doorbell_qpn; + __be32 mr_key; + u32 size; /* number of TXBBs */ + u32 size_mask; + u32 full_size; + u32 buf_size; + void *buf; + struct mlx4_en_tx_info *tx_info; + int qpn; + u8 queue_index; + bool bf_enabled; + bool bf_alloced; + u8 hwtstamp_tx_type; + u8 *bounce_buf; + + /* Not used in fast path + * Only queue_stopped might be used if BQL is not properly working. 
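+ * The sp_ ("slow path") fields below appear to be touched only during
+ * ring setup/teardown, which is presumably why they sit after the hot,
+ * cacheline-aligned TX fields above.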
+ */ + unsigned long queue_stopped; + unsigned long state; + struct mlx4_hwq_resources sp_wqres; + struct mlx4_qp sp_qp; + struct mlx4_qp_context sp_context; + cpumask_t sp_affinity_mask; + enum mlx4_qp_state sp_qp_state; + u16 sp_stride; + u16 sp_cqn; /* index of port CQ associated with this ring */ +} ____cacheline_aligned_in_smp; + +struct mlx4_en_rx_desc { + /* actual number of entries depends on rx ring stride */ + struct mlx4_wqe_data_seg data[0]; +}; + +struct mlx4_en_rx_ring { + struct mlx4_hwq_resources wqres; + u32 size ; /* number of Rx descs*/ + u32 actual_size; + u32 size_mask; + u16 stride; + u16 log_stride; + u16 cqn; /* index of port CQ associated with this ring */ + u32 prod; + u32 cons; + u32 buf_size; + u8 fcs_del; + void *buf; + void *rx_info; + struct bpf_prog __rcu *xdp_prog; + struct mlx4_en_page_cache page_cache; + unsigned long bytes; + unsigned long packets; + unsigned long csum_ok; + unsigned long csum_none; + unsigned long csum_complete; + unsigned long rx_alloc_pages; + unsigned long xdp_drop; + unsigned long xdp_redirect; + unsigned long xdp_redirect_fail; + unsigned long xdp_tx; + unsigned long xdp_tx_full; + unsigned long dropped; + int hwtstamp_rx_filter; + cpumask_var_t affinity_mask; + struct xdp_rxq_info xdp_rxq; +}; + +struct mlx4_en_cq { + struct mlx4_cq mcq; + struct mlx4_hwq_resources wqres; + int ring; + struct net_device *dev; + union { + struct napi_struct napi; + bool xdp_busy; + }; + int size; + int buf_size; + int vector; + enum cq_type type; + u16 moder_time; + u16 moder_cnt; + struct mlx4_cqe *buf; +#define MLX4_EN_OPCODE_ERROR 0x1e + + const struct cpumask *aff_mask; +}; + +struct mlx4_en_port_profile { + u32 flags; + u32 tx_ring_num[MLX4_EN_NUM_TX_TYPES]; + u32 rx_ring_num; + u32 tx_ring_size; + u32 rx_ring_size; + u8 num_tx_rings_p_up; + u8 rx_pause; + u8 rx_ppp; + u8 tx_pause; + u8 tx_ppp; + u8 num_up; + int rss_rings; + int inline_thold; + struct hwtstamp_config hwtstamp_config; +}; + +struct mlx4_en_profile { + int udp_rss; + u8 rss_mask; + u32 active_ports; + u32 small_pkt_int; + u8 no_reset; + u8 max_num_tx_rings_p_up; + struct mlx4_en_port_profile prof[MLX4_MAX_PORTS + 1]; +}; + +struct mlx4_en_dev { + struct mlx4_dev *dev; + struct pci_dev *pdev; + struct mutex state_lock; + struct net_device *pndev[MLX4_MAX_PORTS + 1]; + struct net_device *upper[MLX4_MAX_PORTS + 1]; + u32 port_cnt; + bool device_up; + struct mlx4_en_profile profile; + u32 LSO_support; + struct workqueue_struct *workqueue; + struct device *dma_device; + void __iomem *uar_map; + struct mlx4_uar priv_uar; + struct mlx4_mr mr; + u32 priv_pdn; + spinlock_t uar_lock; + u8 mac_removed[MLX4_MAX_PORTS + 1]; + u32 nominal_c_mult; + struct cyclecounter cycles; + seqlock_t clock_lock; + struct timecounter clock; + unsigned long last_overflow_check; + struct ptp_clock *ptp_clock; + struct ptp_clock_info ptp_clock_info; + struct notifier_block nb; +}; + + +struct mlx4_en_rss_map { + int base_qpn; + struct mlx4_qp qps[MAX_RX_RINGS]; + enum mlx4_qp_state state[MAX_RX_RINGS]; + struct mlx4_qp *indir_qp; + enum mlx4_qp_state indir_state; +}; + +enum mlx4_en_port_flag { + MLX4_EN_PORT_ANC = 1<<0, /* Auto-negotiation complete */ + MLX4_EN_PORT_ANE = 1<<1, /* Auto-negotiation enabled */ +}; + +struct mlx4_en_port_state { + int link_state; + int link_speed; + int transceiver; + u32 flags; +}; + +enum mlx4_en_mclist_act { + MCLIST_NONE, + MCLIST_REM, + MCLIST_ADD, +}; + +struct mlx4_en_mc_list { + struct list_head list; + enum mlx4_en_mclist_act action; + u8 addr[ETH_ALEN]; + u64 reg_id; 
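+ /* reg_id above and tunnel_reg_id below look like the flow-steering
+ * attach handles for this address (the latter for the tunneled rule),
+ * kept so the rules can be detached later.
+ */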
+ u64 tunnel_reg_id; +}; + +struct mlx4_en_frag_info { + u16 frag_size; + u32 frag_stride; +}; + +#ifdef CONFIG_MLX4_EN_DCB +/* Minimal TC BW - setting to 0 will block traffic */ +#define MLX4_EN_BW_MIN 1 +#define MLX4_EN_BW_MAX 100 /* Utilize 100% of the line */ + +#define MLX4_EN_TC_VENDOR 0 +#define MLX4_EN_TC_ETS 7 + +enum dcb_pfc_type { + pfc_disabled = 0, + pfc_enabled_full, + pfc_enabled_tx, + pfc_enabled_rx +}; + +struct mlx4_en_cee_config { + bool pfc_state; + enum dcb_pfc_type dcb_pfc[MLX4_EN_NUM_UP_HIGH]; +}; +#endif + +struct ethtool_flow_id { + struct list_head list; + struct ethtool_rx_flow_spec flow_spec; + u64 id; +}; + +enum { + MLX4_EN_FLAG_PROMISC = (1 << 0), + MLX4_EN_FLAG_MC_PROMISC = (1 << 1), + /* whether we need to enable hardware loopback by putting dmac + * in Tx WQE + */ + MLX4_EN_FLAG_ENABLE_HW_LOOPBACK = (1 << 2), + /* whether we need to drop packets that hardware loopback-ed */ + MLX4_EN_FLAG_RX_FILTER_NEEDED = (1 << 3), + MLX4_EN_FLAG_FORCE_PROMISC = (1 << 4), + MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP = (1 << 5), +#ifdef CONFIG_MLX4_EN_DCB + MLX4_EN_FLAG_DCB_ENABLED = (1 << 6), +#endif +}; + +#define PORT_BEACON_MAX_LIMIT (65535) +#define MLX4_EN_MAC_HASH_SIZE (1 << BITS_PER_BYTE) +#define MLX4_EN_MAC_HASH_IDX 5 + +struct mlx4_en_stats_bitmap { + DECLARE_BITMAP(bitmap, NUM_ALL_STATS); + struct mutex mutex; /* for mutual access to stats bitmap */ +}; + +enum { + MLX4_EN_STATE_FLAG_RESTARTING, +}; + +struct mlx4_en_priv { + struct mlx4_en_dev *mdev; + struct mlx4_en_port_profile *prof; + struct net_device *dev; + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; + struct mlx4_en_port_state port_state; + spinlock_t stats_lock; + struct ethtool_flow_id ethtool_rules[MAX_NUM_OF_FS_RULES]; + /* To allow rules removal while port is going down */ + struct list_head ethtool_list; + + unsigned long last_moder_packets[MAX_RX_RINGS]; + unsigned long last_moder_tx_packets; + unsigned long last_moder_bytes[MAX_RX_RINGS]; + unsigned long last_moder_jiffies; + int last_moder_time[MAX_RX_RINGS]; + u16 rx_usecs; + u16 rx_frames; + u16 tx_usecs; + u16 tx_frames; + u32 pkt_rate_low; + u16 rx_usecs_low; + u32 pkt_rate_high; + u16 rx_usecs_high; + u32 sample_interval; + u32 adaptive_rx_coal; + u32 msg_enable; + u32 loopback_ok; + u32 validate_loopback; + + struct mlx4_hwq_resources res; + int link_state; + bool port_up; + int port; + int registered; + int allocated; + int stride; + unsigned char current_mac[ETH_ALEN + 2]; + int mac_index; + unsigned max_mtu; + int base_qpn; + int cqe_factor; + int cqe_size; + + struct mlx4_en_rss_map rss_map; + __be32 ctrl_flags; + u32 flags; + u8 num_tx_rings_p_up; + u32 tx_work_limit; + u32 tx_ring_num[MLX4_EN_NUM_TX_TYPES]; + u32 rx_ring_num; + u32 rx_skb_size; + struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS]; + u8 num_frags; + u8 log_rx_info; + u8 dma_dir; + u16 rx_headroom; + + struct mlx4_en_tx_ring **tx_ring[MLX4_EN_NUM_TX_TYPES]; + struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS]; + struct mlx4_en_cq **tx_cq[MLX4_EN_NUM_TX_TYPES]; + struct mlx4_en_cq *rx_cq[MAX_RX_RINGS]; + struct mlx4_qp drop_qp; + struct work_struct rx_mode_task; + struct work_struct restart_task; + struct work_struct linkstate_task; + struct delayed_work stats_task; + struct delayed_work service_task; + struct mlx4_en_pkt_stats pkstats; + struct mlx4_en_counter_stats pf_stats; + struct mlx4_en_flow_stats_rx rx_priority_flowstats[MLX4_NUM_PRIORITIES]; + struct mlx4_en_flow_stats_tx tx_priority_flowstats[MLX4_NUM_PRIORITIES]; + struct mlx4_en_flow_stats_rx 
rx_flowstats; + struct mlx4_en_flow_stats_tx tx_flowstats; + struct mlx4_en_port_stats port_stats; + struct mlx4_en_xdp_stats xdp_stats; + struct mlx4_en_phy_stats phy_stats; + struct mlx4_en_stats_bitmap stats_bitmap; + struct list_head mc_list; + struct list_head curr_list; + u64 broadcast_id; + struct mlx4_en_stat_out_mbox hw_stats; + int vids[128]; + bool wol; + struct device *ddev; + struct hlist_head mac_hash[MLX4_EN_MAC_HASH_SIZE]; + struct hwtstamp_config hwtstamp_config; + u32 counter_index; + +#ifdef CONFIG_MLX4_EN_DCB +#define MLX4_EN_DCB_ENABLED 0x3 + struct ieee_ets ets; + u16 maxrate[IEEE_8021QAZ_MAX_TCS]; + enum dcbnl_cndd_states cndd_state[IEEE_8021QAZ_MAX_TCS]; + struct mlx4_en_cee_config cee_config; + u8 dcbx_cap; +#endif +#ifdef CONFIG_RFS_ACCEL + spinlock_t filters_lock; + int last_filter_id; + struct list_head filters; + struct hlist_head filter_hash[1 << MLX4_EN_FILTER_HASH_SHIFT]; +#endif + u64 tunnel_reg_id; + __be16 vxlan_port; + + u32 pflags; + u8 rss_key[MLX4_EN_RSS_KEY_SIZE]; + u8 rss_hash_fn; + unsigned long state; +}; + +enum mlx4_en_wol { + MLX4_EN_WOL_MAGIC = (1ULL << 61), + MLX4_EN_WOL_ENABLED = (1ULL << 62), +}; + +struct mlx4_mac_entry { + struct hlist_node hlist; + unsigned char mac[ETH_ALEN + 2]; + u64 reg_id; + struct rcu_head rcu; +}; + +static inline struct mlx4_cqe *mlx4_en_get_cqe(void *buf, int idx, int cqe_sz) +{ + return buf + idx * cqe_sz; +} + +#define MLX4_EN_WOL_DO_MODIFY (1ULL << 63) + +void mlx4_en_init_ptys2ethtool_map(void); +void mlx4_en_update_loopback_state(struct net_device *dev, + netdev_features_t features); + +void mlx4_en_destroy_netdev(struct net_device *dev); +int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, + struct mlx4_en_port_profile *prof); + +int mlx4_en_start_port(struct net_device *dev); +void mlx4_en_stop_port(struct net_device *dev, int detach); + +void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev, + struct mlx4_en_stats_bitmap *stats_bitmap, + u8 rx_ppp, u8 rx_pause, + u8 tx_ppp, u8 tx_pause); + +int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv, + struct mlx4_en_priv *tmp, + struct mlx4_en_port_profile *prof, + bool carry_xdp_prog); +void mlx4_en_safe_replace_resources(struct mlx4_en_priv *priv, + struct mlx4_en_priv *tmp); + +int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq, + int entries, int ring, enum cq_type mode, int node); +void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq); +int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, + int cq_idx); +void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); +int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); +void mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); + +void mlx4_en_tx_irq(struct mlx4_cq *mcq); +u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev); +netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, + struct mlx4_en_rx_alloc *frame, + struct mlx4_en_priv *priv, unsigned int length, + int tx_ind, bool *doorbell_pending); +void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring); +bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, + struct mlx4_en_rx_alloc *frame); + +int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring **pring, + u32 size, u16 stride, + int node, int queue_index); +void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, + struct 
mlx4_en_tx_ring **pring); +void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring); +int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int cq, int user_prio); +void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring); +void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev); +void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv); +int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring **pring, + u32 size, u16 stride, int node, int queue_index); +void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring **pring, + u32 size, u16 stride); +int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv); +void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring); +int mlx4_en_process_rx_cq(struct net_device *dev, + struct mlx4_en_cq *cq, + int budget); +int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget); +int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget); +int mlx4_en_process_tx_cq(struct net_device *dev, + struct mlx4_en_cq *cq, int napi_budget); +u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u64 timestamp, + int napi_mode); +u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u64 timestamp, + int napi_mode); +void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, + int is_tx, int rss, int qpn, int cqn, int user_prio, + struct mlx4_qp_context *context); +void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event); +int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp, + int loopback); +void mlx4_en_calc_rx_buf(struct net_device *dev); +int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv); +void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv); +int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv); +void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv); +int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring); +void mlx4_en_rx_irq(struct mlx4_cq *mcq); + +int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode); +int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv); + +void mlx4_en_fold_software_stats(struct net_device *dev); +int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset); +int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port); + +#ifdef CONFIG_MLX4_EN_DCB +extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops; +extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops; +#endif + +int mlx4_en_setup_tc(struct net_device *dev, u8 up); +int mlx4_en_alloc_tx_queue_per_tc(struct net_device *dev, u8 tc); + +#ifdef CONFIG_RFS_ACCEL +void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv); +#endif + +#define MLX4_EN_NUM_SELF_TEST 5 +void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf); +void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev); + +#define DEV_FEATURE_CHANGED(dev, new_features, feature) \ + ((dev->features & feature) ^ (new_features & feature)) + +int mlx4_en_moderation_update(struct mlx4_en_priv *priv); +int mlx4_en_reset_config(struct net_device *dev, + struct hwtstamp_config ts_config, + netdev_features_t new_features); +void mlx4_en_update_pfc_stats_bitmap(struct mlx4_dev *dev, + struct mlx4_en_stats_bitmap *stats_bitmap, + u8 rx_ppp, u8 rx_pause, + u8 tx_ppp, u8 tx_pause); +int mlx4_en_netdev_event(struct 
notifier_block *this, + unsigned long event, void *ptr); + +/* + * Functions for time stamping + */ +u64 mlx4_en_get_cqe_ts(struct mlx4_cqe *cqe); +void mlx4_en_fill_hwtstamps(struct mlx4_en_dev *mdev, + struct skb_shared_hwtstamps *hwts, + u64 timestamp); +void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev); +void mlx4_en_remove_timestamp(struct mlx4_en_dev *mdev); + +/* Globals + */ +extern const struct ethtool_ops mlx4_en_ethtool_ops; + + + +/* + * printk / logging functions + */ + +__printf(3, 4) +void en_print(const char *level, const struct mlx4_en_priv *priv, + const char *format, ...); + +#define en_dbg(mlevel, priv, format, ...) \ +do { \ + if (NETIF_MSG_##mlevel & (priv)->msg_enable) \ + en_print(KERN_DEBUG, priv, format, ##__VA_ARGS__); \ +} while (0) +#define en_warn(priv, format, ...) \ + en_print(KERN_WARNING, priv, format, ##__VA_ARGS__) +#define en_err(priv, format, ...) \ + en_print(KERN_ERR, priv, format, ##__VA_ARGS__) +#define en_info(priv, format, ...) \ + en_print(KERN_INFO, priv, format, ##__VA_ARGS__) + +#define mlx4_err(mdev, format, ...) \ + pr_err(DRV_NAME " %s: " format, \ + dev_name(&(mdev)->pdev->dev), ##__VA_ARGS__) +#define mlx4_info(mdev, format, ...) \ + pr_info(DRV_NAME " %s: " format, \ + dev_name(&(mdev)->pdev->dev), ##__VA_ARGS__) +#define mlx4_warn(mdev, format, ...) \ + pr_warn(DRV_NAME " %s: " format, \ + dev_name(&(mdev)->pdev->dev), ##__VA_ARGS__) + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h new file mode 100644 index 000000000..e9cd4bb6f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _MLX4_STATS_ +#define _MLX4_STATS_ + +#define NUM_PRIORITIES 9 +#define NUM_PRIORITY_STATS 2 + +struct mlx4_en_pkt_stats { + unsigned long rx_multicast_packets; + unsigned long rx_broadcast_packets; + unsigned long rx_jabbers; + unsigned long rx_in_range_length_error; + unsigned long rx_out_range_length_error; + unsigned long tx_multicast_packets; + unsigned long tx_broadcast_packets; + unsigned long rx_prio[NUM_PRIORITIES][NUM_PRIORITY_STATS]; + unsigned long tx_prio[NUM_PRIORITIES][NUM_PRIORITY_STATS]; +#define NUM_PKT_STATS 43 +}; + +struct mlx4_en_counter_stats { + unsigned long rx_packets; + unsigned long rx_bytes; + unsigned long tx_packets; + unsigned long tx_bytes; +#define NUM_PF_STATS 4 +}; + +struct mlx4_en_port_stats { + unsigned long tso_packets; + unsigned long xmit_more; + unsigned long queue_stopped; + unsigned long wake_queue; + unsigned long tx_timeout; + unsigned long rx_alloc_pages; + unsigned long rx_chksum_good; + unsigned long rx_chksum_none; + unsigned long rx_chksum_complete; + unsigned long tx_chksum_offload; +#define NUM_PORT_STATS 10 +}; + +struct mlx4_en_xdp_stats { + unsigned long rx_xdp_drop; + unsigned long rx_xdp_redirect; + unsigned long rx_xdp_redirect_fail; + unsigned long rx_xdp_tx; + unsigned long rx_xdp_tx_full; +#define NUM_XDP_STATS 5 +}; + +struct mlx4_en_phy_stats { + unsigned long rx_packets_phy; + unsigned long rx_bytes_phy; + unsigned long tx_packets_phy; + unsigned long tx_bytes_phy; +#define NUM_PHY_STATS 4 +}; + +#define NUM_MAIN_STATS 21 + +#define MLX4_NUM_PRIORITIES 8 + +struct mlx4_en_flow_stats_rx { + u64 rx_pause; + u64 rx_pause_duration; + u64 rx_pause_transition; +#define NUM_FLOW_STATS_RX 3 +#define NUM_FLOW_PRIORITY_STATS_RX (NUM_FLOW_STATS_RX * \ + MLX4_NUM_PRIORITIES) +}; + +#define FLOW_PRIORITY_STATS_IDX_RX_FRAMES (NUM_MAIN_STATS + \ + 
NUM_PORT_STATS + \ + NUM_PF_STATS + \ + NUM_FLOW_PRIORITY_STATS_RX) + +struct mlx4_en_flow_stats_tx { + u64 tx_pause; + u64 tx_pause_duration; + u64 tx_pause_transition; +#define NUM_FLOW_STATS_TX 3 +#define NUM_FLOW_PRIORITY_STATS_TX (NUM_FLOW_STATS_TX * \ + MLX4_NUM_PRIORITIES) +}; + +#define FLOW_PRIORITY_STATS_IDX_TX_FRAMES (NUM_MAIN_STATS + \ + NUM_PORT_STATS + \ + NUM_PF_STATS + \ + NUM_FLOW_PRIORITY_STATS_RX + \ + NUM_FLOW_STATS_RX + \ + NUM_FLOW_PRIORITY_STATS_TX) + +#define NUM_FLOW_STATS (NUM_FLOW_STATS_RX + NUM_FLOW_STATS_TX + \ + NUM_FLOW_PRIORITY_STATS_TX + \ + NUM_FLOW_PRIORITY_STATS_RX) + +struct mlx4_en_stat_out_flow_control_mbox { + /* Total number of PAUSE frames received from the far-end port */ + __be64 rx_pause; + /* Total number of microseconds that far-end port requested to pause + * transmission of packets + */ + __be64 rx_pause_duration; + /* Number of received transmission from XOFF state to XON state */ + __be64 rx_pause_transition; + /* Total number of PAUSE frames sent from the far-end port */ + __be64 tx_pause; + /* Total time in microseconds that transmission of packets has been + * paused + */ + __be64 tx_pause_duration; + /* Number of transmitter transitions from XOFF state to XON state */ + __be64 tx_pause_transition; + /* Reserverd */ + __be64 reserved[2]; +}; + +enum { + MLX4_DUMP_ETH_STATS_FLOW_CONTROL = 1 << 12 +}; + +#define NUM_ALL_STATS (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + \ + NUM_FLOW_STATS + NUM_PF_STATS + \ + NUM_XDP_STATS + NUM_PHY_STATS) + +#define MLX4_FIND_NETDEV_STAT(n) (offsetof(struct net_device_stats, n) / \ + sizeof(((struct net_device_stats *)0)->n)) + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c new file mode 100644 index 000000000..d7444782b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -0,0 +1,974 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/errno.h> +#include <linux/export.h> +#include <linux/slab.h> +#include <linux/kernel.h> +#include <linux/vmalloc.h> + +#include <linux/mlx4/cmd.h> + +#include "mlx4.h" +#include "icm.h" + +static u32 mlx4_buddy_alloc(struct mlx4_buddy *buddy, int order) +{ + int o; + int m; + u32 seg; + + spin_lock(&buddy->lock); + + for (o = order; o <= buddy->max_order; ++o) + if (buddy->num_free[o]) { + m = 1 << (buddy->max_order - o); + seg = find_first_bit(buddy->bits[o], m); + if (seg < m) + goto found; + } + + spin_unlock(&buddy->lock); + return -1; + + found: + clear_bit(seg, buddy->bits[o]); + --buddy->num_free[o]; + + while (o > order) { + --o; + seg <<= 1; + set_bit(seg ^ 1, buddy->bits[o]); + ++buddy->num_free[o]; + } + + spin_unlock(&buddy->lock); + + seg <<= order; + + return seg; +} + +static void mlx4_buddy_free(struct mlx4_buddy *buddy, u32 seg, int order) +{ + seg >>= order; + + spin_lock(&buddy->lock); + + while (test_bit(seg ^ 1, buddy->bits[order])) { + clear_bit(seg ^ 1, buddy->bits[order]); + --buddy->num_free[order]; + seg >>= 1; + ++order; + } + + set_bit(seg, buddy->bits[order]); + ++buddy->num_free[order]; + + spin_unlock(&buddy->lock); +} + +static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order) +{ + int i, s; + + buddy->max_order = max_order; + spin_lock_init(&buddy->lock); + + buddy->bits = kcalloc(buddy->max_order + 1, sizeof(long *), + GFP_KERNEL); + buddy->num_free = kcalloc(buddy->max_order + 1, sizeof(*buddy->num_free), + GFP_KERNEL); + if (!buddy->bits || !buddy->num_free) + goto err_out; + + for (i = 0; i <= buddy->max_order; ++i) { + s = BITS_TO_LONGS(1UL << (buddy->max_order - i)); + buddy->bits[i] = kvmalloc_array(s, sizeof(long), GFP_KERNEL | __GFP_ZERO); + if (!buddy->bits[i]) + goto err_out_free; + } + + set_bit(0, buddy->bits[buddy->max_order]); + buddy->num_free[buddy->max_order] = 1; + + return 0; + +err_out_free: + for (i = 0; i <= buddy->max_order; ++i) + kvfree(buddy->bits[i]); + +err_out: + kfree(buddy->bits); + kfree(buddy->num_free); + + return -ENOMEM; +} + +static void mlx4_buddy_cleanup(struct mlx4_buddy *buddy) +{ + int i; + + for (i = 0; i <= buddy->max_order; ++i) + kvfree(buddy->bits[i]); + + kfree(buddy->bits); + kfree(buddy->num_free); +} + +u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order) +{ + struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; + u32 seg; + int seg_order; + u32 offset; + + seg_order = max_t(int, order - log_mtts_per_seg, 0); + + seg = mlx4_buddy_alloc(&mr_table->mtt_buddy, seg_order); + if (seg == -1) + return -1; + + offset = seg * (1 << log_mtts_per_seg); + + if (mlx4_table_get_range(dev, &mr_table->mtt_table, offset, + offset + (1 << order) - 1)) { + mlx4_buddy_free(&mr_table->mtt_buddy, seg, seg_order); + return -1; + } + + return offset; +} + +static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order) +{ + u64 in_param = 0; + u64 out_param; + int err; + + if (mlx4_is_mfunc(dev)) { + set_param_l(&in_param, order); + err = mlx4_cmd_imm(dev, in_param, &out_param, RES_MTT, + RES_OP_RESERVE_AND_MAP, + MLX4_CMD_ALLOC_RES, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_WRAPPED); + if (err) + return -1; + return get_param_l(&out_param); + } + return __mlx4_alloc_mtt_range(dev, order); +} + +int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift, + struct mlx4_mtt *mtt) +{ + int i; + + if (!npages) { + mtt->order = -1; + mtt->page_shift = MLX4_ICM_PAGE_SHIFT; + return 0; + } else + mtt->page_shift = page_shift; + + for (mtt->order = 0, i = 1; i < npages; i <<= 1) + 
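+ /* each doubling of i bumps the order once, so mtt->order
+ * ends up as ceil(log2(npages))
+ */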
++mtt->order; + + mtt->offset = mlx4_alloc_mtt_range(dev, mtt->order); + if (mtt->offset == -1) + return -ENOMEM; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mtt_init); + +void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order) +{ + u32 first_seg; + int seg_order; + struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; + + seg_order = max_t(int, order - log_mtts_per_seg, 0); + first_seg = offset / (1 << log_mtts_per_seg); + + mlx4_buddy_free(&mr_table->mtt_buddy, first_seg, seg_order); + mlx4_table_put_range(dev, &mr_table->mtt_table, offset, + offset + (1 << order) - 1); +} + +static void mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order) +{ + u64 in_param = 0; + int err; + + if (mlx4_is_mfunc(dev)) { + set_param_l(&in_param, offset); + set_param_h(&in_param, order); + err = mlx4_cmd(dev, in_param, RES_MTT, RES_OP_RESERVE_AND_MAP, + MLX4_CMD_FREE_RES, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_WRAPPED); + if (err) + mlx4_warn(dev, "Failed to free mtt range at:%d order:%d\n", + offset, order); + return; + } + __mlx4_free_mtt_range(dev, offset, order); +} + +void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt) +{ + if (mtt->order < 0) + return; + + mlx4_free_mtt_range(dev, mtt->offset, mtt->order); +} +EXPORT_SYMBOL_GPL(mlx4_mtt_cleanup); + +u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt) +{ + return (u64) mtt->offset * dev->caps.mtt_entry_sz; +} +EXPORT_SYMBOL_GPL(mlx4_mtt_addr); + +static u32 hw_index_to_key(u32 ind) +{ + return (ind >> 24) | (ind << 8); +} + +static u32 key_to_hw_index(u32 key) +{ + return (key << 24) | (key >> 8); +} + +static int mlx4_SW2HW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, + int mpt_index) +{ + return mlx4_cmd(dev, mailbox->dma, mpt_index, + 0, MLX4_CMD_SW2HW_MPT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); +} + +static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, + int mpt_index) +{ + return mlx4_cmd_box(dev, 0, mailbox ? 
mailbox->dma : 0, mpt_index, + !mailbox, MLX4_CMD_HW2SW_MPT, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); +} + +/* Must protect against concurrent access */ +int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr, + struct mlx4_mpt_entry ***mpt_entry) +{ + int err; + int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1); + struct mlx4_cmd_mailbox *mailbox = NULL; + + if (mmr->enabled != MLX4_MPT_EN_HW) + return -EINVAL; + + err = mlx4_HW2SW_MPT(dev, NULL, key); + if (err) { + mlx4_warn(dev, "HW2SW_MPT failed (%d).", err); + mlx4_warn(dev, "Most likely the MR has MWs bound to it.\n"); + return err; + } + + mmr->enabled = MLX4_MPT_EN_SW; + + if (!mlx4_is_mfunc(dev)) { + **mpt_entry = mlx4_table_find( + &mlx4_priv(dev)->mr_table.dmpt_table, + key, NULL); + } else { + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + err = mlx4_cmd_box(dev, 0, mailbox->dma, key, + 0, MLX4_CMD_QUERY_MPT, + MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + if (err) + goto free_mailbox; + + *mpt_entry = (struct mlx4_mpt_entry **)&mailbox->buf; + } + + if (!(*mpt_entry) || !(**mpt_entry)) { + err = -ENOMEM; + goto free_mailbox; + } + + return 0; + +free_mailbox: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_get_mpt); + +int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr, + struct mlx4_mpt_entry **mpt_entry) +{ + int err; + + if (!mlx4_is_mfunc(dev)) { + /* Make sure any changes to this entry are flushed */ + wmb(); + + *(u8 *)(*mpt_entry) = MLX4_MPT_STATUS_HW; + + /* Make sure the new status is written */ + wmb(); + + err = mlx4_SYNC_TPT(dev); + } else { + int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1); + + struct mlx4_cmd_mailbox *mailbox = + container_of((void *)mpt_entry, struct mlx4_cmd_mailbox, + buf); + + (*mpt_entry)->lkey = 0; + err = mlx4_SW2HW_MPT(dev, mailbox, key); + } + + if (!err) { + mmr->pd = be32_to_cpu((*mpt_entry)->pd_flags) & MLX4_MPT_PD_MASK; + mmr->enabled = MLX4_MPT_EN_HW; + } + return err; +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_write_mpt); + +void mlx4_mr_hw_put_mpt(struct mlx4_dev *dev, + struct mlx4_mpt_entry **mpt_entry) +{ + if (mlx4_is_mfunc(dev)) { + struct mlx4_cmd_mailbox *mailbox = + container_of((void *)mpt_entry, struct mlx4_cmd_mailbox, + buf); + mlx4_free_cmd_mailbox(dev, mailbox); + } +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_put_mpt); + +int mlx4_mr_hw_change_pd(struct mlx4_dev *dev, struct mlx4_mpt_entry *mpt_entry, + u32 pdn) +{ + u32 pd_flags = be32_to_cpu(mpt_entry->pd_flags) & ~MLX4_MPT_PD_MASK; + /* The wrapper function will put the slave's id here */ + if (mlx4_is_mfunc(dev)) + pd_flags &= ~MLX4_MPT_PD_VF_MASK; + + mpt_entry->pd_flags = cpu_to_be32(pd_flags | + (pdn & MLX4_MPT_PD_MASK) + | MLX4_MPT_PD_FLAG_EN_INV); + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_pd); + +int mlx4_mr_hw_change_access(struct mlx4_dev *dev, + struct mlx4_mpt_entry *mpt_entry, + u32 access) +{ + u32 flags = (be32_to_cpu(mpt_entry->flags) & ~MLX4_PERM_MASK) | + (access & MLX4_PERM_MASK); + + mpt_entry->flags = cpu_to_be32(flags); + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_access); + +static int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd, + u64 iova, u64 size, u32 access, int npages, + int page_shift, struct mlx4_mr *mr) +{ + mr->iova = iova; + mr->size = size; + mr->pd = pd; + mr->access = access; + mr->enabled = MLX4_MPT_DISABLED; + mr->key = hw_index_to_key(mridx); + + return mlx4_mtt_init(dev, npages, page_shift, &mr->mtt); +} + +static int 
mlx4_WRITE_MTT(struct mlx4_dev *dev, + struct mlx4_cmd_mailbox *mailbox, + int num_entries) +{ + return mlx4_cmd(dev, mailbox->dma, num_entries, 0, MLX4_CMD_WRITE_MTT, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); +} + +int __mlx4_mpt_reserve(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + return mlx4_bitmap_alloc(&priv->mr_table.mpt_bitmap); +} + +static int mlx4_mpt_reserve(struct mlx4_dev *dev) +{ + u64 out_param; + + if (mlx4_is_mfunc(dev)) { + if (mlx4_cmd_imm(dev, 0, &out_param, RES_MPT, RES_OP_RESERVE, + MLX4_CMD_ALLOC_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED)) + return -1; + return get_param_l(&out_param); + } + return __mlx4_mpt_reserve(dev); +} + +void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index, MLX4_NO_RR); +} + +static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index) +{ + u64 in_param = 0; + + if (mlx4_is_mfunc(dev)) { + set_param_l(&in_param, index); + if (mlx4_cmd(dev, in_param, RES_MPT, RES_OP_RESERVE, + MLX4_CMD_FREE_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED)) + mlx4_warn(dev, "Failed to release mr index:%d\n", + index); + return; + } + __mlx4_mpt_release(dev, index); +} + +int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) +{ + struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; + + return mlx4_table_get(dev, &mr_table->dmpt_table, index); +} + +static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) +{ + u64 param = 0; + + if (mlx4_is_mfunc(dev)) { + set_param_l(¶m, index); + return mlx4_cmd_imm(dev, param, ¶m, RES_MPT, RES_OP_MAP_ICM, + MLX4_CMD_ALLOC_RES, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_WRAPPED); + } + return __mlx4_mpt_alloc_icm(dev, index); +} + +void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index) +{ + struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; + + mlx4_table_put(dev, &mr_table->dmpt_table, index); +} + +static void mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index) +{ + u64 in_param = 0; + + if (mlx4_is_mfunc(dev)) { + set_param_l(&in_param, index); + if (mlx4_cmd(dev, in_param, RES_MPT, RES_OP_MAP_ICM, + MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_WRAPPED)) + mlx4_warn(dev, "Failed to free icm of mr index:%d\n", + index); + return; + } + return __mlx4_mpt_free_icm(dev, index); +} + +int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, + int npages, int page_shift, struct mlx4_mr *mr) +{ + u32 index; + int err; + + index = mlx4_mpt_reserve(dev); + if (index == -1) + return -ENOMEM; + + err = mlx4_mr_alloc_reserved(dev, index, pd, iova, size, + access, npages, page_shift, mr); + if (err) + mlx4_mpt_release(dev, index); + + return err; +} +EXPORT_SYMBOL_GPL(mlx4_mr_alloc); + +static int mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr) +{ + int err; + + if (mr->enabled == MLX4_MPT_EN_HW) { + err = mlx4_HW2SW_MPT(dev, NULL, + key_to_hw_index(mr->key) & + (dev->caps.num_mpts - 1)); + if (err) { + mlx4_warn(dev, "HW2SW_MPT failed (%d), MR has MWs bound to it\n", + err); + return err; + } + + mr->enabled = MLX4_MPT_EN_SW; + } + mlx4_mtt_cleanup(dev, &mr->mtt); + + return 0; +} + +int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr) +{ + int ret; + + ret = mlx4_mr_free_reserved(dev, mr); + if (ret) + return ret; + if (mr->enabled) + mlx4_mpt_free_icm(dev, key_to_hw_index(mr->key)); + mlx4_mpt_release(dev, key_to_hw_index(mr->key)); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mr_free); + +void mlx4_mr_rereg_mem_cleanup(struct 
mlx4_dev *dev, struct mlx4_mr *mr) +{ + mlx4_mtt_cleanup(dev, &mr->mtt); + mr->mtt.order = -1; +} +EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_cleanup); + +int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr, + u64 iova, u64 size, int npages, + int page_shift, struct mlx4_mpt_entry *mpt_entry) +{ + int err; + + err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt); + if (err) + return err; + + mpt_entry->start = cpu_to_be64(iova); + mpt_entry->length = cpu_to_be64(size); + mpt_entry->entity_size = cpu_to_be32(page_shift); + mpt_entry->flags &= ~(cpu_to_be32(MLX4_MPT_FLAG_FREE | + MLX4_MPT_FLAG_SW_OWNS)); + if (mr->mtt.order < 0) { + mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL); + mpt_entry->mtt_addr = 0; + } else { + mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev, + &mr->mtt)); + if (mr->mtt.page_shift == 0) + mpt_entry->mtt_sz = cpu_to_be32(1 << mr->mtt.order); + } + if (mr->mtt.order >= 0 && mr->mtt.page_shift == 0) { + /* fast register MR in free state */ + mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_FREE); + mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG | + MLX4_MPT_PD_FLAG_RAE); + } else { + mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS); + } + mr->enabled = MLX4_MPT_EN_SW; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_write); + +int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_mpt_entry *mpt_entry; + int err; + + err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key)); + if (err) + return err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto err_table; + } + mpt_entry = mailbox->buf; + mpt_entry->flags = cpu_to_be32(MLX4_MPT_FLAG_MIO | + MLX4_MPT_FLAG_REGION | + mr->access); + + mpt_entry->key = cpu_to_be32(key_to_hw_index(mr->key)); + mpt_entry->pd_flags = cpu_to_be32(mr->pd | MLX4_MPT_PD_FLAG_EN_INV); + mpt_entry->start = cpu_to_be64(mr->iova); + mpt_entry->length = cpu_to_be64(mr->size); + mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift); + + if (mr->mtt.order < 0) { + mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL); + mpt_entry->mtt_addr = 0; + } else { + mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev, + &mr->mtt)); + } + + if (mr->mtt.order >= 0 && mr->mtt.page_shift == 0) { + /* fast register MR in free state */ + mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_FREE); + mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG | + MLX4_MPT_PD_FLAG_RAE); + mpt_entry->mtt_sz = cpu_to_be32(1 << mr->mtt.order); + } else { + mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS); + } + + err = mlx4_SW2HW_MPT(dev, mailbox, + key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1)); + if (err) { + mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err); + goto err_cmd; + } + mr->enabled = MLX4_MPT_EN_HW; + + mlx4_free_cmd_mailbox(dev, mailbox); + + return 0; + +err_cmd: + mlx4_free_cmd_mailbox(dev, mailbox); + +err_table: + mlx4_mpt_free_icm(dev, key_to_hw_index(mr->key)); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_mr_enable); + +static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt, + int start_index, int npages, u64 *page_list) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + __be64 *mtts; + dma_addr_t dma_handle; + int i; + + mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->offset + + start_index, &dma_handle); + + if (!mtts) + return -ENOMEM; + + dma_sync_single_for_cpu(&dev->persist->pdev->dev, dma_handle, + npages * sizeof(u64), DMA_TO_DEVICE); + + for (i = 0; i < npages; 
++i) + mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT); + + dma_sync_single_for_device(&dev->persist->pdev->dev, dma_handle, + npages * sizeof(u64), DMA_TO_DEVICE); + + return 0; +} + +int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, + int start_index, int npages, u64 *page_list) +{ + int err = 0; + int chunk; + int mtts_per_page; + int max_mtts_first_page; + + /* compute how may mtts fit in the first page */ + mtts_per_page = PAGE_SIZE / sizeof(u64); + max_mtts_first_page = mtts_per_page - (mtt->offset + start_index) + % mtts_per_page; + + chunk = min_t(int, max_mtts_first_page, npages); + + while (npages > 0) { + err = mlx4_write_mtt_chunk(dev, mtt, start_index, chunk, page_list); + if (err) + return err; + npages -= chunk; + start_index += chunk; + page_list += chunk; + + chunk = min_t(int, mtts_per_page, npages); + } + return err; +} + +int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, + int start_index, int npages, u64 *page_list) +{ + struct mlx4_cmd_mailbox *mailbox = NULL; + __be64 *inbox = NULL; + int chunk; + int err = 0; + int i; + + if (mtt->order < 0) + return -EINVAL; + + if (mlx4_is_mfunc(dev)) { + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; + + while (npages > 0) { + chunk = min_t(int, MLX4_MAILBOX_SIZE / sizeof(u64) - 2, + npages); + inbox[0] = cpu_to_be64(mtt->offset + start_index); + inbox[1] = 0; + for (i = 0; i < chunk; ++i) + inbox[i + 2] = cpu_to_be64(page_list[i] | + MLX4_MTT_FLAG_PRESENT); + err = mlx4_WRITE_MTT(dev, mailbox, chunk); + if (err) { + mlx4_free_cmd_mailbox(dev, mailbox); + return err; + } + + npages -= chunk; + start_index += chunk; + page_list += chunk; + } + mlx4_free_cmd_mailbox(dev, mailbox); + return err; + } + + return __mlx4_write_mtt(dev, mtt, start_index, npages, page_list); +} +EXPORT_SYMBOL_GPL(mlx4_write_mtt); + +int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, + struct mlx4_buf *buf) +{ + u64 *page_list; + int err; + int i; + + page_list = kcalloc(buf->npages, sizeof(*page_list), GFP_KERNEL); + if (!page_list) + return -ENOMEM; + + for (i = 0; i < buf->npages; ++i) + if (buf->nbufs == 1) + page_list[i] = buf->direct.map + (i << buf->page_shift); + else + page_list[i] = buf->page_list[i].map; + + err = mlx4_write_mtt(dev, mtt, 0, buf->npages, page_list); + + kfree(page_list); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_buf_write_mtt); + +int mlx4_mw_alloc(struct mlx4_dev *dev, u32 pd, enum mlx4_mw_type type, + struct mlx4_mw *mw) +{ + u32 index; + + if ((type == MLX4_MW_TYPE_1 && + !(dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)) || + (type == MLX4_MW_TYPE_2 && + !(dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN))) + return -EOPNOTSUPP; + + index = mlx4_mpt_reserve(dev); + if (index == -1) + return -ENOMEM; + + mw->key = hw_index_to_key(index); + mw->pd = pd; + mw->type = type; + mw->enabled = MLX4_MPT_DISABLED; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mw_alloc); + +int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_mpt_entry *mpt_entry; + int err; + + err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key)); + if (err) + return err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto err_table; + } + mpt_entry = mailbox->buf; + + /* Note that the MLX4_MPT_FLAG_REGION bit in mpt_entry->flags is turned + * off, thus creating a memory window and not a memory region. 
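+ * For MLX4_MW_TYPE_2 windows the code below additionally marks the
+ * entry as free (MLX4_MPT_FLAG_FREE), QP-bound
+ * (MLX4_MPT_QP_FLAG_BOUND_QP) and invalidation-enabled
+ * (MLX4_MPT_PD_FLAG_EN_INV).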
+ */ + mpt_entry->key = cpu_to_be32(key_to_hw_index(mw->key)); + mpt_entry->pd_flags = cpu_to_be32(mw->pd); + if (mw->type == MLX4_MW_TYPE_2) { + mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_FREE); + mpt_entry->qpn = cpu_to_be32(MLX4_MPT_QP_FLAG_BOUND_QP); + mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_EN_INV); + } + + err = mlx4_SW2HW_MPT(dev, mailbox, + key_to_hw_index(mw->key) & + (dev->caps.num_mpts - 1)); + if (err) { + mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err); + goto err_cmd; + } + mw->enabled = MLX4_MPT_EN_HW; + + mlx4_free_cmd_mailbox(dev, mailbox); + + return 0; + +err_cmd: + mlx4_free_cmd_mailbox(dev, mailbox); + +err_table: + mlx4_mpt_free_icm(dev, key_to_hw_index(mw->key)); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_mw_enable); + +void mlx4_mw_free(struct mlx4_dev *dev, struct mlx4_mw *mw) +{ + int err; + + if (mw->enabled == MLX4_MPT_EN_HW) { + err = mlx4_HW2SW_MPT(dev, NULL, + key_to_hw_index(mw->key) & + (dev->caps.num_mpts - 1)); + if (err) + mlx4_warn(dev, "xxx HW2SW_MPT failed (%d)\n", err); + + mw->enabled = MLX4_MPT_EN_SW; + } + if (mw->enabled) + mlx4_mpt_free_icm(dev, key_to_hw_index(mw->key)); + mlx4_mpt_release(dev, key_to_hw_index(mw->key)); +} +EXPORT_SYMBOL_GPL(mlx4_mw_free); + +int mlx4_init_mr_table(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_mr_table *mr_table = &priv->mr_table; + int err; + + /* Nothing to do for slaves - all MR handling is forwarded + * to the master */ + if (mlx4_is_slave(dev)) + return 0; + + if (!is_power_of_2(dev->caps.num_mpts)) + return -EINVAL; + + err = mlx4_bitmap_init(&mr_table->mpt_bitmap, dev->caps.num_mpts, + ~0, dev->caps.reserved_mrws, 0); + if (err) + return err; + + err = mlx4_buddy_init(&mr_table->mtt_buddy, + ilog2((u32)dev->caps.num_mtts / + (1 << log_mtts_per_seg))); + if (err) + goto err_buddy; + + if (dev->caps.reserved_mtts) { + priv->reserved_mtts = + mlx4_alloc_mtt_range(dev, + fls(dev->caps.reserved_mtts - 1)); + if (priv->reserved_mtts < 0) { + mlx4_warn(dev, "MTT table of order %u is too small\n", + mr_table->mtt_buddy.max_order); + err = -ENOMEM; + goto err_reserve_mtts; + } + } + + return 0; + +err_reserve_mtts: + mlx4_buddy_cleanup(&mr_table->mtt_buddy); + +err_buddy: + mlx4_bitmap_cleanup(&mr_table->mpt_bitmap); + + return err; +} + +void mlx4_cleanup_mr_table(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_mr_table *mr_table = &priv->mr_table; + + if (mlx4_is_slave(dev)) + return; + if (priv->reserved_mtts >= 0) + mlx4_free_mtt_range(dev, priv->reserved_mtts, + fls(dev->caps.reserved_mtts - 1)); + mlx4_buddy_cleanup(&mr_table->mtt_buddy); + mlx4_bitmap_cleanup(&mr_table->mpt_bitmap); +} + +int mlx4_SYNC_TPT(struct mlx4_dev *dev) +{ + return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); +} +EXPORT_SYMBOL_GPL(mlx4_SYNC_TPT); diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c new file mode 100644 index 000000000..6fc156a39 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/pd.c @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/errno.h> +#include <linux/export.h> +#include <linux/io-mapping.h> + +#include <asm/page.h> + +#include "mlx4.h" +#include "icm.h" + +enum { + MLX4_NUM_RESERVED_UARS = 8 +}; + +int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + *pdn = mlx4_bitmap_alloc(&priv->pd_bitmap); + if (*pdn == -1) + return -ENOMEM; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_pd_alloc); + +void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn) +{ + mlx4_bitmap_free(&mlx4_priv(dev)->pd_bitmap, pdn, MLX4_USE_RR); +} +EXPORT_SYMBOL_GPL(mlx4_pd_free); + +int __mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + *xrcdn = mlx4_bitmap_alloc(&priv->xrcd_bitmap); + if (*xrcdn == -1) + return -ENOMEM; + + return 0; +} + +int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn) +{ + u64 out_param; + int err; + + if (mlx4_is_mfunc(dev)) { + err = mlx4_cmd_imm(dev, 0, &out_param, + RES_XRCD, RES_OP_RESERVE, + MLX4_CMD_ALLOC_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (err) + return err; + + *xrcdn = get_param_l(&out_param); + return 0; + } + return __mlx4_xrcd_alloc(dev, xrcdn); +} +EXPORT_SYMBOL_GPL(mlx4_xrcd_alloc); + +void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn) +{ + mlx4_bitmap_free(&mlx4_priv(dev)->xrcd_bitmap, xrcdn, MLX4_USE_RR); +} + +void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn) +{ + u64 in_param = 0; + int err; + + if (mlx4_is_mfunc(dev)) { + set_param_l(&in_param, xrcdn); + err = mlx4_cmd(dev, in_param, RES_XRCD, + RES_OP_RESERVE, MLX4_CMD_FREE_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (err) + mlx4_warn(dev, "Failed to release xrcdn %d\n", xrcdn); + } else + __mlx4_xrcd_free(dev, xrcdn); +} +EXPORT_SYMBOL_GPL(mlx4_xrcd_free); + +int mlx4_init_pd_table(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + return mlx4_bitmap_init(&priv->pd_bitmap, dev->caps.num_pds, + (1 << NOT_MASKED_PD_BITS) - 1, + dev->caps.reserved_pds, 0); +} + +void mlx4_cleanup_pd_table(struct mlx4_dev *dev) +{ + mlx4_bitmap_cleanup(&mlx4_priv(dev)->pd_bitmap); +} + +int mlx4_init_xrcd_table(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + return mlx4_bitmap_init(&priv->xrcd_bitmap, (1 << 16), + (1 << 16) - 1, dev->caps.reserved_xrcds + 1, 
0); +} + +void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev) +{ + mlx4_bitmap_cleanup(&mlx4_priv(dev)->xrcd_bitmap); +} + +int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar) +{ + int offset; + + uar->index = mlx4_bitmap_alloc(&mlx4_priv(dev)->uar_table.bitmap); + if (uar->index == -1) + return -ENOMEM; + + if (mlx4_is_slave(dev)) + offset = uar->index % ((int)pci_resource_len(dev->persist->pdev, + 2) / + dev->caps.uar_page_size); + else + offset = uar->index; + uar->pfn = (pci_resource_start(dev->persist->pdev, 2) >> PAGE_SHIFT) + + offset; + uar->map = NULL; + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_uar_alloc); + +void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar) +{ + mlx4_bitmap_free(&mlx4_priv(dev)->uar_table.bitmap, uar->index, MLX4_USE_RR); +} +EXPORT_SYMBOL_GPL(mlx4_uar_free); + +int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_uar *uar; + int err = 0; + int idx; + + if (!priv->bf_mapping) + return -ENOMEM; + + mutex_lock(&priv->bf_mutex); + if (!list_empty(&priv->bf_list)) + uar = list_entry(priv->bf_list.next, struct mlx4_uar, bf_list); + else { + if (mlx4_bitmap_avail(&priv->uar_table.bitmap) < MLX4_NUM_RESERVED_UARS) { + err = -ENOMEM; + goto out; + } + uar = kmalloc_node(sizeof(*uar), GFP_KERNEL, node); + if (!uar) { + uar = kmalloc(sizeof(*uar), GFP_KERNEL); + if (!uar) { + err = -ENOMEM; + goto out; + } + } + err = mlx4_uar_alloc(dev, uar); + if (err) + goto free_kmalloc; + + uar->map = ioremap(uar->pfn << PAGE_SHIFT, PAGE_SIZE); + if (!uar->map) { + err = -ENOMEM; + goto free_uar; + } + + uar->bf_map = io_mapping_map_wc(priv->bf_mapping, + uar->index << PAGE_SHIFT, + PAGE_SIZE); + if (!uar->bf_map) { + err = -ENOMEM; + goto unamp_uar; + } + uar->free_bf_bmap = 0; + list_add(&uar->bf_list, &priv->bf_list); + } + + idx = ffz(uar->free_bf_bmap); + uar->free_bf_bmap |= 1 << idx; + bf->uar = uar; + bf->offset = 0; + bf->buf_size = dev->caps.bf_reg_size / 2; + bf->reg = uar->bf_map + idx * dev->caps.bf_reg_size; + if (uar->free_bf_bmap == (1 << dev->caps.bf_regs_per_page) - 1) + list_del_init(&uar->bf_list); + + goto out; + +unamp_uar: + bf->uar = NULL; + iounmap(uar->map); + +free_uar: + mlx4_uar_free(dev, uar); + +free_kmalloc: + kfree(uar); + +out: + mutex_unlock(&priv->bf_mutex); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_bf_alloc); + +void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int idx; + + if (!bf->uar || !bf->uar->bf_map) + return; + + mutex_lock(&priv->bf_mutex); + idx = (bf->reg - bf->uar->bf_map) / dev->caps.bf_reg_size; + bf->uar->free_bf_bmap &= ~(1 << idx); + if (!bf->uar->free_bf_bmap) { + if (!list_empty(&bf->uar->bf_list)) + list_del(&bf->uar->bf_list); + + io_mapping_unmap(bf->uar->bf_map); + iounmap(bf->uar->map); + mlx4_uar_free(dev, bf->uar); + kfree(bf->uar); + } else if (list_empty(&bf->uar->bf_list)) + list_add(&bf->uar->bf_list, &priv->bf_list); + + mutex_unlock(&priv->bf_mutex); +} +EXPORT_SYMBOL_GPL(mlx4_bf_free); + +int mlx4_init_uar_table(struct mlx4_dev *dev) +{ + int num_reserved_uar = mlx4_get_num_reserved_uar(dev); + + mlx4_dbg(dev, "uar_page_shift = %d", dev->uar_page_shift); + mlx4_dbg(dev, "Effective reserved_uars=%d", dev->caps.reserved_uars); + + if (dev->caps.num_uars <= num_reserved_uar) { + mlx4_err( + dev, "Only %d UAR pages (need more than %d)\n", + dev->caps.num_uars, num_reserved_uar); + mlx4_err(dev, "Increase firmware log2_uar_bar_megabytes?\n"); + return -ENODEV; + } + + 
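+ /* num_uars is presumably a power of two, so (num_uars - 1) serves as
+ * the index mask; the reserved_uars value looks like the count of low
+ * entries kept out of the allocator for internal use.
+ */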
return mlx4_bitmap_init(&mlx4_priv(dev)->uar_table.bitmap, + dev->caps.num_uars, dev->caps.num_uars - 1, + dev->caps.reserved_uars, 0); +} + +void mlx4_cleanup_uar_table(struct mlx4_dev *dev) +{ + mlx4_bitmap_cleanup(&mlx4_priv(dev)->uar_table.bitmap); +} diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c new file mode 100644 index 000000000..256a06b3c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -0,0 +1,2229 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/errno.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/export.h> + +#include <linux/mlx4/cmd.h> + +#include "mlx4.h" +#include "mlx4_stats.h" + +#define MLX4_MAC_VALID (1ull << 63) + +#define MLX4_VLAN_VALID (1u << 31) +#define MLX4_VLAN_MASK 0xfff + +#define MLX4_STATS_TRAFFIC_COUNTERS_MASK 0xfULL +#define MLX4_STATS_TRAFFIC_DROPS_MASK 0xc0ULL +#define MLX4_STATS_ERROR_COUNTERS_MASK 0x1ffc30ULL +#define MLX4_STATS_PORT_COUNTERS_MASK 0x1fe00000ULL + +#define MLX4_FLAG2_V_IGNORE_FCS_MASK BIT(1) +#define MLX4_FLAG2_V_USER_MTU_MASK BIT(5) +#define MLX4_FLAG2_V_USER_MAC_MASK BIT(6) +#define MLX4_FLAG_V_MTU_MASK BIT(0) +#define MLX4_FLAG_V_PPRX_MASK BIT(1) +#define MLX4_FLAG_V_PPTX_MASK BIT(2) +#define MLX4_IGNORE_FCS_MASK 0x1 +#define MLX4_TC_MAX_NUMBER 8 + +void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table) +{ + int i; + + mutex_init(&table->mutex); + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + table->entries[i] = 0; + table->refs[i] = 0; + table->is_dup[i] = false; + } + table->max = 1 << dev->caps.log_num_macs; + table->total = 0; +} + +void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table) +{ + int i; + + mutex_init(&table->mutex); + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + table->entries[i] = 0; + table->refs[i] = 0; + table->is_dup[i] = false; + } + table->max = (1 << dev->caps.log_num_vlans) - MLX4_VLAN_REGULAR; + table->total = 0; +} + +void mlx4_init_roce_gid_table(struct mlx4_dev *dev, + struct mlx4_roce_gid_table *table) +{ + int i; + + mutex_init(&table->mutex); + for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) + memset(table->roce_gids[i].raw, 0, MLX4_ROCE_GID_ENTRY_SIZE); +} + +static int validate_index(struct mlx4_dev *dev, + struct mlx4_mac_table *table, int index) +{ + int err = 0; + + if (index < 0 || index >= table->max || !table->entries[index]) { + mlx4_warn(dev, "No valid Mac entry for the given index\n"); + err = -EINVAL; + } + return err; +} + +static int find_index(struct mlx4_dev *dev, + struct mlx4_mac_table *table, u64 mac) +{ + int i; + + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (table->refs[i] && + (MLX4_MAC_MASK & mac) == + (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) + return i; + } + /* Mac not found */ + return -EINVAL; +} + +static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port, + __be64 *entries) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 in_mod; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memcpy(mailbox->buf, entries, MLX4_MAC_TABLE_SIZE); + + in_mod = MLX4_SET_PORT_MAC_TABLE << 8 | port; + + err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx) +{ + struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; + struct mlx4_mac_table *table = &info->mac_table; + int i; + + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (!table->refs[i]) + continue; + + if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) { + *idx = i; + return 0; + } + } + + return -ENOENT; +} +EXPORT_SYMBOL_GPL(mlx4_find_cached_mac); + +static bool mlx4_need_mf_bond(struct mlx4_dev *dev) +{ + int i, num_eth_ports = 0; + + if (!mlx4_is_mfunc(dev)) + return false; + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) + ++num_eth_ports; + + return (num_eth_ports == 2) ? 
true : false; +} + +int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) +{ + struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; + struct mlx4_mac_table *table = &info->mac_table; + int i, err = 0; + int free = -1; + int free_for_dup = -1; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 2 : 1; + struct mlx4_mac_table *dup_table = &mlx4_priv(dev)->port[dup_port].mac_table; + bool need_mf_bond = mlx4_need_mf_bond(dev); + bool can_mf_bond = true; + + mlx4_dbg(dev, "Registering MAC: 0x%llx for port %d %s duplicate\n", + (unsigned long long)mac, port, + dup ? "with" : "without"); + + if (need_mf_bond) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING); + } + } else { + mutex_lock(&table->mutex); + } + + if (need_mf_bond) { + int index_at_port = -1; + int index_at_dup_port = -1; + + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (((MLX4_MAC_MASK & mac) == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i])))) + index_at_port = i; + if (((MLX4_MAC_MASK & mac) == (MLX4_MAC_MASK & be64_to_cpu(dup_table->entries[i])))) + index_at_dup_port = i; + } + + /* check that same mac is not in the tables at different indices */ + if ((index_at_port != index_at_dup_port) && + (index_at_port >= 0) && + (index_at_dup_port >= 0)) + can_mf_bond = false; + + /* If the mac is already in the primary table, the slot must be + * available in the duplicate table as well. + */ + if (index_at_port >= 0 && index_at_dup_port < 0 && + dup_table->refs[index_at_port]) { + can_mf_bond = false; + } + /* If the mac is already in the duplicate table, check that the + * corresponding index is not occupied in the primary table, or + * the primary table already contains the mac at the same index. + * Otherwise, you cannot bond (primary contains a different mac + * at that index). 
+ */ + if (index_at_dup_port >= 0) { + if (!table->refs[index_at_dup_port] || + ((MLX4_MAC_MASK & mac) == (MLX4_MAC_MASK & be64_to_cpu(table->entries[index_at_dup_port])))) + free_for_dup = index_at_dup_port; + else + can_mf_bond = false; + } + } + + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (!table->refs[i]) { + if (free < 0) + free = i; + if (free_for_dup < 0 && need_mf_bond && can_mf_bond) { + if (!dup_table->refs[i]) + free_for_dup = i; + } + continue; + } + + if ((MLX4_MAC_MASK & mac) == + (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) { + /* MAC already registered, increment ref count */ + err = i; + ++table->refs[i]; + if (dup) { + u64 dup_mac = MLX4_MAC_MASK & be64_to_cpu(dup_table->entries[i]); + + if (dup_mac != mac || !dup_table->is_dup[i]) { + mlx4_warn(dev, "register mac: expect duplicate mac 0x%llx on port %d index %d\n", + mac, dup_port, i); + } + } + goto out; + } + } + + if (need_mf_bond && (free_for_dup < 0)) { + if (dup) { + mlx4_warn(dev, "Fail to allocate duplicate MAC table entry\n"); + mlx4_warn(dev, "High Availability for virtual functions may not work as expected\n"); + dup = false; + } + can_mf_bond = false; + } + + if (need_mf_bond && can_mf_bond) + free = free_for_dup; + + mlx4_dbg(dev, "Free MAC index is %d\n", free); + + if (table->total == table->max) { + /* No free mac entries */ + err = -ENOSPC; + goto out; + } + + /* Register new MAC */ + table->entries[free] = cpu_to_be64(mac | MLX4_MAC_VALID); + + err = mlx4_set_port_mac_table(dev, port, table->entries); + if (unlikely(err)) { + mlx4_err(dev, "Failed adding MAC: 0x%llx\n", + (unsigned long long) mac); + table->entries[free] = 0; + goto out; + } + table->refs[free] = 1; + table->is_dup[free] = false; + ++table->total; + if (dup) { + dup_table->refs[free] = 0; + dup_table->is_dup[free] = true; + dup_table->entries[free] = cpu_to_be64(mac | MLX4_MAC_VALID); + + err = mlx4_set_port_mac_table(dev, dup_port, dup_table->entries); + if (unlikely(err)) { + mlx4_warn(dev, "Failed adding duplicate mac: 0x%llx\n", mac); + dup_table->is_dup[free] = false; + dup_table->entries[free] = 0; + goto out; + } + ++dup_table->total; + } + err = free; +out: + if (need_mf_bond) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } + return err; +} +EXPORT_SYMBOL_GPL(__mlx4_register_mac); + +int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) +{ + u64 out_param = 0; + int err = -EINVAL; + + if (mlx4_is_mfunc(dev)) { + if (!(dev->flags & MLX4_FLAG_OLD_REG_MAC)) { + err = mlx4_cmd_imm(dev, mac, &out_param, + ((u32) port) << 8 | (u32) RES_MAC, + RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + } + if (err && err == -EINVAL && mlx4_is_slave(dev)) { + /* retry using old REG_MAC format */ + set_param_l(&out_param, port); + err = mlx4_cmd_imm(dev, mac, &out_param, RES_MAC, + RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (!err) + dev->flags |= MLX4_FLAG_OLD_REG_MAC; + } + if (err) + return err; + + return get_param_l(&out_param); + } + return __mlx4_register_mac(dev, port, mac); +} +EXPORT_SYMBOL_GPL(mlx4_register_mac); + +int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port) +{ + return dev->caps.reserved_qps_base[MLX4_QP_REGION_ETH_ADDR] + + (port - 1) * (1 << dev->caps.log_num_macs); +} +EXPORT_SYMBOL_GPL(mlx4_get_base_qpn); + +void __mlx4_unregister_mac(struct mlx4_dev 
*dev, u8 port, u64 mac) +{ + struct mlx4_port_info *info; + struct mlx4_mac_table *table; + int index; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 2 : 1; + struct mlx4_mac_table *dup_table = &mlx4_priv(dev)->port[dup_port].mac_table; + + if (port < 1 || port > dev->caps.num_ports) { + mlx4_warn(dev, "invalid port number (%d), aborting...\n", port); + return; + } + info = &mlx4_priv(dev)->port[port]; + table = &info->mac_table; + + if (dup) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING); + } + } else { + mutex_lock(&table->mutex); + } + + index = find_index(dev, table, mac); + + if (validate_index(dev, table, index)) + goto out; + + if (--table->refs[index] || table->is_dup[index]) { + mlx4_dbg(dev, "Have more references for index %d, no need to modify mac table\n", + index); + if (!table->refs[index]) + dup_table->is_dup[index] = false; + goto out; + } + + table->entries[index] = 0; + if (mlx4_set_port_mac_table(dev, port, table->entries)) + mlx4_warn(dev, "Fail to set mac in port %d during unregister\n", port); + --table->total; + + if (dup) { + dup_table->is_dup[index] = false; + if (dup_table->refs[index]) + goto out; + dup_table->entries[index] = 0; + if (mlx4_set_port_mac_table(dev, dup_port, dup_table->entries)) + mlx4_warn(dev, "Fail to set mac in duplicate port %d during unregister\n", dup_port); + + --table->total; + } +out: + if (dup) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } +} +EXPORT_SYMBOL_GPL(__mlx4_unregister_mac); + +void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac) +{ + u64 out_param = 0; + + if (mlx4_is_mfunc(dev)) { + if (!(dev->flags & MLX4_FLAG_OLD_REG_MAC)) { + (void) mlx4_cmd_imm(dev, mac, &out_param, + ((u32) port) << 8 | (u32) RES_MAC, + RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + } else { + /* use old unregister mac format */ + set_param_l(&out_param, port); + (void) mlx4_cmd_imm(dev, mac, &out_param, RES_MAC, + RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + } + return; + } + __mlx4_unregister_mac(dev, port, mac); + return; +} +EXPORT_SYMBOL_GPL(mlx4_unregister_mac); + +int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac) +{ + struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; + struct mlx4_mac_table *table = &info->mac_table; + int index = qpn - info->base_qpn; + int err = 0; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 
2 : 1; + struct mlx4_mac_table *dup_table = &mlx4_priv(dev)->port[dup_port].mac_table; + + /* CX1 doesn't support multi-functions */ + if (dup) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING); + } + } else { + mutex_lock(&table->mutex); + } + + err = validate_index(dev, table, index); + if (err) + goto out; + + table->entries[index] = cpu_to_be64(new_mac | MLX4_MAC_VALID); + + err = mlx4_set_port_mac_table(dev, port, table->entries); + if (unlikely(err)) { + mlx4_err(dev, "Failed adding MAC: 0x%llx\n", + (unsigned long long) new_mac); + table->entries[index] = 0; + } else { + if (dup) { + dup_table->entries[index] = cpu_to_be64(new_mac | MLX4_MAC_VALID); + + err = mlx4_set_port_mac_table(dev, dup_port, dup_table->entries); + if (unlikely(err)) { + mlx4_err(dev, "Failed adding duplicate MAC: 0x%llx\n", + (unsigned long long)new_mac); + dup_table->entries[index] = 0; + } + } + } +out: + if (dup) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } + return err; +} +EXPORT_SYMBOL_GPL(__mlx4_replace_mac); + +static int mlx4_set_port_vlan_table(struct mlx4_dev *dev, u8 port, + __be32 *entries) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 in_mod; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memcpy(mailbox->buf, entries, MLX4_VLAN_TABLE_SIZE); + in_mod = MLX4_SET_PORT_VLAN_TABLE << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + + return err; +} + +int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx) +{ + struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; + int i; + + for (i = 0; i < MLX4_MAX_VLAN_NUM; ++i) { + if (table->refs[i] && + (vid == (MLX4_VLAN_MASK & + be32_to_cpu(table->entries[i])))) { + /* VLAN already registered, increase reference count */ + *idx = i; + return 0; + } + } + + return -ENOENT; +} +EXPORT_SYMBOL_GPL(mlx4_find_cached_vlan); + +int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, + int *index) +{ + struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; + int i, err = 0; + int free = -1; + int free_for_dup = -1; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 2 : 1; + struct mlx4_vlan_table *dup_table = &mlx4_priv(dev)->port[dup_port].vlan_table; + bool need_mf_bond = mlx4_need_mf_bond(dev); + bool can_mf_bond = true; + + mlx4_dbg(dev, "Registering VLAN: %d for port %d %s duplicate\n", + vlan, port, + dup ? 
"with" : "without"); + + if (need_mf_bond) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING); + } + } else { + mutex_lock(&table->mutex); + } + + if (table->total == table->max) { + /* No free vlan entries */ + err = -ENOSPC; + goto out; + } + + if (need_mf_bond) { + int index_at_port = -1; + int index_at_dup_port = -1; + + for (i = MLX4_VLAN_REGULAR; i < MLX4_MAX_VLAN_NUM; i++) { + if (vlan == (MLX4_VLAN_MASK & be32_to_cpu(table->entries[i]))) + index_at_port = i; + if (vlan == (MLX4_VLAN_MASK & be32_to_cpu(dup_table->entries[i]))) + index_at_dup_port = i; + } + /* check that same vlan is not in the tables at different indices */ + if ((index_at_port != index_at_dup_port) && + (index_at_port >= 0) && + (index_at_dup_port >= 0)) + can_mf_bond = false; + + /* If the vlan is already in the primary table, the slot must be + * available in the duplicate table as well. + */ + if (index_at_port >= 0 && index_at_dup_port < 0 && + dup_table->refs[index_at_port]) { + can_mf_bond = false; + } + /* If the vlan is already in the duplicate table, check that the + * corresponding index is not occupied in the primary table, or + * the primary table already contains the vlan at the same index. + * Otherwise, you cannot bond (primary contains a different vlan + * at that index). + */ + if (index_at_dup_port >= 0) { + if (!table->refs[index_at_dup_port] || + (vlan == (MLX4_VLAN_MASK & be32_to_cpu(dup_table->entries[index_at_dup_port])))) + free_for_dup = index_at_dup_port; + else + can_mf_bond = false; + } + } + + for (i = MLX4_VLAN_REGULAR; i < MLX4_MAX_VLAN_NUM; i++) { + if (!table->refs[i]) { + if (free < 0) + free = i; + if (free_for_dup < 0 && need_mf_bond && can_mf_bond) { + if (!dup_table->refs[i]) + free_for_dup = i; + } + } + + if ((table->refs[i] || table->is_dup[i]) && + (vlan == (MLX4_VLAN_MASK & + be32_to_cpu(table->entries[i])))) { + /* Vlan already registered, increase references count */ + mlx4_dbg(dev, "vlan %u is already registered.\n", vlan); + *index = i; + ++table->refs[i]; + if (dup) { + u16 dup_vlan = MLX4_VLAN_MASK & be32_to_cpu(dup_table->entries[i]); + + if (dup_vlan != vlan || !dup_table->is_dup[i]) { + mlx4_warn(dev, "register vlan: expected duplicate vlan %u on port %d index %d\n", + vlan, dup_port, i); + } + } + goto out; + } + } + + if (need_mf_bond && (free_for_dup < 0)) { + if (dup) { + mlx4_warn(dev, "Fail to allocate duplicate VLAN table entry\n"); + mlx4_warn(dev, "High Availability for virtual functions may not work as expected\n"); + dup = false; + } + can_mf_bond = false; + } + + if (need_mf_bond && can_mf_bond) + free = free_for_dup; + + if (free < 0) { + err = -ENOMEM; + goto out; + } + + /* Register new VLAN */ + table->refs[free] = 1; + table->is_dup[free] = false; + table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID); + + err = mlx4_set_port_vlan_table(dev, port, table->entries); + if (unlikely(err)) { + mlx4_warn(dev, "Failed adding vlan: %u\n", vlan); + table->refs[free] = 0; + table->entries[free] = 0; + goto out; + } + ++table->total; + if (dup) { + dup_table->refs[free] = 0; + dup_table->is_dup[free] = true; + dup_table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID); + + err = mlx4_set_port_vlan_table(dev, dup_port, dup_table->entries); + if (unlikely(err)) { + mlx4_warn(dev, "Failed adding duplicate vlan: %u\n", vlan); + dup_table->is_dup[free] = false; + dup_table->entries[free] = 0; 
+ goto out; + } + ++dup_table->total; + } + + *index = free; +out: + if (need_mf_bond) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } + return err; +} + +int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index) +{ + u64 out_param = 0; + int err; + + if (vlan > 4095) + return -EINVAL; + + if (mlx4_is_mfunc(dev)) { + err = mlx4_cmd_imm(dev, vlan, &out_param, + ((u32) port) << 8 | (u32) RES_VLAN, + RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (!err) + *index = get_param_l(&out_param); + + return err; + } + return __mlx4_register_vlan(dev, port, vlan, index); +} +EXPORT_SYMBOL_GPL(mlx4_register_vlan); + +void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) +{ + struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; + int index; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 2 : 1; + struct mlx4_vlan_table *dup_table = &mlx4_priv(dev)->port[dup_port].vlan_table; + + if (dup) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING); + } + } else { + mutex_lock(&table->mutex); + } + + if (mlx4_find_cached_vlan(dev, port, vlan, &index)) { + mlx4_warn(dev, "vlan 0x%x is not in the vlan table\n", vlan); + goto out; + } + + if (index < MLX4_VLAN_REGULAR) { + mlx4_warn(dev, "Trying to free special vlan index %d\n", index); + goto out; + } + + if (--table->refs[index] || table->is_dup[index]) { + mlx4_dbg(dev, "Have %d more references for index %d, no need to modify vlan table\n", + table->refs[index], index); + if (!table->refs[index]) + dup_table->is_dup[index] = false; + goto out; + } + table->entries[index] = 0; + if (mlx4_set_port_vlan_table(dev, port, table->entries)) + mlx4_warn(dev, "Fail to set vlan in port %d during unregister\n", port); + --table->total; + if (dup) { + dup_table->is_dup[index] = false; + if (dup_table->refs[index]) + goto out; + dup_table->entries[index] = 0; + if (mlx4_set_port_vlan_table(dev, dup_port, dup_table->entries)) + mlx4_warn(dev, "Fail to set vlan in duplicate port %d during unregister\n", dup_port); + --dup_table->total; + } +out: + if (dup) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } +} + +void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) +{ + u64 out_param = 0; + + if (mlx4_is_mfunc(dev)) { + (void) mlx4_cmd_imm(dev, vlan, &out_param, + ((u32) port) << 8 | (u32) RES_VLAN, + RES_OP_RESERVE_AND_MAP, + MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_WRAPPED); + return; + } + __mlx4_unregister_vlan(dev, port, vlan); +} +EXPORT_SYMBOL_GPL(mlx4_unregister_vlan); + +int mlx4_bond_mac_table(struct mlx4_dev *dev) +{ + struct mlx4_mac_table *t1 = &mlx4_priv(dev)->port[1].mac_table; + struct mlx4_mac_table *t2 = &mlx4_priv(dev)->port[2].mac_table; + int ret = 0; + int i; + bool update1 = false; + bool update2 = false; + + mutex_lock(&t1->mutex); + mutex_lock(&t2->mutex); + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if ((t1->entries[i] != t2->entries[i]) && + t1->entries[i] && t2->entries[i]) { + mlx4_warn(dev, "can't duplicate entry %d in mac 
table\n", i); + ret = -EINVAL; + goto unlock; + } + } + + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (t1->entries[i] && !t2->entries[i]) { + t2->entries[i] = t1->entries[i]; + t2->is_dup[i] = true; + update2 = true; + } else if (!t1->entries[i] && t2->entries[i]) { + t1->entries[i] = t2->entries[i]; + t1->is_dup[i] = true; + update1 = true; + } else if (t1->entries[i] && t2->entries[i]) { + t1->is_dup[i] = true; + t2->is_dup[i] = true; + } + } + + if (update1) { + ret = mlx4_set_port_mac_table(dev, 1, t1->entries); + if (ret) + mlx4_warn(dev, "failed to set MAC table for port 1 (%d)\n", ret); + } + if (!ret && update2) { + ret = mlx4_set_port_mac_table(dev, 2, t2->entries); + if (ret) + mlx4_warn(dev, "failed to set MAC table for port 2 (%d)\n", ret); + } + + if (ret) + mlx4_warn(dev, "failed to create mirror MAC tables\n"); +unlock: + mutex_unlock(&t2->mutex); + mutex_unlock(&t1->mutex); + return ret; +} + +int mlx4_unbond_mac_table(struct mlx4_dev *dev) +{ + struct mlx4_mac_table *t1 = &mlx4_priv(dev)->port[1].mac_table; + struct mlx4_mac_table *t2 = &mlx4_priv(dev)->port[2].mac_table; + int ret = 0; + int ret1; + int i; + bool update1 = false; + bool update2 = false; + + mutex_lock(&t1->mutex); + mutex_lock(&t2->mutex); + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (t1->entries[i] != t2->entries[i]) { + mlx4_warn(dev, "mac table is in an unexpected state when trying to unbond\n"); + ret = -EINVAL; + goto unlock; + } + } + + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (!t1->entries[i]) + continue; + t1->is_dup[i] = false; + if (!t1->refs[i]) { + t1->entries[i] = 0; + update1 = true; + } + t2->is_dup[i] = false; + if (!t2->refs[i]) { + t2->entries[i] = 0; + update2 = true; + } + } + + if (update1) { + ret = mlx4_set_port_mac_table(dev, 1, t1->entries); + if (ret) + mlx4_warn(dev, "failed to unmirror MAC tables for port 1(%d)\n", ret); + } + if (update2) { + ret1 = mlx4_set_port_mac_table(dev, 2, t2->entries); + if (ret1) { + mlx4_warn(dev, "failed to unmirror MAC tables for port 2(%d)\n", ret1); + ret = ret1; + } + } +unlock: + mutex_unlock(&t2->mutex); + mutex_unlock(&t1->mutex); + return ret; +} + +int mlx4_bond_vlan_table(struct mlx4_dev *dev) +{ + struct mlx4_vlan_table *t1 = &mlx4_priv(dev)->port[1].vlan_table; + struct mlx4_vlan_table *t2 = &mlx4_priv(dev)->port[2].vlan_table; + int ret = 0; + int i; + bool update1 = false; + bool update2 = false; + + mutex_lock(&t1->mutex); + mutex_lock(&t2->mutex); + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + if ((t1->entries[i] != t2->entries[i]) && + t1->entries[i] && t2->entries[i]) { + mlx4_warn(dev, "can't duplicate entry %d in vlan table\n", i); + ret = -EINVAL; + goto unlock; + } + } + + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + if (t1->entries[i] && !t2->entries[i]) { + t2->entries[i] = t1->entries[i]; + t2->is_dup[i] = true; + update2 = true; + } else if (!t1->entries[i] && t2->entries[i]) { + t1->entries[i] = t2->entries[i]; + t1->is_dup[i] = true; + update1 = true; + } else if (t1->entries[i] && t2->entries[i]) { + t1->is_dup[i] = true; + t2->is_dup[i] = true; + } + } + + if (update1) { + ret = mlx4_set_port_vlan_table(dev, 1, t1->entries); + if (ret) + mlx4_warn(dev, "failed to set VLAN table for port 1 (%d)\n", ret); + } + if (!ret && update2) { + ret = mlx4_set_port_vlan_table(dev, 2, t2->entries); + if (ret) + mlx4_warn(dev, "failed to set VLAN table for port 2 (%d)\n", ret); + } + + if (ret) + mlx4_warn(dev, "failed to create mirror VLAN tables\n"); +unlock: + mutex_unlock(&t2->mutex); + mutex_unlock(&t1->mutex); + return 
ret; +} + +int mlx4_unbond_vlan_table(struct mlx4_dev *dev) +{ + struct mlx4_vlan_table *t1 = &mlx4_priv(dev)->port[1].vlan_table; + struct mlx4_vlan_table *t2 = &mlx4_priv(dev)->port[2].vlan_table; + int ret = 0; + int ret1; + int i; + bool update1 = false; + bool update2 = false; + + mutex_lock(&t1->mutex); + mutex_lock(&t2->mutex); + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + if (t1->entries[i] != t2->entries[i]) { + mlx4_warn(dev, "vlan table is in an unexpected state when trying to unbond\n"); + ret = -EINVAL; + goto unlock; + } + } + + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + if (!t1->entries[i]) + continue; + t1->is_dup[i] = false; + if (!t1->refs[i]) { + t1->entries[i] = 0; + update1 = true; + } + t2->is_dup[i] = false; + if (!t2->refs[i]) { + t2->entries[i] = 0; + update2 = true; + } + } + + if (update1) { + ret = mlx4_set_port_vlan_table(dev, 1, t1->entries); + if (ret) + mlx4_warn(dev, "failed to unmirror VLAN tables for port 1(%d)\n", ret); + } + if (update2) { + ret1 = mlx4_set_port_vlan_table(dev, 2, t2->entries); + if (ret1) { + mlx4_warn(dev, "failed to unmirror VLAN tables for port 2(%d)\n", ret1); + ret = ret1; + } + } +unlock: + mutex_unlock(&t2->mutex); + mutex_unlock(&t1->mutex); + return ret; +} + +int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps) +{ + struct mlx4_cmd_mailbox *inmailbox, *outmailbox; + u8 *inbuf, *outbuf; + int err; + + inmailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(inmailbox)) + return PTR_ERR(inmailbox); + + outmailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(outmailbox)) { + mlx4_free_cmd_mailbox(dev, inmailbox); + return PTR_ERR(outmailbox); + } + + inbuf = inmailbox->buf; + outbuf = outmailbox->buf; + inbuf[0] = 1; + inbuf[1] = 1; + inbuf[2] = 1; + inbuf[3] = 1; + *(__be16 *) (&inbuf[16]) = cpu_to_be16(0x0015); + *(__be32 *) (&inbuf[20]) = cpu_to_be32(port); + + err = mlx4_cmd_box(dev, inmailbox->dma, outmailbox->dma, port, 3, + MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); + if (!err) + *caps = *(__be32 *) (outbuf + 84); + mlx4_free_cmd_mailbox(dev, inmailbox); + mlx4_free_cmd_mailbox(dev, outmailbox); + return err; +} +static struct mlx4_roce_gid_entry zgid_entry; + +int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port) +{ + int vfs; + int slave_gid = slave; + unsigned i; + struct mlx4_slaves_pport slaves_pport; + struct mlx4_active_ports actv_ports; + unsigned max_port_p_one; + + if (slave == 0) + return MLX4_ROCE_PF_GIDS; + + /* Slave is a VF */ + slaves_pport = mlx4_phys_to_slaves_pport(dev, port); + actv_ports = mlx4_get_active_ports(dev, slave); + max_port_p_one = find_first_bit(actv_ports.ports, dev->caps.num_ports) + + bitmap_weight(actv_ports.ports, dev->caps.num_ports) + 1; + + for (i = 1; i < max_port_p_one; i++) { + struct mlx4_active_ports exclusive_ports; + struct mlx4_slaves_pport slaves_pport_actv; + bitmap_zero(exclusive_ports.ports, dev->caps.num_ports); + set_bit(i - 1, exclusive_ports.ports); + if (i == port) + continue; + slaves_pport_actv = mlx4_phys_to_slaves_pport_actv( + dev, &exclusive_ports); + slave_gid -= bitmap_weight(slaves_pport_actv.slaves, + dev->persist->num_vfs + 1); + } + vfs = bitmap_weight(slaves_pport.slaves, dev->persist->num_vfs + 1) - 1; + if (slave_gid <= ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) % vfs)) + return ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / vfs) + 1; + return (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / vfs; +} + +int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port) +{ + int gids; + unsigned i; + int 
slave_gid = slave; + int vfs; + + struct mlx4_slaves_pport slaves_pport; + struct mlx4_active_ports actv_ports; + unsigned max_port_p_one; + + if (slave == 0) + return 0; + + slaves_pport = mlx4_phys_to_slaves_pport(dev, port); + actv_ports = mlx4_get_active_ports(dev, slave); + max_port_p_one = find_first_bit(actv_ports.ports, dev->caps.num_ports) + + bitmap_weight(actv_ports.ports, dev->caps.num_ports) + 1; + + for (i = 1; i < max_port_p_one; i++) { + struct mlx4_active_ports exclusive_ports; + struct mlx4_slaves_pport slaves_pport_actv; + bitmap_zero(exclusive_ports.ports, dev->caps.num_ports); + set_bit(i - 1, exclusive_ports.ports); + if (i == port) + continue; + slaves_pport_actv = mlx4_phys_to_slaves_pport_actv( + dev, &exclusive_ports); + slave_gid -= bitmap_weight(slaves_pport_actv.slaves, + dev->persist->num_vfs + 1); + } + gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; + vfs = bitmap_weight(slaves_pport.slaves, dev->persist->num_vfs + 1) - 1; + if (slave_gid <= gids % vfs) + return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave_gid - 1); + + return MLX4_ROCE_PF_GIDS + (gids % vfs) + + ((gids / vfs) * (slave_gid - 1)); +} +EXPORT_SYMBOL_GPL(mlx4_get_base_gid_ix); + +static int mlx4_reset_roce_port_gids(struct mlx4_dev *dev, int slave, + int port, struct mlx4_cmd_mailbox *mailbox) +{ + struct mlx4_roce_gid_entry *gid_entry_mbox; + struct mlx4_priv *priv = mlx4_priv(dev); + int num_gids, base, offset; + int i, err; + + num_gids = mlx4_get_slave_num_gids(dev, slave, port); + base = mlx4_get_base_gid_ix(dev, slave, port); + + memset(mailbox->buf, 0, MLX4_MAILBOX_SIZE); + + mutex_lock(&(priv->port[port].gid_table.mutex)); + /* Zero-out gids belonging to that slave in the port GID table */ + for (i = 0, offset = base; i < num_gids; offset++, i++) + memcpy(priv->port[port].gid_table.roce_gids[offset].raw, + zgid_entry.raw, MLX4_ROCE_GID_ENTRY_SIZE); + + /* Now, copy roce port gids table to mailbox for passing to FW */ + gid_entry_mbox = (struct mlx4_roce_gid_entry *)mailbox->buf; + for (i = 0; i < MLX4_ROCE_MAX_GIDS; gid_entry_mbox++, i++) + memcpy(gid_entry_mbox->raw, + priv->port[port].gid_table.roce_gids[i].raw, + MLX4_ROCE_GID_ENTRY_SIZE); + + err = mlx4_cmd(dev, mailbox->dma, + ((u32)port) | (MLX4_SET_PORT_GID_TABLE << 8), + MLX4_SET_PORT_ETH_OPCODE, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + mutex_unlock(&(priv->port[port].gid_table.mutex)); + return err; +} + + +void mlx4_reset_roce_gids(struct mlx4_dev *dev, int slave) +{ + struct mlx4_active_ports actv_ports; + struct mlx4_cmd_mailbox *mailbox; + int num_eth_ports, err; + int i; + + if (slave < 0 || slave > dev->persist->num_vfs) + return; + + actv_ports = mlx4_get_active_ports(dev, slave); + + for (i = 0, num_eth_ports = 0; i < dev->caps.num_ports; i++) { + if (test_bit(i, actv_ports.ports)) { + if (dev->caps.port_type[i + 1] != MLX4_PORT_TYPE_ETH) + continue; + num_eth_ports++; + } + } + + if (!num_eth_ports) + return; + + /* have ETH ports. 
Alloc mailbox for SET_PORT command */ + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return; + + for (i = 0; i < dev->caps.num_ports; i++) { + if (test_bit(i, actv_ports.ports)) { + if (dev->caps.port_type[i + 1] != MLX4_PORT_TYPE_ETH) + continue; + err = mlx4_reset_roce_port_gids(dev, slave, i + 1, mailbox); + if (err) + mlx4_warn(dev, "Could not reset ETH port GID table for slave %d, port %d (%d)\n", + slave, i + 1, err); + } + } + + mlx4_free_cmd_mailbox(dev, mailbox); + return; +} + +static void +mlx4_en_set_port_mtu(struct mlx4_dev *dev, int slave, int port, + struct mlx4_set_port_general_context *gen_context) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_mfunc_master_ctx *master = &priv->mfunc.master; + struct mlx4_slave_state *slave_st = &master->slave_state[slave]; + u16 mtu, prev_mtu; + + /* Mtu is configured as the max USER_MTU among all + * the functions on the port. + */ + mtu = be16_to_cpu(gen_context->mtu); + mtu = min_t(int, mtu, dev->caps.eth_mtu_cap[port] + + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN); + prev_mtu = slave_st->mtu[port]; + slave_st->mtu[port] = mtu; + if (mtu > master->max_mtu[port]) + master->max_mtu[port] = mtu; + if (mtu < prev_mtu && prev_mtu == master->max_mtu[port]) { + int i; + + slave_st->mtu[port] = mtu; + master->max_mtu[port] = mtu; + for (i = 0; i < dev->num_slaves; i++) + master->max_mtu[port] = + max_t(u16, master->max_mtu[port], + master->slave_state[i].mtu[port]); + } + gen_context->mtu = cpu_to_be16(master->max_mtu[port]); +} + +static void +mlx4_en_set_port_user_mtu(struct mlx4_dev *dev, int slave, int port, + struct mlx4_set_port_general_context *gen_context) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_mfunc_master_ctx *master = &priv->mfunc.master; + struct mlx4_slave_state *slave_st = &master->slave_state[slave]; + u16 user_mtu, prev_user_mtu; + + /* User Mtu is configured as the max USER_MTU among all + * the functions on the port. 
+ */ + user_mtu = be16_to_cpu(gen_context->user_mtu); + user_mtu = min_t(int, user_mtu, dev->caps.eth_mtu_cap[port]); + prev_user_mtu = slave_st->user_mtu[port]; + slave_st->user_mtu[port] = user_mtu; + if (user_mtu > master->max_user_mtu[port]) + master->max_user_mtu[port] = user_mtu; + if (user_mtu < prev_user_mtu && + prev_user_mtu == master->max_user_mtu[port]) { + int i; + + slave_st->user_mtu[port] = user_mtu; + master->max_user_mtu[port] = user_mtu; + for (i = 0; i < dev->num_slaves; i++) + master->max_user_mtu[port] = + max_t(u16, master->max_user_mtu[port], + master->slave_state[i].user_mtu[port]); + } + gen_context->user_mtu = cpu_to_be16(master->max_user_mtu[port]); +} + +static void +mlx4_en_set_port_global_pause(struct mlx4_dev *dev, int slave, + struct mlx4_set_port_general_context *gen_context) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_mfunc_master_ctx *master = &priv->mfunc.master; + + /* Slave cannot change Global Pause configuration */ + if (slave != mlx4_master_func_num(dev) && + (gen_context->pptx != master->pptx || + gen_context->pprx != master->pprx)) { + gen_context->pptx = master->pptx; + gen_context->pprx = master->pprx; + mlx4_warn(dev, "denying Global Pause change for slave:%d\n", + slave); + } else { + master->pptx = gen_context->pptx; + master->pprx = gen_context->pprx; + } +} + +static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, + u8 op_mod, struct mlx4_cmd_mailbox *inbox) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_port_info *port_info; + struct mlx4_set_port_rqp_calc_context *qpn_context; + struct mlx4_set_port_general_context *gen_context; + struct mlx4_roce_gid_entry *gid_entry_tbl, *gid_entry_mbox, *gid_entry_mb1; + int reset_qkey_viols; + int port; + int is_eth; + int num_gids; + int base; + u32 in_modifier; + u32 promisc; + int err; + int i, j; + int offset; + __be32 agg_cap_mask; + __be32 slave_cap_mask; + __be32 new_cap_mask; + + port = in_mod & 0xff; + in_modifier = in_mod >> 8; + is_eth = op_mod; + port_info = &priv->port[port]; + + /* Slaves cannot perform SET_PORT operations, + * except for changing MTU and USER_MTU. + */ + if (is_eth) { + if (slave != dev->caps.function && + in_modifier != MLX4_SET_PORT_GENERAL && + in_modifier != MLX4_SET_PORT_GID_TABLE) { + mlx4_warn(dev, "denying SET_PORT for slave:%d\n", + slave); + return -EINVAL; + } + switch (in_modifier) { + case MLX4_SET_PORT_RQP_CALC: + qpn_context = inbox->buf; + qpn_context->base_qpn = + cpu_to_be32(port_info->base_qpn); + qpn_context->n_mac = 0x7; + promisc = be32_to_cpu(qpn_context->promisc) >> + SET_PORT_PROMISC_SHIFT; + qpn_context->promisc = cpu_to_be32( + promisc << SET_PORT_PROMISC_SHIFT | + port_info->base_qpn); + promisc = be32_to_cpu(qpn_context->mcast) >> + SET_PORT_MC_PROMISC_SHIFT; + qpn_context->mcast = cpu_to_be32( + promisc << SET_PORT_MC_PROMISC_SHIFT | + port_info->base_qpn); + break; + case MLX4_SET_PORT_GENERAL: + gen_context = inbox->buf; + + if (gen_context->flags & MLX4_FLAG_V_MTU_MASK) + mlx4_en_set_port_mtu(dev, slave, port, + gen_context); + + if (gen_context->flags2 & MLX4_FLAG2_V_USER_MTU_MASK) + mlx4_en_set_port_user_mtu(dev, slave, port, + gen_context); + + if (gen_context->flags & + (MLX4_FLAG_V_PPRX_MASK | MLX4_FLAG_V_PPTX_MASK)) + mlx4_en_set_port_global_pause(dev, slave, + gen_context); + + break; + case MLX4_SET_PORT_GID_TABLE: + /* change to MULTIPLE entries: number of guest's gids + * need a FOR-loop here over number of gids the guest has. + * 1. 
Check no duplicates in gids passed by slave + */ + num_gids = mlx4_get_slave_num_gids(dev, slave, port); + base = mlx4_get_base_gid_ix(dev, slave, port); + gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf); + for (i = 0; i < num_gids; gid_entry_mbox++, i++) { + if (!memcmp(gid_entry_mbox->raw, zgid_entry.raw, + sizeof(zgid_entry))) + continue; + gid_entry_mb1 = gid_entry_mbox + 1; + for (j = i + 1; j < num_gids; gid_entry_mb1++, j++) { + if (!memcmp(gid_entry_mb1->raw, + zgid_entry.raw, sizeof(zgid_entry))) + continue; + if (!memcmp(gid_entry_mb1->raw, gid_entry_mbox->raw, + sizeof(gid_entry_mbox->raw))) { + /* found duplicate */ + return -EINVAL; + } + } + } + + /* 2. Check that do not have duplicates in OTHER + * entries in the port GID table + */ + + mutex_lock(&(priv->port[port].gid_table.mutex)); + for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) { + if (i >= base && i < base + num_gids) + continue; /* don't compare to slave's current gids */ + gid_entry_tbl = &priv->port[port].gid_table.roce_gids[i]; + if (!memcmp(gid_entry_tbl->raw, zgid_entry.raw, sizeof(zgid_entry))) + continue; + gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf); + for (j = 0; j < num_gids; gid_entry_mbox++, j++) { + if (!memcmp(gid_entry_mbox->raw, zgid_entry.raw, + sizeof(zgid_entry))) + continue; + if (!memcmp(gid_entry_mbox->raw, gid_entry_tbl->raw, + sizeof(gid_entry_tbl->raw))) { + /* found duplicate */ + mlx4_warn(dev, "requested gid entry for slave:%d is a duplicate of gid at index %d\n", + slave, i); + mutex_unlock(&(priv->port[port].gid_table.mutex)); + return -EINVAL; + } + } + } + + /* insert slave GIDs with memcpy, starting at slave's base index */ + gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf); + for (i = 0, offset = base; i < num_gids; gid_entry_mbox++, offset++, i++) + memcpy(priv->port[port].gid_table.roce_gids[offset].raw, + gid_entry_mbox->raw, MLX4_ROCE_GID_ENTRY_SIZE); + + /* Now, copy roce port gids table to current mailbox for passing to FW */ + gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf); + for (i = 0; i < MLX4_ROCE_MAX_GIDS; gid_entry_mbox++, i++) + memcpy(gid_entry_mbox->raw, + priv->port[port].gid_table.roce_gids[i].raw, + MLX4_ROCE_GID_ENTRY_SIZE); + + err = mlx4_cmd(dev, inbox->dma, in_mod & 0xffff, op_mod, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); + mutex_unlock(&(priv->port[port].gid_table.mutex)); + return err; + } + + return mlx4_cmd(dev, inbox->dma, in_mod & 0xffff, op_mod, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); + } + + /* Slaves are not allowed to SET_PORT beacon (LED) blink */ + if (op_mod == MLX4_SET_PORT_BEACON_OPCODE) { + mlx4_warn(dev, "denying SET_PORT Beacon slave:%d\n", slave); + return -EPERM; + } + + /* For IB, we only consider: + * - The capability mask, which is set to the aggregate of all + * slave function capabilities + * - The QKey violatin counter - reset according to each request. 
+ */ + + if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { + reset_qkey_viols = (*(u8 *) inbox->buf) & 0x40; + new_cap_mask = ((__be32 *) inbox->buf)[2]; + } else { + reset_qkey_viols = ((u8 *) inbox->buf)[3] & 0x1; + new_cap_mask = ((__be32 *) inbox->buf)[1]; + } + + /* slave may not set the IS_SM capability for the port */ + if (slave != mlx4_master_func_num(dev) && + (be32_to_cpu(new_cap_mask) & MLX4_PORT_CAP_IS_SM)) + return -EINVAL; + + /* No DEV_MGMT in multifunc mode */ + if (mlx4_is_mfunc(dev) && + (be32_to_cpu(new_cap_mask) & MLX4_PORT_CAP_DEV_MGMT_SUP)) + return -EINVAL; + + agg_cap_mask = 0; + slave_cap_mask = + priv->mfunc.master.slave_state[slave].ib_cap_mask[port]; + priv->mfunc.master.slave_state[slave].ib_cap_mask[port] = new_cap_mask; + for (i = 0; i < dev->num_slaves; i++) + agg_cap_mask |= + priv->mfunc.master.slave_state[i].ib_cap_mask[port]; + + /* only clear mailbox for guests. Master may be setting + * MTU or PKEY table size + */ + if (slave != dev->caps.function) + memset(inbox->buf, 0, 256); + if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { + *(u8 *) inbox->buf |= !!reset_qkey_viols << 6; + ((__be32 *) inbox->buf)[2] = agg_cap_mask; + } else { + ((u8 *) inbox->buf)[3] |= !!reset_qkey_viols; + ((__be32 *) inbox->buf)[1] = agg_cap_mask; + } + + err = mlx4_cmd(dev, inbox->dma, port, is_eth, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + if (err) + priv->mfunc.master.slave_state[slave].ib_cap_mask[port] = + slave_cap_mask; + return err; +} + +int mlx4_SET_PORT_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int port = mlx4_slave_convert_port( + dev, slave, vhcr->in_modifier & 0xFF); + + if (port < 0) + return -EINVAL; + + vhcr->in_modifier = (vhcr->in_modifier & ~0xFF) | + (port & 0xFF); + + return mlx4_common_set_port(dev, slave, vhcr->in_modifier, + vhcr->op_modifier, inbox); +} + +/* bit locations for set port command with zero op modifier */ +enum { + MLX4_SET_PORT_VL_CAP = 4, /* bits 7:4 */ + MLX4_SET_PORT_MTU_CAP = 12, /* bits 15:12 */ + MLX4_CHANGE_PORT_PKEY_TBL_SZ = 20, + MLX4_CHANGE_PORT_VL_CAP = 21, + MLX4_CHANGE_PORT_MTU_CAP = 22, +}; + +int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz) +{ + struct mlx4_cmd_mailbox *mailbox; + int err, vl_cap, pkey_tbl_flag = 0; + + if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) + return 0; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + ((__be32 *) mailbox->buf)[1] = dev->caps.ib_port_def_cap[port]; + + if (pkey_tbl_sz >= 0 && mlx4_is_master(dev)) { + pkey_tbl_flag = 1; + ((__be16 *) mailbox->buf)[20] = cpu_to_be16(pkey_tbl_sz); + } + + /* IB VL CAP enum isn't used by the firmware, just numerical values */ + for (vl_cap = 8; vl_cap >= 1; vl_cap >>= 1) { + ((__be32 *) mailbox->buf)[0] = cpu_to_be32( + (1 << MLX4_CHANGE_PORT_MTU_CAP) | + (1 << MLX4_CHANGE_PORT_VL_CAP) | + (pkey_tbl_flag << MLX4_CHANGE_PORT_PKEY_TBL_SZ) | + (dev->caps.port_ib_mtu[port] << MLX4_SET_PORT_MTU_CAP) | + (vl_cap << MLX4_SET_PORT_VL_CAP)); + err = mlx4_cmd(dev, mailbox->dma, port, + MLX4_SET_PORT_IB_OPCODE, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); + if (err != -ENOMEM) + break; + } + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +#define SET_PORT_ROCE_2_FLAGS 0x10 +#define MLX4_SET_PORT_ROCE_V1_V2 0x2 +int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, + u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx) +{ + struct 
mlx4_cmd_mailbox *mailbox; + struct mlx4_set_port_general_context *context; + int err; + u32 in_mod; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + context = mailbox->buf; + context->flags = SET_PORT_GEN_ALL_VALID; + context->mtu = cpu_to_be16(mtu); + context->pptx = (pptx * (!pfctx)) << 7; + context->pfctx = pfctx; + context->pprx = (pprx * (!pfcrx)) << 7; + context->pfcrx = pfcrx; + + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) { + context->flags |= SET_PORT_ROCE_2_FLAGS; + context->roce_mode |= + MLX4_SET_PORT_ROCE_V1_V2 << 4; + } + in_mod = MLX4_SET_PORT_GENERAL << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_SET_PORT_general); + +int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, + u8 promisc) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_port_rqp_calc_context *context; + int err; + u32 in_mod; + u32 m_promisc = (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) ? + MCAST_DIRECT : MCAST_DEFAULT; + + if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) + return 0; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + context = mailbox->buf; + context->base_qpn = cpu_to_be32(base_qpn); + context->n_mac = dev->caps.log_num_macs; + context->promisc = cpu_to_be32(promisc << SET_PORT_PROMISC_SHIFT | + base_qpn); + context->mcast = cpu_to_be32(m_promisc << SET_PORT_MC_PROMISC_SHIFT | + base_qpn); + context->intra_no_vlan = 0; + context->no_vlan = MLX4_NO_VLAN_IDX; + context->intra_vlan_miss = 0; + context->vlan_miss = MLX4_VLAN_MISS_IDX; + + in_mod = MLX4_SET_PORT_RQP_CALC << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_SET_PORT_qpn_calc); + +int mlx4_SET_PORT_user_mtu(struct mlx4_dev *dev, u8 port, u16 user_mtu) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_port_general_context *context; + u32 in_mod; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + context = mailbox->buf; + context->flags2 |= MLX4_FLAG2_V_USER_MTU_MASK; + context->user_mtu = cpu_to_be16(user_mtu); + + in_mod = MLX4_SET_PORT_GENERAL << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_SET_PORT_user_mtu); + +int mlx4_SET_PORT_user_mac(struct mlx4_dev *dev, u8 port, u8 *user_mac) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_port_general_context *context; + u32 in_mod; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + context = mailbox->buf; + context->flags2 |= MLX4_FLAG2_V_USER_MAC_MASK; + memcpy(context->user_mac, user_mac, sizeof(context->user_mac)); + + in_mod = MLX4_SET_PORT_GENERAL << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_SET_PORT_user_mac); + +int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port, u8 ignore_fcs_value) +{ + struct mlx4_cmd_mailbox 
*mailbox; + struct mlx4_set_port_general_context *context; + u32 in_mod; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + context = mailbox->buf; + context->flags2 |= MLX4_FLAG2_V_IGNORE_FCS_MASK; + if (ignore_fcs_value) + context->ignore_fcs |= MLX4_IGNORE_FCS_MASK; + else + context->ignore_fcs &= ~MLX4_IGNORE_FCS_MASK; + + in_mod = MLX4_SET_PORT_GENERAL << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_SET_PORT_fcs_check); + +enum { + VXLAN_ENABLE_MODIFY = 1 << 7, + VXLAN_STEERING_MODIFY = 1 << 6, + + VXLAN_ENABLE = 1 << 7, +}; + +struct mlx4_set_port_vxlan_context { + u32 reserved1; + u8 modify_flags; + u8 reserved2; + u8 enable_flags; + u8 steering; +}; + +int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable) +{ + int err; + u32 in_mod; + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_port_vxlan_context *context; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + context = mailbox->buf; + memset(context, 0, sizeof(*context)); + + context->modify_flags = VXLAN_ENABLE_MODIFY | VXLAN_STEERING_MODIFY; + if (enable) + context->enable_flags = VXLAN_ENABLE; + context->steering = steering; + + in_mod = MLX4_SET_PORT_VXLAN << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_SET_PORT_VXLAN); + +int mlx4_SET_PORT_BEACON(struct mlx4_dev *dev, u8 port, u16 time) +{ + int err; + struct mlx4_cmd_mailbox *mailbox; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + *((__be32 *)mailbox->buf) = cpu_to_be32(time); + + err = mlx4_cmd(dev, mailbox->dma, port, MLX4_SET_PORT_BEACON_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_SET_PORT_BEACON); + +int mlx4_SET_MCAST_FLTR_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err = 0; + + return err; +} + +int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, + u64 mac, u64 clear, u8 mode) +{ + return mlx4_cmd(dev, (mac | (clear << 63)), port, mode, + MLX4_CMD_SET_MCAST_FLTR, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); +} +EXPORT_SYMBOL(mlx4_SET_MCAST_FLTR); + +int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err = 0; + + return err; +} + +int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + return 0; +} + +int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, + int *slave_id) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int i, found_ix = -1; + int vf_gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; + struct mlx4_slaves_pport slaves_pport; + unsigned num_vfs; + int slave_gid; + + if (!mlx4_is_mfunc(dev)) + return -EINVAL; + + slaves_pport = mlx4_phys_to_slaves_pport(dev, port); + num_vfs = bitmap_weight(slaves_pport.slaves, + 
dev->persist->num_vfs + 1) - 1; + + for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) { + if (!memcmp(priv->port[port].gid_table.roce_gids[i].raw, gid, + MLX4_ROCE_GID_ENTRY_SIZE)) { + found_ix = i; + break; + } + } + + if (found_ix >= 0) { + /* Calculate a slave_gid which is the slave number in the gid + * table and not a globally unique slave number. + */ + if (found_ix < MLX4_ROCE_PF_GIDS) + slave_gid = 0; + else if (found_ix < MLX4_ROCE_PF_GIDS + (vf_gids % num_vfs) * + (vf_gids / num_vfs + 1)) + slave_gid = ((found_ix - MLX4_ROCE_PF_GIDS) / + (vf_gids / num_vfs + 1)) + 1; + else + slave_gid = + ((found_ix - MLX4_ROCE_PF_GIDS - + ((vf_gids % num_vfs) * ((vf_gids / num_vfs + 1)))) / + (vf_gids / num_vfs)) + vf_gids % num_vfs + 1; + + /* Calculate the globally unique slave id */ + if (slave_gid) { + struct mlx4_active_ports exclusive_ports; + struct mlx4_active_ports actv_ports; + struct mlx4_slaves_pport slaves_pport_actv; + unsigned max_port_p_one; + int num_vfs_before = 0; + int candidate_slave_gid; + + /* Calculate how many VFs are on the previous port, if exists */ + for (i = 1; i < port; i++) { + bitmap_zero(exclusive_ports.ports, dev->caps.num_ports); + set_bit(i - 1, exclusive_ports.ports); + slaves_pport_actv = + mlx4_phys_to_slaves_pport_actv( + dev, &exclusive_ports); + num_vfs_before += bitmap_weight( + slaves_pport_actv.slaves, + dev->persist->num_vfs + 1); + } + + /* candidate_slave_gid isn't necessarily the correct slave, but + * it has the same number of ports and is assigned to the same + * ports as the real slave we're looking for. On dual port VF, + * slave_gid = [single port VFs on port <port>] + + * [offset of the current slave from the first dual port VF] + + * 1 (for the PF). + */ + candidate_slave_gid = slave_gid + num_vfs_before; + + actv_ports = mlx4_get_active_ports(dev, candidate_slave_gid); + max_port_p_one = find_first_bit( + actv_ports.ports, dev->caps.num_ports) + + bitmap_weight(actv_ports.ports, + dev->caps.num_ports) + 1; + + /* Calculate the real slave number */ + for (i = 1; i < max_port_p_one; i++) { + if (i == port) + continue; + bitmap_zero(exclusive_ports.ports, + dev->caps.num_ports); + set_bit(i - 1, exclusive_ports.ports); + slaves_pport_actv = + mlx4_phys_to_slaves_pport_actv( + dev, &exclusive_ports); + slave_gid += bitmap_weight( + slaves_pport_actv.slaves, + dev->persist->num_vfs + 1); + } + } + *slave_id = slave_gid; + } + + return (found_ix >= 0) ? 
0 : -EINVAL; +} +EXPORT_SYMBOL(mlx4_get_slave_from_roce_gid); + +int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id, + u8 *gid) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + if (!mlx4_is_master(dev)) + return -EINVAL; + + memcpy(gid, priv->port[port].gid_table.roce_gids[slave_id].raw, + MLX4_ROCE_GID_ENTRY_SIZE); + return 0; +} +EXPORT_SYMBOL(mlx4_get_roce_gid_from_slave); + +/* Cable Module Info */ +#define MODULE_INFO_MAX_READ 48 + +#define I2C_ADDR_LOW 0x50 +#define I2C_ADDR_HIGH 0x51 +#define I2C_PAGE_SIZE 256 +#define I2C_HIGH_PAGE_SIZE 128 + +/* Module Info Data */ +struct mlx4_cable_info { + u8 i2c_addr; + u8 page_num; + __be16 dev_mem_address; + __be16 reserved1; + __be16 size; + __be32 reserved2[2]; + u8 data[MODULE_INFO_MAX_READ]; +}; + +enum cable_info_err { + CABLE_INF_INV_PORT = 0x1, + CABLE_INF_OP_NOSUP = 0x2, + CABLE_INF_NOT_CONN = 0x3, + CABLE_INF_NO_EEPRM = 0x4, + CABLE_INF_PAGE_ERR = 0x5, + CABLE_INF_INV_ADDR = 0x6, + CABLE_INF_I2C_ADDR = 0x7, + CABLE_INF_QSFP_VIO = 0x8, + CABLE_INF_I2C_BUSY = 0x9, +}; + +#define MAD_STATUS_2_CABLE_ERR(mad_status) ((mad_status >> 8) & 0xFF) + +static inline const char *cable_info_mad_err_str(u16 mad_status) +{ + u8 err = MAD_STATUS_2_CABLE_ERR(mad_status); + + switch (err) { + case CABLE_INF_INV_PORT: + return "invalid port selected"; + case CABLE_INF_OP_NOSUP: + return "operation not supported for this port (the port is of type CX4 or internal)"; + case CABLE_INF_NOT_CONN: + return "cable is not connected"; + case CABLE_INF_NO_EEPRM: + return "the connected cable has no EPROM (passive copper cable)"; + case CABLE_INF_PAGE_ERR: + return "page number is greater than 15"; + case CABLE_INF_INV_ADDR: + return "invalid device_address or size (that is, size equals 0 or address+size is greater than 256)"; + case CABLE_INF_I2C_ADDR: + return "invalid I2C slave address"; + case CABLE_INF_QSFP_VIO: + return "at least one cable violates the QSFP specification and ignores the modsel signal"; + case CABLE_INF_I2C_BUSY: + return "I2C bus is constantly busy"; + } + return "Unknown Error"; +} + +static int mlx4_get_module_id(struct mlx4_dev *dev, u8 port, u8 *module_id) +{ + struct mlx4_cmd_mailbox *inbox, *outbox; + struct mlx4_mad_ifc *inmad, *outmad; + struct mlx4_cable_info *cable_info; + int ret; + + inbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(inbox)) + return PTR_ERR(inbox); + + outbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(outbox)) { + mlx4_free_cmd_mailbox(dev, inbox); + return PTR_ERR(outbox); + } + + inmad = (struct mlx4_mad_ifc *)(inbox->buf); + outmad = (struct mlx4_mad_ifc *)(outbox->buf); + + inmad->method = 0x1; /* Get */ + inmad->class_version = 0x1; + inmad->mgmt_class = 0x1; + inmad->base_version = 0x1; + inmad->attr_id = cpu_to_be16(0xFF60); /* Module Info */ + + cable_info = (struct mlx4_cable_info *)inmad->data; + cable_info->dev_mem_address = 0; + cable_info->page_num = 0; + cable_info->i2c_addr = I2C_ADDR_LOW; + cable_info->size = cpu_to_be16(1); + + ret = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3, + MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); + if (ret) + goto out; + + if (be16_to_cpu(outmad->status)) { + /* Mad returned with bad status */ + ret = be16_to_cpu(outmad->status); + mlx4_warn(dev, + "MLX4_CMD_MAD_IFC Get Module ID attr(%x) port(%d) i2c_addr(%x) offset(%d) size(%d): Response Mad Status(%x) - %s\n", + 0xFF60, port, I2C_ADDR_LOW, 0, 1, ret, + cable_info_mad_err_str(ret)); + ret = -ret; + goto out; + } + cable_info = (struct mlx4_cable_info *)outmad->data; 
+ *module_id = cable_info->data[0]; +out: + mlx4_free_cmd_mailbox(dev, inbox); + mlx4_free_cmd_mailbox(dev, outbox); + return ret; +} + +static void mlx4_sfp_eeprom_params_set(u8 *i2c_addr, u8 *page_num, u16 *offset) +{ + *i2c_addr = I2C_ADDR_LOW; + *page_num = 0; + + if (*offset < I2C_PAGE_SIZE) + return; + + *i2c_addr = I2C_ADDR_HIGH; + *offset -= I2C_PAGE_SIZE; +} + +static void mlx4_qsfp_eeprom_params_set(u8 *i2c_addr, u8 *page_num, u16 *offset) +{ + /* Offsets 0-255 belong to page 0. + * Offsets 256-639 belong to pages 01, 02, 03. + * For example, offset 400 is page 02: 1 + (400 - 256) / 128 = 2 + */ + if (*offset < I2C_PAGE_SIZE) + *page_num = 0; + else + *page_num = 1 + (*offset - I2C_PAGE_SIZE) / I2C_HIGH_PAGE_SIZE; + *i2c_addr = I2C_ADDR_LOW; + *offset -= *page_num * I2C_HIGH_PAGE_SIZE; +} + +/** + * mlx4_get_module_info - Read cable module eeprom data + * @dev: mlx4_dev. + * @port: port number. + * @offset: byte offset in eeprom to start reading data from. + * @size: num of bytes to read. + * @data: output buffer to put the requested data into. + * + * Reads cable module eeprom data, puts the outcome data into + * data pointer paramer. + * Returns num of read bytes on success or a negative error + * code. + */ +int mlx4_get_module_info(struct mlx4_dev *dev, u8 port, + u16 offset, u16 size, u8 *data) +{ + struct mlx4_cmd_mailbox *inbox, *outbox; + struct mlx4_mad_ifc *inmad, *outmad; + struct mlx4_cable_info *cable_info; + u8 module_id, i2c_addr, page_num; + int ret; + + if (size > MODULE_INFO_MAX_READ) + size = MODULE_INFO_MAX_READ; + + ret = mlx4_get_module_id(dev, port, &module_id); + if (ret) + return ret; + + switch (module_id) { + case MLX4_MODULE_ID_SFP: + mlx4_sfp_eeprom_params_set(&i2c_addr, &page_num, &offset); + break; + case MLX4_MODULE_ID_QSFP: + case MLX4_MODULE_ID_QSFP_PLUS: + case MLX4_MODULE_ID_QSFP28: + mlx4_qsfp_eeprom_params_set(&i2c_addr, &page_num, &offset); + break; + default: + mlx4_err(dev, "Module ID not recognized: %#x\n", module_id); + return -EINVAL; + } + + inbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(inbox)) + return PTR_ERR(inbox); + + outbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(outbox)) { + mlx4_free_cmd_mailbox(dev, inbox); + return PTR_ERR(outbox); + } + + inmad = (struct mlx4_mad_ifc *)(inbox->buf); + outmad = (struct mlx4_mad_ifc *)(outbox->buf); + + inmad->method = 0x1; /* Get */ + inmad->class_version = 0x1; + inmad->mgmt_class = 0x1; + inmad->base_version = 0x1; + inmad->attr_id = cpu_to_be16(0xFF60); /* Module Info */ + + if (offset < I2C_PAGE_SIZE && offset + size > I2C_PAGE_SIZE) + /* Cross pages reads are not allowed + * read until offset 256 in low page + */ + size -= offset + size - I2C_PAGE_SIZE; + + cable_info = (struct mlx4_cable_info *)inmad->data; + cable_info->dev_mem_address = cpu_to_be16(offset); + cable_info->page_num = page_num; + cable_info->i2c_addr = i2c_addr; + cable_info->size = cpu_to_be16(size); + + ret = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3, + MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); + if (ret) + goto out; + + if (be16_to_cpu(outmad->status)) { + /* Mad returned with bad status */ + ret = be16_to_cpu(outmad->status); + mlx4_warn(dev, + "MLX4_CMD_MAD_IFC Get Module info attr(%x) port(%d) i2c_addr(%x) offset(%d) size(%d): Response Mad Status(%x) - %s\n", + 0xFF60, port, i2c_addr, offset, size, + ret, cable_info_mad_err_str(ret)); + + if (i2c_addr == I2C_ADDR_HIGH && + MAD_STATUS_2_CABLE_ERR(ret) == CABLE_INF_I2C_ADDR) + /* Some SFP cables do not support i2c slave + * address 
0x51 (high page), abort silently. + */ + ret = 0; + else + ret = -ret; + goto out; + } + cable_info = (struct mlx4_cable_info *)outmad->data; + memcpy(data, cable_info->data, size); + ret = size; +out: + mlx4_free_cmd_mailbox(dev, inbox); + mlx4_free_cmd_mailbox(dev, outbox); + return ret; +} +EXPORT_SYMBOL(mlx4_get_module_info); + +int mlx4_max_tc(struct mlx4_dev *dev) +{ + u8 num_tc = dev->caps.max_tc_eth; + + if (!num_tc) + num_tc = MLX4_TC_MAX_NUMBER; + + return num_tc; +} +EXPORT_SYMBOL(mlx4_max_tc); diff --git a/drivers/net/ethernet/mellanox/mlx4/profile.c b/drivers/net/ethernet/mellanox/mlx4/profile.c new file mode 100644 index 000000000..ba361c5fb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/profile.c @@ -0,0 +1,270 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/slab.h> + +#include "mlx4.h" +#include "fw.h" + +enum { + MLX4_RES_QP, + MLX4_RES_RDMARC, + MLX4_RES_ALTC, + MLX4_RES_AUXC, + MLX4_RES_SRQ, + MLX4_RES_CQ, + MLX4_RES_EQ, + MLX4_RES_DMPT, + MLX4_RES_CMPT, + MLX4_RES_MTT, + MLX4_RES_MCG, + MLX4_RES_NUM +}; + +static const char *res_name[] = { + [MLX4_RES_QP] = "QP", + [MLX4_RES_RDMARC] = "RDMARC", + [MLX4_RES_ALTC] = "ALTC", + [MLX4_RES_AUXC] = "AUXC", + [MLX4_RES_SRQ] = "SRQ", + [MLX4_RES_CQ] = "CQ", + [MLX4_RES_EQ] = "EQ", + [MLX4_RES_DMPT] = "DMPT", + [MLX4_RES_CMPT] = "CMPT", + [MLX4_RES_MTT] = "MTT", + [MLX4_RES_MCG] = "MCG", +}; + +u64 mlx4_make_profile(struct mlx4_dev *dev, + struct mlx4_profile *request, + struct mlx4_dev_cap *dev_cap, + struct mlx4_init_hca_param *init_hca) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource { + u64 size; + u64 start; + int type; + u32 num; + int log_num; + }; + + u64 total_size = 0; + struct mlx4_resource *profile; + struct sysinfo si; + int i, j; + + profile = kcalloc(MLX4_RES_NUM, sizeof(*profile), GFP_KERNEL); + if (!profile) + return -ENOMEM; + + /* + * We want to scale the number of MTTs with the size of the + * system memory, since it makes sense to register a lot of + * memory on a system with a lot of memory. As a heuristic, + * make sure we have enough MTTs to cover twice the system + * memory (with PAGE_SIZE entries). + * + * This number has to be a power of two and fit into 32 bits + * due to device limitations, so cap this at 2^31 as well. + * That limits us to 8TB of memory registration per HCA with + * 4KB pages, which is probably OK for the next few months. + */ + si_meminfo(&si); + request->num_mtt = + roundup_pow_of_two(max_t(unsigned, request->num_mtt, + min(1UL << (31 - log_mtts_per_seg), + (si.totalram << 1) >> log_mtts_per_seg))); + + + profile[MLX4_RES_QP].size = dev_cap->qpc_entry_sz; + profile[MLX4_RES_RDMARC].size = dev_cap->rdmarc_entry_sz; + profile[MLX4_RES_ALTC].size = dev_cap->altc_entry_sz; + profile[MLX4_RES_AUXC].size = dev_cap->aux_entry_sz; + profile[MLX4_RES_SRQ].size = dev_cap->srq_entry_sz; + profile[MLX4_RES_CQ].size = dev_cap->cqc_entry_sz; + profile[MLX4_RES_EQ].size = dev_cap->eqc_entry_sz; + profile[MLX4_RES_DMPT].size = dev_cap->dmpt_entry_sz; + profile[MLX4_RES_CMPT].size = dev_cap->cmpt_entry_sz; + profile[MLX4_RES_MTT].size = dev_cap->mtt_entry_sz; + profile[MLX4_RES_MCG].size = mlx4_get_mgm_entry_size(dev); + + profile[MLX4_RES_QP].num = request->num_qp; + profile[MLX4_RES_RDMARC].num = request->num_qp * request->rdmarc_per_qp; + profile[MLX4_RES_ALTC].num = request->num_qp; + profile[MLX4_RES_AUXC].num = request->num_qp; + profile[MLX4_RES_SRQ].num = request->num_srq; + profile[MLX4_RES_CQ].num = request->num_cq; + profile[MLX4_RES_EQ].num = mlx4_is_mfunc(dev) ? dev->phys_caps.num_phys_eqs : + min_t(unsigned, dev_cap->max_eqs, MAX_MSIX); + profile[MLX4_RES_DMPT].num = request->num_mpt; + profile[MLX4_RES_CMPT].num = MLX4_NUM_CMPTS; + profile[MLX4_RES_MTT].num = request->num_mtt * (1 << log_mtts_per_seg); + profile[MLX4_RES_MCG].num = request->num_mcg; + + for (i = 0; i < MLX4_RES_NUM; ++i) { + profile[i].type = i; + profile[i].num = roundup_pow_of_two(profile[i].num); + profile[i].log_num = ilog2(profile[i].num); + profile[i].size *= profile[i].num; + profile[i].size = max(profile[i].size, (u64) PAGE_SIZE); + } + + /* + * Sort the resources in decreasing order of size. 
Since they + * all have sizes that are powers of 2, we'll be able to keep + * resources aligned to their size and pack them without gaps + * using the sorted order. + */ + for (i = MLX4_RES_NUM; i > 0; --i) + for (j = 1; j < i; ++j) { + if (profile[j].size > profile[j - 1].size) + swap(profile[j], profile[j - 1]); + } + + for (i = 0; i < MLX4_RES_NUM; ++i) { + if (profile[i].size) { + profile[i].start = total_size; + total_size += profile[i].size; + } + + if (total_size > dev_cap->max_icm_sz) { + mlx4_err(dev, "Profile requires 0x%llx bytes; won't fit in 0x%llx bytes of context memory\n", + (unsigned long long) total_size, + (unsigned long long) dev_cap->max_icm_sz); + kfree(profile); + return -ENOMEM; + } + + if (profile[i].size) + mlx4_dbg(dev, " profile[%2d] (%6s): 2^%02d entries @ 0x%10llx, size 0x%10llx\n", + i, res_name[profile[i].type], + profile[i].log_num, + (unsigned long long) profile[i].start, + (unsigned long long) profile[i].size); + } + + mlx4_dbg(dev, "HCA context memory: reserving %d KB\n", + (int) (total_size >> 10)); + + for (i = 0; i < MLX4_RES_NUM; ++i) { + switch (profile[i].type) { + case MLX4_RES_QP: + dev->caps.num_qps = profile[i].num; + init_hca->qpc_base = profile[i].start; + init_hca->log_num_qps = profile[i].log_num; + break; + case MLX4_RES_RDMARC: + for (priv->qp_table.rdmarc_shift = 0; + request->num_qp << priv->qp_table.rdmarc_shift < profile[i].num; + ++priv->qp_table.rdmarc_shift) + ; /* nothing */ + dev->caps.max_qp_dest_rdma = 1 << priv->qp_table.rdmarc_shift; + priv->qp_table.rdmarc_base = (u32) profile[i].start; + init_hca->rdmarc_base = profile[i].start; + init_hca->log_rd_per_qp = priv->qp_table.rdmarc_shift; + break; + case MLX4_RES_ALTC: + init_hca->altc_base = profile[i].start; + break; + case MLX4_RES_AUXC: + init_hca->auxc_base = profile[i].start; + break; + case MLX4_RES_SRQ: + dev->caps.num_srqs = profile[i].num; + init_hca->srqc_base = profile[i].start; + init_hca->log_num_srqs = profile[i].log_num; + break; + case MLX4_RES_CQ: + dev->caps.num_cqs = profile[i].num; + init_hca->cqc_base = profile[i].start; + init_hca->log_num_cqs = profile[i].log_num; + break; + case MLX4_RES_EQ: + if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) { + init_hca->log_num_eqs = 0x1f; + init_hca->eqc_base = profile[i].start; + init_hca->num_sys_eqs = dev_cap->num_sys_eqs; + } else { + dev->caps.num_eqs = roundup_pow_of_two( + min_t(unsigned, + dev_cap->max_eqs, + MAX_MSIX)); + init_hca->eqc_base = profile[i].start; + init_hca->log_num_eqs = ilog2(dev->caps.num_eqs); + } + break; + case MLX4_RES_DMPT: + dev->caps.num_mpts = profile[i].num; + priv->mr_table.mpt_base = profile[i].start; + init_hca->dmpt_base = profile[i].start; + init_hca->log_mpt_sz = profile[i].log_num; + break; + case MLX4_RES_CMPT: + init_hca->cmpt_base = profile[i].start; + break; + case MLX4_RES_MTT: + dev->caps.num_mtts = profile[i].num; + priv->mr_table.mtt_base = profile[i].start; + init_hca->mtt_base = profile[i].start; + break; + case MLX4_RES_MCG: + init_hca->mc_base = profile[i].start; + init_hca->log_mc_entry_sz = + ilog2(mlx4_get_mgm_entry_size(dev)); + init_hca->log_mc_table_sz = profile[i].log_num; + if (dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + dev->caps.num_mgms = profile[i].num; + } else { + init_hca->log_mc_hash_sz = + profile[i].log_num - 1; + dev->caps.num_mgms = profile[i].num >> 1; + dev->caps.num_amgms = profile[i].num >> 1; + } + break; + default: + break; + } + } + + /* + * PDs don't take any HCA memory, but we assign them as part + * of the HCA 
profile anyway. + */ + dev->caps.num_pds = MLX4_NUM_PDS; + + kfree(profile); + return total_size; +} diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c new file mode 100644 index 000000000..48cfaa7ea --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -0,0 +1,966 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/gfp.h> +#include <linux/export.h> + +#include <linux/mlx4/cmd.h> +#include <linux/mlx4/qp.h> + +#include "mlx4.h" +#include "icm.h" + +/* QP to support BF should have bits 6,7 cleared */ +#define MLX4_BF_QP_SKIP_MASK 0xc0 +#define MLX4_MAX_BF_QP_RANGE 0x40 + +void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type) +{ + struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; + struct mlx4_qp *qp; + + spin_lock(&qp_table->lock); + + qp = __mlx4_qp_lookup(dev, qpn); + if (qp) + refcount_inc(&qp->refcount); + + spin_unlock(&qp_table->lock); + + if (!qp) { + mlx4_dbg(dev, "Async event for none existent QP %08x\n", qpn); + return; + } + + qp->event(qp, event_type); + + if (refcount_dec_and_test(&qp->refcount)) + complete(&qp->free); +} + +/* used for INIT/CLOSE port logic */ +static int is_master_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp, int *real_qp0, int *proxy_qp0) +{ + /* this procedure is called after we already know we are on the master */ + /* qp0 is either the proxy qp0, or the real qp0 */ + u32 pf_proxy_offset = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev); + *proxy_qp0 = qp->qpn >= pf_proxy_offset && qp->qpn <= pf_proxy_offset + 1; + + *real_qp0 = qp->qpn >= dev->phys_caps.base_sqpn && + qp->qpn <= dev->phys_caps.base_sqpn + 1; + + return *real_qp0 || *proxy_qp0; +} + +static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, + enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state, + struct mlx4_qp_context *context, + enum mlx4_qp_optpar optpar, + int sqd_event, struct mlx4_qp *qp, int native) +{ + static const u16 op[MLX4_QP_NUM_STATE][MLX4_QP_NUM_STATE] = { + [MLX4_QP_STATE_RST] = { + [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP, + [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP, + [MLX4_QP_STATE_INIT] = MLX4_CMD_RST2INIT_QP, + }, + [MLX4_QP_STATE_INIT] = { + [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP, + [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP, + [MLX4_QP_STATE_INIT] = MLX4_CMD_INIT2INIT_QP, + [MLX4_QP_STATE_RTR] = MLX4_CMD_INIT2RTR_QP, + }, + [MLX4_QP_STATE_RTR] = { + [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP, + [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP, + [MLX4_QP_STATE_RTS] = MLX4_CMD_RTR2RTS_QP, + }, + [MLX4_QP_STATE_RTS] = { + [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP, + [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP, + [MLX4_QP_STATE_RTS] = MLX4_CMD_RTS2RTS_QP, + [MLX4_QP_STATE_SQD] = MLX4_CMD_RTS2SQD_QP, + }, + [MLX4_QP_STATE_SQD] = { + [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP, + [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP, + [MLX4_QP_STATE_RTS] = MLX4_CMD_SQD2RTS_QP, + [MLX4_QP_STATE_SQD] = MLX4_CMD_SQD2SQD_QP, + }, + [MLX4_QP_STATE_SQER] = { + [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP, + [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP, + [MLX4_QP_STATE_RTS] = MLX4_CMD_SQERR2RTS_QP, + }, + [MLX4_QP_STATE_ERR] = { + [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP, + [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP, + } + }; + + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cmd_mailbox *mailbox; + int ret = 0; + int real_qp0 = 0; + int proxy_qp0 = 0; + u8 port; + + if (cur_state >= MLX4_QP_NUM_STATE || new_state >= MLX4_QP_NUM_STATE || + !op[cur_state][new_state]) + return -EINVAL; + + if (op[cur_state][new_state] == MLX4_CMD_2RST_QP) { + ret = mlx4_cmd(dev, 0, qp->qpn, 2, + MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A, native); + if (mlx4_is_master(dev) && cur_state != MLX4_QP_STATE_ERR && + cur_state != MLX4_QP_STATE_RST && + is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) { + port = (qp->qpn & 1) + 1; + if (proxy_qp0) + 
priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0; + else + priv->mfunc.master.qp0_state[port].qp0_active = 0; + } + return ret; + } + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + if (cur_state == MLX4_QP_STATE_RST && new_state == MLX4_QP_STATE_INIT) { + u64 mtt_addr = mlx4_mtt_addr(dev, mtt); + context->mtt_base_addr_h = mtt_addr >> 32; + context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff); + context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; + } + + if ((cur_state == MLX4_QP_STATE_RTR) && + (new_state == MLX4_QP_STATE_RTS) && + dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) + context->roce_entropy = + cpu_to_be16(mlx4_qp_roce_entropy(dev, qp->qpn)); + + *(__be32 *) mailbox->buf = cpu_to_be32(optpar); + memcpy(mailbox->buf + 8, context, sizeof(*context)); + + ((struct mlx4_qp_context *) (mailbox->buf + 8))->local_qpn = + cpu_to_be32(qp->qpn); + + ret = mlx4_cmd(dev, mailbox->dma, + qp->qpn | (!!sqd_event << 31), + new_state == MLX4_QP_STATE_RST ? 2 : 0, + op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C, native); + + if (mlx4_is_master(dev) && is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) { + port = (qp->qpn & 1) + 1; + if (cur_state != MLX4_QP_STATE_ERR && + cur_state != MLX4_QP_STATE_RST && + new_state == MLX4_QP_STATE_ERR) { + if (proxy_qp0) + priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0; + else + priv->mfunc.master.qp0_state[port].qp0_active = 0; + } else if (new_state == MLX4_QP_STATE_RTR) { + if (proxy_qp0) + priv->mfunc.master.qp0_state[port].proxy_qp0_active = 1; + else + priv->mfunc.master.qp0_state[port].qp0_active = 1; + } + } + + mlx4_free_cmd_mailbox(dev, mailbox); + return ret; +} + +int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, + enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state, + struct mlx4_qp_context *context, + enum mlx4_qp_optpar optpar, + int sqd_event, struct mlx4_qp *qp) +{ + return __mlx4_qp_modify(dev, mtt, cur_state, new_state, context, + optpar, sqd_event, qp, 0); +} +EXPORT_SYMBOL_GPL(mlx4_qp_modify); + +int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, + int *base, u8 flags) +{ + u32 uid; + int bf_qp = !!(flags & (u8)MLX4_RESERVE_ETH_BF_QP); + + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_qp_table *qp_table = &priv->qp_table; + + if (cnt > MLX4_MAX_BF_QP_RANGE && bf_qp) + return -ENOMEM; + + uid = MLX4_QP_TABLE_ZONE_GENERAL; + if (flags & (u8)MLX4_RESERVE_A0_QP) { + if (bf_qp) + uid = MLX4_QP_TABLE_ZONE_RAW_ETH; + else + uid = MLX4_QP_TABLE_ZONE_RSS; + } + + *base = mlx4_zone_alloc_entries(qp_table->zones, uid, cnt, align, + bf_qp ? 
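+					/* Worked example of the skip mask used
+					 * on the blue-flame path:
+					 * MLX4_BF_QP_SKIP_MASK is 0xc0, so only
+					 * QP numbers with bits 6 and 7 clear
+					 * (offsets 0x00-0x3f inside each 256-QP
+					 * block) can be handed out for a
+					 * MLX4_RESERVE_ETH_BF_QP request.
+					 */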
MLX4_BF_QP_SKIP_MASK : 0, NULL); + if (*base == -1) + return -ENOMEM; + + return 0; +} + +int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, + int *base, u8 flags, u8 usage) +{ + u32 in_modifier = RES_QP | (((u32)usage & 3) << 30); + u64 in_param = 0; + u64 out_param; + int err; + + /* Turn off all unsupported QP allocation flags */ + flags &= dev->caps.alloc_res_qp_mask; + + if (mlx4_is_mfunc(dev)) { + set_param_l(&in_param, (((u32)flags) << 24) | (u32)cnt); + set_param_h(&in_param, align); + err = mlx4_cmd_imm(dev, in_param, &out_param, + in_modifier, RES_OP_RESERVE, + MLX4_CMD_ALLOC_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (err) + return err; + + *base = get_param_l(&out_param); + return 0; + } + return __mlx4_qp_reserve_range(dev, cnt, align, base, flags); +} +EXPORT_SYMBOL_GPL(mlx4_qp_reserve_range); + +void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_qp_table *qp_table = &priv->qp_table; + + if (mlx4_is_qp_reserved(dev, (u32) base_qpn)) + return; + mlx4_zone_free_entries_unique(qp_table->zones, base_qpn, cnt); +} + +void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt) +{ + u64 in_param = 0; + int err; + + if (!cnt) + return; + + if (mlx4_is_mfunc(dev)) { + set_param_l(&in_param, base_qpn); + set_param_h(&in_param, cnt); + err = mlx4_cmd(dev, in_param, RES_QP, RES_OP_RESERVE, + MLX4_CMD_FREE_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (err) { + mlx4_warn(dev, "Failed to release qp range base:%d cnt:%d\n", + base_qpn, cnt); + } + } else + __mlx4_qp_release_range(dev, base_qpn, cnt); +} +EXPORT_SYMBOL_GPL(mlx4_qp_release_range); + +int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_qp_table *qp_table = &priv->qp_table; + int err; + + err = mlx4_table_get(dev, &qp_table->qp_table, qpn); + if (err) + goto err_out; + + err = mlx4_table_get(dev, &qp_table->auxc_table, qpn); + if (err) + goto err_put_qp; + + err = mlx4_table_get(dev, &qp_table->altc_table, qpn); + if (err) + goto err_put_auxc; + + err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn); + if (err) + goto err_put_altc; + + err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn); + if (err) + goto err_put_rdmarc; + + return 0; + +err_put_rdmarc: + mlx4_table_put(dev, &qp_table->rdmarc_table, qpn); + +err_put_altc: + mlx4_table_put(dev, &qp_table->altc_table, qpn); + +err_put_auxc: + mlx4_table_put(dev, &qp_table->auxc_table, qpn); + +err_put_qp: + mlx4_table_put(dev, &qp_table->qp_table, qpn); + +err_out: + return err; +} + +static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn) +{ + u64 param = 0; + + if (mlx4_is_mfunc(dev)) { + set_param_l(¶m, qpn); + return mlx4_cmd_imm(dev, param, ¶m, RES_QP, RES_OP_MAP_ICM, + MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_WRAPPED); + } + return __mlx4_qp_alloc_icm(dev, qpn); +} + +void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_qp_table *qp_table = &priv->qp_table; + + mlx4_table_put(dev, &qp_table->cmpt_table, qpn); + mlx4_table_put(dev, &qp_table->rdmarc_table, qpn); + mlx4_table_put(dev, &qp_table->altc_table, qpn); + mlx4_table_put(dev, &qp_table->auxc_table, qpn); + mlx4_table_put(dev, &qp_table->qp_table, qpn); +} + +static void mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn) +{ + u64 in_param = 0; + + if (mlx4_is_mfunc(dev)) { + set_param_l(&in_param, qpn); + if (mlx4_cmd(dev, in_param, RES_QP, 
RES_OP_MAP_ICM, + MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_WRAPPED)) + mlx4_warn(dev, "Failed to free icm of qp:%d\n", qpn); + } else + __mlx4_qp_free_icm(dev, qpn); +} + +struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn) +{ + struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; + struct mlx4_qp *qp; + + spin_lock_irq(&qp_table->lock); + + qp = __mlx4_qp_lookup(dev, qpn); + + spin_unlock_irq(&qp_table->lock); + return qp; +} + +int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_qp_table *qp_table = &priv->qp_table; + int err; + + if (!qpn) + return -EINVAL; + + qp->qpn = qpn; + + err = mlx4_qp_alloc_icm(dev, qpn); + if (err) + return err; + + spin_lock_irq(&qp_table->lock); + err = radix_tree_insert(&dev->qp_table_tree, qp->qpn & + (dev->caps.num_qps - 1), qp); + spin_unlock_irq(&qp_table->lock); + if (err) + goto err_icm; + + refcount_set(&qp->refcount, 1); + init_completion(&qp->free); + + return 0; + +err_icm: + mlx4_qp_free_icm(dev, qpn); + return err; +} + +EXPORT_SYMBOL_GPL(mlx4_qp_alloc); + +int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, + enum mlx4_update_qp_attr attr, + struct mlx4_update_qp_params *params) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_update_qp_context *cmd; + u64 pri_addr_path_mask = 0; + u64 qp_mask = 0; + int err = 0; + + if (!attr || (attr & ~MLX4_UPDATE_QP_SUPPORTED_ATTRS)) + return -EINVAL; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + cmd = (struct mlx4_update_qp_context *)mailbox->buf; + + if (attr & MLX4_UPDATE_QP_SMAC) { + pri_addr_path_mask |= 1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX; + cmd->qp_context.pri_path.grh_mylmc = params->smac_index; + } + + if (attr & MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB) { + if (!(dev->caps.flags2 + & MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) { + mlx4_warn(dev, + "Trying to set src check LB, but it isn't supported\n"); + err = -EOPNOTSUPP; + goto out; + } + pri_addr_path_mask |= + 1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB; + if (params->flags & + MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB) { + cmd->qp_context.pri_path.fl |= + MLX4_FL_ETH_SRC_CHECK_MC_LB; + } + } + + if (attr & MLX4_UPDATE_QP_VSD) { + qp_mask |= 1ULL << MLX4_UPD_QP_MASK_VSD; + if (params->flags & MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE) + cmd->qp_context.param3 |= cpu_to_be32(MLX4_STRIP_VLAN); + } + + if (attr & MLX4_UPDATE_QP_RATE_LIMIT) { + qp_mask |= 1ULL << MLX4_UPD_QP_MASK_RATE_LIMIT; + cmd->qp_context.rate_limit_params = cpu_to_be16((params->rate_unit << 14) | params->rate_val); + } + + if (attr & MLX4_UPDATE_QP_QOS_VPORT) { + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QOS_VPP)) { + mlx4_warn(dev, "Granular QoS per VF is not enabled\n"); + err = -EOPNOTSUPP; + goto out; + } + + qp_mask |= 1ULL << MLX4_UPD_QP_MASK_QOS_VPP; + cmd->qp_context.qos_vport = params->qos_vport; + } + + cmd->primary_addr_path_mask = cpu_to_be64(pri_addr_path_mask); + cmd->qp_mask = cpu_to_be64(qp_mask); + + err = mlx4_cmd(dev, mailbox->dma, qpn & 0xffffff, 0, + MLX4_CMD_UPDATE_QP, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_update_qp); + +void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp) +{ + struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; + unsigned long flags; + + spin_lock_irqsave(&qp_table->lock, flags); + radix_tree_delete(&dev->qp_table_tree, qp->qpn & (dev->caps.num_qps - 1)); + 
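+	/* A rough usage sketch (kept under #if 0, never compiled) of how a
+	 * consumer is expected to drive the QP helpers exported from this
+	 * file; error handling is omitted, and the flags/usage arguments
+	 * passed below are placeholder assumptions, as is the MTT setup.
+	 */
+#if 0
+	{
+		struct mlx4_qp_context ctx = {};
+		enum mlx4_qp_state qp_state;
+		struct mlx4_qp my_qp;
+		struct mlx4_mtt mtt;	/* assumed already set up via mlx4_mtt_init() */
+		int base_qpn;
+
+		/* reserve a QP number, then back it with ICM and insert it
+		 * into the radix tree */
+		mlx4_qp_reserve_range(dev, 1, 1, &base_qpn, 0, 0);
+		mlx4_qp_alloc(dev, base_qpn, &my_qp);
+
+		/* walk RST -> INIT -> RTR -> RTS through mlx4_qp_modify() */
+		mlx4_qp_to_ready(dev, &mtt, &ctx, &my_qp, &qp_state);
+
+		/* teardown mirrors the setup order */
+		mlx4_qp_remove(dev, &my_qp);
+		mlx4_qp_free(dev, &my_qp);
+		mlx4_qp_release_range(dev, base_qpn, 1);
+	}
+#endif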
spin_unlock_irqrestore(&qp_table->lock, flags); +} +EXPORT_SYMBOL_GPL(mlx4_qp_remove); + +void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp) +{ + if (refcount_dec_and_test(&qp->refcount)) + complete(&qp->free); + wait_for_completion(&qp->free); + + mlx4_qp_free_icm(dev, qp->qpn); +} +EXPORT_SYMBOL_GPL(mlx4_qp_free); + +static int mlx4_CONF_SPECIAL_QP(struct mlx4_dev *dev, u32 base_qpn) +{ + return mlx4_cmd(dev, 0, base_qpn, 0, MLX4_CMD_CONF_SPECIAL_QP, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); +} + +#define MLX4_QP_TABLE_RSS_ETH_PRIORITY 2 +#define MLX4_QP_TABLE_RAW_ETH_PRIORITY 1 +#define MLX4_QP_TABLE_RAW_ETH_SIZE 256 + +static int mlx4_create_zones(struct mlx4_dev *dev, + u32 reserved_bottom_general, + u32 reserved_top_general, + u32 reserved_bottom_rss, + u32 start_offset_rss, + u32 max_table_offset) +{ + struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; + struct mlx4_bitmap (*bitmap)[MLX4_QP_TABLE_ZONE_NUM] = NULL; + int bitmap_initialized = 0; + u32 last_offset; + int k; + int err; + + qp_table->zones = mlx4_zone_allocator_create(MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP); + + if (NULL == qp_table->zones) + return -ENOMEM; + + bitmap = kmalloc(sizeof(*bitmap), GFP_KERNEL); + + if (NULL == bitmap) { + err = -ENOMEM; + goto free_zone; + } + + err = mlx4_bitmap_init(*bitmap + MLX4_QP_TABLE_ZONE_GENERAL, dev->caps.num_qps, + (1 << 23) - 1, reserved_bottom_general, + reserved_top_general); + + if (err) + goto free_bitmap; + + ++bitmap_initialized; + + err = mlx4_zone_add_one(qp_table->zones, *bitmap + MLX4_QP_TABLE_ZONE_GENERAL, + MLX4_ZONE_FALLBACK_TO_HIGHER_PRIO | + MLX4_ZONE_USE_RR, 0, + 0, qp_table->zones_uids + MLX4_QP_TABLE_ZONE_GENERAL); + + if (err) + goto free_bitmap; + + err = mlx4_bitmap_init(*bitmap + MLX4_QP_TABLE_ZONE_RSS, + reserved_bottom_rss, + reserved_bottom_rss - 1, + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], + reserved_bottom_rss - start_offset_rss); + + if (err) + goto free_bitmap; + + ++bitmap_initialized; + + err = mlx4_zone_add_one(qp_table->zones, *bitmap + MLX4_QP_TABLE_ZONE_RSS, + MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO | + MLX4_ZONE_ALLOW_ALLOC_FROM_EQ_PRIO | + MLX4_ZONE_USE_RR, MLX4_QP_TABLE_RSS_ETH_PRIORITY, + 0, qp_table->zones_uids + MLX4_QP_TABLE_ZONE_RSS); + + if (err) + goto free_bitmap; + + last_offset = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW]; + /* We have a single zone for the A0 steering QPs area of the FW. This area + * needs to be split into subareas. One set of subareas is for RSS QPs + * (in which qp number bits 6 and/or 7 are set); the other set of subareas + * is for RAW_ETH QPs, which require that both bits 6 and 7 are zero. + * Currently, the values returned by the FW (A0 steering area starting qp number + * and A0 steering area size) are such that there are only two subareas -- one + * for RSS and one for RAW_ETH. + */ + for (k = MLX4_QP_TABLE_ZONE_RSS + 1; k < sizeof(*bitmap)/sizeof((*bitmap)[0]); + k++) { + int size; + u32 offset = start_offset_rss; + u32 bf_mask; + u32 requested_size; + + /* Assuming MLX4_BF_QP_SKIP_MASK is consecutive ones, this calculates + * a mask of all LSB bits set until (and not including) the first + * set bit of MLX4_BF_QP_SKIP_MASK. For example, if MLX4_BF_QP_SKIP_MASK + * is 0xc0, bf_mask will be 0x3f. 
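+		 * Working that through: 0xc0 - 1 = 0xbf, ~0xbf keeps only bit 6,
+		 * so (0xc0 & ~0xbf) = 0x40 and bf_mask = 0x40 - 1 = 0x3f.  The
+		 * requested size below is therefore
+		 * min(MLX4_QP_TABLE_RAW_ETH_SIZE, 0x40) = 64 QPs, i.e. one run
+		 * of QP numbers in which bits 6 and 7 stay zero.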
+ */ + bf_mask = (MLX4_BF_QP_SKIP_MASK & ~(MLX4_BF_QP_SKIP_MASK - 1)) - 1; + requested_size = min((u32)MLX4_QP_TABLE_RAW_ETH_SIZE, bf_mask + 1); + + if (((last_offset & MLX4_BF_QP_SKIP_MASK) && + ((int)(max_table_offset - last_offset)) >= + roundup_pow_of_two(MLX4_BF_QP_SKIP_MASK)) || + (!(last_offset & MLX4_BF_QP_SKIP_MASK) && + !((last_offset + requested_size - 1) & + MLX4_BF_QP_SKIP_MASK))) + size = requested_size; + else { + u32 candidate_offset = + (last_offset | MLX4_BF_QP_SKIP_MASK | bf_mask) + 1; + + if (last_offset & MLX4_BF_QP_SKIP_MASK) + last_offset = candidate_offset; + + /* From this point, the BF bits are 0 */ + + if (last_offset > max_table_offset) { + /* need to skip */ + size = -1; + } else { + size = min3(max_table_offset - last_offset, + bf_mask - (last_offset & bf_mask), + requested_size); + if (size < requested_size) { + int candidate_size; + + candidate_size = min3( + max_table_offset - candidate_offset, + bf_mask - (last_offset & bf_mask), + requested_size); + + /* We will not take this path if last_offset was + * already set above to candidate_offset + */ + if (candidate_size > size) { + last_offset = candidate_offset; + size = candidate_size; + } + } + } + } + + if (size > 0) { + /* mlx4_bitmap_alloc_range will find a contiguous range of "size" + * QPs in which both bits 6 and 7 are zero, because we pass it the + * MLX4_BF_SKIP_MASK). + */ + offset = mlx4_bitmap_alloc_range( + *bitmap + MLX4_QP_TABLE_ZONE_RSS, + size, 1, + MLX4_BF_QP_SKIP_MASK); + + if (offset == (u32)-1) { + err = -ENOMEM; + break; + } + + last_offset = offset + size; + + err = mlx4_bitmap_init(*bitmap + k, roundup_pow_of_two(size), + roundup_pow_of_two(size) - 1, 0, + roundup_pow_of_two(size) - size); + } else { + /* Add an empty bitmap, we'll allocate from different zones (since + * at least one is reserved) + */ + err = mlx4_bitmap_init(*bitmap + k, 1, + MLX4_QP_TABLE_RAW_ETH_SIZE - 1, 0, + 0); + if (!err) + mlx4_bitmap_alloc_range(*bitmap + k, 1, 1, 0); + } + + if (err) + break; + + ++bitmap_initialized; + + err = mlx4_zone_add_one(qp_table->zones, *bitmap + k, + MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO | + MLX4_ZONE_ALLOW_ALLOC_FROM_EQ_PRIO | + MLX4_ZONE_USE_RR, MLX4_QP_TABLE_RAW_ETH_PRIORITY, + offset, qp_table->zones_uids + k); + + if (err) + break; + } + + if (err) + goto free_bitmap; + + qp_table->bitmap_gen = *bitmap; + + return err; + +free_bitmap: + for (k = 0; k < bitmap_initialized; k++) + mlx4_bitmap_cleanup(*bitmap + k); + kfree(bitmap); +free_zone: + mlx4_zone_allocator_destroy(qp_table->zones); + return err; +} + +static void mlx4_cleanup_qp_zones(struct mlx4_dev *dev) +{ + struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; + + if (qp_table->zones) { + int i; + + for (i = 0; + i < ARRAY_SIZE(qp_table->zones_uids); + i++) { + struct mlx4_bitmap *bitmap = + mlx4_zone_get_bitmap(qp_table->zones, + qp_table->zones_uids[i]); + + mlx4_zone_remove_one(qp_table->zones, qp_table->zones_uids[i]); + if (NULL == bitmap) + continue; + + mlx4_bitmap_cleanup(bitmap); + } + mlx4_zone_allocator_destroy(qp_table->zones); + kfree(qp_table->bitmap_gen); + qp_table->bitmap_gen = NULL; + qp_table->zones = NULL; + } +} + +int mlx4_init_qp_table(struct mlx4_dev *dev) +{ + struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; + int err; + int reserved_from_top = 0; + int reserved_from_bot; + int k; + int fixed_reserved_from_bot_rv = 0; + int bottom_reserved_for_rss_bitmap; + u32 max_table_offset = dev->caps.dmfs_high_rate_qpn_base + + dev->caps.dmfs_high_rate_qpn_range; + + 
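+	/* For orientation, a worked example of the bottom-of-range layout
+	 * computed below (hypothetical numbers): if the FW regions up to
+	 * MLX4_QP_REGION_BOTTOM reserve 64 QPs and max_table_offset is also
+	 * 64, then fixed_reserved_from_bot_rv = 64,
+	 * bottom_reserved_for_rss_bitmap = roundup_pow_of_two(64 + 1) = 128,
+	 * and base_sqpn = ALIGN(128, 8) = 128, so the 8 real special QPs land
+	 * at or above the RSS/RAW_ETH zones carved out by mlx4_create_zones().
+	 */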
spin_lock_init(&qp_table->lock); + INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC); + if (mlx4_is_slave(dev)) + return 0; + + /* We reserve 2 extra QPs per port for the special QPs. The + * block of special QPs must be aligned to a multiple of 8, so + * round up. + * + * We also reserve the MSB of the 24-bit QP number to indicate + * that a QP is an XRC QP. + */ + for (k = 0; k <= MLX4_QP_REGION_BOTTOM; k++) + fixed_reserved_from_bot_rv += dev->caps.reserved_qps_cnt[k]; + + if (fixed_reserved_from_bot_rv < max_table_offset) + fixed_reserved_from_bot_rv = max_table_offset; + + /* We reserve at least 1 extra for bitmaps that we don't have enough space for*/ + bottom_reserved_for_rss_bitmap = + roundup_pow_of_two(fixed_reserved_from_bot_rv + 1); + dev->phys_caps.base_sqpn = ALIGN(bottom_reserved_for_rss_bitmap, 8); + + { + int sort[MLX4_NUM_QP_REGION]; + int i, j; + int last_base = dev->caps.num_qps; + + for (i = 1; i < MLX4_NUM_QP_REGION; ++i) + sort[i] = i; + + for (i = MLX4_NUM_QP_REGION; i > MLX4_QP_REGION_BOTTOM; --i) { + for (j = MLX4_QP_REGION_BOTTOM + 2; j < i; ++j) { + if (dev->caps.reserved_qps_cnt[sort[j]] > + dev->caps.reserved_qps_cnt[sort[j - 1]]) + swap(sort[j], sort[j - 1]); + } + } + + for (i = MLX4_QP_REGION_BOTTOM + 1; i < MLX4_NUM_QP_REGION; ++i) { + last_base -= dev->caps.reserved_qps_cnt[sort[i]]; + dev->caps.reserved_qps_base[sort[i]] = last_base; + reserved_from_top += + dev->caps.reserved_qps_cnt[sort[i]]; + } + } + + /* Reserve 8 real SQPs in both native and SRIOV modes. + * In addition, in SRIOV mode, reserve 8 proxy SQPs per function + * (for all PFs and VFs), and 8 corresponding tunnel QPs. + * Each proxy SQP works opposite its own tunnel QP. + * + * The QPs are arranged as follows: + * a. 8 real SQPs + * b. All the proxy SQPs (8 per function) + * c. 
All the tunnel QPs (8 per function) + */ + reserved_from_bot = mlx4_num_reserved_sqps(dev); + if (reserved_from_bot + reserved_from_top > dev->caps.num_qps) { + mlx4_err(dev, "Number of reserved QPs is higher than number of QPs\n"); + return -EINVAL; + } + + err = mlx4_create_zones(dev, reserved_from_bot, reserved_from_bot, + bottom_reserved_for_rss_bitmap, + fixed_reserved_from_bot_rv, + max_table_offset); + + if (err) + return err; + + if (mlx4_is_mfunc(dev)) { + /* for PPF use */ + dev->phys_caps.base_proxy_sqpn = dev->phys_caps.base_sqpn + 8; + dev->phys_caps.base_tunnel_sqpn = dev->phys_caps.base_sqpn + 8 + 8 * MLX4_MFUNC_MAX; + + /* In mfunc, calculate proxy and tunnel qp offsets for the PF here, + * since the PF does not call mlx4_slave_caps */ + dev->caps.spec_qps = kcalloc(dev->caps.num_ports, + sizeof(*dev->caps.spec_qps), + GFP_KERNEL); + if (!dev->caps.spec_qps) { + err = -ENOMEM; + goto err_mem; + } + + for (k = 0; k < dev->caps.num_ports; k++) { + dev->caps.spec_qps[k].qp0_proxy = dev->phys_caps.base_proxy_sqpn + + 8 * mlx4_master_func_num(dev) + k; + dev->caps.spec_qps[k].qp0_tunnel = dev->caps.spec_qps[k].qp0_proxy + 8 * MLX4_MFUNC_MAX; + dev->caps.spec_qps[k].qp1_proxy = dev->phys_caps.base_proxy_sqpn + + 8 * mlx4_master_func_num(dev) + MLX4_MAX_PORTS + k; + dev->caps.spec_qps[k].qp1_tunnel = dev->caps.spec_qps[k].qp1_proxy + 8 * MLX4_MFUNC_MAX; + } + } + + + err = mlx4_CONF_SPECIAL_QP(dev, dev->phys_caps.base_sqpn); + if (err) + goto err_mem; + + return err; + +err_mem: + kfree(dev->caps.spec_qps); + dev->caps.spec_qps = NULL; + mlx4_cleanup_qp_zones(dev); + return err; +} + +void mlx4_cleanup_qp_table(struct mlx4_dev *dev) +{ + if (mlx4_is_slave(dev)) + return; + + mlx4_CONF_SPECIAL_QP(dev, 0); + + mlx4_cleanup_qp_zones(dev); +} + +int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp, + struct mlx4_qp_context *context) +{ + struct mlx4_cmd_mailbox *mailbox; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + err = mlx4_cmd_box(dev, 0, mailbox->dma, qp->qpn, 0, + MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_WRAPPED); + if (!err) + memcpy(context, mailbox->buf + 8, sizeof(*context)); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_qp_query); + +int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt, + struct mlx4_qp_context *context, + struct mlx4_qp *qp, enum mlx4_qp_state *qp_state) +{ + int err; + int i; + static const enum mlx4_qp_state states[] = { + MLX4_QP_STATE_RST, + MLX4_QP_STATE_INIT, + MLX4_QP_STATE_RTR, + MLX4_QP_STATE_RTS + }; + + for (i = 0; i < ARRAY_SIZE(states) - 1; i++) { + context->flags &= cpu_to_be32(~(0xf << 28)); + context->flags |= cpu_to_be32(states[i + 1] << 28); + if (states[i + 1] != MLX4_QP_STATE_RTR) + context->params2 &= ~cpu_to_be32(MLX4_QP_BIT_FPP); + err = mlx4_qp_modify(dev, mtt, states[i], states[i + 1], + context, 0, 0, qp); + if (err) { + mlx4_err(dev, "Failed to bring QP to state: %d with error: %d\n", + states[i + 1], err); + return err; + } + + *qp_state = states[i + 1]; + } + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_qp_to_ready); + +u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn) +{ + struct mlx4_qp_context context; + struct mlx4_qp qp; + int err; + + qp.qpn = qpn; + err = mlx4_qp_query(dev, &qp, &context); + if (!err) { + u32 dest_qpn = be32_to_cpu(context.remote_qpn) & 0xffffff; + u16 folded_dst = folded_qp(dest_qpn); + u16 folded_src = folded_qp(qpn); + + return (dest_qpn != qpn) ? 
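+		/* ORing in 0xC000 keeps the folded value at or above 49152,
+		 * i.e. inside the dynamic/private port range -- this entropy
+		 * field is what ends up in the QP context roce_entropy word
+		 * set in __mlx4_qp_modify(), presumably so the derived RoCE v2
+		 * UDP source port stays in that range.
+		 */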
+ ((folded_dst ^ folded_src) | 0xC000) : + folded_src | 0xC000; + } + return 0xdead; +} diff --git a/drivers/net/ethernet/mellanox/mlx4/reset.c b/drivers/net/ethernet/mellanox/mlx4/reset.c new file mode 100644 index 000000000..0076d8858 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/reset.c @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/errno.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/slab.h> +#include <linux/jiffies.h> + +#include "mlx4.h" + +int mlx4_reset(struct mlx4_dev *dev) +{ + void __iomem *reset; + u32 *hca_header = NULL; + int pcie_cap; + u16 devctl; + u16 linkctl; + u16 vendor; + unsigned long end; + u32 sem; + int i; + int err = 0; + +#define MLX4_RESET_BASE 0xf0000 +#define MLX4_RESET_SIZE 0x400 +#define MLX4_SEM_OFFSET 0x3fc +#define MLX4_RESET_OFFSET 0x10 +#define MLX4_RESET_VALUE swab32(1) + +#define MLX4_SEM_TIMEOUT_JIFFIES (10 * HZ) +#define MLX4_RESET_TIMEOUT_JIFFIES (2 * HZ) + + /* + * Reset the chip. This is somewhat ugly because we have to + * save off the PCI header before reset and then restore it + * after the chip reboots. We skip config space offsets 22 + * and 23 since those have a special meaning. + */ + + /* Do we need to save off the full 4K PCI Express header?? 
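+	 * (As written, the loop below saves only 64 dwords, i.e. the 256-byte
+	 *  legacy configuration header that was kmalloc'ed for, skipping
+	 *  dwords 22 and 23; the extended 4 KB PCIe config space is not
+	 *  preserved across the reset.)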
*/ + hca_header = kmalloc(256, GFP_KERNEL); + if (!hca_header) { + err = -ENOMEM; + mlx4_err(dev, "Couldn't allocate memory to save HCA PCI header, aborting\n"); + goto out; + } + + pcie_cap = pci_pcie_cap(dev->persist->pdev); + + for (i = 0; i < 64; ++i) { + if (i == 22 || i == 23) + continue; + if (pci_read_config_dword(dev->persist->pdev, i * 4, + hca_header + i)) { + err = -ENODEV; + mlx4_err(dev, "Couldn't save HCA PCI header, aborting\n"); + goto out; + } + } + + reset = ioremap(pci_resource_start(dev->persist->pdev, 0) + + MLX4_RESET_BASE, + MLX4_RESET_SIZE); + if (!reset) { + err = -ENOMEM; + mlx4_err(dev, "Couldn't map HCA reset register, aborting\n"); + goto out; + } + + /* grab HW semaphore to lock out flash updates */ + end = jiffies + MLX4_SEM_TIMEOUT_JIFFIES; + do { + sem = readl(reset + MLX4_SEM_OFFSET); + if (!sem) + break; + + msleep(1); + } while (time_before(jiffies, end)); + + if (sem) { + mlx4_err(dev, "Failed to obtain HW semaphore, aborting\n"); + err = -EAGAIN; + iounmap(reset); + goto out; + } + + /* actually hit reset */ + writel(MLX4_RESET_VALUE, reset + MLX4_RESET_OFFSET); + iounmap(reset); + + /* Docs say to wait one second before accessing device */ + msleep(1000); + + end = jiffies + MLX4_RESET_TIMEOUT_JIFFIES; + do { + if (!pci_read_config_word(dev->persist->pdev, PCI_VENDOR_ID, + &vendor) && vendor != 0xffff) + break; + + msleep(1); + } while (time_before(jiffies, end)); + + if (vendor == 0xffff) { + err = -ENODEV; + mlx4_err(dev, "PCI device did not come back after reset, aborting\n"); + goto out; + } + + /* Now restore the PCI headers */ + if (pcie_cap) { + devctl = hca_header[(pcie_cap + PCI_EXP_DEVCTL) / 4]; + if (pcie_capability_write_word(dev->persist->pdev, + PCI_EXP_DEVCTL, + devctl)) { + err = -ENODEV; + mlx4_err(dev, "Couldn't restore HCA PCI Express Device Control register, aborting\n"); + goto out; + } + linkctl = hca_header[(pcie_cap + PCI_EXP_LNKCTL) / 4]; + if (pcie_capability_write_word(dev->persist->pdev, + PCI_EXP_LNKCTL, + linkctl)) { + err = -ENODEV; + mlx4_err(dev, "Couldn't restore HCA PCI Express Link control register, aborting\n"); + goto out; + } + } + + for (i = 0; i < 16; ++i) { + if (i * 4 == PCI_COMMAND) + continue; + + if (pci_write_config_dword(dev->persist->pdev, i * 4, + hca_header[i])) { + err = -ENODEV; + mlx4_err(dev, "Couldn't restore HCA reg %x, aborting\n", + i); + goto out; + } + } + + if (pci_write_config_dword(dev->persist->pdev, PCI_COMMAND, + hca_header[PCI_COMMAND / 4])) { + err = -ENODEV; + mlx4_err(dev, "Couldn't restore HCA COMMAND, aborting\n"); + goto out; + } + +out: + kfree(hca_header); + + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c new file mode 100644 index 000000000..771b92019 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -0,0 +1,5423 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. + * All rights reserved. + * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/io.h> +#include <linux/slab.h> +#include <linux/mlx4/cmd.h> +#include <linux/mlx4/qp.h> +#include <linux/if_ether.h> +#include <linux/etherdevice.h> + +#include "mlx4.h" +#include "fw.h" +#include "mlx4_stats.h" + +#define MLX4_MAC_VALID (1ull << 63) +#define MLX4_PF_COUNTERS_PER_PORT 2 +#define MLX4_VF_COUNTERS_PER_PORT 1 + +struct mac_res { + struct list_head list; + u64 mac; + int ref_count; + u8 smac_index; + u8 port; +}; + +struct vlan_res { + struct list_head list; + u16 vlan; + int ref_count; + int vlan_index; + u8 port; +}; + +struct res_common { + struct list_head list; + struct rb_node node; + u64 res_id; + int owner; + int state; + int from_state; + int to_state; + int removing; + const char *func_name; +}; + +enum { + RES_ANY_BUSY = 1 +}; + +struct res_gid { + struct list_head list; + u8 gid[16]; + enum mlx4_protocol prot; + enum mlx4_steer_type steer; + u64 reg_id; +}; + +enum res_qp_states { + RES_QP_BUSY = RES_ANY_BUSY, + + /* QP number was allocated */ + RES_QP_RESERVED, + + /* ICM memory for QP context was mapped */ + RES_QP_MAPPED, + + /* QP is in hw ownership */ + RES_QP_HW +}; + +struct res_qp { + struct res_common com; + struct res_mtt *mtt; + struct res_cq *rcq; + struct res_cq *scq; + struct res_srq *srq; + struct list_head mcg_list; + spinlock_t mcg_spl; + int local_qpn; + atomic_t ref_count; + u32 qpc_flags; + /* saved qp params before VST enforcement in order to restore on VGT */ + u8 sched_queue; + __be32 param3; + u8 vlan_control; + u8 fvl_rx; + u8 pri_path_fl; + u8 vlan_index; + u8 feup; +}; + +enum res_mtt_states { + RES_MTT_BUSY = RES_ANY_BUSY, + RES_MTT_ALLOCATED, +}; + +static inline const char *mtt_states_str(enum res_mtt_states state) +{ + switch (state) { + case RES_MTT_BUSY: return "RES_MTT_BUSY"; + case RES_MTT_ALLOCATED: return "RES_MTT_ALLOCATED"; + default: return "Unknown"; + } +} + +struct res_mtt { + struct res_common com; + int order; + atomic_t ref_count; +}; + +enum res_mpt_states { + RES_MPT_BUSY = RES_ANY_BUSY, + RES_MPT_RESERVED, + RES_MPT_MAPPED, + RES_MPT_HW, +}; + +struct res_mpt { + struct res_common com; + struct res_mtt *mtt; + int key; +}; + +enum res_eq_states { + RES_EQ_BUSY = 
RES_ANY_BUSY, + RES_EQ_RESERVED, + RES_EQ_HW, +}; + +struct res_eq { + struct res_common com; + struct res_mtt *mtt; +}; + +enum res_cq_states { + RES_CQ_BUSY = RES_ANY_BUSY, + RES_CQ_ALLOCATED, + RES_CQ_HW, +}; + +struct res_cq { + struct res_common com; + struct res_mtt *mtt; + atomic_t ref_count; +}; + +enum res_srq_states { + RES_SRQ_BUSY = RES_ANY_BUSY, + RES_SRQ_ALLOCATED, + RES_SRQ_HW, +}; + +struct res_srq { + struct res_common com; + struct res_mtt *mtt; + struct res_cq *cq; + atomic_t ref_count; +}; + +enum res_counter_states { + RES_COUNTER_BUSY = RES_ANY_BUSY, + RES_COUNTER_ALLOCATED, +}; + +struct res_counter { + struct res_common com; + int port; +}; + +enum res_xrcdn_states { + RES_XRCD_BUSY = RES_ANY_BUSY, + RES_XRCD_ALLOCATED, +}; + +struct res_xrcdn { + struct res_common com; + int port; +}; + +enum res_fs_rule_states { + RES_FS_RULE_BUSY = RES_ANY_BUSY, + RES_FS_RULE_ALLOCATED, +}; + +struct res_fs_rule { + struct res_common com; + int qpn; + /* VF DMFS mbox with port flipped */ + void *mirr_mbox; + /* > 0 --> apply mirror when getting into HA mode */ + /* = 0 --> un-apply mirror when getting out of HA mode */ + u32 mirr_mbox_size; + struct list_head mirr_list; + u64 mirr_rule_id; +}; + +static void *res_tracker_lookup(struct rb_root *root, u64 res_id) +{ + struct rb_node *node = root->rb_node; + + while (node) { + struct res_common *res = rb_entry(node, struct res_common, + node); + + if (res_id < res->res_id) + node = node->rb_left; + else if (res_id > res->res_id) + node = node->rb_right; + else + return res; + } + return NULL; +} + +static int res_tracker_insert(struct rb_root *root, struct res_common *res) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct res_common *this = rb_entry(*new, struct res_common, + node); + + parent = *new; + if (res->res_id < this->res_id) + new = &((*new)->rb_left); + else if (res->res_id > this->res_id) + new = &((*new)->rb_right); + else + return -EEXIST; + } + + /* Add new node and rebalance tree. */ + rb_link_node(&res->node, parent, new); + rb_insert_color(&res->node, root); + + return 0; +} + +enum qp_transition { + QP_TRANS_INIT2RTR, + QP_TRANS_RTR2RTS, + QP_TRANS_RTS2RTS, + QP_TRANS_SQERR2RTS, + QP_TRANS_SQD2SQD, + QP_TRANS_SQD2RTS +}; + +/* For Debug uses */ +static const char *resource_str(enum mlx4_resource rt) +{ + switch (rt) { + case RES_QP: return "RES_QP"; + case RES_CQ: return "RES_CQ"; + case RES_SRQ: return "RES_SRQ"; + case RES_MPT: return "RES_MPT"; + case RES_MTT: return "RES_MTT"; + case RES_MAC: return "RES_MAC"; + case RES_VLAN: return "RES_VLAN"; + case RES_EQ: return "RES_EQ"; + case RES_COUNTER: return "RES_COUNTER"; + case RES_FS_RULE: return "RES_FS_RULE"; + case RES_XRCD: return "RES_XRCD"; + default: return "Unknown resource type !!!"; + } +} + +static void rem_slave_vlans(struct mlx4_dev *dev, int slave); +static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave, + enum mlx4_resource res_type, int count, + int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct resource_allocator *res_alloc = + &priv->mfunc.master.res_tracker.res_alloc[res_type]; + int err = -EDQUOT; + int allocated, free, reserved, guaranteed, from_free; + int from_rsvd; + + if (slave > dev->persist->num_vfs) + return -EINVAL; + + spin_lock(&res_alloc->alloc_lock); + allocated = (port > 0) ? + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] : + res_alloc->allocated[slave]; + free = (port > 0) ? 
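+		/* Small worked example of the check below (invented numbers):
+		 * with guaranteed = 4, allocated = 3 and count = 3, one unit
+		 * still comes out of this slave's guaranteed share and
+		 * from_free = 3 - (4 - 3) = 2 must come from the shared pool;
+		 * the request is granted only if free - 2 >= reserved, i.e.
+		 * the pool can give up 2 units without eating into what is
+		 * still guaranteed to other functions.
+		 */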
res_alloc->res_port_free[port - 1] : + res_alloc->res_free; + reserved = (port > 0) ? res_alloc->res_port_rsvd[port - 1] : + res_alloc->res_reserved; + guaranteed = res_alloc->guaranteed[slave]; + + if (allocated + count > res_alloc->quota[slave]) { + mlx4_warn(dev, "VF %d port %d res %s: quota exceeded, count %d alloc %d quota %d\n", + slave, port, resource_str(res_type), count, + allocated, res_alloc->quota[slave]); + goto out; + } + + if (allocated + count <= guaranteed) { + err = 0; + from_rsvd = count; + } else { + /* portion may need to be obtained from free area */ + if (guaranteed - allocated > 0) + from_free = count - (guaranteed - allocated); + else + from_free = count; + + from_rsvd = count - from_free; + + if (free - from_free >= reserved) + err = 0; + else + mlx4_warn(dev, "VF %d port %d res %s: free pool empty, free %d from_free %d rsvd %d\n", + slave, port, resource_str(res_type), free, + from_free, reserved); + } + + if (!err) { + /* grant the request */ + if (port > 0) { + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] += count; + res_alloc->res_port_free[port - 1] -= count; + res_alloc->res_port_rsvd[port - 1] -= from_rsvd; + } else { + res_alloc->allocated[slave] += count; + res_alloc->res_free -= count; + res_alloc->res_reserved -= from_rsvd; + } + } + +out: + spin_unlock(&res_alloc->alloc_lock); + return err; +} + +static inline void mlx4_release_resource(struct mlx4_dev *dev, int slave, + enum mlx4_resource res_type, int count, + int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct resource_allocator *res_alloc = + &priv->mfunc.master.res_tracker.res_alloc[res_type]; + int allocated, guaranteed, from_rsvd; + + if (slave > dev->persist->num_vfs) + return; + + spin_lock(&res_alloc->alloc_lock); + + allocated = (port > 0) ? 
+ res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] : + res_alloc->allocated[slave]; + guaranteed = res_alloc->guaranteed[slave]; + + if (allocated - count >= guaranteed) { + from_rsvd = 0; + } else { + /* portion may need to be returned to reserved area */ + if (allocated - guaranteed > 0) + from_rsvd = count - (allocated - guaranteed); + else + from_rsvd = count; + } + + if (port > 0) { + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] -= count; + res_alloc->res_port_free[port - 1] += count; + res_alloc->res_port_rsvd[port - 1] += from_rsvd; + } else { + res_alloc->allocated[slave] -= count; + res_alloc->res_free += count; + res_alloc->res_reserved += from_rsvd; + } + + spin_unlock(&res_alloc->alloc_lock); + return; +} + +static inline void initialize_res_quotas(struct mlx4_dev *dev, + struct resource_allocator *res_alloc, + enum mlx4_resource res_type, + int vf, int num_instances) +{ + res_alloc->guaranteed[vf] = num_instances / + (2 * (dev->persist->num_vfs + 1)); + res_alloc->quota[vf] = (num_instances / 2) + res_alloc->guaranteed[vf]; + if (vf == mlx4_master_func_num(dev)) { + res_alloc->res_free = num_instances; + if (res_type == RES_MTT) { + /* reserved mtts will be taken out of the PF allocation */ + res_alloc->res_free += dev->caps.reserved_mtts; + res_alloc->guaranteed[vf] += dev->caps.reserved_mtts; + res_alloc->quota[vf] += dev->caps.reserved_mtts; + } + } +} + +void mlx4_init_quotas(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int pf; + + /* quotas for VFs are initialized in mlx4_slave_cap */ + if (mlx4_is_slave(dev)) + return; + + if (!mlx4_is_mfunc(dev)) { + dev->quotas.qp = dev->caps.num_qps - dev->caps.reserved_qps - + mlx4_num_reserved_sqps(dev); + dev->quotas.cq = dev->caps.num_cqs - dev->caps.reserved_cqs; + dev->quotas.srq = dev->caps.num_srqs - dev->caps.reserved_srqs; + dev->quotas.mtt = dev->caps.num_mtts - dev->caps.reserved_mtts; + dev->quotas.mpt = dev->caps.num_mpts - dev->caps.reserved_mrws; + return; + } + + pf = mlx4_master_func_num(dev); + dev->quotas.qp = + priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[pf]; + dev->quotas.cq = + priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[pf]; + dev->quotas.srq = + priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[pf]; + dev->quotas.mtt = + priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[pf]; + dev->quotas.mpt = + priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf]; +} + +static int +mlx4_calc_res_counter_guaranteed(struct mlx4_dev *dev, + struct resource_allocator *res_alloc, + int vf) +{ + struct mlx4_active_ports actv_ports; + int ports, counters_guaranteed; + + /* For master, only allocate according to the number of phys ports */ + if (vf == mlx4_master_func_num(dev)) + return MLX4_PF_COUNTERS_PER_PORT * dev->caps.num_ports; + + /* calculate real number of ports for the VF */ + actv_ports = mlx4_get_active_ports(dev, vf); + ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports); + counters_guaranteed = ports * MLX4_VF_COUNTERS_PER_PORT; + + /* If we do not have enough counters for this VF, do not + * allocate any for it. '-1' to reduce the sink counter. 
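+	 * For example, with MLX4_PF_COUNTERS_PER_PORT = 2 and
+	 * MLX4_VF_COUNTERS_PER_PORT = 1, a dual-port PF is guaranteed 4
+	 * counters and a VF active on one port is guaranteed 1; if adding that
+	 * 1 to what is already reserved would exceed max_counters - 1, the VF
+	 * simply gets a guarantee of 0 and must rely on the shared pool.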
+ */ + if ((res_alloc->res_reserved + counters_guaranteed) > + (dev->caps.max_counters - 1)) + return 0; + + return counters_guaranteed; +} + +int mlx4_init_resource_tracker(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int i, j; + int t; + + priv->mfunc.master.res_tracker.slave_list = + kcalloc(dev->num_slaves, sizeof(struct slave_list), + GFP_KERNEL); + if (!priv->mfunc.master.res_tracker.slave_list) + return -ENOMEM; + + for (i = 0 ; i < dev->num_slaves; i++) { + for (t = 0; t < MLX4_NUM_OF_RESOURCE_TYPE; ++t) + INIT_LIST_HEAD(&priv->mfunc.master.res_tracker. + slave_list[i].res_list[t]); + mutex_init(&priv->mfunc.master.res_tracker.slave_list[i].mutex); + } + + mlx4_dbg(dev, "Started init_resource_tracker: %ld slaves\n", + dev->num_slaves); + for (i = 0 ; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) + priv->mfunc.master.res_tracker.res_tree[i] = RB_ROOT; + + for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { + struct resource_allocator *res_alloc = + &priv->mfunc.master.res_tracker.res_alloc[i]; + res_alloc->quota = kmalloc_array(dev->persist->num_vfs + 1, + sizeof(int), + GFP_KERNEL); + res_alloc->guaranteed = kmalloc_array(dev->persist->num_vfs + 1, + sizeof(int), + GFP_KERNEL); + if (i == RES_MAC || i == RES_VLAN) + res_alloc->allocated = + kcalloc(MLX4_MAX_PORTS * + (dev->persist->num_vfs + 1), + sizeof(int), GFP_KERNEL); + else + res_alloc->allocated = + kcalloc(dev->persist->num_vfs + 1, + sizeof(int), GFP_KERNEL); + /* Reduce the sink counter */ + if (i == RES_COUNTER) + res_alloc->res_free = dev->caps.max_counters - 1; + + if (!res_alloc->quota || !res_alloc->guaranteed || + !res_alloc->allocated) + goto no_mem_err; + + spin_lock_init(&res_alloc->alloc_lock); + for (t = 0; t < dev->persist->num_vfs + 1; t++) { + struct mlx4_active_ports actv_ports = + mlx4_get_active_ports(dev, t); + switch (i) { + case RES_QP: + initialize_res_quotas(dev, res_alloc, RES_QP, + t, dev->caps.num_qps - + dev->caps.reserved_qps - + mlx4_num_reserved_sqps(dev)); + break; + case RES_CQ: + initialize_res_quotas(dev, res_alloc, RES_CQ, + t, dev->caps.num_cqs - + dev->caps.reserved_cqs); + break; + case RES_SRQ: + initialize_res_quotas(dev, res_alloc, RES_SRQ, + t, dev->caps.num_srqs - + dev->caps.reserved_srqs); + break; + case RES_MPT: + initialize_res_quotas(dev, res_alloc, RES_MPT, + t, dev->caps.num_mpts - + dev->caps.reserved_mrws); + break; + case RES_MTT: + initialize_res_quotas(dev, res_alloc, RES_MTT, + t, dev->caps.num_mtts - + dev->caps.reserved_mtts); + break; + case RES_MAC: + if (t == mlx4_master_func_num(dev)) { + int max_vfs_pport = 0; + /* Calculate the max vfs per port for */ + /* both ports. 
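+ * The PF quota computed below leaves room for the two guaranteed MAC entries of every VF on the busiest port.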
*/ + for (j = 0; j < dev->caps.num_ports; + j++) { + struct mlx4_slaves_pport slaves_pport = + mlx4_phys_to_slaves_pport(dev, j + 1); + unsigned current_slaves = + bitmap_weight(slaves_pport.slaves, + dev->caps.num_ports) - 1; + if (max_vfs_pport < current_slaves) + max_vfs_pport = + current_slaves; + } + res_alloc->quota[t] = + MLX4_MAX_MAC_NUM - + 2 * max_vfs_pport; + res_alloc->guaranteed[t] = 2; + for (j = 0; j < MLX4_MAX_PORTS; j++) + res_alloc->res_port_free[j] = + MLX4_MAX_MAC_NUM; + } else { + res_alloc->quota[t] = MLX4_MAX_MAC_NUM; + res_alloc->guaranteed[t] = 2; + } + break; + case RES_VLAN: + if (t == mlx4_master_func_num(dev)) { + res_alloc->quota[t] = MLX4_MAX_VLAN_NUM; + res_alloc->guaranteed[t] = MLX4_MAX_VLAN_NUM / 2; + for (j = 0; j < MLX4_MAX_PORTS; j++) + res_alloc->res_port_free[j] = + res_alloc->quota[t]; + } else { + res_alloc->quota[t] = MLX4_MAX_VLAN_NUM / 2; + res_alloc->guaranteed[t] = 0; + } + break; + case RES_COUNTER: + res_alloc->quota[t] = dev->caps.max_counters; + res_alloc->guaranteed[t] = + mlx4_calc_res_counter_guaranteed(dev, res_alloc, t); + break; + default: + break; + } + if (i == RES_MAC || i == RES_VLAN) { + for (j = 0; j < dev->caps.num_ports; j++) + if (test_bit(j, actv_ports.ports)) + res_alloc->res_port_rsvd[j] += + res_alloc->guaranteed[t]; + } else { + res_alloc->res_reserved += res_alloc->guaranteed[t]; + } + } + } + spin_lock_init(&priv->mfunc.master.res_tracker.lock); + return 0; + +no_mem_err: + for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { + kfree(priv->mfunc.master.res_tracker.res_alloc[i].allocated); + priv->mfunc.master.res_tracker.res_alloc[i].allocated = NULL; + kfree(priv->mfunc.master.res_tracker.res_alloc[i].guaranteed); + priv->mfunc.master.res_tracker.res_alloc[i].guaranteed = NULL; + kfree(priv->mfunc.master.res_tracker.res_alloc[i].quota); + priv->mfunc.master.res_tracker.res_alloc[i].quota = NULL; + } + return -ENOMEM; +} + +void mlx4_free_resource_tracker(struct mlx4_dev *dev, + enum mlx4_res_tracker_free_type type) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int i; + + if (priv->mfunc.master.res_tracker.slave_list) { + if (type != RES_TR_FREE_STRUCTS_ONLY) { + for (i = 0; i < dev->num_slaves; i++) { + if (type == RES_TR_FREE_ALL || + dev->caps.function != i) + mlx4_delete_all_resources_for_slave(dev, i); + } + /* free master's vlans */ + i = dev->caps.function; + mlx4_reset_roce_gids(dev, i); + mutex_lock(&priv->mfunc.master.res_tracker.slave_list[i].mutex); + rem_slave_vlans(dev, i); + mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[i].mutex); + } + + if (type != RES_TR_FREE_SLAVES_ONLY) { + for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { + kfree(priv->mfunc.master.res_tracker.res_alloc[i].allocated); + priv->mfunc.master.res_tracker.res_alloc[i].allocated = NULL; + kfree(priv->mfunc.master.res_tracker.res_alloc[i].guaranteed); + priv->mfunc.master.res_tracker.res_alloc[i].guaranteed = NULL; + kfree(priv->mfunc.master.res_tracker.res_alloc[i].quota); + priv->mfunc.master.res_tracker.res_alloc[i].quota = NULL; + } + kfree(priv->mfunc.master.res_tracker.slave_list); + priv->mfunc.master.res_tracker.slave_list = NULL; + } + } +} + +static void update_pkey_index(struct mlx4_dev *dev, int slave, + struct mlx4_cmd_mailbox *inbox) +{ + u8 sched = *(u8 *)(inbox->buf + 64); + u8 orig_index = *(u8 *)(inbox->buf + 35); + u8 new_index; + struct mlx4_priv *priv = mlx4_priv(dev); + int port; + + port = (sched >> 6 & 1) + 1; + + new_index = priv->virt2phys_pkey[slave][port - 1][orig_index]; + *(u8 *)(inbox->buf + 35) = 
new_index; +} + +static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, + u8 slave) +{ + struct mlx4_qp_context *qp_ctx = inbox->buf + 8; + enum mlx4_qp_optpar optpar = be32_to_cpu(*(__be32 *) inbox->buf); + u32 ts = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; + int port; + + if (MLX4_QP_ST_UD == ts) { + port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; + if (mlx4_is_eth(dev, port)) + qp_ctx->pri_path.mgid_index = + mlx4_get_base_gid_ix(dev, slave, port) | 0x80; + else + qp_ctx->pri_path.mgid_index = slave | 0x80; + + } else if (MLX4_QP_ST_RC == ts || MLX4_QP_ST_XRC == ts || MLX4_QP_ST_UC == ts) { + if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) { + port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; + if (mlx4_is_eth(dev, port)) { + qp_ctx->pri_path.mgid_index += + mlx4_get_base_gid_ix(dev, slave, port); + qp_ctx->pri_path.mgid_index &= 0x7f; + } else { + qp_ctx->pri_path.mgid_index = slave & 0x7F; + } + } + if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) { + port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1; + if (mlx4_is_eth(dev, port)) { + qp_ctx->alt_path.mgid_index += + mlx4_get_base_gid_ix(dev, slave, port); + qp_ctx->alt_path.mgid_index &= 0x7f; + } else { + qp_ctx->alt_path.mgid_index = slave & 0x7F; + } + } + } +} + +static int handle_counter(struct mlx4_dev *dev, struct mlx4_qp_context *qpc, + u8 slave, int port); + +static int update_vport_qp_param(struct mlx4_dev *dev, + struct mlx4_cmd_mailbox *inbox, + u8 slave, u32 qpn) +{ + struct mlx4_qp_context *qpc = inbox->buf + 8; + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_priv *priv; + u32 qp_type; + int port, err = 0; + + port = (qpc->pri_path.sched_queue & 0x40) ? 2 : 1; + priv = mlx4_priv(dev); + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + qp_type = (be32_to_cpu(qpc->flags) >> 16) & 0xff; + + err = handle_counter(dev, qpc, slave, port); + if (err) + goto out; + + if (MLX4_VGT != vp_oper->state.default_vlan) { + /* the reserved QPs (special, proxy, tunnel) + * do not operate over vlans + */ + if (mlx4_is_qp_reserved(dev, qpn)) + return 0; + + /* force strip vlan by clear vsd, MLX QP refers to Raw Ethernet */ + if (qp_type == MLX4_QP_ST_UD || + (qp_type == MLX4_QP_ST_MLX && mlx4_is_eth(dev, port))) { + if (dev->caps.bmme_flags & MLX4_BMME_FLAG_VSD_INIT2RTR) { + *(__be32 *)inbox->buf = + cpu_to_be32(be32_to_cpu(*(__be32 *)inbox->buf) | + MLX4_QP_OPTPAR_VLAN_STRIPPING); + qpc->param3 &= ~cpu_to_be32(MLX4_STRIP_VLAN); + } else { + struct mlx4_update_qp_params params = {.flags = 0}; + + err = mlx4_update_qp(dev, qpn, MLX4_UPDATE_QP_VSD, ¶ms); + if (err) + goto out; + } + } + + /* preserve IF_COUNTER flag */ + qpc->pri_path.vlan_control &= + MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER; + if (vp_oper->state.link_state == IFLA_VF_LINK_STATE_DISABLE && + dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP) { + qpc->pri_path.vlan_control |= + MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_TX_BLOCK_UNTAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; + } else if (0 != vp_oper->state.default_vlan) { + if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD)) { + /* vst QinQ should block untagged on TX, + * but cvlan is in payload and phv is set so + * hw see it as untagged. Block tagged instead. 
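+ * The service tag itself comes from vlan_index, with MLX4_FL_SV selecting S-VLAN insertion further down.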
+ */ + qpc->pri_path.vlan_control |= + MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + } else { /* vst 802.1Q */ + qpc->pri_path.vlan_control |= + MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + } + } else { /* priority tagged */ + qpc->pri_path.vlan_control |= + MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; + } + + qpc->pri_path.fvl_rx |= MLX4_FVL_RX_FORCE_ETH_VLAN; + qpc->pri_path.vlan_index = vp_oper->vlan_idx; + qpc->pri_path.fl |= MLX4_FL_ETH_HIDE_CQE_VLAN; + if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD)) + qpc->pri_path.fl |= MLX4_FL_SV; + else + qpc->pri_path.fl |= MLX4_FL_CV; + qpc->pri_path.feup |= MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN; + qpc->pri_path.sched_queue &= 0xC7; + qpc->pri_path.sched_queue |= (vp_oper->state.default_qos) << 3; + qpc->qos_vport = vp_oper->state.qos_vport; + } + if (vp_oper->state.spoofchk) { + qpc->pri_path.feup |= MLX4_FSM_FORCE_ETH_SRC_MAC; + qpc->pri_path.grh_mylmc = (0x80 & qpc->pri_path.grh_mylmc) + vp_oper->mac_idx; + } +out: + return err; +} + +static int mpt_mask(struct mlx4_dev *dev) +{ + return dev->caps.num_mpts - 1; +} + +static const char *mlx4_resource_type_to_str(enum mlx4_resource t) +{ + switch (t) { + case RES_QP: + return "QP"; + case RES_CQ: + return "CQ"; + case RES_SRQ: + return "SRQ"; + case RES_XRCD: + return "XRCD"; + case RES_MPT: + return "MPT"; + case RES_MTT: + return "MTT"; + case RES_MAC: + return "MAC"; + case RES_VLAN: + return "VLAN"; + case RES_COUNTER: + return "COUNTER"; + case RES_FS_RULE: + return "FS_RULE"; + case RES_EQ: + return "EQ"; + default: + return "INVALID RESOURCE"; + } +} + +static void *find_res(struct mlx4_dev *dev, u64 res_id, + enum mlx4_resource type) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + return res_tracker_lookup(&priv->mfunc.master.res_tracker.res_tree[type], + res_id); +} + +static int _get_res(struct mlx4_dev *dev, int slave, u64 res_id, + enum mlx4_resource type, + void *res, const char *func_name) +{ + struct res_common *r; + int err = 0; + + spin_lock_irq(mlx4_tlock(dev)); + r = find_res(dev, res_id, type); + if (!r) { + err = -ENONET; + goto exit; + } + + if (r->state == RES_ANY_BUSY) { + mlx4_warn(dev, + "%s(%d) trying to get resource %llx of type %s, but it's already taken by %s\n", + func_name, slave, res_id, mlx4_resource_type_to_str(type), + r->func_name); + err = -EBUSY; + goto exit; + } + + if (r->owner != slave) { + err = -EPERM; + goto exit; + } + + r->from_state = r->state; + r->state = RES_ANY_BUSY; + r->func_name = func_name; + + if (res) + *((struct res_common **)res) = r; + +exit: + spin_unlock_irq(mlx4_tlock(dev)); + return err; +} + +#define get_res(dev, slave, res_id, type, res) \ + _get_res((dev), (slave), (res_id), (type), (res), __func__) + +int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev, + enum mlx4_resource type, + u64 res_id, int *slave) +{ + + struct res_common *r; + int err = -ENOENT; + int id = res_id; + + if (type == RES_QP) + id &= 0x7fffff; + spin_lock(mlx4_tlock(dev)); + + r = find_res(dev, id, type); + if (r) { + *slave = r->owner; + err = 0; + } + spin_unlock(mlx4_tlock(dev)); + + return err; +} + +static void put_res(struct mlx4_dev *dev, int slave, u64 res_id, + enum mlx4_resource type) +{ + struct res_common *r; + + spin_lock_irq(mlx4_tlock(dev)); + r = find_res(dev, res_id, type); + if (r) { + 
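+ /* drop the busy marker taken in _get_res() and restore the saved state */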
r->state = r->from_state; + r->func_name = ""; + } + spin_unlock_irq(mlx4_tlock(dev)); +} + +static int counter_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, + u64 in_param, u64 *out_param, int port); + +static int handle_existing_counter(struct mlx4_dev *dev, u8 slave, int port, + int counter_index) +{ + struct res_common *r; + struct res_counter *counter; + int ret = 0; + + if (counter_index == MLX4_SINK_COUNTER_INDEX(dev)) + return ret; + + spin_lock_irq(mlx4_tlock(dev)); + r = find_res(dev, counter_index, RES_COUNTER); + if (!r || r->owner != slave) { + ret = -EINVAL; + } else { + counter = container_of(r, struct res_counter, com); + if (!counter->port) + counter->port = port; + } + + spin_unlock_irq(mlx4_tlock(dev)); + return ret; +} + +static int handle_unexisting_counter(struct mlx4_dev *dev, + struct mlx4_qp_context *qpc, u8 slave, + int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct res_common *tmp; + struct res_counter *counter; + u64 counter_idx = MLX4_SINK_COUNTER_INDEX(dev); + int err = 0; + + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry(tmp, + &tracker->slave_list[slave].res_list[RES_COUNTER], + list) { + counter = container_of(tmp, struct res_counter, com); + if (port == counter->port) { + qpc->pri_path.counter_index = counter->com.res_id; + spin_unlock_irq(mlx4_tlock(dev)); + return 0; + } + } + spin_unlock_irq(mlx4_tlock(dev)); + + /* No existing counter, need to allocate a new counter */ + err = counter_alloc_res(dev, slave, RES_OP_RESERVE, 0, 0, &counter_idx, + port); + if (err == -ENOENT) { + err = 0; + } else if (err && err != -ENOSPC) { + mlx4_err(dev, "%s: failed to create new counter for slave %d err %d\n", + __func__, slave, err); + } else { + qpc->pri_path.counter_index = counter_idx; + mlx4_dbg(dev, "%s: alloc new counter for slave %d index %d\n", + __func__, slave, qpc->pri_path.counter_index); + err = 0; + } + + return err; +} + +static int handle_counter(struct mlx4_dev *dev, struct mlx4_qp_context *qpc, + u8 slave, int port) +{ + if (qpc->pri_path.counter_index != MLX4_SINK_COUNTER_INDEX(dev)) + return handle_existing_counter(dev, slave, port, + qpc->pri_path.counter_index); + + return handle_unexisting_counter(dev, qpc, slave, port); +} + +static struct res_common *alloc_qp_tr(int id) +{ + struct res_qp *ret; + + ret = kzalloc(sizeof(*ret), GFP_KERNEL); + if (!ret) + return NULL; + + ret->com.res_id = id; + ret->com.state = RES_QP_RESERVED; + ret->local_qpn = id; + INIT_LIST_HEAD(&ret->mcg_list); + spin_lock_init(&ret->mcg_spl); + atomic_set(&ret->ref_count, 0); + + return &ret->com; +} + +static struct res_common *alloc_mtt_tr(int id, int order) +{ + struct res_mtt *ret; + + ret = kzalloc(sizeof(*ret), GFP_KERNEL); + if (!ret) + return NULL; + + ret->com.res_id = id; + ret->order = order; + ret->com.state = RES_MTT_ALLOCATED; + atomic_set(&ret->ref_count, 0); + + return &ret->com; +} + +static struct res_common *alloc_mpt_tr(int id, int key) +{ + struct res_mpt *ret; + + ret = kzalloc(sizeof(*ret), GFP_KERNEL); + if (!ret) + return NULL; + + ret->com.res_id = id; + ret->com.state = RES_MPT_RESERVED; + ret->key = key; + + return &ret->com; +} + +static struct res_common *alloc_eq_tr(int id) +{ + struct res_eq *ret; + + ret = kzalloc(sizeof(*ret), GFP_KERNEL); + if (!ret) + return NULL; + + ret->com.res_id = id; + ret->com.state = RES_EQ_RESERVED; + + return &ret->com; +} + +static struct res_common *alloc_cq_tr(int id) +{ + struct res_cq *ret; + + 
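+ /* like the other alloc_*_tr() helpers: zeroed entry, ALLOCATED state, no references yet */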
ret = kzalloc(sizeof(*ret), GFP_KERNEL); + if (!ret) + return NULL; + + ret->com.res_id = id; + ret->com.state = RES_CQ_ALLOCATED; + atomic_set(&ret->ref_count, 0); + + return &ret->com; +} + +static struct res_common *alloc_srq_tr(int id) +{ + struct res_srq *ret; + + ret = kzalloc(sizeof(*ret), GFP_KERNEL); + if (!ret) + return NULL; + + ret->com.res_id = id; + ret->com.state = RES_SRQ_ALLOCATED; + atomic_set(&ret->ref_count, 0); + + return &ret->com; +} + +static struct res_common *alloc_counter_tr(int id, int port) +{ + struct res_counter *ret; + + ret = kzalloc(sizeof(*ret), GFP_KERNEL); + if (!ret) + return NULL; + + ret->com.res_id = id; + ret->com.state = RES_COUNTER_ALLOCATED; + ret->port = port; + + return &ret->com; +} + +static struct res_common *alloc_xrcdn_tr(int id) +{ + struct res_xrcdn *ret; + + ret = kzalloc(sizeof(*ret), GFP_KERNEL); + if (!ret) + return NULL; + + ret->com.res_id = id; + ret->com.state = RES_XRCD_ALLOCATED; + + return &ret->com; +} + +static struct res_common *alloc_fs_rule_tr(u64 id, int qpn) +{ + struct res_fs_rule *ret; + + ret = kzalloc(sizeof(*ret), GFP_KERNEL); + if (!ret) + return NULL; + + ret->com.res_id = id; + ret->com.state = RES_FS_RULE_ALLOCATED; + ret->qpn = qpn; + return &ret->com; +} + +static struct res_common *alloc_tr(u64 id, enum mlx4_resource type, int slave, + int extra) +{ + struct res_common *ret; + + switch (type) { + case RES_QP: + ret = alloc_qp_tr(id); + break; + case RES_MPT: + ret = alloc_mpt_tr(id, extra); + break; + case RES_MTT: + ret = alloc_mtt_tr(id, extra); + break; + case RES_EQ: + ret = alloc_eq_tr(id); + break; + case RES_CQ: + ret = alloc_cq_tr(id); + break; + case RES_SRQ: + ret = alloc_srq_tr(id); + break; + case RES_MAC: + pr_err("implementation missing\n"); + return NULL; + case RES_COUNTER: + ret = alloc_counter_tr(id, extra); + break; + case RES_XRCD: + ret = alloc_xrcdn_tr(id); + break; + case RES_FS_RULE: + ret = alloc_fs_rule_tr(id, extra); + break; + default: + return NULL; + } + if (ret) + ret->owner = slave; + + return ret; +} + +int mlx4_calc_vf_counters(struct mlx4_dev *dev, int slave, int port, + struct mlx4_counter *data) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct res_common *tmp; + struct res_counter *counter; + int *counters_arr; + int i = 0, err = 0; + + memset(data, 0, sizeof(*data)); + + counters_arr = kmalloc_array(dev->caps.max_counters, + sizeof(*counters_arr), GFP_KERNEL); + if (!counters_arr) + return -ENOMEM; + + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry(tmp, + &tracker->slave_list[slave].res_list[RES_COUNTER], + list) { + counter = container_of(tmp, struct res_counter, com); + if (counter->port == port) { + counters_arr[i] = (int)tmp->res_id; + i++; + } + } + spin_unlock_irq(mlx4_tlock(dev)); + counters_arr[i] = -1; + + i = 0; + + while (counters_arr[i] != -1) { + err = mlx4_get_counter_stats(dev, counters_arr[i], data, + 0); + if (err) { + memset(data, 0, sizeof(*data)); + goto table_changed; + } + i++; + } + +table_changed: + kfree(counters_arr); + return 0; +} + +static int add_res_range(struct mlx4_dev *dev, int slave, u64 base, int count, + enum mlx4_resource type, int extra) +{ + int i; + int err; + struct mlx4_priv *priv = mlx4_priv(dev); + struct res_common **res_arr; + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct rb_root *root = &tracker->res_tree[type]; + + res_arr = kcalloc(count, sizeof(*res_arr), GFP_KERNEL); + if (!res_arr) + return 
-ENOMEM; + + for (i = 0; i < count; ++i) { + res_arr[i] = alloc_tr(base + i, type, slave, extra); + if (!res_arr[i]) { + for (--i; i >= 0; --i) + kfree(res_arr[i]); + + kfree(res_arr); + return -ENOMEM; + } + } + + spin_lock_irq(mlx4_tlock(dev)); + for (i = 0; i < count; ++i) { + if (find_res(dev, base + i, type)) { + err = -EEXIST; + goto undo; + } + err = res_tracker_insert(root, res_arr[i]); + if (err) + goto undo; + list_add_tail(&res_arr[i]->list, + &tracker->slave_list[slave].res_list[type]); + } + spin_unlock_irq(mlx4_tlock(dev)); + kfree(res_arr); + + return 0; + +undo: + for (--i; i >= 0; --i) { + rb_erase(&res_arr[i]->node, root); + list_del_init(&res_arr[i]->list); + } + + spin_unlock_irq(mlx4_tlock(dev)); + + for (i = 0; i < count; ++i) + kfree(res_arr[i]); + + kfree(res_arr); + + return err; +} + +static int remove_qp_ok(struct res_qp *res) +{ + if (res->com.state == RES_QP_BUSY || atomic_read(&res->ref_count) || + !list_empty(&res->mcg_list)) { + pr_err("resource tracker: fail to remove qp, state %d, ref_count %d\n", + res->com.state, atomic_read(&res->ref_count)); + return -EBUSY; + } else if (res->com.state != RES_QP_RESERVED) { + return -EPERM; + } + + return 0; +} + +static int remove_mtt_ok(struct res_mtt *res, int order) +{ + if (res->com.state == RES_MTT_BUSY || + atomic_read(&res->ref_count)) { + pr_devel("%s-%d: state %s, ref_count %d\n", + __func__, __LINE__, + mtt_states_str(res->com.state), + atomic_read(&res->ref_count)); + return -EBUSY; + } else if (res->com.state != RES_MTT_ALLOCATED) + return -EPERM; + else if (res->order != order) + return -EINVAL; + + return 0; +} + +static int remove_mpt_ok(struct res_mpt *res) +{ + if (res->com.state == RES_MPT_BUSY) + return -EBUSY; + else if (res->com.state != RES_MPT_RESERVED) + return -EPERM; + + return 0; +} + +static int remove_eq_ok(struct res_eq *res) +{ + if (res->com.state == RES_MPT_BUSY) + return -EBUSY; + else if (res->com.state != RES_MPT_RESERVED) + return -EPERM; + + return 0; +} + +static int remove_counter_ok(struct res_counter *res) +{ + if (res->com.state == RES_COUNTER_BUSY) + return -EBUSY; + else if (res->com.state != RES_COUNTER_ALLOCATED) + return -EPERM; + + return 0; +} + +static int remove_xrcdn_ok(struct res_xrcdn *res) +{ + if (res->com.state == RES_XRCD_BUSY) + return -EBUSY; + else if (res->com.state != RES_XRCD_ALLOCATED) + return -EPERM; + + return 0; +} + +static int remove_fs_rule_ok(struct res_fs_rule *res) +{ + if (res->com.state == RES_FS_RULE_BUSY) + return -EBUSY; + else if (res->com.state != RES_FS_RULE_ALLOCATED) + return -EPERM; + + return 0; +} + +static int remove_cq_ok(struct res_cq *res) +{ + if (res->com.state == RES_CQ_BUSY) + return -EBUSY; + else if (res->com.state != RES_CQ_ALLOCATED) + return -EPERM; + + return 0; +} + +static int remove_srq_ok(struct res_srq *res) +{ + if (res->com.state == RES_SRQ_BUSY) + return -EBUSY; + else if (res->com.state != RES_SRQ_ALLOCATED) + return -EPERM; + + return 0; +} + +static int remove_ok(struct res_common *res, enum mlx4_resource type, int extra) +{ + switch (type) { + case RES_QP: + return remove_qp_ok((struct res_qp *)res); + case RES_CQ: + return remove_cq_ok((struct res_cq *)res); + case RES_SRQ: + return remove_srq_ok((struct res_srq *)res); + case RES_MPT: + return remove_mpt_ok((struct res_mpt *)res); + case RES_MTT: + return remove_mtt_ok((struct res_mtt *)res, extra); + case RES_MAC: + return -EOPNOTSUPP; + case RES_EQ: + return remove_eq_ok((struct res_eq *)res); + case RES_COUNTER: + return remove_counter_ok((struct 
res_counter *)res); + case RES_XRCD: + return remove_xrcdn_ok((struct res_xrcdn *)res); + case RES_FS_RULE: + return remove_fs_rule_ok((struct res_fs_rule *)res); + default: + return -EINVAL; + } +} + +static int rem_res_range(struct mlx4_dev *dev, int slave, u64 base, int count, + enum mlx4_resource type, int extra) +{ + u64 i; + int err; + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct res_common *r; + + spin_lock_irq(mlx4_tlock(dev)); + for (i = base; i < base + count; ++i) { + r = res_tracker_lookup(&tracker->res_tree[type], i); + if (!r) { + err = -ENOENT; + goto out; + } + if (r->owner != slave) { + err = -EPERM; + goto out; + } + err = remove_ok(r, type, extra); + if (err) + goto out; + } + + for (i = base; i < base + count; ++i) { + r = res_tracker_lookup(&tracker->res_tree[type], i); + rb_erase(&r->node, &tracker->res_tree[type]); + list_del(&r->list); + kfree(r); + } + err = 0; + +out: + spin_unlock_irq(mlx4_tlock(dev)); + + return err; +} + +static int qp_res_start_move_to(struct mlx4_dev *dev, int slave, int qpn, + enum res_qp_states state, struct res_qp **qp, + int alloc) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct res_qp *r; + int err = 0; + + spin_lock_irq(mlx4_tlock(dev)); + r = res_tracker_lookup(&tracker->res_tree[RES_QP], qpn); + if (!r) + err = -ENOENT; + else if (r->com.owner != slave) + err = -EPERM; + else { + switch (state) { + case RES_QP_BUSY: + mlx4_dbg(dev, "%s: failed RES_QP, 0x%llx\n", + __func__, r->com.res_id); + err = -EBUSY; + break; + + case RES_QP_RESERVED: + if (r->com.state == RES_QP_MAPPED && !alloc) + break; + + mlx4_dbg(dev, "failed RES_QP, 0x%llx\n", r->com.res_id); + err = -EINVAL; + break; + + case RES_QP_MAPPED: + if ((r->com.state == RES_QP_RESERVED && alloc) || + r->com.state == RES_QP_HW) + break; + else { + mlx4_dbg(dev, "failed RES_QP, 0x%llx\n", + r->com.res_id); + err = -EINVAL; + } + + break; + + case RES_QP_HW: + if (r->com.state != RES_QP_MAPPED) + err = -EINVAL; + break; + default: + err = -EINVAL; + } + + if (!err) { + r->com.from_state = r->com.state; + r->com.to_state = state; + r->com.state = RES_QP_BUSY; + if (qp) + *qp = r; + } + } + + spin_unlock_irq(mlx4_tlock(dev)); + + return err; +} + +static int mr_res_start_move_to(struct mlx4_dev *dev, int slave, int index, + enum res_mpt_states state, struct res_mpt **mpt) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct res_mpt *r; + int err = 0; + + spin_lock_irq(mlx4_tlock(dev)); + r = res_tracker_lookup(&tracker->res_tree[RES_MPT], index); + if (!r) + err = -ENOENT; + else if (r->com.owner != slave) + err = -EPERM; + else { + switch (state) { + case RES_MPT_BUSY: + err = -EINVAL; + break; + + case RES_MPT_RESERVED: + if (r->com.state != RES_MPT_MAPPED) + err = -EINVAL; + break; + + case RES_MPT_MAPPED: + if (r->com.state != RES_MPT_RESERVED && + r->com.state != RES_MPT_HW) + err = -EINVAL; + break; + + case RES_MPT_HW: + if (r->com.state != RES_MPT_MAPPED) + err = -EINVAL; + break; + default: + err = -EINVAL; + } + + if (!err) { + r->com.from_state = r->com.state; + r->com.to_state = state; + r->com.state = RES_MPT_BUSY; + if (mpt) + *mpt = r; + } + } + + spin_unlock_irq(mlx4_tlock(dev)); + + return err; +} + +static int eq_res_start_move_to(struct mlx4_dev *dev, int slave, int index, + enum res_eq_states state, struct res_eq **eq) +{ + 
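+ /* same pattern as the QP/MPT movers above: validate the transition, then hold the EQ busy until res_end_move() or res_abort_move() */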
struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct res_eq *r; + int err = 0; + + spin_lock_irq(mlx4_tlock(dev)); + r = res_tracker_lookup(&tracker->res_tree[RES_EQ], index); + if (!r) + err = -ENOENT; + else if (r->com.owner != slave) + err = -EPERM; + else { + switch (state) { + case RES_EQ_BUSY: + err = -EINVAL; + break; + + case RES_EQ_RESERVED: + if (r->com.state != RES_EQ_HW) + err = -EINVAL; + break; + + case RES_EQ_HW: + if (r->com.state != RES_EQ_RESERVED) + err = -EINVAL; + break; + + default: + err = -EINVAL; + } + + if (!err) { + r->com.from_state = r->com.state; + r->com.to_state = state; + r->com.state = RES_EQ_BUSY; + } + } + + spin_unlock_irq(mlx4_tlock(dev)); + + if (!err && eq) + *eq = r; + + return err; +} + +static int cq_res_start_move_to(struct mlx4_dev *dev, int slave, int cqn, + enum res_cq_states state, struct res_cq **cq) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct res_cq *r; + int err; + + spin_lock_irq(mlx4_tlock(dev)); + r = res_tracker_lookup(&tracker->res_tree[RES_CQ], cqn); + if (!r) { + err = -ENOENT; + } else if (r->com.owner != slave) { + err = -EPERM; + } else if (state == RES_CQ_ALLOCATED) { + if (r->com.state != RES_CQ_HW) + err = -EINVAL; + else if (atomic_read(&r->ref_count)) + err = -EBUSY; + else + err = 0; + } else if (state != RES_CQ_HW || r->com.state != RES_CQ_ALLOCATED) { + err = -EINVAL; + } else { + err = 0; + } + + if (!err) { + r->com.from_state = r->com.state; + r->com.to_state = state; + r->com.state = RES_CQ_BUSY; + if (cq) + *cq = r; + } + + spin_unlock_irq(mlx4_tlock(dev)); + + return err; +} + +static int srq_res_start_move_to(struct mlx4_dev *dev, int slave, int index, + enum res_srq_states state, struct res_srq **srq) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct res_srq *r; + int err = 0; + + spin_lock_irq(mlx4_tlock(dev)); + r = res_tracker_lookup(&tracker->res_tree[RES_SRQ], index); + if (!r) { + err = -ENOENT; + } else if (r->com.owner != slave) { + err = -EPERM; + } else if (state == RES_SRQ_ALLOCATED) { + if (r->com.state != RES_SRQ_HW) + err = -EINVAL; + else if (atomic_read(&r->ref_count)) + err = -EBUSY; + } else if (state != RES_SRQ_HW || r->com.state != RES_SRQ_ALLOCATED) { + err = -EINVAL; + } + + if (!err) { + r->com.from_state = r->com.state; + r->com.to_state = state; + r->com.state = RES_SRQ_BUSY; + if (srq) + *srq = r; + } + + spin_unlock_irq(mlx4_tlock(dev)); + + return err; +} + +static void res_abort_move(struct mlx4_dev *dev, int slave, + enum mlx4_resource type, int id) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct res_common *r; + + spin_lock_irq(mlx4_tlock(dev)); + r = res_tracker_lookup(&tracker->res_tree[type], id); + if (r && (r->owner == slave)) + r->state = r->from_state; + spin_unlock_irq(mlx4_tlock(dev)); +} + +static void res_end_move(struct mlx4_dev *dev, int slave, + enum mlx4_resource type, int id) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct res_common *r; + + spin_lock_irq(mlx4_tlock(dev)); + r = res_tracker_lookup(&tracker->res_tree[type], id); + if (r && (r->owner == slave)) + r->state = r->to_state; + spin_unlock_irq(mlx4_tlock(dev)); +} + +static int valid_reserved(struct mlx4_dev *dev, 
int slave, int qpn) +{ + return mlx4_is_qp_reserved(dev, qpn) && + (mlx4_is_master(dev) || mlx4_is_guest_proxy(dev, slave, qpn)); +} + +static int fw_reserved(struct mlx4_dev *dev, int qpn) +{ + return qpn < dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW]; +} + +static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, + u64 in_param, u64 *out_param) +{ + int err; + int count; + int align; + int base; + int qpn; + u8 flags; + + switch (op) { + case RES_OP_RESERVE: + count = get_param_l(&in_param) & 0xffffff; + /* Turn off all unsupported QP allocation flags that the + * slave tries to set. + */ + flags = (get_param_l(&in_param) >> 24) & dev->caps.alloc_res_qp_mask; + align = get_param_h(&in_param); + err = mlx4_grant_resource(dev, slave, RES_QP, count, 0); + if (err) + return err; + + err = __mlx4_qp_reserve_range(dev, count, align, &base, flags); + if (err) { + mlx4_release_resource(dev, slave, RES_QP, count, 0); + return err; + } + + err = add_res_range(dev, slave, base, count, RES_QP, 0); + if (err) { + mlx4_release_resource(dev, slave, RES_QP, count, 0); + __mlx4_qp_release_range(dev, base, count); + return err; + } + set_param_l(out_param, base); + break; + case RES_OP_MAP_ICM: + qpn = get_param_l(&in_param) & 0x7fffff; + if (valid_reserved(dev, slave, qpn)) { + err = add_res_range(dev, slave, qpn, 1, RES_QP, 0); + if (err) + return err; + } + + err = qp_res_start_move_to(dev, slave, qpn, RES_QP_MAPPED, + NULL, 1); + if (err) + return err; + + if (!fw_reserved(dev, qpn)) { + err = __mlx4_qp_alloc_icm(dev, qpn); + if (err) { + res_abort_move(dev, slave, RES_QP, qpn); + return err; + } + } + + res_end_move(dev, slave, RES_QP, qpn); + break; + + default: + err = -EINVAL; + break; + } + return err; +} + +static int mtt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, + u64 in_param, u64 *out_param) +{ + int err = -EINVAL; + int base; + int order; + + if (op != RES_OP_RESERVE_AND_MAP) + return err; + + order = get_param_l(&in_param); + + err = mlx4_grant_resource(dev, slave, RES_MTT, 1 << order, 0); + if (err) + return err; + + base = __mlx4_alloc_mtt_range(dev, order); + if (base == -1) { + mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0); + return -ENOMEM; + } + + err = add_res_range(dev, slave, base, 1, RES_MTT, order); + if (err) { + mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0); + __mlx4_free_mtt_range(dev, base, order); + } else { + set_param_l(out_param, base); + } + + return err; +} + +static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, + u64 in_param, u64 *out_param) +{ + int err = -EINVAL; + int index; + int id; + struct res_mpt *mpt; + + switch (op) { + case RES_OP_RESERVE: + err = mlx4_grant_resource(dev, slave, RES_MPT, 1, 0); + if (err) + break; + + index = __mlx4_mpt_reserve(dev); + if (index == -1) { + mlx4_release_resource(dev, slave, RES_MPT, 1, 0); + break; + } + id = index & mpt_mask(dev); + + err = add_res_range(dev, slave, id, 1, RES_MPT, index); + if (err) { + mlx4_release_resource(dev, slave, RES_MPT, 1, 0); + __mlx4_mpt_release(dev, index); + break; + } + set_param_l(out_param, index); + break; + case RES_OP_MAP_ICM: + index = get_param_l(&in_param); + id = index & mpt_mask(dev); + err = mr_res_start_move_to(dev, slave, id, + RES_MPT_MAPPED, &mpt); + if (err) + return err; + + err = __mlx4_mpt_alloc_icm(dev, mpt->key); + if (err) { + res_abort_move(dev, slave, RES_MPT, id); + return err; + } + + res_end_move(dev, slave, RES_MPT, id); + break; + } + return err; +} + +static int cq_alloc_res(struct 
mlx4_dev *dev, int slave, int op, int cmd, + u64 in_param, u64 *out_param) +{ + int cqn; + int err; + + switch (op) { + case RES_OP_RESERVE_AND_MAP: + err = mlx4_grant_resource(dev, slave, RES_CQ, 1, 0); + if (err) + break; + + err = __mlx4_cq_alloc_icm(dev, &cqn); + if (err) { + mlx4_release_resource(dev, slave, RES_CQ, 1, 0); + break; + } + + err = add_res_range(dev, slave, cqn, 1, RES_CQ, 0); + if (err) { + mlx4_release_resource(dev, slave, RES_CQ, 1, 0); + __mlx4_cq_free_icm(dev, cqn); + break; + } + + set_param_l(out_param, cqn); + break; + + default: + err = -EINVAL; + } + + return err; +} + +static int srq_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, + u64 in_param, u64 *out_param) +{ + int srqn; + int err; + + switch (op) { + case RES_OP_RESERVE_AND_MAP: + err = mlx4_grant_resource(dev, slave, RES_SRQ, 1, 0); + if (err) + break; + + err = __mlx4_srq_alloc_icm(dev, &srqn); + if (err) { + mlx4_release_resource(dev, slave, RES_SRQ, 1, 0); + break; + } + + err = add_res_range(dev, slave, srqn, 1, RES_SRQ, 0); + if (err) { + mlx4_release_resource(dev, slave, RES_SRQ, 1, 0); + __mlx4_srq_free_icm(dev, srqn); + break; + } + + set_param_l(out_param, srqn); + break; + + default: + err = -EINVAL; + } + + return err; +} + +static int mac_find_smac_ix_in_slave(struct mlx4_dev *dev, int slave, int port, + u8 smac_index, u64 *mac) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct list_head *mac_list = + &tracker->slave_list[slave].res_list[RES_MAC]; + struct mac_res *res, *tmp; + + list_for_each_entry_safe(res, tmp, mac_list, list) { + if (res->smac_index == smac_index && res->port == (u8) port) { + *mac = res->mac; + return 0; + } + } + return -ENOENT; +} + +static int mac_add_to_slave(struct mlx4_dev *dev, int slave, u64 mac, int port, u8 smac_index) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct list_head *mac_list = + &tracker->slave_list[slave].res_list[RES_MAC]; + struct mac_res *res, *tmp; + + list_for_each_entry_safe(res, tmp, mac_list, list) { + if (res->mac == mac && res->port == (u8) port) { + /* mac found. 
update ref count */ + ++res->ref_count; + return 0; + } + } + + if (mlx4_grant_resource(dev, slave, RES_MAC, 1, port)) + return -EINVAL; + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) { + mlx4_release_resource(dev, slave, RES_MAC, 1, port); + return -ENOMEM; + } + res->mac = mac; + res->port = (u8) port; + res->smac_index = smac_index; + res->ref_count = 1; + list_add_tail(&res->list, + &tracker->slave_list[slave].res_list[RES_MAC]); + return 0; +} + +static void mac_del_from_slave(struct mlx4_dev *dev, int slave, u64 mac, + int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct list_head *mac_list = + &tracker->slave_list[slave].res_list[RES_MAC]; + struct mac_res *res, *tmp; + + list_for_each_entry_safe(res, tmp, mac_list, list) { + if (res->mac == mac && res->port == (u8) port) { + if (!--res->ref_count) { + list_del(&res->list); + mlx4_release_resource(dev, slave, RES_MAC, 1, port); + kfree(res); + } + break; + } + } +} + +static void rem_slave_macs(struct mlx4_dev *dev, int slave) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct list_head *mac_list = + &tracker->slave_list[slave].res_list[RES_MAC]; + struct mac_res *res, *tmp; + int i; + + list_for_each_entry_safe(res, tmp, mac_list, list) { + list_del(&res->list); + /* dereference the mac the num times the slave referenced it */ + for (i = 0; i < res->ref_count; i++) + __mlx4_unregister_mac(dev, res->port, res->mac); + mlx4_release_resource(dev, slave, RES_MAC, 1, res->port); + kfree(res); + } +} + +static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, + u64 in_param, u64 *out_param, int in_port) +{ + int err = -EINVAL; + int port; + u64 mac; + u8 smac_index; + + if (op != RES_OP_RESERVE_AND_MAP) + return err; + + port = !in_port ? get_param_l(out_param) : in_port; + port = mlx4_slave_convert_port( + dev, slave, port); + + if (port < 0) + return -EINVAL; + mac = in_param; + + err = __mlx4_register_mac(dev, port, mac); + if (err >= 0) { + smac_index = err; + set_param_l(out_param, err); + err = 0; + } + + if (!err) { + err = mac_add_to_slave(dev, slave, mac, port, smac_index); + if (err) + __mlx4_unregister_mac(dev, port, mac); + } + return err; +} + +static int vlan_add_to_slave(struct mlx4_dev *dev, int slave, u16 vlan, + int port, int vlan_index) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct list_head *vlan_list = + &tracker->slave_list[slave].res_list[RES_VLAN]; + struct vlan_res *res, *tmp; + + list_for_each_entry_safe(res, tmp, vlan_list, list) { + if (res->vlan == vlan && res->port == (u8) port) { + /* vlan found. 
update ref count */ + ++res->ref_count; + return 0; + } + } + + if (mlx4_grant_resource(dev, slave, RES_VLAN, 1, port)) + return -EINVAL; + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) { + mlx4_release_resource(dev, slave, RES_VLAN, 1, port); + return -ENOMEM; + } + res->vlan = vlan; + res->port = (u8) port; + res->vlan_index = vlan_index; + res->ref_count = 1; + list_add_tail(&res->list, + &tracker->slave_list[slave].res_list[RES_VLAN]); + return 0; +} + + +static void vlan_del_from_slave(struct mlx4_dev *dev, int slave, u16 vlan, + int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct list_head *vlan_list = + &tracker->slave_list[slave].res_list[RES_VLAN]; + struct vlan_res *res, *tmp; + + list_for_each_entry_safe(res, tmp, vlan_list, list) { + if (res->vlan == vlan && res->port == (u8) port) { + if (!--res->ref_count) { + list_del(&res->list); + mlx4_release_resource(dev, slave, RES_VLAN, + 1, port); + kfree(res); + } + break; + } + } +} + +static void rem_slave_vlans(struct mlx4_dev *dev, int slave) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct list_head *vlan_list = + &tracker->slave_list[slave].res_list[RES_VLAN]; + struct vlan_res *res, *tmp; + int i; + + list_for_each_entry_safe(res, tmp, vlan_list, list) { + list_del(&res->list); + /* dereference the vlan the num times the slave referenced it */ + for (i = 0; i < res->ref_count; i++) + __mlx4_unregister_vlan(dev, res->port, res->vlan); + mlx4_release_resource(dev, slave, RES_VLAN, 1, res->port); + kfree(res); + } +} + +static int vlan_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, + u64 in_param, u64 *out_param, int in_port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state; + int err; + u16 vlan; + int vlan_index; + int port; + + port = !in_port ? get_param_l(out_param) : in_port; + + if (!port || op != RES_OP_RESERVE_AND_MAP) + return -EINVAL; + + port = mlx4_slave_convert_port( + dev, slave, port); + + if (port < 0) + return -EINVAL; + /* upstream kernels had NOP for reg/unreg vlan. Continue this. 
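+ * When the port was passed via out_param (in_port == 0) this is the legacy API: flag it and return success without registering a VLAN.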
*/ + if (!in_port && port > 0 && port <= dev->caps.num_ports) { + slave_state[slave].old_vlan_api = true; + return 0; + } + + vlan = (u16) in_param; + + err = __mlx4_register_vlan(dev, port, vlan, &vlan_index); + if (!err) { + set_param_l(out_param, (u32) vlan_index); + err = vlan_add_to_slave(dev, slave, vlan, port, vlan_index); + if (err) + __mlx4_unregister_vlan(dev, port, vlan); + } + return err; +} + +static int counter_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, + u64 in_param, u64 *out_param, int port) +{ + u32 index; + int err; + + if (op != RES_OP_RESERVE) + return -EINVAL; + + err = mlx4_grant_resource(dev, slave, RES_COUNTER, 1, 0); + if (err) + return err; + + err = __mlx4_counter_alloc(dev, &index); + if (err) { + mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0); + return err; + } + + err = add_res_range(dev, slave, index, 1, RES_COUNTER, port); + if (err) { + __mlx4_counter_free(dev, index); + mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0); + } else { + set_param_l(out_param, index); + } + + return err; +} + +static int xrcdn_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, + u64 in_param, u64 *out_param) +{ + u32 xrcdn; + int err; + + if (op != RES_OP_RESERVE) + return -EINVAL; + + err = __mlx4_xrcd_alloc(dev, &xrcdn); + if (err) + return err; + + err = add_res_range(dev, slave, xrcdn, 1, RES_XRCD, 0); + if (err) + __mlx4_xrcd_free(dev, xrcdn); + else + set_param_l(out_param, xrcdn); + + return err; +} + +int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + int alop = vhcr->op_modifier; + + switch (vhcr->in_modifier & 0xFF) { + case RES_QP: + err = qp_alloc_res(dev, slave, vhcr->op_modifier, alop, + vhcr->in_param, &vhcr->out_param); + break; + + case RES_MTT: + err = mtt_alloc_res(dev, slave, vhcr->op_modifier, alop, + vhcr->in_param, &vhcr->out_param); + break; + + case RES_MPT: + err = mpt_alloc_res(dev, slave, vhcr->op_modifier, alop, + vhcr->in_param, &vhcr->out_param); + break; + + case RES_CQ: + err = cq_alloc_res(dev, slave, vhcr->op_modifier, alop, + vhcr->in_param, &vhcr->out_param); + break; + + case RES_SRQ: + err = srq_alloc_res(dev, slave, vhcr->op_modifier, alop, + vhcr->in_param, &vhcr->out_param); + break; + + case RES_MAC: + err = mac_alloc_res(dev, slave, vhcr->op_modifier, alop, + vhcr->in_param, &vhcr->out_param, + (vhcr->in_modifier >> 8) & 0xFF); + break; + + case RES_VLAN: + err = vlan_alloc_res(dev, slave, vhcr->op_modifier, alop, + vhcr->in_param, &vhcr->out_param, + (vhcr->in_modifier >> 8) & 0xFF); + break; + + case RES_COUNTER: + err = counter_alloc_res(dev, slave, vhcr->op_modifier, alop, + vhcr->in_param, &vhcr->out_param, 0); + break; + + case RES_XRCD: + err = xrcdn_alloc_res(dev, slave, vhcr->op_modifier, alop, + vhcr->in_param, &vhcr->out_param); + break; + + default: + err = -EINVAL; + break; + } + + return err; +} + +static int qp_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, + u64 in_param) +{ + int err; + int count; + int base; + int qpn; + + switch (op) { + case RES_OP_RESERVE: + base = get_param_l(&in_param) & 0x7fffff; + count = get_param_h(&in_param); + err = rem_res_range(dev, slave, base, count, RES_QP, 0); + if (err) + break; + mlx4_release_resource(dev, slave, RES_QP, count, 0); + __mlx4_qp_release_range(dev, base, count); + break; + case RES_OP_MAP_ICM: + qpn = get_param_l(&in_param) & 0x7fffff; + err = qp_res_start_move_to(dev, 
slave, qpn, RES_QP_RESERVED, + NULL, 0); + if (err) + return err; + + if (!fw_reserved(dev, qpn)) + __mlx4_qp_free_icm(dev, qpn); + + res_end_move(dev, slave, RES_QP, qpn); + + if (valid_reserved(dev, slave, qpn)) + err = rem_res_range(dev, slave, qpn, 1, RES_QP, 0); + break; + default: + err = -EINVAL; + break; + } + return err; +} + +static int mtt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, + u64 in_param, u64 *out_param) +{ + int err = -EINVAL; + int base; + int order; + + if (op != RES_OP_RESERVE_AND_MAP) + return err; + + base = get_param_l(&in_param); + order = get_param_h(&in_param); + err = rem_res_range(dev, slave, base, 1, RES_MTT, order); + if (!err) { + mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0); + __mlx4_free_mtt_range(dev, base, order); + } + return err; +} + +static int mpt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, + u64 in_param) +{ + int err = -EINVAL; + int index; + int id; + struct res_mpt *mpt; + + switch (op) { + case RES_OP_RESERVE: + index = get_param_l(&in_param); + id = index & mpt_mask(dev); + err = get_res(dev, slave, id, RES_MPT, &mpt); + if (err) + break; + index = mpt->key; + put_res(dev, slave, id, RES_MPT); + + err = rem_res_range(dev, slave, id, 1, RES_MPT, 0); + if (err) + break; + mlx4_release_resource(dev, slave, RES_MPT, 1, 0); + __mlx4_mpt_release(dev, index); + break; + case RES_OP_MAP_ICM: + index = get_param_l(&in_param); + id = index & mpt_mask(dev); + err = mr_res_start_move_to(dev, slave, id, + RES_MPT_RESERVED, &mpt); + if (err) + return err; + + __mlx4_mpt_free_icm(dev, mpt->key); + res_end_move(dev, slave, RES_MPT, id); + break; + default: + err = -EINVAL; + break; + } + return err; +} + +static int cq_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, + u64 in_param, u64 *out_param) +{ + int cqn; + int err; + + switch (op) { + case RES_OP_RESERVE_AND_MAP: + cqn = get_param_l(&in_param); + err = rem_res_range(dev, slave, cqn, 1, RES_CQ, 0); + if (err) + break; + + mlx4_release_resource(dev, slave, RES_CQ, 1, 0); + __mlx4_cq_free_icm(dev, cqn); + break; + + default: + err = -EINVAL; + break; + } + + return err; +} + +static int srq_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, + u64 in_param, u64 *out_param) +{ + int srqn; + int err; + + switch (op) { + case RES_OP_RESERVE_AND_MAP: + srqn = get_param_l(&in_param); + err = rem_res_range(dev, slave, srqn, 1, RES_SRQ, 0); + if (err) + break; + + mlx4_release_resource(dev, slave, RES_SRQ, 1, 0); + __mlx4_srq_free_icm(dev, srqn); + break; + + default: + err = -EINVAL; + break; + } + + return err; +} + +static int mac_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, + u64 in_param, u64 *out_param, int in_port) +{ + int port; + int err = 0; + + switch (op) { + case RES_OP_RESERVE_AND_MAP: + port = !in_port ? 
get_param_l(out_param) : in_port; + port = mlx4_slave_convert_port( + dev, slave, port); + + if (port < 0) + return -EINVAL; + mac_del_from_slave(dev, slave, in_param, port); + __mlx4_unregister_mac(dev, port, in_param); + break; + default: + err = -EINVAL; + break; + } + + return err; + +} + +static int vlan_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, + u64 in_param, u64 *out_param, int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state; + int err = 0; + + port = mlx4_slave_convert_port( + dev, slave, port); + + if (port < 0) + return -EINVAL; + switch (op) { + case RES_OP_RESERVE_AND_MAP: + if (slave_state[slave].old_vlan_api) + return 0; + if (!port) + return -EINVAL; + vlan_del_from_slave(dev, slave, in_param, port); + __mlx4_unregister_vlan(dev, port, in_param); + break; + default: + err = -EINVAL; + break; + } + + return err; +} + +static int counter_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, + u64 in_param, u64 *out_param) +{ + int index; + int err; + + if (op != RES_OP_RESERVE) + return -EINVAL; + + index = get_param_l(&in_param); + if (index == MLX4_SINK_COUNTER_INDEX(dev)) + return 0; + + err = rem_res_range(dev, slave, index, 1, RES_COUNTER, 0); + if (err) + return err; + + __mlx4_counter_free(dev, index); + mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0); + + return err; +} + +static int xrcdn_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, + u64 in_param, u64 *out_param) +{ + int xrcdn; + int err; + + if (op != RES_OP_RESERVE) + return -EINVAL; + + xrcdn = get_param_l(&in_param); + err = rem_res_range(dev, slave, xrcdn, 1, RES_XRCD, 0); + if (err) + return err; + + __mlx4_xrcd_free(dev, xrcdn); + + return err; +} + +int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err = -EINVAL; + int alop = vhcr->op_modifier; + + switch (vhcr->in_modifier & 0xFF) { + case RES_QP: + err = qp_free_res(dev, slave, vhcr->op_modifier, alop, + vhcr->in_param); + break; + + case RES_MTT: + err = mtt_free_res(dev, slave, vhcr->op_modifier, alop, + vhcr->in_param, &vhcr->out_param); + break; + + case RES_MPT: + err = mpt_free_res(dev, slave, vhcr->op_modifier, alop, + vhcr->in_param); + break; + + case RES_CQ: + err = cq_free_res(dev, slave, vhcr->op_modifier, alop, + vhcr->in_param, &vhcr->out_param); + break; + + case RES_SRQ: + err = srq_free_res(dev, slave, vhcr->op_modifier, alop, + vhcr->in_param, &vhcr->out_param); + break; + + case RES_MAC: + err = mac_free_res(dev, slave, vhcr->op_modifier, alop, + vhcr->in_param, &vhcr->out_param, + (vhcr->in_modifier >> 8) & 0xFF); + break; + + case RES_VLAN: + err = vlan_free_res(dev, slave, vhcr->op_modifier, alop, + vhcr->in_param, &vhcr->out_param, + (vhcr->in_modifier >> 8) & 0xFF); + break; + + case RES_COUNTER: + err = counter_free_res(dev, slave, vhcr->op_modifier, alop, + vhcr->in_param, &vhcr->out_param); + break; + + case RES_XRCD: + err = xrcdn_free_res(dev, slave, vhcr->op_modifier, alop, + vhcr->in_param, &vhcr->out_param); + break; + + default: + break; + } + return err; +} + +/* ugly but other choices are uglier */ +static int mr_phys_mpt(struct mlx4_mpt_entry *mpt) +{ + return (be32_to_cpu(mpt->flags) >> 9) & 1; +} + +static int mr_get_mtt_addr(struct mlx4_mpt_entry *mpt) +{ + return (int)be64_to_cpu(mpt->mtt_addr) & 0xfffffff8; +} + +static int mr_get_mtt_size(struct mlx4_mpt_entry 
*mpt) +{ + return be32_to_cpu(mpt->mtt_sz); +} + +static u32 mr_get_pd(struct mlx4_mpt_entry *mpt) +{ + return be32_to_cpu(mpt->pd_flags) & 0x00ffffff; +} + +static int mr_is_fmr(struct mlx4_mpt_entry *mpt) +{ + return be32_to_cpu(mpt->pd_flags) & MLX4_MPT_PD_FLAG_FAST_REG; +} + +static int mr_is_bind_enabled(struct mlx4_mpt_entry *mpt) +{ + return be32_to_cpu(mpt->flags) & MLX4_MPT_FLAG_BIND_ENABLE; +} + +static int mr_is_region(struct mlx4_mpt_entry *mpt) +{ + return be32_to_cpu(mpt->flags) & MLX4_MPT_FLAG_REGION; +} + +static int qp_get_mtt_addr(struct mlx4_qp_context *qpc) +{ + return be32_to_cpu(qpc->mtt_base_addr_l) & 0xfffffff8; +} + +static int srq_get_mtt_addr(struct mlx4_srq_context *srqc) +{ + return be32_to_cpu(srqc->mtt_base_addr_l) & 0xfffffff8; +} + +static int qp_get_mtt_size(struct mlx4_qp_context *qpc) +{ + int page_shift = (qpc->log_page_size & 0x3f) + 12; + int log_sq_size = (qpc->sq_size_stride >> 3) & 0xf; + int log_sq_sride = qpc->sq_size_stride & 7; + int log_rq_size = (qpc->rq_size_stride >> 3) & 0xf; + int log_rq_stride = qpc->rq_size_stride & 7; + int srq = (be32_to_cpu(qpc->srqn) >> 24) & 1; + int rss = (be32_to_cpu(qpc->flags) >> 13) & 1; + u32 ts = (be32_to_cpu(qpc->flags) >> 16) & 0xff; + int xrc = (ts == MLX4_QP_ST_XRC) ? 1 : 0; + int sq_size; + int rq_size; + int total_pages; + int total_mem; + int page_offset = (be32_to_cpu(qpc->params2) >> 6) & 0x3f; + int tot; + + sq_size = 1 << (log_sq_size + log_sq_sride + 4); + rq_size = (srq|rss|xrc) ? 0 : (1 << (log_rq_size + log_rq_stride + 4)); + total_mem = sq_size + rq_size; + tot = (total_mem + (page_offset << 6)) >> page_shift; + total_pages = !tot ? 1 : roundup_pow_of_two(tot); + + return total_pages; +} + +static int check_mtt_range(struct mlx4_dev *dev, int slave, int start, + int size, struct res_mtt *mtt) +{ + int res_start = mtt->com.res_id; + int res_size = (1 << mtt->order); + + if (start < res_start || start + size > res_start + res_size) + return -EPERM; + return 0; +} + +int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + int index = vhcr->in_modifier; + struct res_mtt *mtt; + struct res_mpt *mpt = NULL; + int mtt_base = mr_get_mtt_addr(inbox->buf) / dev->caps.mtt_entry_sz; + int phys; + int id; + u32 pd; + int pd_slave; + + id = index & mpt_mask(dev); + err = mr_res_start_move_to(dev, slave, id, RES_MPT_HW, &mpt); + if (err) + return err; + + /* Disable memory windows for VFs. */ + if (!mr_is_region(inbox->buf)) { + err = -EPERM; + goto ex_abort; + } + + /* Make sure that the PD bits related to the slave id are zeros. */ + pd = mr_get_pd(inbox->buf); + pd_slave = (pd >> 17) & 0x7f; + if (pd_slave != 0 && --pd_slave != slave) { + err = -EPERM; + goto ex_abort; + } + + if (mr_is_fmr(inbox->buf)) { + /* FMR and Bind Enable are forbidden in slave devices. */ + if (mr_is_bind_enabled(inbox->buf)) { + err = -EPERM; + goto ex_abort; + } + /* FMR and Memory Windows are also forbidden. 
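+ * (an FMR that is not flagged as a region would be a memory window)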
*/ + if (!mr_is_region(inbox->buf)) { + err = -EPERM; + goto ex_abort; + } + } + + phys = mr_phys_mpt(inbox->buf); + if (!phys) { + err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); + if (err) + goto ex_abort; + + err = check_mtt_range(dev, slave, mtt_base, + mr_get_mtt_size(inbox->buf), mtt); + if (err) + goto ex_put; + + mpt->mtt = mtt; + } + + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); + if (err) + goto ex_put; + + if (!phys) { + atomic_inc(&mtt->ref_count); + put_res(dev, slave, mtt->com.res_id, RES_MTT); + } + + res_end_move(dev, slave, RES_MPT, id); + return 0; + +ex_put: + if (!phys) + put_res(dev, slave, mtt->com.res_id, RES_MTT); +ex_abort: + res_abort_move(dev, slave, RES_MPT, id); + + return err; +} + +int mlx4_HW2SW_MPT_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + int index = vhcr->in_modifier; + struct res_mpt *mpt; + int id; + + id = index & mpt_mask(dev); + err = mr_res_start_move_to(dev, slave, id, RES_MPT_MAPPED, &mpt); + if (err) + return err; + + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); + if (err) + goto ex_abort; + + if (mpt->mtt) + atomic_dec(&mpt->mtt->ref_count); + + res_end_move(dev, slave, RES_MPT, id); + return 0; + +ex_abort: + res_abort_move(dev, slave, RES_MPT, id); + + return err; +} + +int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + int index = vhcr->in_modifier; + struct res_mpt *mpt; + int id; + + id = index & mpt_mask(dev); + err = get_res(dev, slave, id, RES_MPT, &mpt); + if (err) + return err; + + if (mpt->com.from_state == RES_MPT_MAPPED) { + /* In order to allow rereg in SRIOV, we need to alter the MPT entry. To do + * that, the VF must read the MPT. But since the MPT entry memory is not + * in the VF's virtual memory space, it must use QUERY_MPT to obtain the + * entry contents. To guarantee that the MPT cannot be changed, the driver + * must perform HW2SW_MPT before this query and return the MPT entry to HW + * ownership fofollowing the change. The change here allows the VF to + * perform QUERY_MPT also when the entry is in SW ownership. 
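+ * In that case the entry is read directly from the dMPT ICM table rather than through the firmware command.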
+ */ + struct mlx4_mpt_entry *mpt_entry = mlx4_table_find( + &mlx4_priv(dev)->mr_table.dmpt_table, + mpt->key, NULL); + + if (NULL == mpt_entry || NULL == outbox->buf) { + err = -EINVAL; + goto out; + } + + memcpy(outbox->buf, mpt_entry, sizeof(*mpt_entry)); + + err = 0; + } else if (mpt->com.from_state == RES_MPT_HW) { + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); + } else { + err = -EBUSY; + goto out; + } + + +out: + put_res(dev, slave, id, RES_MPT); + return err; +} + +static int qp_get_rcqn(struct mlx4_qp_context *qpc) +{ + return be32_to_cpu(qpc->cqn_recv) & 0xffffff; +} + +static int qp_get_scqn(struct mlx4_qp_context *qpc) +{ + return be32_to_cpu(qpc->cqn_send) & 0xffffff; +} + +static u32 qp_get_srqn(struct mlx4_qp_context *qpc) +{ + return be32_to_cpu(qpc->srqn) & 0x1ffffff; +} + +static void adjust_proxy_tun_qkey(struct mlx4_dev *dev, struct mlx4_vhcr *vhcr, + struct mlx4_qp_context *context) +{ + u32 qpn = vhcr->in_modifier & 0xffffff; + u32 qkey = 0; + + if (mlx4_get_parav_qkey(dev, qpn, &qkey)) + return; + + /* adjust qkey in qp context */ + context->qkey = cpu_to_be32(qkey); +} + +static int adjust_qp_sched_queue(struct mlx4_dev *dev, int slave, + struct mlx4_qp_context *qpc, + struct mlx4_cmd_mailbox *inbox); + +int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + int qpn = vhcr->in_modifier & 0x7fffff; + struct res_mtt *mtt; + struct res_qp *qp; + struct mlx4_qp_context *qpc = inbox->buf + 8; + int mtt_base = qp_get_mtt_addr(qpc) / dev->caps.mtt_entry_sz; + int mtt_size = qp_get_mtt_size(qpc); + struct res_cq *rcq; + struct res_cq *scq; + int rcqn = qp_get_rcqn(qpc); + int scqn = qp_get_scqn(qpc); + u32 srqn = qp_get_srqn(qpc) & 0xffffff; + int use_srq = (qp_get_srqn(qpc) >> 24) & 1; + struct res_srq *srq; + int local_qpn = vhcr->in_modifier & 0xffffff; + + err = adjust_qp_sched_queue(dev, slave, qpc, inbox); + if (err) + return err; + + err = qp_res_start_move_to(dev, slave, qpn, RES_QP_HW, &qp, 0); + if (err) + return err; + qp->local_qpn = local_qpn; + qp->sched_queue = 0; + qp->param3 = 0; + qp->vlan_control = 0; + qp->fvl_rx = 0; + qp->pri_path_fl = 0; + qp->vlan_index = 0; + qp->feup = 0; + qp->qpc_flags = be32_to_cpu(qpc->flags); + + err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); + if (err) + goto ex_abort; + + err = check_mtt_range(dev, slave, mtt_base, mtt_size, mtt); + if (err) + goto ex_put_mtt; + + err = get_res(dev, slave, rcqn, RES_CQ, &rcq); + if (err) + goto ex_put_mtt; + + if (scqn != rcqn) { + err = get_res(dev, slave, scqn, RES_CQ, &scq); + if (err) + goto ex_put_rcq; + } else + scq = rcq; + + if (use_srq) { + err = get_res(dev, slave, srqn, RES_SRQ, &srq); + if (err) + goto ex_put_scq; + } + + adjust_proxy_tun_qkey(dev, vhcr, qpc); + update_pkey_index(dev, slave, inbox); + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); + if (err) + goto ex_put_srq; + atomic_inc(&mtt->ref_count); + qp->mtt = mtt; + atomic_inc(&rcq->ref_count); + qp->rcq = rcq; + atomic_inc(&scq->ref_count); + qp->scq = scq; + + if (scqn != rcqn) + put_res(dev, slave, scqn, RES_CQ); + + if (use_srq) { + atomic_inc(&srq->ref_count); + put_res(dev, slave, srqn, RES_SRQ); + qp->srq = srq; + } + + /* Save param3 for dynamic changes from VST back to VGT */ + qp->param3 = qpc->param3; + put_res(dev, slave, rcqn, RES_CQ); + put_res(dev, slave, mtt_base, RES_MTT); + res_end_move(dev, slave, RES_QP, qpn); + + 
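/* The QP is now in HW ownership on behalf of the slave: the atomic_inc()
+ * calls above pin its MTT, its receive and send CQs and, when use_srq is
+ * set, its SRQ.  The matching atomic_dec() calls are performed by
+ * mlx4_2RST_QP_wrapper() and by rem_slave_qps(). */ +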
return 0; + +ex_put_srq: + if (use_srq) + put_res(dev, slave, srqn, RES_SRQ); +ex_put_scq: + if (scqn != rcqn) + put_res(dev, slave, scqn, RES_CQ); +ex_put_rcq: + put_res(dev, slave, rcqn, RES_CQ); +ex_put_mtt: + put_res(dev, slave, mtt_base, RES_MTT); +ex_abort: + res_abort_move(dev, slave, RES_QP, qpn); + + return err; +} + +static int eq_get_mtt_addr(struct mlx4_eq_context *eqc) +{ + return be32_to_cpu(eqc->mtt_base_addr_l) & 0xfffffff8; +} + +static int eq_get_mtt_size(struct mlx4_eq_context *eqc) +{ + int log_eq_size = eqc->log_eq_size & 0x1f; + int page_shift = (eqc->log_page_size & 0x3f) + 12; + + if (log_eq_size + 5 < page_shift) + return 1; + + return 1 << (log_eq_size + 5 - page_shift); +} + +static int cq_get_mtt_addr(struct mlx4_cq_context *cqc) +{ + return be32_to_cpu(cqc->mtt_base_addr_l) & 0xfffffff8; +} + +static int cq_get_mtt_size(struct mlx4_cq_context *cqc) +{ + int log_cq_size = (be32_to_cpu(cqc->logsize_usrpage) >> 24) & 0x1f; + int page_shift = (cqc->log_page_size & 0x3f) + 12; + + if (log_cq_size + 5 < page_shift) + return 1; + + return 1 << (log_cq_size + 5 - page_shift); +} + +int mlx4_SW2HW_EQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + int eqn = vhcr->in_modifier; + int res_id = (slave << 10) | eqn; + struct mlx4_eq_context *eqc = inbox->buf; + int mtt_base = eq_get_mtt_addr(eqc) / dev->caps.mtt_entry_sz; + int mtt_size = eq_get_mtt_size(eqc); + struct res_eq *eq; + struct res_mtt *mtt; + + err = add_res_range(dev, slave, res_id, 1, RES_EQ, 0); + if (err) + return err; + err = eq_res_start_move_to(dev, slave, res_id, RES_EQ_HW, &eq); + if (err) + goto out_add; + + err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); + if (err) + goto out_move; + + err = check_mtt_range(dev, slave, mtt_base, mtt_size, mtt); + if (err) + goto out_put; + + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); + if (err) + goto out_put; + + atomic_inc(&mtt->ref_count); + eq->mtt = mtt; + put_res(dev, slave, mtt->com.res_id, RES_MTT); + res_end_move(dev, slave, RES_EQ, res_id); + return 0; + +out_put: + put_res(dev, slave, mtt->com.res_id, RES_MTT); +out_move: + res_abort_move(dev, slave, RES_EQ, res_id); +out_add: + rem_res_range(dev, slave, res_id, 1, RES_EQ, 0); + return err; +} + +int mlx4_CONFIG_DEV_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + u8 get = vhcr->op_modifier; + + if (get != 1) + return -EPERM; + + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); + + return err; +} + +static int get_containing_mtt(struct mlx4_dev *dev, int slave, int start, + int len, struct res_mtt **res) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct res_mtt *mtt; + int err = -EINVAL; + + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry(mtt, &tracker->slave_list[slave].res_list[RES_MTT], + com.list) { + if (!check_mtt_range(dev, slave, start, len, mtt)) { + *res = mtt; + mtt->com.from_state = mtt->com.state; + mtt->com.state = RES_MTT_BUSY; + err = 0; + break; + } + } + spin_unlock_irq(mlx4_tlock(dev)); + + return err; +} + +static int verify_qp_parameters(struct mlx4_dev *dev, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + enum qp_transition transition, u8 slave) +{ + u32 qp_type; + u32 qpn; + struct 
mlx4_qp_context *qp_ctx; + enum mlx4_qp_optpar optpar; + int port; + int num_gids; + + qp_ctx = inbox->buf + 8; + qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; + optpar = be32_to_cpu(*(__be32 *) inbox->buf); + + if (slave != mlx4_master_func_num(dev)) { + qp_ctx->params2 &= ~cpu_to_be32(MLX4_QP_BIT_FPP); + /* setting QP rate-limit is disallowed for VFs */ + if (qp_ctx->rate_limit_params) + return -EPERM; + } + + switch (qp_type) { + case MLX4_QP_ST_RC: + case MLX4_QP_ST_XRC: + case MLX4_QP_ST_UC: + switch (transition) { + case QP_TRANS_INIT2RTR: + case QP_TRANS_RTR2RTS: + case QP_TRANS_RTS2RTS: + case QP_TRANS_SQD2SQD: + case QP_TRANS_SQD2RTS: + if (slave != mlx4_master_func_num(dev)) { + if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) { + port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; + if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) + num_gids = mlx4_get_slave_num_gids(dev, slave, port); + else + num_gids = 1; + if (qp_ctx->pri_path.mgid_index >= num_gids) + return -EINVAL; + } + if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) { + port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1; + if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) + num_gids = mlx4_get_slave_num_gids(dev, slave, port); + else + num_gids = 1; + if (qp_ctx->alt_path.mgid_index >= num_gids) + return -EINVAL; + } + } + break; + default: + break; + } + break; + + case MLX4_QP_ST_MLX: + qpn = vhcr->in_modifier & 0x7fffff; + port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; + if (transition == QP_TRANS_INIT2RTR && + slave != mlx4_master_func_num(dev) && + mlx4_is_qp_reserved(dev, qpn) && + !mlx4_vf_smi_enabled(dev, slave, port)) { + /* only enabled VFs may create MLX proxy QPs */ + mlx4_err(dev, "%s: unprivileged slave %d attempting to create an MLX proxy special QP on port %d\n", + __func__, slave, port); + return -EPERM; + } + break; + + default: + break; + } + + return 0; +} + +int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + struct mlx4_mtt mtt; + __be64 *page_list = inbox->buf; + u64 *pg_list = (u64 *)page_list; + int i; + struct res_mtt *rmtt = NULL; + int start = be64_to_cpu(page_list[0]); + int npages = vhcr->in_modifier; + int err; + + err = get_containing_mtt(dev, slave, start, npages, &rmtt); + if (err) + return err; + + /* Call the SW implementation of write_mtt: + * - Prepare a dummy mtt struct + * - Translate inbox contents to simple addresses in host endianness */ + mtt.offset = 0; /* TBD this is broken but I don't handle it since + we don't really use it */ + mtt.order = 0; + mtt.page_shift = 0; + for (i = 0; i < npages; ++i) + pg_list[i + 2] = (be64_to_cpu(page_list[i + 2]) & ~1ULL); + + err = __mlx4_write_mtt(dev, &mtt, be64_to_cpu(page_list[0]), npages, + ((u64 *)page_list + 2)); + + if (rmtt) + put_res(dev, slave, rmtt->com.res_id, RES_MTT); + + return err; +} + +int mlx4_HW2SW_EQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int eqn = vhcr->in_modifier; + int res_id = eqn | (slave << 10); + struct res_eq *eq; + int err; + + err = eq_res_start_move_to(dev, slave, res_id, RES_EQ_RESERVED, &eq); + if (err) + return err; + + err = get_res(dev, slave, eq->mtt->com.res_id, RES_MTT, NULL); + if (err) + goto ex_abort; + + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); + if (err) + goto ex_put; + + 
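/* The firmware accepted HW2SW_EQ, so the EQ is back in SW ownership:
+ * drop the MTT reference taken by mlx4_SW2HW_EQ_wrapper() and remove the
+ * EQ range from the resource tracker. */ +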
atomic_dec(&eq->mtt->ref_count); + put_res(dev, slave, eq->mtt->com.res_id, RES_MTT); + res_end_move(dev, slave, RES_EQ, res_id); + rem_res_range(dev, slave, res_id, 1, RES_EQ, 0); + + return 0; + +ex_put: + put_res(dev, slave, eq->mtt->com.res_id, RES_MTT); +ex_abort: + res_abort_move(dev, slave, RES_EQ, res_id); + + return err; +} + +int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_event_eq_info *event_eq; + struct mlx4_cmd_mailbox *mailbox; + u32 in_modifier = 0; + int err; + int res_id; + struct res_eq *req; + + if (!priv->mfunc.master.slave_state) + return -EINVAL; + + /* check for slave valid, slave not PF, and slave active */ + if (slave < 0 || slave > dev->persist->num_vfs || + slave == dev->caps.function || + !priv->mfunc.master.slave_state[slave].active) + return 0; + + event_eq = &priv->mfunc.master.slave_state[slave].event_eq[eqe->type]; + + /* Create the event only if the slave is registered */ + if (event_eq->eqn < 0) + return 0; + + mutex_lock(&priv->mfunc.master.gen_eqe_mutex[slave]); + res_id = (slave << 10) | event_eq->eqn; + err = get_res(dev, slave, res_id, RES_EQ, &req); + if (err) + goto unlock; + + if (req->com.from_state != RES_EQ_HW) { + err = -EINVAL; + goto put; + } + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto put; + } + + if (eqe->type == MLX4_EVENT_TYPE_CMD) { + ++event_eq->token; + eqe->event.cmd.token = cpu_to_be16(event_eq->token); + } + + memcpy(mailbox->buf, (u8 *) eqe, 28); + + in_modifier = (slave & 0xff) | ((event_eq->eqn & 0x3ff) << 16); + + err = mlx4_cmd(dev, mailbox->dma, in_modifier, 0, + MLX4_CMD_GEN_EQE, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); + + put_res(dev, slave, res_id, RES_EQ); + mutex_unlock(&priv->mfunc.master.gen_eqe_mutex[slave]); + mlx4_free_cmd_mailbox(dev, mailbox); + return err; + +put: + put_res(dev, slave, res_id, RES_EQ); + +unlock: + mutex_unlock(&priv->mfunc.master.gen_eqe_mutex[slave]); + return err; +} + +int mlx4_QUERY_EQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int eqn = vhcr->in_modifier; + int res_id = eqn | (slave << 10); + struct res_eq *eq; + int err; + + err = get_res(dev, slave, res_id, RES_EQ, &eq); + if (err) + return err; + + if (eq->com.from_state != RES_EQ_HW) { + err = -EINVAL; + goto ex_put; + } + + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); + +ex_put: + put_res(dev, slave, res_id, RES_EQ); + return err; +} + +int mlx4_SW2HW_CQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + int cqn = vhcr->in_modifier; + struct mlx4_cq_context *cqc = inbox->buf; + int mtt_base = cq_get_mtt_addr(cqc) / dev->caps.mtt_entry_sz; + struct res_cq *cq = NULL; + struct res_mtt *mtt; + + err = cq_res_start_move_to(dev, slave, cqn, RES_CQ_HW, &cq); + if (err) + return err; + err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); + if (err) + goto out_move; + err = check_mtt_range(dev, slave, mtt_base, cq_get_mtt_size(cqc), mtt); + if (err) + goto out_put; + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); + if (err) + goto out_put; + atomic_inc(&mtt->ref_count); + cq->mtt = mtt; + put_res(dev, slave, mtt->com.res_id, RES_MTT); + res_end_move(dev, slave, RES_CQ, cqn); + return 0; + +out_put: + put_res(dev, 
slave, mtt->com.res_id, RES_MTT); +out_move: + res_abort_move(dev, slave, RES_CQ, cqn); + return err; +} + +int mlx4_HW2SW_CQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + int cqn = vhcr->in_modifier; + struct res_cq *cq = NULL; + + err = cq_res_start_move_to(dev, slave, cqn, RES_CQ_ALLOCATED, &cq); + if (err) + return err; + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); + if (err) + goto out_move; + atomic_dec(&cq->mtt->ref_count); + res_end_move(dev, slave, RES_CQ, cqn); + return 0; + +out_move: + res_abort_move(dev, slave, RES_CQ, cqn); + return err; +} + +int mlx4_QUERY_CQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int cqn = vhcr->in_modifier; + struct res_cq *cq; + int err; + + err = get_res(dev, slave, cqn, RES_CQ, &cq); + if (err) + return err; + + if (cq->com.from_state != RES_CQ_HW) + goto ex_put; + + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +ex_put: + put_res(dev, slave, cqn, RES_CQ); + + return err; +} + +static int handle_resize(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd, + struct res_cq *cq) +{ + int err; + struct res_mtt *orig_mtt; + struct res_mtt *mtt; + struct mlx4_cq_context *cqc = inbox->buf; + int mtt_base = cq_get_mtt_addr(cqc) / dev->caps.mtt_entry_sz; + + err = get_res(dev, slave, cq->mtt->com.res_id, RES_MTT, &orig_mtt); + if (err) + return err; + + if (orig_mtt != cq->mtt) { + err = -EINVAL; + goto ex_put; + } + + err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); + if (err) + goto ex_put; + + err = check_mtt_range(dev, slave, mtt_base, cq_get_mtt_size(cqc), mtt); + if (err) + goto ex_put1; + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); + if (err) + goto ex_put1; + atomic_dec(&orig_mtt->ref_count); + put_res(dev, slave, orig_mtt->com.res_id, RES_MTT); + atomic_inc(&mtt->ref_count); + cq->mtt = mtt; + put_res(dev, slave, mtt->com.res_id, RES_MTT); + return 0; + +ex_put1: + put_res(dev, slave, mtt->com.res_id, RES_MTT); +ex_put: + put_res(dev, slave, orig_mtt->com.res_id, RES_MTT); + + return err; + +} + +int mlx4_MODIFY_CQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int cqn = vhcr->in_modifier; + struct res_cq *cq; + int err; + + err = get_res(dev, slave, cqn, RES_CQ, &cq); + if (err) + return err; + + if (cq->com.from_state != RES_CQ_HW) + goto ex_put; + + if (vhcr->op_modifier == 0) { + err = handle_resize(dev, slave, vhcr, inbox, outbox, cmd, cq); + goto ex_put; + } + + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +ex_put: + put_res(dev, slave, cqn, RES_CQ); + + return err; +} + +static int srq_get_mtt_size(struct mlx4_srq_context *srqc) +{ + int log_srq_size = (be32_to_cpu(srqc->state_logsize_srqn) >> 24) & 0xf; + int log_rq_stride = srqc->logstride & 7; + int page_shift = (srqc->log_page_size & 0x3f) + 12; + + if (log_srq_size + log_rq_stride + 4 < page_shift) + return 1; + + return 1 << (log_srq_size + log_rq_stride + 4 - page_shift); +} + +int mlx4_SW2HW_SRQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + 
struct mlx4_cmd_info *cmd) +{ + int err; + int srqn = vhcr->in_modifier; + struct res_mtt *mtt; + struct res_srq *srq = NULL; + struct mlx4_srq_context *srqc = inbox->buf; + int mtt_base = srq_get_mtt_addr(srqc) / dev->caps.mtt_entry_sz; + + if (srqn != (be32_to_cpu(srqc->state_logsize_srqn) & 0xffffff)) + return -EINVAL; + + err = srq_res_start_move_to(dev, slave, srqn, RES_SRQ_HW, &srq); + if (err) + return err; + err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); + if (err) + goto ex_abort; + err = check_mtt_range(dev, slave, mtt_base, srq_get_mtt_size(srqc), + mtt); + if (err) + goto ex_put_mtt; + + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); + if (err) + goto ex_put_mtt; + + atomic_inc(&mtt->ref_count); + srq->mtt = mtt; + put_res(dev, slave, mtt->com.res_id, RES_MTT); + res_end_move(dev, slave, RES_SRQ, srqn); + return 0; + +ex_put_mtt: + put_res(dev, slave, mtt->com.res_id, RES_MTT); +ex_abort: + res_abort_move(dev, slave, RES_SRQ, srqn); + + return err; +} + +int mlx4_HW2SW_SRQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + int srqn = vhcr->in_modifier; + struct res_srq *srq = NULL; + + err = srq_res_start_move_to(dev, slave, srqn, RES_SRQ_ALLOCATED, &srq); + if (err) + return err; + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); + if (err) + goto ex_abort; + atomic_dec(&srq->mtt->ref_count); + if (srq->cq) + atomic_dec(&srq->cq->ref_count); + res_end_move(dev, slave, RES_SRQ, srqn); + + return 0; + +ex_abort: + res_abort_move(dev, slave, RES_SRQ, srqn); + + return err; +} + +int mlx4_QUERY_SRQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + int srqn = vhcr->in_modifier; + struct res_srq *srq; + + err = get_res(dev, slave, srqn, RES_SRQ, &srq); + if (err) + return err; + if (srq->com.from_state != RES_SRQ_HW) { + err = -EBUSY; + goto out; + } + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +out: + put_res(dev, slave, srqn, RES_SRQ); + return err; +} + +int mlx4_ARM_SRQ_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + int srqn = vhcr->in_modifier; + struct res_srq *srq; + + err = get_res(dev, slave, srqn, RES_SRQ, &srq); + if (err) + return err; + + if (srq->com.from_state != RES_SRQ_HW) { + err = -EBUSY; + goto out; + } + + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +out: + put_res(dev, slave, srqn, RES_SRQ); + return err; +} + +int mlx4_GEN_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + int qpn = vhcr->in_modifier & 0x7fffff; + struct res_qp *qp; + + err = get_res(dev, slave, qpn, RES_QP, &qp); + if (err) + return err; + if (qp->com.from_state != RES_QP_HW) { + err = -EBUSY; + goto out; + } + + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +out: + put_res(dev, slave, qpn, RES_QP); + return err; +} + +int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + struct mlx4_qp_context *context = inbox->buf + 8; + adjust_proxy_tun_qkey(dev, vhcr, 
context); + update_pkey_index(dev, slave, inbox); + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + +static int adjust_qp_sched_queue(struct mlx4_dev *dev, int slave, + struct mlx4_qp_context *qpc, + struct mlx4_cmd_mailbox *inbox) +{ + enum mlx4_qp_optpar optpar = be32_to_cpu(*(__be32 *)inbox->buf); + u8 pri_sched_queue; + int port = mlx4_slave_convert_port( + dev, slave, (qpc->pri_path.sched_queue >> 6 & 1) + 1) - 1; + + if (port < 0) + return -EINVAL; + + pri_sched_queue = (qpc->pri_path.sched_queue & ~(1 << 6)) | + ((port & 1) << 6); + + if (optpar & (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | MLX4_QP_OPTPAR_SCHED_QUEUE) || + qpc->pri_path.sched_queue || mlx4_is_eth(dev, port + 1)) { + qpc->pri_path.sched_queue = pri_sched_queue; + } + + if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) { + port = mlx4_slave_convert_port( + dev, slave, (qpc->alt_path.sched_queue >> 6 & 1) + + 1) - 1; + if (port < 0) + return -EINVAL; + qpc->alt_path.sched_queue = + (qpc->alt_path.sched_queue & ~(1 << 6)) | + (port & 1) << 6; + } + return 0; +} + +static int roce_verify_mac(struct mlx4_dev *dev, int slave, + struct mlx4_qp_context *qpc, + struct mlx4_cmd_mailbox *inbox) +{ + u64 mac; + int port; + u32 ts = (be32_to_cpu(qpc->flags) >> 16) & 0xff; + u8 sched = *(u8 *)(inbox->buf + 64); + u8 smac_ix; + + port = (sched >> 6 & 1) + 1; + if (mlx4_is_eth(dev, port) && (ts != MLX4_QP_ST_MLX)) { + smac_ix = qpc->pri_path.grh_mylmc & 0x7f; + if (mac_find_smac_ix_in_slave(dev, slave, port, smac_ix, &mac)) + return -ENOENT; + } + return 0; +} + +int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + struct mlx4_qp_context *qpc = inbox->buf + 8; + int qpn = vhcr->in_modifier & 0x7fffff; + struct res_qp *qp; + u8 orig_sched_queue; + u8 orig_vlan_control = qpc->pri_path.vlan_control; + u8 orig_fvl_rx = qpc->pri_path.fvl_rx; + u8 orig_pri_path_fl = qpc->pri_path.fl; + u8 orig_vlan_index = qpc->pri_path.vlan_index; + u8 orig_feup = qpc->pri_path.feup; + + err = adjust_qp_sched_queue(dev, slave, qpc, inbox); + if (err) + return err; + err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_INIT2RTR, slave); + if (err) + return err; + + if (roce_verify_mac(dev, slave, qpc, inbox)) + return -EINVAL; + + update_pkey_index(dev, slave, inbox); + update_gid(dev, inbox, (u8)slave); + adjust_proxy_tun_qkey(dev, vhcr, qpc); + orig_sched_queue = qpc->pri_path.sched_queue; + + err = get_res(dev, slave, qpn, RES_QP, &qp); + if (err) + return err; + if (qp->com.from_state != RES_QP_HW) { + err = -EBUSY; + goto out; + } + + err = update_vport_qp_param(dev, inbox, slave, qpn); + if (err) + goto out; + + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +out: + /* if no error, save sched queue value passed in by VF. This is + * essentially the QOS value provided by the VF. 
This will be useful + * if we allow dynamic changes from VST back to VGT + */ + if (!err) { + qp->sched_queue = orig_sched_queue; + qp->vlan_control = orig_vlan_control; + qp->fvl_rx = orig_fvl_rx; + qp->pri_path_fl = orig_pri_path_fl; + qp->vlan_index = orig_vlan_index; + qp->feup = orig_feup; + } + put_res(dev, slave, qpn, RES_QP); + return err; +} + +int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + struct mlx4_qp_context *context = inbox->buf + 8; + + err = adjust_qp_sched_queue(dev, slave, context, inbox); + if (err) + return err; + err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_RTR2RTS, slave); + if (err) + return err; + + update_pkey_index(dev, slave, inbox); + update_gid(dev, inbox, (u8)slave); + adjust_proxy_tun_qkey(dev, vhcr, context); + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + +int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + struct mlx4_qp_context *context = inbox->buf + 8; + + err = adjust_qp_sched_queue(dev, slave, context, inbox); + if (err) + return err; + err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_RTS2RTS, slave); + if (err) + return err; + + update_pkey_index(dev, slave, inbox); + update_gid(dev, inbox, (u8)slave); + adjust_proxy_tun_qkey(dev, vhcr, context); + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + + +int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + struct mlx4_qp_context *context = inbox->buf + 8; + int err = adjust_qp_sched_queue(dev, slave, context, inbox); + if (err) + return err; + adjust_proxy_tun_qkey(dev, vhcr, context); + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + +int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + struct mlx4_qp_context *context = inbox->buf + 8; + + err = adjust_qp_sched_queue(dev, slave, context, inbox); + if (err) + return err; + err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_SQD2SQD, slave); + if (err) + return err; + + adjust_proxy_tun_qkey(dev, vhcr, context); + update_gid(dev, inbox, (u8)slave); + update_pkey_index(dev, slave, inbox); + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + +int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + struct mlx4_qp_context *context = inbox->buf + 8; + + err = adjust_qp_sched_queue(dev, slave, context, inbox); + if (err) + return err; + err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_SQD2RTS, slave); + if (err) + return err; + + adjust_proxy_tun_qkey(dev, vhcr, context); + update_gid(dev, inbox, (u8)slave); + update_pkey_index(dev, slave, inbox); + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + +int mlx4_2RST_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + int qpn = 
vhcr->in_modifier & 0x7fffff; + struct res_qp *qp; + + err = qp_res_start_move_to(dev, slave, qpn, RES_QP_MAPPED, &qp, 0); + if (err) + return err; + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); + if (err) + goto ex_abort; + + atomic_dec(&qp->mtt->ref_count); + atomic_dec(&qp->rcq->ref_count); + atomic_dec(&qp->scq->ref_count); + if (qp->srq) + atomic_dec(&qp->srq->ref_count); + res_end_move(dev, slave, RES_QP, qpn); + return 0; + +ex_abort: + res_abort_move(dev, slave, RES_QP, qpn); + + return err; +} + +static struct res_gid *find_gid(struct mlx4_dev *dev, int slave, + struct res_qp *rqp, u8 *gid) +{ + struct res_gid *res; + + list_for_each_entry(res, &rqp->mcg_list, list) { + if (!memcmp(res->gid, gid, 16)) + return res; + } + return NULL; +} + +static int add_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, + u8 *gid, enum mlx4_protocol prot, + enum mlx4_steer_type steer, u64 reg_id) +{ + struct res_gid *res; + int err; + + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + return -ENOMEM; + + spin_lock_irq(&rqp->mcg_spl); + if (find_gid(dev, slave, rqp, gid)) { + kfree(res); + err = -EEXIST; + } else { + memcpy(res->gid, gid, 16); + res->prot = prot; + res->steer = steer; + res->reg_id = reg_id; + list_add_tail(&res->list, &rqp->mcg_list); + err = 0; + } + spin_unlock_irq(&rqp->mcg_spl); + + return err; +} + +static int rem_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, + u8 *gid, enum mlx4_protocol prot, + enum mlx4_steer_type steer, u64 *reg_id) +{ + struct res_gid *res; + int err; + + spin_lock_irq(&rqp->mcg_spl); + res = find_gid(dev, slave, rqp, gid); + if (!res || res->prot != prot || res->steer != steer) + err = -EINVAL; + else { + *reg_id = res->reg_id; + list_del(&res->list); + kfree(res); + err = 0; + } + spin_unlock_irq(&rqp->mcg_spl); + + return err; +} + +static int qp_attach(struct mlx4_dev *dev, int slave, struct mlx4_qp *qp, + u8 gid[16], int block_loopback, enum mlx4_protocol prot, + enum mlx4_steer_type type, u64 *reg_id) +{ + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: { + int port = mlx4_slave_convert_port(dev, slave, gid[5]); + if (port < 0) + return port; + return mlx4_trans_to_dmfs_attach(dev, qp, gid, port, + block_loopback, prot, + reg_id); + } + case MLX4_STEERING_MODE_B0: + if (prot == MLX4_PROT_ETH) { + int port = mlx4_slave_convert_port(dev, slave, gid[5]); + if (port < 0) + return port; + gid[5] = port; + } + return mlx4_qp_attach_common(dev, qp, gid, + block_loopback, prot, type); + default: + return -EINVAL; + } +} + +static int qp_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, + u8 gid[16], enum mlx4_protocol prot, + enum mlx4_steer_type type, u64 reg_id) +{ + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + return mlx4_flow_detach(dev, reg_id); + case MLX4_STEERING_MODE_B0: + return mlx4_qp_detach_common(dev, qp, gid, prot, type); + default: + return -EINVAL; + } +} + +static int mlx4_adjust_port(struct mlx4_dev *dev, int slave, + u8 *gid, enum mlx4_protocol prot) +{ + int real_port; + + if (prot != MLX4_PROT_ETH) + return 0; + + if (dev->caps.steering_mode == MLX4_STEERING_MODE_B0 || + dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { + real_port = mlx4_slave_convert_port(dev, slave, gid[5]); + if (real_port < 0) + return -EINVAL; + gid[5] = real_port; + } + + return 0; +} + +int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox 
*outbox, + struct mlx4_cmd_info *cmd) +{ + struct mlx4_qp qp; /* dummy for calling attach/detach */ + u8 *gid = inbox->buf; + enum mlx4_protocol prot = (vhcr->in_modifier >> 28) & 0x7; + int err; + int qpn; + struct res_qp *rqp; + u64 reg_id = 0; + int attach = vhcr->op_modifier; + int block_loopback = vhcr->in_modifier >> 31; + u8 steer_type_mask = 2; + enum mlx4_steer_type type = (gid[7] & steer_type_mask) >> 1; + + qpn = vhcr->in_modifier & 0xffffff; + err = get_res(dev, slave, qpn, RES_QP, &rqp); + if (err) + return err; + + qp.qpn = qpn; + if (attach) { + err = qp_attach(dev, slave, &qp, gid, block_loopback, prot, + type, ®_id); + if (err) { + pr_err("Fail to attach rule to qp 0x%x\n", qpn); + goto ex_put; + } + err = add_mcg_res(dev, slave, rqp, gid, prot, type, reg_id); + if (err) + goto ex_detach; + } else { + err = mlx4_adjust_port(dev, slave, gid, prot); + if (err) + goto ex_put; + + err = rem_mcg_res(dev, slave, rqp, gid, prot, type, ®_id); + if (err) + goto ex_put; + + err = qp_detach(dev, &qp, gid, prot, type, reg_id); + if (err) + pr_err("Fail to detach rule from qp 0x%x reg_id = 0x%llx\n", + qpn, reg_id); + } + put_res(dev, slave, qpn, RES_QP); + return err; + +ex_detach: + qp_detach(dev, &qp, gid, prot, type, reg_id); +ex_put: + put_res(dev, slave, qpn, RES_QP); + return err; +} + +/* + * MAC validation for Flow Steering rules. + * VF can attach rules only with a mac address which is assigned to it. + */ +static int validate_eth_header_mac(int slave, struct _rule_hw *eth_header, + struct list_head *rlist) +{ + struct mac_res *res, *tmp; + __be64 be_mac; + + /* make sure it isn't multicast or broadcast mac*/ + if (!is_multicast_ether_addr(eth_header->eth.dst_mac) && + !is_broadcast_ether_addr(eth_header->eth.dst_mac)) { + list_for_each_entry_safe(res, tmp, rlist, list) { + be_mac = cpu_to_be64(res->mac << 16); + if (ether_addr_equal((u8 *)&be_mac, eth_header->eth.dst_mac)) + return 0; + } + pr_err("MAC %pM doesn't belong to VF %d, Steering rule rejected\n", + eth_header->eth.dst_mac, slave); + return -EINVAL; + } + return 0; +} + +/* + * In case of missing eth header, append eth header with a MAC address + * assigned to the VF. 
+ */ +static int add_eth_header(struct mlx4_dev *dev, int slave, + struct mlx4_cmd_mailbox *inbox, + struct list_head *rlist, int header_id) +{ + struct mac_res *res, *tmp; + u8 port; + struct mlx4_net_trans_rule_hw_ctrl *ctrl; + struct mlx4_net_trans_rule_hw_eth *eth_header; + struct mlx4_net_trans_rule_hw_ipv4 *ip_header; + struct mlx4_net_trans_rule_hw_tcp_udp *l4_header; + __be64 be_mac = 0; + __be64 mac_msk = cpu_to_be64(MLX4_MAC_MASK << 16); + + ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf; + port = ctrl->port; + eth_header = (struct mlx4_net_trans_rule_hw_eth *)(ctrl + 1); + + /* Clear a space in the inbox for eth header */ + switch (header_id) { + case MLX4_NET_TRANS_RULE_ID_IPV4: + ip_header = + (struct mlx4_net_trans_rule_hw_ipv4 *)(eth_header + 1); + memmove(ip_header, eth_header, + sizeof(*ip_header) + sizeof(*l4_header)); + break; + case MLX4_NET_TRANS_RULE_ID_TCP: + case MLX4_NET_TRANS_RULE_ID_UDP: + l4_header = (struct mlx4_net_trans_rule_hw_tcp_udp *) + (eth_header + 1); + memmove(l4_header, eth_header, sizeof(*l4_header)); + break; + default: + return -EINVAL; + } + list_for_each_entry_safe(res, tmp, rlist, list) { + if (port == res->port) { + be_mac = cpu_to_be64(res->mac << 16); + break; + } + } + if (!be_mac) { + pr_err("Failed adding eth header to FS rule, Can't find matching MAC for port %d\n", + port); + return -EINVAL; + } + + memset(eth_header, 0, sizeof(*eth_header)); + eth_header->size = sizeof(*eth_header) >> 2; + eth_header->id = cpu_to_be16(__sw_id_hw[MLX4_NET_TRANS_RULE_ID_ETH]); + memcpy(eth_header->dst_mac, &be_mac, ETH_ALEN); + memcpy(eth_header->dst_mac_msk, &mac_msk, ETH_ALEN); + + return 0; + +} + +#define MLX4_UPD_QP_PATH_MASK_SUPPORTED ( \ + 1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX |\ + 1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB) +int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd_info) +{ + int err; + u32 qpn = vhcr->in_modifier & 0xffffff; + struct res_qp *rqp; + u64 mac; + unsigned port; + u64 pri_addr_path_mask; + struct mlx4_update_qp_context *cmd; + int smac_index; + + cmd = (struct mlx4_update_qp_context *)inbox->buf; + + pri_addr_path_mask = be64_to_cpu(cmd->primary_addr_path_mask); + if (cmd->qp_mask || cmd->secondary_addr_path_mask || + (pri_addr_path_mask & ~MLX4_UPD_QP_PATH_MASK_SUPPORTED)) + return -EPERM; + + if ((pri_addr_path_mask & + (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB)) && + !(dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) { + mlx4_warn(dev, "Src check LB for slave %d isn't supported\n", + slave); + return -EOPNOTSUPP; + } + + /* Just change the smac for the QP */ + err = get_res(dev, slave, qpn, RES_QP, &rqp); + if (err) { + mlx4_err(dev, "Updating qpn 0x%x for slave %d rejected\n", qpn, slave); + return err; + } + + port = (rqp->sched_queue >> 6 & 1) + 1; + + if (pri_addr_path_mask & (1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX)) { + smac_index = cmd->qp_context.pri_path.grh_mylmc; + err = mac_find_smac_ix_in_slave(dev, slave, port, + smac_index, &mac); + + if (err) { + mlx4_err(dev, "Failed to update qpn 0x%x, MAC is invalid. 
smac_ix: %d\n", + qpn, smac_index); + goto err_mac; + } + } + + err = mlx4_cmd(dev, inbox->dma, + vhcr->in_modifier, 0, + MLX4_CMD_UPDATE_QP, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) { + mlx4_err(dev, "Failed to update qpn on qpn 0x%x, command failed\n", qpn); + goto err_mac; + } + +err_mac: + put_res(dev, slave, qpn, RES_QP); + return err; +} + +static u32 qp_attach_mbox_size(void *mbox) +{ + u32 size = sizeof(struct mlx4_net_trans_rule_hw_ctrl); + struct _rule_hw *rule_header; + + rule_header = (struct _rule_hw *)(mbox + size); + + while (rule_header->size) { + size += rule_header->size * sizeof(u32); + rule_header += 1; + } + return size; +} + +static int mlx4_do_mirror_rule(struct mlx4_dev *dev, struct res_fs_rule *fs_rule); + +int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct list_head *rlist = &tracker->slave_list[slave].res_list[RES_MAC]; + int err; + int qpn; + struct res_qp *rqp; + struct mlx4_net_trans_rule_hw_ctrl *ctrl; + struct _rule_hw *rule_header; + int header_id; + struct res_fs_rule *rrule; + u32 mbox_size; + + if (dev->caps.steering_mode != + MLX4_STEERING_MODE_DEVICE_MANAGED) + return -EOPNOTSUPP; + + ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf; + err = mlx4_slave_convert_port(dev, slave, ctrl->port); + if (err <= 0) + return -EINVAL; + ctrl->port = err; + qpn = be32_to_cpu(ctrl->qpn) & 0xffffff; + err = get_res(dev, slave, qpn, RES_QP, &rqp); + if (err) { + pr_err("Steering rule with qpn 0x%x rejected\n", qpn); + return err; + } + rule_header = (struct _rule_hw *)(ctrl + 1); + header_id = map_hw_to_sw_id(be16_to_cpu(rule_header->id)); + + if (header_id == MLX4_NET_TRANS_RULE_ID_ETH) + mlx4_handle_eth_header_mcast_prio(ctrl, rule_header); + + switch (header_id) { + case MLX4_NET_TRANS_RULE_ID_ETH: + if (validate_eth_header_mac(slave, rule_header, rlist)) { + err = -EINVAL; + goto err_put_qp; + } + break; + case MLX4_NET_TRANS_RULE_ID_IB: + break; + case MLX4_NET_TRANS_RULE_ID_IPV4: + case MLX4_NET_TRANS_RULE_ID_TCP: + case MLX4_NET_TRANS_RULE_ID_UDP: + pr_warn("Can't attach FS rule without L2 headers, adding L2 header\n"); + if (add_eth_header(dev, slave, inbox, rlist, header_id)) { + err = -EINVAL; + goto err_put_qp; + } + vhcr->in_modifier += + sizeof(struct mlx4_net_trans_rule_hw_eth) >> 2; + break; + default: + pr_err("Corrupted mailbox\n"); + err = -EINVAL; + goto err_put_qp; + } + + err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param, + vhcr->in_modifier, 0, + MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) + goto err_put_qp; + + + err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, qpn); + if (err) { + mlx4_err(dev, "Fail to add flow steering resources\n"); + goto err_detach; + } + + err = get_res(dev, slave, vhcr->out_param, RES_FS_RULE, &rrule); + if (err) + goto err_detach; + + mbox_size = qp_attach_mbox_size(inbox->buf); + rrule->mirr_mbox = kmalloc(mbox_size, GFP_KERNEL); + if (!rrule->mirr_mbox) { + err = -ENOMEM; + goto err_put_rule; + } + rrule->mirr_mbox_size = mbox_size; + rrule->mirr_rule_id = 0; + memcpy(rrule->mirr_mbox, inbox->buf, mbox_size); + + /* set different port */ + ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)rrule->mirr_mbox; + if (ctrl->port == 1) + ctrl->port = 2; + else + ctrl->port = 1; 
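+ /* The mailbox copy saved above carries the same rule with the port
+ * swapped; when the two ports are bonded it is replayed by
+ * mlx4_do_mirror_rule() so the rule exists on both physical ports. */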
+ + if (mlx4_is_bonded(dev)) + mlx4_do_mirror_rule(dev, rrule); + + atomic_inc(&rqp->ref_count); + +err_put_rule: + put_res(dev, slave, vhcr->out_param, RES_FS_RULE); +err_detach: + /* detach rule on error */ + if (err) + mlx4_cmd(dev, vhcr->out_param, 0, 0, + MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); +err_put_qp: + put_res(dev, slave, qpn, RES_QP); + return err; +} + +static int mlx4_undo_mirror_rule(struct mlx4_dev *dev, struct res_fs_rule *fs_rule) +{ + int err; + + err = rem_res_range(dev, fs_rule->com.owner, fs_rule->com.res_id, 1, RES_FS_RULE, 0); + if (err) { + mlx4_err(dev, "Fail to remove flow steering resources\n"); + return err; + } + + mlx4_cmd(dev, fs_rule->com.res_id, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + return 0; +} + +int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + struct res_qp *rqp; + struct res_fs_rule *rrule; + u64 mirr_reg_id; + int qpn; + + if (dev->caps.steering_mode != + MLX4_STEERING_MODE_DEVICE_MANAGED) + return -EOPNOTSUPP; + + err = get_res(dev, slave, vhcr->in_param, RES_FS_RULE, &rrule); + if (err) + return err; + + if (!rrule->mirr_mbox) { + mlx4_err(dev, "Mirror rules cannot be removed explicitly\n"); + put_res(dev, slave, vhcr->in_param, RES_FS_RULE); + return -EINVAL; + } + mirr_reg_id = rrule->mirr_rule_id; + kfree(rrule->mirr_mbox); + qpn = rrule->qpn; + + /* Release the rule form busy state before removal */ + put_res(dev, slave, vhcr->in_param, RES_FS_RULE); + err = get_res(dev, slave, qpn, RES_QP, &rqp); + if (err) + return err; + + if (mirr_reg_id && mlx4_is_bonded(dev)) { + err = get_res(dev, slave, mirr_reg_id, RES_FS_RULE, &rrule); + if (err) { + mlx4_err(dev, "Fail to get resource of mirror rule\n"); + } else { + put_res(dev, slave, mirr_reg_id, RES_FS_RULE); + mlx4_undo_mirror_rule(dev, rrule); + } + } + err = rem_res_range(dev, slave, vhcr->in_param, 1, RES_FS_RULE, 0); + if (err) { + mlx4_err(dev, "Fail to remove flow steering resources\n"); + goto out; + } + + err = mlx4_cmd(dev, vhcr->in_param, 0, 0, + MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (!err) + atomic_dec(&rqp->ref_count); +out: + put_res(dev, slave, qpn, RES_QP); + return err; +} + +enum { + BUSY_MAX_RETRIES = 10 +}; + +int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + int index = vhcr->in_modifier & 0xffff; + + err = get_res(dev, slave, index, RES_COUNTER, NULL); + if (err) + return err; + + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); + put_res(dev, slave, index, RES_COUNTER); + return err; +} + +static void detach_qp(struct mlx4_dev *dev, int slave, struct res_qp *rqp) +{ + struct res_gid *rgid; + struct res_gid *tmp; + struct mlx4_qp qp; /* dummy for calling attach/detach */ + + list_for_each_entry_safe(rgid, tmp, &rqp->mcg_list, list) { + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + mlx4_flow_detach(dev, rgid->reg_id); + break; + case MLX4_STEERING_MODE_B0: + qp.qpn = rqp->local_qpn; + (void) mlx4_qp_detach_common(dev, &qp, rgid->gid, + rgid->prot, rgid->steer); + break; + } + list_del(&rgid->list); + kfree(rgid); + } +} + +static int _move_all_busy(struct mlx4_dev *dev, int slave, + enum mlx4_resource 
type, int print) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = + &priv->mfunc.master.res_tracker; + struct list_head *rlist = &tracker->slave_list[slave].res_list[type]; + struct res_common *r; + struct res_common *tmp; + int busy; + + busy = 0; + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry_safe(r, tmp, rlist, list) { + if (r->owner == slave) { + if (!r->removing) { + if (r->state == RES_ANY_BUSY) { + if (print) + mlx4_dbg(dev, + "%s id 0x%llx is busy\n", + resource_str(type), + r->res_id); + ++busy; + } else { + r->from_state = r->state; + r->state = RES_ANY_BUSY; + r->removing = 1; + } + } + } + } + spin_unlock_irq(mlx4_tlock(dev)); + + return busy; +} + +static int move_all_busy(struct mlx4_dev *dev, int slave, + enum mlx4_resource type) +{ + unsigned long begin; + int busy; + + begin = jiffies; + do { + busy = _move_all_busy(dev, slave, type, 0); + if (time_after(jiffies, begin + 5 * HZ)) + break; + if (busy) + cond_resched(); + } while (busy); + + if (busy) + busy = _move_all_busy(dev, slave, type, 1); + + return busy; +} +static void rem_slave_qps(struct mlx4_dev *dev, int slave) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct list_head *qp_list = + &tracker->slave_list[slave].res_list[RES_QP]; + struct res_qp *qp; + struct res_qp *tmp; + int state; + u64 in_param; + int qpn; + int err; + + err = move_all_busy(dev, slave, RES_QP); + if (err) + mlx4_warn(dev, "rem_slave_qps: Could not move all qps to busy for slave %d\n", + slave); + + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry_safe(qp, tmp, qp_list, com.list) { + spin_unlock_irq(mlx4_tlock(dev)); + if (qp->com.owner == slave) { + qpn = qp->com.res_id; + detach_qp(dev, slave, qp); + state = qp->com.from_state; + while (state != 0) { + switch (state) { + case RES_QP_RESERVED: + spin_lock_irq(mlx4_tlock(dev)); + rb_erase(&qp->com.node, + &tracker->res_tree[RES_QP]); + list_del(&qp->com.list); + spin_unlock_irq(mlx4_tlock(dev)); + if (!valid_reserved(dev, slave, qpn)) { + __mlx4_qp_release_range(dev, qpn, 1); + mlx4_release_resource(dev, slave, + RES_QP, 1, 0); + } + kfree(qp); + state = 0; + break; + case RES_QP_MAPPED: + if (!valid_reserved(dev, slave, qpn)) + __mlx4_qp_free_icm(dev, qpn); + state = RES_QP_RESERVED; + break; + case RES_QP_HW: + in_param = slave; + err = mlx4_cmd(dev, in_param, + qp->local_qpn, 2, + MLX4_CMD_2RST_QP, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) + mlx4_dbg(dev, "rem_slave_qps: failed to move slave %d qpn %d to reset\n", + slave, qp->local_qpn); + atomic_dec(&qp->rcq->ref_count); + atomic_dec(&qp->scq->ref_count); + atomic_dec(&qp->mtt->ref_count); + if (qp->srq) + atomic_dec(&qp->srq->ref_count); + state = RES_QP_MAPPED; + break; + default: + state = 0; + } + } + } + spin_lock_irq(mlx4_tlock(dev)); + } + spin_unlock_irq(mlx4_tlock(dev)); +} + +static void rem_slave_srqs(struct mlx4_dev *dev, int slave) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct list_head *srq_list = + &tracker->slave_list[slave].res_list[RES_SRQ]; + struct res_srq *srq; + struct res_srq *tmp; + int state; + u64 in_param; + int srqn; + int err; + + err = move_all_busy(dev, slave, RES_SRQ); + if (err) + mlx4_warn(dev, "rem_slave_srqs: Could not move all srqs - too busy for slave %d\n", + slave); + + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry_safe(srq, tmp, srq_list, com.list) { + 
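/* The tracker lock is dropped for each entry so that the sleeping
+ * HW2SW_SRQ firmware command can be issued, and re-taken before the
+ * iterator advances. */ +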
spin_unlock_irq(mlx4_tlock(dev)); + if (srq->com.owner == slave) { + srqn = srq->com.res_id; + state = srq->com.from_state; + while (state != 0) { + switch (state) { + case RES_SRQ_ALLOCATED: + __mlx4_srq_free_icm(dev, srqn); + spin_lock_irq(mlx4_tlock(dev)); + rb_erase(&srq->com.node, + &tracker->res_tree[RES_SRQ]); + list_del(&srq->com.list); + spin_unlock_irq(mlx4_tlock(dev)); + mlx4_release_resource(dev, slave, + RES_SRQ, 1, 0); + kfree(srq); + state = 0; + break; + + case RES_SRQ_HW: + in_param = slave; + err = mlx4_cmd(dev, in_param, srqn, 1, + MLX4_CMD_HW2SW_SRQ, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) + mlx4_dbg(dev, "rem_slave_srqs: failed to move slave %d srq %d to SW ownership\n", + slave, srqn); + + atomic_dec(&srq->mtt->ref_count); + if (srq->cq) + atomic_dec(&srq->cq->ref_count); + state = RES_SRQ_ALLOCATED; + break; + + default: + state = 0; + } + } + } + spin_lock_irq(mlx4_tlock(dev)); + } + spin_unlock_irq(mlx4_tlock(dev)); +} + +static void rem_slave_cqs(struct mlx4_dev *dev, int slave) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct list_head *cq_list = + &tracker->slave_list[slave].res_list[RES_CQ]; + struct res_cq *cq; + struct res_cq *tmp; + int state; + u64 in_param; + int cqn; + int err; + + err = move_all_busy(dev, slave, RES_CQ); + if (err) + mlx4_warn(dev, "rem_slave_cqs: Could not move all cqs - too busy for slave %d\n", + slave); + + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry_safe(cq, tmp, cq_list, com.list) { + spin_unlock_irq(mlx4_tlock(dev)); + if (cq->com.owner == slave && !atomic_read(&cq->ref_count)) { + cqn = cq->com.res_id; + state = cq->com.from_state; + while (state != 0) { + switch (state) { + case RES_CQ_ALLOCATED: + __mlx4_cq_free_icm(dev, cqn); + spin_lock_irq(mlx4_tlock(dev)); + rb_erase(&cq->com.node, + &tracker->res_tree[RES_CQ]); + list_del(&cq->com.list); + spin_unlock_irq(mlx4_tlock(dev)); + mlx4_release_resource(dev, slave, + RES_CQ, 1, 0); + kfree(cq); + state = 0; + break; + + case RES_CQ_HW: + in_param = slave; + err = mlx4_cmd(dev, in_param, cqn, 1, + MLX4_CMD_HW2SW_CQ, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) + mlx4_dbg(dev, "rem_slave_cqs: failed to move slave %d cq %d to SW ownership\n", + slave, cqn); + atomic_dec(&cq->mtt->ref_count); + state = RES_CQ_ALLOCATED; + break; + + default: + state = 0; + } + } + } + spin_lock_irq(mlx4_tlock(dev)); + } + spin_unlock_irq(mlx4_tlock(dev)); +} + +static void rem_slave_mrs(struct mlx4_dev *dev, int slave) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct list_head *mpt_list = + &tracker->slave_list[slave].res_list[RES_MPT]; + struct res_mpt *mpt; + struct res_mpt *tmp; + int state; + u64 in_param; + int mptn; + int err; + + err = move_all_busy(dev, slave, RES_MPT); + if (err) + mlx4_warn(dev, "rem_slave_mrs: Could not move all mpts - too busy for slave %d\n", + slave); + + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry_safe(mpt, tmp, mpt_list, com.list) { + spin_unlock_irq(mlx4_tlock(dev)); + if (mpt->com.owner == slave) { + mptn = mpt->com.res_id; + state = mpt->com.from_state; + while (state != 0) { + switch (state) { + case RES_MPT_RESERVED: + __mlx4_mpt_release(dev, mpt->key); + spin_lock_irq(mlx4_tlock(dev)); + rb_erase(&mpt->com.node, + &tracker->res_tree[RES_MPT]); + list_del(&mpt->com.list); + spin_unlock_irq(mlx4_tlock(dev)); + mlx4_release_resource(dev, slave, + RES_MPT, 1, 
0); + kfree(mpt); + state = 0; + break; + + case RES_MPT_MAPPED: + __mlx4_mpt_free_icm(dev, mpt->key); + state = RES_MPT_RESERVED; + break; + + case RES_MPT_HW: + in_param = slave; + err = mlx4_cmd(dev, in_param, mptn, 0, + MLX4_CMD_HW2SW_MPT, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) + mlx4_dbg(dev, "rem_slave_mrs: failed to move slave %d mpt %d to SW ownership\n", + slave, mptn); + if (mpt->mtt) + atomic_dec(&mpt->mtt->ref_count); + state = RES_MPT_MAPPED; + break; + default: + state = 0; + } + } + } + spin_lock_irq(mlx4_tlock(dev)); + } + spin_unlock_irq(mlx4_tlock(dev)); +} + +static void rem_slave_mtts(struct mlx4_dev *dev, int slave) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = + &priv->mfunc.master.res_tracker; + struct list_head *mtt_list = + &tracker->slave_list[slave].res_list[RES_MTT]; + struct res_mtt *mtt; + struct res_mtt *tmp; + int state; + int base; + int err; + + err = move_all_busy(dev, slave, RES_MTT); + if (err) + mlx4_warn(dev, "rem_slave_mtts: Could not move all mtts - too busy for slave %d\n", + slave); + + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry_safe(mtt, tmp, mtt_list, com.list) { + spin_unlock_irq(mlx4_tlock(dev)); + if (mtt->com.owner == slave) { + base = mtt->com.res_id; + state = mtt->com.from_state; + while (state != 0) { + switch (state) { + case RES_MTT_ALLOCATED: + __mlx4_free_mtt_range(dev, base, + mtt->order); + spin_lock_irq(mlx4_tlock(dev)); + rb_erase(&mtt->com.node, + &tracker->res_tree[RES_MTT]); + list_del(&mtt->com.list); + spin_unlock_irq(mlx4_tlock(dev)); + mlx4_release_resource(dev, slave, RES_MTT, + 1 << mtt->order, 0); + kfree(mtt); + state = 0; + break; + + default: + state = 0; + } + } + } + spin_lock_irq(mlx4_tlock(dev)); + } + spin_unlock_irq(mlx4_tlock(dev)); +} + +static int mlx4_do_mirror_rule(struct mlx4_dev *dev, struct res_fs_rule *fs_rule) +{ + struct mlx4_cmd_mailbox *mailbox; + int err; + struct res_fs_rule *mirr_rule; + u64 reg_id; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + if (!fs_rule->mirr_mbox) { + mlx4_err(dev, "rule mirroring mailbox is null\n"); + mlx4_free_cmd_mailbox(dev, mailbox); + return -EINVAL; + } + memcpy(mailbox->buf, fs_rule->mirr_mbox, fs_rule->mirr_mbox_size); + err = mlx4_cmd_imm(dev, mailbox->dma, ®_id, fs_rule->mirr_mbox_size >> 2, 0, + MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + mlx4_free_cmd_mailbox(dev, mailbox); + + if (err) + goto err; + + err = add_res_range(dev, fs_rule->com.owner, reg_id, 1, RES_FS_RULE, fs_rule->qpn); + if (err) + goto err_detach; + + err = get_res(dev, fs_rule->com.owner, reg_id, RES_FS_RULE, &mirr_rule); + if (err) + goto err_rem; + + fs_rule->mirr_rule_id = reg_id; + mirr_rule->mirr_rule_id = 0; + mirr_rule->mirr_mbox_size = 0; + mirr_rule->mirr_mbox = NULL; + put_res(dev, fs_rule->com.owner, reg_id, RES_FS_RULE); + + return 0; +err_rem: + rem_res_range(dev, fs_rule->com.owner, reg_id, 1, RES_FS_RULE, 0); +err_detach: + mlx4_cmd(dev, reg_id, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); +err: + return err; +} + +static int mlx4_mirror_fs_rules(struct mlx4_dev *dev, bool bond) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = + &priv->mfunc.master.res_tracker; + struct rb_root *root = &tracker->res_tree[RES_FS_RULE]; + struct rb_node *p; + struct res_fs_rule *fs_rule; + int err = 0; + LIST_HEAD(mirr_list); + + for (p = rb_first(root); p; p = rb_next(p)) { 
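+ /* First pass: on bond, collect the original rules (those that carry a
+ * saved mailbox) so they can be mirrored; on unbond, collect the mirror
+ * copies (no saved mailbox) so they can be removed.  The firmware
+ * commands are issued in the second loop below. */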
+ fs_rule = rb_entry(p, struct res_fs_rule, com.node); + if ((bond && fs_rule->mirr_mbox_size) || + (!bond && !fs_rule->mirr_mbox_size)) + list_add_tail(&fs_rule->mirr_list, &mirr_list); + } + + list_for_each_entry(fs_rule, &mirr_list, mirr_list) { + if (bond) + err += mlx4_do_mirror_rule(dev, fs_rule); + else + err += mlx4_undo_mirror_rule(dev, fs_rule); + } + return err; +} + +int mlx4_bond_fs_rules(struct mlx4_dev *dev) +{ + return mlx4_mirror_fs_rules(dev, true); +} + +int mlx4_unbond_fs_rules(struct mlx4_dev *dev) +{ + return mlx4_mirror_fs_rules(dev, false); +} + +static void rem_slave_fs_rule(struct mlx4_dev *dev, int slave) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = + &priv->mfunc.master.res_tracker; + struct list_head *fs_rule_list = + &tracker->slave_list[slave].res_list[RES_FS_RULE]; + struct res_fs_rule *fs_rule; + struct res_fs_rule *tmp; + int state; + u64 base; + int err; + + err = move_all_busy(dev, slave, RES_FS_RULE); + if (err) + mlx4_warn(dev, "rem_slave_fs_rule: Could not move all mtts to busy for slave %d\n", + slave); + + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry_safe(fs_rule, tmp, fs_rule_list, com.list) { + spin_unlock_irq(mlx4_tlock(dev)); + if (fs_rule->com.owner == slave) { + base = fs_rule->com.res_id; + state = fs_rule->com.from_state; + while (state != 0) { + switch (state) { + case RES_FS_RULE_ALLOCATED: + /* detach rule */ + err = mlx4_cmd(dev, base, 0, 0, + MLX4_QP_FLOW_STEERING_DETACH, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + + spin_lock_irq(mlx4_tlock(dev)); + rb_erase(&fs_rule->com.node, + &tracker->res_tree[RES_FS_RULE]); + list_del(&fs_rule->com.list); + spin_unlock_irq(mlx4_tlock(dev)); + kfree(fs_rule->mirr_mbox); + kfree(fs_rule); + state = 0; + break; + + default: + state = 0; + } + } + } + spin_lock_irq(mlx4_tlock(dev)); + } + spin_unlock_irq(mlx4_tlock(dev)); +} + +static void rem_slave_eqs(struct mlx4_dev *dev, int slave) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct list_head *eq_list = + &tracker->slave_list[slave].res_list[RES_EQ]; + struct res_eq *eq; + struct res_eq *tmp; + int err; + int state; + int eqn; + + err = move_all_busy(dev, slave, RES_EQ); + if (err) + mlx4_warn(dev, "rem_slave_eqs: Could not move all eqs - too busy for slave %d\n", + slave); + + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry_safe(eq, tmp, eq_list, com.list) { + spin_unlock_irq(mlx4_tlock(dev)); + if (eq->com.owner == slave) { + eqn = eq->com.res_id; + state = eq->com.from_state; + while (state != 0) { + switch (state) { + case RES_EQ_RESERVED: + spin_lock_irq(mlx4_tlock(dev)); + rb_erase(&eq->com.node, + &tracker->res_tree[RES_EQ]); + list_del(&eq->com.list); + spin_unlock_irq(mlx4_tlock(dev)); + kfree(eq); + state = 0; + break; + + case RES_EQ_HW: + err = mlx4_cmd(dev, slave, eqn & 0x3ff, + 1, MLX4_CMD_HW2SW_EQ, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) + mlx4_dbg(dev, "rem_slave_eqs: failed to move slave %d eqs %d to SW ownership\n", + slave, eqn & 0x3ff); + atomic_dec(&eq->mtt->ref_count); + state = RES_EQ_RESERVED; + break; + + default: + state = 0; + } + } + } + spin_lock_irq(mlx4_tlock(dev)); + } + spin_unlock_irq(mlx4_tlock(dev)); +} + +static void rem_slave_counters(struct mlx4_dev *dev, int slave) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct list_head *counter_list = + 
&tracker->slave_list[slave].res_list[RES_COUNTER]; + struct res_counter *counter; + struct res_counter *tmp; + int err; + int *counters_arr = NULL; + int i, j; + + err = move_all_busy(dev, slave, RES_COUNTER); + if (err) + mlx4_warn(dev, "rem_slave_counters: Could not move all counters - too busy for slave %d\n", + slave); + + counters_arr = kmalloc_array(dev->caps.max_counters, + sizeof(*counters_arr), GFP_KERNEL); + if (!counters_arr) + return; + + do { + i = 0; + j = 0; + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry_safe(counter, tmp, counter_list, com.list) { + if (counter->com.owner == slave) { + counters_arr[i++] = counter->com.res_id; + rb_erase(&counter->com.node, + &tracker->res_tree[RES_COUNTER]); + list_del(&counter->com.list); + kfree(counter); + } + } + spin_unlock_irq(mlx4_tlock(dev)); + + while (j < i) { + __mlx4_counter_free(dev, counters_arr[j++]); + mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0); + } + } while (i); + + kfree(counters_arr); +} + +static void rem_slave_xrcdns(struct mlx4_dev *dev, int slave) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct list_head *xrcdn_list = + &tracker->slave_list[slave].res_list[RES_XRCD]; + struct res_xrcdn *xrcd; + struct res_xrcdn *tmp; + int err; + int xrcdn; + + err = move_all_busy(dev, slave, RES_XRCD); + if (err) + mlx4_warn(dev, "rem_slave_xrcdns: Could not move all xrcdns - too busy for slave %d\n", + slave); + + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry_safe(xrcd, tmp, xrcdn_list, com.list) { + if (xrcd->com.owner == slave) { + xrcdn = xrcd->com.res_id; + rb_erase(&xrcd->com.node, &tracker->res_tree[RES_XRCD]); + list_del(&xrcd->com.list); + kfree(xrcd); + __mlx4_xrcd_free(dev, xrcdn); + } + } + spin_unlock_irq(mlx4_tlock(dev)); +} + +void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + mlx4_reset_roce_gids(dev, slave); + mutex_lock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex); + rem_slave_vlans(dev, slave); + rem_slave_macs(dev, slave); + rem_slave_fs_rule(dev, slave); + rem_slave_qps(dev, slave); + rem_slave_srqs(dev, slave); + rem_slave_cqs(dev, slave); + rem_slave_mrs(dev, slave); + rem_slave_eqs(dev, slave); + rem_slave_mtts(dev, slave); + rem_slave_counters(dev, slave); + rem_slave_xrcdns(dev, slave); + mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex); +} + +static void update_qos_vpp(struct mlx4_update_qp_context *ctx, + struct mlx4_vf_immed_vlan_work *work) +{ + ctx->qp_mask |= cpu_to_be64(1ULL << MLX4_UPD_QP_MASK_QOS_VPP); + ctx->qp_context.qos_vport = work->qos_vport; +} + +void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) +{ + struct mlx4_vf_immed_vlan_work *work = + container_of(_work, struct mlx4_vf_immed_vlan_work, work); + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_update_qp_context *upd_context; + struct mlx4_dev *dev = &work->priv->dev; + struct mlx4_resource_tracker *tracker = + &work->priv->mfunc.master.res_tracker; + struct list_head *qp_list = + &tracker->slave_list[work->slave].res_list[RES_QP]; + struct res_qp *qp; + struct res_qp *tmp; + u64 qp_path_mask_vlan_ctrl = + ((1ULL << MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_UNTAGGED) | + (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_1P) | + (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_TAGGED) | + (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_UNTAGGED) | + (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_1P) | + (1ULL << 
MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_TAGGED)); + + u64 qp_path_mask = ((1ULL << MLX4_UPD_QP_PATH_MASK_VLAN_INDEX) | + (1ULL << MLX4_UPD_QP_PATH_MASK_FVL) | + (1ULL << MLX4_UPD_QP_PATH_MASK_CV) | + (1ULL << MLX4_UPD_QP_PATH_MASK_SV) | + (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_HIDE_CQE_VLAN) | + (1ULL << MLX4_UPD_QP_PATH_MASK_FEUP) | + (1ULL << MLX4_UPD_QP_PATH_MASK_FVL_RX) | + (1ULL << MLX4_UPD_QP_PATH_MASK_SCHED_QUEUE)); + + int err; + int port, errors = 0; + u8 vlan_control; + + if (mlx4_is_slave(dev)) { + mlx4_warn(dev, "Trying to update-qp in slave %d\n", + work->slave); + goto out; + } + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + goto out; + if (work->flags & MLX4_VF_IMMED_VLAN_FLAG_LINK_DISABLE) /* block all */ + vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_TX_BLOCK_UNTAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; + else if (!work->vlan_id) + vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; + else if (work->vlan_proto == htons(ETH_P_8021AD)) + vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + else /* vst 802.1Q */ + vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + + upd_context = mailbox->buf; + upd_context->qp_mask = cpu_to_be64(1ULL << MLX4_UPD_QP_MASK_VSD); + + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry_safe(qp, tmp, qp_list, com.list) { + spin_unlock_irq(mlx4_tlock(dev)); + if (qp->com.owner == work->slave) { + if (qp->com.from_state != RES_QP_HW || + !qp->sched_queue || /* no INIT2RTR trans yet */ + mlx4_is_qp_reserved(dev, qp->local_qpn) || + qp->qpc_flags & (1 << MLX4_RSS_QPC_FLAG_OFFSET)) { + spin_lock_irq(mlx4_tlock(dev)); + continue; + } + port = (qp->sched_queue >> 6 & 1) + 1; + if (port != work->port) { + spin_lock_irq(mlx4_tlock(dev)); + continue; + } + if (MLX4_QP_ST_RC == ((qp->qpc_flags >> 16) & 0xff)) + upd_context->primary_addr_path_mask = cpu_to_be64(qp_path_mask); + else + upd_context->primary_addr_path_mask = + cpu_to_be64(qp_path_mask | qp_path_mask_vlan_ctrl); + if (work->vlan_id == MLX4_VGT) { + upd_context->qp_context.param3 = qp->param3; + upd_context->qp_context.pri_path.vlan_control = qp->vlan_control; + upd_context->qp_context.pri_path.fvl_rx = qp->fvl_rx; + upd_context->qp_context.pri_path.vlan_index = qp->vlan_index; + upd_context->qp_context.pri_path.fl = qp->pri_path_fl; + upd_context->qp_context.pri_path.feup = qp->feup; + upd_context->qp_context.pri_path.sched_queue = + qp->sched_queue; + } else { + upd_context->qp_context.param3 = qp->param3 & ~cpu_to_be32(MLX4_STRIP_VLAN); + upd_context->qp_context.pri_path.vlan_control = vlan_control; + upd_context->qp_context.pri_path.vlan_index = work->vlan_ix; + upd_context->qp_context.pri_path.fvl_rx = + qp->fvl_rx | MLX4_FVL_RX_FORCE_ETH_VLAN; + upd_context->qp_context.pri_path.fl = + qp->pri_path_fl | MLX4_FL_ETH_HIDE_CQE_VLAN; + if (work->vlan_proto == htons(ETH_P_8021AD)) + upd_context->qp_context.pri_path.fl |= MLX4_FL_SV; + else + upd_context->qp_context.pri_path.fl |= MLX4_FL_CV; + upd_context->qp_context.pri_path.feup = + qp->feup | MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN; + upd_context->qp_context.pri_path.sched_queue = + qp->sched_queue & 0xC7; + 
upd_context->qp_context.pri_path.sched_queue |= + ((work->qos & 0x7) << 3); + + if (dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_QOS_VPP) + update_qos_vpp(upd_context, work); + } + + err = mlx4_cmd(dev, mailbox->dma, + qp->local_qpn & 0xffffff, + 0, MLX4_CMD_UPDATE_QP, + MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); + if (err) { + mlx4_info(dev, "UPDATE_QP failed for slave %d, port %d, qpn %d (%d)\n", + work->slave, port, qp->local_qpn, err); + errors++; + } + } + spin_lock_irq(mlx4_tlock(dev)); + } + spin_unlock_irq(mlx4_tlock(dev)); + mlx4_free_cmd_mailbox(dev, mailbox); + + if (errors) + mlx4_err(dev, "%d UPDATE_QP failures for slave %d, port %d\n", + errors, work->slave, work->port); + + /* unregister previous vlan_id if needed and we had no errors + * while updating the QPs + */ + if (work->flags & MLX4_VF_IMMED_VLAN_FLAG_VLAN && !errors && + NO_INDX != work->orig_vlan_ix) + __mlx4_unregister_vlan(&work->priv->dev, work->port, + work->orig_vlan_id); +out: + kfree(work); + return; +} diff --git a/drivers/net/ethernet/mellanox/mlx4/sense.c b/drivers/net/ethernet/mellanox/mlx4/sense.c new file mode 100644 index 000000000..094773d88 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/sense.c @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#include <linux/errno.h> +#include <linux/if_ether.h> + +#include <linux/mlx4/cmd.h> + +#include "mlx4.h" + +int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port, + enum mlx4_port_type *type) +{ + u64 out_param; + int err = 0; + + err = mlx4_cmd_imm(dev, 0, &out_param, port, 0, + MLX4_CMD_SENSE_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + if (err) { + mlx4_err(dev, "Sense command failed for port: %d\n", port); + return err; + } + + if (out_param > 2) { + mlx4_err(dev, "Sense returned illegal value: 0x%llx\n", out_param); + return -EINVAL; + } + + *type = out_param; + return 0; +} + +void mlx4_do_sense_ports(struct mlx4_dev *dev, + enum mlx4_port_type *stype, + enum mlx4_port_type *defaults) +{ + struct mlx4_sense *sense = &mlx4_priv(dev)->sense; + int err; + int i; + + for (i = 1; i <= dev->caps.num_ports; i++) { + stype[i - 1] = 0; + if (sense->do_sense_port[i] && sense->sense_allowed[i] && + dev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) { + err = mlx4_SENSE_PORT(dev, i, &stype[i - 1]); + if (err) + stype[i - 1] = defaults[i - 1]; + } else + stype[i - 1] = defaults[i - 1]; + } + + /* + * If sensed nothing, remain in current configuration. + */ + for (i = 0; i < dev->caps.num_ports; i++) + stype[i] = stype[i] ? stype[i] : defaults[i]; + +} + +static void mlx4_sense_port(struct work_struct *work) +{ + struct delayed_work *delay = to_delayed_work(work); + struct mlx4_sense *sense = container_of(delay, struct mlx4_sense, + sense_poll); + struct mlx4_dev *dev = sense->dev; + struct mlx4_priv *priv = mlx4_priv(dev); + enum mlx4_port_type stype[MLX4_MAX_PORTS]; + + mutex_lock(&priv->port_mutex); + mlx4_do_sense_ports(dev, stype, &dev->caps.port_type[1]); + + if (mlx4_check_port_params(dev, stype)) + goto sense_again; + + if (mlx4_change_port_types(dev, stype)) + mlx4_err(dev, "Failed to change port_types\n"); + +sense_again: + mutex_unlock(&priv->port_mutex); + queue_delayed_work(mlx4_wq , &sense->sense_poll, + round_jiffies_relative(MLX4_SENSE_RANGE)); +} + +void mlx4_start_sense(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_sense *sense = &priv->sense; + + if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) + return; + + queue_delayed_work(mlx4_wq , &sense->sense_poll, + round_jiffies_relative(MLX4_SENSE_RANGE)); +} + +void mlx4_stop_sense(struct mlx4_dev *dev) +{ + cancel_delayed_work_sync(&mlx4_priv(dev)->sense.sense_poll); +} + +void mlx4_sense_init(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_sense *sense = &priv->sense; + int port; + + sense->dev = dev; + for (port = 1; port <= dev->caps.num_ports; port++) + sense->do_sense_port[port] = 1; + + INIT_DEFERRABLE_WORK(&sense->sense_poll, mlx4_sense_port); +} diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c new file mode 100644 index 000000000..dd890f5d7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include <linux/mlx4/cmd.h> +#include <linux/mlx4/srq.h> +#include <linux/export.h> +#include <linux/gfp.h> + +#include "mlx4.h" +#include "icm.h" + +void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type) +{ + struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; + struct mlx4_srq *srq; + + rcu_read_lock(); + srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1)); + rcu_read_unlock(); + if (srq) + refcount_inc(&srq->refcount); + else { + mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn); + return; + } + + srq->event(srq, event_type); + + if (refcount_dec_and_test(&srq->refcount)) + complete(&srq->free); +} + +static int mlx4_SW2HW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, + int srq_num) +{ + return mlx4_cmd(dev, mailbox->dma, srq_num, 0, + MLX4_CMD_SW2HW_SRQ, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_WRAPPED); +} + +static int mlx4_HW2SW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, + int srq_num) +{ + return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, srq_num, + mailbox ? 
0 : 1, MLX4_CMD_HW2SW_SRQ, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); +} + +static int mlx4_ARM_SRQ(struct mlx4_dev *dev, int srq_num, int limit_watermark) +{ + return mlx4_cmd(dev, limit_watermark, srq_num, 0, MLX4_CMD_ARM_SRQ, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); +} + +static int mlx4_QUERY_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, + int srq_num) +{ + return mlx4_cmd_box(dev, 0, mailbox->dma, srq_num, 0, MLX4_CMD_QUERY_SRQ, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); +} + +int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn) +{ + struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; + int err; + + + *srqn = mlx4_bitmap_alloc(&srq_table->bitmap); + if (*srqn == -1) + return -ENOMEM; + + err = mlx4_table_get(dev, &srq_table->table, *srqn); + if (err) + goto err_out; + + err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn); + if (err) + goto err_put; + return 0; + +err_put: + mlx4_table_put(dev, &srq_table->table, *srqn); + +err_out: + mlx4_bitmap_free(&srq_table->bitmap, *srqn, MLX4_NO_RR); + return err; +} + +static int mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn) +{ + u64 out_param; + int err; + + if (mlx4_is_mfunc(dev)) { + err = mlx4_cmd_imm(dev, 0, &out_param, RES_SRQ, + RES_OP_RESERVE_AND_MAP, + MLX4_CMD_ALLOC_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (!err) + *srqn = get_param_l(&out_param); + + return err; + } + return __mlx4_srq_alloc_icm(dev, srqn); +} + +void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn) +{ + struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; + + mlx4_table_put(dev, &srq_table->cmpt_table, srqn); + mlx4_table_put(dev, &srq_table->table, srqn); + mlx4_bitmap_free(&srq_table->bitmap, srqn, MLX4_NO_RR); +} + +static void mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn) +{ + u64 in_param = 0; + + if (mlx4_is_mfunc(dev)) { + set_param_l(&in_param, srqn); + if (mlx4_cmd(dev, in_param, RES_SRQ, RES_OP_RESERVE_AND_MAP, + MLX4_CMD_FREE_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED)) + mlx4_warn(dev, "Failed freeing cq:%d\n", srqn); + return; + } + __mlx4_srq_free_icm(dev, srqn); +} + +int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd, + struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq) +{ + struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_srq_context *srq_context; + u64 mtt_addr; + int err; + + err = mlx4_srq_alloc_icm(dev, &srq->srqn); + if (err) + return err; + + spin_lock_irq(&srq_table->lock); + err = radix_tree_insert(&srq_table->tree, srq->srqn, srq); + spin_unlock_irq(&srq_table->lock); + if (err) + goto err_icm; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto err_radix; + } + + srq_context = mailbox->buf; + srq_context->state_logsize_srqn = cpu_to_be32((ilog2(srq->max) << 24) | + srq->srqn); + srq_context->logstride = srq->wqe_shift - 4; + srq_context->xrcd = cpu_to_be16(xrcd); + srq_context->pg_offset_cqn = cpu_to_be32(cqn & 0xffffff); + srq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; + + mtt_addr = mlx4_mtt_addr(dev, mtt); + srq_context->mtt_base_addr_h = mtt_addr >> 32; + srq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff); + srq_context->pd = cpu_to_be32(pdn); + srq_context->db_rec_addr = cpu_to_be64(db_rec); + + err = mlx4_SW2HW_SRQ(dev, mailbox, srq->srqn); + mlx4_free_cmd_mailbox(dev, mailbox); + if (err) + goto err_radix; + + refcount_set(&srq->refcount, 1); + init_completion(&srq->free); + + return 0; 
+ +err_radix: + spin_lock_irq(&srq_table->lock); + radix_tree_delete(&srq_table->tree, srq->srqn); + spin_unlock_irq(&srq_table->lock); + +err_icm: + mlx4_srq_free_icm(dev, srq->srqn); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_srq_alloc); + +void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq) +{ + struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; + int err; + + err = mlx4_HW2SW_SRQ(dev, NULL, srq->srqn); + if (err) + mlx4_warn(dev, "HW2SW_SRQ failed (%d) for SRQN %06x\n", err, srq->srqn); + + spin_lock_irq(&srq_table->lock); + radix_tree_delete(&srq_table->tree, srq->srqn); + spin_unlock_irq(&srq_table->lock); + + if (refcount_dec_and_test(&srq->refcount)) + complete(&srq->free); + wait_for_completion(&srq->free); + + mlx4_srq_free_icm(dev, srq->srqn); +} +EXPORT_SYMBOL_GPL(mlx4_srq_free); + +int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark) +{ + return mlx4_ARM_SRQ(dev, srq->srqn, limit_watermark); +} +EXPORT_SYMBOL_GPL(mlx4_srq_arm); + +int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_watermark) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_srq_context *srq_context; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + srq_context = mailbox->buf; + + err = mlx4_QUERY_SRQ(dev, mailbox, srq->srqn); + if (err) + goto err_out; + *limit_watermark = be16_to_cpu(srq_context->limit_watermark); + +err_out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_srq_query); + +int mlx4_init_srq_table(struct mlx4_dev *dev) +{ + struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; + + spin_lock_init(&srq_table->lock); + INIT_RADIX_TREE(&srq_table->tree, GFP_ATOMIC); + if (mlx4_is_slave(dev)) + return 0; + + return mlx4_bitmap_init(&srq_table->bitmap, dev->caps.num_srqs, + dev->caps.num_srqs - 1, dev->caps.reserved_srqs, 0); +} + +void mlx4_cleanup_srq_table(struct mlx4_dev *dev) +{ + if (mlx4_is_slave(dev)) + return; + mlx4_bitmap_cleanup(&mlx4_priv(dev)->srq_table.bitmap); +} + +struct mlx4_srq *mlx4_srq_lookup(struct mlx4_dev *dev, u32 srqn) +{ + struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; + struct mlx4_srq *srq; + + rcu_read_lock(); + srq = radix_tree_lookup(&srq_table->tree, + srqn & (dev->caps.num_srqs - 1)); + rcu_read_unlock(); + + return srq; +} +EXPORT_SYMBOL_GPL(mlx4_srq_lookup); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig new file mode 100644 index 000000000..26685fd0f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -0,0 +1,190 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Mellanox driver configuration +# + +config MLX5_CORE + tristate "Mellanox 5th generation network adapters (ConnectX series) core driver" + depends on PCI + select AUXILIARY_BUS + select NET_DEVLINK + depends on VXLAN || !VXLAN + depends on MLXFW || !MLXFW + depends on PTP_1588_CLOCK_OPTIONAL + depends on PCI_HYPERV_INTERFACE || !PCI_HYPERV_INTERFACE + help + Core driver for low level functionality of the ConnectX-4 and + Connect-IB cards by Mellanox Technologies. + +config MLX5_FPGA + bool "Mellanox Technologies Innova support" + depends on MLX5_CORE + help + Build support for the Innova family of network cards by Mellanox + Technologies. Innova network cards are comprised of a ConnectX chip + and an FPGA chip on one board. 
If you select this option, the
+ mlx5_core driver will include the Innova FPGA core and allow building
+ sandbox-specific client drivers.
+
+config MLX5_CORE_EN
+ bool "Mellanox 5th generation network adapters (ConnectX series) Ethernet support"
+ depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE
+ select PAGE_POOL
+ select DIMLIB
+ help
+ Ethernet support in Mellanox Technologies ConnectX-4 NIC.
+
+config MLX5_EN_ARFS
+ bool "Mellanox MLX5 ethernet accelerated receive flow steering (ARFS) support"
+ depends on MLX5_CORE_EN && RFS_ACCEL
+ default y
+ help
+ Mellanox MLX5 ethernet hardware-accelerated receive flow steering support.
+ Enables ethernet netdevice arfs support and ntuple filtering.
+
+config MLX5_EN_RXNFC
+ bool "Mellanox MLX5 ethernet rx nfc flow steering support"
+ depends on MLX5_CORE_EN
+ default y
+ help
+ Mellanox MLX5 ethernet rx nfc flow steering support.
+ Enables ethtool receive network flow classification, which allows user-defined
+ flow rules to direct traffic into an arbitrary rx queue via the ethtool set/get_rxnfc
+ API.
+
+config MLX5_MPFS
+ bool "Mellanox Technologies MLX5 MPFS support"
+ depends on MLX5_CORE_EN
+ default y
+ help
+ Mellanox Technologies Ethernet Multi-Physical Function Switch (MPFS)
+ support in ConnectX NIC. MPFS is required when multi-PF configuration
+ is enabled to allow passing user-configured unicast MAC addresses to the
+ requesting PF.
+
+config MLX5_ESWITCH
+ bool "Mellanox Technologies MLX5 SRIOV E-Switch support"
+ depends on MLX5_CORE_EN && NET_SWITCHDEV
+ default y
+ help
+ Mellanox Technologies Ethernet SRIOV E-Switch support in ConnectX NIC.
+ E-Switch provides internal SRIOV packet steering and switching for the
+ enabled VFs and PF in two available modes:
+ Legacy SRIOV mode (L2 mac vlan steering based).
+ Switchdev mode (eswitch offloads).
+
+config MLX5_BRIDGE
+ bool
+ depends on MLX5_ESWITCH && BRIDGE
+ default y
+ help
+ mlx5 ConnectX offloads support for Ethernet Bridging (BRIDGE).
+ Enable adding representors of mlx5 uplink and VF ports to Bridge and
+ offloading rules for traffic between such ports. Supports VLANs (trunk and
+ access modes).
+
+config MLX5_CLS_ACT
+ bool "MLX5 TC classifier action support"
+ depends on MLX5_ESWITCH && NET_CLS_ACT
+ default y
+ help
+ mlx5 ConnectX offloads support for TC classifier action (NET_CLS_ACT),
+ works in both native NIC mode and Switchdev SRIOV mode.
+ Actions get attached to hardware-offloaded classifiers and are
+ invoked after a successful classification. Actions are used to
+ overwrite the classification result, instantly drop or redirect and/or
+ reformat packets at wire speed without involving the host CPU.
+
+ If set to N, TC offloads in both NIC and switchdev modes will be disabled.
+ If unsure, set to Y
+
+config MLX5_TC_CT
+ bool "MLX5 TC connection tracking offload support"
+ depends on MLX5_CLS_ACT && NF_FLOW_TABLE && NET_ACT_CT && NET_TC_SKB_EXT
+ default y
+ help
+ Say Y here if you want to support offloading connection tracking rules
+ via tc ct action.
+
+ If unsure, set to Y
+
+config MLX5_TC_SAMPLE
+ bool "MLX5 TC sample offload support"
+ depends on MLX5_CLS_ACT
+ depends on PSAMPLE=y || PSAMPLE=n || MLX5_CORE=m
+ default y
+ help
+ Say Y here if you want to support offloading sample rules via tc
+ sample action.
+ If set to N, will not be able to configure tc rules with sample
+ action.
+
+ If unsure, set to Y
+
+config MLX5_CORE_EN_DCB
+ bool "Data Center Bridging (DCB) Support"
+ default y
+ depends on MLX5_CORE_EN && DCB
+ help
+ Say Y here if you want to use Data Center Bridging (DCB) in the
+ driver.
+ If set to N, will not be able to configure QoS and ratelimit attributes.
+ This flag depends on the kernel's DCB support.
+
+ If unsure, set to Y
+
+config MLX5_CORE_IPOIB
+ bool "Mellanox 5th generation network adapters (ConnectX series) IPoIB offloads support"
+ depends on MLX5_CORE_EN
+ help
+ MLX5 IPoIB offloads & acceleration support.
+
+config MLX5_EN_MACSEC
+ bool "Connect-X support for MACSec offload"
+ depends on MLX5_CORE_EN
+ depends on MACSEC
+ default n
+ help
+ Build support for MACsec cryptography-offload acceleration in the NIC.
+
+config MLX5_EN_IPSEC
+ bool "Mellanox Technologies IPsec Connect-X support"
+ depends on MLX5_CORE_EN
+ depends on XFRM_OFFLOAD
+ depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
+ help
+ Build support for IPsec cryptography-offload acceleration in the NIC.
+
+config MLX5_EN_TLS
+ bool "Mellanox Technologies TLS Connect-X support"
+ depends on TLS_DEVICE
+ depends on TLS=y || MLX5_CORE=m
+ depends on MLX5_CORE_EN
+ help
+ Build support for TLS cryptography-offload acceleration in the NIC.
+
+config MLX5_SW_STEERING
+ bool "Mellanox Technologies software-managed steering"
+ depends on MLX5_CORE_EN && MLX5_ESWITCH
+ select CRC32
+ default y
+ help
+ Build support for software-managed steering in the NIC.
+
+config MLX5_SF
+ bool "Mellanox Technologies subfunction device support using auxiliary device"
+ depends on MLX5_CORE && MLX5_CORE_EN
+ help
+ Build support for subfunction device in the NIC. A Mellanox subfunction
+ device can support RDMA, netdevice and vdpa device.
+ It is similar to a SRIOV VF but it doesn't require SRIOV support.
+
+config MLX5_SF_MANAGER
+ bool
+ depends on MLX5_SF && MLX5_ESWITCH
+ default y
+ help
+ Build support for subfunction port in the NIC. A Mellanox subfunction
+ port is managed through devlink. A subfunction supports RDMA, netdevice
+ and vdpa device. It is similar to a SRIOV VF but it doesn't require
+ SRIOV support.
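The mlx5 Kconfig entries above chain together (MLX5_CORE -> MLX5_CORE_EN -> MLX5_ESWITCH -> MLX5_CLS_ACT, and so on), so enabling the Ethernet feature set is mostly a matter of walking down that chain. As a minimal sketch, assuming the prerequisite kernel options (NET_SWITCHDEV, DCB, RFS_ACCEL, NET_CLS_ACT, NF_FLOW_TABLE, NET_ACT_CT, NET_TC_SKB_EXT) are already enabled and mlx5_core is built as a module, a matching .config fragment could look like:

CONFIG_MLX5_CORE=m
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_RXNFC=y
CONFIG_MLX5_MPFS=y
CONFIG_MLX5_ESWITCH=y
CONFIG_MLX5_CLS_ACT=y
CONFIG_MLX5_TC_CT=y
CONFIG_MLX5_SW_STEERING=y

Most of these bool options default to y once their dependencies are satisfied, so in practice usually only CONFIG_MLX5_CORE and CONFIG_MLX5_CORE_EN need to be switched on explicitly.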
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile new file mode 100644 index 000000000..a22c32aab --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -0,0 +1,123 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for Mellanox 5th generation network adapters +# (ConnectX series) core & netdev driver +# + +subdir-ccflags-y += -I$(src) + +obj-$(CONFIG_MLX5_CORE) += mlx5_core.o + +# +# mlx5 core basic +# +mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ + health.o mcg.o cq.o alloc.o port.o mr.o pd.o \ + transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ + fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \ + lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ + diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \ + fw_reset.o qos.o lib/tout.o lib/aso.o + +# +# Netdev basic +# +mlx5_core-$(CONFIG_MLX5_CORE_EN) += en/rqt.o en/tir.o en/rss.o en/rx_res.o \ + en/channels.o en_main.o en_common.o en_fs.o en_ethtool.o \ + en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ + en_selftest.o en/port.o en/monitor_stats.o en/health.o \ + en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ + en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \ + en/qos.o en/htb.o en/trap.o en/fs_tt_redirect.o en/selq.o \ + lib/crypto.o + +# +# Netdev extra +# +mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o +mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o +mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o +mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag/mp.o lag/port_sel.o lib/geneve.o lib/port_tun.o \ + en_rep.o en/rep/bond.o en/mod_hdr.o \ + en/mapping.o lag/mpesw.o +mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \ + lib/fs_chains.o en/tc_tun.o \ + esw/indir_table.o en/tc_tun_encap.o \ + en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \ + en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o \ + en/tc/post_act.o en/tc/int_port.o en/tc/meter.o \ + en/tc/post_meter.o + +mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en/tc/act/act.o en/tc/act/drop.o en/tc/act/trap.o \ + en/tc/act/accept.o en/tc/act/mark.o en/tc/act/goto.o \ + en/tc/act/tun.o en/tc/act/csum.o en/tc/act/pedit.o \ + en/tc/act/vlan.o en/tc/act/vlan_mangle.o en/tc/act/mpls.o \ + en/tc/act/mirred.o en/tc/act/mirred_nic.o \ + en/tc/act/ct.o en/tc/act/sample.o en/tc/act/ptype.o \ + en/tc/act/redirect_ingress.o en/tc/act/police.o + +ifneq ($(CONFIG_MLX5_TC_CT),) + mlx5_core-y += en/tc_ct.o en/tc/ct_fs_dmfs.o + mlx5_core-$(CONFIG_MLX5_SW_STEERING) += en/tc/ct_fs_smfs.o +endif + +mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o + +# +# Core extra +# +mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ + ecpf.o rdma.o esw/legacy.o \ + esw/debugfs.o esw/devlink_port.o esw/vporttbl.o esw/qos.o + +mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \ + esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \ + esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o + +mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o en/rep/bridge.o + +mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o +mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o +mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o +mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o lib/hv_vhca.o + +# +# Ipoib netdev +# +mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib_vlan.o + +# +# Accelerations 
& FPGA +# +mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o + +mlx5_core-$(CONFIG_MLX5_EN_MACSEC) += en_accel/macsec.o en_accel/macsec_fs.o \ + en_accel/macsec_stats.o + +mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ + en_accel/ipsec_stats.o en_accel/ipsec_fs.o \ + en_accel/ipsec_offload.o + +mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/ktls_stats.o \ + en_accel/fs_tcp.o en_accel/ktls.o en_accel/ktls_txrx.o \ + en_accel/ktls_tx.o en_accel/ktls_rx.o + +mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o \ + steering/dr_matcher.o steering/dr_rule.o \ + steering/dr_icm_pool.o steering/dr_buddy.o \ + steering/dr_ste.o steering/dr_send.o \ + steering/dr_ste_v0.o steering/dr_ste_v1.o \ + steering/dr_ste_v2.o \ + steering/dr_cmd.o steering/dr_fw.o \ + steering/dr_action.o steering/fs_dr.o \ + steering/dr_dbg.o lib/smfs.o +# +# SF device +# +mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o irq_affinity.o + +# +# SF manager +# +mlx5_core-$(CONFIG_MLX5_SF_MANAGER) += sf/cmd.o sf/hw_table.o sf/devlink.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c new file mode 100644 index 000000000..6aca004e8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/export.h> +#include <linux/bitmap.h> +#include <linux/dma-mapping.h> +#include <linux/vmalloc.h> +#include <linux/mlx5/driver.h> + +#include "mlx5_core.h" + +struct mlx5_db_pgdir { + struct list_head list; + unsigned long *bitmap; + __be32 *db_page; + dma_addr_t db_dma; +}; + +/* Handling for queue buffers -- we allocate a bunch of memory and + * register it in a memory region at HCA virtual address 0. 
+ */ + +static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev, + size_t size, dma_addr_t *dma_handle, + int node) +{ + struct device *device = mlx5_core_dma_dev(dev); + struct mlx5_priv *priv = &dev->priv; + int original_node; + void *cpu_handle; + + mutex_lock(&priv->alloc_mutex); + original_node = dev_to_node(device); + set_dev_node(device, node); + cpu_handle = dma_alloc_coherent(device, size, dma_handle, + GFP_KERNEL); + set_dev_node(device, original_node); + mutex_unlock(&priv->alloc_mutex); + return cpu_handle; +} + +int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, + struct mlx5_frag_buf *buf, int node) +{ + int i; + + buf->size = size; + buf->npages = DIV_ROUND_UP(size, PAGE_SIZE); + buf->page_shift = PAGE_SHIFT; + buf->frags = kcalloc(buf->npages, sizeof(struct mlx5_buf_list), + GFP_KERNEL); + if (!buf->frags) + goto err_out; + + for (i = 0; i < buf->npages; i++) { + struct mlx5_buf_list *frag = &buf->frags[i]; + int frag_sz = min_t(int, size, PAGE_SIZE); + + frag->buf = mlx5_dma_zalloc_coherent_node(dev, frag_sz, + &frag->map, node); + if (!frag->buf) + goto err_free_buf; + if (frag->map & ((1 << buf->page_shift) - 1)) { + dma_free_coherent(mlx5_core_dma_dev(dev), frag_sz, + buf->frags[i].buf, buf->frags[i].map); + mlx5_core_warn(dev, "unexpected map alignment: %pad, page_shift=%d\n", + &frag->map, buf->page_shift); + goto err_free_buf; + } + size -= frag_sz; + } + + return 0; + +err_free_buf: + while (i--) + dma_free_coherent(mlx5_core_dma_dev(dev), PAGE_SIZE, buf->frags[i].buf, + buf->frags[i].map); + kfree(buf->frags); +err_out: + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(mlx5_frag_buf_alloc_node); + +void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf) +{ + int size = buf->size; + int i; + + for (i = 0; i < buf->npages; i++) { + int frag_sz = min_t(int, size, PAGE_SIZE); + + dma_free_coherent(mlx5_core_dma_dev(dev), frag_sz, buf->frags[i].buf, + buf->frags[i].map); + size -= frag_sz; + } + kfree(buf->frags); +} +EXPORT_SYMBOL_GPL(mlx5_frag_buf_free); + +static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev, + int node) +{ + u32 db_per_page = PAGE_SIZE / cache_line_size(); + struct mlx5_db_pgdir *pgdir; + + pgdir = kzalloc_node(sizeof(*pgdir), GFP_KERNEL, node); + if (!pgdir) + return NULL; + + pgdir->bitmap = bitmap_zalloc_node(db_per_page, GFP_KERNEL, node); + if (!pgdir->bitmap) { + kfree(pgdir); + return NULL; + } + + bitmap_fill(pgdir->bitmap, db_per_page); + + pgdir->db_page = mlx5_dma_zalloc_coherent_node(dev, PAGE_SIZE, + &pgdir->db_dma, node); + if (!pgdir->db_page) { + bitmap_free(pgdir->bitmap); + kfree(pgdir); + return NULL; + } + + return pgdir; +} + +static int mlx5_alloc_db_from_pgdir(struct mlx5_db_pgdir *pgdir, + struct mlx5_db *db) +{ + u32 db_per_page = PAGE_SIZE / cache_line_size(); + int offset; + int i; + + i = find_first_bit(pgdir->bitmap, db_per_page); + if (i >= db_per_page) + return -ENOMEM; + + __clear_bit(i, pgdir->bitmap); + + db->u.pgdir = pgdir; + db->index = i; + offset = db->index * cache_line_size(); + db->db = pgdir->db_page + offset / sizeof(*pgdir->db_page); + db->dma = pgdir->db_dma + offset; + + db->db[0] = 0; + db->db[1] = 0; + + return 0; +} + +int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db, int node) +{ + struct mlx5_db_pgdir *pgdir; + int ret = 0; + + mutex_lock(&dev->priv.pgdir_mutex); + + list_for_each_entry(pgdir, &dev->priv.pgdir_list, list) + if (!mlx5_alloc_db_from_pgdir(pgdir, db)) + goto out; + + pgdir = mlx5_alloc_db_pgdir(dev, 
node); + if (!pgdir) { + ret = -ENOMEM; + goto out; + } + + list_add(&pgdir->list, &dev->priv.pgdir_list); + + /* This should never fail -- we just allocated an empty page: */ + WARN_ON(mlx5_alloc_db_from_pgdir(pgdir, db)); + +out: + mutex_unlock(&dev->priv.pgdir_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(mlx5_db_alloc_node); + +void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) +{ + u32 db_per_page = PAGE_SIZE / cache_line_size(); + + mutex_lock(&dev->priv.pgdir_mutex); + + __set_bit(db->index, db->u.pgdir->bitmap); + + if (bitmap_full(db->u.pgdir->bitmap, db_per_page)) { + dma_free_coherent(mlx5_core_dma_dev(dev), PAGE_SIZE, + db->u.pgdir->db_page, db->u.pgdir->db_dma); + list_del(&db->u.pgdir->list); + bitmap_free(db->u.pgdir->bitmap); + kfree(db->u.pgdir); + } + + mutex_unlock(&dev->priv.pgdir_mutex); +} +EXPORT_SYMBOL_GPL(mlx5_db_free); + +void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm) +{ + int i; + + WARN_ON(perm & 0xfc); + for (i = 0; i < buf->npages; i++) + pas[i] = cpu_to_be64(buf->frags[i].map | perm); +} +EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array_perm); + +void mlx5_fill_page_frag_array(struct mlx5_frag_buf *buf, __be64 *pas) +{ + mlx5_fill_page_frag_array_perm(buf, pas, 0); +} +EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c new file mode 100644 index 000000000..ac6a0785b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -0,0 +1,2311 @@ +/* + * Copyright (c) 2013-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/highmem.h> +#include <linux/errno.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/random.h> +#include <linux/io-mapping.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/eq.h> +#include <linux/debugfs.h> + +#include "mlx5_core.h" +#include "lib/eq.h" +#include "lib/tout.h" +#define CREATE_TRACE_POINTS +#include "diag/cmd_tracepoint.h" + +struct mlx5_ifc_mbox_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_mbox_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +enum { + CMD_IF_REV = 5, +}; + +enum { + CMD_MODE_POLLING, + CMD_MODE_EVENTS +}; + +enum { + MLX5_CMD_DELIVERY_STAT_OK = 0x0, + MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR = 0x1, + MLX5_CMD_DELIVERY_STAT_TOK_ERR = 0x2, + MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR = 0x3, + MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR = 0x4, + MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR = 0x5, + MLX5_CMD_DELIVERY_STAT_FW_ERR = 0x6, + MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR = 0x7, + MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR = 0x8, + MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR = 0x9, + MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10, +}; + +static u16 in_to_opcode(void *in) +{ + return MLX5_GET(mbox_in, in, opcode); +} + +/* Returns true for opcodes that might be triggered very frequently and throttle + * the command interface. Limit their command slots usage. + */ +static bool mlx5_cmd_is_throttle_opcode(u16 op) +{ + switch (op) { + case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: + case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: + case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: + return true; + } + return false; +} + +static struct mlx5_cmd_work_ent * +cmd_alloc_ent(struct mlx5_cmd *cmd, struct mlx5_cmd_msg *in, + struct mlx5_cmd_msg *out, void *uout, int uout_size, + mlx5_cmd_cbk_t cbk, void *context, int page_queue) +{ + gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL; + struct mlx5_cmd_work_ent *ent; + + ent = kzalloc(sizeof(*ent), alloc_flags); + if (!ent) + return ERR_PTR(-ENOMEM); + + ent->idx = -EINVAL; + ent->in = in; + ent->out = out; + ent->uout = uout; + ent->uout_size = uout_size; + ent->callback = cbk; + ent->context = context; + ent->cmd = cmd; + ent->page_queue = page_queue; + ent->op = in_to_opcode(in->first.data); + refcount_set(&ent->refcnt, 1); + + return ent; +} + +static void cmd_free_ent(struct mlx5_cmd_work_ent *ent) +{ + kfree(ent); +} + +static u8 alloc_token(struct mlx5_cmd *cmd) +{ + u8 token; + + spin_lock(&cmd->token_lock); + cmd->token++; + if (cmd->token == 0) + cmd->token++; + token = cmd->token; + spin_unlock(&cmd->token_lock); + + return token; +} + +static int cmd_alloc_index(struct mlx5_cmd *cmd, struct mlx5_cmd_work_ent *ent) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cmd->alloc_lock, flags); + ret = find_first_bit(&cmd->vars.bitmask, cmd->vars.max_reg_cmds); + if (ret < cmd->vars.max_reg_cmds) { + clear_bit(ret, &cmd->vars.bitmask); + ent->idx = ret; + cmd->ent_arr[ent->idx] = ent; + } + spin_unlock_irqrestore(&cmd->alloc_lock, flags); + + return ret < cmd->vars.max_reg_cmds ? 
ret : -ENOMEM; +} + +static void cmd_free_index(struct mlx5_cmd *cmd, int idx) +{ + lockdep_assert_held(&cmd->alloc_lock); + set_bit(idx, &cmd->vars.bitmask); +} + +static void cmd_ent_get(struct mlx5_cmd_work_ent *ent) +{ + refcount_inc(&ent->refcnt); +} + +static void cmd_ent_put(struct mlx5_cmd_work_ent *ent) +{ + struct mlx5_cmd *cmd = ent->cmd; + unsigned long flags; + + spin_lock_irqsave(&cmd->alloc_lock, flags); + if (!refcount_dec_and_test(&ent->refcnt)) + goto out; + + if (ent->idx >= 0) { + cmd_free_index(cmd, ent->idx); + up(ent->page_queue ? &cmd->vars.pages_sem : &cmd->vars.sem); + } + + cmd_free_ent(ent); +out: + spin_unlock_irqrestore(&cmd->alloc_lock, flags); +} + +static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx) +{ + return cmd->cmd_buf + (idx << cmd->vars.log_stride); +} + +static int mlx5_calc_cmd_blocks(struct mlx5_cmd_msg *msg) +{ + int size = msg->len; + int blen = size - min_t(int, sizeof(msg->first.data), size); + + return DIV_ROUND_UP(blen, MLX5_CMD_DATA_BLOCK_SIZE); +} + +static u8 xor8_buf(void *buf, size_t offset, int len) +{ + u8 *ptr = buf; + u8 sum = 0; + int i; + int end = len + offset; + + for (i = offset; i < end; i++) + sum ^= ptr[i]; + + return sum; +} + +static int verify_block_sig(struct mlx5_cmd_prot_block *block) +{ + size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0); + int xor_len = sizeof(*block) - sizeof(block->data) - 1; + + if (xor8_buf(block, rsvd0_off, xor_len) != 0xff) + return -EHWPOISON; + + if (xor8_buf(block, 0, sizeof(*block)) != 0xff) + return -EHWPOISON; + + return 0; +} + +static void calc_block_sig(struct mlx5_cmd_prot_block *block) +{ + int ctrl_xor_len = sizeof(*block) - sizeof(block->data) - 2; + size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0); + + block->ctrl_sig = ~xor8_buf(block, rsvd0_off, ctrl_xor_len); + block->sig = ~xor8_buf(block, 0, sizeof(*block) - 1); +} + +static void calc_chain_sig(struct mlx5_cmd_msg *msg) +{ + struct mlx5_cmd_mailbox *next = msg->next; + int n = mlx5_calc_cmd_blocks(msg); + int i = 0; + + for (i = 0; i < n && next; i++) { + calc_block_sig(next->buf); + next = next->next; + } +} + +static void set_signature(struct mlx5_cmd_work_ent *ent, int csum) +{ + ent->lay->sig = ~xor8_buf(ent->lay, 0, sizeof(*ent->lay)); + if (csum) { + calc_chain_sig(ent->in); + calc_chain_sig(ent->out); + } +} + +static void poll_timeout(struct mlx5_cmd_work_ent *ent) +{ + struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev, cmd); + u64 cmd_to_ms = mlx5_tout_ms(dev, CMD); + unsigned long poll_end; + u8 own; + + poll_end = jiffies + msecs_to_jiffies(cmd_to_ms + 1000); + + do { + own = READ_ONCE(ent->lay->status_own); + if (!(own & CMD_OWNER_HW)) { + ent->ret = 0; + return; + } + cond_resched(); + } while (time_before(jiffies, poll_end)); + + ent->ret = -ETIMEDOUT; +} + +static int verify_signature(struct mlx5_cmd_work_ent *ent) +{ + struct mlx5_cmd_mailbox *next = ent->out->next; + int n = mlx5_calc_cmd_blocks(ent->out); + int err; + u8 sig; + int i = 0; + + sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay)); + if (sig != 0xff) + return -EHWPOISON; + + for (i = 0; i < n && next; i++) { + err = verify_block_sig(next->buf); + if (err) + return -EHWPOISON; + + next = next->next; + } + + return 0; +} + +static void dump_buf(void *buf, int size, int data_only, int offset, int idx) +{ + __be32 *p = buf; + int i; + + for (i = 0; i < size; i += 16) { + pr_debug("cmd[%d]: %03x: %08x %08x %08x %08x\n", idx, offset, + be32_to_cpu(p[0]), be32_to_cpu(p[1]), + 
be32_to_cpu(p[2]), be32_to_cpu(p[3])); + p += 4; + offset += 16; + } + if (!data_only) + pr_debug("\n"); +} + +static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, + u32 *synd, u8 *status) +{ + *synd = 0; + *status = 0; + + switch (op) { + case MLX5_CMD_OP_TEARDOWN_HCA: + case MLX5_CMD_OP_DISABLE_HCA: + case MLX5_CMD_OP_MANAGE_PAGES: + case MLX5_CMD_OP_DESTROY_MKEY: + case MLX5_CMD_OP_DESTROY_EQ: + case MLX5_CMD_OP_DESTROY_CQ: + case MLX5_CMD_OP_DESTROY_QP: + case MLX5_CMD_OP_DESTROY_PSV: + case MLX5_CMD_OP_DESTROY_SRQ: + case MLX5_CMD_OP_DESTROY_XRC_SRQ: + case MLX5_CMD_OP_DESTROY_XRQ: + case MLX5_CMD_OP_DESTROY_DCT: + case MLX5_CMD_OP_DEALLOC_Q_COUNTER: + case MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT: + case MLX5_CMD_OP_DEALLOC_PD: + case MLX5_CMD_OP_DEALLOC_UAR: + case MLX5_CMD_OP_DETACH_FROM_MCG: + case MLX5_CMD_OP_DEALLOC_XRCD: + case MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN: + case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT: + case MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY: + case MLX5_CMD_OP_DESTROY_LAG: + case MLX5_CMD_OP_DESTROY_VPORT_LAG: + case MLX5_CMD_OP_DESTROY_TIR: + case MLX5_CMD_OP_DESTROY_SQ: + case MLX5_CMD_OP_DESTROY_RQ: + case MLX5_CMD_OP_DESTROY_RMP: + case MLX5_CMD_OP_DESTROY_TIS: + case MLX5_CMD_OP_DESTROY_RQT: + case MLX5_CMD_OP_DESTROY_FLOW_TABLE: + case MLX5_CMD_OP_DESTROY_FLOW_GROUP: + case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_DEALLOC_FLOW_COUNTER: + case MLX5_CMD_OP_2ERR_QP: + case MLX5_CMD_OP_2RST_QP: + case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT: + case MLX5_CMD_OP_MODIFY_FLOW_TABLE: + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT: + case MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT: + case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT: + case MLX5_CMD_OP_FPGA_DESTROY_QP: + case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: + case MLX5_CMD_OP_DEALLOC_MEMIC: + case MLX5_CMD_OP_PAGE_FAULT_RESUME: + case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS: + case MLX5_CMD_OP_DEALLOC_SF: + case MLX5_CMD_OP_DESTROY_UCTX: + case MLX5_CMD_OP_DESTROY_UMEM: + case MLX5_CMD_OP_MODIFY_RQT: + return MLX5_CMD_STAT_OK; + + case MLX5_CMD_OP_QUERY_HCA_CAP: + case MLX5_CMD_OP_QUERY_ADAPTER: + case MLX5_CMD_OP_INIT_HCA: + case MLX5_CMD_OP_ENABLE_HCA: + case MLX5_CMD_OP_QUERY_PAGES: + case MLX5_CMD_OP_SET_HCA_CAP: + case MLX5_CMD_OP_QUERY_ISSI: + case MLX5_CMD_OP_SET_ISSI: + case MLX5_CMD_OP_CREATE_MKEY: + case MLX5_CMD_OP_QUERY_MKEY: + case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS: + case MLX5_CMD_OP_CREATE_EQ: + case MLX5_CMD_OP_QUERY_EQ: + case MLX5_CMD_OP_GEN_EQE: + case MLX5_CMD_OP_CREATE_CQ: + case MLX5_CMD_OP_QUERY_CQ: + case MLX5_CMD_OP_MODIFY_CQ: + case MLX5_CMD_OP_CREATE_QP: + case MLX5_CMD_OP_RST2INIT_QP: + case MLX5_CMD_OP_INIT2RTR_QP: + case MLX5_CMD_OP_RTR2RTS_QP: + case MLX5_CMD_OP_RTS2RTS_QP: + case MLX5_CMD_OP_SQERR2RTS_QP: + case MLX5_CMD_OP_QUERY_QP: + case MLX5_CMD_OP_SQD_RTS_QP: + case MLX5_CMD_OP_INIT2INIT_QP: + case MLX5_CMD_OP_CREATE_PSV: + case MLX5_CMD_OP_CREATE_SRQ: + case MLX5_CMD_OP_QUERY_SRQ: + case MLX5_CMD_OP_ARM_RQ: + case MLX5_CMD_OP_CREATE_XRC_SRQ: + case MLX5_CMD_OP_QUERY_XRC_SRQ: + case MLX5_CMD_OP_ARM_XRC_SRQ: + case MLX5_CMD_OP_CREATE_XRQ: + case MLX5_CMD_OP_QUERY_XRQ: + case MLX5_CMD_OP_ARM_XRQ: + case MLX5_CMD_OP_CREATE_DCT: + case MLX5_CMD_OP_DRAIN_DCT: + case MLX5_CMD_OP_QUERY_DCT: + case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: + case MLX5_CMD_OP_QUERY_VPORT_STATE: + case MLX5_CMD_OP_MODIFY_VPORT_STATE: + case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT: + case 
MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_ROCE_ADDRESS: + case MLX5_CMD_OP_SET_ROCE_ADDRESS: + case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: + case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_HCA_VPORT_GID: + case MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY: + case MLX5_CMD_OP_QUERY_VNIC_ENV: + case MLX5_CMD_OP_QUERY_VPORT_COUNTER: + case MLX5_CMD_OP_ALLOC_Q_COUNTER: + case MLX5_CMD_OP_QUERY_Q_COUNTER: + case MLX5_CMD_OP_SET_MONITOR_COUNTER: + case MLX5_CMD_OP_ARM_MONITOR_COUNTER: + case MLX5_CMD_OP_SET_PP_RATE_LIMIT: + case MLX5_CMD_OP_QUERY_RATE_LIMIT: + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_CREATE_QOS_PARA_VPORT: + case MLX5_CMD_OP_ALLOC_PD: + case MLX5_CMD_OP_ALLOC_UAR: + case MLX5_CMD_OP_CONFIG_INT_MODERATION: + case MLX5_CMD_OP_ACCESS_REG: + case MLX5_CMD_OP_ATTACH_TO_MCG: + case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG: + case MLX5_CMD_OP_MAD_IFC: + case MLX5_CMD_OP_QUERY_MAD_DEMUX: + case MLX5_CMD_OP_SET_MAD_DEMUX: + case MLX5_CMD_OP_NOP: + case MLX5_CMD_OP_ALLOC_XRCD: + case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: + case MLX5_CMD_OP_QUERY_CONG_STATUS: + case MLX5_CMD_OP_MODIFY_CONG_STATUS: + case MLX5_CMD_OP_QUERY_CONG_PARAMS: + case MLX5_CMD_OP_MODIFY_CONG_PARAMS: + case MLX5_CMD_OP_QUERY_CONG_STATISTICS: + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY: + case MLX5_CMD_OP_CREATE_LAG: + case MLX5_CMD_OP_MODIFY_LAG: + case MLX5_CMD_OP_QUERY_LAG: + case MLX5_CMD_OP_CREATE_VPORT_LAG: + case MLX5_CMD_OP_CREATE_TIR: + case MLX5_CMD_OP_MODIFY_TIR: + case MLX5_CMD_OP_QUERY_TIR: + case MLX5_CMD_OP_CREATE_SQ: + case MLX5_CMD_OP_MODIFY_SQ: + case MLX5_CMD_OP_QUERY_SQ: + case MLX5_CMD_OP_CREATE_RQ: + case MLX5_CMD_OP_MODIFY_RQ: + case MLX5_CMD_OP_QUERY_RQ: + case MLX5_CMD_OP_CREATE_RMP: + case MLX5_CMD_OP_MODIFY_RMP: + case MLX5_CMD_OP_QUERY_RMP: + case MLX5_CMD_OP_CREATE_TIS: + case MLX5_CMD_OP_MODIFY_TIS: + case MLX5_CMD_OP_QUERY_TIS: + case MLX5_CMD_OP_CREATE_RQT: + case MLX5_CMD_OP_QUERY_RQT: + + case MLX5_CMD_OP_CREATE_FLOW_TABLE: + case MLX5_CMD_OP_QUERY_FLOW_TABLE: + case MLX5_CMD_OP_CREATE_FLOW_GROUP: + case MLX5_CMD_OP_QUERY_FLOW_GROUP: + case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: + case MLX5_CMD_OP_QUERY_FLOW_COUNTER: + case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: + case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: + case MLX5_CMD_OP_FPGA_CREATE_QP: + case MLX5_CMD_OP_FPGA_MODIFY_QP: + case MLX5_CMD_OP_FPGA_QUERY_QP: + case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS: + case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: + case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: + case MLX5_CMD_OP_CREATE_UCTX: + case MLX5_CMD_OP_CREATE_UMEM: + case MLX5_CMD_OP_ALLOC_MEMIC: + case MLX5_CMD_OP_MODIFY_XRQ: + case MLX5_CMD_OP_RELEASE_XRQ_ERROR: + case MLX5_CMD_OP_QUERY_VHCA_STATE: + case MLX5_CMD_OP_MODIFY_VHCA_STATE: + case MLX5_CMD_OP_ALLOC_SF: + case MLX5_CMD_OP_SUSPEND_VHCA: + case MLX5_CMD_OP_RESUME_VHCA: + case MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE: + case MLX5_CMD_OP_SAVE_VHCA_STATE: + case MLX5_CMD_OP_LOAD_VHCA_STATE: + *status = MLX5_DRIVER_STATUS_ABORTED; + *synd = MLX5_DRIVER_SYND; + return -ENOLINK; + default: + mlx5_core_err(dev, "Unknown FW command (%d)\n", op); + return -EINVAL; + } +} + +const char *mlx5_command_str(int command) +{ +#define MLX5_COMMAND_STR_CASE(__cmd) case MLX5_CMD_OP_ ## 
__cmd: return #__cmd + + switch (command) { + MLX5_COMMAND_STR_CASE(QUERY_HCA_CAP); + MLX5_COMMAND_STR_CASE(QUERY_ADAPTER); + MLX5_COMMAND_STR_CASE(INIT_HCA); + MLX5_COMMAND_STR_CASE(TEARDOWN_HCA); + MLX5_COMMAND_STR_CASE(ENABLE_HCA); + MLX5_COMMAND_STR_CASE(DISABLE_HCA); + MLX5_COMMAND_STR_CASE(QUERY_PAGES); + MLX5_COMMAND_STR_CASE(MANAGE_PAGES); + MLX5_COMMAND_STR_CASE(SET_HCA_CAP); + MLX5_COMMAND_STR_CASE(QUERY_ISSI); + MLX5_COMMAND_STR_CASE(SET_ISSI); + MLX5_COMMAND_STR_CASE(SET_DRIVER_VERSION); + MLX5_COMMAND_STR_CASE(CREATE_MKEY); + MLX5_COMMAND_STR_CASE(QUERY_MKEY); + MLX5_COMMAND_STR_CASE(DESTROY_MKEY); + MLX5_COMMAND_STR_CASE(QUERY_SPECIAL_CONTEXTS); + MLX5_COMMAND_STR_CASE(PAGE_FAULT_RESUME); + MLX5_COMMAND_STR_CASE(CREATE_EQ); + MLX5_COMMAND_STR_CASE(DESTROY_EQ); + MLX5_COMMAND_STR_CASE(QUERY_EQ); + MLX5_COMMAND_STR_CASE(GEN_EQE); + MLX5_COMMAND_STR_CASE(CREATE_CQ); + MLX5_COMMAND_STR_CASE(DESTROY_CQ); + MLX5_COMMAND_STR_CASE(QUERY_CQ); + MLX5_COMMAND_STR_CASE(MODIFY_CQ); + MLX5_COMMAND_STR_CASE(CREATE_QP); + MLX5_COMMAND_STR_CASE(DESTROY_QP); + MLX5_COMMAND_STR_CASE(RST2INIT_QP); + MLX5_COMMAND_STR_CASE(INIT2RTR_QP); + MLX5_COMMAND_STR_CASE(RTR2RTS_QP); + MLX5_COMMAND_STR_CASE(RTS2RTS_QP); + MLX5_COMMAND_STR_CASE(SQERR2RTS_QP); + MLX5_COMMAND_STR_CASE(2ERR_QP); + MLX5_COMMAND_STR_CASE(2RST_QP); + MLX5_COMMAND_STR_CASE(QUERY_QP); + MLX5_COMMAND_STR_CASE(SQD_RTS_QP); + MLX5_COMMAND_STR_CASE(INIT2INIT_QP); + MLX5_COMMAND_STR_CASE(CREATE_PSV); + MLX5_COMMAND_STR_CASE(DESTROY_PSV); + MLX5_COMMAND_STR_CASE(CREATE_SRQ); + MLX5_COMMAND_STR_CASE(DESTROY_SRQ); + MLX5_COMMAND_STR_CASE(QUERY_SRQ); + MLX5_COMMAND_STR_CASE(ARM_RQ); + MLX5_COMMAND_STR_CASE(CREATE_XRC_SRQ); + MLX5_COMMAND_STR_CASE(DESTROY_XRC_SRQ); + MLX5_COMMAND_STR_CASE(QUERY_XRC_SRQ); + MLX5_COMMAND_STR_CASE(ARM_XRC_SRQ); + MLX5_COMMAND_STR_CASE(CREATE_DCT); + MLX5_COMMAND_STR_CASE(DESTROY_DCT); + MLX5_COMMAND_STR_CASE(DRAIN_DCT); + MLX5_COMMAND_STR_CASE(QUERY_DCT); + MLX5_COMMAND_STR_CASE(ARM_DCT_FOR_KEY_VIOLATION); + MLX5_COMMAND_STR_CASE(QUERY_VPORT_STATE); + MLX5_COMMAND_STR_CASE(MODIFY_VPORT_STATE); + MLX5_COMMAND_STR_CASE(QUERY_ESW_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(MODIFY_ESW_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(QUERY_NIC_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(MODIFY_NIC_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(QUERY_ROCE_ADDRESS); + MLX5_COMMAND_STR_CASE(SET_ROCE_ADDRESS); + MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(MODIFY_HCA_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_GID); + MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_PKEY); + MLX5_COMMAND_STR_CASE(QUERY_VNIC_ENV); + MLX5_COMMAND_STR_CASE(QUERY_VPORT_COUNTER); + MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER); + MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER); + MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); + MLX5_COMMAND_STR_CASE(SET_MONITOR_COUNTER); + MLX5_COMMAND_STR_CASE(ARM_MONITOR_COUNTER); + MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT); + MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT); + MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(QUERY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(MODIFY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(CREATE_QOS_PARA_VPORT); + MLX5_COMMAND_STR_CASE(DESTROY_QOS_PARA_VPORT); + MLX5_COMMAND_STR_CASE(ALLOC_PD); + MLX5_COMMAND_STR_CASE(DEALLOC_PD); + MLX5_COMMAND_STR_CASE(ALLOC_UAR); + MLX5_COMMAND_STR_CASE(DEALLOC_UAR); + MLX5_COMMAND_STR_CASE(CONFIG_INT_MODERATION); + MLX5_COMMAND_STR_CASE(ACCESS_REG); + 
MLX5_COMMAND_STR_CASE(ATTACH_TO_MCG); + MLX5_COMMAND_STR_CASE(DETACH_FROM_MCG); + MLX5_COMMAND_STR_CASE(GET_DROPPED_PACKET_LOG); + MLX5_COMMAND_STR_CASE(MAD_IFC); + MLX5_COMMAND_STR_CASE(QUERY_MAD_DEMUX); + MLX5_COMMAND_STR_CASE(SET_MAD_DEMUX); + MLX5_COMMAND_STR_CASE(NOP); + MLX5_COMMAND_STR_CASE(ALLOC_XRCD); + MLX5_COMMAND_STR_CASE(DEALLOC_XRCD); + MLX5_COMMAND_STR_CASE(ALLOC_TRANSPORT_DOMAIN); + MLX5_COMMAND_STR_CASE(DEALLOC_TRANSPORT_DOMAIN); + MLX5_COMMAND_STR_CASE(QUERY_CONG_STATUS); + MLX5_COMMAND_STR_CASE(MODIFY_CONG_STATUS); + MLX5_COMMAND_STR_CASE(QUERY_CONG_PARAMS); + MLX5_COMMAND_STR_CASE(MODIFY_CONG_PARAMS); + MLX5_COMMAND_STR_CASE(QUERY_CONG_STATISTICS); + MLX5_COMMAND_STR_CASE(ADD_VXLAN_UDP_DPORT); + MLX5_COMMAND_STR_CASE(DELETE_VXLAN_UDP_DPORT); + MLX5_COMMAND_STR_CASE(SET_L2_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(QUERY_L2_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(DELETE_L2_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(SET_WOL_ROL); + MLX5_COMMAND_STR_CASE(QUERY_WOL_ROL); + MLX5_COMMAND_STR_CASE(CREATE_LAG); + MLX5_COMMAND_STR_CASE(MODIFY_LAG); + MLX5_COMMAND_STR_CASE(QUERY_LAG); + MLX5_COMMAND_STR_CASE(DESTROY_LAG); + MLX5_COMMAND_STR_CASE(CREATE_VPORT_LAG); + MLX5_COMMAND_STR_CASE(DESTROY_VPORT_LAG); + MLX5_COMMAND_STR_CASE(CREATE_TIR); + MLX5_COMMAND_STR_CASE(MODIFY_TIR); + MLX5_COMMAND_STR_CASE(DESTROY_TIR); + MLX5_COMMAND_STR_CASE(QUERY_TIR); + MLX5_COMMAND_STR_CASE(CREATE_SQ); + MLX5_COMMAND_STR_CASE(MODIFY_SQ); + MLX5_COMMAND_STR_CASE(DESTROY_SQ); + MLX5_COMMAND_STR_CASE(QUERY_SQ); + MLX5_COMMAND_STR_CASE(CREATE_RQ); + MLX5_COMMAND_STR_CASE(MODIFY_RQ); + MLX5_COMMAND_STR_CASE(DESTROY_RQ); + MLX5_COMMAND_STR_CASE(QUERY_RQ); + MLX5_COMMAND_STR_CASE(CREATE_RMP); + MLX5_COMMAND_STR_CASE(MODIFY_RMP); + MLX5_COMMAND_STR_CASE(DESTROY_RMP); + MLX5_COMMAND_STR_CASE(QUERY_RMP); + MLX5_COMMAND_STR_CASE(CREATE_TIS); + MLX5_COMMAND_STR_CASE(MODIFY_TIS); + MLX5_COMMAND_STR_CASE(DESTROY_TIS); + MLX5_COMMAND_STR_CASE(QUERY_TIS); + MLX5_COMMAND_STR_CASE(CREATE_RQT); + MLX5_COMMAND_STR_CASE(MODIFY_RQT); + MLX5_COMMAND_STR_CASE(DESTROY_RQT); + MLX5_COMMAND_STR_CASE(QUERY_RQT); + MLX5_COMMAND_STR_CASE(SET_FLOW_TABLE_ROOT); + MLX5_COMMAND_STR_CASE(CREATE_FLOW_TABLE); + MLX5_COMMAND_STR_CASE(DESTROY_FLOW_TABLE); + MLX5_COMMAND_STR_CASE(QUERY_FLOW_TABLE); + MLX5_COMMAND_STR_CASE(CREATE_FLOW_GROUP); + MLX5_COMMAND_STR_CASE(DESTROY_FLOW_GROUP); + MLX5_COMMAND_STR_CASE(QUERY_FLOW_GROUP); + MLX5_COMMAND_STR_CASE(SET_FLOW_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(QUERY_FLOW_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(DELETE_FLOW_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(ALLOC_FLOW_COUNTER); + MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER); + MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER); + MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE); + MLX5_COMMAND_STR_CASE(ALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_COMMAND_STR_CASE(DEALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT); + MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT); + MLX5_COMMAND_STR_CASE(FPGA_CREATE_QP); + MLX5_COMMAND_STR_CASE(FPGA_MODIFY_QP); + MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP); + MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP_COUNTERS); + MLX5_COMMAND_STR_CASE(FPGA_DESTROY_QP); + MLX5_COMMAND_STR_CASE(CREATE_XRQ); + MLX5_COMMAND_STR_CASE(DESTROY_XRQ); + MLX5_COMMAND_STR_CASE(QUERY_XRQ); + MLX5_COMMAND_STR_CASE(ARM_XRQ); + MLX5_COMMAND_STR_CASE(CREATE_GENERAL_OBJECT); + MLX5_COMMAND_STR_CASE(DESTROY_GENERAL_OBJECT); + MLX5_COMMAND_STR_CASE(MODIFY_GENERAL_OBJECT); + MLX5_COMMAND_STR_CASE(QUERY_GENERAL_OBJECT); + 
MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); + MLX5_COMMAND_STR_CASE(ALLOC_MEMIC); + MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC); + MLX5_COMMAND_STR_CASE(QUERY_ESW_FUNCTIONS); + MLX5_COMMAND_STR_CASE(CREATE_UCTX); + MLX5_COMMAND_STR_CASE(DESTROY_UCTX); + MLX5_COMMAND_STR_CASE(CREATE_UMEM); + MLX5_COMMAND_STR_CASE(DESTROY_UMEM); + MLX5_COMMAND_STR_CASE(RELEASE_XRQ_ERROR); + MLX5_COMMAND_STR_CASE(MODIFY_XRQ); + MLX5_COMMAND_STR_CASE(QUERY_VHCA_STATE); + MLX5_COMMAND_STR_CASE(MODIFY_VHCA_STATE); + MLX5_COMMAND_STR_CASE(ALLOC_SF); + MLX5_COMMAND_STR_CASE(DEALLOC_SF); + MLX5_COMMAND_STR_CASE(SUSPEND_VHCA); + MLX5_COMMAND_STR_CASE(RESUME_VHCA); + MLX5_COMMAND_STR_CASE(QUERY_VHCA_MIGRATION_STATE); + MLX5_COMMAND_STR_CASE(SAVE_VHCA_STATE); + MLX5_COMMAND_STR_CASE(LOAD_VHCA_STATE); + default: return "unknown command opcode"; + } +} + +static const char *cmd_status_str(u8 status) +{ + switch (status) { + case MLX5_CMD_STAT_OK: + return "OK"; + case MLX5_CMD_STAT_INT_ERR: + return "internal error"; + case MLX5_CMD_STAT_BAD_OP_ERR: + return "bad operation"; + case MLX5_CMD_STAT_BAD_PARAM_ERR: + return "bad parameter"; + case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: + return "bad system state"; + case MLX5_CMD_STAT_BAD_RES_ERR: + return "bad resource"; + case MLX5_CMD_STAT_RES_BUSY: + return "resource busy"; + case MLX5_CMD_STAT_LIM_ERR: + return "limits exceeded"; + case MLX5_CMD_STAT_BAD_RES_STATE_ERR: + return "bad resource state"; + case MLX5_CMD_STAT_IX_ERR: + return "bad index"; + case MLX5_CMD_STAT_NO_RES_ERR: + return "no resources"; + case MLX5_CMD_STAT_BAD_INP_LEN_ERR: + return "bad input length"; + case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: + return "bad output length"; + case MLX5_CMD_STAT_BAD_QP_STATE_ERR: + return "bad QP state"; + case MLX5_CMD_STAT_BAD_PKT_ERR: + return "bad packet (discarded)"; + case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: + return "bad size too many outstanding CQEs"; + default: + return "unknown status"; + } +} + +static int cmd_status_to_err(u8 status) +{ + switch (status) { + case MLX5_CMD_STAT_OK: return 0; + case MLX5_CMD_STAT_INT_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_OP_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_PARAM_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_RES_ERR: return -EINVAL; + case MLX5_CMD_STAT_RES_BUSY: return -EBUSY; + case MLX5_CMD_STAT_LIM_ERR: return -ENOMEM; + case MLX5_CMD_STAT_BAD_RES_STATE_ERR: return -EINVAL; + case MLX5_CMD_STAT_IX_ERR: return -EINVAL; + case MLX5_CMD_STAT_NO_RES_ERR: return -EAGAIN; + case MLX5_CMD_STAT_BAD_INP_LEN_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_QP_STATE_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_PKT_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: return -EINVAL; + default: return -EIO; + } +} + +void mlx5_cmd_out_err(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out) +{ + u32 syndrome = MLX5_GET(mbox_out, out, syndrome); + u8 status = MLX5_GET(mbox_out, out, status); + + mlx5_core_err_rl(dev, + "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x), err(%d)\n", + mlx5_command_str(opcode), opcode, op_mod, + cmd_status_str(status), status, syndrome, cmd_status_to_err(status)); +} +EXPORT_SYMBOL(mlx5_cmd_out_err); + +static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out) +{ + u16 opcode, op_mod; + u16 uid; + + opcode = in_to_opcode(in); + op_mod = MLX5_GET(mbox_in, in, op_mod); + uid = MLX5_GET(mbox_in, in, uid); + + if (!uid && opcode != 
MLX5_CMD_OP_DESTROY_MKEY) + mlx5_cmd_out_err(dev, opcode, op_mod, out); +} + +int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out) +{ + /* aborted due to PCI error or via reset flow mlx5_cmd_trigger_completions() */ + if (err == -ENXIO) { + u16 opcode = in_to_opcode(in); + u32 syndrome; + u8 status; + + /* PCI Error, emulate command return status, for smooth reset */ + err = mlx5_internal_err_ret_value(dev, opcode, &syndrome, &status); + MLX5_SET(mbox_out, out, status, status); + MLX5_SET(mbox_out, out, syndrome, syndrome); + if (!err) + return 0; + } + + /* driver or FW delivery error */ + if (err != -EREMOTEIO && err) + return err; + + /* check outbox status */ + err = cmd_status_to_err(MLX5_GET(mbox_out, out, status)); + if (err) + cmd_status_print(dev, in, out); + + return err; +} +EXPORT_SYMBOL(mlx5_cmd_check); + +static void dump_command(struct mlx5_core_dev *dev, + struct mlx5_cmd_work_ent *ent, int input) +{ + struct mlx5_cmd_msg *msg = input ? ent->in : ent->out; + struct mlx5_cmd_mailbox *next = msg->next; + int n = mlx5_calc_cmd_blocks(msg); + u16 op = ent->op; + int data_only; + u32 offset = 0; + int dump_len; + int i; + + mlx5_core_dbg(dev, "cmd[%d]: start dump\n", ent->idx); + data_only = !!(mlx5_core_debug_mask & (1 << MLX5_CMD_DATA)); + + if (data_only) + mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_DATA, + "cmd[%d]: dump command data %s(0x%x) %s\n", + ent->idx, mlx5_command_str(op), op, + input ? "INPUT" : "OUTPUT"); + else + mlx5_core_dbg(dev, "cmd[%d]: dump command %s(0x%x) %s\n", + ent->idx, mlx5_command_str(op), op, + input ? "INPUT" : "OUTPUT"); + + if (data_only) { + if (input) { + dump_buf(ent->lay->in, sizeof(ent->lay->in), 1, offset, ent->idx); + offset += sizeof(ent->lay->in); + } else { + dump_buf(ent->lay->out, sizeof(ent->lay->out), 1, offset, ent->idx); + offset += sizeof(ent->lay->out); + } + } else { + dump_buf(ent->lay, sizeof(*ent->lay), 0, offset, ent->idx); + offset += sizeof(*ent->lay); + } + + for (i = 0; i < n && next; i++) { + if (data_only) { + dump_len = min_t(int, MLX5_CMD_DATA_BLOCK_SIZE, msg->len - offset); + dump_buf(next->buf, dump_len, 1, offset, ent->idx); + offset += MLX5_CMD_DATA_BLOCK_SIZE; + } else { + mlx5_core_dbg(dev, "cmd[%d]: command block:\n", ent->idx); + dump_buf(next->buf, sizeof(struct mlx5_cmd_prot_block), 0, offset, + ent->idx); + offset += sizeof(struct mlx5_cmd_prot_block); + } + next = next->next; + } + + if (data_only) + pr_debug("\n"); + + mlx5_core_dbg(dev, "cmd[%d]: end dump\n", ent->idx); +} + +static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); + +static void cb_timeout_handler(struct work_struct *work) +{ + struct delayed_work *dwork = container_of(work, struct delayed_work, + work); + struct mlx5_cmd_work_ent *ent = container_of(dwork, + struct mlx5_cmd_work_ent, + cb_timeout_work); + struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev, + cmd); + + mlx5_cmd_eq_recover(dev); + + /* Maybe got handled by eq recover ? */ + if (!test_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state)) { + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, recovered after timeout\n", ent->idx, + mlx5_command_str(ent->op), ent->op); + goto out; /* phew, already handled */ + } + + ent->ret = -ETIMEDOUT; + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, timeout. 
Will cause a leak of a command resource\n", + ent->idx, mlx5_command_str(ent->op), ent->op); + mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true); + +out: + cmd_ent_put(ent); /* for the cmd_ent_get() took on schedule delayed work */ +} + +static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg); +static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev, + struct mlx5_cmd_msg *msg); + +static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode) +{ + if (cmd->allowed_opcode == CMD_ALLOWED_OPCODE_ALL) + return true; + + return cmd->allowed_opcode == opcode; +} + +bool mlx5_cmd_is_down(struct mlx5_core_dev *dev) +{ + return pci_channel_offline(dev->pdev) || + dev->cmd.state != MLX5_CMDIF_STATE_UP || + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR; +} + +static void cmd_work_handler(struct work_struct *work) +{ + struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); + struct mlx5_cmd *cmd = ent->cmd; + bool poll_cmd = ent->polling; + struct mlx5_cmd_layout *lay; + struct mlx5_core_dev *dev; + unsigned long cb_timeout; + struct semaphore *sem; + unsigned long flags; + int alloc_ret; + int cmd_mode; + + dev = container_of(cmd, struct mlx5_core_dev, cmd); + cb_timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD)); + + complete(&ent->handling); + sem = ent->page_queue ? &cmd->vars.pages_sem : &cmd->vars.sem; + down(sem); + if (!ent->page_queue) { + alloc_ret = cmd_alloc_index(cmd, ent); + if (alloc_ret < 0) { + mlx5_core_err_rl(dev, "failed to allocate command entry\n"); + if (ent->callback) { + ent->callback(-EAGAIN, ent->context); + mlx5_free_cmd_msg(dev, ent->out); + free_msg(dev, ent->in); + cmd_ent_put(ent); + } else { + ent->ret = -EAGAIN; + complete(&ent->done); + } + up(sem); + return; + } + } else { + ent->idx = cmd->vars.max_reg_cmds; + spin_lock_irqsave(&cmd->alloc_lock, flags); + clear_bit(ent->idx, &cmd->vars.bitmask); + cmd->ent_arr[ent->idx] = ent; + spin_unlock_irqrestore(&cmd->alloc_lock, flags); + } + + lay = get_inst(cmd, ent->idx); + ent->lay = lay; + memset(lay, 0, sizeof(*lay)); + memcpy(lay->in, ent->in->first.data, sizeof(lay->in)); + if (ent->in->next) + lay->in_ptr = cpu_to_be64(ent->in->next->dma); + lay->inlen = cpu_to_be32(ent->in->len); + if (ent->out->next) + lay->out_ptr = cpu_to_be64(ent->out->next->dma); + lay->outlen = cpu_to_be32(ent->out->len); + lay->type = MLX5_PCI_CMD_XPORT; + lay->token = ent->token; + lay->status_own = CMD_OWNER_HW; + set_signature(ent, !cmd->checksum_disabled); + dump_command(dev, ent, 1); + ent->ts1 = ktime_get_ns(); + cmd_mode = cmd->mode; + + if (ent->callback && schedule_delayed_work(&ent->cb_timeout_work, cb_timeout)) + cmd_ent_get(ent); + set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state); + + cmd_ent_get(ent); /* for the _real_ FW event on completion */ + /* Skip sending command to fw if internal error */ + if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) { + ent->ret = -ENXIO; + mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true); + return; + } + + /* ring doorbell after the descriptor is valid */ + mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); + wmb(); + iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell); + /* if not in polling don't use ent after this point */ + if (cmd_mode == CMD_MODE_POLLING || poll_cmd) { + poll_timeout(ent); + /* make sure we read the descriptor after ownership is SW */ + rmb(); + mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, (ent->ret == -ETIMEDOUT)); + } +} + +static int deliv_status_to_err(u8 status) +{ + switch 
(status) { + case MLX5_CMD_DELIVERY_STAT_OK: + case MLX5_DRIVER_STATUS_ABORTED: + return 0; + case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR: + case MLX5_CMD_DELIVERY_STAT_TOK_ERR: + return -EBADR; + case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR: + case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR: + case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR: + return -EFAULT; /* Bad address */ + case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR: + case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR: + case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR: + case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR: + return -ENOMSG; + case MLX5_CMD_DELIVERY_STAT_FW_ERR: + return -EIO; + default: + return -EINVAL; + } +} + +static const char *deliv_status_to_str(u8 status) +{ + switch (status) { + case MLX5_CMD_DELIVERY_STAT_OK: + return "no errors"; + case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR: + return "signature error"; + case MLX5_CMD_DELIVERY_STAT_TOK_ERR: + return "token error"; + case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR: + return "bad block number"; + case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR: + return "output pointer not aligned to block size"; + case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR: + return "input pointer not aligned to block size"; + case MLX5_CMD_DELIVERY_STAT_FW_ERR: + return "firmware internal error"; + case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR: + return "command input length error"; + case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR: + return "command output length error"; + case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR: + return "reserved fields not cleared"; + case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR: + return "bad command descriptor type"; + default: + return "unknown status code"; + } +} + +enum { + MLX5_CMD_TIMEOUT_RECOVER_MSEC = 5 * 1000, +}; + +static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev, + struct mlx5_cmd_work_ent *ent) +{ + unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_RECOVER_MSEC); + + mlx5_cmd_eq_recover(dev); + + /* Re-wait on the ent->done after executing the recovery flow. If the + * recovery flow (or any other recovery flow running simultaneously) + * has recovered an EQE, it should cause the entry to be completed by + * the command interface. + */ + if (wait_for_completion_timeout(&ent->done, timeout)) { + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) recovered after timeout\n", ent->idx, + mlx5_command_str(ent->op), ent->op); + return; + } + + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) No done completion\n", ent->idx, + mlx5_command_str(ent->op), ent->op); + + ent->ret = -ETIMEDOUT; + mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true); +} + +static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) +{ + unsigned long timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD)); + struct mlx5_cmd *cmd = &dev->cmd; + int err; + + if (!wait_for_completion_timeout(&ent->handling, timeout) && + cancel_work_sync(&ent->work)) { + ent->ret = -ECANCELED; + goto out_err; + } + if (cmd->mode == CMD_MODE_POLLING || ent->polling) + wait_for_completion(&ent->done); + else if (!wait_for_completion_timeout(&ent->done, timeout)) + wait_func_handle_exec_timeout(dev, ent); + +out_err: + err = ent->ret; + + if (err == -ETIMEDOUT) { + mlx5_core_warn(dev, "%s(0x%x) timeout. 
Will cause a leak of a command resource\n", + mlx5_command_str(ent->op), ent->op); + } else if (err == -ECANCELED) { + mlx5_core_warn(dev, "%s(0x%x) canceled on out of queue timeout.\n", + mlx5_command_str(ent->op), ent->op); + } + mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n", + err, deliv_status_to_str(ent->status), ent->status); + + return err; +} + +/* Notes: + * 1. Callback functions may not sleep + * 2. page queue commands do not support asynchrous completion + * + * return value in case (!callback): + * ret < 0 : Command execution couldn't be submitted by driver + * ret > 0 : Command execution couldn't be performed by firmware + * ret == 0: Command was executed by FW, Caller must check FW outbox status. + * + * return value in case (callback): + * ret < 0 : Command execution couldn't be submitted by driver + * ret == 0: Command will be submitted to FW for execution + * and the callback will be called for further status updates + */ +static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, + struct mlx5_cmd_msg *out, void *uout, int uout_size, + mlx5_cmd_cbk_t callback, + void *context, int page_queue, + u8 token, bool force_polling) +{ + struct mlx5_cmd *cmd = &dev->cmd; + struct mlx5_cmd_work_ent *ent; + struct mlx5_cmd_stats *stats; + u8 status = 0; + int err = 0; + s64 ds; + + if (callback && page_queue) + return -EINVAL; + + ent = cmd_alloc_ent(cmd, in, out, uout, uout_size, + callback, context, page_queue); + if (IS_ERR(ent)) + return PTR_ERR(ent); + + /* put for this ent is when consumed, depending on the use case + * 1) (!callback) blocking flow: by caller after wait_func completes + * 2) (callback) flow: by mlx5_cmd_comp_handler() when ent is handled + */ + + ent->token = token; + ent->polling = force_polling; + + init_completion(&ent->handling); + if (!callback) + init_completion(&ent->done); + + INIT_DELAYED_WORK(&ent->cb_timeout_work, cb_timeout_handler); + INIT_WORK(&ent->work, cmd_work_handler); + if (page_queue) { + cmd_work_handler(&ent->work); + } else if (!queue_work(cmd->wq, &ent->work)) { + mlx5_core_warn(dev, "failed to queue work\n"); + err = -EALREADY; + goto out_free; + } + + if (callback) + return 0; /* mlx5_cmd_comp_handler() will put(ent) */ + + err = wait_func(dev, ent); + if (err == -ETIMEDOUT || err == -ECANCELED) + goto out_free; + + ds = ent->ts2 - ent->ts1; + if (ent->op < MLX5_CMD_OP_MAX) { + stats = &cmd->stats[ent->op]; + spin_lock_irq(&stats->lock); + stats->sum += ds; + ++stats->n; + spin_unlock_irq(&stats->lock); + } + mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME, + "fw exec time for %s is %lld nsec\n", + mlx5_command_str(ent->op), ds); + +out_free: + status = ent->status; + cmd_ent_put(ent); + return err ? : status; +} + +static ssize_t dbg_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + char lbuf[3]; + int err; + + if (!dbg->in_msg || !dbg->out_msg) + return -ENOMEM; + + if (count < sizeof(lbuf) - 1) + return -EINVAL; + + if (copy_from_user(lbuf, buf, sizeof(lbuf) - 1)) + return -EFAULT; + + lbuf[sizeof(lbuf) - 1] = 0; + + if (strcmp(lbuf, "go")) + return -EINVAL; + + err = mlx5_cmd_exec(dev, dbg->in_msg, dbg->inlen, dbg->out_msg, dbg->outlen); + + return err ? 
err : count; +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = dbg_write, +}; + +static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size, + u8 token) +{ + struct mlx5_cmd_prot_block *block; + struct mlx5_cmd_mailbox *next; + int copy; + + if (!to || !from) + return -ENOMEM; + + copy = min_t(int, size, sizeof(to->first.data)); + memcpy(to->first.data, from, copy); + size -= copy; + from += copy; + + next = to->next; + while (size) { + if (!next) { + /* this is a BUG */ + return -ENOMEM; + } + + copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE); + block = next->buf; + memcpy(block->data, from, copy); + from += copy; + size -= copy; + block->token = token; + next = next->next; + } + + return 0; +} + +static int mlx5_copy_from_msg(void *to, struct mlx5_cmd_msg *from, int size) +{ + struct mlx5_cmd_prot_block *block; + struct mlx5_cmd_mailbox *next; + int copy; + + if (!to || !from) + return -ENOMEM; + + copy = min_t(int, size, sizeof(from->first.data)); + memcpy(to, from->first.data, copy); + size -= copy; + to += copy; + + next = from->next; + while (size) { + if (!next) { + /* this is a BUG */ + return -ENOMEM; + } + + copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE); + block = next->buf; + + memcpy(to, block->data, copy); + to += copy; + size -= copy; + next = next->next; + } + + return 0; +} + +static struct mlx5_cmd_mailbox *alloc_cmd_box(struct mlx5_core_dev *dev, + gfp_t flags) +{ + struct mlx5_cmd_mailbox *mailbox; + + mailbox = kmalloc(sizeof(*mailbox), flags); + if (!mailbox) + return ERR_PTR(-ENOMEM); + + mailbox->buf = dma_pool_zalloc(dev->cmd.pool, flags, + &mailbox->dma); + if (!mailbox->buf) { + mlx5_core_dbg(dev, "failed allocation\n"); + kfree(mailbox); + return ERR_PTR(-ENOMEM); + } + mailbox->next = NULL; + + return mailbox; +} + +static void free_cmd_box(struct mlx5_core_dev *dev, + struct mlx5_cmd_mailbox *mailbox) +{ + dma_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma); + kfree(mailbox); +} + +static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev, + gfp_t flags, int size, + u8 token) +{ + struct mlx5_cmd_mailbox *tmp, *head = NULL; + struct mlx5_cmd_prot_block *block; + struct mlx5_cmd_msg *msg; + int err; + int n; + int i; + + msg = kzalloc(sizeof(*msg), flags); + if (!msg) + return ERR_PTR(-ENOMEM); + + msg->len = size; + n = mlx5_calc_cmd_blocks(msg); + + for (i = 0; i < n; i++) { + tmp = alloc_cmd_box(dev, flags); + if (IS_ERR(tmp)) { + mlx5_core_warn(dev, "failed allocating block\n"); + err = PTR_ERR(tmp); + goto err_alloc; + } + + block = tmp->buf; + tmp->next = head; + block->next = cpu_to_be64(tmp->next ? 
tmp->next->dma : 0); + block->block_num = cpu_to_be32(n - i - 1); + block->token = token; + head = tmp; + } + msg->next = head; + return msg; + +err_alloc: + while (head) { + tmp = head->next; + free_cmd_box(dev, head); + head = tmp; + } + kfree(msg); + + return ERR_PTR(err); +} + +static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev, + struct mlx5_cmd_msg *msg) +{ + struct mlx5_cmd_mailbox *head = msg->next; + struct mlx5_cmd_mailbox *next; + + while (head) { + next = head->next; + free_cmd_box(dev, head); + head = next; + } + kfree(msg); +} + +static ssize_t data_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + void *ptr; + + if (*pos != 0) + return -EINVAL; + + kfree(dbg->in_msg); + dbg->in_msg = NULL; + dbg->inlen = 0; + ptr = memdup_user(buf, count); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + dbg->in_msg = ptr; + dbg->inlen = count; + + *pos = count; + + return count; +} + +static ssize_t data_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + + if (!dbg->out_msg) + return -ENOMEM; + + return simple_read_from_buffer(buf, count, pos, dbg->out_msg, + dbg->outlen); +} + +static const struct file_operations dfops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = data_write, + .read = data_read, +}; + +static ssize_t outlen_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + char outlen[8]; + int err; + + err = snprintf(outlen, sizeof(outlen), "%d", dbg->outlen); + if (err < 0) + return err; + + return simple_read_from_buffer(buf, count, pos, outlen, err); +} + +static ssize_t outlen_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + char outlen_str[8] = {0}; + int outlen; + void *ptr; + int err; + + if (*pos != 0 || count > 6) + return -EINVAL; + + kfree(dbg->out_msg); + dbg->out_msg = NULL; + dbg->outlen = 0; + + if (copy_from_user(outlen_str, buf, count)) + return -EFAULT; + + err = sscanf(outlen_str, "%d", &outlen); + if (err != 1) + return -EINVAL; + + ptr = kzalloc(outlen, GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + dbg->out_msg = ptr; + dbg->outlen = outlen; + + *pos = count; + + return count; +} + +static const struct file_operations olfops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = outlen_write, + .read = outlen_read, +}; + +static void set_wqname(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + + snprintf(cmd->wq_name, sizeof(cmd->wq_name), "mlx5_cmd_%s", + dev_name(dev->device)); +} + +static void clean_debug_files(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + + if (!mlx5_debugfs_root) + return; + + mlx5_cmdif_debugfs_cleanup(dev); + debugfs_remove_recursive(dbg->dbg_root); +} + +static void create_debugfs_files(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + + dbg->dbg_root = debugfs_create_dir("cmd", mlx5_debugfs_get_dev_root(dev)); + + debugfs_create_file("in", 0400, dbg->dbg_root, dev, &dfops); + debugfs_create_file("out", 0200, dbg->dbg_root, dev, &dfops); + debugfs_create_file("out_len", 0600, dbg->dbg_root, dev, &olfops); + debugfs_create_u8("status", 0600, 
dbg->dbg_root, &dbg->status); + debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops); + + mlx5_cmdif_debugfs_init(dev); +} + +void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode) +{ + struct mlx5_cmd *cmd = &dev->cmd; + int i; + + for (i = 0; i < cmd->vars.max_reg_cmds; i++) + down(&cmd->vars.sem); + down(&cmd->vars.pages_sem); + + cmd->allowed_opcode = opcode; + + up(&cmd->vars.pages_sem); + for (i = 0; i < cmd->vars.max_reg_cmds; i++) + up(&cmd->vars.sem); +} + +static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode) +{ + struct mlx5_cmd *cmd = &dev->cmd; + int i; + + for (i = 0; i < cmd->vars.max_reg_cmds; i++) + down(&cmd->vars.sem); + down(&cmd->vars.pages_sem); + + cmd->mode = mode; + + up(&cmd->vars.pages_sem); + for (i = 0; i < cmd->vars.max_reg_cmds; i++) + up(&cmd->vars.sem); +} + +static int cmd_comp_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_core_dev *dev; + struct mlx5_cmd *cmd; + struct mlx5_eqe *eqe; + + cmd = mlx5_nb_cof(nb, struct mlx5_cmd, nb); + dev = container_of(cmd, struct mlx5_core_dev, cmd); + eqe = data; + + mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false); + + return NOTIFY_OK; +} +void mlx5_cmd_use_events(struct mlx5_core_dev *dev) +{ + MLX5_NB_INIT(&dev->cmd.nb, cmd_comp_notifier, CMD); + mlx5_eq_notifier_register(dev, &dev->cmd.nb); + mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS); +} + +void mlx5_cmd_use_polling(struct mlx5_core_dev *dev) +{ + mlx5_cmd_change_mod(dev, CMD_MODE_POLLING); + mlx5_eq_notifier_unregister(dev, &dev->cmd.nb); +} + +static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) +{ + unsigned long flags; + + if (msg->parent) { + spin_lock_irqsave(&msg->parent->lock, flags); + list_add_tail(&msg->list, &msg->parent->head); + spin_unlock_irqrestore(&msg->parent->lock, flags); + } else { + mlx5_free_cmd_msg(dev, msg); + } +} + +static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) +{ + struct mlx5_cmd *cmd = &dev->cmd; + struct mlx5_cmd_work_ent *ent; + mlx5_cmd_cbk_t callback; + void *context; + int err; + int i; + s64 ds; + struct mlx5_cmd_stats *stats; + unsigned long flags; + unsigned long vector; + + /* there can be at most 32 command queues */ + vector = vec & 0xffffffff; + for (i = 0; i < (1 << cmd->vars.log_sz); i++) { + if (test_bit(i, &vector)) { + ent = cmd->ent_arr[i]; + + /* if we already completed the command, ignore it */ + if (!test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, + &ent->state)) { + /* only real completion can free the cmd slot */ + if (!forced) { + mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n", + ent->idx); + cmd_ent_put(ent); + } + continue; + } + + if (ent->callback && cancel_delayed_work(&ent->cb_timeout_work)) + cmd_ent_put(ent); /* timeout work was canceled */ + + if (!forced || /* Real FW completion */ + mlx5_cmd_is_down(dev) || /* No real FW completion is expected */ + !opcode_allowed(cmd, ent->op)) + cmd_ent_put(ent); + + ent->ts2 = ktime_get_ns(); + memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out)); + dump_command(dev, ent, 0); + + if (vec & MLX5_TRIGGERED_CMD_COMP) + ent->ret = -ENXIO; + + if (!ent->ret) { /* Command completed by FW */ + if (!cmd->checksum_disabled) + ent->ret = verify_signature(ent); + + ent->status = ent->lay->status_own >> 1; + + mlx5_core_dbg(dev, "command completed. 
ret 0x%x, delivery status %s(0x%x)\n", + ent->ret, deliv_status_to_str(ent->status), ent->status); + } + + if (ent->callback) { + ds = ent->ts2 - ent->ts1; + if (ent->op < MLX5_CMD_OP_MAX) { + stats = &cmd->stats[ent->op]; + spin_lock_irqsave(&stats->lock, flags); + stats->sum += ds; + ++stats->n; + spin_unlock_irqrestore(&stats->lock, flags); + } + + callback = ent->callback; + context = ent->context; + err = ent->ret ? : ent->status; + if (err > 0) /* Failed in FW, command didn't execute */ + err = deliv_status_to_err(err); + + if (!err) + err = mlx5_copy_from_msg(ent->uout, + ent->out, + ent->uout_size); + + mlx5_free_cmd_msg(dev, ent->out); + free_msg(dev, ent->in); + + /* final consumer is done, release ent */ + cmd_ent_put(ent); + callback(err, context); + } else { + /* release wait_func() so mlx5_cmd_invoke() + * can make the final ent_put() + */ + complete(&ent->done); + } + } + } +} + +static void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + unsigned long bitmask; + unsigned long flags; + u64 vector; + int i; + + /* wait for pending handlers to complete */ + mlx5_eq_synchronize_cmd_irq(dev); + spin_lock_irqsave(&dev->cmd.alloc_lock, flags); + vector = ~dev->cmd.vars.bitmask & ((1ul << (1 << dev->cmd.vars.log_sz)) - 1); + if (!vector) + goto no_trig; + + bitmask = vector; + /* we must increment the allocated entries refcount before triggering the completions + * to guarantee pending commands will not get freed in the meanwhile. + * For that reason, it also has to be done inside the alloc_lock. + */ + for_each_set_bit(i, &bitmask, (1 << cmd->vars.log_sz)) + cmd_ent_get(cmd->ent_arr[i]); + vector |= MLX5_TRIGGERED_CMD_COMP; + spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); + + mlx5_core_dbg(dev, "vector 0x%llx\n", vector); + mlx5_cmd_comp_handler(dev, vector, true); + for_each_set_bit(i, &bitmask, (1 << cmd->vars.log_sz)) + cmd_ent_put(cmd->ent_arr[i]); + return; + +no_trig: + spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); +} + +void mlx5_cmd_flush(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + int i; + + for (i = 0; i < cmd->vars.max_reg_cmds; i++) { + while (down_trylock(&cmd->vars.sem)) { + mlx5_cmd_trigger_completions(dev); + cond_resched(); + } + } + + while (down_trylock(&cmd->vars.pages_sem)) { + mlx5_cmd_trigger_completions(dev); + cond_resched(); + } + + /* Unlock cmdif */ + up(&cmd->vars.pages_sem); + for (i = 0; i < cmd->vars.max_reg_cmds; i++) + up(&cmd->vars.sem); +} + +static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size, + gfp_t gfp) +{ + struct mlx5_cmd_msg *msg = ERR_PTR(-ENOMEM); + struct cmd_msg_cache *ch = NULL; + struct mlx5_cmd *cmd = &dev->cmd; + int i; + + if (in_size <= 16) + goto cache_miss; + + for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) { + ch = &cmd->cache[i]; + if (in_size > ch->max_inbox_size) + continue; + spin_lock_irq(&ch->lock); + if (list_empty(&ch->head)) { + spin_unlock_irq(&ch->lock); + continue; + } + msg = list_entry(ch->head.next, typeof(*msg), list); + /* For cached lists, we must explicitly state what is + * the real size + */ + msg->len = in_size; + list_del(&msg->list); + spin_unlock_irq(&ch->lock); + break; + } + + if (!IS_ERR(msg)) + return msg; + +cache_miss: + msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0); + return msg; +} + +static int is_manage_pages(void *in) +{ + return in_to_opcode(in) == MLX5_CMD_OP_MANAGE_PAGES; +} + +/* Notes: + * 1. Callback functions may not sleep + * 2. 
Page queue commands do not support asynchrous completion + */ +static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, + int out_size, mlx5_cmd_cbk_t callback, void *context, + bool force_polling) +{ + struct mlx5_cmd_msg *inb, *outb; + u16 opcode = in_to_opcode(in); + bool throttle_op; + int pages_queue; + gfp_t gfp; + u8 token; + int err; + + if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode)) + return -ENXIO; + + throttle_op = mlx5_cmd_is_throttle_opcode(opcode); + if (throttle_op) { + /* atomic context may not sleep */ + if (callback) + return -EINVAL; + down(&dev->cmd.vars.throttle_sem); + } + + pages_queue = is_manage_pages(in); + gfp = callback ? GFP_ATOMIC : GFP_KERNEL; + + inb = alloc_msg(dev, in_size, gfp); + if (IS_ERR(inb)) { + err = PTR_ERR(inb); + goto out_up; + } + + token = alloc_token(&dev->cmd); + + err = mlx5_copy_to_msg(inb, in, in_size, token); + if (err) { + mlx5_core_warn(dev, "err %d\n", err); + goto out_in; + } + + outb = mlx5_alloc_cmd_msg(dev, gfp, out_size, token); + if (IS_ERR(outb)) { + err = PTR_ERR(outb); + goto out_in; + } + + err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context, + pages_queue, token, force_polling); + if (callback) + return err; + + if (err > 0) /* Failed in FW, command didn't execute */ + err = deliv_status_to_err(err); + + if (err) + goto out_out; + + /* command completed by FW */ + err = mlx5_copy_from_msg(out, outb, out_size); +out_out: + mlx5_free_cmd_msg(dev, outb); +out_in: + free_msg(dev, inb); +out_up: + if (throttle_op) + up(&dev->cmd.vars.throttle_sem); + return err; +} + +static void mlx5_cmd_err_trace(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out) +{ + u32 syndrome = MLX5_GET(mbox_out, out, syndrome); + u8 status = MLX5_GET(mbox_out, out, status); + + trace_mlx5_cmd(mlx5_command_str(opcode), opcode, op_mod, + cmd_status_str(status), status, syndrome, + cmd_status_to_err(status)); +} + +static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, + u32 syndrome, int err) +{ + const char *namep = mlx5_command_str(opcode); + struct mlx5_cmd_stats *stats; + + if (!err || !(strcmp(namep, "unknown command opcode"))) + return; + + stats = &dev->cmd.stats[opcode]; + spin_lock_irq(&stats->lock); + stats->failed++; + if (err < 0) + stats->last_failed_errno = -err; + if (err == -EREMOTEIO) { + stats->failed_mbox_status++; + stats->last_failed_mbox_status = status; + stats->last_failed_syndrome = syndrome; + } + spin_unlock_irq(&stats->lock); +} + +/* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */ +static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, u16 op_mod, void *out) +{ + u32 syndrome = MLX5_GET(mbox_out, out, syndrome); + u8 status = MLX5_GET(mbox_out, out, status); + + if (err == -EREMOTEIO) /* -EREMOTEIO is preserved */ + err = -EIO; + + if (!err && status != MLX5_CMD_STAT_OK) { + err = -EREMOTEIO; + mlx5_cmd_err_trace(dev, opcode, op_mod, out); + } + + cmd_status_log(dev, opcode, status, syndrome, err); + return err; +} + +/** + * mlx5_cmd_do - Executes a fw command, wait for completion. + * Unlike mlx5_cmd_exec, this function will not translate or intercept + * outbox.status and will return -EREMOTEIO when + * outbox.status != MLX5_CMD_STAT_OK + * + * @dev: mlx5 core device + * @in: inbox mlx5_ifc command buffer + * @in_size: inbox buffer size + * @out: outbox mlx5_ifc buffer + * @out_size: outbox size + * + * @return: + * -EREMOTEIO : Command executed by FW, outbox.status != MLX5_CMD_STAT_OK. 
+ * Caller must check FW outbox status. + * 0 : Command execution successful, outbox.status == MLX5_CMD_STAT_OK. + * < 0 : Command execution couldn't be performed by firmware or driver + */ +int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size) +{ + int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false); + u16 op_mod = MLX5_GET(mbox_in, in, op_mod); + u16 opcode = in_to_opcode(in); + + return cmd_status_err(dev, err, opcode, op_mod, out); +} +EXPORT_SYMBOL(mlx5_cmd_do); + +/** + * mlx5_cmd_exec - Executes a fw command, wait for completion + * + * @dev: mlx5 core device + * @in: inbox mlx5_ifc command buffer + * @in_size: inbox buffer size + * @out: outbox mlx5_ifc buffer + * @out_size: outbox size + * + * @return: 0 if no error, FW command execution was successful + * and outbox status is ok. + */ +int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, + int out_size) +{ + int err = mlx5_cmd_do(dev, in, in_size, out, out_size); + + return mlx5_cmd_check(dev, err, in, out); +} +EXPORT_SYMBOL(mlx5_cmd_exec); + +/** + * mlx5_cmd_exec_polling - Executes a fw command, poll for completion + * Needed for driver force teardown, when command completion EQ + * will not be available to complete the command + * + * @dev: mlx5 core device + * @in: inbox mlx5_ifc command buffer + * @in_size: inbox buffer size + * @out: outbox mlx5_ifc buffer + * @out_size: outbox size + * + * @return: 0 if no error, FW command execution was successful + * and outbox status is ok. + */ +int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, + void *out, int out_size) +{ + int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true); + u16 op_mod = MLX5_GET(mbox_in, in, op_mod); + u16 opcode = in_to_opcode(in); + + err = cmd_status_err(dev, err, opcode, op_mod, out); + return mlx5_cmd_check(dev, err, in, out); +} +EXPORT_SYMBOL(mlx5_cmd_exec_polling); + +void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, + struct mlx5_async_ctx *ctx) +{ + ctx->dev = dev; + /* Starts at 1 to avoid doing wake_up if we are not cleaning up */ + atomic_set(&ctx->num_inflight, 1); + init_completion(&ctx->inflight_done); +} +EXPORT_SYMBOL(mlx5_cmd_init_async_ctx); + +/** + * mlx5_cmd_cleanup_async_ctx - Clean up an async_ctx + * @ctx: The ctx to clean + * + * Upon return all callbacks given to mlx5_cmd_exec_cb() have been called. The + * caller must ensure that mlx5_cmd_exec_cb() is not called during or after + * the call to mlx5_cmd_cleanup_async_ctx(). 
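+ *
+ * A minimal usage sketch (illustrative only; it assumes the NOP command
+ * layout from mlx5_ifc.h and a caller-provided "dev", and it elides error
+ * handling):
+ *
+ *    static void nop_done(int status, struct mlx5_async_work *work)
+ *    {
+ *        pr_debug("async NOP completed, status %d\n", status);
+ *    }
+ *
+ *    u32 out[MLX5_ST_SZ_DW(nop_out)] = {};
+ *    u32 in[MLX5_ST_SZ_DW(nop_in)] = {};
+ *    struct mlx5_async_work work = {};
+ *    struct mlx5_async_ctx ctx;
+ *
+ *    MLX5_SET(nop_in, in, opcode, MLX5_CMD_OP_NOP);
+ *    mlx5_cmd_init_async_ctx(dev, &ctx);
+ *    mlx5_cmd_exec_cb(&ctx, in, sizeof(in), out, sizeof(out), nop_done, &work);
+ *    mlx5_cmd_cleanup_async_ctx(&ctx);
+ *
+ * Because the cleanup call returns only after nop_done() has run (or after a
+ * failed submission has been accounted for), ctx, work, in and out may live
+ * on the caller's stack.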
+ */ +void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx) +{ + if (!atomic_dec_and_test(&ctx->num_inflight)) + wait_for_completion(&ctx->inflight_done); +} +EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx); + +static void mlx5_cmd_exec_cb_handler(int status, void *_work) +{ + struct mlx5_async_work *work = _work; + struct mlx5_async_ctx *ctx; + + ctx = work->ctx; + status = cmd_status_err(ctx->dev, status, work->opcode, work->op_mod, work->out); + work->user_callback(status, work); + if (atomic_dec_and_test(&ctx->num_inflight)) + complete(&ctx->inflight_done); +} + +int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, + void *out, int out_size, mlx5_async_cbk_t callback, + struct mlx5_async_work *work) +{ + int ret; + + work->ctx = ctx; + work->user_callback = callback; + work->opcode = in_to_opcode(in); + work->op_mod = MLX5_GET(mbox_in, in, op_mod); + work->out = out; + if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) + return -EIO; + ret = cmd_exec(ctx->dev, in, in_size, out, out_size, + mlx5_cmd_exec_cb_handler, work, false); + if (ret && atomic_dec_and_test(&ctx->num_inflight)) + complete(&ctx->inflight_done); + + return ret; +} +EXPORT_SYMBOL(mlx5_cmd_exec_cb); + +static void destroy_msg_cache(struct mlx5_core_dev *dev) +{ + struct cmd_msg_cache *ch; + struct mlx5_cmd_msg *msg; + struct mlx5_cmd_msg *n; + int i; + + for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) { + ch = &dev->cmd.cache[i]; + list_for_each_entry_safe(msg, n, &ch->head, list) { + list_del(&msg->list); + mlx5_free_cmd_msg(dev, msg); + } + } +} + +static unsigned cmd_cache_num_ent[MLX5_NUM_COMMAND_CACHES] = { + 512, 32, 16, 8, 2 +}; + +static unsigned cmd_cache_ent_size[MLX5_NUM_COMMAND_CACHES] = { + 16 + MLX5_CMD_DATA_BLOCK_SIZE, + 16 + MLX5_CMD_DATA_BLOCK_SIZE * 2, + 16 + MLX5_CMD_DATA_BLOCK_SIZE * 16, + 16 + MLX5_CMD_DATA_BLOCK_SIZE * 256, + 16 + MLX5_CMD_DATA_BLOCK_SIZE * 512, +}; + +static void create_msg_cache(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + struct cmd_msg_cache *ch; + struct mlx5_cmd_msg *msg; + int i; + int k; + + /* Initialize and fill the caches with initial entries */ + for (k = 0; k < MLX5_NUM_COMMAND_CACHES; k++) { + ch = &cmd->cache[k]; + spin_lock_init(&ch->lock); + INIT_LIST_HEAD(&ch->head); + ch->num_ent = cmd_cache_num_ent[k]; + ch->max_inbox_size = cmd_cache_ent_size[k]; + for (i = 0; i < ch->num_ent; i++) { + msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL | __GFP_NOWARN, + ch->max_inbox_size, 0); + if (IS_ERR(msg)) + break; + msg->parent = ch; + list_add_tail(&msg->list, &ch->head); + } + } +} + +static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) +{ + cmd->cmd_alloc_buf = dma_alloc_coherent(mlx5_core_dma_dev(dev), MLX5_ADAPTER_PAGE_SIZE, + &cmd->alloc_dma, GFP_KERNEL); + if (!cmd->cmd_alloc_buf) + return -ENOMEM; + + /* make sure it is aligned to 4K */ + if (!((uintptr_t)cmd->cmd_alloc_buf & (MLX5_ADAPTER_PAGE_SIZE - 1))) { + cmd->cmd_buf = cmd->cmd_alloc_buf; + cmd->dma = cmd->alloc_dma; + cmd->alloc_size = MLX5_ADAPTER_PAGE_SIZE; + return 0; + } + + dma_free_coherent(mlx5_core_dma_dev(dev), MLX5_ADAPTER_PAGE_SIZE, cmd->cmd_alloc_buf, + cmd->alloc_dma); + cmd->cmd_alloc_buf = dma_alloc_coherent(mlx5_core_dma_dev(dev), + 2 * MLX5_ADAPTER_PAGE_SIZE - 1, + &cmd->alloc_dma, GFP_KERNEL); + if (!cmd->cmd_alloc_buf) + return -ENOMEM; + + cmd->cmd_buf = PTR_ALIGN(cmd->cmd_alloc_buf, MLX5_ADAPTER_PAGE_SIZE); + cmd->dma = ALIGN(cmd->alloc_dma, MLX5_ADAPTER_PAGE_SIZE); + cmd->alloc_size = 2 * MLX5_ADAPTER_PAGE_SIZE - 1; + return 0; 
+} + +static void free_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) +{ + dma_free_coherent(mlx5_core_dma_dev(dev), cmd->alloc_size, cmd->cmd_alloc_buf, + cmd->alloc_dma); +} + +static u16 cmdif_rev(struct mlx5_core_dev *dev) +{ + return ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16; +} + +int mlx5_cmd_init(struct mlx5_core_dev *dev) +{ + int size = sizeof(struct mlx5_cmd_prot_block); + int align = roundup_pow_of_two(size); + struct mlx5_cmd *cmd = &dev->cmd; + u32 cmd_h, cmd_l; + u16 cmd_if_rev; + int err; + int i; + + memset(cmd, 0, sizeof(*cmd)); + cmd_if_rev = cmdif_rev(dev); + if (cmd_if_rev != CMD_IF_REV) { + mlx5_core_err(dev, + "Driver cmdif rev(%d) differs from firmware's(%d)\n", + CMD_IF_REV, cmd_if_rev); + return -EINVAL; + } + + cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0); + if (!cmd->pool) + return -ENOMEM; + + err = alloc_cmd_page(dev, cmd); + if (err) + goto err_free_pool; + + cmd_l = ioread32be(&dev->iseg->cmdq_addr_l_sz) & 0xff; + cmd->vars.log_sz = cmd_l >> 4 & 0xf; + cmd->vars.log_stride = cmd_l & 0xf; + if (1 << cmd->vars.log_sz > MLX5_MAX_COMMANDS) { + mlx5_core_err(dev, "firmware reports too many outstanding commands %d\n", + 1 << cmd->vars.log_sz); + err = -EINVAL; + goto err_free_page; + } + + if (cmd->vars.log_sz + cmd->vars.log_stride > MLX5_ADAPTER_PAGE_SHIFT) { + mlx5_core_err(dev, "command queue size overflow\n"); + err = -EINVAL; + goto err_free_page; + } + + cmd->state = MLX5_CMDIF_STATE_DOWN; + cmd->checksum_disabled = 1; + cmd->vars.max_reg_cmds = (1 << cmd->vars.log_sz) - 1; + cmd->vars.bitmask = (1UL << cmd->vars.max_reg_cmds) - 1; + + cmd->vars.cmdif_rev = ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16; + if (cmd->vars.cmdif_rev > CMD_IF_REV) { + mlx5_core_err(dev, "driver does not support command interface version. 
driver %d, firmware %d\n", + CMD_IF_REV, cmd->vars.cmdif_rev); + err = -EOPNOTSUPP; + goto err_free_page; + } + + spin_lock_init(&cmd->alloc_lock); + spin_lock_init(&cmd->token_lock); + for (i = 0; i < MLX5_CMD_OP_MAX; i++) + spin_lock_init(&cmd->stats[i].lock); + + sema_init(&cmd->vars.sem, cmd->vars.max_reg_cmds); + sema_init(&cmd->vars.pages_sem, 1); + sema_init(&cmd->vars.throttle_sem, DIV_ROUND_UP(cmd->vars.max_reg_cmds, 2)); + + cmd_h = (u32)((u64)(cmd->dma) >> 32); + cmd_l = (u32)(cmd->dma); + if (cmd_l & 0xfff) { + mlx5_core_err(dev, "invalid command queue address\n"); + err = -ENOMEM; + goto err_free_page; + } + + iowrite32be(cmd_h, &dev->iseg->cmdq_addr_h); + iowrite32be(cmd_l, &dev->iseg->cmdq_addr_l_sz); + + /* Make sure firmware sees the complete address before we proceed */ + wmb(); + + mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma)); + + cmd->mode = CMD_MODE_POLLING; + cmd->allowed_opcode = CMD_ALLOWED_OPCODE_ALL; + + create_msg_cache(dev); + + set_wqname(dev); + cmd->wq = create_singlethread_workqueue(cmd->wq_name); + if (!cmd->wq) { + mlx5_core_err(dev, "failed to create command workqueue\n"); + err = -ENOMEM; + goto err_cache; + } + + create_debugfs_files(dev); + + return 0; + +err_cache: + destroy_msg_cache(dev); + +err_free_page: + free_cmd_page(dev, cmd); + +err_free_pool: + dma_pool_destroy(cmd->pool); + return err; +} + +void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + + clean_debug_files(dev); + destroy_workqueue(cmd->wq); + destroy_msg_cache(dev); + free_cmd_page(dev, cmd); + dma_pool_destroy(cmd->pool); +} + +void mlx5_cmd_set_state(struct mlx5_core_dev *dev, + enum mlx5_cmdif_state cmdif_state) +{ + dev->cmd.state = cmdif_state; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c new file mode 100644 index 000000000..4caa1b6f4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/kernel.h> +#include <linux/hardirq.h> +#include <linux/mlx5/driver.h> +#include <rdma/ib_verbs.h> +#include <linux/mlx5/cq.h> +#include "mlx5_core.h" +#include "lib/eq.h" + +#define TASKLET_MAX_TIME 2 +#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME) + +void mlx5_cq_tasklet_cb(struct tasklet_struct *t) +{ + unsigned long flags; + unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES; + struct mlx5_eq_tasklet *ctx = from_tasklet(ctx, t, task); + struct mlx5_core_cq *mcq; + struct mlx5_core_cq *temp; + + spin_lock_irqsave(&ctx->lock, flags); + list_splice_tail_init(&ctx->list, &ctx->process_list); + spin_unlock_irqrestore(&ctx->lock, flags); + + list_for_each_entry_safe(mcq, temp, &ctx->process_list, + tasklet_ctx.list) { + list_del_init(&mcq->tasklet_ctx.list); + mcq->tasklet_ctx.comp(mcq, NULL); + mlx5_cq_put(mcq); + if (time_after(jiffies, end)) + break; + } + + if (!list_empty(&ctx->process_list)) + tasklet_schedule(&ctx->task); +} + +static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, + struct mlx5_eqe *eqe) +{ + unsigned long flags; + struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv; + + spin_lock_irqsave(&tasklet_ctx->lock, flags); + /* When migration of CQs between EQs is implemented, note that + * this point will need to be synchronized. It is possible that + * while migrating a CQ, completions on the old EQs could + * still arrive. + */ + if (list_empty_careful(&cq->tasklet_ctx.list)) { + mlx5_cq_hold(cq); + list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list); + } + spin_unlock_irqrestore(&tasklet_ctx->lock, flags); +} + +/* Callers must verify outbox status in case of err */ +int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u32 *in, int inlen, u32 *out, int outlen) +{ + int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), + c_eqn_or_apu_element); + u32 din[MLX5_ST_SZ_DW(destroy_cq_in)] = {}; + struct mlx5_eq_comp *eq; + int err; + + eq = mlx5_eqn2comp_eq(dev, eqn); + if (IS_ERR(eq)) + return PTR_ERR(eq); + + memset(out, 0, outlen); + MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ); + err = mlx5_cmd_do(dev, in, inlen, out, outlen); + if (err) + return err; + + cq->cqn = MLX5_GET(create_cq_out, out, cqn); + cq->cons_index = 0; + cq->arm_sn = 0; + cq->eq = eq; + cq->uid = MLX5_GET(create_cq_in, in, uid); + refcount_set(&cq->refcount, 1); + init_completion(&cq->free); + if (!cq->comp) + cq->comp = mlx5_add_cq_to_tasklet; + /* assuming CQ will be deleted before the EQ */ + cq->tasklet_ctx.priv = &eq->tasklet_ctx; + INIT_LIST_HEAD(&cq->tasklet_ctx.list); + + /* Add to comp EQ CQ tree to recv comp events */ + err = mlx5_eq_add_cq(&eq->core, cq); + if (err) + goto err_cmd; + + /* Add to async EQ CQ tree to recv async events */ + err = mlx5_eq_add_cq(mlx5_get_async_eq(dev), cq); + if (err) + goto err_cq_add; + + cq->pid = current->pid; + err = mlx5_debug_cq_add(dev, cq); + if (err) + mlx5_core_dbg(dev, "failed adding CQ 0x%x to debug file system\n", + cq->cqn); + + cq->uar = dev->priv.uar; + cq->irqn = eq->core.irqn; + + return 0; + +err_cq_add: + mlx5_eq_del_cq(&eq->core, cq); +err_cmd: + MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ); + MLX5_SET(destroy_cq_in, din, cqn, cq->cqn); + MLX5_SET(destroy_cq_in, din, uid, cq->uid); + mlx5_cmd_exec_in(dev, destroy_cq, din); + return err; +} +EXPORT_SYMBOL(mlx5_create_cq); + +/* outbox is checked and err val is normalized */ +int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u32 *in, int inlen, 
u32 *out, int outlen) +{ + int err = mlx5_create_cq(dev, cq, in, inlen, out, outlen); + + return mlx5_cmd_check(dev, err, in, out); +} +EXPORT_SYMBOL(mlx5_core_create_cq); + +int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) +{ + u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {}; + int err; + + mlx5_debug_cq_remove(dev, cq); + + mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq); + mlx5_eq_del_cq(&cq->eq->core, cq); + + MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ); + MLX5_SET(destroy_cq_in, in, cqn, cq->cqn); + MLX5_SET(destroy_cq_in, in, uid, cq->uid); + err = mlx5_cmd_exec_in(dev, destroy_cq, in); + if (err) + return err; + + synchronize_irq(cq->irqn); + mlx5_cq_put(cq); + wait_for_completion(&cq->free); + + return 0; +} +EXPORT_SYMBOL(mlx5_core_destroy_cq); + +int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(query_cq_in)] = {}; + + MLX5_SET(query_cq_in, in, opcode, MLX5_CMD_OP_QUERY_CQ); + MLX5_SET(query_cq_in, in, cqn, cq->cqn); + return mlx5_cmd_exec_inout(dev, query_cq, in, out); +} +EXPORT_SYMBOL(mlx5_core_query_cq); + +int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {}; + + MLX5_SET(modify_cq_in, in, opcode, MLX5_CMD_OP_MODIFY_CQ); + MLX5_SET(modify_cq_in, in, uid, cq->uid); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_core_modify_cq); + +int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev, + struct mlx5_core_cq *cq, + u16 cq_period, + u16 cq_max_count) +{ + u32 in[MLX5_ST_SZ_DW(modify_cq_in)] = {}; + void *cqc; + + MLX5_SET(modify_cq_in, in, cqn, cq->cqn); + cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, cq_period, cq_period); + MLX5_SET(cqc, cqc, cq_max_count, cq_max_count); + MLX5_SET(modify_cq_in, in, + modify_field_select_resize_field_select.modify_field_select.modify_field_select, + MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT); + + return mlx5_core_modify_cq(dev, cq, in, sizeof(in)); +} +EXPORT_SYMBOL(mlx5_core_modify_cq_moderation); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c new file mode 100644 index 000000000..e0b0729e2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -0,0 +1,574 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/debugfs.h> +#include <linux/mlx5/qp.h> +#include <linux/mlx5/cq.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#include "lib/eq.h" + +enum { + QP_PID, + QP_STATE, + QP_XPORT, + QP_MTU, + QP_N_RECV, + QP_RECV_SZ, + QP_N_SEND, + QP_LOG_PG_SZ, + QP_RQPN, +}; + +static char *qp_fields[] = { + [QP_PID] = "pid", + [QP_STATE] = "state", + [QP_XPORT] = "transport", + [QP_MTU] = "mtu", + [QP_N_RECV] = "num_recv", + [QP_RECV_SZ] = "rcv_wqe_sz", + [QP_N_SEND] = "num_send", + [QP_LOG_PG_SZ] = "log2_page_sz", + [QP_RQPN] = "remote_qpn", +}; + +enum { + EQ_NUM_EQES, + EQ_INTR, + EQ_LOG_PG_SZ, +}; + +static char *eq_fields[] = { + [EQ_NUM_EQES] = "num_eqes", + [EQ_INTR] = "intr", + [EQ_LOG_PG_SZ] = "log_page_size", +}; + +enum { + CQ_PID, + CQ_NUM_CQES, + CQ_LOG_PG_SZ, +}; + +static char *cq_fields[] = { + [CQ_PID] = "pid", + [CQ_NUM_CQES] = "num_cqes", + [CQ_LOG_PG_SZ] = "log_page_size", +}; + +struct dentry *mlx5_debugfs_root; +EXPORT_SYMBOL(mlx5_debugfs_root); + +void mlx5_register_debugfs(void) +{ + mlx5_debugfs_root = debugfs_create_dir("mlx5", NULL); +} + +void mlx5_unregister_debugfs(void) +{ + debugfs_remove(mlx5_debugfs_root); +} + +struct dentry *mlx5_debugfs_get_dev_root(struct mlx5_core_dev *dev) +{ + return dev->priv.dbg.dbg_root; +} +EXPORT_SYMBOL(mlx5_debugfs_get_dev_root); + +void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev) +{ + dev->priv.dbg.qp_debugfs = debugfs_create_dir("QPs", dev->priv.dbg.dbg_root); +} +EXPORT_SYMBOL(mlx5_qp_debugfs_init); + +void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + debugfs_remove_recursive(dev->priv.dbg.qp_debugfs); +} +EXPORT_SYMBOL(mlx5_qp_debugfs_cleanup); + +void mlx5_eq_debugfs_init(struct mlx5_core_dev *dev) +{ + dev->priv.dbg.eq_debugfs = debugfs_create_dir("EQs", dev->priv.dbg.dbg_root); +} + +void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + debugfs_remove_recursive(dev->priv.dbg.eq_debugfs); +} + +static ssize_t average_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_cmd_stats *stats; + u64 field = 0; + int ret; + char tbuf[22]; + + stats = filp->private_data; + spin_lock_irq(&stats->lock); + if (stats->n) + field = div64_u64(stats->sum, stats->n); + spin_unlock_irq(&stats->lock); + ret = snprintf(tbuf, sizeof(tbuf), "%llu\n", field); + return simple_read_from_buffer(buf, count, pos, tbuf, ret); +} + +static ssize_t average_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_cmd_stats *stats; + + stats = filp->private_data; + spin_lock_irq(&stats->lock); + stats->sum = 0; + stats->n = 0; + spin_unlock_irq(&stats->lock); + + *pos += count; + + return count; +} + +static const struct file_operations stats_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = average_read, + .write = average_write, +}; + +static ssize_t slots_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_cmd *cmd; + char tbuf[6]; + int weight; + int field; + int ret; + + cmd = filp->private_data; + weight = bitmap_weight(&cmd->vars.bitmask, cmd->vars.max_reg_cmds); + field = cmd->vars.max_reg_cmds - weight; + ret = snprintf(tbuf, sizeof(tbuf), "%d\n", field); + return simple_read_from_buffer(buf, count, pos, tbuf, 
ret); +} + +static const struct file_operations slots_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = slots_read, +}; + +void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_stats *stats; + struct dentry **cmd; + const char *namep; + int i; + + cmd = &dev->priv.dbg.cmdif_debugfs; + *cmd = debugfs_create_dir("commands", dev->priv.dbg.dbg_root); + + debugfs_create_file("slots_inuse", 0400, *cmd, &dev->cmd, &slots_fops); + + for (i = 0; i < MLX5_CMD_OP_MAX; i++) { + stats = &dev->cmd.stats[i]; + namep = mlx5_command_str(i); + if (strcmp(namep, "unknown command opcode")) { + stats->root = debugfs_create_dir(namep, *cmd); + + debugfs_create_file("average", 0400, stats->root, stats, + &stats_fops); + debugfs_create_u64("n", 0400, stats->root, &stats->n); + debugfs_create_u64("failed", 0400, stats->root, &stats->failed); + debugfs_create_u64("failed_mbox_status", 0400, stats->root, + &stats->failed_mbox_status); + debugfs_create_u32("last_failed_errno", 0400, stats->root, + &stats->last_failed_errno); + debugfs_create_u8("last_failed_mbox_status", 0400, stats->root, + &stats->last_failed_mbox_status); + debugfs_create_x32("last_failed_syndrome", 0400, stats->root, + &stats->last_failed_syndrome); + } + } +} + +void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + debugfs_remove_recursive(dev->priv.dbg.cmdif_debugfs); +} + +void mlx5_cq_debugfs_init(struct mlx5_core_dev *dev) +{ + dev->priv.dbg.cq_debugfs = debugfs_create_dir("CQs", dev->priv.dbg.dbg_root); +} + +void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + debugfs_remove_recursive(dev->priv.dbg.cq_debugfs); +} + +void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev) +{ + struct dentry *pages; + + dev->priv.dbg.pages_debugfs = debugfs_create_dir("pages", dev->priv.dbg.dbg_root); + pages = dev->priv.dbg.pages_debugfs; + + debugfs_create_u32("fw_pages_total", 0400, pages, &dev->priv.fw_pages); + debugfs_create_u32("fw_pages_vfs", 0400, pages, &dev->priv.page_counters[MLX5_VF]); + debugfs_create_u32("fw_pages_sfs", 0400, pages, &dev->priv.page_counters[MLX5_SF]); + debugfs_create_u32("fw_pages_host_pf", 0400, pages, &dev->priv.page_counters[MLX5_HOST_PF]); + debugfs_create_u32("fw_pages_alloc_failed", 0400, pages, &dev->priv.fw_pages_alloc_failed); + debugfs_create_u32("fw_pages_give_dropped", 0400, pages, &dev->priv.give_pages_dropped); + debugfs_create_u32("fw_pages_reclaim_discard", 0400, pages, + &dev->priv.reclaim_pages_discard); +} + +void mlx5_pages_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + debugfs_remove_recursive(dev->priv.dbg.pages_debugfs); +} + +static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, + int index, int *is_str) +{ + int outlen = MLX5_ST_SZ_BYTES(query_qp_out); + u32 in[MLX5_ST_SZ_DW(query_qp_in)] = {}; + u64 param = 0; + u32 *out; + int state; + u32 *qpc; + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return 0; + + MLX5_SET(query_qp_in, in, opcode, MLX5_CMD_OP_QUERY_QP); + MLX5_SET(query_qp_in, in, qpn, qp->qpn); + err = mlx5_cmd_exec_inout(dev, query_qp, in, out); + if (err) + goto out; + + *is_str = 0; + + qpc = MLX5_ADDR_OF(query_qp_out, out, qpc); + switch (index) { + case QP_PID: + param = qp->pid; + break; + case QP_STATE: + state = MLX5_GET(qpc, qpc, state); + param = (unsigned long)mlx5_qp_state_str(state); + *is_str = 1; + break; + case QP_XPORT: + param = (unsigned long)mlx5_qp_type_str(MLX5_GET(qpc, qpc, st)); + *is_str = 1; + break; + case QP_MTU: + switch (MLX5_GET(qpc, qpc, mtu)) { + case 
IB_MTU_256: + param = 256; + break; + case IB_MTU_512: + param = 512; + break; + case IB_MTU_1024: + param = 1024; + break; + case IB_MTU_2048: + param = 2048; + break; + case IB_MTU_4096: + param = 4096; + break; + default: + param = 0; + } + break; + case QP_N_RECV: + param = 1 << MLX5_GET(qpc, qpc, log_rq_size); + break; + case QP_RECV_SZ: + param = 1 << (MLX5_GET(qpc, qpc, log_rq_stride) + 4); + break; + case QP_N_SEND: + if (!MLX5_GET(qpc, qpc, no_sq)) + param = 1 << MLX5_GET(qpc, qpc, log_sq_size); + break; + case QP_LOG_PG_SZ: + param = MLX5_GET(qpc, qpc, log_page_size) + 12; + break; + case QP_RQPN: + param = MLX5_GET(qpc, qpc, remote_qpn); + break; + } +out: + kfree(out); + return param; +} + +static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + int index) +{ + int outlen = MLX5_ST_SZ_BYTES(query_eq_out); + u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {}; + u64 param = 0; + void *ctx; + u32 *out; + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return param; + + MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ); + MLX5_SET(query_eq_in, in, eq_number, eq->eqn); + err = mlx5_cmd_exec_inout(dev, query_eq, in, out); + if (err) { + mlx5_core_warn(dev, "failed to query eq\n"); + goto out; + } + ctx = MLX5_ADDR_OF(query_eq_out, out, eq_context_entry); + + switch (index) { + case EQ_NUM_EQES: + param = 1 << MLX5_GET(eqc, ctx, log_eq_size); + break; + case EQ_INTR: + param = MLX5_GET(eqc, ctx, intr); + break; + case EQ_LOG_PG_SZ: + param = MLX5_GET(eqc, ctx, log_page_size) + 12; + break; + } + +out: + kfree(out); + return param; +} + +static u64 cq_read_field(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + int index) +{ + int outlen = MLX5_ST_SZ_BYTES(query_cq_out); + u64 param = 0; + void *ctx; + u32 *out; + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return param; + + err = mlx5_core_query_cq(dev, cq, out); + if (err) { + mlx5_core_warn(dev, "failed to query cq\n"); + goto out; + } + ctx = MLX5_ADDR_OF(query_cq_out, out, cq_context); + + switch (index) { + case CQ_PID: + param = cq->pid; + break; + case CQ_NUM_CQES: + param = 1 << MLX5_GET(cqc, ctx, log_cq_size); + break; + case CQ_LOG_PG_SZ: + param = MLX5_GET(cqc, ctx, log_page_size); + break; + } + +out: + kvfree(out); + return param; +} + +static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_field_desc *desc; + struct mlx5_rsc_debug *d; + char tbuf[18]; + int is_str = 0; + u64 field; + int ret; + + desc = filp->private_data; + d = (void *)(desc - desc->i) - sizeof(*d); + switch (d->type) { + case MLX5_DBG_RSC_QP: + field = qp_read_field(d->dev, d->object, desc->i, &is_str); + break; + + case MLX5_DBG_RSC_EQ: + field = eq_read_field(d->dev, d->object, desc->i); + break; + + case MLX5_DBG_RSC_CQ: + field = cq_read_field(d->dev, d->object, desc->i); + break; + + default: + mlx5_core_warn(d->dev, "invalid resource type %d\n", d->type); + return -EINVAL; + } + + if (is_str) + ret = snprintf(tbuf, sizeof(tbuf), "%s\n", (const char *)(unsigned long)field); + else + ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field); + + return simple_read_from_buffer(buf, count, pos, tbuf, ret); +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = dbg_read, +}; + +static int add_res_tree(struct mlx5_core_dev *dev, enum dbg_rsc_type type, + struct dentry *root, struct mlx5_rsc_debug **dbg, + int rsn, char **field, int nfile, void *data) +{ + struct mlx5_rsc_debug *d; + char resn[32]; + int 
i; + + d = kzalloc(struct_size(d, fields, nfile), GFP_KERNEL); + if (!d) + return -ENOMEM; + + d->dev = dev; + d->object = data; + d->type = type; + sprintf(resn, "0x%x", rsn); + d->root = debugfs_create_dir(resn, root); + + for (i = 0; i < nfile; i++) { + d->fields[i].i = i; + debugfs_create_file(field[i], 0400, d->root, &d->fields[i], + &fops); + } + *dbg = d; + + return 0; +} + +static void rem_res_tree(struct mlx5_rsc_debug *d) +{ + debugfs_remove_recursive(d->root); + kfree(d); +} + +int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp) +{ + int err; + + if (!mlx5_debugfs_root) + return 0; + + err = add_res_tree(dev, MLX5_DBG_RSC_QP, dev->priv.dbg.qp_debugfs, + &qp->dbg, qp->qpn, qp_fields, + ARRAY_SIZE(qp_fields), qp); + if (err) + qp->dbg = NULL; + + return err; +} +EXPORT_SYMBOL(mlx5_debug_qp_add); + +void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp) +{ + if (!mlx5_debugfs_root) + return; + + if (qp->dbg) + rem_res_tree(qp->dbg); +} +EXPORT_SYMBOL(mlx5_debug_qp_remove); + +int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + int err; + + if (!mlx5_debugfs_root) + return 0; + + err = add_res_tree(dev, MLX5_DBG_RSC_EQ, dev->priv.dbg.eq_debugfs, + &eq->dbg, eq->eqn, eq_fields, + ARRAY_SIZE(eq_fields), eq); + if (err) + eq->dbg = NULL; + + return err; +} + +void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + if (!mlx5_debugfs_root) + return; + + if (eq->dbg) + rem_res_tree(eq->dbg); +} + +int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) +{ + int err; + + if (!mlx5_debugfs_root) + return 0; + + err = add_res_tree(dev, MLX5_DBG_RSC_CQ, dev->priv.dbg.cq_debugfs, + &cq->dbg, cq->cqn, cq_fields, + ARRAY_SIZE(cq_fields), cq); + if (err) + cq->dbg = NULL; + + return err; +} + +void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) +{ + if (!mlx5_debugfs_root) + return; + + if (cq->dbg) { + rem_res_tree(cq->dbg); + cq->dbg = NULL; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c new file mode 100644 index 000000000..02bb9d43f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -0,0 +1,648 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/eswitch.h> +#include <linux/mlx5/mlx5_ifc_vdpa.h> +#include <linux/mlx5/vport.h> +#include "mlx5_core.h" + +/* intf dev list mutex */ +static DEFINE_MUTEX(mlx5_intf_mutex); +static DEFINE_IDA(mlx5_adev_ida); + +static bool is_eth_rep_supported(struct mlx5_core_dev *dev) +{ + if (!IS_ENABLED(CONFIG_MLX5_ESWITCH)) + return false; + + if (!MLX5_ESWITCH_MANAGER(dev)) + return false; + + if (!is_mdev_switchdev_mode(dev)) + return false; + + return true; +} + +bool mlx5_eth_supported(struct mlx5_core_dev *dev) +{ + if (!IS_ENABLED(CONFIG_MLX5_CORE_EN)) + return false; + + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return false; + + if (!MLX5_CAP_GEN(dev, eth_net_offloads)) { + mlx5_core_warn(dev, "Missing eth_net_offloads capability\n"); + return false; + } + + if (!MLX5_CAP_GEN(dev, nic_flow_table)) { + mlx5_core_warn(dev, "Missing nic_flow_table capability\n"); + return false; + } + + if (!MLX5_CAP_ETH(dev, csum_cap)) { + mlx5_core_warn(dev, "Missing csum_cap capability\n"); + return false; + } + + if (!MLX5_CAP_ETH(dev, max_lso_cap)) { + mlx5_core_warn(dev, "Missing max_lso_cap capability\n"); + return false; + } + + if (!MLX5_CAP_ETH(dev, vlan_cap)) { + mlx5_core_warn(dev, "Missing vlan_cap capability\n"); + return false; + } + + if (!MLX5_CAP_ETH(dev, rss_ind_tbl_cap)) { + mlx5_core_warn(dev, "Missing rss_ind_tbl_cap capability\n"); + return false; + } + + if (MLX5_CAP_FLOWTABLE(dev, + flow_table_properties_nic_receive.max_ft_level) < 3) { + mlx5_core_warn(dev, "max_ft_level < 3\n"); + return false; + } + + if (!MLX5_CAP_ETH(dev, self_lb_en_modifiable)) + mlx5_core_warn(dev, "Self loop back prevention is not supported\n"); + if (!MLX5_CAP_GEN(dev, cq_moderation)) + mlx5_core_warn(dev, "CQ moderation is not supported\n"); + + return true; +} + +static bool is_eth_enabled(struct mlx5_core_dev *dev) +{ + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(priv_to_devlink(dev), + DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, + &val); + return err ? false : val.vbool; +} + +bool mlx5_vnet_supported(struct mlx5_core_dev *dev) +{ + if (!IS_ENABLED(CONFIG_MLX5_VDPA_NET)) + return false; + + if (mlx5_core_is_pf(dev)) + return false; + + if (!(MLX5_CAP_GEN_64(dev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q)) + return false; + + if (!(MLX5_CAP_DEV_VDPA_EMULATION(dev, event_mode) & + MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE)) + return false; + + if (!MLX5_CAP_DEV_VDPA_EMULATION(dev, eth_frame_offload_type)) + return false; + + return true; +} + +static bool is_vnet_enabled(struct mlx5_core_dev *dev) +{ + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(priv_to_devlink(dev), + DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, + &val); + return err ? 
false : val.vbool; +} + +static bool is_ib_rep_supported(struct mlx5_core_dev *dev) +{ + if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) + return false; + + if (dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV) + return false; + + if (!is_eth_rep_supported(dev)) + return false; + + if (!MLX5_ESWITCH_MANAGER(dev)) + return false; + + if (!is_mdev_switchdev_mode(dev)) + return false; + + if (mlx5_core_mp_enabled(dev)) + return false; + + return true; +} + +static bool is_mp_supported(struct mlx5_core_dev *dev) +{ + if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) + return false; + + if (dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV) + return false; + + if (is_ib_rep_supported(dev)) + return false; + + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return false; + + if (!mlx5_core_is_mp_slave(dev)) + return false; + + return true; +} + +bool mlx5_rdma_supported(struct mlx5_core_dev *dev) +{ + if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) + return false; + + if (dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV) + return false; + + if (is_ib_rep_supported(dev)) + return false; + + if (is_mp_supported(dev)) + return false; + + return true; +} + +static bool is_ib_enabled(struct mlx5_core_dev *dev) +{ + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(priv_to_devlink(dev), + DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, + &val); + return err ? false : val.vbool; +} + +enum { + MLX5_INTERFACE_PROTOCOL_ETH, + MLX5_INTERFACE_PROTOCOL_ETH_REP, + + MLX5_INTERFACE_PROTOCOL_IB, + MLX5_INTERFACE_PROTOCOL_IB_REP, + MLX5_INTERFACE_PROTOCOL_MPIB, + + MLX5_INTERFACE_PROTOCOL_VNET, +}; + +static const struct mlx5_adev_device { + const char *suffix; + bool (*is_supported)(struct mlx5_core_dev *dev); + bool (*is_enabled)(struct mlx5_core_dev *dev); +} mlx5_adev_devices[] = { + [MLX5_INTERFACE_PROTOCOL_VNET] = { .suffix = "vnet", + .is_supported = &mlx5_vnet_supported, + .is_enabled = &is_vnet_enabled }, + [MLX5_INTERFACE_PROTOCOL_IB] = { .suffix = "rdma", + .is_supported = &mlx5_rdma_supported, + .is_enabled = &is_ib_enabled }, + [MLX5_INTERFACE_PROTOCOL_ETH] = { .suffix = "eth", + .is_supported = &mlx5_eth_supported, + .is_enabled = &is_eth_enabled }, + [MLX5_INTERFACE_PROTOCOL_ETH_REP] = { .suffix = "eth-rep", + .is_supported = &is_eth_rep_supported }, + [MLX5_INTERFACE_PROTOCOL_IB_REP] = { .suffix = "rdma-rep", + .is_supported = &is_ib_rep_supported }, + [MLX5_INTERFACE_PROTOCOL_MPIB] = { .suffix = "multiport", + .is_supported = &is_mp_supported }, +}; + +int mlx5_adev_idx_alloc(void) +{ + return ida_alloc(&mlx5_adev_ida, GFP_KERNEL); +} + +void mlx5_adev_idx_free(int idx) +{ + ida_free(&mlx5_adev_ida, idx); +} + +int mlx5_adev_init(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + + priv->adev = kcalloc(ARRAY_SIZE(mlx5_adev_devices), + sizeof(struct mlx5_adev *), GFP_KERNEL); + if (!priv->adev) + return -ENOMEM; + + return 0; +} + +void mlx5_adev_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + + kfree(priv->adev); +} + +static void adev_release(struct device *dev) +{ + struct mlx5_adev *mlx5_adev = + container_of(dev, struct mlx5_adev, adev.dev); + struct mlx5_priv *priv = &mlx5_adev->mdev->priv; + int idx = mlx5_adev->idx; + + kfree(mlx5_adev); + priv->adev[idx] = NULL; +} + +static struct mlx5_adev *add_adev(struct mlx5_core_dev *dev, int idx) +{ + const char *suffix = mlx5_adev_devices[idx].suffix; + struct auxiliary_device *adev; + struct mlx5_adev *madev; + int ret; + + madev = kzalloc(sizeof(*madev), GFP_KERNEL); + if 
(!madev) + return ERR_PTR(-ENOMEM); + + adev = &madev->adev; + adev->id = dev->priv.adev_idx; + adev->name = suffix; + adev->dev.parent = dev->device; + adev->dev.release = adev_release; + madev->mdev = dev; + madev->idx = idx; + + ret = auxiliary_device_init(adev); + if (ret) { + kfree(madev); + return ERR_PTR(ret); + } + + ret = auxiliary_device_add(adev); + if (ret) { + auxiliary_device_uninit(adev); + return ERR_PTR(ret); + } + return madev; +} + +static void del_adev(struct auxiliary_device *adev) +{ + auxiliary_device_delete(adev); + auxiliary_device_uninit(adev); +} + +int mlx5_attach_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct auxiliary_device *adev; + struct auxiliary_driver *adrv; + int ret = 0, i; + + devl_assert_locked(priv_to_devlink(dev)); + mutex_lock(&mlx5_intf_mutex); + priv->flags &= ~MLX5_PRIV_FLAGS_DETACH; + priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; + for (i = 0; i < ARRAY_SIZE(mlx5_adev_devices); i++) { + if (!priv->adev[i]) { + bool is_supported = false; + + if (mlx5_adev_devices[i].is_enabled) { + bool enabled; + + enabled = mlx5_adev_devices[i].is_enabled(dev); + if (!enabled) + continue; + } + + if (mlx5_adev_devices[i].is_supported) + is_supported = mlx5_adev_devices[i].is_supported(dev); + + if (!is_supported) + continue; + + priv->adev[i] = add_adev(dev, i); + if (IS_ERR(priv->adev[i])) { + ret = PTR_ERR(priv->adev[i]); + priv->adev[i] = NULL; + } + } else { + adev = &priv->adev[i]->adev; + + /* Note that mlx5_core_dev is bound here to an + * auxiliary driver, not to a PCI driver. + * + * We can race with module unload against devlink + * reload here, but no extra lock is needed because + * we are holding the global mlx5_intf_mutex. + */ + if (!adev->dev.driver) + continue; + adrv = to_auxiliary_drv(adev->dev.driver); + + if (adrv->resume) + ret = adrv->resume(adev); + } + if (ret) { + mlx5_core_warn(dev, "Device[%d] (%s) failed to load\n", + i, mlx5_adev_devices[i].suffix); + + break; + } + } + priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; + mutex_unlock(&mlx5_intf_mutex); + return ret; +} + +void mlx5_detach_device(struct mlx5_core_dev *dev, bool suspend) +{ + struct mlx5_priv *priv = &dev->priv; + struct auxiliary_device *adev; + struct auxiliary_driver *adrv; + pm_message_t pm = {}; + int i; + + devl_assert_locked(priv_to_devlink(dev)); + mutex_lock(&mlx5_intf_mutex); + priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; + for (i = ARRAY_SIZE(mlx5_adev_devices) - 1; i >= 0; i--) { + if (!priv->adev[i]) + continue; + + if (mlx5_adev_devices[i].is_enabled) { + bool enabled; + + enabled = mlx5_adev_devices[i].is_enabled(dev); + if (!enabled) + goto skip_suspend; + } + + adev = &priv->adev[i]->adev; + /* Auxiliary driver was unbound manually through sysfs */ + if (!adev->dev.driver) + goto skip_suspend; + + adrv = to_auxiliary_drv(adev->dev.driver); + + if (adrv->suspend && suspend) { + adrv->suspend(adev, pm); + continue; + } + +skip_suspend: + del_adev(&priv->adev[i]->adev); + priv->adev[i] = NULL; + } + priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; + priv->flags |= MLX5_PRIV_FLAGS_DETACH; + mutex_unlock(&mlx5_intf_mutex); +} + +int mlx5_register_device(struct mlx5_core_dev *dev) +{ + int ret; + + devl_assert_locked(priv_to_devlink(dev)); + mutex_lock(&mlx5_intf_mutex); + dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; + ret = mlx5_rescan_drivers_locked(dev); + mutex_unlock(&mlx5_intf_mutex); + if (ret) + mlx5_unregister_device(dev); + + return ret; +} + +void 
mlx5_unregister_device(struct mlx5_core_dev *dev) +{ + devl_assert_locked(priv_to_devlink(dev)); + mutex_lock(&mlx5_intf_mutex); + dev->priv.flags = MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; + mlx5_rescan_drivers_locked(dev); + mutex_unlock(&mlx5_intf_mutex); +} + +static int add_drivers(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + int i, ret = 0; + + for (i = 0; i < ARRAY_SIZE(mlx5_adev_devices); i++) { + bool is_supported = false; + + if (priv->adev[i]) + continue; + + if (mlx5_adev_devices[i].is_supported) + is_supported = mlx5_adev_devices[i].is_supported(dev); + + if (!is_supported) + continue; + + priv->adev[i] = add_adev(dev, i); + if (IS_ERR(priv->adev[i])) { + mlx5_core_warn(dev, "Device[%d] (%s) failed to load\n", + i, mlx5_adev_devices[i].suffix); + /* We continue to rescan drivers and leave to the caller + * to make decision if to release everything or continue. + */ + ret = PTR_ERR(priv->adev[i]); + priv->adev[i] = NULL; + } + } + return ret; +} + +static void delete_drivers(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + bool delete_all; + int i; + + delete_all = priv->flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; + + for (i = ARRAY_SIZE(mlx5_adev_devices) - 1; i >= 0; i--) { + bool is_supported = false; + + if (!priv->adev[i]) + continue; + + if (mlx5_adev_devices[i].is_enabled) { + bool enabled; + + enabled = mlx5_adev_devices[i].is_enabled(dev); + if (!enabled) + goto del_adev; + } + + if (mlx5_adev_devices[i].is_supported && !delete_all) + is_supported = mlx5_adev_devices[i].is_supported(dev); + + if (is_supported) + continue; + +del_adev: + del_adev(&priv->adev[i]->adev); + priv->adev[i] = NULL; + } +} + +/* This function is used after mlx5_core_dev is reconfigured. + */ +int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + int err = 0; + + lockdep_assert_held(&mlx5_intf_mutex); + if (priv->flags & MLX5_PRIV_FLAGS_DETACH) + return 0; + + priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; + delete_drivers(dev); + if (priv->flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) + goto out; + + err = add_drivers(dev); + +out: + priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; + return err; +} + +bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev) +{ + u64 fsystem_guid, psystem_guid; + + fsystem_guid = mlx5_query_nic_system_image_guid(dev); + psystem_guid = mlx5_query_nic_system_image_guid(peer_dev); + + return (fsystem_guid && psystem_guid && fsystem_guid == psystem_guid); +} + +static u32 mlx5_gen_pci_id(const struct mlx5_core_dev *dev) +{ + return (u32)((pci_domain_nr(dev->pdev->bus) << 16) | + (dev->pdev->bus->number << 8) | + PCI_SLOT(dev->pdev->devfn)); +} + +static int _next_phys_dev(struct mlx5_core_dev *mdev, + const struct mlx5_core_dev *curr) +{ + if (!mlx5_core_is_pf(mdev)) + return 0; + + if (mdev == curr) + return 0; + + if (!mlx5_same_hw_devs(mdev, (struct mlx5_core_dev *)curr) && + mlx5_gen_pci_id(mdev) != mlx5_gen_pci_id(curr)) + return 0; + + return 1; +} + +static void *pci_get_other_drvdata(struct device *this, struct device *other) +{ + if (this->driver != other->driver) + return NULL; + + return pci_get_drvdata(to_pci_dev(other)); +} + +static int next_phys_dev_lag(struct device *dev, const void *data) +{ + struct mlx5_core_dev *mdev, *this = (struct mlx5_core_dev *)data; + + mdev = pci_get_other_drvdata(this->device, dev); + if (!mdev) + return 0; + + if (!MLX5_CAP_GEN(mdev, vport_group_manager) || + !MLX5_CAP_GEN(mdev, lag_master) || + 
(MLX5_CAP_GEN(mdev, num_lag_ports) > MLX5_MAX_PORTS || + MLX5_CAP_GEN(mdev, num_lag_ports) <= 1)) + return 0; + + return _next_phys_dev(mdev, data); +} + +static struct mlx5_core_dev *mlx5_get_next_dev(struct mlx5_core_dev *dev, + int (*match)(struct device *dev, const void *data)) +{ + struct device *next; + + if (!mlx5_core_is_pf(dev)) + return NULL; + + next = bus_find_device(&pci_bus_type, NULL, dev, match); + if (!next) + return NULL; + + put_device(next); + return pci_get_drvdata(to_pci_dev(next)); +} + +/* Must be called with intf_mutex held */ +struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev) +{ + lockdep_assert_held(&mlx5_intf_mutex); + return mlx5_get_next_dev(dev, &next_phys_dev_lag); +} + +void mlx5_dev_list_lock(void) +{ + mutex_lock(&mlx5_intf_mutex); +} +void mlx5_dev_list_unlock(void) +{ + mutex_unlock(&mlx5_intf_mutex); +} + +int mlx5_dev_list_trylock(void) +{ + return mutex_trylock(&mlx5_intf_mutex); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c new file mode 100644 index 000000000..3749eb83d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -0,0 +1,918 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include <devlink.h> + +#include "mlx5_core.h" +#include "fw_reset.h" +#include "fs_core.h" +#include "eswitch.h" +#include "esw/qos.h" +#include "sf/dev/dev.h" +#include "sf/sf.h" + +static int mlx5_devlink_flash_update(struct devlink *devlink, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + return mlx5_firmware_flash(dev, params->fw, extack); +} + +static u8 mlx5_fw_ver_major(u32 version) +{ + return (version >> 24) & 0xff; +} + +static u8 mlx5_fw_ver_minor(u32 version) +{ + return (version >> 16) & 0xff; +} + +static u16 mlx5_fw_ver_subminor(u32 version) +{ + return version & 0xffff; +} + +#define DEVLINK_FW_STRING_LEN 32 + +static int +mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + char version_str[DEVLINK_FW_STRING_LEN]; + u32 running_fw, stored_fw; + int err; + + err = devlink_info_driver_name_put(req, KBUILD_MODNAME); + if (err) + return err; + + err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id); + if (err) + return err; + + err = mlx5_fw_version_query(dev, &running_fw, &stored_fw); + if (err) + return err; + + snprintf(version_str, sizeof(version_str), "%d.%d.%04d", + mlx5_fw_ver_major(running_fw), mlx5_fw_ver_minor(running_fw), + mlx5_fw_ver_subminor(running_fw)); + err = devlink_info_version_running_put(req, "fw.version", version_str); + if (err) + return err; + err = devlink_info_version_running_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW, + version_str); + if (err) + return err; + + /* no pending version, return running (stored) version */ + if (stored_fw == 0) + stored_fw = running_fw; + + snprintf(version_str, sizeof(version_str), "%d.%d.%04d", + mlx5_fw_ver_major(stored_fw), mlx5_fw_ver_minor(stored_fw), + mlx5_fw_ver_subminor(stored_fw)); + err = devlink_info_version_stored_put(req, "fw.version", version_str); + if (err) + return err; + return devlink_info_version_stored_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW, + version_str); +} + +static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netlink_ext_ack *extack) +{ + struct 
mlx5_core_dev *dev = devlink_priv(devlink); + u8 reset_level, reset_type, net_port_alive; + int err; + + err = mlx5_fw_reset_query(dev, &reset_level, &reset_type); + if (err) + return err; + if (!(reset_level & MLX5_MFRL_REG_RESET_LEVEL3)) { + NL_SET_ERR_MSG_MOD(extack, "FW activate requires reboot"); + return -EINVAL; + } + + net_port_alive = !!(reset_type & MLX5_MFRL_REG_RESET_TYPE_NET_PORT_ALIVE); + err = mlx5_fw_reset_set_reset_sync(dev, net_port_alive, extack); + if (err) + return err; + + err = mlx5_fw_reset_wait_reset_done(dev); + if (err) + return err; + + mlx5_unload_one_devl_locked(dev, true); + err = mlx5_health_wait_pci_up(dev); + if (err) + NL_SET_ERR_MSG_MOD(extack, "FW activate aborted, PCI reads fail after reset"); + + return err; +} + +static int mlx5_devlink_trigger_fw_live_patch(struct devlink *devlink, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u8 reset_level; + int err; + + err = mlx5_fw_reset_query(dev, &reset_level, NULL); + if (err) + return err; + if (!(reset_level & MLX5_MFRL_REG_RESET_LEVEL0)) { + NL_SET_ERR_MSG_MOD(extack, + "FW upgrade to the stored FW can't be done by FW live patching"); + return -EINVAL; + } + + return mlx5_fw_reset_set_live_patch(dev); +} + +static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, + enum devlink_reload_action action, + enum devlink_reload_limit limit, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct pci_dev *pdev = dev->pdev; + bool sf_dev_allocated; + int ret = 0; + + sf_dev_allocated = mlx5_sf_dev_allocated(dev); + if (sf_dev_allocated) { + /* Reload results in deleting the SF devices, which in turn + * unregisters their devlink instances while devlink_mutex is + * held. Hence, reload is not supported. 
+ */ + NL_SET_ERR_MSG_MOD(extack, "reload is unsupported when SFs are allocated"); + return -EOPNOTSUPP; + } + + if (mlx5_lag_is_active(dev)) { + NL_SET_ERR_MSG_MOD(extack, "reload is unsupported in Lag mode"); + return -EOPNOTSUPP; + } + + if (pci_num_vf(pdev)) { + NL_SET_ERR_MSG_MOD(extack, "reload while VFs are present is unfavorable"); + } + + switch (action) { + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + mlx5_unload_one_devl_locked(dev, false); + break; + case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: + if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET) + ret = mlx5_devlink_trigger_fw_live_patch(devlink, extack); + else + ret = mlx5_devlink_reload_fw_activate(devlink, extack); + break; + default: + /* Unsupported action should not get to this function */ + WARN_ON(1); + ret = -EOPNOTSUPP; + } + + return ret; +} + +static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_action action, + enum devlink_reload_limit limit, u32 *actions_performed, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + int ret = 0; + + *actions_performed = BIT(action); + switch (action) { + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + ret = mlx5_load_one_devl_locked(dev, false); + break; + case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: + if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET) + break; + /* On fw_activate action, also driver is reloaded and reinit performed */ + *actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); + ret = mlx5_load_one_devl_locked(dev, true); + break; + default: + /* Unsupported action should not get to this function */ + WARN_ON(1); + ret = -EOPNOTSUPP; + } + + return ret; +} + +static struct mlx5_devlink_trap *mlx5_find_trap_by_id(struct mlx5_core_dev *dev, int trap_id) +{ + struct mlx5_devlink_trap *dl_trap; + + list_for_each_entry(dl_trap, &dev->priv.traps, list) + if (dl_trap->trap.id == trap_id) + return dl_trap; + + return NULL; +} + +static int mlx5_devlink_trap_init(struct devlink *devlink, const struct devlink_trap *trap, + void *trap_ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_devlink_trap *dl_trap; + + dl_trap = kzalloc(sizeof(*dl_trap), GFP_KERNEL); + if (!dl_trap) + return -ENOMEM; + + dl_trap->trap.id = trap->id; + dl_trap->trap.action = DEVLINK_TRAP_ACTION_DROP; + dl_trap->item = trap_ctx; + + if (mlx5_find_trap_by_id(dev, trap->id)) { + kfree(dl_trap); + mlx5_core_err(dev, "Devlink trap: Trap 0x%x already found", trap->id); + return -EEXIST; + } + + list_add_tail(&dl_trap->list, &dev->priv.traps); + return 0; +} + +static void mlx5_devlink_trap_fini(struct devlink *devlink, const struct devlink_trap *trap, + void *trap_ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_devlink_trap *dl_trap; + + dl_trap = mlx5_find_trap_by_id(dev, trap->id); + if (!dl_trap) { + mlx5_core_err(dev, "Devlink trap: Missing trap id 0x%x", trap->id); + return; + } + list_del(&dl_trap->list); + kfree(dl_trap); +} + +static int mlx5_devlink_trap_action_set(struct devlink *devlink, + const struct devlink_trap *trap, + enum devlink_trap_action action, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + enum devlink_trap_action action_orig; + struct mlx5_devlink_trap *dl_trap; + int err = 0; + + if (is_mdev_switchdev_mode(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Devlink traps can't be set in switchdev mode"); + return -EOPNOTSUPP; + } + + dl_trap = mlx5_find_trap_by_id(dev, trap->id); + if (!dl_trap) { + mlx5_core_err(dev, "Devlink trap: Set action on invalid 
trap id 0x%x", trap->id); + err = -EINVAL; + goto out; + } + + if (action != DEVLINK_TRAP_ACTION_DROP && action != DEVLINK_TRAP_ACTION_TRAP) { + err = -EOPNOTSUPP; + goto out; + } + + if (action == dl_trap->trap.action) + goto out; + + action_orig = dl_trap->trap.action; + dl_trap->trap.action = action; + err = mlx5_blocking_notifier_call_chain(dev, MLX5_DRIVER_EVENT_TYPE_TRAP, + &dl_trap->trap); + if (err) + dl_trap->trap.action = action_orig; +out: + return err; +} + +static const struct devlink_ops mlx5_devlink_ops = { +#ifdef CONFIG_MLX5_ESWITCH + .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, + .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, + .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set, + .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get, + .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set, + .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get, + .port_function_hw_addr_get = mlx5_devlink_port_function_hw_addr_get, + .port_function_hw_addr_set = mlx5_devlink_port_function_hw_addr_set, + .rate_leaf_tx_share_set = mlx5_esw_devlink_rate_leaf_tx_share_set, + .rate_leaf_tx_max_set = mlx5_esw_devlink_rate_leaf_tx_max_set, + .rate_node_tx_share_set = mlx5_esw_devlink_rate_node_tx_share_set, + .rate_node_tx_max_set = mlx5_esw_devlink_rate_node_tx_max_set, + .rate_node_new = mlx5_esw_devlink_rate_node_new, + .rate_node_del = mlx5_esw_devlink_rate_node_del, + .rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set, +#endif +#ifdef CONFIG_MLX5_SF_MANAGER + .port_new = mlx5_devlink_sf_port_new, + .port_del = mlx5_devlink_sf_port_del, + .port_fn_state_get = mlx5_devlink_sf_port_fn_state_get, + .port_fn_state_set = mlx5_devlink_sf_port_fn_state_set, +#endif + .flash_update = mlx5_devlink_flash_update, + .info_get = mlx5_devlink_info_get, + .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | + BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE), + .reload_limits = BIT(DEVLINK_RELOAD_LIMIT_NO_RESET), + .reload_down = mlx5_devlink_reload_down, + .reload_up = mlx5_devlink_reload_up, + .trap_init = mlx5_devlink_trap_init, + .trap_fini = mlx5_devlink_trap_fini, + .trap_action_set = mlx5_devlink_trap_action_set, +}; + +void mlx5_devlink_trap_report(struct mlx5_core_dev *dev, int trap_id, struct sk_buff *skb, + struct devlink_port *dl_port) +{ + struct devlink *devlink = priv_to_devlink(dev); + struct mlx5_devlink_trap *dl_trap; + + dl_trap = mlx5_find_trap_by_id(dev, trap_id); + if (!dl_trap) { + mlx5_core_err(dev, "Devlink trap: Report on invalid trap id 0x%x", trap_id); + return; + } + + if (dl_trap->trap.action != DEVLINK_TRAP_ACTION_TRAP) { + mlx5_core_dbg(dev, "Devlink trap: Trap id %d has action %d", trap_id, + dl_trap->trap.action); + return; + } + devlink_trap_report(devlink, skb, dl_trap->item, dl_port, NULL); +} + +int mlx5_devlink_trap_get_num_active(struct mlx5_core_dev *dev) +{ + struct mlx5_devlink_trap *dl_trap; + int count = 0; + + list_for_each_entry(dl_trap, &dev->priv.traps, list) + if (dl_trap->trap.action == DEVLINK_TRAP_ACTION_TRAP) + count++; + + return count; +} + +int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id, + enum devlink_trap_action *action) +{ + struct mlx5_devlink_trap *dl_trap; + + dl_trap = mlx5_find_trap_by_id(dev, trap_id); + if (!dl_trap) { + mlx5_core_err(dev, "Devlink trap: Get action on invalid trap id 0x%x", + trap_id); + return -EINVAL; + } + + *action = dl_trap->trap.action; + return 0; +} + +struct devlink *mlx5_devlink_alloc(struct device *dev) +{ + return 
devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev), + dev); +} + +void mlx5_devlink_free(struct devlink *devlink) +{ + devlink_free(devlink); +} + +static int mlx5_devlink_fs_mode_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + char *value = val.vstr; + int err = 0; + + if (!strcmp(value, "dmfs")) { + return 0; + } else if (!strcmp(value, "smfs")) { + u8 eswitch_mode; + bool smfs_cap; + + eswitch_mode = mlx5_eswitch_mode(dev); + smfs_cap = mlx5_fs_dr_is_supported(dev); + + if (!smfs_cap) { + err = -EOPNOTSUPP; + NL_SET_ERR_MSG_MOD(extack, + "Software managed steering is not supported by current device"); + } + + else if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) { + NL_SET_ERR_MSG_MOD(extack, + "Software managed steering is not supported when eswitch offloads enabled."); + err = -EOPNOTSUPP; + } + } else { + NL_SET_ERR_MSG_MOD(extack, + "Bad parameter: supported values are [\"dmfs\", \"smfs\"]"); + err = -EINVAL; + } + + return err; +} + +static int mlx5_devlink_fs_mode_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + enum mlx5_flow_steering_mode mode; + + if (!strcmp(ctx->val.vstr, "smfs")) + mode = MLX5_FLOW_STEERING_MODE_SMFS; + else + mode = MLX5_FLOW_STEERING_MODE_DMFS; + dev->priv.steering->mode = mode; + + return 0; +} + +static int mlx5_devlink_fs_mode_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) + strcpy(ctx->val.vstr, "smfs"); + else + strcpy(ctx->val.vstr, "dmfs"); + return 0; +} + +static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + bool new_state = val.vbool; + + if (new_state && !MLX5_CAP_GEN(dev, roce) && + !(MLX5_CAP_GEN(dev, roce_rw_supported) && MLX5_CAP_GEN_MAX(dev, roce))) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE"); + return -EOPNOTSUPP; + } + if (mlx5_core_is_mp_slave(dev) || mlx5_lag_is_active(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Multi port slave/Lag device can't configure RoCE"); + return -EOPNOTSUPP; + } + + return 0; +} + +#ifdef CONFIG_MLX5_ESWITCH +static int mlx5_devlink_large_group_num_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + int group_num = val.vu32; + + if (group_num < 1 || group_num > 1024) { + NL_SET_ERR_MSG_MOD(extack, + "Unsupported group number, supported range is 1-1024"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int mlx5_devlink_esw_port_metadata_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!MLX5_ESWITCH_MANAGER(dev)) + return -EOPNOTSUPP; + + return mlx5_esw_offloads_vport_metadata_set(dev->priv.eswitch, ctx->val.vbool); +} + +static int mlx5_devlink_esw_port_metadata_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!MLX5_ESWITCH_MANAGER(dev)) + return -EOPNOTSUPP; + + ctx->val.vbool = mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch); + return 0; +} + +static int mlx5_devlink_esw_port_metadata_validate(struct devlink *devlink, u32 id, + union 
devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u8 esw_mode; + + if (!MLX5_ESWITCH_MANAGER(dev)) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch is unsupported"); + return -EOPNOTSUPP; + } + esw_mode = mlx5_eswitch_mode(dev); + if (esw_mode == MLX5_ESWITCH_OFFLOADS) { + NL_SET_ERR_MSG_MOD(extack, + "E-Switch must be either disabled or in non-switchdev mode"); + return -EBUSY; + } + return 0; +} + +#endif + +static int mlx5_devlink_enable_remote_dev_reset_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + mlx5_fw_reset_enable_remote_dev_reset_set(dev, ctx->val.vbool); + return 0; +} + +static int mlx5_devlink_enable_remote_dev_reset_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + ctx->val.vbool = mlx5_fw_reset_enable_remote_dev_reset_get(dev); + return 0; +} + +static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + return (val.vu32 >= 64 && val.vu32 <= 4096) ? 0 : -EINVAL; +} + +static const struct devlink_param mlx5_devlink_params[] = { + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, + "flow_steering_mode", DEVLINK_PARAM_TYPE_STRING, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlx5_devlink_fs_mode_get, mlx5_devlink_fs_mode_set, + mlx5_devlink_fs_mode_validate), + DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_enable_roce_validate), +#ifdef CONFIG_MLX5_ESWITCH + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, + "fdb_large_groups", DEVLINK_PARAM_TYPE_U32, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, + mlx5_devlink_large_group_num_validate), + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA, + "esw_port_metadata", DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlx5_devlink_esw_port_metadata_get, + mlx5_devlink_esw_port_metadata_set, + mlx5_devlink_esw_port_metadata_validate), +#endif + DEVLINK_PARAM_GENERIC(ENABLE_REMOTE_DEV_RESET, BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlx5_devlink_enable_remote_dev_reset_get, + mlx5_devlink_enable_remote_dev_reset_set, NULL), + DEVLINK_PARAM_GENERIC(IO_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_eq_depth_validate), + DEVLINK_PARAM_GENERIC(EVENT_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_eq_depth_validate), +}; + +static void mlx5_devlink_set_params_init_values(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + union devlink_param_value value; + + value.vbool = MLX5_CAP_GEN(dev, roce); + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, + value); + +#ifdef CONFIG_MLX5_ESWITCH + value.vu32 = ESW_OFFLOADS_DEFAULT_NUM_GROUPS; + devlink_param_driverinit_value_set(devlink, + MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, + value); +#endif + + value.vu32 = MLX5_COMP_EQ_SIZE; + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, + value); + + value.vu32 = MLX5_NUM_ASYNC_EQE; + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, + value); +} + +static const struct devlink_param enable_eth_param = + DEVLINK_PARAM_GENERIC(ENABLE_ETH, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, NULL); + +static int mlx5_devlink_eth_param_register(struct devlink *devlink) 
+{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + union devlink_param_value value; + int err; + + if (!mlx5_eth_supported(dev)) + return 0; + + err = devlink_param_register(devlink, &enable_eth_param); + if (err) + return err; + + value.vbool = true; + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, + value); + return 0; +} + +static void mlx5_devlink_eth_param_unregister(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!mlx5_eth_supported(dev)) + return; + + devlink_param_unregister(devlink, &enable_eth_param); +} + +static int mlx5_devlink_enable_rdma_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + bool new_state = val.vbool; + + if (new_state && !mlx5_rdma_supported(dev)) + return -EOPNOTSUPP; + return 0; +} + +static const struct devlink_param enable_rdma_param = + DEVLINK_PARAM_GENERIC(ENABLE_RDMA, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_enable_rdma_validate); + +static int mlx5_devlink_rdma_param_register(struct devlink *devlink) +{ + union devlink_param_value value; + int err; + + if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) + return 0; + + err = devlink_param_register(devlink, &enable_rdma_param); + if (err) + return err; + + value.vbool = true; + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, + value); + return 0; +} + +static void mlx5_devlink_rdma_param_unregister(struct devlink *devlink) +{ + if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) + return; + + devlink_param_unregister(devlink, &enable_rdma_param); +} + +static const struct devlink_param enable_vnet_param = + DEVLINK_PARAM_GENERIC(ENABLE_VNET, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, NULL); + +static int mlx5_devlink_vnet_param_register(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + union devlink_param_value value; + int err; + + if (!mlx5_vnet_supported(dev)) + return 0; + + err = devlink_param_register(devlink, &enable_vnet_param); + if (err) + return err; + + value.vbool = true; + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, + value); + return 0; +} + +static void mlx5_devlink_vnet_param_unregister(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!mlx5_vnet_supported(dev)) + return; + + devlink_param_unregister(devlink, &enable_vnet_param); +} + +static int mlx5_devlink_auxdev_params_register(struct devlink *devlink) +{ + int err; + + err = mlx5_devlink_eth_param_register(devlink); + if (err) + return err; + + err = mlx5_devlink_rdma_param_register(devlink); + if (err) + goto rdma_err; + + err = mlx5_devlink_vnet_param_register(devlink); + if (err) + goto vnet_err; + return 0; + +vnet_err: + mlx5_devlink_rdma_param_unregister(devlink); +rdma_err: + mlx5_devlink_eth_param_unregister(devlink); + return err; +} + +static void mlx5_devlink_auxdev_params_unregister(struct devlink *devlink) +{ + mlx5_devlink_vnet_param_unregister(devlink); + mlx5_devlink_rdma_param_unregister(devlink); + mlx5_devlink_eth_param_unregister(devlink); +} + +static int mlx5_devlink_max_uc_list_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (val.vu32 == 0) { + NL_SET_ERR_MSG_MOD(extack, "max_macs value must be greater than 0"); + return -EINVAL; + } + 
+ if (!is_power_of_2(val.vu32)) { + NL_SET_ERR_MSG_MOD(extack, "Only power of 2 values are supported for max_macs"); + return -EINVAL; + } + + if (ilog2(val.vu32) > + MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list)) { + NL_SET_ERR_MSG_MOD(extack, "max_macs value is out of the supported range"); + return -EINVAL; + } + + return 0; +} + +static const struct devlink_param max_uc_list_param = + DEVLINK_PARAM_GENERIC(MAX_MACS, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_max_uc_list_validate); + +static int mlx5_devlink_max_uc_list_param_register(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + union devlink_param_value value; + int err; + + if (!MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list_wr_supported)) + return 0; + + err = devlink_param_register(devlink, &max_uc_list_param); + if (err) + return err; + + value.vu32 = 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list); + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + value); + return 0; +} + +static void +mlx5_devlink_max_uc_list_param_unregister(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list_wr_supported)) + return; + + devlink_param_unregister(devlink, &max_uc_list_param); +} + +#define MLX5_TRAP_DROP(_id, _group_id) \ + DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ + DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT) + +static const struct devlink_trap mlx5_traps_arr[] = { + MLX5_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS), + MLX5_TRAP_DROP(DMAC_FILTER, L2_DROPS), +}; + +static const struct devlink_trap_group mlx5_trap_groups_arr[] = { + DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS, 0), +}; + +static int mlx5_devlink_traps_register(struct devlink *devlink) +{ + struct mlx5_core_dev *core_dev = devlink_priv(devlink); + int err; + + err = devl_trap_groups_register(devlink, mlx5_trap_groups_arr, + ARRAY_SIZE(mlx5_trap_groups_arr)); + if (err) + return err; + + err = devl_traps_register(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr), + &core_dev->priv); + if (err) + goto err_trap_group; + return 0; + +err_trap_group: + devl_trap_groups_unregister(devlink, mlx5_trap_groups_arr, + ARRAY_SIZE(mlx5_trap_groups_arr)); + return err; +} + +static void mlx5_devlink_traps_unregister(struct devlink *devlink) +{ + devl_traps_unregister(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr)); + devl_trap_groups_unregister(devlink, mlx5_trap_groups_arr, + ARRAY_SIZE(mlx5_trap_groups_arr)); +} + +int mlx5_devlink_register(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + int err; + + err = devlink_params_register(devlink, mlx5_devlink_params, + ARRAY_SIZE(mlx5_devlink_params)); + if (err) + return err; + + mlx5_devlink_set_params_init_values(devlink); + + err = mlx5_devlink_auxdev_params_register(devlink); + if (err) + goto auxdev_reg_err; + + err = mlx5_devlink_max_uc_list_param_register(devlink); + if (err) + goto max_uc_list_err; + + err = mlx5_devlink_traps_register(devlink); + if (err) + goto traps_reg_err; + + if (!mlx5_core_is_mp_slave(dev)) + devlink_set_features(devlink, DEVLINK_F_RELOAD); + + return 0; + +traps_reg_err: + mlx5_devlink_max_uc_list_param_unregister(devlink); +max_uc_list_err: + mlx5_devlink_auxdev_params_unregister(devlink); +auxdev_reg_err: + devlink_params_unregister(devlink, mlx5_devlink_params, + ARRAY_SIZE(mlx5_devlink_params)); + return err; +} + +void mlx5_devlink_unregister(struct devlink *devlink) +{ + 
mlx5_devlink_traps_unregister(devlink); + mlx5_devlink_max_uc_list_param_unregister(devlink); + mlx5_devlink_auxdev_params_unregister(devlink); + devlink_params_unregister(devlink, mlx5_devlink_params, + ARRAY_SIZE(mlx5_devlink_params)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h new file mode 100644 index 000000000..30bf48827 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019, Mellanox Technologies */ + +#ifndef __MLX5_DEVLINK_H__ +#define __MLX5_DEVLINK_H__ + +#include <net/devlink.h> + +enum mlx5_devlink_param_id { + MLX5_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, + MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, + MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA, +}; + +struct mlx5_trap_ctx { + int id; + int action; +}; + +struct mlx5_devlink_trap { + struct mlx5_trap_ctx trap; + void *item; + struct list_head list; +}; + +struct mlx5_core_dev; +void mlx5_devlink_trap_report(struct mlx5_core_dev *dev, int trap_id, struct sk_buff *skb, + struct devlink_port *dl_port); +int mlx5_devlink_trap_get_num_active(struct mlx5_core_dev *dev); +int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id, + enum devlink_trap_action *action); + +struct devlink *mlx5_devlink_alloc(struct device *dev); +void mlx5_devlink_free(struct devlink *devlink); +int mlx5_devlink_register(struct devlink *devlink); +void mlx5_devlink_unregister(struct devlink *devlink); + +#endif /* __MLX5_DEVLINK_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h new file mode 100644 index 000000000..406ebe174 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_CMD_TP_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_CMD_TP_H_ + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +TRACE_EVENT(mlx5_cmd, + TP_PROTO(const char *command_str, u16 opcode, u16 op_mod, + const char *status_str, u8 status, u32 syndrome, int err), + TP_ARGS(command_str, opcode, op_mod, status_str, status, syndrome, err), + TP_STRUCT__entry(__string(command_str, command_str) + __field(u16, opcode) + __field(u16, op_mod) + __string(status_str, status_str) + __field(u8, status) + __field(u32, syndrome) + __field(int, err) + ), + TP_fast_assign(__assign_str(command_str, command_str); + __entry->opcode = opcode; + __entry->op_mod = op_mod; + __assign_str(status_str, status_str); + __entry->status = status; + __entry->syndrome = syndrome; + __entry->err = err; + ), + TP_printk("%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x), err(%d)", + __get_str(command_str), __entry->opcode, __entry->op_mod, + __get_str(status_str), __entry->status, __entry->syndrome, + __entry->err) +); + +#endif /* _MLX5_CMD_TP_H_ */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ./diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE cmd_tracepoint +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c new file mode 100644 index 000000000..28d02749d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#include "lib/pci_vsc.h" +#include "lib/mlx5.h" + +#define BAD_ACCESS 0xBADACCE5 +#define MLX5_PROTECTED_CR_SCAN_CRSPACE 0x7 + +static bool mlx5_crdump_enabled(struct mlx5_core_dev *dev) +{ + return !!dev->priv.health.crdump_size; +} + +static int mlx5_crdump_fill(struct mlx5_core_dev *dev, u32 *cr_data) +{ + u32 crdump_size = dev->priv.health.crdump_size; + int i, ret; + + for (i = 0; i < (crdump_size / 4); i++) + cr_data[i] = BAD_ACCESS; + + ret = mlx5_vsc_gw_read_block_fast(dev, cr_data, crdump_size); + if (ret <= 0) { + if (ret == 0) + return -EIO; + return ret; + } + + if (crdump_size != ret) { + mlx5_core_warn(dev, "failed to read full dump, read %d out of %u\n", + ret, crdump_size); + return -EINVAL; + } + + return 0; +} + +int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data) +{ + int ret; + + if (!mlx5_crdump_enabled(dev)) + return -ENODEV; + + ret = mlx5_vsc_gw_lock(dev); + if (ret) { + mlx5_core_warn(dev, "crdump: failed to lock vsc gw err %d\n", + ret); + return ret; + } + /* Verify no other PF is running cr-dump or sw reset */ + ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, + MLX5_VSC_LOCK); + if (ret) { + mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n"); + goto unlock_gw; + } + + ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, NULL); + if (ret) + goto unlock_sem; + + ret = mlx5_crdump_fill(dev, cr_data); + +unlock_sem: + mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, MLX5_VSC_UNLOCK); +unlock_gw: + mlx5_vsc_gw_unlock(dev); + return ret; +} + +int mlx5_crdump_enable(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + u32 space_size; + int ret; + + if (!mlx5_core_is_pf(dev) || !mlx5_vsc_accessible(dev) || + mlx5_crdump_enabled(dev)) + return 0; + + ret = mlx5_vsc_gw_lock(dev); + if (ret) + return ret; + + /* 
Check if space is supported and get space size */ + ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, + &space_size); + if (ret) { + /* Unlock and mask error since space is not supported */ + mlx5_vsc_gw_unlock(dev); + return 0; + } + + if (!space_size) { + mlx5_core_warn(dev, "Invalid Crspace size, zero\n"); + mlx5_vsc_gw_unlock(dev); + return -EINVAL; + } + + ret = mlx5_vsc_gw_unlock(dev); + if (ret) + return ret; + + priv->health.crdump_size = space_size; + return 0; +} + +void mlx5_crdump_disable(struct mlx5_core_dev *dev) +{ + dev->priv.health.crdump_size = 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h new file mode 100644 index 000000000..f15718db5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_EN_REP_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_EN_REP_TP_ + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> +#include "en_rep.h" + +TRACE_EVENT(mlx5e_rep_neigh_update, + TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, const u8 *ha, + bool neigh_connected), + TP_ARGS(nhe, ha, neigh_connected), + TP_STRUCT__entry(__string(devname, nhe->neigh_dev->name) + __array(u8, ha, ETH_ALEN) + __array(u8, v4, 4) + __array(u8, v6, 16) + __field(bool, neigh_connected) + ), + TP_fast_assign(const struct mlx5e_neigh *mn = &nhe->m_neigh; + struct in6_addr *pin6; + __be32 *p32; + + __assign_str(devname, nhe->neigh_dev->name); + __entry->neigh_connected = neigh_connected; + memcpy(__entry->ha, ha, ETH_ALEN); + + p32 = (__be32 *)__entry->v4; + pin6 = (struct in6_addr *)__entry->v6; + if (mn->family == AF_INET) { + *p32 = mn->dst_ip.v4; + ipv6_addr_set_v4mapped(*p32, pin6); + } else if (mn->family == AF_INET6) { + *pin6 = mn->dst_ip.v6; + } + ), + TP_printk("netdev: %s MAC: %pM IPv4: %pI4 IPv6: %pI6c neigh_connected=%d\n", + __get_str(devname), __entry->ha, + __entry->v4, __entry->v6, __entry->neigh_connected + ) +); + +#endif /* _MLX5_EN_REP_TP_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ./diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE en_rep_tracepoint +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c new file mode 100644 index 000000000..c5dc6c50f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#define CREATE_TRACE_POINTS +#include "en_tc_tracepoint.h" + +void put_ids_to_array(int *ids, + const struct flow_action_entry *entries, + unsigned int num) +{ + unsigned int i; + + for (i = 0; i < num; i++) + ids[i] = entries[i].id; +} + +#define NAME_SIZE 16 + +static const char FLOWACT2STR[NUM_FLOW_ACTIONS][NAME_SIZE] = { + [FLOW_ACTION_ACCEPT] = "ACCEPT", + [FLOW_ACTION_DROP] = "DROP", + [FLOW_ACTION_TRAP] = "TRAP", + [FLOW_ACTION_GOTO] = "GOTO", + [FLOW_ACTION_REDIRECT] = "REDIRECT", + [FLOW_ACTION_MIRRED] = "MIRRED", + [FLOW_ACTION_VLAN_PUSH] = "VLAN_PUSH", + [FLOW_ACTION_VLAN_POP] = "VLAN_POP", + [FLOW_ACTION_VLAN_MANGLE] = "VLAN_MANGLE", + [FLOW_ACTION_TUNNEL_ENCAP] = "TUNNEL_ENCAP", + [FLOW_ACTION_TUNNEL_DECAP] = "TUNNEL_DECAP", + [FLOW_ACTION_MANGLE] = "MANGLE", + [FLOW_ACTION_ADD] = "ADD", + [FLOW_ACTION_CSUM] = "CSUM", + [FLOW_ACTION_MARK] = "MARK", + [FLOW_ACTION_WAKE] = "WAKE", + [FLOW_ACTION_QUEUE] = "QUEUE", + [FLOW_ACTION_SAMPLE] = "SAMPLE", + [FLOW_ACTION_POLICE] = "POLICE", + [FLOW_ACTION_CT] = "CT", +}; + +const char *parse_action(struct trace_seq *p, + int *ids, + unsigned int num) +{ + const char *ret = trace_seq_buffer_ptr(p); + unsigned int i; + + for (i = 0; i < num; i++) { + if (ids[i] < NUM_FLOW_ACTIONS) + trace_seq_printf(p, "%s ", FLOWACT2STR[ids[i]]); + else + trace_seq_printf(p, "UNKNOWN "); + } + + trace_seq_putc(p, 0); + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h new file mode 100644 index 000000000..ac52ef37f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_TC_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_TC_TP_ + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> +#include <net/flow_offload.h> +#include "en_rep.h" + +#define __parse_action(ids, num) parse_action(p, ids, num) + +void put_ids_to_array(int *ids, + const struct flow_action_entry *entries, + unsigned int num); + +const char *parse_action(struct trace_seq *p, + int *ids, + unsigned int num); + +DECLARE_EVENT_CLASS(mlx5e_flower_template, + TP_PROTO(const struct flow_cls_offload *f), + TP_ARGS(f), + TP_STRUCT__entry(__field(void *, cookie) + __field(unsigned int, num) + __dynamic_array(int, ids, f->rule ? + f->rule->action.num_entries : 0) + ), + TP_fast_assign(__entry->cookie = (void *)f->cookie; + __entry->num = (f->rule ? + f->rule->action.num_entries : 0); + if (__entry->num) + put_ids_to_array(__get_dynamic_array(ids), + f->rule->action.entries, + f->rule->action.num_entries); + ), + TP_printk("cookie=%p actions= %s\n", + __entry->cookie, __entry->num ? 
+ __parse_action(__get_dynamic_array(ids), + __entry->num) : "NULL" + ) +); + +DEFINE_EVENT(mlx5e_flower_template, mlx5e_configure_flower, + TP_PROTO(const struct flow_cls_offload *f), + TP_ARGS(f) + ); + +DEFINE_EVENT(mlx5e_flower_template, mlx5e_delete_flower, + TP_PROTO(const struct flow_cls_offload *f), + TP_ARGS(f) + ); + +TRACE_EVENT(mlx5e_stats_flower, + TP_PROTO(const struct flow_cls_offload *f), + TP_ARGS(f), + TP_STRUCT__entry(__field(void *, cookie) + __field(u64, bytes) + __field(u64, packets) + __field(u64, lastused) + ), + TP_fast_assign(__entry->cookie = (void *)f->cookie; + __entry->bytes = f->stats.bytes; + __entry->packets = f->stats.pkts; + __entry->lastused = f->stats.lastused; + ), + TP_printk("cookie=%p bytes=%llu packets=%llu lastused=%llu\n", + __entry->cookie, __entry->bytes, + __entry->packets, __entry->lastused + ) +); + +TRACE_EVENT(mlx5e_tc_update_neigh_used_value, + TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, bool neigh_used), + TP_ARGS(nhe, neigh_used), + TP_STRUCT__entry(__string(devname, nhe->neigh_dev->name) + __array(u8, v4, 4) + __array(u8, v6, 16) + __field(bool, neigh_used) + ), + TP_fast_assign(const struct mlx5e_neigh *mn = &nhe->m_neigh; + struct in6_addr *pin6; + __be32 *p32; + + __assign_str(devname, nhe->neigh_dev->name); + __entry->neigh_used = neigh_used; + + p32 = (__be32 *)__entry->v4; + pin6 = (struct in6_addr *)__entry->v6; + if (mn->family == AF_INET) { + *p32 = mn->dst_ip.v4; + ipv6_addr_set_v4mapped(*p32, pin6); + } else if (mn->family == AF_INET6) { + *pin6 = mn->dst_ip.v6; + } + ), + TP_printk("netdev: %s IPv4: %pI4 IPv6: %pI6c neigh_used=%d\n", + __get_str(devname), __entry->v4, __entry->v6, + __entry->neigh_used + ) +); + +#endif /* _MLX5_TC_TP_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ./diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE en_tc_tracepoint +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c new file mode 100644 index 000000000..c5bb79a4f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -0,0 +1,279 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define CREATE_TRACE_POINTS + +#include "fs_tracepoint.h" +#include <linux/stringify.h> + +#define DECLARE_MASK_VAL(type, name) struct {type m; type v; } name +#define MASK_VAL(type, spec, name, mask, val, fld) \ + DECLARE_MASK_VAL(type, name) = \ + {.m = MLX5_GET(spec, mask, fld),\ + .v = MLX5_GET(spec, val, fld)} +#define MASK_VAL_BE(type, spec, name, mask, val, fld) \ + DECLARE_MASK_VAL(type, name) = \ + {.m = MLX5_GET_BE(type, spec, mask, fld),\ + .v = MLX5_GET_BE(type, spec, val, fld)} +#define GET_MASKED_VAL(name) (name.m & name.v) + +#define GET_MASK_VAL(name, type, mask, val, fld) \ + (name.m = MLX5_GET(type, mask, fld), \ + name.v = MLX5_GET(type, val, fld), \ + name.m & name.v) +#define PRINT_MASKED_VAL(name, p, format) { \ + if (name.m) \ + trace_seq_printf(p, __stringify(name) "=" format " ", name.v); \ + } +#define PRINT_MASKED_VALP(name, cast, p, format) { \ + if (name.m) \ + trace_seq_printf(p, __stringify(name) "=" format " ", \ + (cast)&name.v);\ + } + +static void print_lyr_2_4_hdrs(struct trace_seq *p, + const u32 *mask, const u32 *value) +{ +#define MASK_VAL_L2(type, name, fld) \ + MASK_VAL(type, fte_match_set_lyr_2_4, name, mask, value, fld) + DECLARE_MASK_VAL(u64, smac) = { + .m = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, mask, smac_15_0), + .v = MLX5_GET(fte_match_set_lyr_2_4, value, smac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, value, smac_15_0)}; + DECLARE_MASK_VAL(u64, dmac) = { + .m = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_15_0), + .v = MLX5_GET(fte_match_set_lyr_2_4, value, dmac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, value, dmac_15_0)}; + MASK_VAL_L2(u16, ethertype, ethertype); + MASK_VAL_L2(u8, ip_version, ip_version); + + PRINT_MASKED_VALP(smac, u8 *, p, "%pM"); + PRINT_MASKED_VALP(dmac, u8 *, p, "%pM"); + PRINT_MASKED_VAL(ethertype, p, "%04x"); + + if ((ethertype.m == 0xffff && ethertype.v == ETH_P_IP) || + (ip_version.m == 0xf && ip_version.v == 4)) { +#define MASK_VAL_L2_BE(type, name, fld) \ + MASK_VAL_BE(type, fte_match_set_lyr_2_4, name, mask, value, fld) + MASK_VAL_L2_BE(u32, src_ipv4, + src_ipv4_src_ipv6.ipv4_layout.ipv4); + MASK_VAL_L2_BE(u32, dst_ipv4, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + + PRINT_MASKED_VALP(src_ipv4, typeof(&src_ipv4.v), p, + "%pI4"); + PRINT_MASKED_VALP(dst_ipv4, typeof(&dst_ipv4.v), p, + "%pI4"); + } else if ((ethertype.m == 0xffff && ethertype.v == ETH_P_IPV6) || + (ip_version.m == 0xf && ip_version.v == 6)) { + static const struct in6_addr full_ones = { + .in6_u.u6_addr32 = {__constant_htonl(0xffffffff), + __constant_htonl(0xffffffff), + __constant_htonl(0xffffffff), + __constant_htonl(0xffffffff)}, + }; + DECLARE_MASK_VAL(struct in6_addr, src_ipv6); + DECLARE_MASK_VAL(struct in6_addr, dst_ipv6); + + memcpy(src_ipv6.m.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(src_ipv6.m)); + memcpy(dst_ipv6.m.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(dst_ipv6.m)); + memcpy(src_ipv6.v.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, value, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(src_ipv6.v)); 
+ memcpy(dst_ipv6.v.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, value, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(dst_ipv6.v)); + + if (!memcmp(&src_ipv6.m, &full_ones, sizeof(full_ones))) + trace_seq_printf(p, "src_ipv6=%pI6 ", + &src_ipv6.v); + if (!memcmp(&dst_ipv6.m, &full_ones, sizeof(full_ones))) + trace_seq_printf(p, "dst_ipv6=%pI6 ", + &dst_ipv6.v); + } + +#define PRINT_MASKED_VAL_L2(type, name, fld, p, format) {\ + MASK_VAL_L2(type, name, fld); \ + PRINT_MASKED_VAL(name, p, format); \ +} + + PRINT_MASKED_VAL_L2(u8, ip_protocol, ip_protocol, p, "%02x"); + PRINT_MASKED_VAL_L2(u16, tcp_flags, tcp_flags, p, "%x"); + PRINT_MASKED_VAL_L2(u16, tcp_sport, tcp_sport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, tcp_dport, tcp_dport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, udp_sport, udp_sport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, udp_dport, udp_dport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, first_vid, first_vid, p, "%04x"); + PRINT_MASKED_VAL_L2(u8, first_prio, first_prio, p, "%x"); + PRINT_MASKED_VAL_L2(u8, first_cfi, first_cfi, p, "%d"); + PRINT_MASKED_VAL_L2(u8, ip_dscp, ip_dscp, p, "%02x"); + PRINT_MASKED_VAL_L2(u8, ip_ecn, ip_ecn, p, "%x"); + PRINT_MASKED_VAL_L2(u8, cvlan_tag, cvlan_tag, p, "%d"); + PRINT_MASKED_VAL_L2(u8, svlan_tag, svlan_tag, p, "%d"); + PRINT_MASKED_VAL_L2(u8, frag, frag, p, "%d"); +} + +static void print_misc_parameters_hdrs(struct trace_seq *p, + const u32 *mask, const u32 *value) +{ +#define MASK_VAL_MISC(type, name, fld) \ + MASK_VAL(type, fte_match_set_misc, name, mask, value, fld) +#define PRINT_MASKED_VAL_MISC(type, name, fld, p, format) {\ + MASK_VAL_MISC(type, name, fld); \ + PRINT_MASKED_VAL(name, p, format); \ +} + DECLARE_MASK_VAL(u64, gre_key) = { + .m = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.hi) << 8 | + MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.lo), + .v = MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.hi) << 8 | + MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.lo)}; + + PRINT_MASKED_VAL(gre_key, p, "%llu"); + PRINT_MASKED_VAL_MISC(u32, source_sqn, source_sqn, p, "%u"); + PRINT_MASKED_VAL_MISC(u16, source_port, source_port, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, outer_second_prio, outer_second_prio, + p, "%u"); + PRINT_MASKED_VAL_MISC(u8, outer_second_cfi, outer_second_cfi, p, "%u"); + PRINT_MASKED_VAL_MISC(u16, outer_second_vid, outer_second_vid, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_prio, inner_second_prio, + p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_cfi, inner_second_cfi, p, "%u"); + PRINT_MASKED_VAL_MISC(u16, inner_second_vid, inner_second_vid, p, "%u"); + + PRINT_MASKED_VAL_MISC(u8, outer_second_cvlan_tag, + outer_second_cvlan_tag, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_cvlan_tag, + inner_second_cvlan_tag, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, outer_second_svlan_tag, + outer_second_svlan_tag, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_svlan_tag, + inner_second_svlan_tag, p, "%u"); + + PRINT_MASKED_VAL_MISC(u8, gre_protocol, gre_protocol, p, "%u"); + + PRINT_MASKED_VAL_MISC(u32, vxlan_vni, vxlan_vni, p, "%u"); + PRINT_MASKED_VAL_MISC(u32, outer_ipv6_flow_label, outer_ipv6_flow_label, + p, "%x"); + PRINT_MASKED_VAL_MISC(u32, inner_ipv6_flow_label, inner_ipv6_flow_label, + p, "%x"); +} + +const char *parse_fs_hdrs(struct trace_seq *p, + u8 match_criteria_enable, + const u32 *mask_outer, + const u32 *mask_misc, + const u32 *mask_inner, + const u32 *value_outer, + const u32 *value_misc, + const u32 *value_inner) +{ + const char *ret = trace_seq_buffer_ptr(p); + + if 
(match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) { + trace_seq_printf(p, "[outer] "); + print_lyr_2_4_hdrs(p, mask_outer, value_outer); + } + + if (match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) { + trace_seq_printf(p, "[misc] "); + print_misc_parameters_hdrs(p, mask_misc, value_misc); + } + if (match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) { + trace_seq_printf(p, "[inner] "); + print_lyr_2_4_hdrs(p, mask_inner, value_inner); + } + trace_seq_putc(p, 0); + return ret; +} + +const char *parse_fs_dst(struct trace_seq *p, + const struct mlx5_flow_destination *dst, + u32 counter_id) +{ + const char *ret = trace_seq_buffer_ptr(p); + + switch (dst->type) { + case MLX5_FLOW_DESTINATION_TYPE_UPLINK: + trace_seq_printf(p, "uplink\n"); + break; + case MLX5_FLOW_DESTINATION_TYPE_VPORT: + trace_seq_printf(p, "vport=%u\n", dst->vport.num); + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + trace_seq_printf(p, "ft=%p\n", dst->ft); + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: + trace_seq_printf(p, "ft_num=%u\n", dst->ft_num); + break; + case MLX5_FLOW_DESTINATION_TYPE_TIR: + trace_seq_printf(p, "tir=%u\n", dst->tir_num); + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER: + trace_seq_printf(p, "sampler_id=%u\n", dst->sampler_id); + break; + case MLX5_FLOW_DESTINATION_TYPE_COUNTER: + trace_seq_printf(p, "counter_id=%u\n", counter_id); + break; + case MLX5_FLOW_DESTINATION_TYPE_PORT: + trace_seq_printf(p, "port\n"); + break; + case MLX5_FLOW_DESTINATION_TYPE_NONE: + trace_seq_printf(p, "none\n"); + break; + } + + trace_seq_putc(p, 0); + return ret; +} + +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_ft); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_ft); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_fg); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fg); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_set_fte); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fte); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_rule); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_rule); + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h new file mode 100644 index 000000000..ddf1b87f1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -0,0 +1,323 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#if !defined(_MLX5_FS_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_FS_TP_ + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> +#include "../fs_core.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#define __parse_fs_hdrs(match_criteria_enable, mouter, mmisc, minner, vouter, \ + vinner, vmisc) \ + parse_fs_hdrs(p, match_criteria_enable, mouter, mmisc, minner, vouter,\ + vinner, vmisc) + +const char *parse_fs_hdrs(struct trace_seq *p, + u8 match_criteria_enable, + const u32 *mask_outer, + const u32 *mask_misc, + const u32 *mask_inner, + const u32 *value_outer, + const u32 *value_misc, + const u32 *value_inner); + +#define __parse_fs_dst(dst, counter_id) \ + parse_fs_dst(p, (const struct mlx5_flow_destination *)dst, counter_id) + +const char *parse_fs_dst(struct trace_seq *p, + const struct mlx5_flow_destination *dst, + u32 counter_id); + +TRACE_EVENT(mlx5_fs_add_ft, + TP_PROTO(const struct mlx5_flow_table *ft), + TP_ARGS(ft), + TP_STRUCT__entry( + __field(const struct mlx5_flow_table *, ft) + __field(u32, id) + __field(u32, level) + __field(u32, type) + ), + TP_fast_assign( + __entry->ft = ft; + __entry->id = ft->id; + __entry->level = ft->level; + __entry->type = ft->type; + ), + TP_printk("ft=%p id=%u level=%u type=%u \n", + __entry->ft, __entry->id, __entry->level, __entry->type) + ); + +TRACE_EVENT(mlx5_fs_del_ft, + TP_PROTO(const struct mlx5_flow_table *ft), + TP_ARGS(ft), + TP_STRUCT__entry( + __field(const struct mlx5_flow_table *, ft) + __field(u32, id) + ), + TP_fast_assign( + __entry->ft = ft; + __entry->id = ft->id; + + ), + TP_printk("ft=%p id=%u\n", + __entry->ft, __entry->id) + ); + +TRACE_EVENT(mlx5_fs_add_fg, + TP_PROTO(const struct mlx5_flow_group *fg), + TP_ARGS(fg), + TP_STRUCT__entry( + __field(const struct mlx5_flow_group *, fg) + __field(const struct mlx5_flow_table *, ft) + __field(u32, start_index) + __field(u32, end_index) + __field(u32, id) + __field(u8, mask_enable) + __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) + ), + TP_fast_assign( + __entry->fg = fg; + fs_get_obj(__entry->ft, fg->node.parent); + __entry->start_index = fg->start_index; + __entry->end_index = fg->start_index + fg->max_ftes; + __entry->id = fg->id; + __entry->mask_enable = fg->mask.match_criteria_enable; + memcpy(__entry->mask_outer, + MLX5_ADDR_OF(fte_match_param, + &fg->mask.match_criteria, + outer_headers), + sizeof(__entry->mask_outer)); + memcpy(__entry->mask_inner, + MLX5_ADDR_OF(fte_match_param, + &fg->mask.match_criteria, + inner_headers), + sizeof(__entry->mask_inner)); + memcpy(__entry->mask_misc, + MLX5_ADDR_OF(fte_match_param, + &fg->mask.match_criteria, + misc_parameters), + sizeof(__entry->mask_misc)); + + ), + TP_printk("fg=%p ft=%p id=%u start=%u end=%u bit_mask=%02x %s\n", + __entry->fg, __entry->ft, __entry->id, + __entry->start_index, __entry->end_index, + __entry->mask_enable, + __parse_fs_hdrs(__entry->mask_enable, + __entry->mask_outer, + __entry->mask_misc, + __entry->mask_inner, + __entry->mask_outer, + __entry->mask_misc, + __entry->mask_inner)) + ); + +TRACE_EVENT(mlx5_fs_del_fg, + TP_PROTO(const struct mlx5_flow_group *fg), 
+ TP_ARGS(fg), + TP_STRUCT__entry( + __field(const struct mlx5_flow_group *, fg) + __field(u32, id) + ), + TP_fast_assign( + __entry->fg = fg; + __entry->id = fg->id; + + ), + TP_printk("fg=%p id=%u\n", + __entry->fg, __entry->id) + ); + +#define ACTION_FLAGS \ + {MLX5_FLOW_CONTEXT_ACTION_ALLOW, "ALLOW"},\ + {MLX5_FLOW_CONTEXT_ACTION_DROP, "DROP"},\ + {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, "FWD"},\ + {MLX5_FLOW_CONTEXT_ACTION_COUNT, "CNT"},\ + {MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT, "REFORMAT"},\ + {MLX5_FLOW_CONTEXT_ACTION_DECAP, "DECAP"},\ + {MLX5_FLOW_CONTEXT_ACTION_MOD_HDR, "MOD_HDR"},\ + {MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH, "VLAN_PUSH"},\ + {MLX5_FLOW_CONTEXT_ACTION_VLAN_POP, "VLAN_POP"},\ + {MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2, "VLAN_PUSH_2"},\ + {MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2, "VLAN_POP_2"},\ + {MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO, "NEXT_PRIO"} + +TRACE_EVENT(mlx5_fs_set_fte, + TP_PROTO(const struct fs_fte *fte, int new_fte), + TP_ARGS(fte, new_fte), + TP_STRUCT__entry( + __field(const struct fs_fte *, fte) + __field(const struct mlx5_flow_group *, fg) + __field(u32, group_index) + __field(u32, index) + __field(u32, action) + __field(u32, flow_tag) + __field(u32, flow_source) + __field(u8, mask_enable) + __field(int, new_fte) + __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) + __array(u32, value_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, value_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, value_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) + ), + TP_fast_assign( + __entry->fte = fte; + __entry->new_fte = new_fte; + fs_get_obj(__entry->fg, fte->node.parent); + __entry->group_index = __entry->fg->id; + __entry->index = fte->index; + __entry->action = fte->action.action; + __entry->mask_enable = __entry->fg->mask.match_criteria_enable; + __entry->flow_tag = fte->flow_context.flow_tag; + __entry->flow_source = fte->flow_context.flow_source; + memcpy(__entry->mask_outer, + MLX5_ADDR_OF(fte_match_param, + &__entry->fg->mask.match_criteria, + outer_headers), + sizeof(__entry->mask_outer)); + memcpy(__entry->mask_inner, + MLX5_ADDR_OF(fte_match_param, + &__entry->fg->mask.match_criteria, + inner_headers), + sizeof(__entry->mask_inner)); + memcpy(__entry->mask_misc, + MLX5_ADDR_OF(fte_match_param, + &__entry->fg->mask.match_criteria, + misc_parameters), + sizeof(__entry->mask_misc)); + memcpy(__entry->value_outer, + MLX5_ADDR_OF(fte_match_param, + &fte->val, + outer_headers), + sizeof(__entry->value_outer)); + memcpy(__entry->value_inner, + MLX5_ADDR_OF(fte_match_param, + &fte->val, + inner_headers), + sizeof(__entry->value_inner)); + memcpy(__entry->value_misc, + MLX5_ADDR_OF(fte_match_param, + &fte->val, + misc_parameters), + sizeof(__entry->value_misc)); + + ), + TP_printk("op=%s fte=%p fg=%p index=%u group_index=%u action=<%s> flow_tag=%x %s\n", + __entry->new_fte ? 
"add" : "set", + __entry->fte, __entry->fg, __entry->index, + __entry->group_index, __print_flags(__entry->action, "|", + ACTION_FLAGS), + __entry->flow_tag, + __parse_fs_hdrs(__entry->mask_enable, + __entry->mask_outer, + __entry->mask_misc, + __entry->mask_inner, + __entry->value_outer, + __entry->value_misc, + __entry->value_inner)) + ); + +TRACE_EVENT(mlx5_fs_del_fte, + TP_PROTO(const struct fs_fte *fte), + TP_ARGS(fte), + TP_STRUCT__entry( + __field(const struct fs_fte *, fte) + __field(u32, index) + ), + TP_fast_assign( + __entry->fte = fte; + __entry->index = fte->index; + + ), + TP_printk("fte=%p index=%u\n", + __entry->fte, __entry->index) + ); + +TRACE_EVENT(mlx5_fs_add_rule, + TP_PROTO(const struct mlx5_flow_rule *rule), + TP_ARGS(rule), + TP_STRUCT__entry( + __field(const struct mlx5_flow_rule *, rule) + __field(const struct fs_fte *, fte) + __field(u32, sw_action) + __field(u32, index) + __field(u32, counter_id) + __array(u8, destination, sizeof(struct mlx5_flow_destination)) + ), + TP_fast_assign( + __entry->rule = rule; + fs_get_obj(__entry->fte, rule->node.parent); + __entry->index = __entry->fte->dests_size - 1; + __entry->sw_action = rule->sw_action; + memcpy(__entry->destination, + &rule->dest_attr, + sizeof(__entry->destination)); + if (rule->dest_attr.type & + MLX5_FLOW_DESTINATION_TYPE_COUNTER) + __entry->counter_id = + rule->dest_attr.counter_id; + ), + TP_printk("rule=%p fte=%p index=%u sw_action=<%s> [dst] %s\n", + __entry->rule, __entry->fte, __entry->index, + __print_flags(__entry->sw_action, "|", ACTION_FLAGS), + __parse_fs_dst(__entry->destination, __entry->counter_id)) + ); + +TRACE_EVENT(mlx5_fs_del_rule, + TP_PROTO(const struct mlx5_flow_rule *rule), + TP_ARGS(rule), + TP_STRUCT__entry( + __field(const struct mlx5_flow_rule *, rule) + __field(const struct fs_fte *, fte) + ), + TP_fast_assign( + __entry->rule = rule; + fs_get_obj(__entry->fte, rule->node.parent); + ), + TP_printk("rule=%p fte=%p\n", + __entry->rule, __entry->fte) + ); +#endif + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ./diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE fs_tracepoint +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c new file mode 100644 index 000000000..3ba54ffa5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -0,0 +1,1154 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#define CREATE_TRACE_POINTS +#include "lib/eq.h" +#include "fw_tracer.h" +#include "fw_tracer_tracepoint.h" + +static int mlx5_query_mtrc_caps(struct mlx5_fw_tracer *tracer) +{ + u32 *string_db_base_address_out = tracer->str_db.base_address_out; + u32 *string_db_size_out = tracer->str_db.size_out; + struct mlx5_core_dev *dev = tracer->dev; + u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0}; + void *mtrc_cap_sp; + int err, i; + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MTRC_CAP, 0, 0); + if (err) { + mlx5_core_warn(dev, "FWTracer: Error reading tracer caps %d\n", + err); + return err; + } + + if (!MLX5_GET(mtrc_cap, out, trace_to_memory)) { + mlx5_core_dbg(dev, "FWTracer: Device does not support logging traces to memory\n"); + return -ENOTSUPP; + } + + tracer->trc_ver = MLX5_GET(mtrc_cap, out, trc_ver); + tracer->str_db.first_string_trace = + MLX5_GET(mtrc_cap, out, first_string_trace); + tracer->str_db.num_string_trace = + MLX5_GET(mtrc_cap, out, num_string_trace); + tracer->str_db.num_string_db = MLX5_GET(mtrc_cap, out, num_string_db); + tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner); + tracer->str_db.loaded = false; + + for (i = 0; i < tracer->str_db.num_string_db; i++) { + mtrc_cap_sp = MLX5_ADDR_OF(mtrc_cap, out, string_db_param[i]); + string_db_base_address_out[i] = MLX5_GET(mtrc_string_db_param, + mtrc_cap_sp, + string_db_base_address); + string_db_size_out[i] = MLX5_GET(mtrc_string_db_param, + mtrc_cap_sp, string_db_size); + } + + return err; +} + +static int mlx5_set_mtrc_caps_trace_owner(struct mlx5_fw_tracer *tracer, + u32 *out, u32 out_size, + u8 trace_owner) +{ + struct mlx5_core_dev *dev = tracer->dev; + u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0}; + + MLX5_SET(mtrc_cap, in, trace_owner, trace_owner); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, out_size, + MLX5_REG_MTRC_CAP, 0, 1); +} + +static int mlx5_fw_tracer_ownership_acquire(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev = tracer->dev; + u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0}; + int err; + + err = mlx5_set_mtrc_caps_trace_owner(tracer, out, sizeof(out), + MLX5_FW_TRACER_ACQUIRE_OWNERSHIP); + if (err) { + mlx5_core_warn(dev, "FWTracer: Acquire tracer ownership failed %d\n", + err); + return err; + } + + tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner); + + if (!tracer->owner) + return -EBUSY; + + return 0; +} + +static void mlx5_fw_tracer_ownership_release(struct mlx5_fw_tracer *tracer) +{ + u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0}; + + mlx5_set_mtrc_caps_trace_owner(tracer, out, sizeof(out), + MLX5_FW_TRACER_RELEASE_OWNERSHIP); + tracer->owner = false; +} + +static int mlx5_fw_tracer_create_log_buf(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev = tracer->dev; + struct device *ddev; + dma_addr_t dma; + void *buff; + gfp_t gfp; + int err; + + tracer->buff.size = TRACE_BUFFER_SIZE_BYTE; + + gfp = GFP_KERNEL | __GFP_ZERO; + buff = (void *)__get_free_pages(gfp, + get_order(tracer->buff.size)); + if (!buff) { + err = 
-ENOMEM; + mlx5_core_warn(dev, "FWTracer: Failed to allocate pages, %d\n", err); + return err; + } + tracer->buff.log_buf = buff; + + ddev = mlx5_core_dma_dev(dev); + dma = dma_map_single(ddev, buff, tracer->buff.size, DMA_FROM_DEVICE); + if (dma_mapping_error(ddev, dma)) { + mlx5_core_warn(dev, "FWTracer: Unable to map DMA: %d\n", + dma_mapping_error(ddev, dma)); + err = -ENOMEM; + goto free_pages; + } + tracer->buff.dma = dma; + + return 0; + +free_pages: + free_pages((unsigned long)tracer->buff.log_buf, get_order(tracer->buff.size)); + + return err; +} + +static void mlx5_fw_tracer_destroy_log_buf(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev = tracer->dev; + struct device *ddev; + + if (!tracer->buff.log_buf) + return; + + ddev = mlx5_core_dma_dev(dev); + dma_unmap_single(ddev, tracer->buff.dma, tracer->buff.size, DMA_FROM_DEVICE); + free_pages((unsigned long)tracer->buff.log_buf, get_order(tracer->buff.size)); +} + +static int mlx5_fw_tracer_create_mkey(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev = tracer->dev; + int err, inlen, i; + __be64 *mtt; + void *mkc; + u32 *in; + + inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + + sizeof(*mtt) * round_up(TRACER_BUFFER_PAGE_NUM, 2); + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_mkey_in, in, translations_octword_actual_size, + DIV_ROUND_UP(TRACER_BUFFER_PAGE_NUM, 2)); + mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for (i = 0 ; i < TRACER_BUFFER_PAGE_NUM ; i++) + mtt[i] = cpu_to_be64(tracer->buff.dma + i * PAGE_SIZE); + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); + MLX5_SET(mkc, mkc, lr, 1); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, pd, tracer->buff.pdn); + MLX5_SET(mkc, mkc, bsf_octword_size, 0); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); + MLX5_SET(mkc, mkc, translations_octword_size, + DIV_ROUND_UP(TRACER_BUFFER_PAGE_NUM, 2)); + MLX5_SET64(mkc, mkc, start_addr, tracer->buff.dma); + MLX5_SET64(mkc, mkc, len, tracer->buff.size); + err = mlx5_core_create_mkey(dev, &tracer->buff.mkey, in, inlen); + if (err) + mlx5_core_warn(dev, "FWTracer: Failed to create mkey, %d\n", err); + + kvfree(in); + + return err; +} + +static void mlx5_fw_tracer_free_strings_db(struct mlx5_fw_tracer *tracer) +{ + u32 num_string_db = tracer->str_db.num_string_db; + int i; + + for (i = 0; i < num_string_db; i++) { + kfree(tracer->str_db.buffer[i]); + tracer->str_db.buffer[i] = NULL; + } +} + +static int mlx5_fw_tracer_allocate_strings_db(struct mlx5_fw_tracer *tracer) +{ + u32 *string_db_size_out = tracer->str_db.size_out; + u32 num_string_db = tracer->str_db.num_string_db; + int i; + + for (i = 0; i < num_string_db; i++) { + tracer->str_db.buffer[i] = kzalloc(string_db_size_out[i], GFP_KERNEL); + if (!tracer->str_db.buffer[i]) + goto free_strings_db; + } + + return 0; + +free_strings_db: + mlx5_fw_tracer_free_strings_db(tracer); + return -ENOMEM; +} + +static void +mlx5_fw_tracer_init_saved_traces_array(struct mlx5_fw_tracer *tracer) +{ + tracer->st_arr.saved_traces_index = 0; + mutex_init(&tracer->st_arr.lock); +} + +static void +mlx5_fw_tracer_clean_saved_traces_array(struct mlx5_fw_tracer *tracer) +{ + mutex_destroy(&tracer->st_arr.lock); +} + +static void mlx5_tracer_read_strings_db(struct work_struct *work) +{ + struct mlx5_fw_tracer *tracer = container_of(work, struct mlx5_fw_tracer, + read_fw_strings_work); + u32 num_of_reads, num_string_db 
= tracer->str_db.num_string_db; + struct mlx5_core_dev *dev = tracer->dev; + u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0}; + u32 leftovers, offset; + int err = 0, i, j; + u32 *out, outlen; + void *out_value; + + outlen = MLX5_ST_SZ_BYTES(mtrc_stdb) + STRINGS_DB_READ_SIZE_BYTES; + out = kzalloc(outlen, GFP_KERNEL); + if (!out) { + err = -ENOMEM; + goto out; + } + + for (i = 0; i < num_string_db; i++) { + offset = 0; + MLX5_SET(mtrc_stdb, in, string_db_index, i); + num_of_reads = tracer->str_db.size_out[i] / + STRINGS_DB_READ_SIZE_BYTES; + leftovers = (tracer->str_db.size_out[i] % + STRINGS_DB_READ_SIZE_BYTES) / + STRINGS_DB_LEFTOVER_SIZE_BYTES; + + MLX5_SET(mtrc_stdb, in, read_size, STRINGS_DB_READ_SIZE_BYTES); + for (j = 0; j < num_of_reads; j++) { + MLX5_SET(mtrc_stdb, in, start_offset, offset); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + outlen, MLX5_REG_MTRC_STDB, + 0, 1); + if (err) { + mlx5_core_dbg(dev, "FWTracer: Failed to read strings DB %d\n", + err); + goto out_free; + } + + out_value = MLX5_ADDR_OF(mtrc_stdb, out, string_db_data); + memcpy(tracer->str_db.buffer[i] + offset, out_value, + STRINGS_DB_READ_SIZE_BYTES); + offset += STRINGS_DB_READ_SIZE_BYTES; + } + + /* Strings database is aligned to 64, need to read leftovers*/ + MLX5_SET(mtrc_stdb, in, read_size, + STRINGS_DB_LEFTOVER_SIZE_BYTES); + for (j = 0; j < leftovers; j++) { + MLX5_SET(mtrc_stdb, in, start_offset, offset); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + outlen, MLX5_REG_MTRC_STDB, + 0, 1); + if (err) { + mlx5_core_dbg(dev, "FWTracer: Failed to read strings DB %d\n", + err); + goto out_free; + } + + out_value = MLX5_ADDR_OF(mtrc_stdb, out, string_db_data); + memcpy(tracer->str_db.buffer[i] + offset, out_value, + STRINGS_DB_LEFTOVER_SIZE_BYTES); + offset += STRINGS_DB_LEFTOVER_SIZE_BYTES; + } + } + + tracer->str_db.loaded = true; + +out_free: + kfree(out); +out: + return; +} + +static void mlx5_fw_tracer_arm(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0}; + int err; + + MLX5_SET(mtrc_ctrl, in, arm_event, 1); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MTRC_CTRL, 0, 1); + if (err) + mlx5_core_warn(dev, "FWTracer: Failed to arm tracer event %d\n", err); +} + +static const char *VAL_PARM = "%llx"; +static const char *REPLACE_64_VAL_PARM = "%x%x"; +static const char *PARAM_CHAR = "%"; + +static int mlx5_tracer_message_hash(u32 message_id) +{ + return jhash_1word(message_id, 0) & (MESSAGE_HASH_SIZE - 1); +} + +static struct tracer_string_format *mlx5_tracer_message_insert(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event) +{ + struct hlist_head *head = + &tracer->hash[mlx5_tracer_message_hash(tracer_event->string_event.tmsn)]; + struct tracer_string_format *cur_string; + + cur_string = kzalloc(sizeof(*cur_string), GFP_KERNEL); + if (!cur_string) + return NULL; + + hlist_add_head(&cur_string->hlist, head); + + return cur_string; +} + +static struct tracer_string_format *mlx5_tracer_get_string(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event) +{ + struct tracer_string_format *cur_string; + u32 str_ptr, offset; + int i; + + str_ptr = tracer_event->string_event.string_param; + + for (i = 0; i < tracer->str_db.num_string_db; i++) { + if (str_ptr > tracer->str_db.base_address_out[i] && + str_ptr < tracer->str_db.base_address_out[i] + + tracer->str_db.size_out[i]) { + offset = str_ptr - tracer->str_db.base_address_out[i]; + /* add it to the hash */ + cur_string = 
mlx5_tracer_message_insert(tracer, tracer_event); + if (!cur_string) + return NULL; + cur_string->string = (char *)(tracer->str_db.buffer[i] + + offset); + return cur_string; + } + } + + return NULL; +} + +static void mlx5_tracer_clean_message(struct tracer_string_format *str_frmt) +{ + hlist_del(&str_frmt->hlist); + kfree(str_frmt); +} + +static int mlx5_tracer_get_num_of_params(char *str) +{ + char *substr, *pstr = str; + int num_of_params = 0; + + /* replace %llx with %x%x */ + substr = strstr(pstr, VAL_PARM); + while (substr) { + memcpy(substr, REPLACE_64_VAL_PARM, 4); + pstr = substr; + substr = strstr(pstr, VAL_PARM); + } + + /* count all the % characters */ + substr = strstr(str, PARAM_CHAR); + while (substr) { + num_of_params += 1; + str = substr + 1; + substr = strstr(str, PARAM_CHAR); + } + + return num_of_params; +} + +static struct tracer_string_format *mlx5_tracer_message_find(struct hlist_head *head, + u8 event_id, u32 tmsn) +{ + struct tracer_string_format *message; + + hlist_for_each_entry(message, head, hlist) + if (message->event_id == event_id && message->tmsn == tmsn) + return message; + + return NULL; +} + +static struct tracer_string_format *mlx5_tracer_message_get(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event) +{ + struct hlist_head *head = + &tracer->hash[mlx5_tracer_message_hash(tracer_event->string_event.tmsn)]; + + return mlx5_tracer_message_find(head, tracer_event->event_id, tracer_event->string_event.tmsn); +} + +static void poll_trace(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event, u64 *trace) +{ + u32 timestamp_low, timestamp_mid, timestamp_high, urts; + + tracer_event->event_id = MLX5_GET(tracer_event, trace, event_id); + tracer_event->lost_event = MLX5_GET(tracer_event, trace, lost); + + switch (tracer_event->event_id) { + case TRACER_EVENT_TYPE_TIMESTAMP: + tracer_event->type = TRACER_EVENT_TYPE_TIMESTAMP; + urts = MLX5_GET(tracer_timestamp_event, trace, urts); + if (tracer->trc_ver == 0) + tracer_event->timestamp_event.unreliable = !!(urts >> 2); + else + tracer_event->timestamp_event.unreliable = !!(urts & 1); + + timestamp_low = MLX5_GET(tracer_timestamp_event, + trace, timestamp7_0); + timestamp_mid = MLX5_GET(tracer_timestamp_event, + trace, timestamp39_8); + timestamp_high = MLX5_GET(tracer_timestamp_event, + trace, timestamp52_40); + + tracer_event->timestamp_event.timestamp = + ((u64)timestamp_high << 40) | + ((u64)timestamp_mid << 8) | + (u64)timestamp_low; + break; + default: + if (tracer_event->event_id >= tracer->str_db.first_string_trace && + tracer_event->event_id <= tracer->str_db.first_string_trace + + tracer->str_db.num_string_trace) { + tracer_event->type = TRACER_EVENT_TYPE_STRING; + tracer_event->string_event.timestamp = + MLX5_GET(tracer_string_event, trace, timestamp); + tracer_event->string_event.string_param = + MLX5_GET(tracer_string_event, trace, string_param); + tracer_event->string_event.tmsn = + MLX5_GET(tracer_string_event, trace, tmsn); + tracer_event->string_event.tdsn = + MLX5_GET(tracer_string_event, trace, tdsn); + } else { + tracer_event->type = TRACER_EVENT_TYPE_UNRECOGNIZED; + } + break; + } +} + +static u64 get_block_timestamp(struct mlx5_fw_tracer *tracer, u64 *ts_event) +{ + struct tracer_event tracer_event; + u8 event_id; + + event_id = MLX5_GET(tracer_event, ts_event, event_id); + + if (event_id == TRACER_EVENT_TYPE_TIMESTAMP) + poll_trace(tracer, &tracer_event, ts_event); + else + tracer_event.timestamp_event.timestamp = 0; + + return 
tracer_event.timestamp_event.timestamp; +} + +static void mlx5_fw_tracer_clean_print_hash(struct mlx5_fw_tracer *tracer) +{ + struct tracer_string_format *str_frmt; + struct hlist_node *n; + int i; + + for (i = 0; i < MESSAGE_HASH_SIZE; i++) { + hlist_for_each_entry_safe(str_frmt, n, &tracer->hash[i], hlist) + mlx5_tracer_clean_message(str_frmt); + } +} + +static void mlx5_fw_tracer_clean_ready_list(struct mlx5_fw_tracer *tracer) +{ + struct tracer_string_format *str_frmt, *tmp_str; + + list_for_each_entry_safe(str_frmt, tmp_str, &tracer->ready_strings_list, + list) + list_del(&str_frmt->list); +} + +static void mlx5_fw_tracer_save_trace(struct mlx5_fw_tracer *tracer, + u64 timestamp, bool lost, + u8 event_id, char *msg) +{ + struct mlx5_fw_trace_data *trace_data; + + mutex_lock(&tracer->st_arr.lock); + trace_data = &tracer->st_arr.straces[tracer->st_arr.saved_traces_index]; + trace_data->timestamp = timestamp; + trace_data->lost = lost; + trace_data->event_id = event_id; + strscpy_pad(trace_data->msg, msg, TRACE_STR_MSG); + + tracer->st_arr.saved_traces_index = + (tracer->st_arr.saved_traces_index + 1) & (SAVED_TRACES_NUM - 1); + mutex_unlock(&tracer->st_arr.lock); +} + +static noinline +void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt, + struct mlx5_core_dev *dev, + u64 trace_timestamp) +{ + char tmp[512]; + + snprintf(tmp, sizeof(tmp), str_frmt->string, + str_frmt->params[0], + str_frmt->params[1], + str_frmt->params[2], + str_frmt->params[3], + str_frmt->params[4], + str_frmt->params[5], + str_frmt->params[6]); + + trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost, + str_frmt->event_id, tmp); + + mlx5_fw_tracer_save_trace(dev->tracer, trace_timestamp, + str_frmt->lost, str_frmt->event_id, tmp); + + /* remove it from hash */ + mlx5_tracer_clean_message(str_frmt); +} + +static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event) +{ + struct tracer_string_format *cur_string; + + if (tracer_event->string_event.tdsn == 0) { + cur_string = mlx5_tracer_get_string(tracer, tracer_event); + if (!cur_string) + return -1; + + cur_string->num_of_params = mlx5_tracer_get_num_of_params(cur_string->string); + cur_string->last_param_num = 0; + cur_string->event_id = tracer_event->event_id; + cur_string->tmsn = tracer_event->string_event.tmsn; + cur_string->timestamp = tracer_event->string_event.timestamp; + cur_string->lost = tracer_event->lost_event; + if (cur_string->num_of_params == 0) /* trace with no params */ + list_add_tail(&cur_string->list, &tracer->ready_strings_list); + } else { + cur_string = mlx5_tracer_message_get(tracer, tracer_event); + if (!cur_string) { + pr_debug("%s Got string event for unknown string tmsn: %d\n", + __func__, tracer_event->string_event.tmsn); + return -1; + } + cur_string->last_param_num += 1; + if (cur_string->last_param_num > TRACER_MAX_PARAMS) { + pr_debug("%s Number of params exceeds the max (%d)\n", + __func__, TRACER_MAX_PARAMS); + list_add_tail(&cur_string->list, &tracer->ready_strings_list); + return 0; + } + /* keep the new parameter */ + cur_string->params[cur_string->last_param_num - 1] = + tracer_event->string_event.string_param; + if (cur_string->last_param_num == cur_string->num_of_params) + list_add_tail(&cur_string->list, &tracer->ready_strings_list); + } + + return 0; +} + +static void mlx5_tracer_handle_timestamp_trace(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event) +{ + struct tracer_timestamp_event timestamp_event = + tracer_event->timestamp_event; + 
struct tracer_string_format *str_frmt, *tmp_str; + struct mlx5_core_dev *dev = tracer->dev; + u64 trace_timestamp; + + list_for_each_entry_safe(str_frmt, tmp_str, &tracer->ready_strings_list, list) { + list_del(&str_frmt->list); + if (str_frmt->timestamp < (timestamp_event.timestamp & MASK_6_0)) + trace_timestamp = (timestamp_event.timestamp & MASK_52_7) | + (str_frmt->timestamp & MASK_6_0); + else + trace_timestamp = ((timestamp_event.timestamp - 1) & MASK_52_7) | + (str_frmt->timestamp & MASK_6_0); + + mlx5_tracer_print_trace(str_frmt, dev, trace_timestamp); + } +} + +static int mlx5_tracer_handle_trace(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event) +{ + if (tracer_event->type == TRACER_EVENT_TYPE_STRING) { + mlx5_tracer_handle_string_trace(tracer, tracer_event); + } else if (tracer_event->type == TRACER_EVENT_TYPE_TIMESTAMP) { + if (!tracer_event->timestamp_event.unreliable) + mlx5_tracer_handle_timestamp_trace(tracer, tracer_event); + } else { + pr_debug("%s Got unrecognised type %d for parsing, exiting..\n", + __func__, tracer_event->type); + } + return 0; +} + +static void mlx5_fw_tracer_handle_traces(struct work_struct *work) +{ + struct mlx5_fw_tracer *tracer = + container_of(work, struct mlx5_fw_tracer, handle_traces_work); + u64 block_timestamp, last_block_timestamp, tmp_trace_block[TRACES_PER_BLOCK]; + u32 block_count, start_offset, prev_start_offset, prev_consumer_index; + u32 trace_event_size = MLX5_ST_SZ_BYTES(tracer_event); + struct mlx5_core_dev *dev = tracer->dev; + struct tracer_event tracer_event; + int i; + + mlx5_core_dbg(dev, "FWTracer: Handle Trace event, owner=(%d)\n", tracer->owner); + if (!tracer->owner) + return; + + if (unlikely(!tracer->str_db.loaded)) + goto arm; + + block_count = tracer->buff.size / TRACER_BLOCK_SIZE_BYTE; + start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE; + + /* Copy the block to local buffer to avoid HW override while being processed */ + memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset, + TRACER_BLOCK_SIZE_BYTE); + + block_timestamp = + get_block_timestamp(tracer, &tmp_trace_block[TRACES_PER_BLOCK - 1]); + + while (block_timestamp > tracer->last_timestamp) { + /* Check block override if it's not the first block */ + if (tracer->last_timestamp) { + u64 *ts_event; + /* To avoid block override be the HW in case of buffer + * wraparound, the time stamp of the previous block + * should be compared to the last timestamp handled + * by the driver. + */ + prev_consumer_index = + (tracer->buff.consumer_index - 1) & (block_count - 1); + prev_start_offset = prev_consumer_index * TRACER_BLOCK_SIZE_BYTE; + + ts_event = tracer->buff.log_buf + prev_start_offset + + (TRACES_PER_BLOCK - 1) * trace_event_size; + last_block_timestamp = get_block_timestamp(tracer, ts_event); + /* If previous timestamp different from last stored + * timestamp then there is a good chance that the + * current buffer is overwritten and therefore should + * not be parsed. 
+ */ + if (tracer->last_timestamp != last_block_timestamp) { + mlx5_core_warn(dev, "FWTracer: Events were lost\n"); + tracer->last_timestamp = block_timestamp; + tracer->buff.consumer_index = + (tracer->buff.consumer_index + 1) & (block_count - 1); + break; + } + } + + /* Parse events */ + for (i = 0; i < TRACES_PER_BLOCK ; i++) { + poll_trace(tracer, &tracer_event, &tmp_trace_block[i]); + mlx5_tracer_handle_trace(tracer, &tracer_event); + } + + tracer->buff.consumer_index = + (tracer->buff.consumer_index + 1) & (block_count - 1); + + tracer->last_timestamp = block_timestamp; + start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE; + memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset, + TRACER_BLOCK_SIZE_BYTE); + block_timestamp = get_block_timestamp(tracer, + &tmp_trace_block[TRACES_PER_BLOCK - 1]); + } + +arm: + mlx5_fw_tracer_arm(dev); +} + +static int mlx5_fw_tracer_set_mtrc_conf(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev = tracer->dev; + u32 out[MLX5_ST_SZ_DW(mtrc_conf)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtrc_conf)] = {0}; + int err; + + MLX5_SET(mtrc_conf, in, trace_mode, TRACE_TO_MEMORY); + MLX5_SET(mtrc_conf, in, log_trace_buffer_size, + ilog2(TRACER_BUFFER_PAGE_NUM)); + MLX5_SET(mtrc_conf, in, trace_mkey, tracer->buff.mkey); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MTRC_CONF, 0, 1); + if (err) + mlx5_core_warn(dev, "FWTracer: Failed to set tracer configurations %d\n", err); + + tracer->buff.consumer_index = 0; + return err; +} + +static int mlx5_fw_tracer_set_mtrc_ctrl(struct mlx5_fw_tracer *tracer, u8 status, u8 arm) +{ + struct mlx5_core_dev *dev = tracer->dev; + u32 out[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0}; + int err; + + MLX5_SET(mtrc_ctrl, in, modify_field_select, TRACE_STATUS); + MLX5_SET(mtrc_ctrl, in, trace_status, status); + MLX5_SET(mtrc_ctrl, in, arm_event, arm); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MTRC_CTRL, 0, 1); + + if (!err && status) + tracer->last_timestamp = 0; + + return err; +} + +static int mlx5_fw_tracer_start(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev = tracer->dev; + int err; + + err = mlx5_fw_tracer_ownership_acquire(tracer); + if (err) { + mlx5_core_dbg(dev, "FWTracer: Ownership was not granted %d\n", err); + /* Don't fail since ownership can be acquired on a later FW event */ + return 0; + } + + err = mlx5_fw_tracer_set_mtrc_conf(tracer); + if (err) { + mlx5_core_warn(dev, "FWTracer: Failed to set tracer configuration %d\n", err); + goto release_ownership; + } + + /* enable tracer & trace events */ + err = mlx5_fw_tracer_set_mtrc_ctrl(tracer, 1, 1); + if (err) { + mlx5_core_warn(dev, "FWTracer: Failed to enable tracer %d\n", err); + goto release_ownership; + } + + mlx5_core_dbg(dev, "FWTracer: Ownership granted and active\n"); + return 0; + +release_ownership: + mlx5_fw_tracer_ownership_release(tracer); + return err; +} + +static void mlx5_fw_tracer_ownership_change(struct work_struct *work) +{ + struct mlx5_fw_tracer *tracer = + container_of(work, struct mlx5_fw_tracer, ownership_change_work); + + mlx5_core_dbg(tracer->dev, "FWTracer: ownership changed, current=(%d)\n", tracer->owner); + if (tracer->owner) { + mlx5_fw_tracer_ownership_acquire(tracer); + return; + } + + mlx5_fw_tracer_start(tracer); +} + +static int mlx5_fw_tracer_set_core_dump_reg(struct mlx5_core_dev *dev, + u32 *in, int size_in) +{ + u32 out[MLX5_ST_SZ_DW(core_dump_reg)] = {}; + + if (!MLX5_CAP_DEBUG(dev, 
core_dump_general) && + !MLX5_CAP_DEBUG(dev, core_dump_qp)) + return -EOPNOTSUPP; + + return mlx5_core_access_reg(dev, in, size_in, out, sizeof(out), + MLX5_REG_CORE_DUMP, 0, 1); +} + +int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_tracer *tracer = dev->tracer; + u32 in[MLX5_ST_SZ_DW(core_dump_reg)] = {}; + int err; + + if (!MLX5_CAP_DEBUG(dev, core_dump_general) || !tracer) + return -EOPNOTSUPP; + if (!tracer->owner) + return -EPERM; + + MLX5_SET(core_dump_reg, in, core_dump_type, 0x0); + + err = mlx5_fw_tracer_set_core_dump_reg(dev, in, sizeof(in)); + if (err) + return err; + queue_work(tracer->work_queue, &tracer->handle_traces_work); + flush_workqueue(tracer->work_queue); + return 0; +} + +static int +mlx5_devlink_fmsg_fill_trace(struct devlink_fmsg *fmsg, + struct mlx5_fw_trace_data *trace_data) +{ + int err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "timestamp", trace_data->timestamp); + if (err) + return err; + + err = devlink_fmsg_bool_pair_put(fmsg, "lost", trace_data->lost); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "event_id", trace_data->event_id); + if (err) + return err; + + err = devlink_fmsg_string_pair_put(fmsg, "msg", trace_data->msg); + if (err) + return err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + return 0; +} + +int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer, + struct devlink_fmsg *fmsg) +{ + struct mlx5_fw_trace_data *straces = tracer->st_arr.straces; + u32 index, start_index, end_index; + u32 saved_traces_index; + int err; + + if (!straces[0].timestamp) + return -ENOMSG; + + mutex_lock(&tracer->st_arr.lock); + saved_traces_index = tracer->st_arr.saved_traces_index; + if (straces[saved_traces_index].timestamp) + start_index = saved_traces_index; + else + start_index = 0; + end_index = (saved_traces_index - 1) & (SAVED_TRACES_NUM - 1); + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "dump fw traces"); + if (err) + goto unlock; + index = start_index; + while (index != end_index) { + err = mlx5_devlink_fmsg_fill_trace(fmsg, &straces[index]); + if (err) + goto unlock; + + index = (index + 1) & (SAVED_TRACES_NUM - 1); + } + + err = devlink_fmsg_arr_pair_nest_end(fmsg); +unlock: + mutex_unlock(&tracer->st_arr.lock); + return err; +} + +/* Create software resources (Buffers, etc ..) 
*/ +struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_tracer *tracer = NULL; + int err; + + if (!MLX5_CAP_MCAM_REG(dev, tracer_registers)) { + mlx5_core_dbg(dev, "FWTracer: Tracer capability not present\n"); + return NULL; + } + + tracer = kvzalloc(sizeof(*tracer), GFP_KERNEL); + if (!tracer) + return ERR_PTR(-ENOMEM); + + tracer->work_queue = create_singlethread_workqueue("mlx5_fw_tracer"); + if (!tracer->work_queue) { + err = -ENOMEM; + goto free_tracer; + } + + tracer->dev = dev; + + INIT_LIST_HEAD(&tracer->ready_strings_list); + INIT_WORK(&tracer->ownership_change_work, mlx5_fw_tracer_ownership_change); + INIT_WORK(&tracer->read_fw_strings_work, mlx5_tracer_read_strings_db); + INIT_WORK(&tracer->handle_traces_work, mlx5_fw_tracer_handle_traces); + + + err = mlx5_query_mtrc_caps(tracer); + if (err) { + mlx5_core_dbg(dev, "FWTracer: Failed to query capabilities %d\n", err); + goto destroy_workqueue; + } + + err = mlx5_fw_tracer_create_log_buf(tracer); + if (err) { + mlx5_core_warn(dev, "FWTracer: Create log buffer failed %d\n", err); + goto destroy_workqueue; + } + + err = mlx5_fw_tracer_allocate_strings_db(tracer); + if (err) { + mlx5_core_warn(dev, "FWTracer: Allocate strings database failed %d\n", err); + goto free_log_buf; + } + + mlx5_fw_tracer_init_saved_traces_array(tracer); + mlx5_core_dbg(dev, "FWTracer: Tracer created\n"); + + return tracer; + +free_log_buf: + mlx5_fw_tracer_destroy_log_buf(tracer); +destroy_workqueue: + tracer->dev = NULL; + destroy_workqueue(tracer->work_queue); +free_tracer: + kvfree(tracer); + return ERR_PTR(err); +} + +static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data); + +/* Create HW resources + start tracer */ +int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev; + int err; + + if (IS_ERR_OR_NULL(tracer)) + return 0; + + dev = tracer->dev; + + if (!tracer->str_db.loaded) + queue_work(tracer->work_queue, &tracer->read_fw_strings_work); + + err = mlx5_core_alloc_pd(dev, &tracer->buff.pdn); + if (err) { + mlx5_core_warn(dev, "FWTracer: Failed to allocate PD %d\n", err); + goto err_cancel_work; + } + + err = mlx5_fw_tracer_create_mkey(tracer); + if (err) { + mlx5_core_warn(dev, "FWTracer: Failed to create mkey %d\n", err); + goto err_dealloc_pd; + } + + MLX5_NB_INIT(&tracer->nb, fw_tracer_event, DEVICE_TRACER); + mlx5_eq_notifier_register(dev, &tracer->nb); + + err = mlx5_fw_tracer_start(tracer); + if (err) { + mlx5_core_warn(dev, "FWTracer: Failed to start tracer %d\n", err); + goto err_notifier_unregister; + } + return 0; + +err_notifier_unregister: + mlx5_eq_notifier_unregister(dev, &tracer->nb); + mlx5_core_destroy_mkey(dev, tracer->buff.mkey); +err_dealloc_pd: + mlx5_core_dealloc_pd(dev, tracer->buff.pdn); +err_cancel_work: + cancel_work_sync(&tracer->read_fw_strings_work); + return err; +} + +/* Stop tracer + Cleanup HW resources */ +void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer) +{ + if (IS_ERR_OR_NULL(tracer)) + return; + + mlx5_core_dbg(tracer->dev, "FWTracer: Cleanup, is owner ? (%d)\n", + tracer->owner); + mlx5_eq_notifier_unregister(tracer->dev, &tracer->nb); + cancel_work_sync(&tracer->ownership_change_work); + cancel_work_sync(&tracer->handle_traces_work); + + if (tracer->owner) + mlx5_fw_tracer_ownership_release(tracer); + + mlx5_core_destroy_mkey(tracer->dev, tracer->buff.mkey); + mlx5_core_dealloc_pd(tracer->dev, tracer->buff.pdn); +} + +/* Free software resources (Buffers, etc ..) 
*/ +void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer) +{ + if (IS_ERR_OR_NULL(tracer)) + return; + + mlx5_core_dbg(tracer->dev, "FWTracer: Destroy\n"); + + cancel_work_sync(&tracer->read_fw_strings_work); + mlx5_fw_tracer_clean_ready_list(tracer); + mlx5_fw_tracer_clean_print_hash(tracer); + mlx5_fw_tracer_clean_saved_traces_array(tracer); + mlx5_fw_tracer_free_strings_db(tracer); + mlx5_fw_tracer_destroy_log_buf(tracer); + destroy_workqueue(tracer->work_queue); + kvfree(tracer); +} + +static int mlx5_fw_tracer_recreate_strings_db(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev; + int err; + + cancel_work_sync(&tracer->read_fw_strings_work); + mlx5_fw_tracer_clean_ready_list(tracer); + mlx5_fw_tracer_clean_print_hash(tracer); + mlx5_fw_tracer_clean_saved_traces_array(tracer); + mlx5_fw_tracer_free_strings_db(tracer); + + dev = tracer->dev; + err = mlx5_query_mtrc_caps(tracer); + if (err) { + mlx5_core_dbg(dev, "FWTracer: Failed to query capabilities %d\n", err); + return err; + } + + err = mlx5_fw_tracer_allocate_strings_db(tracer); + if (err) { + mlx5_core_warn(dev, "FWTracer: Allocate strings DB failed %d\n", err); + return err; + } + mlx5_fw_tracer_init_saved_traces_array(tracer); + + return 0; +} + +int mlx5_fw_tracer_reload(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev; + int err; + + if (IS_ERR_OR_NULL(tracer)) + return 0; + + dev = tracer->dev; + mlx5_fw_tracer_cleanup(tracer); + err = mlx5_fw_tracer_recreate_strings_db(tracer); + if (err) { + mlx5_core_warn(dev, "Failed to recreate FW tracer strings DB\n"); + return err; + } + err = mlx5_fw_tracer_init(tracer); + if (err) { + mlx5_core_warn(dev, "Failed to re-initialize FW tracer\n"); + return err; + } + + return 0; +} + +static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data) +{ + struct mlx5_fw_tracer *tracer = mlx5_nb_cof(nb, struct mlx5_fw_tracer, nb); + struct mlx5_core_dev *dev = tracer->dev; + struct mlx5_eqe *eqe = data; + + switch (eqe->sub_type) { + case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE: + queue_work(tracer->work_queue, &tracer->ownership_change_work); + break; + case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE: + queue_work(tracer->work_queue, &tracer->handle_traces_work); + break; + default: + mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n", + eqe->sub_type); + } + + return NOTIFY_OK; +} + +EXPORT_TRACEPOINT_SYMBOL(mlx5_fw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h new file mode 100644 index 000000000..4762b55b0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __LIB_TRACER_H__ +#define __LIB_TRACER_H__ + +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" + +#define STRINGS_DB_SECTIONS_NUM 8 +#define STRINGS_DB_READ_SIZE_BYTES 256 +#define STRINGS_DB_LEFTOVER_SIZE_BYTES 64 +#define TRACER_BUFFER_PAGE_NUM 64 +#define TRACER_BUFFER_CHUNK 4096 +#define TRACE_BUFFER_SIZE_BYTE (TRACER_BUFFER_PAGE_NUM * TRACER_BUFFER_CHUNK) + +#define TRACER_BLOCK_SIZE_BYTE 256 +#define TRACES_PER_BLOCK 32 + +#define TRACE_STR_MSG 256 +#define SAVED_TRACES_NUM 8192 + +#define TRACER_MAX_PARAMS 7 +#define MESSAGE_HASH_BITS 6 +#define MESSAGE_HASH_SIZE BIT(MESSAGE_HASH_BITS) + +#define MASK_52_7 (0x1FFFFFFFFFFF80) +#define MASK_6_0 (0x7F) + +struct mlx5_fw_trace_data { + u64 timestamp; + bool lost; + u8 event_id; + char msg[TRACE_STR_MSG]; +}; + +struct mlx5_fw_tracer { + struct mlx5_core_dev *dev; + struct mlx5_nb nb; + bool owner; + u8 trc_ver; + struct workqueue_struct *work_queue; + struct work_struct ownership_change_work; + struct work_struct read_fw_strings_work; + + /* Strings DB */ + struct { + u8 first_string_trace; + u8 num_string_trace; + u32 num_string_db; + u32 base_address_out[STRINGS_DB_SECTIONS_NUM]; + u32 size_out[STRINGS_DB_SECTIONS_NUM]; + void *buffer[STRINGS_DB_SECTIONS_NUM]; + bool loaded; + } str_db; + + /* Log Buffer */ + struct { + u32 pdn; + void *log_buf; + dma_addr_t dma; + u32 size; + u32 mkey; + u32 consumer_index; + } buff; + + /* Saved Traces Array */ + struct { + struct mlx5_fw_trace_data straces[SAVED_TRACES_NUM]; + u32 saved_traces_index; + struct mutex lock; /* Protect st_arr access */ + } st_arr; + + u64 last_timestamp; + struct work_struct handle_traces_work; + struct hlist_head hash[MESSAGE_HASH_SIZE]; + struct list_head ready_strings_list; +}; + +struct tracer_string_format { + char *string; + int params[TRACER_MAX_PARAMS]; + int num_of_params; + int last_param_num; + u8 event_id; + u32 tmsn; + struct hlist_node hlist; + struct list_head list; + u32 timestamp; + bool lost; +}; + +enum mlx5_fw_tracer_ownership_state { + MLX5_FW_TRACER_RELEASE_OWNERSHIP, + MLX5_FW_TRACER_ACQUIRE_OWNERSHIP, +}; + +enum tracer_ctrl_fields_select { + TRACE_STATUS = 1 << 0, +}; + +enum tracer_event_type { + TRACER_EVENT_TYPE_STRING, + TRACER_EVENT_TYPE_TIMESTAMP = 0xFF, + TRACER_EVENT_TYPE_UNRECOGNIZED, +}; + +enum tracing_mode { + TRACE_TO_MEMORY = 1 << 0, +}; + +struct tracer_timestamp_event { + u64 timestamp; + u8 unreliable; +}; + +struct tracer_string_event { + u32 timestamp; + u32 tmsn; + u32 tdsn; + u32 string_param; +}; + +struct tracer_event { + bool lost_event; + u32 type; + u8 event_id; + union { + struct tracer_string_event string_event; + struct tracer_timestamp_event timestamp_event; + }; +}; + +struct mlx5_ifc_tracer_event_bits { + u8 lost[0x1]; + u8 timestamp[0x7]; + u8 event_id[0x8]; + u8 event_data[0x30]; 
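+ /* 0x1 + 0x7 + 0x8 + 0x30 = 64 bits: each tracer event is 8 bytes,
+ * i.e. TRACER_BLOCK_SIZE_BYTE / TRACES_PER_BLOCK.
+ */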
+}; + +struct mlx5_ifc_tracer_string_event_bits { + u8 lost[0x1]; + u8 timestamp[0x7]; + u8 event_id[0x8]; + u8 tmsn[0xd]; + u8 tdsn[0x3]; + u8 string_param[0x20]; +}; + +struct mlx5_ifc_tracer_timestamp_event_bits { + u8 timestamp7_0[0x8]; + u8 event_id[0x8]; + u8 urts[0x3]; + u8 timestamp52_40[0xd]; + u8 timestamp39_8[0x20]; +}; + +struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev); +int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer); +void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer); +void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer); +int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev); +int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer, + struct devlink_fmsg *fmsg); +int mlx5_fw_tracer_reload(struct mlx5_fw_tracer *tracer); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h new file mode 100644 index 000000000..3038be575 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#if !defined(__LIB_TRACER_TRACEPOINT_H__) || defined(TRACE_HEADER_MULTI_READ) +#define __LIB_TRACER_TRACEPOINT_H__ + +#include <linux/tracepoint.h> +#include "fw_tracer.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +/* Tracepoint for FWTracer messages: */ +TRACE_EVENT(mlx5_fw, + TP_PROTO(const struct mlx5_fw_tracer *tracer, u64 trace_timestamp, + bool lost, u8 event_id, const char *msg), + + TP_ARGS(tracer, trace_timestamp, lost, event_id, msg), + + TP_STRUCT__entry( + __string(dev_name, dev_name(tracer->dev->device)) + __field(u64, trace_timestamp) + __field(bool, lost) + __field(u8, event_id) + __string(msg, msg) + ), + + TP_fast_assign( + __assign_str(dev_name, + dev_name(tracer->dev->device)); + __entry->trace_timestamp = trace_timestamp; + __entry->lost = lost; + __entry->event_id = event_id; + __assign_str(msg, msg); + ), + + TP_printk("%s [0x%llx] %d [0x%x] %s", + __get_str(dev_name), + __entry->trace_timestamp, + __entry->lost, __entry->event_id, + __get_str(msg)) +); + +#endif + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH ./diag +#define TRACE_INCLUDE_FILE fw_tracer_tracepoint +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c new file mode 100644 index 000000000..c5b560a8b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c @@ -0,0 +1,311 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "rsc_dump.h" +#include "lib/mlx5.h" + +#define MLX5_SGMT_TYPE(SGMT) MLX5_SGMT_TYPE_##SGMT +#define MLX5_SGMT_STR_ASSING(SGMT)[MLX5_SGMT_TYPE(SGMT)] = #SGMT +static const char *const mlx5_rsc_sgmt_name[] = { + MLX5_SGMT_STR_ASSING(HW_CQPC), + MLX5_SGMT_STR_ASSING(HW_SQPC), + MLX5_SGMT_STR_ASSING(HW_RQPC), + MLX5_SGMT_STR_ASSING(FULL_SRQC), + MLX5_SGMT_STR_ASSING(FULL_CQC), + MLX5_SGMT_STR_ASSING(FULL_EQC), + MLX5_SGMT_STR_ASSING(FULL_QPC), + MLX5_SGMT_STR_ASSING(SND_BUFF), + MLX5_SGMT_STR_ASSING(RCV_BUFF), + MLX5_SGMT_STR_ASSING(SRQ_BUFF), + MLX5_SGMT_STR_ASSING(CQ_BUFF), + MLX5_SGMT_STR_ASSING(EQ_BUFF), + MLX5_SGMT_STR_ASSING(SX_SLICE), + MLX5_SGMT_STR_ASSING(SX_SLICE_ALL), + MLX5_SGMT_STR_ASSING(RDB), + MLX5_SGMT_STR_ASSING(RX_SLICE_ALL), + MLX5_SGMT_STR_ASSING(PRM_QUERY_QP), + MLX5_SGMT_STR_ASSING(PRM_QUERY_CQ), + MLX5_SGMT_STR_ASSING(PRM_QUERY_MKEY), +}; + +struct mlx5_rsc_dump { + u32 pdn; + u32 mkey; + u32 number_of_menu_items; + u16 fw_segment_type[MLX5_SGMT_TYPE_NUM]; +}; + +struct mlx5_rsc_dump_cmd { + u64 mem_size; + u8 cmd[MLX5_ST_SZ_BYTES(resource_dump)]; +}; + +static int mlx5_rsc_dump_sgmt_get_by_name(char *name) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mlx5_rsc_sgmt_name); i++) + if (!strcmp(name, mlx5_rsc_sgmt_name[i])) + return i; + + return -EINVAL; +} + +#define MLX5_RSC_DUMP_MENU_HEADER_SIZE (MLX5_ST_SZ_BYTES(resource_dump_info_segment) + \ + MLX5_ST_SZ_BYTES(resource_dump_command_segment) + \ + MLX5_ST_SZ_BYTES(resource_dump_menu_segment)) + +static int mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page, + int read_size, int start_idx) +{ + void *data = page_address(page); + enum mlx5_sgmt_type sgmt_idx; + int num_of_items; + char *sgmt_name; + void *member; + int size = 0; + void *menu; + int i; + + if (!start_idx) { + menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu); + rsc_dump->number_of_menu_items = MLX5_GET(resource_dump_menu_segment, menu, + num_of_records); + size = 
MLX5_RSC_DUMP_MENU_HEADER_SIZE; + data += size; + } + num_of_items = rsc_dump->number_of_menu_items; + + for (i = 0; start_idx + i < num_of_items; i++) { + size += MLX5_ST_SZ_BYTES(resource_dump_menu_record); + if (size >= read_size) + return start_idx + i; + + member = data + MLX5_ST_SZ_BYTES(resource_dump_menu_record) * i; + sgmt_name = MLX5_ADDR_OF(resource_dump_menu_record, member, segment_name); + sgmt_idx = mlx5_rsc_dump_sgmt_get_by_name(sgmt_name); + if (sgmt_idx == -EINVAL) + continue; + rsc_dump->fw_segment_type[sgmt_idx] = MLX5_GET(resource_dump_menu_record, + member, segment_type); + } + return 0; +} + +static int mlx5_rsc_dump_trigger(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd, + struct page *page) +{ + struct mlx5_rsc_dump *rsc_dump = dev->rsc_dump; + struct device *ddev = mlx5_core_dma_dev(dev); + u32 out_seq_num; + u32 in_seq_num; + dma_addr_t dma; + int err; + + dma = dma_map_page(ddev, page, 0, cmd->mem_size, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(ddev, dma))) + return -ENOMEM; + + in_seq_num = MLX5_GET(resource_dump, cmd->cmd, seq_num); + MLX5_SET(resource_dump, cmd->cmd, mkey, rsc_dump->mkey); + MLX5_SET64(resource_dump, cmd->cmd, address, dma); + + err = mlx5_core_access_reg(dev, cmd->cmd, sizeof(cmd->cmd), cmd->cmd, + sizeof(cmd->cmd), MLX5_REG_RESOURCE_DUMP, 0, 1); + if (err) { + mlx5_core_err(dev, "Resource dump: Failed to access err %d\n", err); + goto out; + } + out_seq_num = MLX5_GET(resource_dump, cmd->cmd, seq_num); + if (out_seq_num && (in_seq_num + 1 != out_seq_num)) + err = -EIO; +out: + dma_unmap_page(ddev, dma, cmd->mem_size, DMA_FROM_DEVICE); + return err; +} + +struct mlx5_rsc_dump_cmd *mlx5_rsc_dump_cmd_create(struct mlx5_core_dev *dev, + struct mlx5_rsc_key *key) +{ + struct mlx5_rsc_dump_cmd *cmd; + int sgmt_type; + + if (IS_ERR_OR_NULL(dev->rsc_dump)) + return ERR_PTR(-EOPNOTSUPP); + + sgmt_type = dev->rsc_dump->fw_segment_type[key->rsc]; + if (!sgmt_type && key->rsc != MLX5_SGMT_TYPE_MENU) + return ERR_PTR(-EOPNOTSUPP); + + cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); + if (!cmd) { + mlx5_core_err(dev, "Resource dump: Failed to allocate command\n"); + return ERR_PTR(-ENOMEM); + } + MLX5_SET(resource_dump, cmd->cmd, segment_type, sgmt_type); + MLX5_SET(resource_dump, cmd->cmd, index1, key->index1); + MLX5_SET(resource_dump, cmd->cmd, index2, key->index2); + MLX5_SET(resource_dump, cmd->cmd, num_of_obj1, key->num_of_obj1); + MLX5_SET(resource_dump, cmd->cmd, num_of_obj2, key->num_of_obj2); + MLX5_SET(resource_dump, cmd->cmd, size, key->size); + cmd->mem_size = key->size; + return cmd; +} +EXPORT_SYMBOL(mlx5_rsc_dump_cmd_create); + +void mlx5_rsc_dump_cmd_destroy(struct mlx5_rsc_dump_cmd *cmd) +{ + kfree(cmd); +} +EXPORT_SYMBOL(mlx5_rsc_dump_cmd_destroy); + +int mlx5_rsc_dump_next(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd, + struct page *page, int *size) +{ + bool more_dump; + int err; + + if (IS_ERR_OR_NULL(dev->rsc_dump)) + return -EOPNOTSUPP; + + err = mlx5_rsc_dump_trigger(dev, cmd, page); + if (err) { + mlx5_core_err(dev, "Resource dump: Failed to trigger dump, %d\n", err); + return err; + } + *size = MLX5_GET(resource_dump, cmd->cmd, size); + more_dump = MLX5_GET(resource_dump, cmd->cmd, more_dump); + + return more_dump; +} +EXPORT_SYMBOL(mlx5_rsc_dump_next); + +#define MLX5_RSC_DUMP_MENU_SEGMENT 0xffff +static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev) +{ + struct mlx5_rsc_dump_cmd *cmd = NULL; + struct mlx5_rsc_key key = {}; + struct page *page; + int start_idx = 0; + int size; + int err; + + page = 
alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + key.rsc = MLX5_SGMT_TYPE_MENU; + key.size = PAGE_SIZE; + cmd = mlx5_rsc_dump_cmd_create(dev, &key); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto free_page; + } + MLX5_SET(resource_dump, cmd->cmd, segment_type, MLX5_RSC_DUMP_MENU_SEGMENT); + + do { + err = mlx5_rsc_dump_next(dev, cmd, page, &size); + if (err < 0) + goto destroy_cmd; + + start_idx = mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page, size, start_idx); + + } while (err > 0); + +destroy_cmd: + mlx5_rsc_dump_cmd_destroy(cmd); +free_page: + __free_page(page); + + return err; +} + +static int mlx5_rsc_dump_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, + u32 *mkey) +{ + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + void *mkc; + u32 *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET(mkc, mkc, pd, pdn); + MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + + err = mlx5_core_create_mkey(mdev, mkey, in, inlen); + + kvfree(in); + return err; +} + +struct mlx5_rsc_dump *mlx5_rsc_dump_create(struct mlx5_core_dev *dev) +{ + struct mlx5_rsc_dump *rsc_dump; + + if (!MLX5_CAP_DEBUG(dev, resource_dump)) { + mlx5_core_dbg(dev, "Resource dump: capability not present\n"); + return NULL; + } + rsc_dump = kzalloc(sizeof(*rsc_dump), GFP_KERNEL); + if (!rsc_dump) + return ERR_PTR(-ENOMEM); + + return rsc_dump; +} + +void mlx5_rsc_dump_destroy(struct mlx5_core_dev *dev) +{ + if (IS_ERR_OR_NULL(dev->rsc_dump)) + return; + kfree(dev->rsc_dump); +} + +int mlx5_rsc_dump_init(struct mlx5_core_dev *dev) +{ + struct mlx5_rsc_dump *rsc_dump = dev->rsc_dump; + int err; + + if (IS_ERR_OR_NULL(dev->rsc_dump)) + return 0; + + err = mlx5_core_alloc_pd(dev, &rsc_dump->pdn); + if (err) { + mlx5_core_warn(dev, "Resource dump: Failed to allocate PD %d\n", err); + return err; + } + err = mlx5_rsc_dump_create_mkey(dev, rsc_dump->pdn, &rsc_dump->mkey); + if (err) { + mlx5_core_err(dev, "Resource dump: Failed to create mkey, %d\n", err); + goto free_pd; + } + err = mlx5_rsc_dump_menu(dev); + if (err) { + mlx5_core_err(dev, "Resource dump: Failed to read menu, %d\n", err); + goto destroy_mkey; + } + return err; + +destroy_mkey: + mlx5_core_destroy_mkey(dev, rsc_dump->mkey); +free_pd: + mlx5_core_dealloc_pd(dev, rsc_dump->pdn); + return err; +} + +void mlx5_rsc_dump_cleanup(struct mlx5_core_dev *dev) +{ + if (IS_ERR_OR_NULL(dev->rsc_dump)) + return; + + mlx5_core_destroy_mkey(dev, dev->rsc_dump->mkey); + mlx5_core_dealloc_pd(dev, dev->rsc_dump->pdn); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.h new file mode 100644 index 000000000..64c4956db --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_RSC_DUMP_H +#define __MLX5_RSC_DUMP_H + +#include <linux/mlx5/rsc_dump.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" + +#define MLX5_RSC_DUMP_ALL 0xFFFF +struct mlx5_rsc_dump_cmd; +struct mlx5_rsc_dump; + +struct mlx5_rsc_dump *mlx5_rsc_dump_create(struct mlx5_core_dev *dev); +void mlx5_rsc_dump_destroy(struct mlx5_core_dev *dev); + +int mlx5_rsc_dump_init(struct mlx5_core_dev *dev); +void mlx5_rsc_dump_cleanup(struct mlx5_core_dev *dev); + +struct mlx5_rsc_dump_cmd *mlx5_rsc_dump_cmd_create(struct mlx5_core_dev *dev, + struct mlx5_rsc_key *key); +void mlx5_rsc_dump_cmd_destroy(struct mlx5_rsc_dump_cmd *cmd); + +int mlx5_rsc_dump_next(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd, + struct page *page, int *size); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c new file mode 100644 index 000000000..d000236dd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "ecpf.h" + +bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) +{ + return (ioread32be(&dev->iseg->initializing) >> MLX5_ECPU_BIT_NUM) & 1; +} + +static bool mlx5_ecpf_esw_admins_host_pf(const struct mlx5_core_dev *dev) +{ + /* In separate host mode, PF enables itself. + * When ECPF is eswitch manager, eswitch enables host PF after + * eswitch is setup. + */ + return mlx5_core_is_ecpf_esw_manager(dev); +} + +int mlx5_cmd_host_pf_enable_hca(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {}; + + MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); + MLX5_SET(enable_hca_in, in, function_id, 0); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); +} + +int mlx5_cmd_host_pf_disable_hca(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {}; + + MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); + MLX5_SET(disable_hca_in, in, function_id, 0); + MLX5_SET(disable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_host_pf_init(struct mlx5_core_dev *dev) +{ + int err; + + if (mlx5_ecpf_esw_admins_host_pf(dev)) + return 0; + + /* ECPF shall enable HCA for host PF in the same way a PF + * does this for its VFs when ECPF is not a eswitch manager. 
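+ * When the ECPF is the eswitch manager, mlx5_ecpf_esw_admins_host_pf()
+ * returns true and the host PF is enabled by the eswitch once it is set
+ * up, so this path is skipped.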
+ */ + err = mlx5_cmd_host_pf_enable_hca(dev); + if (err) + mlx5_core_err(dev, "Failed to enable external host PF HCA err(%d)\n", err); + + return err; +} + +static void mlx5_host_pf_cleanup(struct mlx5_core_dev *dev) +{ + int err; + + if (mlx5_ecpf_esw_admins_host_pf(dev)) + return; + + err = mlx5_cmd_host_pf_disable_hca(dev); + if (err) { + mlx5_core_err(dev, "Failed to disable external host PF HCA err(%d)\n", err); + return; + } +} + +int mlx5_ec_init(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_ecpf(dev)) + return 0; + + return mlx5_host_pf_init(dev); +} + +void mlx5_ec_cleanup(struct mlx5_core_dev *dev) +{ + int err; + + if (!mlx5_core_is_ecpf(dev)) + return; + + mlx5_host_pf_cleanup(dev); + + err = mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_HOST_PF]); + if (err) + mlx5_core_warn(dev, "Timeout reclaiming external host PF pages err(%d)\n", err); + + err = mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_VF]); + if (err) + mlx5_core_warn(dev, "Timeout reclaiming external host VFs pages err(%d)\n", err); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h new file mode 100644 index 000000000..40b6ad76d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_ECPF_H__ +#define __MLX5_ECPF_H__ + +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" + +#ifdef CONFIG_MLX5_ESWITCH + +enum { + MLX5_ECPU_BIT_NUM = 23, +}; + +bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev); +int mlx5_ec_init(struct mlx5_core_dev *dev); +void mlx5_ec_cleanup(struct mlx5_core_dev *dev); + +int mlx5_cmd_host_pf_enable_hca(struct mlx5_core_dev *dev); +int mlx5_cmd_host_pf_disable_hca(struct mlx5_core_dev *dev); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline bool +mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; } +static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {} + +#endif /* CONFIG_MLX5_ESWITCH */ + +#endif /* __MLX5_ECPF_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h new file mode 100644 index 000000000..0ee456480 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -0,0 +1,1246 @@ +/* + * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __MLX5_EN_H__ +#define __MLX5_EN_H__ + +#include <linux/if_vlan.h> +#include <linux/etherdevice.h> +#include <linux/timecounter.h> +#include <linux/net_tstamp.h> +#include <linux/crash_dump.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/qp.h> +#include <linux/mlx5/cq.h> +#include <linux/mlx5/port.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/transobj.h> +#include <linux/mlx5/fs.h> +#include <linux/rhashtable.h> +#include <net/udp_tunnel.h> +#include <net/switchdev.h> +#include <net/xdp.h> +#include <linux/dim.h> +#include <linux/bits.h> +#include "wq.h" +#include "mlx5_core.h" +#include "en_stats.h" +#include "en/dcbnl.h" +#include "en/fs.h" +#include "en/qos.h" +#include "lib/hv_vhca.h" +#include "lib/clock.h" +#include "en/rx_res.h" +#include "en/selq.h" + +extern const struct net_device_ops mlx5e_netdev_ops; +struct page_pool; + +#define MLX5E_METADATA_ETHER_TYPE (0x8CE4) +#define MLX5E_METADATA_ETHER_LEN 8 + +#define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) + +#define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu)) +#define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu)) + +#define MLX5E_MAX_NUM_TC 8 +#define MLX5E_MAX_NUM_MQPRIO_CH_TC TC_QOPT_MAX_QUEUE + +#define MLX5_RX_HEADROOM NET_SKB_PAD +#define MLX5_SKB_FRAG_SZ(len) (SKB_DATA_ALIGN(len) + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + +#define MLX5E_RX_MAX_HEAD (256) +#define MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE (9) +#define MLX5E_SHAMPO_WQ_HEADER_PER_PAGE (PAGE_SIZE >> MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE) +#define MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE (64) +#define MLX5E_SHAMPO_WQ_RESRV_SIZE (64 * 1024) +#define MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE (4096) + +#define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \ + (6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */ +#define MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, req) \ + max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req) +#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) \ + MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, order_base_2(MLX5E_RX_MAX_HEAD)) + +#define MLX5_MPWRQ_MAX_LOG_WQE_SZ 18 + +/* Keep in sync with mlx5e_mpwrq_log_wqe_sz. + * These are theoretical maximums, which can be further restricted by + * capabilities. These values are used for static resource allocations and + * sanity checks. + * MLX5_SEND_WQE_MAX_SIZE is a bit bigger than the maximum cacheline-aligned WQE + * size actually used at runtime, but it's not a problem when calculating static + * array sizes. + */ +#define MLX5_UMR_MAX_MTT_SPACE \ + (ALIGN_DOWN(MLX5_SEND_WQE_MAX_SIZE - sizeof(struct mlx5e_umr_wqe), \ + MLX5_UMR_MTT_ALIGNMENT)) +#define MLX5_MPWRQ_MAX_PAGES_PER_WQE \ + rounddown_pow_of_two(MLX5_UMR_MAX_MTT_SPACE / sizeof(struct mlx5_mtt)) + +#define MLX5E_MAX_RQ_NUM_MTTS \ + (ALIGN_DOWN(U16_MAX, 4) * 2) /* Fits into u16 and aligned by WQEBB. */ +#define MLX5E_MAX_RQ_NUM_KSMS (U16_MAX - 1) /* So that num_ksms fits into u16. 
*/ +#define MLX5E_ORDER2_MAX_PACKET_MTU (order_base_2(10 * 1024)) + +#define MLX5E_MIN_SKB_FRAG_SZ (MLX5_SKB_FRAG_SZ(MLX5_RX_HEADROOM)) +#define MLX5E_LOG_MAX_RX_WQE_BULK \ + (ilog2(PAGE_SIZE / roundup_pow_of_two(MLX5E_MIN_SKB_FRAG_SZ))) + +#define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x6 +#define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE 0xa +#define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xd + +#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE (1 + MLX5E_LOG_MAX_RX_WQE_BULK) +#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa +#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd + +#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2 + +#define MLX5E_DEFAULT_LRO_TIMEOUT 32 +#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4 + +#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 +#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3 +#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 +#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10 +#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE 0x10 +#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20 +#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80 +#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2 + +#define MLX5E_MIN_NUM_CHANNELS 0x1 +#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE / 2) +#define MLX5E_TX_CQ_POLL_BUDGET 128 +#define MLX5E_TX_XSK_POLL_BUDGET 64 +#define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */ + +#define MLX5E_KLM_UMR_WQE_SZ(sgl_len)\ + (sizeof(struct mlx5e_umr_wqe) +\ + (sizeof(struct mlx5_klm) * (sgl_len))) + +#define MLX5E_KLM_UMR_WQEBBS(klm_entries) \ + (DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_BB)) + +#define MLX5E_KLM_UMR_DS_CNT(klm_entries)\ + (DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_DS)) + +#define MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size)\ + (((wqe_size) - sizeof(struct mlx5e_umr_wqe)) / sizeof(struct mlx5_klm)) + +#define MLX5E_KLM_ENTRIES_PER_WQE(wqe_size)\ + ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_ALIGNMENT) + +#define MLX5E_MAX_KLM_PER_WQE(mdev) \ + MLX5E_KLM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * mlx5e_get_max_sq_aligned_wqebbs(mdev)) + +#define MLX5E_MSG_LEVEL NETIF_MSG_LINK + +#define mlx5e_dbg(mlevel, priv, format, ...) \ +do { \ + if (NETIF_MSG_##mlevel & (priv)->msglevel) \ + netdev_warn(priv->netdev, format, \ + ##__VA_ARGS__); \ +} while (0) + +#define mlx5e_state_dereference(priv, p) \ + rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock)) + +static inline u8 mlx5e_get_num_lag_ports(struct mlx5_core_dev *mdev) +{ + if (mlx5_lag_is_lacp_owner(mdev)) + return 1; + + return clamp_t(u8, MLX5_CAP_GEN(mdev, num_lag_ports), 1, MLX5_MAX_PORTS); +} + +static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) +{ + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW, + wq_size / 2); + default: + return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES, + wq_size / 2); + } +} + +/* Use this function to get max num channels (rxqs/txqs) only to create netdev */ +static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) +{ + return is_kdump_kernel() ? + MLX5E_MIN_NUM_CHANNELS : + min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS); +} + +/* The maximum WQE size can be retrieved by max_wqe_sz_sq in + * bytes units. Driver hardens the limitation to 1KB (16 + * WQEBBs), unless firmware capability is stricter. 
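+ * In other words, 1KB / 16 WQEBBs = 64B per basic block (MLX5_SEND_WQE_BB),
+ * which is why max_wqe_sz_sq (reported in bytes) is divided by
+ * MLX5_SEND_WQE_BB below.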
+ */ +static inline u8 mlx5e_get_max_sq_wqebbs(struct mlx5_core_dev *mdev) +{ + BUILD_BUG_ON(MLX5_SEND_WQE_MAX_WQEBBS > U8_MAX); + + return (u8)min_t(u16, MLX5_SEND_WQE_MAX_WQEBBS, + MLX5_CAP_GEN(mdev, max_wqe_sz_sq) / MLX5_SEND_WQE_BB); +} + +static inline u8 mlx5e_get_max_sq_aligned_wqebbs(struct mlx5_core_dev *mdev) +{ +/* The return value will be multiplied by MLX5_SEND_WQEBB_NUM_DS. + * Since max_sq_wqebbs may be up to MLX5_SEND_WQE_MAX_WQEBBS == 16, + * see mlx5e_get_max_sq_wqebbs(), the multiplication (16 * 4 == 64) + * overflows the 6-bit DS field of Ctrl Segment. Use a bound lower + * than MLX5_SEND_WQE_MAX_WQEBBS to let a full-session WQE be + * cache-aligned. + */ + u8 wqebbs = mlx5e_get_max_sq_wqebbs(mdev); + + wqebbs = min_t(u8, wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 1); +#if L1_CACHE_BYTES >= 128 + wqebbs = ALIGN_DOWN(wqebbs, 2); +#endif + return wqebbs; +} + +struct mlx5e_tx_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_eth_seg eth; + struct mlx5_wqe_data_seg data[]; +}; + +struct mlx5e_rx_wqe_ll { + struct mlx5_wqe_srq_next_seg next; + struct mlx5_wqe_data_seg data[]; +}; + +struct mlx5e_rx_wqe_cyc { + struct mlx5_wqe_data_seg data[0]; +}; + +struct mlx5e_umr_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_umr_ctrl_seg uctrl; + struct mlx5_mkey_seg mkc; + union { + DECLARE_FLEX_ARRAY(struct mlx5_mtt, inline_mtts); + DECLARE_FLEX_ARRAY(struct mlx5_klm, inline_klms); + DECLARE_FLEX_ARRAY(struct mlx5_ksm, inline_ksms); + }; +}; + +enum mlx5e_priv_flag { + MLX5E_PFLAG_RX_CQE_BASED_MODER, + MLX5E_PFLAG_TX_CQE_BASED_MODER, + MLX5E_PFLAG_RX_CQE_COMPRESS, + MLX5E_PFLAG_RX_STRIDING_RQ, + MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, + MLX5E_PFLAG_XDP_TX_MPWQE, + MLX5E_PFLAG_SKB_TX_MPWQE, + MLX5E_PFLAG_TX_PORT_TS, + MLX5E_NUM_PFLAGS, /* Keep last */ +}; + +#define MLX5E_SET_PFLAG(params, pflag, enable) \ + do { \ + if (enable) \ + (params)->pflags |= BIT(pflag); \ + else \ + (params)->pflags &= ~(BIT(pflag)); \ + } while (0) + +#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (BIT(pflag)))) + +enum packet_merge { + MLX5E_PACKET_MERGE_NONE, + MLX5E_PACKET_MERGE_LRO, + MLX5E_PACKET_MERGE_SHAMPO, +}; + +struct mlx5e_packet_merge_param { + enum packet_merge type; + u32 timeout; + struct { + u8 match_criteria_type; + u8 alignment_granularity; + } shampo; +}; + +struct mlx5e_params { + u8 log_sq_size; + u8 rq_wq_type; + u8 log_rq_mtu_frames; + u16 num_channels; + struct { + u16 mode; + u8 num_tc; + struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; + struct { + u64 max_rate[TC_MAX_QUEUE]; + u32 hw_id[TC_MAX_QUEUE]; + } channel; + } mqprio; + bool rx_cqe_compress_def; + struct dim_cq_moder rx_cq_moderation; + struct dim_cq_moder tx_cq_moderation; + struct mlx5e_packet_merge_param packet_merge; + u8 tx_min_inline_mode; + bool vlan_strip_disable; + bool scatter_fcs_en; + bool rx_dim_enabled; + bool tx_dim_enabled; + u32 pflags; + struct bpf_prog *xdp_prog; + struct mlx5e_xsk *xsk; + unsigned int sw_mtu; + int hard_mtu; + bool ptp_rx; +}; + +static inline u8 mlx5e_get_dcb_num_tc(struct mlx5e_params *params) +{ + return params->mqprio.mode == TC_MQPRIO_MODE_DCB ? 
+ params->mqprio.num_tc : 1; +} + +enum { + MLX5E_RQ_STATE_ENABLED, + MLX5E_RQ_STATE_RECOVERING, + MLX5E_RQ_STATE_AM, + MLX5E_RQ_STATE_NO_CSUM_COMPLETE, + MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */ + MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, /* set when mini_cqe_resp_stride_index cap is used */ + MLX5E_RQ_STATE_SHAMPO, /* set when SHAMPO cap is used */ +}; + +struct mlx5e_cq { + /* data path - accessed per cqe */ + struct mlx5_cqwq wq; + + /* data path - accessed per napi poll */ + u16 event_ctr; + struct napi_struct *napi; + struct mlx5_core_cq mcq; + struct mlx5e_ch_stats *ch_stats; + + /* control */ + struct net_device *netdev; + struct mlx5_core_dev *mdev; + struct mlx5e_priv *priv; + struct mlx5_wq_ctrl wq_ctrl; +} ____cacheline_aligned_in_smp; + +struct mlx5e_cq_decomp { + /* cqe decompression */ + struct mlx5_cqe64 title; + struct mlx5_mini_cqe8 mini_arr[MLX5_MINI_CQE_ARRAY_SIZE]; + u8 mini_arr_idx; + u16 left; + u16 wqe_counter; +} ____cacheline_aligned_in_smp; + +enum mlx5e_dma_map_type { + MLX5E_DMA_MAP_SINGLE, + MLX5E_DMA_MAP_PAGE +}; + +struct mlx5e_sq_dma { + dma_addr_t addr; + u32 size; + enum mlx5e_dma_map_type type; +}; + +enum { + MLX5E_SQ_STATE_ENABLED, + MLX5E_SQ_STATE_MPWQE, + MLX5E_SQ_STATE_RECOVERING, + MLX5E_SQ_STATE_IPSEC, + MLX5E_SQ_STATE_AM, + MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, + MLX5E_SQ_STATE_PENDING_XSK_TX, + MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, + MLX5E_SQ_STATE_XDP_MULTIBUF, +}; + +struct mlx5e_tx_mpwqe { + /* Current MPWQE session */ + struct mlx5e_tx_wqe *wqe; + u32 bytes_count; + u8 ds_count; + u8 pkt_count; + u8 inline_on; +}; + +struct mlx5e_skb_fifo { + struct sk_buff **fifo; + u16 *pc; + u16 *cc; + u16 mask; +}; + +struct mlx5e_ptpsq; + +struct mlx5e_txqsq { + /* data path */ + + /* dirtied @completion */ + u16 cc; + u16 skb_fifo_cc; + u32 dma_fifo_cc; + struct dim dim; /* Adaptive Moderation */ + + /* dirtied @xmit */ + u16 pc ____cacheline_aligned_in_smp; + u16 skb_fifo_pc; + u32 dma_fifo_pc; + struct mlx5e_tx_mpwqe mpwqe; + + struct mlx5e_cq cq; + + /* read only */ + struct mlx5_wq_cyc wq; + u32 dma_fifo_mask; + struct mlx5e_sq_stats *stats; + struct { + struct mlx5e_sq_dma *dma_fifo; + struct mlx5e_skb_fifo skb_fifo; + struct mlx5e_tx_wqe_info *wqe_info; + } db; + void __iomem *uar_map; + struct netdev_queue *txq; + u32 sqn; + u16 stop_room; + u8 max_sq_mpw_wqebbs; + u8 min_inline_mode; + struct device *pdev; + __be32 mkey_be; + unsigned long state; + unsigned int hw_mtu; + struct mlx5_clock *clock; + struct net_device *netdev; + struct mlx5_core_dev *mdev; + struct mlx5e_priv *priv; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + int ch_ix; + int txq_ix; + u32 rate_limit; + struct work_struct recover_work; + struct mlx5e_ptpsq *ptpsq; + cqe_ts_to_ns ptp_cyc2time; +} ____cacheline_aligned_in_smp; + +union mlx5e_alloc_unit { + struct page *page; + struct xdp_buff *xsk; +}; + +/* XDP packets can be transmitted in different ways. On completion, we need to + * distinguish between them to clean up things in a proper way. + */ +enum mlx5e_xdp_xmit_mode { + /* An xdp_frame was transmitted due to either XDP_REDIRECT from another + * device or XDP_TX from an XSK RQ. The frame has to be unmapped and + * returned. + */ + MLX5E_XDP_XMIT_MODE_FRAME, + + /* The xdp_frame was created in place as a result of XDP_TX from a + * regular RQ. No DMA remapping happened, and the page belongs to us. + */ + MLX5E_XDP_XMIT_MODE_PAGE, + + /* No xdp_frame was created at all, the transmit happened from a UMEM + * page. 
The UMEM Completion Ring producer pointer has to be increased. + */ + MLX5E_XDP_XMIT_MODE_XSK, +}; + +struct mlx5e_xdp_info { + enum mlx5e_xdp_xmit_mode mode; + union { + struct { + struct xdp_frame *xdpf; + dma_addr_t dma_addr; + } frame; + struct { + struct mlx5e_rq *rq; + struct page *page; + } page; + }; +}; + +struct mlx5e_xmit_data { + dma_addr_t dma_addr; + void *data; + u32 len; +}; + +struct mlx5e_xdp_info_fifo { + struct mlx5e_xdp_info *xi; + u32 *cc; + u32 *pc; + u32 mask; +}; + +struct mlx5e_xdpsq; +typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *); +typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *, + struct mlx5e_xmit_data *, + struct skb_shared_info *, + int); + +struct mlx5e_xdpsq { + /* data path */ + + /* dirtied @completion */ + u32 xdpi_fifo_cc; + u16 cc; + + /* dirtied @xmit */ + u32 xdpi_fifo_pc ____cacheline_aligned_in_smp; + u16 pc; + struct mlx5_wqe_ctrl_seg *doorbell_cseg; + struct mlx5e_tx_mpwqe mpwqe; + + struct mlx5e_cq cq; + + /* read only */ + struct xsk_buff_pool *xsk_pool; + struct mlx5_wq_cyc wq; + struct mlx5e_xdpsq_stats *stats; + mlx5e_fp_xmit_xdp_frame_check xmit_xdp_frame_check; + mlx5e_fp_xmit_xdp_frame xmit_xdp_frame; + struct { + struct mlx5e_xdp_wqe_info *wqe_info; + struct mlx5e_xdp_info_fifo xdpi_fifo; + } db; + void __iomem *uar_map; + u32 sqn; + struct device *pdev; + __be32 mkey_be; + u16 stop_room; + u8 max_sq_mpw_wqebbs; + u8 min_inline_mode; + unsigned long state; + unsigned int hw_mtu; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5e_channel *channel; +} ____cacheline_aligned_in_smp; + +struct mlx5e_ktls_resync_resp; + +struct mlx5e_icosq { + /* data path */ + u16 cc; + u16 pc; + + struct mlx5_wqe_ctrl_seg *doorbell_cseg; + struct mlx5e_cq cq; + + /* write@xmit, read@completion */ + struct { + struct mlx5e_icosq_wqe_info *wqe_info; + } db; + + /* read only */ + struct mlx5_wq_cyc wq; + void __iomem *uar_map; + u32 sqn; + u16 reserved_room; + unsigned long state; + struct mlx5e_ktls_resync_resp *ktls_resync; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5e_channel *channel; + + struct work_struct recover_work; +} ____cacheline_aligned_in_smp; + +struct mlx5e_wqe_frag_info { + union mlx5e_alloc_unit *au; + u32 offset; + bool last_in_page; +}; + +struct mlx5e_mpw_info { + u16 consumed_strides; + DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE); + union mlx5e_alloc_unit alloc_units[]; +}; + +#define MLX5E_MAX_RX_FRAGS 4 + +/* a single cache unit is capable to serve one napi call (for non-striding rq) + * or a MPWQE (for striding rq). + */ +#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_MAX_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? 
\ + MLX5_MPWRQ_MAX_PAGES_PER_WQE : NAPI_POLL_WEIGHT) +#define MLX5E_CACHE_SIZE (4 * roundup_pow_of_two(MLX5E_CACHE_UNIT)) +struct mlx5e_page_cache { + u32 head; + u32 tail; + struct page *page_cache[MLX5E_CACHE_SIZE]; +}; + +struct mlx5e_rq; +typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*); +typedef struct sk_buff * +(*mlx5e_fp_skb_from_cqe_mpwrq)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, u32 head_offset, u32 page_idx); +typedef struct sk_buff * +(*mlx5e_fp_skb_from_cqe)(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, + u32 cqe_bcnt); +typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq); +typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16); +typedef void (*mlx5e_fp_shampo_dealloc_hd)(struct mlx5e_rq*, u16, u16, bool); + +int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool xsk); +void mlx5e_rq_set_trap_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params); + +enum mlx5e_rq_flag { + MLX5E_RQ_FLAG_XDP_XMIT, + MLX5E_RQ_FLAG_XDP_REDIRECT, +}; + +struct mlx5e_rq_frag_info { + int frag_size; + int frag_stride; +}; + +struct mlx5e_rq_frags_info { + struct mlx5e_rq_frag_info arr[MLX5E_MAX_RX_FRAGS]; + u8 num_frags; + u8 log_num_frags; + u8 wqe_bulk; + u8 wqe_index_mask; +}; + +struct mlx5e_dma_info { + dma_addr_t addr; + struct page *page; +}; + +struct mlx5e_shampo_hd { + u32 mkey; + struct mlx5e_dma_info *info; + struct page *last_page; + u16 hd_per_wq; + u16 hd_per_wqe; + unsigned long *bitmap; + u16 pi; + u16 ci; + __be32 key; + u64 last_addr; +}; + +struct mlx5e_hw_gro_data { + struct sk_buff *skb; + struct flow_keys fk; + int second_ip_id; +}; + +enum mlx5e_mpwrq_umr_mode { + MLX5E_MPWRQ_UMR_MODE_ALIGNED, + MLX5E_MPWRQ_UMR_MODE_UNALIGNED, + MLX5E_MPWRQ_UMR_MODE_OVERSIZED, + MLX5E_MPWRQ_UMR_MODE_TRIPLE, +}; + +struct mlx5e_rq { + /* data path */ + union { + struct { + struct mlx5_wq_cyc wq; + struct mlx5e_wqe_frag_info *frags; + union mlx5e_alloc_unit *alloc_units; + struct mlx5e_rq_frags_info info; + mlx5e_fp_skb_from_cqe skb_from_cqe; + } wqe; + struct { + struct mlx5_wq_ll wq; + struct mlx5e_umr_wqe umr_wqe; + struct mlx5e_mpw_info *info; + mlx5e_fp_skb_from_cqe_mpwrq skb_from_cqe_mpwrq; + __be32 umr_mkey_be; + u16 num_strides; + u16 actual_wq_head; + u8 log_stride_sz; + u8 umr_in_progress; + u8 umr_last_bulk; + u8 umr_completed; + u8 min_wqe_bulk; + u8 page_shift; + u8 pages_per_wqe; + u8 umr_wqebbs; + u8 mtts_per_wqe; + u8 umr_mode; + struct mlx5e_shampo_hd *shampo; + } mpwqe; + }; + struct { + u16 headroom; + u32 frame0_sz; + u8 map_dir; /* dma map direction */ + } buff; + + struct device *pdev; + struct net_device *netdev; + struct mlx5e_rq_stats *stats; + struct mlx5e_cq cq; + struct mlx5e_cq_decomp cqd; + struct mlx5e_page_cache page_cache; + struct hwtstamp_config *tstamp; + struct mlx5_clock *clock; + struct mlx5e_icosq *icosq; + struct mlx5e_priv *priv; + + struct mlx5e_hw_gro_data *hw_gro_data; + + mlx5e_fp_handle_rx_cqe handle_rx_cqe; + mlx5e_fp_post_rx_wqes post_wqes; + mlx5e_fp_dealloc_wqe dealloc_wqe; + + unsigned long state; + int ix; + unsigned int hw_mtu; + + struct dim dim; /* Dynamic Interrupt Moderation */ + + /* XDP */ + struct bpf_prog __rcu *xdp_prog; + struct mlx5e_xdpsq *xdpsq; + DECLARE_BITMAP(flags, 8); + struct page_pool *page_pool; + + /* AF_XDP zero-copy */ + struct xsk_buff_pool *xsk_pool; + + struct work_struct recover_work; + + /* control */ + struct mlx5_wq_ctrl wq_ctrl; + __be32 mkey_be; + u8 wq_type; + u32 rqn; + struct mlx5_core_dev *mdev; + struct 
mlx5e_channel *channel; + struct mlx5e_dma_info wqe_overflow; + + /* XDP read-mostly */ + struct xdp_rxq_info xdp_rxq; + cqe_ts_to_ns ptp_cyc2time; +} ____cacheline_aligned_in_smp; + +enum mlx5e_channel_state { + MLX5E_CHANNEL_STATE_XSK, + MLX5E_CHANNEL_NUM_STATES +}; + +struct mlx5e_channel { + /* data path */ + struct mlx5e_rq rq; + struct mlx5e_xdpsq rq_xdpsq; + struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC]; + struct mlx5e_icosq icosq; /* internal control operations */ + struct mlx5e_txqsq __rcu * __rcu *qos_sqs; + bool xdp; + struct napi_struct napi; + struct device *pdev; + struct net_device *netdev; + __be32 mkey_be; + u16 qos_sqs_size; + u8 num_tc; + u8 lag_port; + + /* XDP_REDIRECT */ + struct mlx5e_xdpsq xdpsq; + + /* AF_XDP zero-copy */ + struct mlx5e_rq xskrq; + struct mlx5e_xdpsq xsksq; + + /* Async ICOSQ */ + struct mlx5e_icosq async_icosq; + /* async_icosq can be accessed from any CPU - the spinlock protects it. */ + spinlock_t async_icosq_lock; + + /* data path - accessed per napi poll */ + const struct cpumask *aff_mask; + struct mlx5e_ch_stats *stats; + + /* control */ + struct mlx5e_priv *priv; + struct mlx5_core_dev *mdev; + struct hwtstamp_config *tstamp; + DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); + int ix; + int cpu; + /* Sync between icosq recovery and XSK enable/disable. */ + struct mutex icosq_recovery_lock; +}; + +struct mlx5e_ptp; + +struct mlx5e_channels { + struct mlx5e_channel **c; + struct mlx5e_ptp *ptp; + unsigned int num; + struct mlx5e_params params; +}; + +struct mlx5e_channel_stats { + struct mlx5e_ch_stats ch; + struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC]; + struct mlx5e_rq_stats rq; + struct mlx5e_rq_stats xskrq; + struct mlx5e_xdpsq_stats rq_xdpsq; + struct mlx5e_xdpsq_stats xdpsq; + struct mlx5e_xdpsq_stats xsksq; +} ____cacheline_aligned_in_smp; + +struct mlx5e_ptp_stats { + struct mlx5e_ch_stats ch; + struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC]; + struct mlx5e_ptp_cq_stats cq[MLX5E_MAX_NUM_TC]; + struct mlx5e_rq_stats rq; +} ____cacheline_aligned_in_smp; + +enum { + MLX5E_STATE_OPENED, + MLX5E_STATE_DESTROYING, + MLX5E_STATE_XDP_TX_ENABLED, + MLX5E_STATE_XDP_ACTIVE, + MLX5E_STATE_CHANNELS_ACTIVE, +}; + +struct mlx5e_modify_sq_param { + int curr_state; + int next_state; + int rl_update; + int rl_index; + bool qos_update; + u16 qos_queue_group_id; +}; + +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) +struct mlx5e_hv_vhca_stats_agent { + struct mlx5_hv_vhca_agent *agent; + struct delayed_work work; + u16 delay; + void *buf; +}; +#endif + +struct mlx5e_xsk { + /* XSK buffer pools are stored separately from channels, + * because we don't want to lose them when channels are + * recreated. The kernel also stores buffer pool, but it doesn't + * distinguish between zero-copy and non-zero-copy UMEMs, so + * rely on our mechanism. + */ + struct xsk_buff_pool **pools; + u16 refcnt; + bool ever_used; +}; + +/* Temporary storage for variables that are allocated when struct mlx5e_priv is + * initialized, and used where we can't allocate them because that functions + * must not fail. Use with care and make sure the same variable is not used + * simultaneously by multiple users. 
+ */ +struct mlx5e_scratchpad { + cpumask_var_t cpumask; +}; + +struct mlx5e_trap; +struct mlx5e_htb; + +struct mlx5e_priv { + /* priv data path fields - start */ + struct mlx5e_selq selq; + struct mlx5e_txqsq **txq2sq; +#ifdef CONFIG_MLX5_CORE_EN_DCB + struct mlx5e_dcbx_dp dcbx_dp; +#endif + /* priv data path fields - end */ + + u32 msglevel; + unsigned long state; + struct mutex state_lock; /* Protects Interface state */ + struct mlx5e_rq drop_rq; + + struct mlx5e_channels channels; + u32 tisn[MLX5_MAX_PORTS][MLX5E_MAX_NUM_TC]; + struct mlx5e_rx_res *rx_res; + u32 *tx_rates; + + struct mlx5e_flow_steering *fs; + + struct workqueue_struct *wq; + struct work_struct update_carrier_work; + struct work_struct set_rx_mode_work; + struct work_struct tx_timeout_work; + struct work_struct update_stats_work; + struct work_struct monitor_counters_work; + struct mlx5_nb monitor_counters_nb; + + struct mlx5_core_dev *mdev; + struct net_device *netdev; + struct mlx5e_trap *en_trap; + struct mlx5e_stats stats; + struct mlx5e_channel_stats **channel_stats; + struct mlx5e_channel_stats trap_stats; + struct mlx5e_ptp_stats ptp_stats; + struct mlx5e_sq_stats **htb_qos_sq_stats; + u16 htb_max_qos_sqs; + u16 stats_nch; + u16 max_nch; + u8 max_opened_tc; + bool tx_ptp_opened; + bool rx_ptp_opened; + struct hwtstamp_config tstamp; + u16 q_counter; + u16 drop_rq_q_counter; + struct notifier_block events_nb; + struct notifier_block blocking_events_nb; + + struct udp_tunnel_nic_info nic_info; +#ifdef CONFIG_MLX5_CORE_EN_DCB + struct mlx5e_dcbx dcbx; +#endif + + const struct mlx5e_profile *profile; + void *ppriv; +#ifdef CONFIG_MLX5_EN_MACSEC + struct mlx5e_macsec *macsec; +#endif +#ifdef CONFIG_MLX5_EN_IPSEC + struct mlx5e_ipsec *ipsec; +#endif +#ifdef CONFIG_MLX5_EN_TLS + struct mlx5e_tls *tls; +#endif + struct devlink_health_reporter *tx_reporter; + struct devlink_health_reporter *rx_reporter; + struct mlx5e_xsk xsk; +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) + struct mlx5e_hv_vhca_stats_agent stats_agent; +#endif + struct mlx5e_scratchpad scratchpad; + struct mlx5e_htb *htb; + struct mlx5e_mqprio_rl *mqprio_rl; +}; + +struct mlx5e_rx_handlers { + mlx5e_fp_handle_rx_cqe handle_rx_cqe; + mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe; + mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe_shampo; +}; + +extern const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic; + +enum mlx5e_profile_feature { + MLX5E_PROFILE_FEATURE_PTP_RX, + MLX5E_PROFILE_FEATURE_PTP_TX, + MLX5E_PROFILE_FEATURE_QOS_HTB, + MLX5E_PROFILE_FEATURE_FS_VLAN, + MLX5E_PROFILE_FEATURE_FS_TC, +}; + +struct mlx5e_profile { + int (*init)(struct mlx5_core_dev *mdev, + struct net_device *netdev); + void (*cleanup)(struct mlx5e_priv *priv); + int (*init_rx)(struct mlx5e_priv *priv); + void (*cleanup_rx)(struct mlx5e_priv *priv); + int (*init_tx)(struct mlx5e_priv *priv); + void (*cleanup_tx)(struct mlx5e_priv *priv); + void (*enable)(struct mlx5e_priv *priv); + void (*disable)(struct mlx5e_priv *priv); + int (*update_rx)(struct mlx5e_priv *priv); + void (*update_stats)(struct mlx5e_priv *priv); + void (*update_carrier)(struct mlx5e_priv *priv); + int (*max_nch_limit)(struct mlx5_core_dev *mdev); + unsigned int (*stats_grps_num)(struct mlx5e_priv *priv); + mlx5e_stats_grp_t *stats_grps; + const struct mlx5e_rx_handlers *rx_handlers; + int max_tc; + u32 features; +}; + +#define mlx5e_profile_feature_cap(profile, feature) \ + ((profile)->features & BIT(MLX5E_PROFILE_FEATURE_##feature)) + +void mlx5e_build_ptys2ethtool_map(void); + +bool 
mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); + +void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close); +void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); +void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s); + +int mlx5e_self_test_num(struct mlx5e_priv *priv); +int mlx5e_self_test_fill_strings(struct mlx5e_priv *priv, u8 *data); +void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest, + u64 *buf); +void mlx5e_set_rx_mode_work(struct work_struct *work); + +int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr); +int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr); +int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val, bool rx_filter); + +int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, + u16 vid); +int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, + u16 vid); +void mlx5e_timestamp_init(struct mlx5e_priv *priv); + +struct mlx5e_xsk_param; + +struct mlx5e_rq_param; +int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, + struct mlx5e_xsk_param *xsk, int node, + struct mlx5e_rq *rq); +#define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */ +int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time); +void mlx5e_close_rq(struct mlx5e_rq *rq); +int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param); +void mlx5e_destroy_rq(struct mlx5e_rq *rq); + +struct mlx5e_sq_param; +int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool, + struct mlx5e_xdpsq *sq, bool is_redirect); +void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq); + +struct mlx5e_create_cq_param { + struct napi_struct *napi; + struct mlx5e_ch_stats *ch_stats; + int node; + int ix; +}; + +struct mlx5e_cq_param; +int mlx5e_open_cq(struct mlx5e_priv *priv, struct dim_cq_moder moder, + struct mlx5e_cq_param *param, struct mlx5e_create_cq_param *ccp, + struct mlx5e_cq *cq); +void mlx5e_close_cq(struct mlx5e_cq *cq); + +int mlx5e_open_locked(struct net_device *netdev); +int mlx5e_close_locked(struct net_device *netdev); + +void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c); +void mlx5e_trigger_napi_sched(struct napi_struct *napi); + +int mlx5e_open_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *chs); +void mlx5e_close_channels(struct mlx5e_channels *chs); + +/* Function pointer to be used to modify HW or kernel settings while + * switching channels + */ +typedef int (*mlx5e_fp_preactivate)(struct mlx5e_priv *priv, void *context); +#define MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(fn) \ +int fn##_ctx(struct mlx5e_priv *priv, void *context) \ +{ \ + return fn(priv); \ +} +int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv); +int mlx5e_safe_switch_params(struct mlx5e_priv *priv, + struct mlx5e_params *new_params, + mlx5e_fp_preactivate preactivate, + void *context, bool reset); +int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv); +int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context); +void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); +void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); +int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx); + +int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state); +void mlx5e_activate_rq(struct mlx5e_rq *rq); +void 
mlx5e_deactivate_rq(struct mlx5e_rq *rq); +void mlx5e_activate_icosq(struct mlx5e_icosq *icosq); +void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq); + +int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, + struct mlx5e_modify_sq_param *p); +int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix, + struct mlx5e_params *params, struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, + struct mlx5e_sq_stats *sq_stats); +void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq); +void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq); +void mlx5e_free_txqsq(struct mlx5e_txqsq *sq); +void mlx5e_tx_disable_queue(struct netdev_queue *txq); +int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa); +void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq); +struct mlx5e_create_sq_param; +int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, + struct mlx5e_sq_param *param, + struct mlx5e_create_sq_param *csp, + u16 qos_queue_group_id, + u32 *sqn); +void mlx5e_tx_err_cqe_work(struct work_struct *recover_work); +void mlx5e_close_txqsq(struct mlx5e_txqsq *sq); + +static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_ETH(mdev, swp) && + MLX5_CAP_ETH(mdev, swp_csum) && MLX5_CAP_ETH(mdev, swp_lso); +} + +extern const struct ethtool_ops mlx5e_ethtool_ops; + +int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey); +int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); +void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); +int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, + bool enable_mc_lb); +void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc); + +/* common netdev helpers */ +void mlx5e_create_q_counters(struct mlx5e_priv *priv); +void mlx5e_destroy_q_counters(struct mlx5e_priv *priv); +int mlx5e_open_drop_rq(struct mlx5e_priv *priv, + struct mlx5e_rq *drop_rq); +void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq); + +int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn); +void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); + +int mlx5e_create_tises(struct mlx5e_priv *priv); +void mlx5e_destroy_tises(struct mlx5e_priv *priv); +int mlx5e_update_nic_rx(struct mlx5e_priv *priv); +void mlx5e_update_carrier(struct mlx5e_priv *priv); +int mlx5e_close(struct net_device *netdev); +int mlx5e_open(struct net_device *netdev); + +void mlx5e_queue_update_stats(struct mlx5e_priv *priv); + +int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv); +int mlx5e_set_dev_port_mtu_ctx(struct mlx5e_priv *priv, void *context); +int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, + mlx5e_fp_preactivate preactivate); +void mlx5e_vxlan_set_netdev_info(struct mlx5e_priv *priv); + +/* ethtool helpers */ +void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, + struct ethtool_drvinfo *drvinfo); +void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, + uint32_t stringset, uint8_t *data); +int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset); +void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, + struct ethtool_stats *stats, u64 *data); +void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param); +int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, + struct ethtool_ringparam *param); +void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, + struct ethtool_channels *ch); +int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, + struct ethtool_channels 
*ch); +int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal); +int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack); +int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, + struct ethtool_link_ksettings *link_ksettings); +int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, + const struct ethtool_link_ksettings *link_ksettings); +int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc); +int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, + const u8 hfunc); +int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, + u32 *rule_locs); +int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd); +u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv); +u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv); +int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, + struct ethtool_ts_info *info); +int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, + struct ethtool_flash *flash); +void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam); +int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam); + +/* mlx5e generic netdev management API */ +static inline bool +mlx5e_tx_mpwqe_supported(struct mlx5_core_dev *mdev) +{ + return !is_kdump_kernel() && + MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe); +} + +int mlx5e_get_pf_num_tirs(struct mlx5_core_dev *mdev); +int mlx5e_priv_init(struct mlx5e_priv *priv, + const struct mlx5e_profile *profile, + struct net_device *netdev, + struct mlx5_core_dev *mdev); +void mlx5e_priv_cleanup(struct mlx5e_priv *priv); +struct net_device * +mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile); +int mlx5e_attach_netdev(struct mlx5e_priv *priv); +void mlx5e_detach_netdev(struct mlx5e_priv *priv); +void mlx5e_destroy_netdev(struct mlx5e_priv *priv); +int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, + const struct mlx5e_profile *new_profile, void *new_ppriv); +void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv); +void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); +void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu); +void mlx5e_rx_dim_work(struct work_struct *work); +void mlx5e_tx_dim_work(struct work_struct *work); + +netdev_features_t mlx5e_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features); +int mlx5e_set_features(struct net_device *netdev, netdev_features_t features); +#ifdef CONFIG_MLX5_ESWITCH +int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac); +int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate); +int mlx5e_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi); +int mlx5e_get_vf_stats(struct net_device *dev, int vf, struct ifla_vf_stats *vf_stats); +#endif +#endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c new file mode 100644 index 000000000..48581ea3a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. 
*/ + +#include "channels.h" +#include "en.h" +#include "en/ptp.h" + +unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs) +{ + return chs->num; +} + +static struct mlx5e_channel *mlx5e_channels_get(struct mlx5e_channels *chs, unsigned int ix) +{ + WARN_ON_ONCE(ix >= mlx5e_channels_get_num(chs)); + return chs->c[ix]; +} + +bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix) +{ + struct mlx5e_channel *c = mlx5e_channels_get(chs, ix); + + return test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); +} + +void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn) +{ + struct mlx5e_channel *c = mlx5e_channels_get(chs, ix); + + *rqn = c->rq.rqn; +} + +void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn) +{ + struct mlx5e_channel *c = mlx5e_channels_get(chs, ix); + + WARN_ON_ONCE(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)); + + *rqn = c->xskrq.rqn; +} + +bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn) +{ + struct mlx5e_ptp *c = chs->ptp; + + if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state)) + return false; + + *rqn = c->rq.rqn; + return true; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h new file mode 100644 index 000000000..637ca90da --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_EN_CHANNELS_H__ +#define __MLX5_EN_CHANNELS_H__ + +#include <linux/kernel.h> + +struct mlx5e_channels; + +unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs); +bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix); +void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn); +void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn); +bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn); + +#endif /* __MLX5_EN_CHANNELS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h new file mode 100644 index 000000000..b59aee75d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. 
*/ + +#ifndef __MLX5E_DCBNL_H__ +#define __MLX5E_DCBNL_H__ + +#ifdef CONFIG_MLX5_CORE_EN_DCB + +#define MLX5E_MAX_PRIORITY (8) + +struct mlx5e_cee_config { + /* bw pct for priority group */ + u8 pg_bw_pct[CEE_DCBX_MAX_PGS]; + u8 prio_to_pg_map[CEE_DCBX_MAX_PRIO]; + bool pfc_setting[CEE_DCBX_MAX_PRIO]; + bool pfc_enable; +}; + +struct mlx5e_dcbx { + enum mlx5_dcbx_oper_mode mode; + struct mlx5e_cee_config cee_cfg; /* pending configuration */ + u8 dscp_app_cnt; + + /* The only setting that cannot be read from FW */ + u8 tc_tsa[IEEE_8021QAZ_MAX_TCS]; + u8 cap; + + /* Buffer configuration */ + bool manual_buffer; + u32 cable_len; + u32 xoff; + u16 port_buff_cell_sz; +}; + +#define MLX5E_MAX_DSCP (64) + +struct mlx5e_dcbx_dp { + u8 dscp2prio[MLX5E_MAX_DSCP]; + u8 trust_state; +}; + +void mlx5e_dcbnl_build_netdev(struct net_device *netdev); +void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv); +void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv); +void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv); +#else +static inline void mlx5e_dcbnl_build_netdev(struct net_device *netdev) {} +static inline void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) {} +static inline void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv) {} +static inline void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) {} +#endif + +#endif /* __MLX5E_DCBNL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c new file mode 100644 index 000000000..b69f9d10c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#include "en/devlink.h" +#include "eswitch.h" + +static void +mlx5e_devlink_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_id *ppid) +{ + u64 parent_id; + + parent_id = mlx5_query_nic_system_image_guid(dev); + ppid->id_len = sizeof(parent_id); + memcpy(ppid->id, &parent_id, sizeof(parent_id)); +} + +int mlx5e_devlink_port_register(struct mlx5e_priv *priv) +{ + struct devlink *devlink = priv_to_devlink(priv->mdev); + struct devlink_port_attrs attrs = {}; + struct netdev_phys_item_id ppid = {}; + struct devlink_port *dl_port; + unsigned int dl_port_index; + int ret; + + if (mlx5_core_is_pf(priv->mdev)) { + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + attrs.phys.port_number = mlx5_get_dev_index(priv->mdev); + if (MLX5_ESWITCH_MANAGER(priv->mdev)) { + mlx5e_devlink_get_port_parent_id(priv->mdev, &ppid); + memcpy(attrs.switch_id.id, ppid.id, ppid.id_len); + attrs.switch_id.id_len = ppid.id_len; + } + dl_port_index = mlx5_esw_vport_to_devlink_port_index(priv->mdev, + MLX5_VPORT_UPLINK); + } else { + attrs.flavour = DEVLINK_PORT_FLAVOUR_VIRTUAL; + dl_port_index = mlx5_esw_vport_to_devlink_port_index(priv->mdev, 0); + } + + dl_port = mlx5e_devlink_get_dl_port(priv); + memset(dl_port, 0, sizeof(*dl_port)); + devlink_port_attrs_set(dl_port, &attrs); + + if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW)) + devl_lock(devlink); + ret = devl_port_register(devlink, dl_port, dl_port_index); + if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW)) + devl_unlock(devlink); + + return ret; +} + +void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv) +{ + struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); + + devlink_port_type_eth_set(dl_port, priv->netdev); +} + +void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv) +{ + struct 
devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); + struct devlink *devlink = priv_to_devlink(priv->mdev); + + if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW)) + devl_lock(devlink); + devl_port_unregister(dl_port); + if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW)) + devl_unlock(devlink); +} + +struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (!netif_device_present(dev)) + return NULL; + + return mlx5e_devlink_get_dl_port(priv); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h new file mode 100644 index 000000000..10b50feb9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5E_EN_DEVLINK_H +#define __MLX5E_EN_DEVLINK_H + +#include <net/devlink.h> +#include "en.h" + +int mlx5e_devlink_port_register(struct mlx5e_priv *priv); +void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv); +void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv); +struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev); + +static inline struct devlink_port * +mlx5e_devlink_get_dl_port(struct mlx5e_priv *priv) +{ + return &priv->mdev->mlx5e_res.dl_port; +} + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h new file mode 100644 index 000000000..bf2741eb7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -0,0 +1,204 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. 
*/ + +#ifndef __MLX5E_FLOW_STEER_H__ +#define __MLX5E_FLOW_STEER_H__ + +#include "mod_hdr.h" +#include "lib/fs_ttc.h" + +struct mlx5e_post_act; +struct mlx5e_tc_table; + +enum { + MLX5E_TC_FT_LEVEL = 0, + MLX5E_TC_TTC_FT_LEVEL, + MLX5E_TC_MISS_LEVEL, +}; + +enum { + MLX5E_TC_PRIO = 0, + MLX5E_NIC_PRIO +}; + +struct mlx5e_flow_table { + int num_groups; + struct mlx5_flow_table *t; + struct mlx5_flow_group **g; +}; + +struct mlx5e_l2_rule { + u8 addr[ETH_ALEN + 2]; + struct mlx5_flow_handle *rule; +}; + +#define MLX5E_L2_ADDR_HASH_SIZE BIT(BITS_PER_BYTE) + +struct mlx5e_promisc_table { + struct mlx5e_flow_table ft; + struct mlx5_flow_handle *rule; +}; + +/* Forward declaration and APIs to get private fields of vlan_table */ +struct mlx5e_vlan_table; +unsigned long *mlx5e_vlan_get_active_svlans(struct mlx5e_vlan_table *vlan); +struct mlx5_flow_table *mlx5e_vlan_get_flowtable(struct mlx5e_vlan_table *vlan); + +struct mlx5e_l2_table { + struct mlx5e_flow_table ft; + struct hlist_head netdev_uc[MLX5E_L2_ADDR_HASH_SIZE]; + struct hlist_head netdev_mc[MLX5E_L2_ADDR_HASH_SIZE]; + struct mlx5e_l2_rule broadcast; + struct mlx5e_l2_rule allmulti; + struct mlx5_flow_handle *trap_rule; + bool broadcast_enabled; + bool allmulti_enabled; + bool promisc_enabled; +}; + +#define MLX5E_NUM_INDIR_TIRS (MLX5_NUM_TT - 1) + +#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP) +#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP |\ + MLX5_HASH_FIELD_SEL_L4_SPORT |\ + MLX5_HASH_FIELD_SEL_L4_DPORT) +#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP |\ + MLX5_HASH_FIELD_SEL_IPSEC_SPI) + +/* NIC prio FTS */ +enum { + MLX5E_PROMISC_FT_LEVEL, + MLX5E_VLAN_FT_LEVEL, + MLX5E_L2_FT_LEVEL, + MLX5E_TTC_FT_LEVEL, + MLX5E_INNER_TTC_FT_LEVEL, + MLX5E_FS_TT_UDP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, + MLX5E_FS_TT_ANY_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, +#ifdef CONFIG_MLX5_EN_TLS + MLX5E_ACCEL_FS_TCP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, +#endif +#ifdef CONFIG_MLX5_EN_ARFS + MLX5E_ARFS_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, +#endif +#ifdef CONFIG_MLX5_EN_IPSEC + MLX5E_ACCEL_FS_ESP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, + MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL, +#endif +}; + +struct mlx5e_flow_steering; +struct mlx5e_rx_res; + +#ifdef CONFIG_MLX5_EN_ARFS +struct mlx5e_arfs_tables; + +int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, bool ntuple); +void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple); +int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs); +int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs); +int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, + u16 rxq_index, u32 flow_id); +#else +static inline int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, bool ntuple) +{ return 0; } +static inline void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple) {} +static inline int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs) +{ return -EOPNOTSUPP; } +static inline int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs) +{ return -EOPNOTSUPP; } +#endif + +#ifdef CONFIG_MLX5_EN_TLS +struct mlx5e_accel_fs_tcp; +#endif + +struct mlx5e_profile; +struct mlx5e_fs_udp; +struct mlx5e_fs_any; +struct mlx5e_ptp_fs; + +void mlx5e_set_ttc_params(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, + struct ttc_params *ttc_params, bool tunnel); + +void 
mlx5e_destroy_ttc_table(struct mlx5e_flow_steering *fs); +int mlx5e_create_ttc_table(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res); + +void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft); + +void mlx5e_enable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc); +void mlx5e_disable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc); + +int mlx5e_create_flow_steering(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, + const struct mlx5e_profile *profile, + struct net_device *netdev); +void mlx5e_destroy_flow_steering(struct mlx5e_flow_steering *fs, bool ntuple, + const struct mlx5e_profile *profile); + +struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile, + struct mlx5_core_dev *mdev, + bool state_destroy); +void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs); +struct mlx5e_vlan_table *mlx5e_fs_get_vlan(struct mlx5e_flow_steering *fs); +void mlx5e_fs_set_tc(struct mlx5e_flow_steering *fs, struct mlx5e_tc_table *tc); +struct mlx5e_tc_table *mlx5e_fs_get_tc(struct mlx5e_flow_steering *fs); +struct mlx5e_l2_table *mlx5e_fs_get_l2(struct mlx5e_flow_steering *fs); +struct mlx5_flow_namespace *mlx5e_fs_get_ns(struct mlx5e_flow_steering *fs, bool egress); +void mlx5e_fs_set_ns(struct mlx5e_flow_steering *fs, struct mlx5_flow_namespace *ns, bool egress); +#ifdef CONFIG_MLX5_EN_RXNFC +struct mlx5e_ethtool_steering *mlx5e_fs_get_ethtool(struct mlx5e_flow_steering *fs); +#endif +struct mlx5_ttc_table *mlx5e_fs_get_ttc(struct mlx5e_flow_steering *fs, bool inner); +void mlx5e_fs_set_ttc(struct mlx5e_flow_steering *fs, struct mlx5_ttc_table *ttc, bool inner); +#ifdef CONFIG_MLX5_EN_ARFS +struct mlx5e_arfs_tables *mlx5e_fs_get_arfs(struct mlx5e_flow_steering *fs); +void mlx5e_fs_set_arfs(struct mlx5e_flow_steering *fs, struct mlx5e_arfs_tables *arfs); +#endif +struct mlx5e_ptp_fs *mlx5e_fs_get_ptp(struct mlx5e_flow_steering *fs); +void mlx5e_fs_set_ptp(struct mlx5e_flow_steering *fs, struct mlx5e_ptp_fs *ptp_fs); +struct mlx5e_fs_any *mlx5e_fs_get_any(struct mlx5e_flow_steering *fs); +void mlx5e_fs_set_any(struct mlx5e_flow_steering *fs, struct mlx5e_fs_any *any); +struct mlx5e_fs_udp *mlx5e_fs_get_udp(struct mlx5e_flow_steering *fs); +void mlx5e_fs_set_udp(struct mlx5e_flow_steering *fs, struct mlx5e_fs_udp *udp); +#ifdef CONFIG_MLX5_EN_TLS +struct mlx5e_accel_fs_tcp *mlx5e_fs_get_accel_tcp(struct mlx5e_flow_steering *fs); +void mlx5e_fs_set_accel_tcp(struct mlx5e_flow_steering *fs, struct mlx5e_accel_fs_tcp *accel_tcp); +#endif +void mlx5e_fs_set_state_destroy(struct mlx5e_flow_steering *fs, bool state_destroy); +void mlx5e_fs_set_vlan_strip_disable(struct mlx5e_flow_steering *fs, bool vlan_strip_disable); + +struct mlx5_core_dev *mlx5e_fs_get_mdev(struct mlx5e_flow_steering *fs); +int mlx5e_add_vlan_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num); +void mlx5e_remove_vlan_trap(struct mlx5e_flow_steering *fs); +int mlx5e_add_mac_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num); +void mlx5e_remove_mac_trap(struct mlx5e_flow_steering *fs); +void mlx5e_fs_set_rx_mode_work(struct mlx5e_flow_steering *fs, struct net_device *netdev); +int mlx5e_fs_vlan_rx_add_vid(struct mlx5e_flow_steering *fs, + struct net_device *netdev, + __be16 proto, u16 vid); +int mlx5e_fs_vlan_rx_kill_vid(struct mlx5e_flow_steering *fs, + struct net_device *netdev, + __be16 proto, u16 vid); +void mlx5e_fs_init_l2_addr(struct mlx5e_flow_steering *fs, struct net_device *netdev); + +#define fs_err(fs, fmt, ...) 
\ + mlx5_core_err(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__) + +#define fs_dbg(fs, fmt, ...) \ + mlx5_core_dbg(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__) + +#define fs_warn(fs, fmt, ...) \ + mlx5_core_warn(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__) + +#define fs_warn_once(fs, fmt, ...) \ + mlx5_core_warn_once(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__) + +#endif /* __MLX5E_FLOW_STEER_H__ */ + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h new file mode 100644 index 000000000..9e276fd3c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */ + +#ifndef __MLX5E_FS_ETHTOOL_H__ +#define __MLX5E_FS_ETHTOOL_H__ + +struct mlx5e_priv; +struct mlx5e_ethtool_steering; +#ifdef CONFIG_MLX5_EN_RXNFC +int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool); +void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool); +void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs); +void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs); +int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd); +int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, + struct ethtool_rxnfc *info, u32 *rule_locs); +#else +static inline int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool) +{ return 0; } +static inline void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool) { } +static inline void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs) { } +static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs) { } +static inline int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd) +{ return -EOPNOTSUPP; } +static inline int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, + struct ethtool_rxnfc *info, u32 *rule_locs) +{ return -EOPNOTSUPP; } +#endif +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c new file mode 100644 index 000000000..671adbad0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c @@ -0,0 +1,615 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. 
*/ + +#include "en/fs_tt_redirect.h" +#include "fs_core.h" +#include "mlx5_core.h" + +enum fs_udp_type { + FS_IPV4_UDP, + FS_IPV6_UDP, + FS_UDP_NUM_TYPES, +}; + +struct mlx5e_fs_udp { + struct mlx5e_flow_table tables[FS_UDP_NUM_TYPES]; + struct mlx5_flow_handle *default_rules[FS_UDP_NUM_TYPES]; + int ref_cnt; +}; + +struct mlx5e_fs_any { + struct mlx5e_flow_table table; + struct mlx5_flow_handle *default_rule; + int ref_cnt; +}; + +static char *fs_udp_type2str(enum fs_udp_type i) +{ + switch (i) { + case FS_IPV4_UDP: + return "UDP v4"; + default: /* FS_IPV6_UDP */ + return "UDP v6"; + } +} + +static enum mlx5_traffic_types fs_udp2tt(enum fs_udp_type i) +{ + switch (i) { + case FS_IPV4_UDP: + return MLX5_TT_IPV4_UDP; + default: /* FS_IPV6_UDP */ + return MLX5_TT_IPV6_UDP; + } +} + +static enum fs_udp_type tt2fs_udp(enum mlx5_traffic_types i) +{ + switch (i) { + case MLX5_TT_IPV4_UDP: + return FS_IPV4_UDP; + case MLX5_TT_IPV6_UDP: + return FS_IPV6_UDP; + default: + return FS_UDP_NUM_TYPES; + } +} + +void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule) +{ + mlx5_del_flow_rules(rule); +} + +static void fs_udp_set_dport_flow(struct mlx5_flow_spec *spec, enum fs_udp_type type, + u16 udp_dport) +{ + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_UDP); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, + type == FS_IPV4_UDP ? 4 : 6); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.udp_dport); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, udp_dport); +} + +struct mlx5_flow_handle * +mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_flow_steering *fs, + enum mlx5_traffic_types ttc_type, + u32 tir_num, u16 d_port) +{ + struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs); + enum fs_udp_type type = tt2fs_udp(ttc_type); + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_table *ft = NULL; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err; + + if (type == FS_UDP_NUM_TYPES) + return ERR_PTR(-EINVAL); + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + ft = fs_udp->tables[type].t; + + fs_udp_set_dport_flow(spec, type, d_port); + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = tir_num; + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + kvfree(spec); + + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + fs_err(fs, "%s: add %s rule failed, err %d\n", + __func__, fs_udp_type2str(type), err); + } + return rule; +} + +static int fs_udp_add_default_rule(struct mlx5e_flow_steering *fs, enum fs_udp_type type) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs); + struct mlx5e_flow_table *fs_udp_t; + struct mlx5_flow_destination dest; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + int err; + + fs_udp_t = &fs_udp->tables[type]; + + dest = mlx5_ttc_get_default_dest(ttc, fs_udp2tt(type)); + rule = mlx5_add_flow_rules(fs_udp_t->t, NULL, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + fs_err(fs, "%s: add default rule failed, fs type=%d, err %d\n", + __func__, type, err); + return err; + } + + fs_udp->default_rules[type] = rule; + return 0; +} + 
+#define MLX5E_FS_UDP_NUM_GROUPS (2) +#define MLX5E_FS_UDP_GROUP1_SIZE (BIT(16)) +#define MLX5E_FS_UDP_GROUP2_SIZE (BIT(0)) +#define MLX5E_FS_UDP_TABLE_SIZE (MLX5E_FS_UDP_GROUP1_SIZE +\ + MLX5E_FS_UDP_GROUP2_SIZE) +static int fs_udp_create_groups(struct mlx5e_flow_table *ft, enum fs_udp_type type) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *outer_headers_c; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_FS_UDP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in || !ft->g) { + kfree(ft->g); + ft->g = NULL; + kvfree(in); + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_version); + + switch (type) { + case FS_IPV4_UDP: + case FS_IPV6_UDP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport); + break; + default: + err = -EINVAL; + goto out; + } + /* Match on udp protocol, Ipv4/6 and dport */ + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_FS_UDP_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Default Flow Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_FS_UDP_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; +out: + kvfree(in); + + return err; +} + +static int fs_udp_create_table(struct mlx5e_flow_steering *fs, enum fs_udp_type type) +{ + struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false); + struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5e_flow_table *ft; + int err; + + ft = &fs_udp->tables[type]; + ft->num_groups = 0; + + ft_attr.max_fte = MLX5E_FS_UDP_TABLE_SIZE; + ft_attr.level = MLX5E_FS_TT_UDP_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + mlx5_core_dbg(mlx5e_fs_get_mdev(fs), "Created fs %s table id %u level %u\n", + fs_udp_type2str(type), ft->t->id, ft->t->level); + + err = fs_udp_create_groups(ft, type); + if (err) + goto err; + + err = fs_udp_add_default_rule(fs, type); + if (err) + goto err; + + return 0; + +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +static void fs_udp_destroy_table(struct mlx5e_fs_udp *fs_udp, int i) +{ + if (IS_ERR_OR_NULL(fs_udp->tables[i].t)) + return; + + mlx5_del_flow_rules(fs_udp->default_rules[i]); + mlx5e_destroy_flow_table(&fs_udp->tables[i]); + fs_udp->tables[i].t = NULL; +} + +static int fs_udp_disable(struct mlx5e_flow_steering *fs) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + int err, i; + + for (i = 0; i < FS_UDP_NUM_TYPES; i++) { + /* Modify ttc rules destination to point back to the indir TIRs */ + err = mlx5_ttc_fwd_default_dest(ttc, fs_udp2tt(i)); + if (err) { + fs_err(fs, "%s: modify ttc[%d] default destination failed, err(%d)\n", + __func__, fs_udp2tt(i), err); + return err; + } + } 
+ + return 0; +} + +static int fs_udp_enable(struct mlx5e_flow_steering *fs) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + struct mlx5e_fs_udp *udp = mlx5e_fs_get_udp(fs); + struct mlx5_flow_destination dest = {}; + int err, i; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + for (i = 0; i < FS_UDP_NUM_TYPES; i++) { + dest.ft = udp->tables[i].t; + + /* Modify ttc rules destination to point on the accel_fs FTs */ + err = mlx5_ttc_fwd_dest(ttc, fs_udp2tt(i), &dest); + if (err) { + fs_err(fs, "%s: modify ttc[%d] destination to accel failed, err(%d)\n", + __func__, fs_udp2tt(i), err); + return err; + } + } + return 0; +} + +void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs); + int i; + + if (!fs_udp) + return; + + if (--fs_udp->ref_cnt) + return; + + fs_udp_disable(fs); + + for (i = 0; i < FS_UDP_NUM_TYPES; i++) + fs_udp_destroy_table(fs_udp, i); + + kfree(fs_udp); + mlx5e_fs_set_udp(fs, NULL); +} + +int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_fs_udp *udp = mlx5e_fs_get_udp(fs); + int i, err; + + if (udp) { + udp->ref_cnt++; + return 0; + } + + udp = kzalloc(sizeof(*udp), GFP_KERNEL); + if (!udp) + return -ENOMEM; + mlx5e_fs_set_udp(fs, udp); + + for (i = 0; i < FS_UDP_NUM_TYPES; i++) { + err = fs_udp_create_table(fs, i); + if (err) + goto err_destroy_tables; + } + + err = fs_udp_enable(fs); + if (err) + goto err_destroy_tables; + + udp->ref_cnt = 1; + + return 0; + +err_destroy_tables: + while (--i >= 0) + fs_udp_destroy_table(udp, i); + + kfree(udp); + mlx5e_fs_set_udp(fs, NULL); + return err; +} + +static void fs_any_set_ethertype_flow(struct mlx5_flow_spec *spec, u16 ether_type) +{ + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ether_type); +} + +struct mlx5_flow_handle * +mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_flow_steering *fs, + u32 tir_num, u16 ether_type) +{ + struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs); + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_table *ft = NULL; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + ft = fs_any->table.t; + + fs_any_set_ethertype_flow(spec, ether_type); + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = tir_num; + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + kvfree(spec); + + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + fs_err(fs, "%s: add ANY rule failed, err %d\n", + __func__, err); + } + return rule; +} + +static int fs_any_add_default_rule(struct mlx5e_flow_steering *fs) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs); + struct mlx5e_flow_table *fs_any_t; + struct mlx5_flow_destination dest; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + int err; + + fs_any_t = &fs_any->table; + dest = mlx5_ttc_get_default_dest(ttc, MLX5_TT_ANY); + rule = mlx5_add_flow_rules(fs_any_t->t, NULL, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + fs_err(fs, "%s: add default rule failed, fs type=ANY, err %d\n", + __func__, err); + return err; + } + + fs_any->default_rule = rule; + return 0; +} + +#define MLX5E_FS_ANY_NUM_GROUPS (2) +#define 
MLX5E_FS_ANY_GROUP1_SIZE (BIT(16)) +#define MLX5E_FS_ANY_GROUP2_SIZE (BIT(0)) +#define MLX5E_FS_ANY_TABLE_SIZE (MLX5E_FS_ANY_GROUP1_SIZE +\ + MLX5E_FS_ANY_GROUP2_SIZE) + +static int fs_any_create_groups(struct mlx5e_flow_table *ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *outer_headers_c; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_FS_UDP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in || !ft->g) { + kfree(ft->g); + ft->g = NULL; + kvfree(in); + return -ENOMEM; + } + + /* Match on ethertype */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ethertype); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_FS_ANY_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Default Flow Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_FS_ANY_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + kvfree(in); + + return err; +} + +static int fs_any_create_table(struct mlx5e_flow_steering *fs) +{ + struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false); + struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs); + struct mlx5e_flow_table *ft = &fs_any->table; + struct mlx5_flow_table_attr ft_attr = {}; + int err; + + ft->num_groups = 0; + + ft_attr.max_fte = MLX5E_FS_UDP_TABLE_SIZE; + ft_attr.level = MLX5E_FS_TT_ANY_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + mlx5_core_dbg(mlx5e_fs_get_mdev(fs), "Created fs ANY table id %u level %u\n", + ft->t->id, ft->t->level); + + err = fs_any_create_groups(ft); + if (err) + goto err; + + err = fs_any_add_default_rule(fs); + if (err) + goto err; + + return 0; + +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +static int fs_any_disable(struct mlx5e_flow_steering *fs) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + int err; + + /* Modify ttc rules destination to point back to the indir TIRs */ + err = mlx5_ttc_fwd_default_dest(ttc, MLX5_TT_ANY); + if (err) { + fs_err(fs, + "%s: modify ttc[%d] default destination failed, err(%d)\n", + __func__, MLX5_TT_ANY, err); + return err; + } + return 0; +} + +static int fs_any_enable(struct mlx5e_flow_steering *fs) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + struct mlx5e_fs_any *any = mlx5e_fs_get_any(fs); + struct mlx5_flow_destination dest = {}; + int err; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = any->table.t; + + /* Modify ttc rules destination to point on the accel_fs FTs */ + err = mlx5_ttc_fwd_dest(ttc, MLX5_TT_ANY, &dest); + if (err) { + fs_err(fs, + "%s: modify ttc[%d] destination to accel failed, err(%d)\n", + __func__, MLX5_TT_ANY, err); + return err; + } + return 0; +} + +static void fs_any_destroy_table(struct mlx5e_fs_any *fs_any) +{ + if (IS_ERR_OR_NULL(fs_any->table.t)) + return; + + 
mlx5_del_flow_rules(fs_any->default_rule); + mlx5e_destroy_flow_table(&fs_any->table); + fs_any->table.t = NULL; +} + +void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs); + + if (!fs_any) + return; + + if (--fs_any->ref_cnt) + return; + + fs_any_disable(fs); + + fs_any_destroy_table(fs_any); + + kfree(fs_any); + mlx5e_fs_set_any(fs, NULL); +} + +int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs); + int err; + + if (fs_any) { + fs_any->ref_cnt++; + return 0; + } + + fs_any = kzalloc(sizeof(*fs_any), GFP_KERNEL); + if (!fs_any) + return -ENOMEM; + mlx5e_fs_set_any(fs, fs_any); + + err = fs_any_create_table(fs); + if (err) + goto err_free_any; + + err = fs_any_enable(fs); + if (err) + goto err_destroy_table; + + fs_any->ref_cnt = 1; + + return 0; + +err_destroy_table: + fs_any_destroy_table(fs_any); +err_free_any: + mlx5e_fs_set_any(fs, NULL); + kfree(fs_any); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h new file mode 100644 index 000000000..5780fd7ad --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#ifndef __MLX5E_FS_TT_REDIRECT_H__ +#define __MLX5E_FS_TT_REDIRECT_H__ + +#include "en/fs.h" + +void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule); + +/* UDP traffic type redirect */ +struct mlx5_flow_handle * +mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_flow_steering *fs, + enum mlx5_traffic_types ttc_type, + u32 tir_num, u16 d_port); +void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_flow_steering *fs); +int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_flow_steering *fs); + +/* ANY traffic type redirect*/ +struct mlx5_flow_handle * +mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_flow_steering *fs, + u32 tir_num, u16 ether_type); +void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_flow_steering *fs); +int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c new file mode 100644 index 000000000..6f4e6c34b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -0,0 +1,339 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Mellanox Technologies. 
+ +#include "health.h" +#include "lib/eq.h" +#include "lib/mlx5.h" + +int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name) +{ + int err; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + return 0; +} + +int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg) +{ + int err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) +{ + u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {}; + u8 hw_status; + void *cqc; + int err; + + err = mlx5_core_query_cq(cq->mdev, &cq->mcq, out); + if (err) + return err; + + cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context); + hw_status = MLX5_GET(cqc, cqc, status); + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ"); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "ci", mlx5_cqwq_get_ci(&cq->wq)); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&cq->wq)); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) +{ + u8 cq_log_stride; + u32 cq_sz; + int err; + + cq_sz = mlx5_cqwq_get_size(&cq->wq); + cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq); + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ"); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride)); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg) +{ + int err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "EQ"); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "eqn", eq->core.eqn); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "irqn", eq->core.irqn); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "vecidx", eq->core.vecidx); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "ci", eq->core.cons_index); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", eq_get_size(&eq->core)); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +void mlx5e_health_create_reporters(struct mlx5e_priv *priv) +{ + mlx5e_reporter_tx_create(priv); + mlx5e_reporter_rx_create(priv); +} + +void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv) +{ + mlx5e_reporter_rx_destroy(priv); + mlx5e_reporter_tx_destroy(priv); +} + +void mlx5e_health_channels_update(struct mlx5e_priv *priv) +{ + if (priv->tx_reporter) + devlink_health_reporter_state_update(priv->tx_reporter, + DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); + if (priv->rx_reporter) + devlink_health_reporter_state_update(priv->rx_reporter, + DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); +} + +int mlx5e_health_sq_to_ready(struct mlx5_core_dev *mdev, struct net_device *dev, u32 sqn) +{ + struct mlx5e_modify_sq_param msp = {}; + int err; + + 
msp.curr_state = MLX5_SQC_STATE_ERR; + msp.next_state = MLX5_SQC_STATE_RST; + + err = mlx5e_modify_sq(mdev, sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to reset\n", sqn); + return err; + } + + memset(&msp, 0, sizeof(msp)); + msp.curr_state = MLX5_SQC_STATE_RST; + msp.next_state = MLX5_SQC_STATE_RDY; + + err = mlx5e_modify_sq(mdev, sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to ready\n", sqn); + return err; + } + + return 0; +} + +int mlx5e_health_recover_channels(struct mlx5e_priv *priv) +{ + int err = 0; + + rtnl_lock(); + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto out; + + err = mlx5e_safe_reopen_channels(priv); + +out: + mutex_unlock(&priv->state_lock); + rtnl_unlock(); + + return err; +} + +int mlx5e_health_channel_eq_recover(struct net_device *dev, struct mlx5_eq_comp *eq, + struct mlx5e_ch_stats *stats) +{ + u32 eqe_count; + + netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", + eq->core.eqn, eq->core.cons_index, eq->core.irqn); + + eqe_count = mlx5_eq_poll_irq_disabled(eq); + if (!eqe_count) + return -EIO; + + netdev_err(dev, "Recovered %d eqes on EQ 0x%x\n", + eqe_count, eq->core.eqn); + + stats->eq_rearm++; + return 0; +} + +int mlx5e_health_report(struct mlx5e_priv *priv, + struct devlink_health_reporter *reporter, char *err_str, + struct mlx5e_err_ctx *err_ctx) +{ + netdev_err(priv->netdev, "%s\n", err_str); + + if (!reporter) + return err_ctx->recover(err_ctx->ctx); + + return devlink_health_report(reporter, err_str, err_ctx); +} + +#define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024 +static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg, + const void *value, u32 value_len) + +{ + u32 data_size; + int err = 0; + u32 offset; + + for (offset = 0; offset < value_len; offset += data_size) { + data_size = value_len - offset; + if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE) + data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE; + err = devlink_fmsg_binary_put(fmsg, value + offset, data_size); + if (err) + break; + } + return err; +} + +int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key, + struct devlink_fmsg *fmsg) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_rsc_dump_cmd *cmd; + struct page *page; + int cmd_err, err; + int end_err; + int size; + + if (IS_ERR_OR_NULL(mdev->rsc_dump)) + return -EOPNOTSUPP; + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + err = devlink_fmsg_binary_pair_nest_start(fmsg, "data"); + if (err) + goto free_page; + + cmd = mlx5_rsc_dump_cmd_create(mdev, key); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto free_page; + } + + do { + cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size); + if (cmd_err < 0) { + err = cmd_err; + goto destroy_cmd; + } + + err = mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size); + if (err) + goto destroy_cmd; + + } while (cmd_err > 0); + +destroy_cmd: + mlx5_rsc_dump_cmd_destroy(cmd); + end_err = devlink_fmsg_binary_pair_nest_end(fmsg); + if (end_err) + err = end_err; +free_page: + __free_page(page); + return err; +} + +int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + int queue_idx, char *lbl) +{ + struct mlx5_rsc_key key = {}; + int err; + + key.rsc = MLX5_SGMT_TYPE_FULL_QPC; + key.index1 = queue_idx; + key.size = PAGE_SIZE; + key.num_of_obj1 = 1; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, lbl); + if (err) + return err; + + err = 
devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx); + if (err) + return err; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + return devlink_fmsg_obj_nest_end(fmsg); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h new file mode 100644 index 000000000..0107e4e73 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5E_EN_HEALTH_H +#define __MLX5E_EN_HEALTH_H + +#include "en.h" +#include "diag/rsc_dump.h" + +static inline bool cqe_syndrome_needs_recover(u8 syndrome) +{ + return syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR || + syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR || + syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR; +} + +void mlx5e_reporter_tx_create(struct mlx5e_priv *priv); +void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv); +void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq); +int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq); + +int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg); +int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg); +int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg); +int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name); +int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg); + +void mlx5e_reporter_rx_create(struct mlx5e_priv *priv); +void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv); +void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq); +void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq); +void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq); +void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c); +void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c); + +#define MLX5E_REPORTER_PER_Q_MAX_LEN 256 + +struct mlx5e_err_ctx { + int (*recover)(void *ctx); + int (*dump)(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, void *ctx); + void *ctx; +}; + +int mlx5e_health_sq_to_ready(struct mlx5_core_dev *mdev, struct net_device *dev, u32 sqn); +int mlx5e_health_channel_eq_recover(struct net_device *dev, struct mlx5_eq_comp *eq, + struct mlx5e_ch_stats *stats); +int mlx5e_health_recover_channels(struct mlx5e_priv *priv); +int mlx5e_health_report(struct mlx5e_priv *priv, + struct devlink_health_reporter *reporter, char *err_str, + struct mlx5e_err_ctx *err_ctx); +void mlx5e_health_create_reporters(struct mlx5e_priv *priv); +void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv); +void mlx5e_health_channels_update(struct mlx5e_priv *priv); +int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key, + struct devlink_fmsg *fmsg); +int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + int queue_idx, char *lbl); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c new file mode 100644 index 000000000..09d441ecb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c @@ -0,0 +1,722 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#include <net/pkt_cls.h> +#include "htb.h" +#include "en.h" +#include "../qos.h" + +struct mlx5e_qos_node { + struct hlist_node hnode; + struct mlx5e_qos_node *parent; + u64 rate; + u32 bw_share; + u32 max_average_bw; + u32 hw_id; + u32 classid; /* 16-bit, except root. */ + u16 qid; +}; + +struct mlx5e_htb { + DECLARE_HASHTABLE(qos_tc2node, order_base_2(MLX5E_QOS_MAX_LEAF_NODES)); + DECLARE_BITMAP(qos_used_qids, MLX5E_QOS_MAX_LEAF_NODES); + struct mlx5_core_dev *mdev; + struct net_device *netdev; + struct mlx5e_priv *priv; + struct mlx5e_selq *selq; +}; + +#define MLX5E_QOS_QID_INNER 0xffff +#define MLX5E_HTB_CLASSID_ROOT 0xffffffff + +/* Software representation of the QoS tree */ + +int mlx5e_htb_enumerate_leaves(struct mlx5e_htb *htb, mlx5e_fp_htb_enumerate callback, void *data) +{ + struct mlx5e_qos_node *node = NULL; + int bkt, err; + + hash_for_each(htb->qos_tc2node, bkt, node, hnode) { + if (node->qid == MLX5E_QOS_QID_INNER) + continue; + err = callback(data, node->qid, node->hw_id); + if (err) + return err; + } + return 0; +} + +int mlx5e_htb_cur_leaf_nodes(struct mlx5e_htb *htb) +{ + int last; + + last = find_last_bit(htb->qos_used_qids, mlx5e_qos_max_leaf_nodes(htb->mdev)); + return last == mlx5e_qos_max_leaf_nodes(htb->mdev) ? 0 : last + 1; +} + +static int mlx5e_htb_find_unused_qos_qid(struct mlx5e_htb *htb) +{ + int size = mlx5e_qos_max_leaf_nodes(htb->mdev); + struct mlx5e_priv *priv = htb->priv; + int res; + + WARN_ONCE(!mutex_is_locked(&priv->state_lock), "%s: state_lock is not held\n", __func__); + res = find_first_zero_bit(htb->qos_used_qids, size); + + return res == size ? -ENOSPC : res; +} + +static struct mlx5e_qos_node * +mlx5e_htb_node_create_leaf(struct mlx5e_htb *htb, u16 classid, u16 qid, + struct mlx5e_qos_node *parent) +{ + struct mlx5e_qos_node *node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + node->parent = parent; + + node->qid = qid; + __set_bit(qid, htb->qos_used_qids); + + node->classid = classid; + hash_add_rcu(htb->qos_tc2node, &node->hnode, classid); + + mlx5e_update_tx_netdev_queues(htb->priv); + + return node; +} + +static struct mlx5e_qos_node *mlx5e_htb_node_create_root(struct mlx5e_htb *htb) +{ + struct mlx5e_qos_node *node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + node->qid = MLX5E_QOS_QID_INNER; + node->classid = MLX5E_HTB_CLASSID_ROOT; + hash_add_rcu(htb->qos_tc2node, &node->hnode, node->classid); + + return node; +} + +static struct mlx5e_qos_node *mlx5e_htb_node_find(struct mlx5e_htb *htb, u32 classid) +{ + struct mlx5e_qos_node *node = NULL; + + hash_for_each_possible(htb->qos_tc2node, node, hnode, classid) { + if (node->classid == classid) + break; + } + + return node; +} + +static struct mlx5e_qos_node *mlx5e_htb_node_find_rcu(struct mlx5e_htb *htb, u32 classid) +{ + struct mlx5e_qos_node *node = NULL; + + hash_for_each_possible_rcu(htb->qos_tc2node, node, hnode, classid) { + if (node->classid == classid) + break; + } + + return node; +} + +static void mlx5e_htb_node_delete(struct mlx5e_htb *htb, struct mlx5e_qos_node *node) +{ + hash_del_rcu(&node->hnode); + if (node->qid != MLX5E_QOS_QID_INNER) { + __clear_bit(node->qid, htb->qos_used_qids); + mlx5e_update_tx_netdev_queues(htb->priv); + } + /* Make sure this qid is no longer selected by mlx5e_select_queue, so + * that mlx5e_reactivate_qos_sq can safely restart the netdev TX queue. 
+ */ + synchronize_net(); + kfree(node); +} + +/* TX datapath API */ + +int mlx5e_htb_get_txq_by_classid(struct mlx5e_htb *htb, u16 classid) +{ + struct mlx5e_qos_node *node; + u16 qid; + int res; + + rcu_read_lock(); + + node = mlx5e_htb_node_find_rcu(htb, classid); + if (!node) { + res = -ENOENT; + goto out; + } + qid = READ_ONCE(node->qid); + if (qid == MLX5E_QOS_QID_INNER) { + res = -EINVAL; + goto out; + } + res = mlx5e_qid_from_qos(&htb->priv->channels, qid); + +out: + rcu_read_unlock(); + return res; +} + +/* HTB TC handlers */ + +static int +mlx5e_htb_root_add(struct mlx5e_htb *htb, u16 htb_maj_id, u16 htb_defcls, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = htb->priv; + struct mlx5e_qos_node *root; + bool opened; + int err; + + qos_dbg(htb->mdev, "TC_HTB_CREATE handle %04x:, default :%04x\n", htb_maj_id, htb_defcls); + + mlx5e_selq_prepare_htb(htb->selq, htb_maj_id, htb_defcls); + + opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (opened) { + err = mlx5e_qos_alloc_queues(priv, &priv->channels); + if (err) + goto err_cancel_selq; + } + + root = mlx5e_htb_node_create_root(htb); + if (IS_ERR(root)) { + err = PTR_ERR(root); + goto err_free_queues; + } + + err = mlx5_qos_create_root_node(htb->mdev, &root->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error. Try upgrading firmware."); + goto err_sw_node_delete; + } + + mlx5e_selq_apply(htb->selq); + + return 0; + +err_sw_node_delete: + mlx5e_htb_node_delete(htb, root); + +err_free_queues: + if (opened) + mlx5e_qos_close_all_queues(&priv->channels); +err_cancel_selq: + mlx5e_selq_cancel(htb->selq); + return err; +} + +static int mlx5e_htb_root_del(struct mlx5e_htb *htb) +{ + struct mlx5e_priv *priv = htb->priv; + struct mlx5e_qos_node *root; + int err; + + qos_dbg(htb->mdev, "TC_HTB_DESTROY\n"); + + /* Wait until real_num_tx_queues is updated for mlx5e_select_queue, + * so that we can safely switch to its non-HTB non-PTP fastpath. + */ + synchronize_net(); + + mlx5e_selq_prepare_htb(htb->selq, 0, 0); + mlx5e_selq_apply(htb->selq); + + root = mlx5e_htb_node_find(htb, MLX5E_HTB_CLASSID_ROOT); + if (!root) { + qos_err(htb->mdev, "Failed to find the root node in the QoS tree\n"); + return -ENOENT; + } + err = mlx5_qos_destroy_node(htb->mdev, root->hw_id); + if (err) + qos_err(htb->mdev, "Failed to destroy root node %u, err = %d\n", + root->hw_id, err); + mlx5e_htb_node_delete(htb, root); + + mlx5e_qos_deactivate_all_queues(&priv->channels); + mlx5e_qos_close_all_queues(&priv->channels); + + return err; +} + +static int mlx5e_htb_convert_rate(struct mlx5e_htb *htb, u64 rate, + struct mlx5e_qos_node *parent, u32 *bw_share) +{ + u64 share = 0; + + while (parent->classid != MLX5E_HTB_CLASSID_ROOT && !parent->max_average_bw) + parent = parent->parent; + + if (parent->max_average_bw) + share = div64_u64(div_u64(rate * 100, BYTES_IN_MBIT), + parent->max_average_bw); + else + share = 101; + + *bw_share = share == 0 ? 1 : share > 100 ? 0 : share; + + qos_dbg(htb->mdev, "Convert: rate %llu, parent ceil %llu -> bw_share %u\n", + rate, (u64)parent->max_average_bw * BYTES_IN_MBIT, *bw_share); + + return 0; +} + +static void mlx5e_htb_convert_ceil(struct mlx5e_htb *htb, u64 ceil, u32 *max_average_bw) +{ + /* Hardware treats 0 as "unlimited", set at least 1. 
*/ + *max_average_bw = max_t(u32, div_u64(ceil, BYTES_IN_MBIT), 1); + + qos_dbg(htb->mdev, "Convert: ceil %llu -> max_average_bw %u\n", + ceil, *max_average_bw); +} + +int +mlx5e_htb_leaf_alloc_queue(struct mlx5e_htb *htb, u16 classid, + u32 parent_classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node, *parent; + struct mlx5e_priv *priv = htb->priv; + int qid; + int err; + + qos_dbg(htb->mdev, "TC_HTB_LEAF_ALLOC_QUEUE classid %04x, parent %04x, rate %llu, ceil %llu\n", + classid, parent_classid, rate, ceil); + + qid = mlx5e_htb_find_unused_qos_qid(htb); + if (qid < 0) { + NL_SET_ERR_MSG_MOD(extack, "Maximum amount of leaf classes is reached."); + return qid; + } + + parent = mlx5e_htb_node_find(htb, parent_classid); + if (!parent) + return -EINVAL; + + node = mlx5e_htb_node_create_leaf(htb, classid, qid, parent); + if (IS_ERR(node)) + return PTR_ERR(node); + + node->rate = rate; + mlx5e_htb_convert_rate(htb, rate, node->parent, &node->bw_share); + mlx5e_htb_convert_ceil(htb, ceil, &node->max_average_bw); + + err = mlx5_qos_create_leaf_node(htb->mdev, node->parent->hw_id, + node->bw_share, node->max_average_bw, + &node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); + qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n", + classid, err); + mlx5e_htb_node_delete(htb, node); + return err; + } + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", + classid, err); + } else { + mlx5e_activate_qos_sq(priv, node->qid, node->hw_id); + } + } + + return mlx5e_qid_from_qos(&priv->channels, node->qid); +} + +int +mlx5e_htb_leaf_to_inner(struct mlx5e_htb *htb, u16 classid, u16 child_classid, + u64 rate, u64 ceil, struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node, *child; + struct mlx5e_priv *priv = htb->priv; + int err, tmp_err; + u32 new_hw_id; + u16 qid; + + qos_dbg(htb->mdev, "TC_HTB_LEAF_TO_INNER classid %04x, upcoming child %04x, rate %llu, ceil %llu\n", + classid, child_classid, rate, ceil); + + node = mlx5e_htb_node_find(htb, classid); + if (!node) + return -ENOENT; + + err = mlx5_qos_create_inner_node(htb->mdev, node->parent->hw_id, + node->bw_share, node->max_average_bw, + &new_hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating an inner node."); + qos_err(htb->mdev, "Failed to create an inner node (class %04x), err = %d\n", + classid, err); + return err; + } + + /* Intentionally reuse the qid for the upcoming first child. */ + child = mlx5e_htb_node_create_leaf(htb, child_classid, node->qid, node); + if (IS_ERR(child)) { + err = PTR_ERR(child); + goto err_destroy_hw_node; + } + + child->rate = rate; + mlx5e_htb_convert_rate(htb, rate, node, &child->bw_share); + mlx5e_htb_convert_ceil(htb, ceil, &child->max_average_bw); + + err = mlx5_qos_create_leaf_node(htb->mdev, new_hw_id, child->bw_share, + child->max_average_bw, &child->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); + qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n", + classid, err); + goto err_delete_sw_node; + } + + /* No fail point. */ + + qid = node->qid; + /* Pairs with mlx5e_htb_get_txq_by_classid. 
*/ + WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + mlx5e_deactivate_qos_sq(priv, qid); + mlx5e_close_qos_sq(priv, qid); + } + + err = mlx5_qos_destroy_node(htb->mdev, node->hw_id); + if (err) /* Not fatal. */ + qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + node->hw_id = new_hw_id; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, child->qid, child->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", + classid, err); + } else { + mlx5e_activate_qos_sq(priv, child->qid, child->hw_id); + } + } + + return 0; + +err_delete_sw_node: + child->qid = MLX5E_QOS_QID_INNER; + mlx5e_htb_node_delete(htb, child); + +err_destroy_hw_node: + tmp_err = mlx5_qos_destroy_node(htb->mdev, new_hw_id); + if (tmp_err) /* Not fatal. */ + qos_warn(htb->mdev, "Failed to roll back creation of an inner node %u (class %04x), err = %d\n", + new_hw_id, classid, tmp_err); + return err; +} + +static struct mlx5e_qos_node *mlx5e_htb_node_find_by_qid(struct mlx5e_htb *htb, u16 qid) +{ + struct mlx5e_qos_node *node = NULL; + int bkt; + + hash_for_each(htb->qos_tc2node, bkt, node, hnode) + if (node->qid == qid) + break; + + return node; +} + +int mlx5e_htb_leaf_del(struct mlx5e_htb *htb, u16 *classid, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = htb->priv; + struct mlx5e_qos_node *node; + struct netdev_queue *txq; + u16 qid, moved_qid; + bool opened; + int err; + + qos_dbg(htb->mdev, "TC_HTB_LEAF_DEL classid %04x\n", *classid); + + node = mlx5e_htb_node_find(htb, *classid); + if (!node) + return -ENOENT; + + /* Store qid for reuse. */ + qid = node->qid; + + opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (opened) { + txq = netdev_get_tx_queue(htb->netdev, + mlx5e_qid_from_qos(&priv->channels, qid)); + mlx5e_deactivate_qos_sq(priv, qid); + mlx5e_close_qos_sq(priv, qid); + } + + err = mlx5_qos_destroy_node(htb->mdev, node->hw_id); + if (err) /* Not fatal. */ + qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, *classid, err); + + mlx5e_htb_node_delete(htb, node); + + moved_qid = mlx5e_htb_cur_leaf_nodes(htb); + + if (moved_qid == 0) { + /* The last QoS SQ was just destroyed. */ + if (opened) + mlx5e_reactivate_qos_sq(priv, qid, txq); + return 0; + } + moved_qid--; + + if (moved_qid < qid) { + /* The highest QoS SQ was just destroyed. */ + WARN(moved_qid != qid - 1, "Gaps in queue numeration: destroyed queue %u, the highest queue is %u", + qid, moved_qid); + if (opened) + mlx5e_reactivate_qos_sq(priv, qid, txq); + return 0; + } + + WARN(moved_qid == qid, "Can't move node with qid %u to itself", qid); + qos_dbg(htb->mdev, "Moving QoS SQ %u to %u\n", moved_qid, qid); + + node = mlx5e_htb_node_find_by_qid(htb, moved_qid); + WARN(!node, "Could not find a node with qid %u to move to queue %u", + moved_qid, qid); + + /* Stop traffic to the old queue. */ + WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); + __clear_bit(moved_qid, priv->htb->qos_used_qids); + + if (opened) { + txq = netdev_get_tx_queue(htb->netdev, + mlx5e_qid_from_qos(&priv->channels, moved_qid)); + mlx5e_deactivate_qos_sq(priv, moved_qid); + mlx5e_close_qos_sq(priv, moved_qid); + } + + /* Prevent packets from the old class from getting into the new one. 
*/ + mlx5e_reset_qdisc(htb->netdev, moved_qid); + + __set_bit(qid, htb->qos_used_qids); + WRITE_ONCE(node->qid, qid); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x) while moving qid %u to %u, err = %d\n", + node->classid, moved_qid, qid, err); + } else { + mlx5e_activate_qos_sq(priv, node->qid, node->hw_id); + } + } + + mlx5e_update_tx_netdev_queues(priv); + if (opened) + mlx5e_reactivate_qos_sq(priv, moved_qid, txq); + + *classid = node->classid; + return 0; +} + +int +mlx5e_htb_leaf_del_last(struct mlx5e_htb *htb, u16 classid, bool force, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node, *parent; + struct mlx5e_priv *priv = htb->priv; + u32 old_hw_id, new_hw_id; + int err, saved_err = 0; + u16 qid; + + qos_dbg(htb->mdev, "TC_HTB_LEAF_DEL_LAST%s classid %04x\n", + force ? "_FORCE" : "", classid); + + node = mlx5e_htb_node_find(htb, classid); + if (!node) + return -ENOENT; + + err = mlx5_qos_create_leaf_node(htb->mdev, node->parent->parent->hw_id, + node->parent->bw_share, + node->parent->max_average_bw, + &new_hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); + qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n", + classid, err); + if (!force) + return err; + saved_err = err; + } + + /* Store qid for reuse and prevent clearing the bit. */ + qid = node->qid; + /* Pairs with mlx5e_htb_get_txq_by_classid. */ + WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + mlx5e_deactivate_qos_sq(priv, qid); + mlx5e_close_qos_sq(priv, qid); + } + + /* Prevent packets from the old class from getting into the new one. */ + mlx5e_reset_qdisc(htb->netdev, qid); + + err = mlx5_qos_destroy_node(htb->mdev, node->hw_id); + if (err) /* Not fatal. */ + qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + parent = node->parent; + mlx5e_htb_node_delete(htb, node); + + node = parent; + WRITE_ONCE(node->qid, qid); + + /* Early return on error in force mode. Parent will still be an inner + * node to be deleted by a following delete operation. + */ + if (saved_err) + return saved_err; + + old_hw_id = node->hw_id; + node->hw_id = new_hw_id; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", + classid, err); + } else { + mlx5e_activate_qos_sq(priv, node->qid, node->hw_id); + } + } + + err = mlx5_qos_destroy_node(htb->mdev, old_hw_id); + if (err) /* Not fatal. 
*/ + qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + return 0; +} + +static int +mlx5e_htb_update_children(struct mlx5e_htb *htb, struct mlx5e_qos_node *node, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *child; + int err = 0; + int bkt; + + hash_for_each(htb->qos_tc2node, bkt, child, hnode) { + u32 old_bw_share = child->bw_share; + int err_one; + + if (child->parent != node) + continue; + + mlx5e_htb_convert_rate(htb, child->rate, node, &child->bw_share); + if (child->bw_share == old_bw_share) + continue; + + err_one = mlx5_qos_update_node(htb->mdev, child->bw_share, + child->max_average_bw, child->hw_id); + if (!err && err_one) { + err = err_one; + + NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a child node."); + qos_err(htb->mdev, "Failed to modify a child node (class %04x), err = %d\n", + node->classid, err); + } + } + + return err; +} + +int +mlx5e_htb_node_modify(struct mlx5e_htb *htb, u16 classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack) +{ + u32 bw_share, max_average_bw; + struct mlx5e_qos_node *node; + bool ceil_changed = false; + int err; + + qos_dbg(htb->mdev, "TC_HTB_LEAF_MODIFY classid %04x, rate %llu, ceil %llu\n", + classid, rate, ceil); + + node = mlx5e_htb_node_find(htb, classid); + if (!node) + return -ENOENT; + + node->rate = rate; + mlx5e_htb_convert_rate(htb, rate, node->parent, &bw_share); + mlx5e_htb_convert_ceil(htb, ceil, &max_average_bw); + + err = mlx5_qos_update_node(htb->mdev, bw_share, + max_average_bw, node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a node."); + qos_err(htb->mdev, "Failed to modify a node (class %04x), err = %d\n", + classid, err); + return err; + } + + if (max_average_bw != node->max_average_bw) + ceil_changed = true; + + node->bw_share = bw_share; + node->max_average_bw = max_average_bw; + + if (ceil_changed) + err = mlx5e_htb_update_children(htb, node, extack); + + return err; +} + +struct mlx5e_htb *mlx5e_htb_alloc(void) +{ + return kvzalloc(sizeof(struct mlx5e_htb), GFP_KERNEL); +} + +void mlx5e_htb_free(struct mlx5e_htb *htb) +{ + kvfree(htb); +} + +int mlx5e_htb_init(struct mlx5e_htb *htb, struct tc_htb_qopt_offload *htb_qopt, + struct net_device *netdev, struct mlx5_core_dev *mdev, + struct mlx5e_selq *selq, struct mlx5e_priv *priv) +{ + htb->mdev = mdev; + htb->netdev = netdev; + htb->selq = selq; + htb->priv = priv; + hash_init(htb->qos_tc2node); + return mlx5e_htb_root_add(htb, htb_qopt->parent_classid, htb_qopt->classid, + htb_qopt->extack); +} + +void mlx5e_htb_cleanup(struct mlx5e_htb *htb) +{ + mlx5e_htb_root_del(htb); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h new file mode 100644 index 000000000..8386f1ea4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#ifndef __MLX5E_EN_HTB_H_ +#define __MLX5E_EN_HTB_H_ + +#include "qos.h" + +#define MLX5E_QOS_MAX_LEAF_NODES 256 + +struct mlx5e_selq; +struct mlx5e_htb; + +typedef int (*mlx5e_fp_htb_enumerate)(void *data, u16 qid, u32 hw_id); +int mlx5e_htb_enumerate_leaves(struct mlx5e_htb *htb, mlx5e_fp_htb_enumerate callback, void *data); + +int mlx5e_htb_cur_leaf_nodes(struct mlx5e_htb *htb); + +/* TX datapath API */ +int mlx5e_htb_get_txq_by_classid(struct mlx5e_htb *htb, u16 classid); + +/* HTB TC handlers */ + +int +mlx5e_htb_leaf_alloc_queue(struct mlx5e_htb *htb, u16 classid, + u32 parent_classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack); +int +mlx5e_htb_leaf_to_inner(struct mlx5e_htb *htb, u16 classid, u16 child_classid, + u64 rate, u64 ceil, struct netlink_ext_ack *extack); +int mlx5e_htb_leaf_del(struct mlx5e_htb *htb, u16 *classid, + struct netlink_ext_ack *extack); +int +mlx5e_htb_leaf_del_last(struct mlx5e_htb *htb, u16 classid, bool force, + struct netlink_ext_ack *extack); +int +mlx5e_htb_node_modify(struct mlx5e_htb *htb, u16 classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack); +struct mlx5e_htb *mlx5e_htb_alloc(void); +void mlx5e_htb_free(struct mlx5e_htb *htb); +int mlx5e_htb_init(struct mlx5e_htb *htb, struct tc_htb_qopt_offload *htb_qopt, + struct net_device *netdev, struct mlx5_core_dev *mdev, + struct mlx5e_selq *selq, struct mlx5e_priv *priv); +void mlx5e_htb_cleanup(struct mlx5e_htb *htb); +#endif + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c new file mode 100644 index 000000000..b4f3bd7d3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2018 Mellanox Technologies + +#include "en.h" +#include "en/hv_vhca_stats.h" +#include "lib/hv_vhca.h" +#include "lib/hv.h" + +struct mlx5e_hv_vhca_per_ring_stats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; +}; + +static void +mlx5e_hv_vhca_fill_ring_stats(struct mlx5e_priv *priv, int ch, + struct mlx5e_hv_vhca_per_ring_stats *data) +{ + struct mlx5e_channel_stats *stats; + int tc; + + stats = priv->channel_stats[ch]; + data->rx_packets = stats->rq.packets; + data->rx_bytes = stats->rq.bytes; + + for (tc = 0; tc < priv->max_opened_tc; tc++) { + data->tx_packets += stats->sq[tc].packets; + data->tx_bytes += stats->sq[tc].bytes; + } +} + +static void mlx5e_hv_vhca_fill_stats(struct mlx5e_priv *priv, void *data, + int buf_len) +{ + int ch, i = 0; + + for (ch = 0; ch < priv->stats_nch; ch++) { + void *buf = data + i; + + if (WARN_ON_ONCE(buf + + sizeof(struct mlx5e_hv_vhca_per_ring_stats) > + data + buf_len)) + return; + + mlx5e_hv_vhca_fill_ring_stats(priv, ch, buf); + i += sizeof(struct mlx5e_hv_vhca_per_ring_stats); + } +} + +static int mlx5e_hv_vhca_stats_buf_size(struct mlx5e_priv *priv) +{ + return (sizeof(struct mlx5e_hv_vhca_per_ring_stats) * + priv->stats_nch); +} + +static void mlx5e_hv_vhca_stats_work(struct work_struct *work) +{ + struct mlx5e_hv_vhca_stats_agent *sagent; + struct mlx5_hv_vhca_agent *agent; + struct delayed_work *dwork; + struct mlx5e_priv *priv; + int buf_len, rc; + void *buf; + + dwork = to_delayed_work(work); + sagent = container_of(dwork, struct mlx5e_hv_vhca_stats_agent, work); + priv = container_of(sagent, struct mlx5e_priv, stats_agent); + buf_len = mlx5e_hv_vhca_stats_buf_size(priv); + agent = sagent->agent; + buf = sagent->buf; + + memset(buf, 0, 
buf_len); + mlx5e_hv_vhca_fill_stats(priv, buf, buf_len); + + rc = mlx5_hv_vhca_agent_write(agent, buf, buf_len); + if (rc) { + mlx5_core_err(priv->mdev, + "%s: Failed to write stats, err = %d\n", + __func__, rc); + return; + } + + if (sagent->delay) + queue_delayed_work(priv->wq, &sagent->work, sagent->delay); +} + +enum { + MLX5_HV_VHCA_STATS_VERSION = 1, + MLX5_HV_VHCA_STATS_UPDATE_ONCE = 0xFFFF, +}; + +static void mlx5e_hv_vhca_stats_control(struct mlx5_hv_vhca_agent *agent, + struct mlx5_hv_vhca_control_block *block) +{ + struct mlx5e_hv_vhca_stats_agent *sagent; + struct mlx5e_priv *priv; + + priv = mlx5_hv_vhca_agent_priv(agent); + sagent = &priv->stats_agent; + + block->version = MLX5_HV_VHCA_STATS_VERSION; + block->rings = priv->stats_nch; + + if (!block->command) { + cancel_delayed_work_sync(&priv->stats_agent.work); + return; + } + + sagent->delay = block->command == MLX5_HV_VHCA_STATS_UPDATE_ONCE ? 0 : + msecs_to_jiffies(block->command * 100); + + queue_delayed_work(priv->wq, &sagent->work, sagent->delay); +} + +static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent) +{ + struct mlx5e_priv *priv = mlx5_hv_vhca_agent_priv(agent); + + cancel_delayed_work_sync(&priv->stats_agent.work); +} + +void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) +{ + int buf_len = mlx5e_hv_vhca_stats_buf_size(priv); + struct mlx5_hv_vhca_agent *agent; + + priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL); + if (!priv->stats_agent.buf) + return; + + agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca, + MLX5_HV_VHCA_AGENT_STATS, + mlx5e_hv_vhca_stats_control, NULL, + mlx5e_hv_vhca_stats_cleanup, + priv); + + if (IS_ERR_OR_NULL(agent)) { + if (IS_ERR(agent)) + netdev_warn(priv->netdev, + "Failed to create hv vhca stats agent, err = %ld\n", + PTR_ERR(agent)); + + kvfree(priv->stats_agent.buf); + return; + } + + priv->stats_agent.agent = agent; + INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work); +} + +void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) +{ + if (IS_ERR_OR_NULL(priv->stats_agent.agent)) + return; + + mlx5_hv_vhca_agent_destroy(priv->stats_agent.agent); + kvfree(priv->stats_agent.buf); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h new file mode 100644 index 000000000..29c8c6d32 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_STATS_VHCA_H__ +#define __MLX5_EN_STATS_VHCA_H__ +#include "en.h" + +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) + +void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv); +void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv); + +#else +static inline void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) {} +static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) {} +#endif + +#endif /* __MLX5_EN_STATS_VHCA_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c new file mode 100644 index 000000000..4e72ca807 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies */ + +#include <linux/jhash.h> +#include <linux/slab.h> +#include <linux/xarray.h> +#include <linux/hashtable.h> +#include <linux/refcount.h> + +#include "mapping.h" + +#define MAPPING_GRACE_PERIOD 2000 + +static LIST_HEAD(shared_ctx_list); +static DEFINE_MUTEX(shared_ctx_lock); + +struct mapping_ctx { + struct xarray xarray; + DECLARE_HASHTABLE(ht, 8); + struct mutex lock; /* Guards hashtable and xarray */ + unsigned long max_id; + size_t data_size; + bool delayed_removal; + struct delayed_work dwork; + struct list_head pending_list; + spinlock_t pending_list_lock; /* Guards pending list */ + u64 id; + u8 type; + struct list_head list; + refcount_t refcount; +}; + +struct mapping_item { + struct rcu_head rcu; + struct list_head list; + unsigned long timeout; + struct hlist_node node; + int cnt; + u32 id; + char data[]; +}; + +int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id) +{ + struct mapping_item *mi; + int err = -ENOMEM; + u32 hash_key; + + mutex_lock(&ctx->lock); + + hash_key = jhash(data, ctx->data_size, 0); + hash_for_each_possible(ctx->ht, mi, node, hash_key) { + if (!memcmp(data, mi->data, ctx->data_size)) + goto attach; + } + + mi = kzalloc(sizeof(*mi) + ctx->data_size, GFP_KERNEL); + if (!mi) + goto err_alloc; + + memcpy(mi->data, data, ctx->data_size); + hash_add(ctx->ht, &mi->node, hash_key); + + err = xa_alloc(&ctx->xarray, &mi->id, mi, XA_LIMIT(1, ctx->max_id), + GFP_KERNEL); + if (err) + goto err_assign; +attach: + ++mi->cnt; + *id = mi->id; + + mutex_unlock(&ctx->lock); + + return 0; + +err_assign: + hash_del(&mi->node); + kfree(mi); +err_alloc: + mutex_unlock(&ctx->lock); + + return err; +} + +static void mapping_remove_and_free(struct mapping_ctx *ctx, + struct mapping_item *mi) +{ + xa_erase(&ctx->xarray, mi->id); + kfree_rcu(mi, rcu); +} + +static void mapping_free_item(struct mapping_ctx *ctx, + struct mapping_item *mi) +{ + if (!ctx->delayed_removal) { + mapping_remove_and_free(ctx, mi); + return; + } + + mi->timeout = jiffies + msecs_to_jiffies(MAPPING_GRACE_PERIOD); + + spin_lock(&ctx->pending_list_lock); + list_add_tail(&mi->list, &ctx->pending_list); + spin_unlock(&ctx->pending_list_lock); + + schedule_delayed_work(&ctx->dwork, MAPPING_GRACE_PERIOD); +} + +int mapping_remove(struct mapping_ctx *ctx, u32 id) +{ + unsigned long index = id; + struct mapping_item *mi; + int err = -ENOENT; + + mutex_lock(&ctx->lock); + mi = xa_load(&ctx->xarray, index); + if (!mi) + goto out; + err = 0; + + if (--mi->cnt > 0) + goto out; + + hash_del(&mi->node); + mapping_free_item(ctx, mi); +out: + mutex_unlock(&ctx->lock); + + return err; +} + +int mapping_find(struct mapping_ctx *ctx, u32 id, void *data) +{ + unsigned long index = id; + struct mapping_item 
*mi; + int err = -ENOENT; + + rcu_read_lock(); + mi = xa_load(&ctx->xarray, index); + if (!mi) + goto err_find; + + memcpy(data, mi->data, ctx->data_size); + err = 0; + +err_find: + rcu_read_unlock(); + return err; +} + +static void +mapping_remove_and_free_list(struct mapping_ctx *ctx, struct list_head *list) +{ + struct mapping_item *mi; + + list_for_each_entry(mi, list, list) + mapping_remove_and_free(ctx, mi); +} + +static void mapping_work_handler(struct work_struct *work) +{ + unsigned long min_timeout = 0, now = jiffies; + struct mapping_item *mi, *next; + LIST_HEAD(pending_items); + struct mapping_ctx *ctx; + + ctx = container_of(work, struct mapping_ctx, dwork.work); + + spin_lock(&ctx->pending_list_lock); + list_for_each_entry_safe(mi, next, &ctx->pending_list, list) { + if (time_after(now, mi->timeout)) + list_move(&mi->list, &pending_items); + else if (!min_timeout || + time_before(mi->timeout, min_timeout)) + min_timeout = mi->timeout; + } + spin_unlock(&ctx->pending_list_lock); + + mapping_remove_and_free_list(ctx, &pending_items); + + if (min_timeout) + schedule_delayed_work(&ctx->dwork, abs(min_timeout - now)); +} + +static void mapping_flush_work(struct mapping_ctx *ctx) +{ + if (!ctx->delayed_removal) + return; + + cancel_delayed_work_sync(&ctx->dwork); + mapping_remove_and_free_list(ctx, &ctx->pending_list); +} + +struct mapping_ctx * +mapping_create(size_t data_size, u32 max_id, bool delayed_removal) +{ + struct mapping_ctx *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + ctx->max_id = max_id ? max_id : UINT_MAX; + ctx->data_size = data_size; + + if (delayed_removal) { + INIT_DELAYED_WORK(&ctx->dwork, mapping_work_handler); + INIT_LIST_HEAD(&ctx->pending_list); + spin_lock_init(&ctx->pending_list_lock); + ctx->delayed_removal = true; + } + + mutex_init(&ctx->lock); + xa_init_flags(&ctx->xarray, XA_FLAGS_ALLOC1); + + refcount_set(&ctx->refcount, 1); + INIT_LIST_HEAD(&ctx->list); + + return ctx; +} + +struct mapping_ctx * +mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal) +{ + struct mapping_ctx *ctx; + + mutex_lock(&shared_ctx_lock); + list_for_each_entry(ctx, &shared_ctx_list, list) { + if (ctx->id == id && ctx->type == type) { + if (refcount_inc_not_zero(&ctx->refcount)) + goto unlock; + break; + } + } + + ctx = mapping_create(data_size, max_id, delayed_removal); + if (IS_ERR(ctx)) + goto unlock; + + ctx->id = id; + ctx->type = type; + list_add(&ctx->list, &shared_ctx_list); + +unlock: + mutex_unlock(&shared_ctx_lock); + return ctx; +} + +void mapping_destroy(struct mapping_ctx *ctx) +{ + if (!refcount_dec_and_test(&ctx->refcount)) + return; + + mutex_lock(&shared_ctx_lock); + list_del(&ctx->list); + mutex_unlock(&shared_ctx_lock); + + mapping_flush_work(ctx); + xa_destroy(&ctx->xarray); + mutex_destroy(&ctx->lock); + + kfree(ctx); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h new file mode 100644 index 000000000..4e2119f0f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies */ + +#ifndef __MLX5_MAPPING_H__ +#define __MLX5_MAPPING_H__ + +struct mapping_ctx; + +int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id); +int mapping_remove(struct mapping_ctx *ctx, u32 id); +int mapping_find(struct mapping_ctx *ctx, u32 id, void *data); + +/* mapping uses an xarray to 
map data to ids in add(), and for find(). + * For locking, it uses a internal xarray spin lock for add()/remove(), + * find() uses rcu_read_lock(). + * Choosing delayed_removal postpones the removal of a previously mapped + * id by MAPPING_GRACE_PERIOD milliseconds. + * This is to avoid races against hardware, where we mark the packet in + * hardware with a previous id, and quick remove() and add() reusing the same + * previous id. Then find() will get the new mapping instead of the old + * which was used to mark the packet. + */ +struct mapping_ctx *mapping_create(size_t data_size, u32 max_id, + bool delayed_removal); +void mapping_destroy(struct mapping_ctx *ctx); + +/* adds mapping with an id or get an existing mapping with the same id + */ +struct mapping_ctx * +mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal); + +#endif /* __MLX5_MAPPING_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c new file mode 100644 index 000000000..17325c5d6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2020 Mellanox Technologies + +#include <linux/jhash.h> +#include "mod_hdr.h" + +#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) + +struct mod_hdr_key { + int num_actions; + void *actions; +}; + +struct mlx5e_mod_hdr_handle { + /* a node of a hash table which keeps all the mod_hdr entries */ + struct hlist_node mod_hdr_hlist; + + struct mod_hdr_key key; + + struct mlx5_modify_hdr *modify_hdr; + + refcount_t refcnt; + struct completion res_ready; + int compl_result; +}; + +static u32 hash_mod_hdr_info(struct mod_hdr_key *key) +{ + return jhash(key->actions, + key->num_actions * MLX5_MH_ACT_SZ, 0); +} + +static int cmp_mod_hdr_info(struct mod_hdr_key *a, struct mod_hdr_key *b) +{ + if (a->num_actions != b->num_actions) + return 1; + + return memcmp(a->actions, b->actions, + a->num_actions * MLX5_MH_ACT_SZ); +} + +void mlx5e_mod_hdr_tbl_init(struct mod_hdr_tbl *tbl) +{ + mutex_init(&tbl->lock); + hash_init(tbl->hlist); +} + +void mlx5e_mod_hdr_tbl_destroy(struct mod_hdr_tbl *tbl) +{ + mutex_destroy(&tbl->lock); +} + +static struct mlx5e_mod_hdr_handle *mod_hdr_get(struct mod_hdr_tbl *tbl, + struct mod_hdr_key *key, + u32 hash_key) +{ + struct mlx5e_mod_hdr_handle *mh, *found = NULL; + + hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) { + if (!cmp_mod_hdr_info(&mh->key, key)) { + refcount_inc(&mh->refcnt); + found = mh; + break; + } + } + + return found; +} + +struct mlx5e_mod_hdr_handle * +mlx5e_mod_hdr_attach(struct mlx5_core_dev *mdev, + struct mod_hdr_tbl *tbl, + enum mlx5_flow_namespace_type namespace, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +{ + int num_actions, actions_size, err; + struct mlx5e_mod_hdr_handle *mh; + struct mod_hdr_key key; + u32 hash_key; + + num_actions = mod_hdr_acts->num_actions; + actions_size = MLX5_MH_ACT_SZ * num_actions; + + key.actions = mod_hdr_acts->actions; + key.num_actions = num_actions; + + hash_key = hash_mod_hdr_info(&key); + + mutex_lock(&tbl->lock); + mh = mod_hdr_get(tbl, &key, hash_key); + if (mh) { + mutex_unlock(&tbl->lock); + wait_for_completion(&mh->res_ready); + + if (mh->compl_result < 0) { + err = -EREMOTEIO; + goto attach_header_err; + } + goto attach_header; + } + + mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL); + if (!mh) { + mutex_unlock(&tbl->lock); + return ERR_PTR(-ENOMEM); + 
} + + mh->key.actions = (void *)mh + sizeof(*mh); + memcpy(mh->key.actions, key.actions, actions_size); + mh->key.num_actions = num_actions; + refcount_set(&mh->refcnt, 1); + init_completion(&mh->res_ready); + + hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key); + mutex_unlock(&tbl->lock); + + mh->modify_hdr = mlx5_modify_header_alloc(mdev, namespace, + mh->key.num_actions, + mh->key.actions); + if (IS_ERR(mh->modify_hdr)) { + err = PTR_ERR(mh->modify_hdr); + mh->compl_result = err; + goto alloc_header_err; + } + mh->compl_result = 1; + complete_all(&mh->res_ready); + +attach_header: + return mh; + +alloc_header_err: + complete_all(&mh->res_ready); +attach_header_err: + mlx5e_mod_hdr_detach(mdev, tbl, mh); + return ERR_PTR(err); +} + +void mlx5e_mod_hdr_detach(struct mlx5_core_dev *mdev, + struct mod_hdr_tbl *tbl, + struct mlx5e_mod_hdr_handle *mh) +{ + if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock)) + return; + hash_del(&mh->mod_hdr_hlist); + mutex_unlock(&tbl->lock); + + if (mh->compl_result > 0) + mlx5_modify_header_dealloc(mdev, mh->modify_hdr); + + kfree(mh); +} + +struct mlx5_modify_hdr *mlx5e_mod_hdr_get(struct mlx5e_mod_hdr_handle *mh) +{ + return mh->modify_hdr; +} + +char * +mlx5e_mod_hdr_alloc(struct mlx5_core_dev *mdev, int namespace, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +{ + int new_num_actions, max_hw_actions; + size_t new_sz, old_sz; + void *ret; + + if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions) + goto out; + + max_hw_actions = mlx5e_mod_hdr_max_actions(mdev, namespace); + new_num_actions = min(max_hw_actions, + mod_hdr_acts->actions ? + mod_hdr_acts->max_actions * 2 : 1); + if (mod_hdr_acts->max_actions == new_num_actions) + return ERR_PTR(-ENOSPC); + + new_sz = MLX5_MH_ACT_SZ * new_num_actions; + old_sz = mod_hdr_acts->max_actions * MLX5_MH_ACT_SZ; + + if (mod_hdr_acts->is_static) { + ret = kzalloc(new_sz, GFP_KERNEL); + if (ret) { + memcpy(ret, mod_hdr_acts->actions, old_sz); + mod_hdr_acts->is_static = false; + } + } else { + ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL); + if (ret) + memset(ret + old_sz, 0, new_sz - old_sz); + } + if (!ret) + return ERR_PTR(-ENOMEM); + + mod_hdr_acts->actions = ret; + mod_hdr_acts->max_actions = new_num_actions; + +out: + return mod_hdr_acts->actions + (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ); +} + +void +mlx5e_mod_hdr_dealloc(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +{ + if (!mod_hdr_acts->is_static) + kfree(mod_hdr_acts->actions); + + mod_hdr_acts->actions = NULL; + mod_hdr_acts->num_actions = 0; + mod_hdr_acts->max_actions = 0; +} + +char * +mlx5e_mod_hdr_get_item(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, int pos) +{ + return mod_hdr_acts->actions + (pos * MLX5_MH_ACT_SZ); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h new file mode 100644 index 000000000..b8dac418d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies */ + +#ifndef __MLX5E_EN_MOD_HDR_H__ +#define __MLX5E_EN_MOD_HDR_H__ + +#include <linux/hashtable.h> +#include <linux/mlx5/fs.h> + +#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) + +struct mlx5e_mod_hdr_handle; + +struct mlx5e_tc_mod_hdr_acts { + int num_actions; + int max_actions; + bool is_static; + void *actions; +}; + +#define DECLARE_MOD_HDR_ACTS_ACTIONS(name, len) \ + u8 name[len][MLX5_MH_ACT_SZ] = {} + +#define 
DECLARE_MOD_HDR_ACTS(name, acts_arr) \ + struct mlx5e_tc_mod_hdr_acts name = { \ + .max_actions = ARRAY_SIZE(acts_arr), \ + .is_static = true, \ + .actions = acts_arr, \ + } + +char *mlx5e_mod_hdr_alloc(struct mlx5_core_dev *mdev, int namespace, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); +void mlx5e_mod_hdr_dealloc(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); +char *mlx5e_mod_hdr_get_item(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, int pos); + +struct mlx5e_mod_hdr_handle * +mlx5e_mod_hdr_attach(struct mlx5_core_dev *mdev, + struct mod_hdr_tbl *tbl, + enum mlx5_flow_namespace_type namespace, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); +void mlx5e_mod_hdr_detach(struct mlx5_core_dev *mdev, + struct mod_hdr_tbl *tbl, + struct mlx5e_mod_hdr_handle *mh); +struct mlx5_modify_hdr *mlx5e_mod_hdr_get(struct mlx5e_mod_hdr_handle *mh); + +void mlx5e_mod_hdr_tbl_init(struct mod_hdr_tbl *tbl); +void mlx5e_mod_hdr_tbl_destroy(struct mod_hdr_tbl *tbl); + +static inline int mlx5e_mod_hdr_max_actions(struct mlx5_core_dev *mdev, int namespace) +{ + if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */ + return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions); + else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */ + return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions); +} + +#endif /* __MLX5E_EN_MOD_HDR_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c new file mode 100644 index 000000000..254c84739 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include "en.h" +#include "monitor_stats.h" +#include "lib/eq.h" + +/* Driver will set the following watch counters list: + * Ppcnt.802_3: + * a_in_range_length_errors Type: 0x0, Counter: 0x0, group_id = N/A + * a_out_of_range_length_field Type: 0x0, Counter: 0x1, group_id = N/A + * a_frame_too_long_errors Type: 0x0, Counter: 0x2, group_id = N/A + * a_frame_check_sequence_errors Type: 0x0, Counter: 0x3, group_id = N/A + * a_alignment_errors Type: 0x0, Counter: 0x4, group_id = N/A + * if_out_discards Type: 0x0, Counter: 0x5, group_id = N/A + * Q_Counters: + * Q[index].rx_out_of_buffer Type: 0x1, Counter: 0x4, group_id = counter_ix + */ + +#define NUM_REQ_PPCNT_COUNTER_S1 MLX5_CMD_SET_MONITOR_NUM_PPCNT_COUNTER_SET1 +#define NUM_REQ_Q_COUNTERS_S1 MLX5_CMD_SET_MONITOR_NUM_Q_COUNTERS_SET1 + +int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + if (!MLX5_CAP_GEN(mdev, max_num_of_monitor_counters)) + return false; + if (MLX5_CAP_PCAM_REG(mdev, ppcnt) && + MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters) < + NUM_REQ_PPCNT_COUNTER_S1) + return false; + if (MLX5_CAP_GEN(mdev, num_q_monitor_counters) < + NUM_REQ_Q_COUNTERS_S1) + return false; + return true; +} + +void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv) +{ + u32 in[MLX5_ST_SZ_DW(arm_monitor_counter_in)] = {}; + + MLX5_SET(arm_monitor_counter_in, in, opcode, + MLX5_CMD_OP_ARM_MONITOR_COUNTER); + mlx5_cmd_exec_in(priv->mdev, arm_monitor_counter, in); +} + +static void mlx5e_monitor_counters_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + monitor_counters_work); + + mutex_lock(&priv->state_lock); + mlx5e_stats_update_ndo_stats(priv); + mutex_unlock(&priv->state_lock); + mlx5e_monitor_counter_arm(priv); +} + +static int 
mlx5e_monitor_event_handler(struct notifier_block *nb, + unsigned long event, void *eqe) +{ + struct mlx5e_priv *priv = mlx5_nb_cof(nb, struct mlx5e_priv, + monitor_counters_nb); + queue_work(priv->wq, &priv->monitor_counters_work); + return NOTIFY_OK; +} + +static int fill_monitor_counter_ppcnt_set1(int cnt, u32 *in) +{ + enum mlx5_monitor_counter_ppcnt ppcnt_cnt; + + for (ppcnt_cnt = 0; + ppcnt_cnt < NUM_REQ_PPCNT_COUNTER_S1; + ppcnt_cnt++, cnt++) { + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].type, + MLX5_QUERY_MONITOR_CNT_TYPE_PPCNT); + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].counter, + ppcnt_cnt); + } + return ppcnt_cnt; +} + +static int fill_monitor_counter_q_counter_set1(int cnt, int q_counter, u32 *in) +{ + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].type, + MLX5_QUERY_MONITOR_CNT_TYPE_Q_COUNTER); + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].counter, + MLX5_QUERY_MONITOR_Q_COUNTER_RX_OUT_OF_BUFFER); + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].counter_group_id, + q_counter); + return 1; +} + +/* check if mlx5e_monitor_counter_supported before calling this function*/ +static void mlx5e_set_monitor_counter(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int max_num_of_counters = MLX5_CAP_GEN(mdev, max_num_of_monitor_counters); + int num_q_counters = MLX5_CAP_GEN(mdev, num_q_monitor_counters); + int num_ppcnt_counters = !MLX5_CAP_PCAM_REG(mdev, ppcnt) ? 0 : + MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters); + u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {}; + int q_counter = priv->q_counter; + int cnt = 0; + + if (num_ppcnt_counters >= NUM_REQ_PPCNT_COUNTER_S1 && + max_num_of_counters >= (NUM_REQ_PPCNT_COUNTER_S1 + cnt)) + cnt += fill_monitor_counter_ppcnt_set1(cnt, in); + + if (num_q_counters >= NUM_REQ_Q_COUNTERS_S1 && + max_num_of_counters >= (NUM_REQ_Q_COUNTERS_S1 + cnt) && + q_counter) + cnt += fill_monitor_counter_q_counter_set1(cnt, q_counter, in); + + MLX5_SET(set_monitor_counter_in, in, num_of_counters, cnt); + MLX5_SET(set_monitor_counter_in, in, opcode, + MLX5_CMD_OP_SET_MONITOR_COUNTER); + + mlx5_cmd_exec_in(mdev, set_monitor_counter, in); +} + +/* check if mlx5e_monitor_counter_supported before calling this function*/ +void mlx5e_monitor_counter_init(struct mlx5e_priv *priv) +{ + INIT_WORK(&priv->monitor_counters_work, mlx5e_monitor_counters_work); + MLX5_NB_INIT(&priv->monitor_counters_nb, mlx5e_monitor_event_handler, + MONITOR_COUNTER); + mlx5_eq_notifier_register(priv->mdev, &priv->monitor_counters_nb); + + mlx5e_set_monitor_counter(priv); + mlx5e_monitor_counter_arm(priv); + queue_work(priv->wq, &priv->update_stats_work); +} + +/* check if mlx5e_monitor_counter_supported before calling this function*/ +void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv) +{ + u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {}; + + MLX5_SET(set_monitor_counter_in, in, opcode, + MLX5_CMD_OP_SET_MONITOR_COUNTER); + + mlx5_cmd_exec_in(priv->mdev, set_monitor_counter, in); + mlx5_eq_notifier_unregister(priv->mdev, &priv->monitor_counters_nb); + cancel_work_sync(&priv->monitor_counters_work); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h new file mode 100644 index 000000000..e1ac4b3d2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. 
*/ + +#ifndef __MLX5_MONITOR_H__ +#define __MLX5_MONITOR_H__ + +int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv); +void mlx5e_monitor_counter_init(struct mlx5e_priv *priv); +void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv); +void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv); + +#endif /* __MLX5_MONITOR_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c new file mode 100644 index 000000000..d3de1b7a8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -0,0 +1,1240 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "en/params.h" +#include "en/txrx.h" +#include "en/port.h" +#include "en_accel/en_accel.h" +#include "en_accel/ipsec.h" +#include <net/xdp_sock_drv.h> + +static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev) +{ + u8 min_page_shift = MLX5_CAP_GEN_2(mdev, log_min_mkey_entity_size); + + return min_page_shift ? : 12; +} + +u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk) +{ + u8 req_page_shift = xsk ? order_base_2(xsk->chunk_size) : PAGE_SHIFT; + u8 min_page_shift = mlx5e_mpwrq_min_page_shift(mdev); + + /* Regular RQ uses order-0 pages, the NIC must be able to map them. */ + if (WARN_ON_ONCE(!xsk && req_page_shift < min_page_shift)) + min_page_shift = req_page_shift; + + return max(req_page_shift, min_page_shift); +} + +enum mlx5e_mpwrq_umr_mode +mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk) +{ + /* Different memory management schemes use different mechanisms to map + * user-mode memory. The stricter guarantees we have, the faster + * mechanisms we use: + * 1. MTT - direct mapping in page granularity. + * 2. KSM - indirect mapping to another MKey to arbitrary addresses, but + * all mappings have the same size. + * 3. KLM - indirect mapping to another MKey to arbitrary addresses, and + * mappings can have different sizes. + */ + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + bool unaligned = xsk ? xsk->unaligned : false; + bool oversized = false; + + if (xsk) { + oversized = xsk->chunk_size < (1 << page_shift); + WARN_ON_ONCE(xsk->chunk_size > (1 << page_shift)); + } + + /* XSK frame size doesn't match the UMR page size, either because the + * frame size is not a power of two, or it's smaller than the minimal + * page size supported by the firmware. + * It's possible to receive packets bigger than MTU in certain setups. + * To avoid writing over the XSK frame boundary, the top region of each + * stride is mapped to a garbage page, resulting in two mappings of + * different sizes per frame. + */ + if (oversized) { + /* An optimization for frame sizes equal to 3 * power_of_two. + * 3 KSMs point to the frame, and one KSM points to the garbage + * page, which works faster than KLM. + */ + if (xsk->chunk_size % 3 == 0 && is_power_of_2(xsk->chunk_size / 3)) + return MLX5E_MPWRQ_UMR_MODE_TRIPLE; + + return MLX5E_MPWRQ_UMR_MODE_OVERSIZED; + } + + /* XSK frames can start at arbitrary unaligned locations, but they all + * have the same size which is a power of two. It allows to optimize to + * one KSM per frame. + */ + if (unaligned) + return MLX5E_MPWRQ_UMR_MODE_UNALIGNED; + + /* XSK: frames are naturally aligned, MTT can be used. + * Non-XSK: Allocations happen in units of CPU pages, therefore, the + * mappings are naturally aligned. 
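+	 * For example, with the default 4 KB minimum page shift: an aligned
+	 * 4 KB XSK chunk selects MTT (ALIGNED); the same chunk flagged as
+	 * unaligned selects one KSM per frame (UNALIGNED); a 12 KB chunk
+	 * rounds its page shift up to 16 KB, becomes oversized, and since
+	 * 12 KB == 3 * 4 KB it selects the TRIPLE (4 KSM) layout.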
+ */ + return MLX5E_MPWRQ_UMR_MODE_ALIGNED; +} + +u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode) +{ + switch (mode) { + case MLX5E_MPWRQ_UMR_MODE_ALIGNED: + return sizeof(struct mlx5_mtt); + case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: + return sizeof(struct mlx5_ksm); + case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: + return sizeof(struct mlx5_klm) * 2; + case MLX5E_MPWRQ_UMR_MODE_TRIPLE: + return sizeof(struct mlx5_ksm) * 4; + } + WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", mode); + return 0; +} + +u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); + u8 max_pages_per_wqe, max_log_mpwqe_size; + u16 max_wqe_size; + + /* Keep in sync with MLX5_MPWRQ_MAX_PAGES_PER_WQE. */ + max_wqe_size = mlx5e_get_max_sq_aligned_wqebbs(mdev) * MLX5_SEND_WQE_BB; + max_pages_per_wqe = ALIGN_DOWN(max_wqe_size - sizeof(struct mlx5e_umr_wqe), + MLX5_UMR_MTT_ALIGNMENT) / umr_entry_size; + max_log_mpwqe_size = ilog2(max_pages_per_wqe) + page_shift; + + WARN_ON_ONCE(max_log_mpwqe_size < MLX5E_ORDER2_MAX_PACKET_MTU); + + return min_t(u8, max_log_mpwqe_size, MLX5_MPWRQ_MAX_LOG_WQE_SZ); +} + +u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + u8 log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode); + u8 pages_per_wqe; + + pages_per_wqe = log_wqe_sz > page_shift ? (1 << (log_wqe_sz - page_shift)) : 1; + + /* Two MTTs are needed to form an octword. The number of MTTs is encoded + * in octwords in a UMR WQE, so we need at least two to avoid mapping + * garbage addresses. + */ + if (WARN_ON_ONCE(pages_per_wqe < 2 && umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) + pages_per_wqe = 2; + + /* Sanity check for further calculations to succeed. */ + BUILD_BUG_ON(MLX5_MPWRQ_MAX_PAGES_PER_WQE > 64); + if (WARN_ON_ONCE(pages_per_wqe > MLX5_MPWRQ_MAX_PAGES_PER_WQE)) + return MLX5_MPWRQ_MAX_PAGES_PER_WQE; + + return pages_per_wqe; +} + +u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + u8 pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, page_shift, umr_mode); + u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); + u16 umr_wqe_sz; + + umr_wqe_sz = sizeof(struct mlx5e_umr_wqe) + + ALIGN(pages_per_wqe * umr_entry_size, MLX5_UMR_MTT_ALIGNMENT); + + WARN_ON_ONCE(DIV_ROUND_UP(umr_wqe_sz, MLX5_SEND_WQE_DS) > MLX5_WQE_CTRL_DS_MASK); + + return umr_wqe_sz; +} + +u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + return DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(mdev, page_shift, umr_mode), + MLX5_SEND_WQE_BB); +} + +u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + u8 pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, page_shift, umr_mode); + + /* Add another page as a buffer between WQEs. This page will absorb + * write overflow by the hardware, when receiving packets larger than + * MTU. These oversize packets are dropped by the driver at a later + * stage. + */ + return ALIGN(pages_per_wqe + 1, + MLX5_SEND_WQE_BB / mlx5e_mpwrq_umr_entry_size(umr_mode)); +} + +u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + /* Same limits apply to KSMs and KLMs. 
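+	 * For example, if log_max_klm_list_size is 16 (and
+	 * MLX5E_MAX_RQ_NUM_KSMS does not cap it lower), the base limit is
+	 * 65536 entries; OVERSIZED (two KLMs per entry) halves it to 32768,
+	 * and TRIPLE (four KSMs per entry) quarters it to 16384.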
*/ + u32 klm_limit = min(MLX5E_MAX_RQ_NUM_KSMS, + 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size)); + + switch (umr_mode) { + case MLX5E_MPWRQ_UMR_MODE_ALIGNED: + return MLX5E_MAX_RQ_NUM_MTTS; + case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: + return klm_limit; + case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: + /* Each entry is two KLMs. */ + return klm_limit / 2; + case MLX5E_MPWRQ_UMR_MODE_TRIPLE: + /* Each entry is four KSMs. */ + return klm_limit / 4; + } + WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode); + return 0; +} + +static u8 mlx5e_mpwrq_max_log_rq_size(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + u8 mtts_per_wqe = mlx5e_mpwrq_mtts_per_wqe(mdev, page_shift, umr_mode); + u32 max_entries = mlx5e_mpwrq_max_num_entries(mdev, umr_mode); + + return ilog2(max_entries / mtts_per_wqe); +} + +u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + return mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, umr_mode) + + mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) - + MLX5E_ORDER2_MAX_PACKET_MTU; +} + +u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u16 headroom; + + if (xsk) + return xsk->headroom; + + headroom = NET_IP_ALIGN; + if (params->xdp_prog) + headroom += XDP_PACKET_HEADROOM; + else + headroom += MLX5_RX_HEADROOM; + + return headroom; +} + +static u32 mlx5e_rx_get_linear_sz_xsk(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + + return xsk->headroom + hw_mtu; +} + +static u32 mlx5e_rx_get_linear_sz_skb(struct mlx5e_params *params, bool xsk) +{ + /* SKBs built on XDP_PASS on XSK RQs don't have headroom. */ + u16 headroom = xsk ? 0 : mlx5e_get_linear_rq_headroom(params, NULL); + u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + + return MLX5_SKB_FRAG_SZ(headroom + hw_mtu); +} + +static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + bool mpwqe) +{ + /* XSK frames are mapped as individual pages, because frames may come in + * an arbitrary order from random locations in the UMEM. + */ + if (xsk) + return mpwqe ? 1 << mlx5e_mpwrq_page_shift(mdev, xsk) : PAGE_SIZE; + + /* XDP in mlx5e doesn't support multiple packets per page. */ + if (params->xdp_prog) + return PAGE_SIZE; + + return roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false)); +} + +static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u32 linear_stride_sz = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true); + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + + return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) - + order_base_2(linear_stride_sz); +} + +bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) + return false; + + /* Both XSK and non-XSK cases allocate an SKB on XDP_PASS. Packet data + * must fit into a CPU page. + */ + if (mlx5e_rx_get_linear_sz_skb(params, xsk) > PAGE_SIZE) + return false; + + /* XSK frames must be big enough to hold the packet data. 
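+	 * That is, xsk->headroom plus the HW MTU derived from sw_mtu must
+	 * fit within chunk_size for linear RX to be usable with this pool.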
*/ + if (xsk && mlx5e_rx_get_linear_sz_xsk(params, xsk) > xsk->chunk_size) + return false; + + return true; +} + +static bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev, + u8 log_stride_sz, u8 log_num_strides, + u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + if (log_stride_sz + log_num_strides != + mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode)) + return false; + + if (log_stride_sz < MLX5_MPWQE_LOG_STRIDE_SZ_BASE || + log_stride_sz > MLX5_MPWQE_LOG_STRIDE_SZ_MAX) + return false; + + if (log_num_strides > MLX5_MPWQE_LOG_NUM_STRIDES_MAX) + return false; + + if (MLX5_CAP_GEN(mdev, ext_stride_num_range)) + return log_num_strides >= MLX5_MPWQE_LOG_NUM_STRIDES_EXT_BASE; + + return log_num_strides >= MLX5_MPWQE_LOG_NUM_STRIDES_BASE; +} + +bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + u8 log_num_strides; + u8 log_stride_sz; + u8 log_wqe_sz; + + if (!mlx5e_rx_is_linear_skb(mdev, params, xsk)) + return false; + + log_stride_sz = order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true)); + log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode); + + if (log_wqe_sz < log_stride_sz) + return false; + + log_num_strides = log_wqe_sz - log_stride_sz; + + return mlx5e_verify_rx_mpwqe_strides(mdev, log_stride_sz, + log_num_strides, page_shift, + umr_mode); +} + +u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 log_pkts_per_wqe, page_shift, max_log_rq_size; + + log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(mdev, params, xsk); + page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + max_log_rq_size = mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, umr_mode); + + /* Numbers are unsigned, don't subtract to avoid underflow. */ + if (params->log_rq_mtu_frames < + log_pkts_per_wqe + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW) + return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW; + + /* Ethtool's rx_max_pending is calculated for regular RQ, that uses + * pages of PAGE_SIZE. Max length of an XSK RQ might differ if it uses a + * frame size not equal to PAGE_SIZE. + * A stricter condition is checked in mlx5e_mpwrq_validate_xsk, WARN on + * unexpected failure. 
+ */ + if (WARN_ON_ONCE(params->log_rq_mtu_frames > log_pkts_per_wqe + max_log_rq_size)) + return max_log_rq_size; + + return params->log_rq_mtu_frames - log_pkts_per_wqe; +} + +u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + return order_base_2(DIV_ROUND_UP(MLX5E_RX_MAX_HEAD, MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE)); +} + +u8 mlx5e_shampo_get_log_rsrv_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + return order_base_2(MLX5E_SHAMPO_WQ_RESRV_SIZE / MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE); +} + +u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + u32 resrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * + PAGE_SIZE; + + return order_base_2(DIV_ROUND_UP(resrv_size, params->sw_mtu)); +} + +u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) + return order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true)); + + return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev); +} + +u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + + return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) - + mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); +} + +u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz) +{ +#define UMR_WQE_BULK (2) + return min_t(unsigned int, UMR_WQE_BULK, wq_sz / 2 - 1); +} + +u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u16 linear_headroom = mlx5e_get_linear_rq_headroom(params, xsk); + + if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) + return linear_headroom; + + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) + return linear_headroom; + + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) + return linear_headroom; + + return 0; +} + +u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + bool is_mpwqe = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE); + u16 stop_room; + + stop_room = mlx5e_ktls_get_stop_room(mdev, params); + stop_room += mlx5e_stop_room_for_max_wqe(mdev); + if (is_mpwqe) + /* A MPWQE can take up to the maximum cacheline-aligned WQE + + * all the normal stop room can be taken if a new packet breaks + * the active MPWQE session and allocates its WQEs right away. 
+ */ + stop_room += mlx5e_stop_room_for_mpwqe(mdev); + + return stop_room; +} + +int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + size_t sq_size = 1 << params->log_sq_size; + u16 stop_room; + + stop_room = mlx5e_calc_sq_stop_room(mdev, params); + if (stop_room >= sq_size) { + mlx5_core_err(mdev, "Stop room %u is bigger than the SQ size %zu\n", + stop_room, sq_size); + return -EINVAL; + } + + return 0; +} + +static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) +{ + struct dim_cq_moder moder = {}; + + moder.cq_period_mode = cq_period_mode; + moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; + moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; + if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) + moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE; + + return moder; +} + +static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) +{ + struct dim_cq_moder moder = {}; + + moder.cq_period_mode = cq_period_mode; + moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; + moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; + if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) + moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; + + return moder; +} + +static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode) +{ + return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ? + DIM_CQ_PERIOD_MODE_START_FROM_CQE : + DIM_CQ_PERIOD_MODE_START_FROM_EQE; +} + +void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode) +{ + if (params->tx_dim_enabled) { + u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); + + params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode); + } else { + params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode); + } +} + +void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode) +{ + if (params->rx_dim_enabled) { + u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); + + params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode); + } else { + params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode); + } +} + +void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) +{ + mlx5e_reset_tx_moderation(params, cq_period_mode); + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER, + params->tx_cq_moderation.cq_period_mode == + MLX5_CQ_PERIOD_MODE_START_FROM_CQE); +} + +void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) +{ + mlx5e_reset_rx_moderation(params, cq_period_mode); + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER, + params->rx_cq_moderation.cq_period_mode == + MLX5_CQ_PERIOD_MODE_START_FROM_CQE); +} + +bool slow_pci_heuristic(struct mlx5_core_dev *mdev) +{ + u32 link_speed = 0; + u32 pci_bw = 0; + + mlx5e_port_max_linkspeed(mdev, &link_speed); + pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL); + mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n", + link_speed, pci_bw); + +#define MLX5E_SLOW_PCI_RATIO (2) + + return link_speed && pci_bw && + link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw; +} + +int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, NULL); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, NULL); + + if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode)) + return -EOPNOTSUPP; + + if 
(params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) + return -EINVAL; + + return 0; +} + +int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + bool unaligned = xsk ? xsk->unaligned : false; + u16 max_mtu_pkts; + + if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode)) + return -EOPNOTSUPP; + + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) + return -EINVAL; + + /* Current RQ length is too big for the given frame size, the + * needed number of WQEs exceeds the maximum. + */ + max_mtu_pkts = min_t(u8, MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE, + mlx5e_mpwrq_max_log_rq_pkts(mdev, page_shift, unaligned)); + if (params->log_rq_mtu_frames > max_mtu_pkts) { + mlx5_core_err(mdev, "Current RQ length %d is too big for XSK with given frame size %u\n", + 1 << params->log_rq_mtu_frames, xsk->chunk_size); + return -EINVAL; + } + + return 0; +} + +void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + params->log_rq_mtu_frames = is_kdump_kernel() ? + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : + MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + + mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", + params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, + params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? + BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, NULL)) : + BIT(params->log_rq_mtu_frames), + BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)), + MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); +} + +void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + params->rq_wq_type = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ? + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : + MLX5_WQ_TYPE_CYCLIC; +} + +void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + /* Prefer Striding RQ, unless any of the following holds: + * - Striding RQ configuration is not possible/supported. + * - CQE compression is ON, and stride_index mini_cqe layout is not supported. + * - Legacy RQ would use linear SKB while Striding RQ would use non-linear. + * + * No XSK params: checking the availability of striding RQ in general. + */ + if ((!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) || + MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index)) && + !mlx5e_mpwrq_validate_regular(mdev, params) && + (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) || + !mlx5e_rx_is_linear_skb(mdev, params, NULL))) + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true); + mlx5e_set_rq_type(mdev, params); + mlx5e_init_rq_type_params(mdev, params); +} + +/* Build queue parameters */ + +void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c) +{ + *ccp = (struct mlx5e_create_cq_param) { + .napi = &c->napi, + .ch_stats = c->stats, + .node = cpu_to_node(c->cpu), + .ix = c->ix, + }; +} + +static int mlx5e_max_nonlinear_mtu(int first_frag_size, int frag_size, bool xdp) +{ + if (xdp) + /* XDP requires all fragments to be of the same size. */ + return first_frag_size + (MLX5E_MAX_RX_FRAGS - 1) * frag_size; + + /* Optimization for small packets: the last fragment is bigger than the others. 
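+	 * Assuming MLX5E_MAX_RX_FRAGS is 4 (as noted further below), this
+	 * allows first_frag_size + 2 * frag_size plus a full PAGE_SIZE for
+	 * the last fragment.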
*/ + return first_frag_size + (MLX5E_MAX_RX_FRAGS - 2) * frag_size + PAGE_SIZE; +} + +#define DEFAULT_FRAG_SIZE (2048) + +static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_frags_info *info) +{ + u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu); + int frag_size_max = DEFAULT_FRAG_SIZE; + int first_frag_size_max; + u32 buf_size = 0; + u16 headroom; + int max_mtu; + int i; + + if (mlx5e_rx_is_linear_skb(mdev, params, xsk)) { + int frag_stride; + + frag_stride = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, false); + + info->arr[0].frag_size = byte_count; + info->arr[0].frag_stride = frag_stride; + info->num_frags = 1; + + /* N WQEs share the same page, N = PAGE_SIZE / frag_stride. The + * first WQE in the page is responsible for allocation of this + * page, this WQE's index is k*N. If WQEs [k*N+1; k*N+N-1] are + * still not completed, the allocation must stop before k*N. + */ + info->wqe_index_mask = (PAGE_SIZE / frag_stride) - 1; + + goto out; + } + + headroom = mlx5e_get_linear_rq_headroom(params, xsk); + first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom); + + max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max, + params->xdp_prog); + if (byte_count > max_mtu || params->xdp_prog) { + frag_size_max = PAGE_SIZE; + first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom); + + max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max, + params->xdp_prog); + if (byte_count > max_mtu) { + mlx5_core_err(mdev, "MTU %u is too big for non-linear legacy RQ (max %d)\n", + params->sw_mtu, max_mtu); + return -EINVAL; + } + } + + i = 0; + while (buf_size < byte_count) { + int frag_size = byte_count - buf_size; + + if (i == 0) + frag_size = min(frag_size, first_frag_size_max); + else if (i < MLX5E_MAX_RX_FRAGS - 1) + frag_size = min(frag_size, frag_size_max); + + info->arr[i].frag_size = frag_size; + buf_size += frag_size; + + if (params->xdp_prog) { + /* XDP multi buffer expects fragments of the same size. */ + info->arr[i].frag_stride = frag_size_max; + } else { + if (i == 0) { + /* Ensure that headroom and tailroom are included. */ + frag_size += headroom; + frag_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + } + info->arr[i].frag_stride = roundup_pow_of_two(frag_size); + } + + i++; + } + info->num_frags = i; + + /* The last fragment of WQE with index 2*N may share the page with the + * first fragment of WQE with index 2*N+1 in certain cases. If WQE 2*N+1 + * is not completed yet, WQE 2*N must not be allocated, as it's + * responsible for allocating a new page. + */ + if (frag_size_max == PAGE_SIZE) { + /* No WQE can start in the middle of a page. */ + info->wqe_index_mask = 0; + } else { + /* PAGE_SIZEs starting from 8192 don't use 2K-sized fragments, + * because there would be more than MLX5E_MAX_RX_FRAGS of them. + */ + WARN_ON(PAGE_SIZE != 2 * DEFAULT_FRAG_SIZE); + + /* Odd number of fragments allows to pack the last fragment of + * the previous WQE and the first fragment of the next WQE into + * the same page. + * As long as DEFAULT_FRAG_SIZE is 2048, and MLX5E_MAX_RX_FRAGS + * is 4, the last fragment can be bigger than the rest only if + * it's the fourth one, so WQEs consisting of 3 fragments will + * always share a page. + * When a page is shared, WQE bulk size is 2, otherwise just 1. 
+ */ + info->wqe_index_mask = info->num_frags % 2; + } + +out: + /* Bulking optimization to skip allocation until at least 8 WQEs can be + * allocated in a row. At the same time, never start allocation when + * the page is still used by older WQEs. + */ + info->wqe_bulk = max_t(u8, info->wqe_index_mask + 1, 8); + + info->log_num_frags = order_base_2(info->num_frags); + + return 0; +} + +static u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs) +{ + int sz = sizeof(struct mlx5_wqe_data_seg) * ndsegs; + + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + sz += sizeof(struct mlx5e_rx_wqe_ll); + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + sz += sizeof(struct mlx5e_rx_wqe_cyc); + } + + return order_base_2(sz); +} + +static void mlx5e_build_common_cq_param(struct mlx5_core_dev *mdev, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); + if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128) + MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); +} + +static u32 mlx5e_shampo_get_log_cq_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + int rsrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * PAGE_SIZE; + u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk)); + int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params)); + u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk)); + int wqe_size = BIT(log_stride_sz) * num_strides; + + /* +1 is for the case that the pkt_per_rsrv dont consume the reservation + * so we get a filler cqe for the rest of the reservation. + */ + return order_base_2((wqe_size / rsrv_size) * wq_size * (pkt_per_rsrv + 1)); +} + +static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_cq_param *param) +{ + bool hw_stridx = false; + void *cqc = param->cqc; + u8 log_cq_size; + + switch (params->rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index); + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) + log_cq_size = mlx5e_shampo_get_log_cq_size(mdev, params, xsk); + else + log_cq_size = mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk) + + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + log_cq_size = params->log_rq_mtu_frames; + } + + MLX5_SET(cqc, cqc, log_cq_size, log_cq_size); + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { + MLX5_SET(cqc, cqc, mini_cqe_res_format, hw_stridx ? + MLX5_CQE_FORMAT_CSUM_STRIDX : MLX5_CQE_FORMAT_CSUM); + MLX5_SET(cqc, cqc, cqe_comp_en, 1); + } + + mlx5e_build_common_cq_param(mdev, param); + param->cq_period_mode = params->rx_cq_moderation.cq_period_mode; +} + +static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + bool lro_en = params->packet_merge.type == MLX5E_PACKET_MERGE_LRO; + bool ro = pcie_relaxed_ordering_enabled(mdev->pdev) && + MLX5_CAP_GEN(mdev, relaxed_ordering_write); + + return ro && lro_en ? 
+ MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN; +} + +int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + u16 q_counter, + struct mlx5e_rq_param *param) +{ + void *rqc = param->rqc; + void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + int ndsegs = 1; + int err; + + switch (params->rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: { + u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); + u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + + if (!mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size, + log_wqe_num_of_strides, + page_shift, umr_mode)) { + mlx5_core_err(mdev, + "Bad RX MPWQE params: log_stride_size %u, log_num_strides %u, umr_mode %d\n", + log_wqe_stride_size, log_wqe_num_of_strides, + umr_mode); + return -EINVAL; + } + + MLX5_SET(wq, wq, log_wqe_num_of_strides, + log_wqe_num_of_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE); + MLX5_SET(wq, wq, log_wqe_stride_size, + log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); + MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk)); + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { + MLX5_SET(wq, wq, shampo_enable, true); + MLX5_SET(wq, wq, log_reservation_size, + mlx5e_shampo_get_log_rsrv_size(mdev, params)); + MLX5_SET(wq, wq, + log_max_num_of_packets_per_reservation, + mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params)); + MLX5_SET(wq, wq, log_headers_entry_size, + mlx5e_shampo_get_log_hd_entry_size(mdev, params)); + MLX5_SET(rqc, rqc, reservation_timeout, + params->packet_merge.timeout); + MLX5_SET(rqc, rqc, shampo_match_criteria_type, + params->packet_merge.shampo.match_criteria_type); + MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity, + params->packet_merge.shampo.alignment_granularity); + } + break; + } + default: /* MLX5_WQ_TYPE_CYCLIC */ + MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); + err = mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info); + if (err) + return err; + ndsegs = param->frags_info.num_frags; + } + + MLX5_SET(wq, wq, wq_type, params->rq_wq_type); + MLX5_SET(wq, wq, end_padding_mode, rq_end_pad_mode(mdev, params)); + MLX5_SET(wq, wq, log_wq_stride, + mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs)); + MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn); + MLX5_SET(rqc, rqc, counter_set_id, q_counter); + MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable); + MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en); + + param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); + mlx5e_build_rx_cq_param(mdev, params, xsk, &param->cqp); + + return 0; +} + +void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev, + u16 q_counter, + struct mlx5e_rq_param *param) +{ + void *rqc = param->rqc; + void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, log_wq_stride, + mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1)); + MLX5_SET(rqc, rqc, counter_set_id, q_counter); + + param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); +} + +void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size); + + mlx5e_build_common_cq_param(mdev, param); + param->cq_period_mode = params->tx_cq_moderation.cq_period_mode;
+} + +void mlx5e_build_sq_param_common(struct mlx5_core_dev *mdev, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); + MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn); + + param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); +} + +void mlx5e_build_sq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + bool allow_swp; + + allow_swp = mlx5_geneve_tx_allowed(mdev) || + (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO); + mlx5e_build_sq_param_common(mdev, param); + MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); + MLX5_SET(sqc, sqc, allow_swp, allow_swp); + param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE); + param->stop_room = mlx5e_calc_sq_stop_room(mdev, params); + mlx5e_build_tx_cq_param(mdev, params, &param->cqp); +} + +static void mlx5e_build_ico_cq_param(struct mlx5_core_dev *mdev, + u8 log_wq_size, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, log_cq_size, log_wq_size); + + mlx5e_build_common_cq_param(mdev, param); + + param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; +} + +/* This function calculates the maximum number of header entries that are needed + * per WQE; the formula is based on the size of the reservations and on the + * restriction that the max number of packets per reservation is equal to the + * max number of headers per reservation. + */ +u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_rq_param *rq_param) +{ + int resv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * PAGE_SIZE; + u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, NULL)); + int pkt_per_resv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params)); + u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL); + int wqe_size = BIT(log_stride_sz) * num_strides; + u32 hd_per_wqe; + + /* Assumption: hd_per_wqe % 8 == 0. */ + hd_per_wqe = (wqe_size / resv_size) * pkt_per_resv; + mlx5_core_dbg(mdev, "%s hd_per_wqe = %d rsrv_size = %d wqe_size = %d pkt_per_resv = %d\n", + __func__, hd_per_wqe, resv_size, wqe_size, pkt_per_resv); + return hd_per_wqe; +} + +/* This function calculates the maximum number of header entries that are needed + * for the WQ; this value is used to allocate the header buffer in HW, thus it + * must be a power of 2.
+ */ +u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_rq_param *rq_param) +{ + void *wqc = MLX5_ADDR_OF(rqc, rq_param->rqc, wq); + int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); + u32 hd_per_wqe, hd_per_wq; + + hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param); + hd_per_wq = roundup_pow_of_two(hd_per_wqe * wq_size); + return hd_per_wq; +} + +static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_rq_param *rq_param) +{ + int max_num_of_umr_per_wqe, max_hd_per_wqe, max_klm_per_umr, rest; + void *wqc = MLX5_ADDR_OF(rqc, rq_param->rqc, wq); + int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); + u32 wqebbs; + + max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE(mdev); + max_hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param); + max_num_of_umr_per_wqe = max_hd_per_wqe / max_klm_per_umr; + rest = max_hd_per_wqe % max_klm_per_umr; + wqebbs = MLX5E_KLM_UMR_WQEBBS(max_klm_per_umr) * max_num_of_umr_per_wqe; + if (rest) + wqebbs += MLX5E_KLM_UMR_WQEBBS(rest); + wqebbs *= wq_size; + return wqebbs; +} + +static u32 mlx5e_mpwrq_total_umr_wqebbs(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + u8 umr_wqebbs; + + umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, umr_mode); + + return umr_wqebbs * (1 << mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk)); +} + +static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_rq_param *rqp) +{ + u32 wqebbs, total_pages, useful_space; + + /* MLX5_WQ_TYPE_CYCLIC */ + if (params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; + + /* UMR WQEs for the regular RQ. */ + wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, NULL); + + /* If XDP program is attached, XSK may be turned on at any time without + * restarting the channel. ICOSQ must be big enough to fit UMR WQEs of + * both regular RQ and XSK RQ. + * + * XSK uses different values of page_shift, and the total number of UMR + * WQEBBs depends on it. This dependency is complex and not monotonic, + * especially taking into consideration that some of the parameters come + * from capabilities. Hence, we have to try all valid values of XSK + * frame size (and page_shift) to find the maximum. + */ + if (params->xdp_prog) { + u32 max_xsk_wqebbs = 0; + u8 frame_shift; + + for (frame_shift = XDP_UMEM_MIN_CHUNK_SHIFT; + frame_shift <= PAGE_SHIFT; frame_shift++) { + /* The headroom doesn't affect the calculation. */ + struct mlx5e_xsk_param xsk = { + .chunk_size = 1 << frame_shift, + .unaligned = false, + }; + + /* XSK aligned mode. */ + max_xsk_wqebbs = max(max_xsk_wqebbs, + mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk)); + + /* XSK unaligned mode, frame size is a power of two. */ + xsk.unaligned = true; + max_xsk_wqebbs = max(max_xsk_wqebbs, + mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk)); + + /* XSK unaligned mode, frame size is not equal to stride size. */ + xsk.chunk_size -= 1; + max_xsk_wqebbs = max(max_xsk_wqebbs, + mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk)); + + /* XSK unaligned mode, frame size is a triple power of two. 
*/ + xsk.chunk_size = (1 << frame_shift) / 4 * 3; + max_xsk_wqebbs = max(max_xsk_wqebbs, + mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk)); + } + + wqebbs += max_xsk_wqebbs; + } + + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) + wqebbs += mlx5e_shampo_icosq_sz(mdev, params, rqp); + + /* UMR WQEs don't cross the page boundary, they are padded with NOPs. + * This padding is always smaller than the max WQE size. That gives us + * at least (PAGE_SIZE - (max WQE size - MLX5_SEND_WQE_BB)) useful bytes + * per page. The number of pages is estimated as the total size of WQEs + * divided by the useful space in page, rounding up. If some WQEs don't + * fully fit into the useful space, they can occupy part of the padding, + * which proves this estimation to be correct (reserve enough space). + */ + useful_space = PAGE_SIZE - mlx5e_get_max_sq_wqebbs(mdev) + MLX5_SEND_WQE_BB; + total_pages = DIV_ROUND_UP(wqebbs * MLX5_SEND_WQE_BB, useful_space); + wqebbs = total_pages * (PAGE_SIZE / MLX5_SEND_WQE_BB); + + return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE, order_base_2(wqebbs)); +} + +static u8 mlx5e_build_async_icosq_log_wq_sz(struct mlx5_core_dev *mdev) +{ + if (mlx5e_is_ktls_rx(mdev)) + return MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + + return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; +} + +static void mlx5e_build_icosq_param(struct mlx5_core_dev *mdev, + u8 log_wq_size, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(mdev, param); + + MLX5_SET(wq, wq, log_wq_sz, log_wq_size); + MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq)); + mlx5e_build_ico_cq_param(mdev, log_wq_size, &param->cqp); +} + +static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev, + u8 log_wq_size, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(mdev, param); + param->stop_room = mlx5e_stop_room_for_wqe(mdev, 1); /* for XSK NOP */ + param->is_tls = mlx5e_is_ktls_rx(mdev); + if (param->is_tls) + param->stop_room += mlx5e_stop_room_for_wqe(mdev, 1); /* for TLS RX resync NOP */ + MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq)); + MLX5_SET(wq, wq, log_wq_sz, log_wq_size); + mlx5e_build_ico_cq_param(mdev, log_wq_size, &param->cqp); +} + +void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(mdev, param); + MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); + param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE); + param->is_xdp_mb = !mlx5e_rx_is_linear_skb(mdev, params, xsk); + mlx5e_build_tx_cq_param(mdev, params, &param->cqp); +} + +int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u16 q_counter, + struct mlx5e_channel_param *cparam) +{ + u8 icosq_log_wq_sz, async_icosq_log_wq_sz; + int err; + + err = mlx5e_build_rq_param(mdev, params, NULL, q_counter, &cparam->rq); + if (err) + return err; + + icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(mdev, params, &cparam->rq); + async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(mdev); + + mlx5e_build_sq_param(mdev, params, &cparam->txq_sq); + mlx5e_build_xdpsq_param(mdev, params, NULL, &cparam->xdp_sq); + mlx5e_build_icosq_param(mdev, icosq_log_wq_sz, &cparam->icosq); + mlx5e_build_async_icosq_param(mdev, async_icosq_log_wq_sz,
&cparam->async_icosq); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h new file mode 100644 index 000000000..034debd14 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_PARAMS_H__ +#define __MLX5_EN_PARAMS_H__ + +#include "en.h" + +struct mlx5e_xsk_param { + u16 headroom; + u16 chunk_size; + bool unaligned; +}; + +struct mlx5e_cq_param { + u32 cqc[MLX5_ST_SZ_DW(cqc)]; + struct mlx5_wq_param wq; + u16 eq_ix; + u8 cq_period_mode; +}; + +struct mlx5e_rq_param { + struct mlx5e_cq_param cqp; + u32 rqc[MLX5_ST_SZ_DW(rqc)]; + struct mlx5_wq_param wq; + struct mlx5e_rq_frags_info frags_info; +}; + +struct mlx5e_sq_param { + struct mlx5e_cq_param cqp; + u32 sqc[MLX5_ST_SZ_DW(sqc)]; + struct mlx5_wq_param wq; + bool is_mpw; + bool is_tls; + bool is_xdp_mb; + u16 stop_room; +}; + +struct mlx5e_channel_param { + struct mlx5e_rq_param rq; + struct mlx5e_sq_param txq_sq; + struct mlx5e_sq_param xdp_sq; + struct mlx5e_sq_param icosq; + struct mlx5e_sq_param async_icosq; +}; + +struct mlx5e_create_sq_param { + struct mlx5_wq_ctrl *wq_ctrl; + u32 cqn; + u32 ts_cqe_to_dest_cqn; + u32 tisn; + u8 tis_lst_sz; + u8 min_inline_mode; +}; + +/* Striding RQ dynamic parameters */ + +u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk); +enum mlx5e_mpwrq_umr_mode +mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode); +u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); +u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); +u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); +u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); +u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); +u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev, + enum mlx5e_mpwrq_umr_mode umr_mode); +u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); + +/* Parameter calculations */ + +void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode); +void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode); +void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); +void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); + +bool slow_pci_heuristic(struct mlx5_core_dev *mdev); +int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); + +u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); 
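+
+/* Both linear-SKB helpers feed the RQ type decision in mlx5e_build_rq_params()
+ * (params.c): mlx5e_rx_is_linear_skb() covers the legacy (cyclic) RQ, while
+ * mlx5e_rx_mpwqe_is_linear_skb() below covers the striding RQ (MPWQE). A
+ * simplified sketch of that check, for illustration only:
+ *
+ *	if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ||
+ *	    !mlx5e_rx_is_linear_skb(mdev, params, NULL))
+ *		MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true);
+ */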
+bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params); +u8 mlx5e_shampo_get_log_rsrv_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params); +u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev, + struct mlx5e_params *params); +u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_rq_param *rq_param); +u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_rq_param *rq_param); +u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz); +u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); + +/* Build queue parameters */ + +void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c); +int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + u16 q_counter, + struct mlx5e_rq_param *param); +void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev, + u16 q_counter, + struct mlx5e_rq_param *param); +void mlx5e_build_sq_param_common(struct mlx5_core_dev *mdev, + struct mlx5e_sq_param *param); +void mlx5e_build_sq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_sq_param *param); +void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_cq_param *param); +void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_sq_param *param); +int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u16 q_counter, + struct mlx5e_channel_param *cparam); + +u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); + +#endif /* __MLX5_EN_PARAMS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c new file mode 100644 index 000000000..89510cac4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -0,0 +1,594 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "port.h" + +/* speed in units of 1Mb */ +static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = { + [MLX5E_1000BASE_CX_SGMII] = 1000, + [MLX5E_1000BASE_KX] = 1000, + [MLX5E_10GBASE_CX4] = 10000, + [MLX5E_10GBASE_KX4] = 10000, + [MLX5E_10GBASE_KR] = 10000, + [MLX5E_20GBASE_KR2] = 20000, + [MLX5E_40GBASE_CR4] = 40000, + [MLX5E_40GBASE_KR4] = 40000, + [MLX5E_56GBASE_R4] = 56000, + [MLX5E_10GBASE_CR] = 10000, + [MLX5E_10GBASE_SR] = 10000, + [MLX5E_10GBASE_ER] = 10000, + [MLX5E_40GBASE_SR4] = 40000, + [MLX5E_40GBASE_LR4] = 40000, + [MLX5E_50GBASE_SR2] = 50000, + [MLX5E_100GBASE_CR4] = 100000, + [MLX5E_100GBASE_SR4] = 100000, + [MLX5E_100GBASE_KR4] = 100000, + [MLX5E_100GBASE_LR4] = 100000, + [MLX5E_100BASE_TX] = 100, + [MLX5E_1000BASE_T] = 1000, + [MLX5E_10GBASE_T] = 10000, + [MLX5E_25GBASE_CR] = 25000, + [MLX5E_25GBASE_KR] = 25000, + [MLX5E_25GBASE_SR] = 25000, + [MLX5E_50GBASE_CR2] = 50000, + [MLX5E_50GBASE_KR2] = 50000, +}; + +static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = { + [MLX5E_SGMII_100M] = 100, + [MLX5E_1000BASE_X_SGMII] = 1000, + [MLX5E_5GBASE_R] = 5000, + [MLX5E_10GBASE_XFI_XAUI_1] = 10000, + [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = 40000, + [MLX5E_25GAUI_1_25GBASE_CR_KR] = 25000, + [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000, + [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = 50000, + [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000, + [MLX5E_100GAUI_2_100GBASE_CR2_KR2] = 100000, + [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000, + [MLX5E_400GAUI_8] = 400000, + [MLX5E_100GAUI_1_100GBASE_CR_KR] = 100000, + [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000, + [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000, +}; + +bool mlx5e_ptys_ext_supported(struct mlx5_core_dev *mdev) +{ + struct mlx5e_port_eth_proto eproto; + int err; + + if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet)) + return true; + + err = mlx5_port_query_eth_proto(mdev, 1, true, &eproto); + if (err) + return false; + + return !!eproto.cap; +} + +static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev, + const u32 **arr, u32 *size, + bool force_legacy) +{ + bool ext = force_legacy ? false : mlx5e_ptys_ext_supported(mdev); + + *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) : + ARRAY_SIZE(mlx5e_link_speed); + *arr = ext ? 
mlx5e_ext_link_speed : mlx5e_link_speed; +} + +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, + struct mlx5e_port_eth_proto *eproto) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + if (!eproto) + return -EINVAL; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port); + if (err) + return err; + + eproto->cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_capability); + eproto->admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin); + eproto->oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); + return 0; +} + +void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + + *an_status = 0; + *an_disable_cap = 0; + *an_disable_admin = 0; + + if (mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, 1)) + return; + + *an_status = MLX5_GET(ptys_reg, out, an_status); + *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap); + *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); +} + +int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin, bool ext) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + u32 in[MLX5_ST_SZ_DW(ptys_reg)]; + u8 an_disable_admin; + u8 an_disable_cap; + u8 an_status; + + mlx5_port_query_eth_autoneg(dev, &an_status, &an_disable_cap, + &an_disable_admin); + if (!an_disable_cap && an_disable) + return -EPERM; + + memset(in, 0, sizeof(in)); + + MLX5_SET(ptys_reg, in, local_port, 1); + MLX5_SET(ptys_reg, in, an_disable_admin, an_disable); + MLX5_SET(ptys_reg, in, proto_mask, MLX5_PTYS_EN); + if (ext) + MLX5_SET(ptys_reg, in, ext_eth_proto_admin, proto_admin); + else + MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PTYS, 0, 1); +} + +u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper, + bool force_legacy) +{ + unsigned long temp = eth_proto_oper; + const u32 *table; + u32 speed = 0; + u32 max_size; + int i; + + mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy); + i = find_first_bit(&temp, max_size); + if (i < max_size) + speed = table[i]; + return speed; +} + +int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) +{ + struct mlx5e_port_eth_proto eproto; + bool force_legacy = false; + bool ext; + int err; + + ext = mlx5e_ptys_ext_supported(mdev); + err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); + if (err) + goto out; + if (ext && !eproto.admin) { + force_legacy = true; + err = mlx5_port_query_eth_proto(mdev, 1, false, &eproto); + if (err) + goto out; + } + *speed = mlx5e_port_ptys2speed(mdev, eproto.oper, force_legacy); + if (!(*speed)) + err = -EINVAL; + +out: + return err; +} + +int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) +{ + struct mlx5e_port_eth_proto eproto; + u32 max_speed = 0; + const u32 *table; + u32 max_size; + bool ext; + int err; + int i; + + ext = mlx5e_ptys_ext_supported(mdev); + err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); + if (err) + return err; + + mlx5e_port_get_speed_arr(mdev, &table, &max_size, false); + for (i = 0; i < max_size; ++i) + if (eproto.cap & MLX5E_PROT_MASK(i)) + max_speed = max(max_speed, table[i]); + + *speed = max_speed; + return 0; +} + +u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed, + bool force_legacy) +{ + u32 link_modes = 0; + const u32 *table; + u32 max_size; + int i; + + mlx5e_port_get_speed_arr(mdev, &table, &max_size, 
force_legacy); + for (i = 0; i < max_size; ++i) { + if (table[i] == speed) + link_modes |= MLX5E_PROT_MASK(i); + } + return link_modes; +} + +int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out) +{ + int sz = MLX5_ST_SZ_BYTES(pbmc_reg); + void *in; + int err; + + in = kzalloc(sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(pbmc_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PBMC, 0, 0); + + kfree(in); + return err; +} + +int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in) +{ + int sz = MLX5_ST_SZ_BYTES(pbmc_reg); + void *out; + int err; + + out = kzalloc(sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(pbmc_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PBMC, 0, 1); + + kfree(out); + return err; +} + +/* buffer[i]: buffer that priority i mapped to */ +int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer) +{ + int sz = MLX5_ST_SZ_BYTES(pptb_reg); + u32 prio_x_buff; + void *out; + void *in; + int prio; + int err; + + in = kzalloc(sz, GFP_KERNEL); + out = kzalloc(sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(pptb_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 0); + if (err) + goto out; + + prio_x_buff = MLX5_GET(pptb_reg, out, prio_x_buff); + for (prio = 0; prio < 8; prio++) { + buffer[prio] = (u8)(prio_x_buff >> (4 * prio)) & 0xF; + mlx5_core_dbg(mdev, "prio %d, buffer %d\n", prio, buffer[prio]); + } +out: + kfree(in); + kfree(out); + return err; +} + +int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer) +{ + int sz = MLX5_ST_SZ_BYTES(pptb_reg); + u32 prio_x_buff; + void *out; + void *in; + int prio; + int err; + + in = kzalloc(sz, GFP_KERNEL); + out = kzalloc(sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + /* First query the pptb register */ + MLX5_SET(pptb_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 0); + if (err) + goto out; + + memcpy(in, out, sz); + MLX5_SET(pptb_reg, in, local_port, 1); + + /* Update the pm and prio_x_buff */ + MLX5_SET(pptb_reg, in, pm, 0xFF); + + prio_x_buff = 0; + for (prio = 0; prio < 8; prio++) + prio_x_buff |= (buffer[prio] << (4 * prio)); + MLX5_SET(pptb_reg, in, prio_x_buff, prio_x_buff); + + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 1); + +out: + kfree(in); + kfree(out); + return err; +} + +enum mlx5e_fec_supported_link_mode { + MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G, + MLX5E_FEC_SUPPORTED_LINK_MODES_25G, + MLX5E_FEC_SUPPORTED_LINK_MODES_50G, + MLX5E_FEC_SUPPORTED_LINK_MODES_56G, + MLX5E_FEC_SUPPORTED_LINK_MODES_100G, + MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X, + MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X, + MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X, + MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X, + MLX5E_MAX_FEC_SUPPORTED_LINK_MODE, +}; + +#define MLX5E_FEC_FIRST_50G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X + +#define MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, policy, write, link) \ + do { \ + u16 *_policy = &(policy); \ + u32 *_buf = buf; \ + \ + if (write) \ + MLX5_SET(pplm_reg, _buf, fec_override_admin_##link, *_policy); \ + else \ + *_policy = MLX5_GET(pplm_reg, _buf, fec_override_admin_##link); \ + } while (0) + +/* get/set FEC admin field for a given speed */ +static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write, + enum mlx5e_fec_supported_link_mode link_mode) +{ + switch (link_mode) { + case 
MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 10g_40g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_25G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 25g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_50G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 50g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_56G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 56g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_100G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 100g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 50g_1x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 100g_2x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 200g_4x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 400g_8x); + break; + default: + return -EINVAL; + } + return 0; +} + +#define MLX5E_GET_FEC_OVERRIDE_CAP(buf, link) \ + MLX5_GET(pplm_reg, buf, fec_override_cap_##link) + +/* returns FEC capabilities for a given speed */ +static int mlx5e_get_fec_cap_field(u32 *pplm, u16 *fec_cap, + enum mlx5e_fec_supported_link_mode link_mode) +{ + switch (link_mode) { + case MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 10g_40g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_25G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 25g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_50G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 50g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_56G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 56g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_100G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 100g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 50g_1x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 100g_2x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 200g_4x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 400g_8x); + break; + default: + return -EINVAL; + } + return 0; +} + +bool mlx5e_fec_in_caps(struct mlx5_core_dev *dev, int fec_policy) +{ + bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); + u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(pplm_reg); + int err; + int i; + + if (!MLX5_CAP_GEN(dev, pcam_reg) || !MLX5_CAP_PCAM_REG(dev, pplm)) + return false; + + MLX5_SET(pplm_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); + if (err) + return false; + + for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) { + u16 fec_caps; + + if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane) + break; + + mlx5e_get_fec_cap_field(out, &fec_caps, i); + if (fec_caps & fec_policy) + return true; + } + return false; +} + +int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active, + u16 *fec_configured_mode) +{ + bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); + u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(pplm_reg); + int 
err; + int i; + + if (!MLX5_CAP_GEN(dev, pcam_reg)) + return -EOPNOTSUPP; + + if (!MLX5_CAP_PCAM_REG(dev, pplm)) + return -EOPNOTSUPP; + + MLX5_SET(pplm_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); + if (err) + return err; + + *fec_mode_active = MLX5_GET(pplm_reg, out, fec_mode_active); + + if (!fec_configured_mode) + goto out; + + *fec_configured_mode = 0; + for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) { + if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane) + break; + + mlx5e_fec_admin_field(out, fec_configured_mode, 0, i); + if (*fec_configured_mode != 0) + goto out; + } +out: + return 0; +} + +int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy) +{ + bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); + u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(pplm_reg); + u16 fec_policy_auto = 0; + int err; + int i; + + if (!MLX5_CAP_GEN(dev, pcam_reg)) + return -EOPNOTSUPP; + + if (!MLX5_CAP_PCAM_REG(dev, pplm)) + return -EOPNOTSUPP; + + if (fec_policy >= (1 << MLX5E_FEC_LLRS_272_257_1) && !fec_50g_per_lane) + return -EOPNOTSUPP; + + if (fec_policy && !mlx5e_fec_in_caps(dev, fec_policy)) + return -EOPNOTSUPP; + + MLX5_SET(pplm_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); + if (err) + return err; + + MLX5_SET(pplm_reg, out, local_port, 1); + + for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) { + u16 conf_fec = fec_policy; + u16 fec_caps = 0; + + if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane) + break; + + /* RS fec in ethtool is mapped to MLX5E_FEC_RS_528_514 + * to link modes up to 25G per lane and to + * MLX5E_FEC_RS_544_514 in the new link modes based on + * 50 G per lane + */ + if (conf_fec == (1 << MLX5E_FEC_RS_528_514) && + i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE) + conf_fec = (1 << MLX5E_FEC_RS_544_514); + + mlx5e_get_fec_cap_field(out, &fec_caps, i); + + /* policy supported for link speed */ + if (fec_caps & conf_fec) + mlx5e_fec_admin_field(out, &conf_fec, 1, i); + else + /* set FEC to auto*/ + mlx5e_fec_admin_field(out, &fec_policy_auto, 1, i); + } + + return mlx5_core_access_reg(dev, out, sz, out, sz, MLX5_REG_PPLM, 0, 1); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h new file mode 100644 index 000000000..7a7defe60 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5E_EN_PORT_H +#define __MLX5E_EN_PORT_H + +#include <linux/mlx5/driver.h> +#include "en.h" + +struct mlx5e_port_eth_proto { + u32 cap; + u32 admin; + u32 oper; +}; + +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, + struct mlx5e_port_eth_proto *eproto); +void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin); +int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin, bool ext); +u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper, + bool force_legacy); +int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); +int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); +u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed, + bool force_legacy); +bool mlx5e_ptys_ext_supported(struct mlx5_core_dev *mdev); +int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out); +int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in); +int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); +int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); + +bool mlx5e_fec_in_caps(struct mlx5_core_dev *dev, int fec_policy); +int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active, + u16 *fec_configured_mode); +int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy); + +enum { + MLX5E_FEC_NOFEC, + MLX5E_FEC_FIRECODE, + MLX5E_FEC_RS_528_514, + MLX5E_FEC_RS_544_514 = 7, + MLX5E_FEC_LLRS_272_257_1 = 9, +}; + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c new file mode 100644 index 000000000..c9d5d8d93 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "port_buffer.h" + +int mlx5e_port_query_buffer(struct mlx5e_priv *priv, + struct mlx5e_port_buffer *port_buffer) +{ + u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz; + struct mlx5_core_dev *mdev = priv->mdev; + int sz = MLX5_ST_SZ_BYTES(pbmc_reg); + u32 total_used = 0; + void *buffer; + void *out; + int err; + int i; + + out = kzalloc(sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5e_port_query_pbmc(mdev, out); + if (err) + goto out; + + for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + buffer = MLX5_ADDR_OF(pbmc_reg, out, buffer[i]); + port_buffer->buffer[i].lossy = + MLX5_GET(bufferx_reg, buffer, lossy); + port_buffer->buffer[i].epsb = + MLX5_GET(bufferx_reg, buffer, epsb); + port_buffer->buffer[i].size = + MLX5_GET(bufferx_reg, buffer, size) * port_buff_cell_sz; + port_buffer->buffer[i].xon = + MLX5_GET(bufferx_reg, buffer, xon_threshold) * port_buff_cell_sz; + port_buffer->buffer[i].xoff = + MLX5_GET(bufferx_reg, buffer, xoff_threshold) * port_buff_cell_sz; + total_used += port_buffer->buffer[i].size; + + mlx5e_dbg(HW, priv, "buffer %d: size=%d, xon=%d, xoff=%d, epsb=%d, lossy=%d\n", i, + port_buffer->buffer[i].size, + port_buffer->buffer[i].xon, + port_buffer->buffer[i].xoff, + port_buffer->buffer[i].epsb, + port_buffer->buffer[i].lossy); + } + + port_buffer->port_buffer_size = + MLX5_GET(pbmc_reg, out, port_buffer_size) * port_buff_cell_sz; + port_buffer->spare_buffer_size = + port_buffer->port_buffer_size - total_used; + + mlx5e_dbg(HW, priv, "total buffer size=%d, spare buffer size=%d\n", + port_buffer->port_buffer_size, + port_buffer->spare_buffer_size); +out: + kfree(out); + return err; +} + +static int port_set_buffer(struct mlx5e_priv *priv, + struct mlx5e_port_buffer *port_buffer) +{ + u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz; + struct mlx5_core_dev *mdev = priv->mdev; + int sz = MLX5_ST_SZ_BYTES(pbmc_reg); + void *in; + int err; + int i; + + in = kzalloc(sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + err = mlx5e_port_query_pbmc(mdev, in); + if (err) + goto out; + + for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + void *buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]); + u64 size = port_buffer->buffer[i].size; + u64 xoff = port_buffer->buffer[i].xoff; + u64 xon = port_buffer->buffer[i].xon; + + do_div(size, port_buff_cell_sz); + do_div(xoff, port_buff_cell_sz); + do_div(xon, port_buff_cell_sz); + MLX5_SET(bufferx_reg, buffer, size, size); + MLX5_SET(bufferx_reg, buffer, lossy, port_buffer->buffer[i].lossy); + MLX5_SET(bufferx_reg, buffer, xoff_threshold, xoff); + MLX5_SET(bufferx_reg, buffer, xon_threshold, xon); + } + + err = mlx5e_port_set_pbmc(mdev, in); +out: + kfree(in); + return err; +} + +/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]) + * minimum speed value is 40Gbps + */ +static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) +{ + u32 speed; + u32 xoff; + int err; + + err = mlx5e_port_linkspeed(priv->mdev, &speed); + if (err) + speed = SPEED_40000; + speed = max_t(u32, speed, SPEED_40000); + + xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100; + + mlx5e_dbg(HW, priv, "%s: xoff=%d\n", __func__, xoff); + return xoff; +} + +static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, + u32 xoff, 
unsigned int max_mtu, u16 port_buff_cell_sz) +{ + int i; + + for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + if (port_buffer->buffer[i].lossy) { + port_buffer->buffer[i].xoff = 0; + port_buffer->buffer[i].xon = 0; + continue; + } + + if (port_buffer->buffer[i].size < + (xoff + max_mtu + port_buff_cell_sz)) { + pr_err("buffer_size[%d]=%d is not enough for lossless buffer\n", + i, port_buffer->buffer[i].size); + return -ENOMEM; + } + + port_buffer->buffer[i].xoff = port_buffer->buffer[i].size - xoff; + port_buffer->buffer[i].xon = + port_buffer->buffer[i].xoff - max_mtu; + } + + return 0; +} + +/** + * update_buffer_lossy - Update buffer configuration based on pfc + * @max_mtu: netdev's max_mtu + * @pfc_en: <input> current pfc configuration + * @buffer: <input> current prio to buffer mapping + * @xoff: <input> xoff value + * @port_buff_cell_sz: <input> port buffer cell_size + * @port_buffer: <output> port receive buffer configuration + * @change: <output> + * + * Update buffer configuration based on pfc configuration and + * priority to buffer mapping. + * Buffer's lossy bit is changed to: + * lossless if there is at least one PFC enabled priority + * mapped to this buffer lossy if all priorities mapped to + * this buffer are PFC disabled + * + * @return: 0 if no error, + * sets change to true if buffer configuration was modified. + */ +static int update_buffer_lossy(unsigned int max_mtu, + u8 pfc_en, u8 *buffer, u32 xoff, u16 port_buff_cell_sz, + struct mlx5e_port_buffer *port_buffer, + bool *change) +{ + bool changed = false; + u8 lossy_count; + u8 prio_count; + u8 lossy; + int prio; + int err; + int i; + + for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + prio_count = 0; + lossy_count = 0; + + for (prio = 0; prio < MLX5E_MAX_PRIORITY; prio++) { + if (buffer[prio] != i) + continue; + + prio_count++; + lossy_count += !(pfc_en & (1 << prio)); + } + + if (lossy_count == prio_count) + lossy = 1; + else /* lossy_count < prio_count */ + lossy = 0; + + if (lossy != port_buffer->buffer[i].lossy) { + port_buffer->buffer[i].lossy = lossy; + changed = true; + } + } + + if (changed) { + err = update_xoff_threshold(port_buffer, xoff, max_mtu, port_buff_cell_sz); + if (err) + return err; + + *change = true; + } + + return 0; +} + +static int fill_pfc_en(struct mlx5_core_dev *mdev, u8 *pfc_en) +{ + u32 g_rx_pause, g_tx_pause; + int err; + + err = mlx5_query_port_pause(mdev, &g_rx_pause, &g_tx_pause); + if (err) + return err; + + /* If global pause enabled, set all active buffers to lossless. + * Otherwise, check PFC setting. 
+ */ + if (g_rx_pause || g_tx_pause) + *pfc_en = 0xff; + else + err = mlx5_query_port_pfc(mdev, pfc_en, NULL); + + return err; +} + +#define MINIMUM_MAX_MTU 9216 +int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, + u32 change, unsigned int mtu, + struct ieee_pfc *pfc, + u32 *buffer_size, + u8 *prio2buffer) +{ + u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz; + struct mlx5e_port_buffer port_buffer; + u32 xoff = calculate_xoff(priv, mtu); + bool update_prio2buffer = false; + u8 buffer[MLX5E_MAX_PRIORITY]; + bool update_buffer = false; + unsigned int max_mtu; + u32 total_used = 0; + u8 curr_pfc_en; + int err; + int i; + + mlx5e_dbg(HW, priv, "%s: change=%x\n", __func__, change); + max_mtu = max_t(unsigned int, priv->netdev->max_mtu, MINIMUM_MAX_MTU); + + err = mlx5e_port_query_buffer(priv, &port_buffer); + if (err) + return err; + + if (change & MLX5E_PORT_BUFFER_CABLE_LEN) { + update_buffer = true; + err = update_xoff_threshold(&port_buffer, xoff, max_mtu, port_buff_cell_sz); + if (err) + return err; + } + + if (change & MLX5E_PORT_BUFFER_PFC) { + err = mlx5e_port_query_priority2buffer(priv->mdev, buffer); + if (err) + return err; + + err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff, port_buff_cell_sz, + &port_buffer, &update_buffer); + if (err) + return err; + } + + if (change & MLX5E_PORT_BUFFER_PRIO2BUFFER) { + update_prio2buffer = true; + err = fill_pfc_en(priv->mdev, &curr_pfc_en); + if (err) + return err; + + err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, xoff, + port_buff_cell_sz, &port_buffer, &update_buffer); + if (err) + return err; + } + + if (change & MLX5E_PORT_BUFFER_SIZE) { + for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + mlx5e_dbg(HW, priv, "%s: buffer[%d]=%d\n", __func__, i, buffer_size[i]); + if (!port_buffer.buffer[i].lossy && !buffer_size[i]) { + mlx5e_dbg(HW, priv, "%s: lossless buffer[%d] size cannot be zero\n", + __func__, i); + return -EINVAL; + } + + port_buffer.buffer[i].size = buffer_size[i]; + total_used += buffer_size[i]; + } + + mlx5e_dbg(HW, priv, "%s: total buffer requested=%d\n", __func__, total_used); + + if (total_used > port_buffer.port_buffer_size) + return -EINVAL; + + update_buffer = true; + err = update_xoff_threshold(&port_buffer, xoff, max_mtu, port_buff_cell_sz); + if (err) + return err; + } + + /* Need to update buffer configuration if xoff value is changed */ + if (!update_buffer && xoff != priv->dcbx.xoff) { + update_buffer = true; + err = update_xoff_threshold(&port_buffer, xoff, max_mtu, port_buff_cell_sz); + if (err) + return err; + } + priv->dcbx.xoff = xoff; + + /* Apply the settings */ + if (update_buffer) { + err = port_set_buffer(priv, &port_buffer); + if (err) + return err; + } + + if (update_prio2buffer) + err = mlx5e_port_set_priority2buffer(priv->mdev, prio2buffer); + + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h new file mode 100644 index 000000000..80af7a5ac --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __MLX5_EN_PORT_BUFFER_H__ +#define __MLX5_EN_PORT_BUFFER_H__ + +#include "en.h" +#include "port.h" + +#define MLX5E_MAX_BUFFER 8 +#define MLX5E_DEFAULT_CABLE_LEN 7 /* 7 meters */ + +#define MLX5_BUFFER_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, pcam_reg) && \ + MLX5_CAP_PCAM_REG(mdev, pbmc) && \ + MLX5_CAP_PCAM_REG(mdev, pptb)) + +enum { + MLX5E_PORT_BUFFER_CABLE_LEN = BIT(0), + MLX5E_PORT_BUFFER_PFC = BIT(1), + MLX5E_PORT_BUFFER_PRIO2BUFFER = BIT(2), + MLX5E_PORT_BUFFER_SIZE = BIT(3), +}; + +struct mlx5e_bufferx_reg { + u8 lossy; + u8 epsb; + u32 size; + u32 xoff; + u32 xon; +}; + +struct mlx5e_port_buffer { + u32 port_buffer_size; + u32 spare_buffer_size; + struct mlx5e_bufferx_reg buffer[MLX5E_MAX_BUFFER]; +}; + +int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, + u32 change, unsigned int mtu, + struct ieee_pfc *pfc, + u32 *buffer_size, + u8 *prio2buffer); + +int mlx5e_port_query_buffer(struct mlx5e_priv *priv, + struct mlx5e_port_buffer *port_buffer); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c new file mode 100644 index 000000000..72b4781f0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -0,0 +1,877 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2020 Mellanox Technologies + +#include "en/ptp.h" +#include "en/txrx.h" +#include "en/params.h" +#include "en/fs_tt_redirect.h" + +struct mlx5e_ptp_fs { + struct mlx5_flow_handle *l2_rule; + struct mlx5_flow_handle *udp_v4_rule; + struct mlx5_flow_handle *udp_v6_rule; + bool valid; +}; + +struct mlx5e_ptp_params { + struct mlx5e_params params; + struct mlx5e_sq_param txq_sq_param; + struct mlx5e_rq_param rq_param; +}; + +struct mlx5e_skb_cb_hwtstamp { + ktime_t cqe_hwtstamp; + ktime_t port_hwtstamp; +}; + +void mlx5e_skb_cb_hwtstamp_init(struct sk_buff *skb) +{ + memset(skb->cb, 0, sizeof(struct mlx5e_skb_cb_hwtstamp)); +} + +static struct mlx5e_skb_cb_hwtstamp *mlx5e_skb_cb_get_hwts(struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(struct mlx5e_skb_cb_hwtstamp) > sizeof(skb->cb)); + return (struct mlx5e_skb_cb_hwtstamp *)skb->cb; +} + +static void mlx5e_skb_cb_hwtstamp_tx(struct sk_buff *skb, + struct mlx5e_ptp_cq_stats *cq_stats) +{ + struct skb_shared_hwtstamps hwts = {}; + 
ktime_t diff; + + diff = abs(mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp - + mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp); + + /* Maximal allowed diff is 1 / 128 second */ + if (diff > (NSEC_PER_SEC >> 7)) { + cq_stats->abort++; + cq_stats->abort_abs_diff_ns += diff; + return; + } + + hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp; + skb_tstamp_tx(skb, &hwts); +} + +void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type, + ktime_t hwtstamp, + struct mlx5e_ptp_cq_stats *cq_stats) +{ + switch (hwtstamp_type) { + case (MLX5E_SKB_CB_CQE_HWTSTAMP): + mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp = hwtstamp; + break; + case (MLX5E_SKB_CB_PORT_HWTSTAMP): + mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp = hwtstamp; + break; + } + + /* If both CQEs arrive, check and report the port tstamp, and clear skb cb as + * skb soon to be released. + */ + if (!mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp || + !mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp) + return; + + mlx5e_skb_cb_hwtstamp_tx(skb, cq_stats); + memset(skb->cb, 0, sizeof(struct mlx5e_skb_cb_hwtstamp)); +} + +#define PTP_WQE_CTR2IDX(val) ((val) & ptpsq->ts_cqe_ctr_mask) + +static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id) +{ + return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id)); +} + +static bool mlx5e_ptp_ts_cqe_ooo(struct mlx5e_ptpsq *ptpsq, u16 skb_id) +{ + u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc); + u16 skb_pc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc); + + if (PTP_WQE_CTR2IDX(skb_id - skb_cc) >= PTP_WQE_CTR2IDX(skb_pc - skb_cc)) + return true; + + return false; +} + +static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, + u16 skb_id, int budget) +{ + struct skb_shared_hwtstamps hwts = {}; + struct sk_buff *skb; + + ptpsq->cq_stats->resync_event++; + + while (skb_cc != skb_id) { + skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo); + hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp; + skb_tstamp_tx(skb, &hwts); + ptpsq->cq_stats->resync_cqe++; + napi_consume_skb(skb, budget); + skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc); + } +} + +static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq, + struct mlx5_cqe64 *cqe, + int budget) +{ + u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter)); + u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc); + struct mlx5e_txqsq *sq = &ptpsq->txqsq; + struct sk_buff *skb; + ktime_t hwtstamp; + + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo); + ptpsq->cq_stats->err_cqe++; + goto out; + } + + if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id)) { + if (mlx5e_ptp_ts_cqe_ooo(ptpsq, skb_id)) { + /* already handled by a previous resync */ + ptpsq->cq_stats->ooo_cqe_drop++; + return; + } + mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id, budget); + } + + skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo); + hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, get_cqe_ts(cqe)); + mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_PORT_HWTSTAMP, + hwtstamp, ptpsq->cq_stats); + ptpsq->cq_stats->cqe++; + +out: + napi_consume_skb(skb, budget); +} + +static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget) +{ + struct mlx5e_ptpsq *ptpsq = container_of(cq, struct mlx5e_ptpsq, ts_cq); + struct mlx5_cqwq *cqwq = &cq->wq; + struct mlx5_cqe64 *cqe; + int work_done = 0; + + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &ptpsq->txqsq.state))) + return false; + + cqe = mlx5_cqwq_get_cqe(cqwq); + if (!cqe) + return false; + + do { + mlx5_cqwq_pop(cqwq); + + mlx5e_ptp_handle_ts_cqe(ptpsq, cqe, 
budget); + } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq))); + + mlx5_cqwq_update_db_record(cqwq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + mlx5e_txqsq_wake(&ptpsq->txqsq); + + return work_done == budget; +} + +static int mlx5e_ptp_napi_poll(struct napi_struct *napi, int budget) +{ + struct mlx5e_ptp *c = container_of(napi, struct mlx5e_ptp, napi); + struct mlx5e_ch_stats *ch_stats = c->stats; + struct mlx5e_rq *rq = &c->rq; + bool busy = false; + int work_done = 0; + int i; + + rcu_read_lock(); + + ch_stats->poll++; + + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + for (i = 0; i < c->num_tc; i++) { + busy |= mlx5e_poll_tx_cq(&c->ptpsq[i].txqsq.cq, budget); + busy |= mlx5e_ptp_poll_ts_cq(&c->ptpsq[i].ts_cq, budget); + } + } + if (test_bit(MLX5E_PTP_STATE_RX, c->state) && likely(budget)) { + work_done = mlx5e_poll_rx_cq(&rq->cq, budget); + busy |= work_done == budget; + busy |= INDIRECT_CALL_2(rq->post_wqes, + mlx5e_post_rx_mpwqes, + mlx5e_post_rx_wqes, + rq); + } + + if (busy) { + work_done = budget; + goto out; + } + + if (unlikely(!napi_complete_done(napi, work_done))) + goto out; + + ch_stats->arm++; + + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + for (i = 0; i < c->num_tc; i++) { + mlx5e_cq_arm(&c->ptpsq[i].txqsq.cq); + mlx5e_cq_arm(&c->ptpsq[i].ts_cq); + } + } + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + mlx5e_cq_arm(&rq->cq); + +out: + rcu_read_unlock(); + + return work_done; +} + +static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq, int tc, + struct mlx5e_ptpsq *ptpsq) +{ + void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); + struct mlx5_core_dev *mdev = c->mdev; + struct mlx5_wq_cyc *wq = &sq->wq; + int err; + int node; + + sq->pdev = c->pdev; + sq->clock = &mdev->clock; + sq->mkey_be = c->mkey_be; + sq->netdev = c->netdev; + sq->priv = c->priv; + sq->mdev = mdev; + sq->ch_ix = MLX5E_PTP_CHANNEL_IX; + sq->txq_ix = txq_ix; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + sq->min_inline_mode = params->tx_min_inline_mode; + sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + sq->stats = &c->priv->ptp_stats.sq[tc]; + sq->ptpsq = ptpsq; + INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); + if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) + set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); + sq->stop_room = param->stop_room; + sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev); + + node = dev_to_node(mlx5_core_dma_dev(mdev)); + + param->wq.db_numa_node = node; + err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); + if (err) + return err; + wq->db = &wq->db[MLX5_SND_DBR]; + + err = mlx5e_alloc_txqsq_db(sq, node); + if (err) + goto err_sq_wq_destroy; + + return 0; + +err_sq_wq_destroy: + mlx5_wq_destroy(&sq->wq_ctrl); + + return err; +} + +static void mlx5e_ptp_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn) +{ + mlx5_core_destroy_sq(mdev, sqn); +} + +static int mlx5e_ptp_alloc_traffic_db(struct mlx5e_ptpsq *ptpsq, int numa) +{ + int wq_sz = mlx5_wq_cyc_get_size(&ptpsq->txqsq.wq); + struct mlx5_core_dev *mdev = ptpsq->txqsq.mdev; + + ptpsq->skb_fifo.fifo = kvzalloc_node(array_size(wq_sz, sizeof(*ptpsq->skb_fifo.fifo)), + GFP_KERNEL, numa); + if (!ptpsq->skb_fifo.fifo) + return -ENOMEM; + + ptpsq->skb_fifo.pc = &ptpsq->skb_fifo_pc; + ptpsq->skb_fifo.cc = &ptpsq->skb_fifo_cc; + ptpsq->skb_fifo.mask = wq_sz - 1; + if (MLX5_CAP_GEN_2(mdev, ts_cqe_metadata_size2wqe_counter)) + ptpsq->ts_cqe_ctr_mask = + (1 << 
MLX5_CAP_GEN_2(mdev, ts_cqe_metadata_size2wqe_counter)) - 1; + return 0; +} + +static void mlx5e_ptp_drain_skb_fifo(struct mlx5e_skb_fifo *skb_fifo) +{ + while (*skb_fifo->pc != *skb_fifo->cc) { + struct sk_buff *skb = mlx5e_skb_fifo_pop(skb_fifo); + + dev_kfree_skb_any(skb); + } +} + +static void mlx5e_ptp_free_traffic_db(struct mlx5e_skb_fifo *skb_fifo) +{ + mlx5e_ptp_drain_skb_fifo(skb_fifo); + kvfree(skb_fifo->fifo); +} + +static int mlx5e_ptp_open_txqsq(struct mlx5e_ptp *c, u32 tisn, + int txq_ix, struct mlx5e_ptp_params *cparams, + int tc, struct mlx5e_ptpsq *ptpsq) +{ + struct mlx5e_sq_param *sqp = &cparams->txq_sq_param; + struct mlx5e_txqsq *txqsq = &ptpsq->txqsq; + struct mlx5e_create_sq_param csp = {}; + int err; + + err = mlx5e_ptp_alloc_txqsq(c, txq_ix, &cparams->params, sqp, + txqsq, tc, ptpsq); + if (err) + return err; + + csp.tisn = tisn; + csp.tis_lst_sz = 1; + csp.cqn = txqsq->cq.mcq.cqn; + csp.wq_ctrl = &txqsq->wq_ctrl; + csp.min_inline_mode = txqsq->min_inline_mode; + csp.ts_cqe_to_dest_cqn = ptpsq->ts_cq.mcq.cqn; + + err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, 0, &txqsq->sqn); + if (err) + goto err_free_txqsq; + + err = mlx5e_ptp_alloc_traffic_db(ptpsq, + dev_to_node(mlx5_core_dma_dev(c->mdev))); + if (err) + goto err_free_txqsq; + + return 0; + +err_free_txqsq: + mlx5e_free_txqsq(txqsq); + + return err; +} + +static void mlx5e_ptp_close_txqsq(struct mlx5e_ptpsq *ptpsq) +{ + struct mlx5e_txqsq *sq = &ptpsq->txqsq; + struct mlx5_core_dev *mdev = sq->mdev; + + mlx5e_ptp_free_traffic_db(&ptpsq->skb_fifo); + cancel_work_sync(&sq->recover_work); + mlx5e_ptp_destroy_sq(mdev, sq->sqn); + mlx5e_free_txqsq_descs(sq); + mlx5e_free_txqsq(sq); +} + +static int mlx5e_ptp_open_txqsqs(struct mlx5e_ptp *c, + struct mlx5e_ptp_params *cparams) +{ + struct mlx5e_params *params = &cparams->params; + u8 num_tc = mlx5e_get_dcb_num_tc(params); + int ix_base; + int err; + int tc; + + ix_base = num_tc * params->num_channels; + + for (tc = 0; tc < num_tc; tc++) { + int txq_ix = ix_base + tc; + + err = mlx5e_ptp_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix, + cparams, tc, &c->ptpsq[tc]); + if (err) + goto close_txqsq; + } + + return 0; + +close_txqsq: + for (--tc; tc >= 0; tc--) + mlx5e_ptp_close_txqsq(&c->ptpsq[tc]); + + return err; +} + +static void mlx5e_ptp_close_txqsqs(struct mlx5e_ptp *c) +{ + int tc; + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_ptp_close_txqsq(&c->ptpsq[tc]); +} + +static int mlx5e_ptp_open_tx_cqs(struct mlx5e_ptp *c, + struct mlx5e_ptp_params *cparams) +{ + struct mlx5e_params *params = &cparams->params; + struct mlx5e_create_cq_param ccp = {}; + struct dim_cq_moder ptp_moder = {}; + struct mlx5e_cq_param *cq_param; + u8 num_tc; + int err; + int tc; + + num_tc = mlx5e_get_dcb_num_tc(params); + + ccp.node = dev_to_node(mlx5_core_dma_dev(c->mdev)); + ccp.ch_stats = c->stats; + ccp.napi = &c->napi; + ccp.ix = MLX5E_PTP_CHANNEL_IX; + + cq_param = &cparams->txq_sq_param.cqp; + + for (tc = 0; tc < num_tc; tc++) { + struct mlx5e_cq *cq = &c->ptpsq[tc].txqsq.cq; + + err = mlx5e_open_cq(c->priv, ptp_moder, cq_param, &ccp, cq); + if (err) + goto out_err_txqsq_cq; + } + + for (tc = 0; tc < num_tc; tc++) { + struct mlx5e_cq *cq = &c->ptpsq[tc].ts_cq; + struct mlx5e_ptpsq *ptpsq = &c->ptpsq[tc]; + + err = mlx5e_open_cq(c->priv, ptp_moder, cq_param, &ccp, cq); + if (err) + goto out_err_ts_cq; + + ptpsq->cq_stats = &c->priv->ptp_stats.cq[tc]; + } + + return 0; + +out_err_ts_cq: + for (--tc; tc >= 0; tc--) + mlx5e_close_cq(&c->ptpsq[tc].ts_cq); + tc = num_tc; 
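+	/* Note: tc was just reset to num_tc so the unwind loop under the label below
+	 * closes every TXQ SQ CQ that was opened before the ts_cq loop started.
+	 */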
+out_err_txqsq_cq: + for (--tc; tc >= 0; tc--) + mlx5e_close_cq(&c->ptpsq[tc].txqsq.cq); + + return err; +} + +static int mlx5e_ptp_open_rx_cq(struct mlx5e_ptp *c, + struct mlx5e_ptp_params *cparams) +{ + struct mlx5e_create_cq_param ccp = {}; + struct dim_cq_moder ptp_moder = {}; + struct mlx5e_cq_param *cq_param; + struct mlx5e_cq *cq = &c->rq.cq; + + ccp.node = dev_to_node(mlx5_core_dma_dev(c->mdev)); + ccp.ch_stats = c->stats; + ccp.napi = &c->napi; + ccp.ix = MLX5E_PTP_CHANNEL_IX; + + cq_param = &cparams->rq_param.cqp; + + return mlx5e_open_cq(c->priv, ptp_moder, cq_param, &ccp, cq); +} + +static void mlx5e_ptp_close_tx_cqs(struct mlx5e_ptp *c) +{ + int tc; + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_close_cq(&c->ptpsq[tc].ts_cq); + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_close_cq(&c->ptpsq[tc].txqsq.cq); +} + +static void mlx5e_ptp_build_sq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq; + + mlx5e_build_sq_param_common(mdev, param); + + wq = MLX5_ADDR_OF(sqc, sqc, wq); + MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); + param->stop_room = mlx5e_stop_room_for_max_wqe(mdev); + mlx5e_build_tx_cq_param(mdev, params, ¶m->cqp); +} + +static void mlx5e_ptp_build_rq_param(struct mlx5_core_dev *mdev, + struct net_device *netdev, + u16 q_counter, + struct mlx5e_ptp_params *ptp_params) +{ + struct mlx5e_rq_param *rq_params = &ptp_params->rq_param; + struct mlx5e_params *params = &ptp_params->params; + + params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC; + mlx5e_init_rq_type_params(mdev, params); + params->sw_mtu = netdev->max_mtu; + mlx5e_build_rq_param(mdev, params, NULL, q_counter, rq_params); +} + +static void mlx5e_ptp_build_params(struct mlx5e_ptp *c, + struct mlx5e_ptp_params *cparams, + struct mlx5e_params *orig) +{ + struct mlx5e_params *params = &cparams->params; + + params->tx_min_inline_mode = orig->tx_min_inline_mode; + params->num_channels = orig->num_channels; + params->hard_mtu = orig->hard_mtu; + params->sw_mtu = orig->sw_mtu; + params->mqprio = orig->mqprio; + + /* SQ */ + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + params->log_sq_size = orig->log_sq_size; + mlx5e_ptp_build_sq_param(c->mdev, params, &cparams->txq_sq_param); + } + /* RQ */ + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { + params->vlan_strip_disable = orig->vlan_strip_disable; + mlx5e_ptp_build_rq_param(c->mdev, c->netdev, c->priv->q_counter, cparams); + } +} + +static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, + struct mlx5e_rq *rq) +{ + struct mlx5_core_dev *mdev = c->mdev; + struct mlx5e_priv *priv = c->priv; + int err; + + rq->wq_type = params->rq_wq_type; + rq->pdev = c->pdev; + rq->netdev = priv->netdev; + rq->priv = priv; + rq->clock = &mdev->clock; + rq->tstamp = &priv->tstamp; + rq->mdev = mdev; + rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + rq->stats = &c->priv->ptp_stats.rq; + rq->ix = MLX5E_PTP_CHANNEL_IX; + rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); + err = mlx5e_rq_set_handlers(rq, params, false); + if (err) + return err; + + return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, 0); +} + +static int mlx5e_ptp_open_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, + struct mlx5e_rq_param *rq_param) +{ + int node = dev_to_node(c->mdev->device); + int err; + + err = mlx5e_init_ptp_rq(c, params, &c->rq); + if (err) + return err; + + return mlx5e_open_rq(params, rq_param, NULL, node, &c->rq); +} + +static int mlx5e_ptp_open_queues(struct mlx5e_ptp *c, + struct 
mlx5e_ptp_params *cparams) +{ + int err; + + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + err = mlx5e_ptp_open_tx_cqs(c, cparams); + if (err) + return err; + + err = mlx5e_ptp_open_txqsqs(c, cparams); + if (err) + goto close_tx_cqs; + } + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { + err = mlx5e_ptp_open_rx_cq(c, cparams); + if (err) + goto close_txqsq; + + err = mlx5e_ptp_open_rq(c, &cparams->params, &cparams->rq_param); + if (err) + goto close_rx_cq; + } + return 0; + +close_rx_cq: + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + mlx5e_close_cq(&c->rq.cq); +close_txqsq: + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) + mlx5e_ptp_close_txqsqs(c); +close_tx_cqs: + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) + mlx5e_ptp_close_tx_cqs(c); + + return err; +} + +static void mlx5e_ptp_close_queues(struct mlx5e_ptp *c) +{ + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { + mlx5e_close_rq(&c->rq); + mlx5e_close_cq(&c->rq.cq); + } + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + mlx5e_ptp_close_txqsqs(c); + mlx5e_ptp_close_tx_cqs(c); + } +} + +static int mlx5e_ptp_set_state(struct mlx5e_ptp *c, struct mlx5e_params *params) +{ + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_TX_PORT_TS)) + __set_bit(MLX5E_PTP_STATE_TX, c->state); + + if (params->ptp_rx) + __set_bit(MLX5E_PTP_STATE_RX, c->state); + + return bitmap_empty(c->state, MLX5E_PTP_STATE_NUM_STATES) ? -EINVAL : 0; +} + +static void mlx5e_ptp_rx_unset_fs(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_ptp_fs *ptp_fs = mlx5e_fs_get_ptp(fs); + + if (!ptp_fs->valid) + return; + + mlx5e_fs_tt_redirect_del_rule(ptp_fs->l2_rule); + mlx5e_fs_tt_redirect_any_destroy(fs); + + mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule); + mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule); + mlx5e_fs_tt_redirect_udp_destroy(fs); + ptp_fs->valid = false; +} + +static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv) +{ + u32 tirn = mlx5e_rx_res_get_tirn_ptp(priv->rx_res); + struct mlx5e_flow_steering *fs = priv->fs; + struct mlx5_flow_handle *rule; + struct mlx5e_ptp_fs *ptp_fs; + int err; + + ptp_fs = mlx5e_fs_get_ptp(fs); + if (ptp_fs->valid) + return 0; + + err = mlx5e_fs_tt_redirect_udp_create(fs); + if (err) + goto out_free; + + rule = mlx5e_fs_tt_redirect_udp_add_rule(fs, MLX5_TT_IPV4_UDP, + tirn, PTP_EV_PORT); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto out_destroy_fs_udp; + } + ptp_fs->udp_v4_rule = rule; + + rule = mlx5e_fs_tt_redirect_udp_add_rule(fs, MLX5_TT_IPV6_UDP, + tirn, PTP_EV_PORT); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto out_destroy_udp_v4_rule; + } + ptp_fs->udp_v6_rule = rule; + + err = mlx5e_fs_tt_redirect_any_create(fs); + if (err) + goto out_destroy_udp_v6_rule; + + rule = mlx5e_fs_tt_redirect_any_add_rule(fs, tirn, ETH_P_1588); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto out_destroy_fs_any; + } + ptp_fs->l2_rule = rule; + ptp_fs->valid = true; + + return 0; + +out_destroy_fs_any: + mlx5e_fs_tt_redirect_any_destroy(fs); +out_destroy_udp_v6_rule: + mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule); +out_destroy_udp_v4_rule: + mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule); +out_destroy_fs_udp: + mlx5e_fs_tt_redirect_udp_destroy(fs); +out_free: + return err; +} + +int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, + u8 lag_port, struct mlx5e_ptp **cp) +{ + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_ptp_params *cparams; + struct mlx5e_ptp *c; + int err; + + + c = kvzalloc_node(sizeof(*c), GFP_KERNEL, 
dev_to_node(mlx5_core_dma_dev(mdev))); + cparams = kvzalloc(sizeof(*cparams), GFP_KERNEL); + if (!c || !cparams) { + err = -ENOMEM; + goto err_free; + } + + c->priv = priv; + c->mdev = priv->mdev; + c->tstamp = &priv->tstamp; + c->pdev = mlx5_core_dma_dev(priv->mdev); + c->netdev = priv->netdev; + c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey); + c->num_tc = mlx5e_get_dcb_num_tc(params); + c->stats = &priv->ptp_stats.ch; + c->lag_port = lag_port; + + err = mlx5e_ptp_set_state(c, params); + if (err) + goto err_free; + + netif_napi_add(netdev, &c->napi, mlx5e_ptp_napi_poll); + + mlx5e_ptp_build_params(c, cparams, params); + + err = mlx5e_ptp_open_queues(c, cparams); + if (unlikely(err)) + goto err_napi_del; + + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + priv->rx_ptp_opened = true; + + *cp = c; + + kvfree(cparams); + + return 0; + +err_napi_del: + netif_napi_del(&c->napi); +err_free: + kvfree(cparams); + kvfree(c); + return err; +} + +void mlx5e_ptp_close(struct mlx5e_ptp *c) +{ + mlx5e_ptp_close_queues(c); + netif_napi_del(&c->napi); + + kvfree(c); +} + +void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c) +{ + int tc; + + napi_enable(&c->napi); + + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_activate_txqsq(&c->ptpsq[tc].txqsq); + } + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { + mlx5e_ptp_rx_set_fs(c->priv); + mlx5e_activate_rq(&c->rq); + mlx5e_trigger_napi_sched(&c->napi); + } +} + +void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c) +{ + int tc; + + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + mlx5e_deactivate_rq(&c->rq); + + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_deactivate_txqsq(&c->ptpsq[tc].txqsq); + } + + napi_disable(&c->napi); +} + +int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn) +{ + if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state)) + return -EINVAL; + + *rqn = c->rq.rqn; + return 0; +} + +int mlx5e_ptp_alloc_rx_fs(struct mlx5e_flow_steering *fs, + const struct mlx5e_profile *profile) +{ + struct mlx5e_ptp_fs *ptp_fs; + + if (!mlx5e_profile_feature_cap(profile, PTP_RX)) + return 0; + + ptp_fs = kzalloc(sizeof(*ptp_fs), GFP_KERNEL); + if (!ptp_fs) + return -ENOMEM; + mlx5e_fs_set_ptp(fs, ptp_fs); + + return 0; +} + +void mlx5e_ptp_free_rx_fs(struct mlx5e_flow_steering *fs, + const struct mlx5e_profile *profile) +{ + struct mlx5e_ptp_fs *ptp_fs = mlx5e_fs_get_ptp(fs); + + if (!mlx5e_profile_feature_cap(profile, PTP_RX)) + return; + + mlx5e_ptp_rx_unset_fs(fs); + kfree(ptp_fs); +} + +int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set) +{ + struct mlx5e_ptp *c = priv->channels.ptp; + + if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX)) + return 0; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + if (set) { + if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state)) { + netdev_WARN_ONCE(priv->netdev, "Don't try to add PTP RX-FS rules"); + return -EINVAL; + } + return mlx5e_ptp_rx_set_fs(priv); + } + /* set == false */ + if (c && test_bit(MLX5E_PTP_STATE_RX, c->state)) { + netdev_WARN_ONCE(priv->netdev, "Don't try to remove PTP RX-FS rules"); + return -EINVAL; + } + mlx5e_ptp_rx_unset_fs(priv->fs); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h new file mode 100644 index 000000000..cc7efde88 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 
Mellanox Technologies. */ + +#ifndef __MLX5_EN_PTP_H__ +#define __MLX5_EN_PTP_H__ + +#include "en.h" +#include "en_stats.h" +#include "en/txrx.h" +#include <linux/ptp_classify.h> + +#define MLX5E_PTP_CHANNEL_IX 0 + +struct mlx5e_ptpsq { + struct mlx5e_txqsq txqsq; + struct mlx5e_cq ts_cq; + u16 skb_fifo_cc; + u16 skb_fifo_pc; + struct mlx5e_skb_fifo skb_fifo; + struct mlx5e_ptp_cq_stats *cq_stats; + u16 ts_cqe_ctr_mask; +}; + +enum { + MLX5E_PTP_STATE_TX, + MLX5E_PTP_STATE_RX, + MLX5E_PTP_STATE_NUM_STATES, +}; + +struct mlx5e_ptp { + /* data path */ + struct mlx5e_ptpsq ptpsq[MLX5E_MAX_NUM_TC]; + struct mlx5e_rq rq; + struct napi_struct napi; + struct device *pdev; + struct net_device *netdev; + __be32 mkey_be; + u8 num_tc; + u8 lag_port; + + /* data path - accessed per napi poll */ + struct mlx5e_ch_stats *stats; + + /* control */ + struct mlx5e_priv *priv; + struct mlx5_core_dev *mdev; + struct hwtstamp_config *tstamp; + DECLARE_BITMAP(state, MLX5E_PTP_STATE_NUM_STATES); +}; + +static inline bool mlx5e_use_ptpsq(struct sk_buff *skb) +{ + struct flow_keys fk; + + if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + return false; + + if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) + return false; + + if (fk.basic.n_proto == htons(ETH_P_1588)) + return true; + + if (fk.basic.n_proto != htons(ETH_P_IP) && + fk.basic.n_proto != htons(ETH_P_IPV6)) + return false; + + return (fk.basic.ip_proto == IPPROTO_UDP && + fk.ports.dst == htons(PTP_EV_PORT)); +} + +static inline bool mlx5e_ptpsq_fifo_has_room(struct mlx5e_txqsq *sq) +{ + if (!sq->ptpsq) + return true; + + return mlx5e_skb_fifo_has_room(&sq->ptpsq->skb_fifo); +} + +int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, + u8 lag_port, struct mlx5e_ptp **cp); +void mlx5e_ptp_close(struct mlx5e_ptp *c); +void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c); +void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c); +int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn); +int mlx5e_ptp_alloc_rx_fs(struct mlx5e_flow_steering *fs, + const struct mlx5e_profile *profile); +void mlx5e_ptp_free_rx_fs(struct mlx5e_flow_steering *fs, + const struct mlx5e_profile *profile); +int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set); + +enum { + MLX5E_SKB_CB_CQE_HWTSTAMP = BIT(0), + MLX5E_SKB_CB_PORT_HWTSTAMP = BIT(1), +}; + +void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type, + ktime_t hwtstamp, + struct mlx5e_ptp_cq_stats *cq_stats); + +void mlx5e_skb_cb_hwtstamp_init(struct sk_buff *skb); +#endif /* __MLX5_EN_PTP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c new file mode 100644 index 000000000..2842195ee --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -0,0 +1,518 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. 
*/ +#include <net/sch_generic.h> + +#include <net/pkt_cls.h> +#include "en.h" +#include "params.h" +#include "../qos.h" +#include "en/htb.h" + +struct qos_sq_callback_params { + struct mlx5e_priv *priv; + struct mlx5e_channels *chs; +}; + +int mlx5e_qos_bytes_rate_check(struct mlx5_core_dev *mdev, u64 nbytes) +{ + if (nbytes < BYTES_IN_MBIT) { + qos_warn(mdev, "Input rate (%llu Bytes/sec) below minimum supported (%u Bytes/sec)\n", + nbytes, BYTES_IN_MBIT); + return -EINVAL; + } + return 0; +} + +static u32 mlx5e_qos_bytes2mbits(struct mlx5_core_dev *mdev, u64 nbytes) +{ + return div_u64(nbytes, BYTES_IN_MBIT); +} + +int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev) +{ + return min(MLX5E_QOS_MAX_LEAF_NODES, mlx5_qos_max_leaf_nodes(mdev)); +} + +/* TX datapath API */ + +u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid) +{ + /* These channel params are safe to access from the datapath, because: + * 1. This function is called only after checking selq->htb_maj_id != 0, + * and the number of queues can't change while HTB offload is active. + * 2. When selq->htb_maj_id becomes 0, synchronize_rcu waits for + * mlx5e_select_queue to finish while holding priv->state_lock, + * preventing other code from changing the number of queues. + */ + bool is_ptp = MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS); + + return (chs->params.num_channels + is_ptp) * mlx5e_get_dcb_num_tc(&chs->params) + qid; +} + +/* SQ lifecycle */ + +static struct mlx5e_txqsq *mlx5e_get_qos_sq(struct mlx5e_priv *priv, int qid) +{ + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_txqsq __rcu **qos_sqs; + struct mlx5e_channel *c; + int ix; + + ix = qid % params->num_channels; + qid /= params->num_channels; + c = priv->channels.c[ix]; + + qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs); + return mlx5e_state_dereference(priv, qos_sqs[qid]); +} + +int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs, + u16 node_qid, u32 hw_id) +{ + struct mlx5e_create_cq_param ccp = {}; + struct mlx5e_txqsq __rcu **qos_sqs; + struct mlx5e_sq_param param_sq; + struct mlx5e_cq_param param_cq; + int txq_ix, ix, qid, err = 0; + struct mlx5e_params *params; + struct mlx5e_channel *c; + struct mlx5e_txqsq *sq; + + params = &chs->params; + + txq_ix = mlx5e_qid_from_qos(chs, node_qid); + + WARN_ON(node_qid > priv->htb_max_qos_sqs); + if (node_qid == priv->htb_max_qos_sqs) { + struct mlx5e_sq_stats *stats, **stats_list = NULL; + + if (priv->htb_max_qos_sqs == 0) { + stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev), + sizeof(*stats_list), + GFP_KERNEL); + if (!stats_list) + return -ENOMEM; + } + stats = kzalloc(sizeof(*stats), GFP_KERNEL); + if (!stats) { + kvfree(stats_list); + return -ENOMEM; + } + if (stats_list) + WRITE_ONCE(priv->htb_qos_sq_stats, stats_list); + WRITE_ONCE(priv->htb_qos_sq_stats[node_qid], stats); + /* Order htb_max_qos_sqs increment after writing the array pointer. + * Pairs with smp_load_acquire in en_stats.c. 
+ */ + smp_store_release(&priv->htb_max_qos_sqs, priv->htb_max_qos_sqs + 1); + } + + ix = node_qid % params->num_channels; + qid = node_qid / params->num_channels; + c = chs->c[ix]; + + qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs); + sq = kzalloc(sizeof(*sq), GFP_KERNEL); + + if (!sq) + return -ENOMEM; + + mlx5e_build_create_cq_param(&ccp, c); + + memset(¶m_sq, 0, sizeof(param_sq)); + memset(¶m_cq, 0, sizeof(param_cq)); + mlx5e_build_sq_param(priv->mdev, params, ¶m_sq); + mlx5e_build_tx_cq_param(priv->mdev, params, ¶m_cq); + err = mlx5e_open_cq(priv, params->tx_cq_moderation, ¶m_cq, &ccp, &sq->cq); + if (err) + goto err_free_sq; + err = mlx5e_open_txqsq(c, priv->tisn[c->lag_port][0], txq_ix, params, + ¶m_sq, sq, 0, hw_id, + priv->htb_qos_sq_stats[node_qid]); + if (err) + goto err_close_cq; + + rcu_assign_pointer(qos_sqs[qid], sq); + + return 0; + +err_close_cq: + mlx5e_close_cq(&sq->cq); +err_free_sq: + kfree(sq); + return err; +} + +static int mlx5e_open_qos_sq_cb_wrapper(void *data, u16 node_qid, u32 hw_id) +{ + struct qos_sq_callback_params *cb_params = data; + + return mlx5e_open_qos_sq(cb_params->priv, cb_params->chs, node_qid, hw_id); +} + +int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id) +{ + struct mlx5e_priv *priv = data; + struct mlx5e_txqsq *sq; + u16 qid; + + sq = mlx5e_get_qos_sq(priv, node_qid); + + qid = mlx5e_qid_from_qos(&priv->channels, node_qid); + + /* If it's a new queue, it will be marked as started at this point. + * Stop it before updating txq2sq. + */ + mlx5e_tx_disable_queue(netdev_get_tx_queue(priv->netdev, qid)); + + priv->txq2sq[qid] = sq; + + /* Make the change to txq2sq visible before the queue is started. + * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, + * which pairs with this barrier. + */ + smp_wmb(); + + qos_dbg(priv->mdev, "Activate QoS SQ qid %u\n", node_qid); + mlx5e_activate_txqsq(sq); + + return 0; +} + +void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid) +{ + struct mlx5e_txqsq *sq; + + sq = mlx5e_get_qos_sq(priv, qid); + if (!sq) /* Handle the case when the SQ failed to open. */ + return; + + qos_dbg(priv->mdev, "Deactivate QoS SQ qid %u\n", qid); + mlx5e_deactivate_txqsq(sq); + + priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL; + + /* Make the change to txq2sq visible before the queue is started again. + * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, + * which pairs with this barrier. + */ + smp_wmb(); +} + +void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid) +{ + struct mlx5e_txqsq __rcu **qos_sqs; + struct mlx5e_params *params; + struct mlx5e_channel *c; + struct mlx5e_txqsq *sq; + int ix; + + params = &priv->channels.params; + + ix = qid % params->num_channels; + qid /= params->num_channels; + c = priv->channels.c[ix]; + qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs); + sq = rcu_replace_pointer(qos_sqs[qid], NULL, lockdep_is_held(&priv->state_lock)); + if (!sq) /* Handle the case when the SQ failed to open. */ + return; + + synchronize_rcu(); /* Sync with NAPI. */ + + mlx5e_close_txqsq(sq); + mlx5e_close_cq(&sq->cq); + kfree(sq); +} + +void mlx5e_qos_close_queues(struct mlx5e_channel *c) +{ + struct mlx5e_txqsq __rcu **qos_sqs; + int i; + + qos_sqs = rcu_replace_pointer(c->qos_sqs, NULL, lockdep_is_held(&c->priv->state_lock)); + if (!qos_sqs) + return; + synchronize_rcu(); /* Sync with NAPI. 
*/ + + for (i = 0; i < c->qos_sqs_size; i++) { + struct mlx5e_txqsq *sq; + + sq = mlx5e_state_dereference(c->priv, qos_sqs[i]); + if (!sq) /* Handle the case when the SQ failed to open. */ + continue; + + mlx5e_close_txqsq(sq); + mlx5e_close_cq(&sq->cq); + kfree(sq); + } + + kvfree(qos_sqs); +} + +void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs) +{ + int i; + + for (i = 0; i < chs->num; i++) + mlx5e_qos_close_queues(chs->c[i]); +} + +int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + u16 qos_sqs_size; + int i; + + qos_sqs_size = DIV_ROUND_UP(mlx5e_qos_max_leaf_nodes(priv->mdev), chs->num); + + for (i = 0; i < chs->num; i++) { + struct mlx5e_txqsq **sqs; + + sqs = kvcalloc(qos_sqs_size, sizeof(struct mlx5e_txqsq *), GFP_KERNEL); + if (!sqs) + goto err_free; + + WRITE_ONCE(chs->c[i]->qos_sqs_size, qos_sqs_size); + smp_wmb(); /* Pairs with mlx5e_napi_poll. */ + rcu_assign_pointer(chs->c[i]->qos_sqs, sqs); + } + + return 0; + +err_free: + while (--i >= 0) { + struct mlx5e_txqsq **sqs; + + sqs = rcu_replace_pointer(chs->c[i]->qos_sqs, NULL, + lockdep_is_held(&priv->state_lock)); + + synchronize_rcu(); /* Sync with NAPI. */ + kvfree(sqs); + } + return -ENOMEM; +} + +int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + struct qos_sq_callback_params callback_params; + int err; + + err = mlx5e_qos_alloc_queues(priv, chs); + if (err) + return err; + + callback_params.priv = priv; + callback_params.chs = chs; + + err = mlx5e_htb_enumerate_leaves(priv->htb, mlx5e_open_qos_sq_cb_wrapper, &callback_params); + if (err) { + mlx5e_qos_close_all_queues(chs); + return err; + } + + return 0; +} + +void mlx5e_qos_activate_queues(struct mlx5e_priv *priv) +{ + mlx5e_htb_enumerate_leaves(priv->htb, mlx5e_activate_qos_sq, priv); +} + +void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c) +{ + struct mlx5e_params *params = &c->priv->channels.params; + struct mlx5e_txqsq __rcu **qos_sqs; + int i; + + qos_sqs = mlx5e_state_dereference(c->priv, c->qos_sqs); + if (!qos_sqs) + return; + + for (i = 0; i < c->qos_sqs_size; i++) { + u16 qid = params->num_channels * i + c->ix; + struct mlx5e_txqsq *sq; + + sq = mlx5e_state_dereference(c->priv, qos_sqs[i]); + if (!sq) /* Handle the case when the SQ failed to open. */ + continue; + + qos_dbg(c->mdev, "Deactivate QoS SQ qid %u\n", qid); + mlx5e_deactivate_txqsq(sq); + + /* The queue is disabled, no synchronization with datapath is needed. 
*/ + c->priv->txq2sq[mlx5e_qid_from_qos(&c->priv->channels, qid)] = NULL; + } +} + +void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs) +{ + int i; + + for (i = 0; i < chs->num; i++) + mlx5e_qos_deactivate_queues(chs->c[i]); +} + +void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq) +{ + qos_dbg(priv->mdev, "Reactivate QoS SQ qid %u\n", qid); + netdev_tx_reset_queue(txq); + netif_tx_start_queue(txq); +} + +void mlx5e_reset_qdisc(struct net_device *dev, u16 qid) +{ + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, qid); + struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + + if (!qdisc) + return; + + spin_lock_bh(qdisc_lock(qdisc)); + qdisc_reset(qdisc); + spin_unlock_bh(qdisc_lock(qdisc)); +} + +int mlx5e_htb_setup_tc(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb_qopt) +{ + struct mlx5e_htb *htb = priv->htb; + int res; + + if (!htb && htb_qopt->command != TC_HTB_CREATE) + return -EINVAL; + + switch (htb_qopt->command) { + case TC_HTB_CREATE: + if (!mlx5_qos_is_supported(priv->mdev)) { + NL_SET_ERR_MSG_MOD(htb_qopt->extack, + "Missing QoS capabilities. Try disabling SRIOV or use a supported device."); + return -EOPNOTSUPP; + } + priv->htb = mlx5e_htb_alloc(); + htb = priv->htb; + if (!htb) + return -ENOMEM; + res = mlx5e_htb_init(htb, htb_qopt, priv->netdev, priv->mdev, &priv->selq, priv); + if (res) { + mlx5e_htb_free(htb); + priv->htb = NULL; + } + return res; + case TC_HTB_DESTROY: + mlx5e_htb_cleanup(htb); + mlx5e_htb_free(htb); + priv->htb = NULL; + return 0; + case TC_HTB_LEAF_ALLOC_QUEUE: + res = mlx5e_htb_leaf_alloc_queue(htb, htb_qopt->classid, htb_qopt->parent_classid, + htb_qopt->rate, htb_qopt->ceil, htb_qopt->extack); + if (res < 0) + return res; + htb_qopt->qid = res; + return 0; + case TC_HTB_LEAF_TO_INNER: + return mlx5e_htb_leaf_to_inner(htb, htb_qopt->parent_classid, htb_qopt->classid, + htb_qopt->rate, htb_qopt->ceil, htb_qopt->extack); + case TC_HTB_LEAF_DEL: + return mlx5e_htb_leaf_del(htb, &htb_qopt->classid, htb_qopt->extack); + case TC_HTB_LEAF_DEL_LAST: + case TC_HTB_LEAF_DEL_LAST_FORCE: + return mlx5e_htb_leaf_del_last(htb, htb_qopt->classid, + htb_qopt->command == TC_HTB_LEAF_DEL_LAST_FORCE, + htb_qopt->extack); + case TC_HTB_NODE_MODIFY: + return mlx5e_htb_node_modify(htb, htb_qopt->classid, htb_qopt->rate, htb_qopt->ceil, + htb_qopt->extack); + case TC_HTB_LEAF_QUERY_QUEUE: + res = mlx5e_htb_get_txq_by_classid(htb, htb_qopt->classid); + if (res < 0) + return res; + htb_qopt->qid = res; + return 0; + default: + return -EOPNOTSUPP; + } +} + +struct mlx5e_mqprio_rl { + struct mlx5_core_dev *mdev; + u32 root_id; + u32 *leaves_id; + u8 num_tc; +}; + +struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_alloc(void) +{ + return kvzalloc(sizeof(struct mlx5e_mqprio_rl), GFP_KERNEL); +} + +void mlx5e_mqprio_rl_free(struct mlx5e_mqprio_rl *rl) +{ + kvfree(rl); +} + +int mlx5e_mqprio_rl_init(struct mlx5e_mqprio_rl *rl, struct mlx5_core_dev *mdev, u8 num_tc, + u64 max_rate[]) +{ + int err; + int tc; + + if (!mlx5_qos_is_supported(mdev)) { + qos_warn(mdev, "Missing QoS capabilities. 
Try disabling SRIOV or use a supported device."); + return -EOPNOTSUPP; + } + if (num_tc > mlx5e_qos_max_leaf_nodes(mdev)) + return -EINVAL; + + rl->mdev = mdev; + rl->num_tc = num_tc; + rl->leaves_id = kvcalloc(num_tc, sizeof(*rl->leaves_id), GFP_KERNEL); + if (!rl->leaves_id) + return -ENOMEM; + + err = mlx5_qos_create_root_node(mdev, &rl->root_id); + if (err) + goto err_free_leaves; + + qos_dbg(mdev, "Root created, id %#x\n", rl->root_id); + + for (tc = 0; tc < num_tc; tc++) { + u32 max_average_bw; + + max_average_bw = mlx5e_qos_bytes2mbits(mdev, max_rate[tc]); + err = mlx5_qos_create_leaf_node(mdev, rl->root_id, 0, max_average_bw, + &rl->leaves_id[tc]); + if (err) + goto err_destroy_leaves; + + qos_dbg(mdev, "Leaf[%d] created, id %#x, max average bw %u Mbits/sec\n", + tc, rl->leaves_id[tc], max_average_bw); + } + return 0; + +err_destroy_leaves: + while (--tc >= 0) + mlx5_qos_destroy_node(mdev, rl->leaves_id[tc]); + mlx5_qos_destroy_node(mdev, rl->root_id); +err_free_leaves: + kvfree(rl->leaves_id); + return err; +} + +void mlx5e_mqprio_rl_cleanup(struct mlx5e_mqprio_rl *rl) +{ + int tc; + + for (tc = 0; tc < rl->num_tc; tc++) + mlx5_qos_destroy_node(rl->mdev, rl->leaves_id[tc]); + mlx5_qos_destroy_node(rl->mdev, rl->root_id); + kvfree(rl->leaves_id); +} + +int mlx5e_mqprio_rl_get_node_hw_id(struct mlx5e_mqprio_rl *rl, int tc, u32 *hw_id) +{ + if (tc >= rl->num_tc) + return -EINVAL; + + *hw_id = rl->leaves_id[tc]; + return 0; +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h new file mode 100644 index 000000000..4947afa23 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. 
*/ + +#ifndef __MLX5E_EN_QOS_H +#define __MLX5E_EN_QOS_H + +#include <linux/mlx5/driver.h> + +#define BYTES_IN_MBIT 125000 + +struct mlx5e_priv; +struct mlx5e_htb; +struct mlx5e_channels; +struct mlx5e_channel; +struct tc_htb_qopt_offload; + +int mlx5e_qos_bytes_rate_check(struct mlx5_core_dev *mdev, u64 nbytes); +int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev); + +/* SQ lifecycle */ +int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs, + u16 node_qid, u32 hw_id); +int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id); +void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid); +void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid); +void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq); +void mlx5e_reset_qdisc(struct net_device *dev, u16 qid); + +int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs); +void mlx5e_qos_activate_queues(struct mlx5e_priv *priv); +void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c); +void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs); +void mlx5e_qos_close_queues(struct mlx5e_channel *c); +void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs); +int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs); + +/* TX datapath API */ +u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid); + +/* HTB API */ +int mlx5e_htb_setup_tc(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb); + +/* MQPRIO TX rate limit */ +struct mlx5e_mqprio_rl; +struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_alloc(void); +void mlx5e_mqprio_rl_free(struct mlx5e_mqprio_rl *rl); +int mlx5e_mqprio_rl_init(struct mlx5e_mqprio_rl *rl, struct mlx5_core_dev *mdev, u8 num_tc, + u64 max_rate[]); +void mlx5e_mqprio_rl_cleanup(struct mlx5e_mqprio_rl *rl); +int mlx5e_mqprio_rl_get_node_hw_id(struct mlx5e_mqprio_rl *rl, int tc, u32 *hw_id); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c new file mode 100644 index 000000000..b6f5c1bcd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c @@ -0,0 +1,351 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. 
*/ + +#include <linux/netdevice.h> +#include <linux/list.h> +#include <net/lag.h> + +#include "mlx5_core.h" +#include "eswitch.h" +#include "esw/acl/ofld.h" +#include "en_rep.h" + +struct mlx5e_rep_bond { + struct notifier_block nb; + struct netdev_net_notifier nn; + struct list_head metadata_list; +}; + +struct mlx5e_rep_bond_slave_entry { + struct list_head list; + struct net_device *netdev; +}; + +struct mlx5e_rep_bond_metadata { + struct list_head list; /* link to global list of rep_bond_metadata */ + struct mlx5_eswitch *esw; + /* private of uplink holding rep bond metadata list */ + struct net_device *lag_dev; + u32 metadata_reg_c_0; + + struct list_head slaves_list; /* slaves list */ + int slaves; +}; + +static struct mlx5e_rep_bond_metadata * +mlx5e_lookup_rep_bond_metadata(struct mlx5_rep_uplink_priv *uplink_priv, + const struct net_device *lag_dev) +{ + struct mlx5e_rep_bond_metadata *found = NULL; + struct mlx5e_rep_bond_metadata *cur; + + list_for_each_entry(cur, &uplink_priv->bond->metadata_list, list) { + if (cur->lag_dev == lag_dev) { + found = cur; + break; + } + } + + return found; +} + +static struct mlx5e_rep_bond_slave_entry * +mlx5e_lookup_rep_bond_slave_entry(struct mlx5e_rep_bond_metadata *mdata, + const struct net_device *netdev) +{ + struct mlx5e_rep_bond_slave_entry *found = NULL; + struct mlx5e_rep_bond_slave_entry *cur; + + list_for_each_entry(cur, &mdata->slaves_list, list) { + if (cur->netdev == netdev) { + found = cur; + break; + } + } + + return found; +} + +static void mlx5e_rep_bond_metadata_release(struct mlx5e_rep_bond_metadata *mdata) +{ + netdev_dbg(mdata->lag_dev, "destroy rep_bond_metadata(%d)\n", + mdata->metadata_reg_c_0); + list_del(&mdata->list); + mlx5_esw_match_metadata_free(mdata->esw, mdata->metadata_reg_c_0); + WARN_ON(!list_empty(&mdata->slaves_list)); + kfree(mdata); +} + +/* This must be called under rtnl_lock */ +int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev, + struct net_device *lag_dev) +{ + struct mlx5e_rep_bond_slave_entry *s_entry; + struct mlx5e_rep_bond_metadata *mdata; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *priv; + int err; + + ASSERT_RTNL(); + + rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev); + if (!mdata) { + /* First netdev becomes slave, no metadata presents the lag_dev. 
Create one */ + mdata = kzalloc(sizeof(*mdata), GFP_KERNEL); + if (!mdata) + return -ENOMEM; + + mdata->lag_dev = lag_dev; + mdata->esw = esw; + INIT_LIST_HEAD(&mdata->slaves_list); + mdata->metadata_reg_c_0 = mlx5_esw_match_metadata_alloc(esw); + if (!mdata->metadata_reg_c_0) { + kfree(mdata); + return -ENOSPC; + } + list_add(&mdata->list, &rpriv->uplink_priv.bond->metadata_list); + + netdev_dbg(lag_dev, "create rep_bond_metadata(%d)\n", + mdata->metadata_reg_c_0); + } + + s_entry = kzalloc(sizeof(*s_entry), GFP_KERNEL); + if (!s_entry) { + err = -ENOMEM; + goto entry_alloc_err; + } + + s_entry->netdev = netdev; + priv = netdev_priv(netdev); + rpriv = priv->ppriv; + + err = mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, + mdata->metadata_reg_c_0); + if (err) + goto ingress_err; + + mdata->slaves++; + list_add_tail(&s_entry->list, &mdata->slaves_list); + netdev_dbg(netdev, "enslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n", + rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0); + + return 0; + +ingress_err: + kfree(s_entry); +entry_alloc_err: + if (!mdata->slaves) + mlx5e_rep_bond_metadata_release(mdata); + return err; +} + +/* This must be called under rtnl_lock */ +void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw, + const struct net_device *netdev, + const struct net_device *lag_dev) +{ + struct mlx5e_rep_bond_slave_entry *s_entry; + struct mlx5e_rep_bond_metadata *mdata; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *priv; + + ASSERT_RTNL(); + + rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev); + if (!mdata) + return; + + s_entry = mlx5e_lookup_rep_bond_slave_entry(mdata, netdev); + if (!s_entry) + return; + + priv = netdev_priv(netdev); + rpriv = priv->ppriv; + + /* Reset bond_metadata to zero first then reset all ingress/egress + * acls and rx rules of unslave representor's vport + */ + mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, 0); + mlx5_esw_acl_egress_vport_unbond(esw, rpriv->rep->vport); + mlx5e_rep_bond_update(priv, false); + + list_del(&s_entry->list); + + netdev_dbg(netdev, "unslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n", + rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0); + + if (--mdata->slaves == 0) + mlx5e_rep_bond_metadata_release(mdata); + kfree(s_entry); +} + +static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev) +{ + return netif_is_lag_port(netdev) && mlx5e_eswitch_vf_rep(netdev); +} + +static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr) +{ + struct netdev_notifier_changelowerstate_info *info; + struct netdev_lag_lower_state_info *lag_info; + struct mlx5e_rep_priv *rpriv; + struct net_device *lag_dev; + struct mlx5e_priv *priv; + struct list_head *iter; + struct net_device *dev; + u16 acl_vport_num; + u16 fwd_vport_num; + int err; + + info = ptr; + lag_info = info->lower_state_info; + /* This is not an event of a representor becoming active slave */ + if (!lag_info->tx_enabled) + return; + + priv = netdev_priv(netdev); + rpriv = priv->ppriv; + fwd_vport_num = rpriv->rep->vport; + lag_dev = netdev_master_upper_dev_get(netdev); + if (!lag_dev) + return; + + netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n", + lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev)); + + /* Point everyone's egress acl to the vport of the active representor */ + netdev_for_each_lower_dev(lag_dev, dev, iter) { + priv = netdev_priv(dev); + rpriv = priv->ppriv; + acl_vport_num = 
rpriv->rep->vport; + if (acl_vport_num != fwd_vport_num) { + /* Only single rx_rule for unique bond_metadata should be + * present, delete it if it's saved as passive vport's + * rx_rule with destination as passive vport's root_ft + */ + mlx5e_rep_bond_update(priv, true); + err = mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch, + fwd_vport_num, + acl_vport_num); + if (err) + netdev_warn(dev, + "configure slave vport(%d) egress fwd, err(%d)", + acl_vport_num, err); + } + } + + /* Insert new rx_rule for unique bond_metadata, save it as active vport's + * rx_rule with new destination as active vport's root_ft + */ + err = mlx5e_rep_bond_update(netdev_priv(netdev), false); + if (err) + netdev_warn(netdev, "configure active slave vport(%d) rx_rule, err(%d)", + fwd_vport_num, err); +} + +static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr) +{ + struct netdev_notifier_changeupper_info *info = ptr; + struct mlx5e_rep_priv *rpriv; + struct net_device *lag_dev; + struct mlx5e_priv *priv; + + priv = netdev_priv(netdev); + rpriv = priv->ppriv; + lag_dev = info->upper_dev; + + netdev_dbg(netdev, "%sslave vport(%d) lag(%s)\n", + info->linking ? "en" : "un", rpriv->rep->vport, lag_dev->name); + + if (info->linking) + mlx5e_rep_bond_enslave(priv->mdev->priv.eswitch, netdev, lag_dev); + else + mlx5e_rep_bond_unslave(priv->mdev->priv.eswitch, netdev, lag_dev); +} + +/* Bond device of representors and netdev events are used here in specific way + * to support eswitch vports bonding and to perform failover of eswitch vport + * by modifying the vport's egress acl of lower dev representors. Thus this + * also change the traditional behavior of lower dev under bond device. + * All non-representor netdevs or representors of other vendors as lower dev + * of bond device are not supported. + */ +static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + struct mlx5e_rep_priv *rpriv; + struct mlx5e_rep_bond *bond; + struct mlx5e_priv *priv; + + if (!mlx5e_rep_is_lag_netdev(netdev)) + return NOTIFY_DONE; + + bond = container_of(nb, struct mlx5e_rep_bond, nb); + priv = netdev_priv(netdev); + rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, REP_ETH); + /* Verify VF representor is on the same device of the bond handling the netevent. 
*/ + if (rpriv->uplink_priv.bond != bond) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_CHANGELOWERSTATE: + mlx5e_rep_changelowerstate_event(netdev, ptr); + break; + case NETDEV_CHANGEUPPER: + mlx5e_rep_changeupper_event(netdev, ptr); + break; + } + return NOTIFY_DONE; +} + +/* If HW support eswitch vports bonding, register a specific notifier to + * handle it when two or more representors are bonded + */ +int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv; + int ret = 0; + + priv = netdev_priv(netdev); + if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch)) + goto out; + + uplink_priv->bond = kvzalloc(sizeof(*uplink_priv->bond), GFP_KERNEL); + if (!uplink_priv->bond) { + ret = -ENOMEM; + goto out; + } + + INIT_LIST_HEAD(&uplink_priv->bond->metadata_list); + uplink_priv->bond->nb.notifier_call = mlx5e_rep_esw_bond_netevent; + ret = register_netdevice_notifier_dev_net(netdev, + &uplink_priv->bond->nb, + &uplink_priv->bond->nn); + if (ret) { + netdev_err(netdev, "register bonding netevent notifier, err(%d)\n", ret); + kvfree(uplink_priv->bond); + uplink_priv->bond = NULL; + } + +out: + return ret; +} + +void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + + if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch) || + !rpriv->uplink_priv.bond) + return; + + unregister_netdevice_notifier_dev_net(rpriv->netdev, + &rpriv->uplink_priv.bond->nb, + &rpriv->uplink_priv.bond->nn); + kvfree(rpriv->uplink_priv.bond); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c new file mode 100644 index 000000000..ce85b48d3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -0,0 +1,569 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021 Mellanox Technologies. 
*/ + +#include <linux/netdevice.h> +#include <linux/if_bridge.h> +#include <net/netevent.h> +#include <net/switchdev.h> +#include "bridge.h" +#include "esw/bridge.h" +#include "en_rep.h" + +#define MLX5_ESW_BRIDGE_UPDATE_INTERVAL 1000 + +struct mlx5_bridge_switchdev_fdb_work { + struct work_struct work; + struct switchdev_notifier_fdb_info fdb_info; + struct net_device *dev; + struct mlx5_esw_bridge_offloads *br_offloads; + bool add; +}; + +static bool mlx5_esw_bridge_dev_same_esw(struct net_device *dev, struct mlx5_eswitch *esw) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return esw == priv->mdev->priv.eswitch; +} + +static bool mlx5_esw_bridge_dev_same_hw(struct net_device *dev, struct mlx5_eswitch *esw) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev, *esw_mdev; + u64 system_guid, esw_system_guid; + + mdev = priv->mdev; + esw_mdev = esw->dev; + + system_guid = mlx5_query_nic_system_image_guid(mdev); + esw_system_guid = mlx5_query_nic_system_image_guid(esw_mdev); + + return system_guid == esw_system_guid; +} + +static struct net_device * +mlx5_esw_bridge_lag_rep_get(struct net_device *dev, struct mlx5_eswitch *esw) +{ + struct net_device *lower; + struct list_head *iter; + + netdev_for_each_lower_dev(dev, lower, iter) { + struct mlx5_core_dev *mdev; + struct mlx5e_priv *priv; + + if (!mlx5e_eswitch_rep(lower)) + continue; + + priv = netdev_priv(lower); + mdev = priv->mdev; + if (mlx5_lag_is_shared_fdb(mdev) && mlx5_esw_bridge_dev_same_esw(lower, esw)) + return lower; + } + + return NULL; +} + +static struct net_device * +mlx5_esw_bridge_rep_vport_num_vhca_id_get(struct net_device *dev, struct mlx5_eswitch *esw, + u16 *vport_num, u16 *esw_owner_vhca_id) +{ + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *priv; + + if (netif_is_lag_master(dev)) + dev = mlx5_esw_bridge_lag_rep_get(dev, esw); + + if (!dev || !mlx5e_eswitch_rep(dev) || !mlx5_esw_bridge_dev_same_hw(dev, esw)) + return NULL; + + priv = netdev_priv(dev); + rpriv = priv->ppriv; + *vport_num = rpriv->rep->vport; + *esw_owner_vhca_id = MLX5_CAP_GEN(priv->mdev, vhca_id); + return dev; +} + +static struct net_device * +mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(struct net_device *dev, struct mlx5_eswitch *esw, + u16 *vport_num, u16 *esw_owner_vhca_id) +{ + struct net_device *lower_dev; + struct list_head *iter; + + if (netif_is_lag_master(dev) || mlx5e_eswitch_rep(dev)) + return mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, vport_num, + esw_owner_vhca_id); + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + struct net_device *rep; + + if (netif_is_bridge_master(lower_dev)) + continue; + + rep = mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(lower_dev, esw, vport_num, + esw_owner_vhca_id); + if (rep) + return rep; + } + + return NULL; +} + +static bool mlx5_esw_bridge_is_local(struct net_device *dev, struct net_device *rep, + struct mlx5_eswitch *esw) +{ + struct mlx5_core_dev *mdev; + struct mlx5e_priv *priv; + + if (!mlx5_esw_bridge_dev_same_esw(rep, esw)) + return false; + + priv = netdev_priv(rep); + mdev = priv->mdev; + if (netif_is_lag_master(dev)) + return mlx5_lag_is_shared_fdb(mdev) && mlx5_lag_is_master(mdev); + return true; +} + +static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr) +{ + struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb, + struct mlx5_esw_bridge_offloads, + netdev_nb); + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_changeupper_info *info = ptr; + struct net_device *upper 
= info->upper_dev, *rep; + struct mlx5_eswitch *esw = br_offloads->esw; + u16 vport_num, esw_owner_vhca_id; + struct netlink_ext_ack *extack; + int ifindex = upper->ifindex; + int err = 0; + + if (!netif_is_bridge_master(upper)) + return 0; + + rep = mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, &vport_num, &esw_owner_vhca_id); + if (!rep) + return 0; + + extack = netdev_notifier_info_to_extack(&info->info); + + if (mlx5_esw_bridge_is_local(dev, rep, esw)) + err = info->linking ? + mlx5_esw_bridge_vport_link(ifindex, vport_num, esw_owner_vhca_id, + br_offloads, extack) : + mlx5_esw_bridge_vport_unlink(ifindex, vport_num, esw_owner_vhca_id, + br_offloads, extack); + else if (mlx5_esw_bridge_dev_same_hw(rep, esw)) + err = info->linking ? + mlx5_esw_bridge_vport_peer_link(ifindex, vport_num, esw_owner_vhca_id, + br_offloads, extack) : + mlx5_esw_bridge_vport_peer_unlink(ifindex, vport_num, esw_owner_vhca_id, + br_offloads, extack); + + return err; +} + +static int +mlx5_esw_bridge_changeupper_validate_netdev(void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_changeupper_info *info = ptr; + struct net_device *upper = info->upper_dev; + struct net_device *lower; + struct list_head *iter; + + if (!netif_is_bridge_master(upper) || !netif_is_lag_master(dev)) + return 0; + + netdev_for_each_lower_dev(dev, lower, iter) { + struct mlx5_core_dev *mdev; + struct mlx5e_priv *priv; + + if (!mlx5e_eswitch_rep(lower)) + continue; + + priv = netdev_priv(lower); + mdev = priv->mdev; + if (!mlx5_lag_is_active(mdev)) + return -EAGAIN; + if (!mlx5_lag_is_shared_fdb(mdev)) + return -EOPNOTSUPP; + } + + return 0; +} + +static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + int err = 0; + + switch (event) { + case NETDEV_PRECHANGEUPPER: + err = mlx5_esw_bridge_changeupper_validate_netdev(ptr); + break; + + case NETDEV_CHANGEUPPER: + err = mlx5_esw_bridge_port_changeupper(nb, ptr); + break; + } + + return notifier_from_errno(err); +} + +static int +mlx5_esw_bridge_port_obj_add(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct netlink_ext_ack *extack = switchdev_notifier_info_to_extack(&port_obj_info->info); + const struct switchdev_obj *obj = port_obj_info->obj; + const struct switchdev_obj_port_vlan *vlan; + u16 vport_num, esw_owner_vhca_id; + int err; + + if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num, + &esw_owner_vhca_id)) + return 0; + + port_obj_info->handled = true; + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); + err = mlx5_esw_bridge_port_vlan_add(vport_num, esw_owner_vhca_id, vlan->vid, + vlan->flags, br_offloads, extack); + break; + default: + return -EOPNOTSUPP; + } + return err; +} + +static int +mlx5_esw_bridge_port_obj_del(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + const struct switchdev_obj *obj = port_obj_info->obj; + const struct switchdev_obj_port_vlan *vlan; + u16 vport_num, esw_owner_vhca_id; + + if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num, + &esw_owner_vhca_id)) + return 0; + + port_obj_info->handled = true; + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); + mlx5_esw_bridge_port_vlan_del(vport_num, esw_owner_vhca_id, vlan->vid, 
br_offloads); + break; + default: + return -EOPNOTSUPP; + } + return 0; +} + +static int +mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev, + struct switchdev_notifier_port_attr_info *port_attr_info, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct netlink_ext_ack *extack = switchdev_notifier_info_to_extack(&port_attr_info->info); + const struct switchdev_attr *attr = port_attr_info->attr; + u16 vport_num, esw_owner_vhca_id; + int err = 0; + + if (!mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num, + &esw_owner_vhca_id)) + return 0; + + port_attr_info->handled = true; + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: + if (attr->u.brport_flags.mask & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD)) { + NL_SET_ERR_MSG_MOD(extack, "Flag is not supported"); + err = -EINVAL; + } + break; + case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: + break; + case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: + err = mlx5_esw_bridge_ageing_time_set(vport_num, esw_owner_vhca_id, + attr->u.ageing_time, br_offloads); + break; + case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: + err = mlx5_esw_bridge_vlan_filtering_set(vport_num, esw_owner_vhca_id, + attr->u.vlan_filtering, br_offloads); + break; + case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL: + err = mlx5_esw_bridge_vlan_proto_set(vport_num, + esw_owner_vhca_id, + attr->u.vlan_protocol, + br_offloads); + break; + default: + err = -EOPNOTSUPP; + } + + return err; +} + +static int mlx5_esw_bridge_event_blocking(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb, + struct mlx5_esw_bridge_offloads, + nb_blk); + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + int err; + + switch (event) { + case SWITCHDEV_PORT_OBJ_ADD: + err = mlx5_esw_bridge_port_obj_add(dev, ptr, br_offloads); + break; + case SWITCHDEV_PORT_OBJ_DEL: + err = mlx5_esw_bridge_port_obj_del(dev, ptr, br_offloads); + break; + case SWITCHDEV_PORT_ATTR_SET: + err = mlx5_esw_bridge_port_obj_attr_set(dev, ptr, br_offloads); + break; + default: + err = 0; + } + + return notifier_from_errno(err); +} + +static void +mlx5_esw_bridge_cleanup_switchdev_fdb_work(struct mlx5_bridge_switchdev_fdb_work *fdb_work) +{ + dev_put(fdb_work->dev); + kfree(fdb_work->fdb_info.addr); + kfree(fdb_work); +} + +static void mlx5_esw_bridge_switchdev_fdb_event_work(struct work_struct *work) +{ + struct mlx5_bridge_switchdev_fdb_work *fdb_work = + container_of(work, struct mlx5_bridge_switchdev_fdb_work, work); + struct switchdev_notifier_fdb_info *fdb_info = + &fdb_work->fdb_info; + struct mlx5_esw_bridge_offloads *br_offloads = + fdb_work->br_offloads; + struct net_device *dev = fdb_work->dev; + u16 vport_num, esw_owner_vhca_id; + + rtnl_lock(); + + if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num, + &esw_owner_vhca_id)) + goto out; + + if (fdb_work->add) + mlx5_esw_bridge_fdb_create(dev, vport_num, esw_owner_vhca_id, br_offloads, + fdb_info); + else + mlx5_esw_bridge_fdb_remove(dev, vport_num, esw_owner_vhca_id, br_offloads, + fdb_info); + +out: + rtnl_unlock(); + mlx5_esw_bridge_cleanup_switchdev_fdb_work(fdb_work); +} + +static struct mlx5_bridge_switchdev_fdb_work * +mlx5_esw_bridge_init_switchdev_fdb_work(struct net_device *dev, bool add, + struct switchdev_notifier_fdb_info *fdb_info, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_bridge_switchdev_fdb_work *work; + u8 *addr; + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if 
(!work) + return ERR_PTR(-ENOMEM); + + INIT_WORK(&work->work, mlx5_esw_bridge_switchdev_fdb_event_work); + memcpy(&work->fdb_info, fdb_info, sizeof(work->fdb_info)); + + addr = kzalloc(ETH_ALEN, GFP_ATOMIC); + if (!addr) { + kfree(work); + return ERR_PTR(-ENOMEM); + } + ether_addr_copy(addr, fdb_info->addr); + work->fdb_info.addr = addr; + + dev_hold(dev); + work->dev = dev; + work->br_offloads = br_offloads; + work->add = add; + return work; +} + +static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb, + struct mlx5_esw_bridge_offloads, + nb); + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + struct switchdev_notifier_fdb_info *fdb_info; + struct mlx5_bridge_switchdev_fdb_work *work; + struct mlx5_eswitch *esw = br_offloads->esw; + struct switchdev_notifier_info *info = ptr; + u16 vport_num, esw_owner_vhca_id; + struct net_device *upper, *rep; + + if (event == SWITCHDEV_PORT_ATTR_SET) { + int err = mlx5_esw_bridge_port_obj_attr_set(dev, ptr, br_offloads); + + return notifier_from_errno(err); + } + + upper = netdev_master_upper_dev_get_rcu(dev); + if (!upper) + return NOTIFY_DONE; + if (!netif_is_bridge_master(upper)) + return NOTIFY_DONE; + + rep = mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, &vport_num, &esw_owner_vhca_id); + if (!rep) + return NOTIFY_DONE; + + switch (event) { + case SWITCHDEV_FDB_ADD_TO_BRIDGE: + fdb_info = container_of(info, + struct switchdev_notifier_fdb_info, + info); + mlx5_esw_bridge_fdb_update_used(dev, vport_num, esw_owner_vhca_id, br_offloads, + fdb_info); + break; + case SWITCHDEV_FDB_DEL_TO_BRIDGE: + /* only handle the event on peers */ + if (mlx5_esw_bridge_is_local(dev, rep, esw)) + break; + fallthrough; + case SWITCHDEV_FDB_ADD_TO_DEVICE: + case SWITCHDEV_FDB_DEL_TO_DEVICE: + fdb_info = container_of(info, + struct switchdev_notifier_fdb_info, + info); + + work = mlx5_esw_bridge_init_switchdev_fdb_work(dev, + event == SWITCHDEV_FDB_ADD_TO_DEVICE, + fdb_info, + br_offloads); + if (IS_ERR(work)) { + WARN_ONCE(1, "Failed to init switchdev work, err=%ld", + PTR_ERR(work)); + return notifier_from_errno(PTR_ERR(work)); + } + + queue_work(br_offloads->wq, &work->work); + break; + default: + break; + } + return NOTIFY_DONE; +} + +static void mlx5_esw_bridge_update_work(struct work_struct *work) +{ + struct mlx5_esw_bridge_offloads *br_offloads = container_of(work, + struct mlx5_esw_bridge_offloads, + update_work.work); + + rtnl_lock(); + mlx5_esw_bridge_update(br_offloads); + rtnl_unlock(); + + queue_delayed_work(br_offloads->wq, &br_offloads->update_work, + msecs_to_jiffies(MLX5_ESW_BRIDGE_UPDATE_INTERVAL)); +} + +void mlx5e_rep_bridge_init(struct mlx5e_priv *priv) +{ + struct mlx5_esw_bridge_offloads *br_offloads; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = + mdev->priv.eswitch; + int err; + + rtnl_lock(); + br_offloads = mlx5_esw_bridge_init(esw); + rtnl_unlock(); + if (IS_ERR(br_offloads)) { + esw_warn(mdev, "Failed to init esw bridge (err=%ld)\n", PTR_ERR(br_offloads)); + return; + } + + br_offloads->wq = alloc_ordered_workqueue("mlx5_bridge_wq", 0); + if (!br_offloads->wq) { + esw_warn(mdev, "Failed to allocate bridge offloads workqueue\n"); + goto err_alloc_wq; + } + + br_offloads->nb.notifier_call = mlx5_esw_bridge_switchdev_event; + err = register_switchdev_notifier(&br_offloads->nb); + if (err) { + esw_warn(mdev, "Failed to register switchdev notifier (err=%d)\n", err); + goto 
err_register_swdev; + } + + br_offloads->nb_blk.notifier_call = mlx5_esw_bridge_event_blocking; + err = register_switchdev_blocking_notifier(&br_offloads->nb_blk); + if (err) { + esw_warn(mdev, "Failed to register blocking switchdev notifier (err=%d)\n", err); + goto err_register_swdev_blk; + } + + br_offloads->netdev_nb.notifier_call = mlx5_esw_bridge_switchdev_port_event; + err = register_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb); + if (err) { + esw_warn(mdev, "Failed to register bridge offloads netdevice notifier (err=%d)\n", + err); + goto err_register_netdev; + } + INIT_DELAYED_WORK(&br_offloads->update_work, mlx5_esw_bridge_update_work); + queue_delayed_work(br_offloads->wq, &br_offloads->update_work, + msecs_to_jiffies(MLX5_ESW_BRIDGE_UPDATE_INTERVAL)); + return; + +err_register_netdev: + unregister_switchdev_blocking_notifier(&br_offloads->nb_blk); +err_register_swdev_blk: + unregister_switchdev_notifier(&br_offloads->nb); +err_register_swdev: + destroy_workqueue(br_offloads->wq); +err_alloc_wq: + rtnl_lock(); + mlx5_esw_bridge_cleanup(esw); + rtnl_unlock(); +} + +void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5_esw_bridge_offloads *br_offloads; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = + mdev->priv.eswitch; + + br_offloads = esw->br_offloads; + if (!br_offloads) + return; + + cancel_delayed_work_sync(&br_offloads->update_work); + unregister_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb); + unregister_switchdev_blocking_notifier(&br_offloads->nb_blk); + unregister_switchdev_notifier(&br_offloads->nb); + destroy_workqueue(br_offloads->wq); + rtnl_lock(); + mlx5_esw_bridge_cleanup(esw); + rtnl_unlock(); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h new file mode 100644 index 000000000..fbeb64242 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#ifndef __MLX5_EN_REP_BRIDGE__ +#define __MLX5_EN_REP_BRIDGE__ + +#include "en.h" + +#if IS_ENABLED(CONFIG_MLX5_BRIDGE) + +void mlx5e_rep_bridge_init(struct mlx5e_priv *priv); +void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv); + +#else /* CONFIG_MLX5_BRIDGE */ + +static inline void mlx5e_rep_bridge_init(struct mlx5e_priv *priv) {} +static inline void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv) {} + +#endif /* CONFIG_MLX5_BRIDGE */ + +#endif /* __MLX5_EN_REP_BRIDGE__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c new file mode 100644 index 000000000..2e9bee4e5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c @@ -0,0 +1,398 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies. 
*/ + +#include <linux/refcount.h> +#include <linux/list.h> +#include <linux/rculist.h> +#include <linux/rtnetlink.h> +#include <linux/workqueue.h> +#include <linux/spinlock.h> +#include <linux/notifier.h> +#include <net/netevent.h> +#include <net/arp.h> +#include "neigh.h" +#include "tc.h" +#include "en_rep.h" +#include "fs_core.h" +#include "diag/en_rep_tracepoint.h" + +static unsigned long mlx5e_rep_ipv6_interval(void) +{ + if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl) + return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME); + + return ~0UL; +} + +static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv) +{ + unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); + unsigned long ipv6_interval = mlx5e_rep_ipv6_interval(); + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + + rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval); + mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval); +} + +void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + + mlx5_fc_queue_stats_work(priv->mdev, + &neigh_update->neigh_stats_work, + neigh_update->min_interval); +} + +static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe) +{ + return refcount_inc_not_zero(&nhe->refcnt); +} + +static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe); + +void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe) +{ + if (refcount_dec_and_test(&nhe->refcnt)) { + mlx5e_rep_neigh_entry_remove(nhe); + kfree_rcu(nhe, rcu); + } +} + +static struct mlx5e_neigh_hash_entry * +mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv, + struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_neigh_hash_entry *next = NULL; + + rcu_read_lock(); + + for (next = nhe ? 
+ list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, + &nhe->neigh_list, + struct mlx5e_neigh_hash_entry, + neigh_list) : + list_first_or_null_rcu(&rpriv->neigh_update.neigh_list, + struct mlx5e_neigh_hash_entry, + neigh_list); + next; + next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, + &next->neigh_list, + struct mlx5e_neigh_hash_entry, + neigh_list)) + if (mlx5e_rep_neigh_entry_hold(next)) + break; + + rcu_read_unlock(); + + if (nhe) + mlx5e_rep_neigh_entry_release(nhe); + + return next; +} + +static void mlx5e_rep_neigh_stats_work(struct work_struct *work) +{ + struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv, + neigh_update.neigh_stats_work.work); + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_neigh_hash_entry *nhe = NULL; + + rtnl_lock(); + if (!list_empty(&rpriv->neigh_update.neigh_list)) + mlx5e_rep_queue_neigh_stats_work(priv); + + while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL) + mlx5e_tc_update_neigh_used_value(nhe); + + rtnl_unlock(); +} + +struct neigh_update_work { + struct work_struct work; + struct neighbour *n; + struct mlx5e_neigh_hash_entry *nhe; +}; + +static void mlx5e_release_neigh_update_work(struct neigh_update_work *update_work) +{ + neigh_release(update_work->n); + mlx5e_rep_neigh_entry_release(update_work->nhe); + kfree(update_work); +} + +static void mlx5e_rep_neigh_update(struct work_struct *work) +{ + struct neigh_update_work *update_work = container_of(work, struct neigh_update_work, + work); + struct mlx5e_neigh_hash_entry *nhe = update_work->nhe; + struct neighbour *n = update_work->n; + struct mlx5e_encap_entry *e = NULL; + bool neigh_connected, same_dev; + unsigned char ha[ETH_ALEN]; + u8 nud_state, dead; + + rtnl_lock(); + + /* If these parameters are changed after we release the lock, + * we'll receive another event letting us know about it. + * We use this lock to avoid inconsistency between the neigh validity + * and it's hw address. + */ + read_lock_bh(&n->lock); + memcpy(ha, n->ha, ETH_ALEN); + nud_state = n->nud_state; + dead = n->dead; + same_dev = READ_ONCE(nhe->neigh_dev) == n->dev; + read_unlock_bh(&n->lock); + + neigh_connected = (nud_state & NUD_VALID) && !dead; + + trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected); + + if (!same_dev) + goto out; + + /* mlx5e_get_next_init_encap() releases previous encap before returning + * the next one. + */ + while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL) + mlx5e_rep_update_flows(netdev_priv(e->out_dev), e, neigh_connected, ha); + +out: + rtnl_unlock(); + mlx5e_release_neigh_update_work(update_work); +} + +static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv *priv, + struct neighbour *n) +{ + struct neigh_update_work *update_work; + struct mlx5e_neigh_hash_entry *nhe; + struct mlx5e_neigh m_neigh = {}; + + update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC); + if (WARN_ON(!update_work)) + return NULL; + + m_neigh.family = n->ops->family; + memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + + /* Obtain reference to nhe as last step in order not to release it in + * atomic context. 
+ */ + rcu_read_lock(); + nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh); + rcu_read_unlock(); + if (!nhe) { + kfree(update_work); + return NULL; + } + + INIT_WORK(&update_work->work, mlx5e_rep_neigh_update); + neigh_hold(n); + update_work->n = n; + update_work->nhe = nhe; + + return update_work; +} + +static int mlx5e_rep_netevent_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv, + neigh_update.netevent_nb); + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_neigh_hash_entry *nhe = NULL; + struct neigh_update_work *update_work; + struct neigh_parms *p; + struct neighbour *n; + bool found = false; + + switch (event) { + case NETEVENT_NEIGH_UPDATE: + n = ptr; +#if IS_ENABLED(CONFIG_IPV6) + if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl) +#else + if (n->tbl != &arp_tbl) +#endif + return NOTIFY_DONE; + + update_work = mlx5e_alloc_neigh_update_work(priv, n); + if (!update_work) + return NOTIFY_DONE; + + queue_work(priv->wq, &update_work->work); + break; + + case NETEVENT_DELAY_PROBE_TIME_UPDATE: + p = ptr; + + /* We check the device is present since we don't care about + * changes in the default table, we only care about changes + * done per device delay prob time parameter. + */ +#if IS_ENABLED(CONFIG_IPV6) + if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl)) +#else + if (!p->dev || p->tbl != &arp_tbl) +#endif + return NOTIFY_DONE; + + rcu_read_lock(); + list_for_each_entry_rcu(nhe, &neigh_update->neigh_list, + neigh_list) { + if (p->dev == READ_ONCE(nhe->neigh_dev)) { + found = true; + break; + } + } + rcu_read_unlock(); + if (!found) + return NOTIFY_DONE; + + neigh_update->min_interval = min_t(unsigned long, + NEIGH_VAR(p, DELAY_PROBE_TIME), + neigh_update->min_interval); + mlx5_fc_update_sampling_interval(priv->mdev, + neigh_update->min_interval); + break; + } + return NOTIFY_DONE; +} + +static const struct rhashtable_params mlx5e_neigh_ht_params = { + .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node), + .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh), + .key_len = sizeof(struct mlx5e_neigh), + .automatic_shrinking = true, +}; + +int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + int err; + + err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params); + if (err) + goto out_err; + + INIT_LIST_HEAD(&neigh_update->neigh_list); + mutex_init(&neigh_update->encap_lock); + INIT_DELAYED_WORK(&neigh_update->neigh_stats_work, + mlx5e_rep_neigh_stats_work); + mlx5e_rep_neigh_update_init_interval(rpriv); + + neigh_update->netevent_nb.notifier_call = mlx5e_rep_netevent_event; + err = register_netevent_notifier(&neigh_update->netevent_nb); + if (err) + goto out_notifier; + return 0; + +out_notifier: + neigh_update->netevent_nb.notifier_call = NULL; + rhashtable_destroy(&neigh_update->neigh_ht); +out_err: + netdev_warn(rpriv->netdev, + "Failed to initialize neighbours handling for vport %d\n", + rpriv->rep->vport); + return err; +} + +void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + + if (!rpriv->neigh_update.netevent_nb.notifier_call) + return; + + 
unregister_netevent_notifier(&neigh_update->netevent_nb); + + flush_workqueue(priv->wq); /* flush neigh update works */ + + cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work); + + mutex_destroy(&neigh_update->encap_lock); + rhashtable_destroy(&neigh_update->neigh_ht); +} + +static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + int err; + + err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht, + &nhe->rhash_node, + mlx5e_neigh_ht_params); + if (err) + return err; + + list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list); + + return err; +} + +static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv; + + mutex_lock(&rpriv->neigh_update.encap_lock); + + list_del_rcu(&nhe->neigh_list); + + rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht, + &nhe->rhash_node, + mlx5e_neigh_ht_params); + mutex_unlock(&rpriv->neigh_update.encap_lock); +} + +/* This function must only be called under the representor's encap_lock or + * inside rcu read lock section. + */ +struct mlx5e_neigh_hash_entry * +mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct mlx5e_neigh_hash_entry *nhe; + + nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh, + mlx5e_neigh_ht_params); + return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL; +} + +int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh, + struct net_device *neigh_dev, + struct mlx5e_neigh_hash_entry **nhe) +{ + int err; + + *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL); + if (!*nhe) + return -ENOMEM; + + (*nhe)->priv = priv; + memcpy(&(*nhe)->m_neigh, m_neigh, sizeof(*m_neigh)); + spin_lock_init(&(*nhe)->encap_list_lock); + INIT_LIST_HEAD(&(*nhe)->encap_list); + refcount_set(&(*nhe)->refcnt, 1); + WRITE_ONCE((*nhe)->neigh_dev, neigh_dev); + + err = mlx5e_rep_neigh_entry_insert(priv, *nhe); + if (err) + goto out_free; + return 0; + +out_free: + kfree(*nhe); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h new file mode 100644 index 000000000..6fe0ab970 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_REP_NEIGH__ +#define __MLX5_EN_REP_NEIGH__ + +#include "en.h" +#include "en_rep.h" + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + +int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv); +void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv); + +struct mlx5e_neigh_hash_entry * +mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh); +int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh, + struct net_device *neigh_dev, + struct mlx5e_neigh_hash_entry **nhe); +void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe); + +void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); + +#else /* CONFIG_MLX5_CLS_ACT */ + +static inline int +mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) { return 0; } +static inline void +mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) {} + +#endif /* CONFIG_MLX5_CLS_ACT */ + +#endif /* __MLX5_EN_REP_NEIGH__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c new file mode 100644 index 000000000..fac7e3ff2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -0,0 +1,900 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies. */ + +#include <net/dst_metadata.h> +#include <linux/netdevice.h> +#include <linux/if_macvlan.h> +#include <linux/list.h> +#include <linux/rculist.h> +#include <linux/rtnetlink.h> +#include <linux/workqueue.h> +#include <linux/spinlock.h> +#include "tc.h" +#include "neigh.h" +#include "en_rep.h" +#include "eswitch.h" +#include "lib/fs_chains.h" +#include "en/tc_ct.h" +#include "en/mapping.h" +#include "en/tc_tun.h" +#include "lib/port_tun.h" +#include "en/tc/sample.h" +#include "en_accel/ipsec_rxtx.h" +#include "en/tc/int_port.h" +#include "en/tc/act/act.h" + +struct mlx5e_rep_indr_block_priv { + struct net_device *netdev; + struct mlx5e_rep_priv *rpriv; + enum flow_block_binder_type binder_type; + + struct list_head list; +}; + +int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct mlx5e_neigh *m_neigh, + struct net_device *neigh_dev) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy; + struct mlx5e_neigh_hash_entry *nhe; + int err; + + err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type); + if (err) + return err; + + mutex_lock(&rpriv->neigh_update.encap_lock); + nhe = mlx5e_rep_neigh_entry_lookup(priv, m_neigh); + if (!nhe) { + err = mlx5e_rep_neigh_entry_create(priv, m_neigh, neigh_dev, &nhe); + if (err) { + mutex_unlock(&rpriv->neigh_update.encap_lock); + mlx5_tun_entropy_refcount_dec(tun_entropy, + e->reformat_type); + return err; + } + } + + e->nhe = nhe; + spin_lock(&nhe->encap_list_lock); + list_add_rcu(&e->encap_list, &nhe->encap_list); + spin_unlock(&nhe->encap_list_lock); + + mutex_unlock(&rpriv->neigh_update.encap_lock); + + return 0; +} + +void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy; + + if (!e->nhe) + return; + + spin_lock(&e->nhe->encap_list_lock); + list_del_rcu(&e->encap_list); + spin_unlock(&e->nhe->encap_list_lock); + + mlx5e_rep_neigh_entry_release(e->nhe); + 
e->nhe = NULL; + mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type); +} + +void mlx5e_rep_update_flows(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + bool neigh_connected, + unsigned char ha[ETH_ALEN]) +{ + struct ethhdr *eth = (struct ethhdr *)e->encap_header; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + bool encap_connected; + LIST_HEAD(flow_list); + + ASSERT_RTNL(); + + mutex_lock(&esw->offloads.encap_tbl_lock); + encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID); + if (encap_connected == neigh_connected && ether_addr_equal(e->h_dest, ha)) + goto unlock; + + mlx5e_take_all_encap_flows(e, &flow_list); + + if ((e->flags & MLX5_ENCAP_ENTRY_VALID) && + (!neigh_connected || !ether_addr_equal(e->h_dest, ha))) + mlx5e_tc_encap_flows_del(priv, e, &flow_list); + + if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) { + struct net_device *route_dev; + + ether_addr_copy(e->h_dest, ha); + ether_addr_copy(eth->h_dest, ha); + /* Update the encap source mac, in case that we delete + * the flows when encap source mac changed. + */ + route_dev = __dev_get_by_index(dev_net(priv->netdev), e->route_dev_ifindex); + if (route_dev) + ether_addr_copy(eth->h_source, route_dev->dev_addr); + + mlx5e_tc_encap_flows_add(priv, e, &flow_list); + } +unlock: + mutex_unlock(&esw->offloads.encap_tbl_lock); + mlx5e_put_flow_list(priv, &flow_list); +} + +static int +mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, + struct flow_cls_offload *cls_flower, int flags) +{ + switch (cls_flower->command) { + case FLOW_CLS_REPLACE: + return mlx5e_configure_flower(priv->netdev, priv, cls_flower, + flags); + case FLOW_CLS_DESTROY: + return mlx5e_delete_flower(priv->netdev, priv, cls_flower, + flags); + case FLOW_CLS_STATS: + return mlx5e_stats_flower(priv->netdev, priv, cls_flower, + flags); + default: + return -EOPNOTSUPP; + } +} + +static +int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + switch (ma->command) { + case TC_CLSMATCHALL_REPLACE: + return mlx5e_tc_configure_matchall(priv, ma); + case TC_CLSMATCHALL_DESTROY: + return mlx5e_tc_delete_matchall(priv, ma); + case TC_CLSMATCHALL_STATS: + mlx5e_tc_stats_matchall(priv, ma); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD); + struct mlx5e_priv *priv = cb_priv; + + if (!priv->netdev || !netif_device_present(priv->netdev)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags); + case TC_SETUP_CLSMATCHALL: + return mlx5e_rep_setup_tc_cls_matchall(priv, type_data); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct flow_cls_offload tmp, *f = type_data; + struct mlx5e_priv *priv = cb_priv; + struct mlx5_eswitch *esw; + unsigned long flags; + int err; + + flags = MLX5_TC_FLAG(INGRESS) | + MLX5_TC_FLAG(ESW_OFFLOAD) | + MLX5_TC_FLAG(FT_OFFLOAD); + esw = priv->mdev->priv.eswitch; + + switch (type) { + case TC_SETUP_CLSFLOWER: + memcpy(&tmp, f, sizeof(*f)); + + if (!mlx5_chains_prios_supported(esw_chains(esw))) + return -EOPNOTSUPP; + + /* Re-use tc offload path by moving the ft flow to the + * reserved ft chain. 
+ * + * FT offload can use prio range [0, INT_MAX], so we normalize + * it to range [1, mlx5_esw_chains_get_prio_range(esw)] + * as with tc, where prio 0 isn't supported. + * + * We only support chain 0 of FT offload. + */ + if (tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw))) + return -EOPNOTSUPP; + if (tmp.common.chain_index != 0) + return -EOPNOTSUPP; + + tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw)); + tmp.common.prio++; + err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags); + memcpy(&f->stats, &tmp.stats, sizeof(f->stats)); + return err; + default: + return -EOPNOTSUPP; + } +} + +static LIST_HEAD(mlx5e_rep_block_tc_cb_list); +static LIST_HEAD(mlx5e_rep_block_ft_cb_list); +int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct flow_block_offload *f = type_data; + + f->unlocked_driver_cb = true; + + switch (type) { + case TC_SETUP_BLOCK: + return flow_block_cb_setup_simple(type_data, + &mlx5e_rep_block_tc_cb_list, + mlx5e_rep_setup_tc_cb, + priv, priv, true); + case TC_SETUP_FT: + return flow_block_cb_setup_simple(type_data, + &mlx5e_rep_block_ft_cb_list, + mlx5e_rep_setup_ft_cb, + priv, priv, true); + default: + return -EOPNOTSUPP; + } +} + +int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + int err; + + mutex_init(&uplink_priv->unready_flows_lock); + INIT_LIST_HEAD(&uplink_priv->unready_flows); + + /* init shared tc flow table */ + err = mlx5e_tc_esw_init(uplink_priv); + return err; +} + +void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv) +{ + /* delete shared tc flow table */ + mlx5e_tc_esw_cleanup(&rpriv->uplink_priv); + mutex_destroy(&rpriv->uplink_priv.unready_flows_lock); +} + +void mlx5e_rep_tc_enable(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work, + mlx5e_tc_reoffload_flows_work); +} + +void mlx5e_rep_tc_disable(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work); +} + +int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work); + + return NOTIFY_OK; +} + +static struct mlx5e_rep_indr_block_priv * +mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev, + enum flow_block_binder_type binder_type) +{ + struct mlx5e_rep_indr_block_priv *cb_priv; + + list_for_each_entry(cb_priv, + &rpriv->uplink_priv.tc_indr_block_priv_list, + list) + if (cb_priv->netdev == netdev && + cb_priv->binder_type == binder_type) + return cb_priv; + + return NULL; +} + +static int +mlx5e_rep_indr_offload(struct net_device *netdev, + struct flow_cls_offload *flower, + struct mlx5e_rep_indr_block_priv *indr_priv, + unsigned long flags) +{ + struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev); + int err = 0; + + if (!netif_device_present(indr_priv->rpriv->netdev)) + return -EOPNOTSUPP; + + switch (flower->command) { + case FLOW_CLS_REPLACE: + err = mlx5e_configure_flower(netdev, priv, flower, flags); + break; + case FLOW_CLS_DESTROY: + err = mlx5e_delete_flower(netdev, priv, flower, flags); + break; + case FLOW_CLS_STATS: + err = mlx5e_stats_flower(netdev, priv, flower, flags); + break; + default: + err = -EOPNOTSUPP; + } + + return err; +} + +static int 
mlx5e_rep_indr_setup_tc_cb(enum tc_setup_type type, + void *type_data, void *indr_priv) +{ + unsigned long flags = MLX5_TC_FLAG(ESW_OFFLOAD); + struct mlx5e_rep_indr_block_priv *priv = indr_priv; + + flags |= (priv->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) ? + MLX5_TC_FLAG(EGRESS) : + MLX5_TC_FLAG(INGRESS); + + switch (type) { + case TC_SETUP_CLSFLOWER: + return mlx5e_rep_indr_offload(priv->netdev, type_data, priv, + flags); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type, + void *type_data, void *indr_priv) +{ + struct mlx5e_rep_indr_block_priv *priv = indr_priv; + struct flow_cls_offload *f = type_data; + struct flow_cls_offload tmp; + struct mlx5e_priv *mpriv; + struct mlx5_eswitch *esw; + unsigned long flags; + int err; + + mpriv = netdev_priv(priv->rpriv->netdev); + esw = mpriv->mdev->priv.eswitch; + + flags = MLX5_TC_FLAG(EGRESS) | + MLX5_TC_FLAG(ESW_OFFLOAD) | + MLX5_TC_FLAG(FT_OFFLOAD); + + switch (type) { + case TC_SETUP_CLSFLOWER: + memcpy(&tmp, f, sizeof(*f)); + + /* Re-use tc offload path by moving the ft flow to the + * reserved ft chain. + * + * FT offload can use prio range [0, INT_MAX], so we normalize + * it to range [1, mlx5_esw_chains_get_prio_range(esw)] + * as with tc, where prio 0 isn't supported. + * + * We only support chain 0 of FT offload. + */ + if (!mlx5_chains_prios_supported(esw_chains(esw)) || + tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw)) || + tmp.common.chain_index) + return -EOPNOTSUPP; + + tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw)); + tmp.common.prio++; + err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags); + memcpy(&f->stats, &tmp.stats, sizeof(f->stats)); + return err; + default: + return -EOPNOTSUPP; + } +} + +static void mlx5e_rep_indr_block_unbind(void *cb_priv) +{ + struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv; + + list_del(&indr_priv->list); + kfree(indr_priv); +} + +static LIST_HEAD(mlx5e_block_cb_list); + +static bool mlx5e_rep_macvlan_mode_supported(const struct net_device *dev) +{ + struct macvlan_dev *macvlan = netdev_priv(dev); + + return macvlan->mode == MACVLAN_MODE_PASSTHRU; +} + +static int +mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch, + struct mlx5e_rep_priv *rpriv, + struct flow_block_offload *f, + flow_setup_cb_t *setup_cb, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) +{ + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + bool is_ovs_int_port = netif_is_ovs_master(netdev); + struct mlx5e_rep_indr_block_priv *indr_priv; + struct flow_block_cb *block_cb; + + if (!mlx5e_tc_tun_device_to_offload(priv, netdev) && + !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev) && + !is_ovs_int_port) { + if (!(netif_is_macvlan(netdev) && macvlan_dev_real_dev(netdev) == rpriv->netdev)) + return -EOPNOTSUPP; + if (!mlx5e_rep_macvlan_mode_supported(netdev)) { + netdev_warn(netdev, "Offloading ingress filter is supported only with macvlan passthru mode"); + return -EOPNOTSUPP; + } + } + + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS && + f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) + return -EOPNOTSUPP; + + if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS && !is_ovs_int_port) + return -EOPNOTSUPP; + + if (is_ovs_int_port && !mlx5e_tc_int_port_supported(esw)) + return -EOPNOTSUPP; + + f->unlocked_driver_cb = true; + f->driver_block_list = &mlx5e_block_cb_list; + + 
switch (f->command) { + case FLOW_BLOCK_BIND: + indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev, f->binder_type); + if (indr_priv) + return -EEXIST; + + indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL); + if (!indr_priv) + return -ENOMEM; + + indr_priv->netdev = netdev; + indr_priv->rpriv = rpriv; + indr_priv->binder_type = f->binder_type; + list_add(&indr_priv->list, + &rpriv->uplink_priv.tc_indr_block_priv_list); + + block_cb = flow_indr_block_cb_alloc(setup_cb, indr_priv, indr_priv, + mlx5e_rep_indr_block_unbind, + f, netdev, sch, data, rpriv, + cleanup); + if (IS_ERR(block_cb)) { + list_del(&indr_priv->list); + kfree(indr_priv); + return PTR_ERR(block_cb); + } + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list); + + return 0; + case FLOW_BLOCK_UNBIND: + indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev, f->binder_type); + if (!indr_priv) + return -ENOENT; + + block_cb = flow_block_cb_lookup(f->block, setup_cb, indr_priv); + if (!block_cb) + return -ENOENT; + + flow_indr_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); + return 0; + default: + return -EOPNOTSUPP; + } + return 0; +} + +static int +mlx5e_rep_indr_replace_act(struct mlx5e_rep_priv *rpriv, + struct flow_offload_action *fl_act) + +{ + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + enum mlx5_flow_namespace_type ns_type; + struct flow_action_entry *action; + struct mlx5e_tc_act *act; + bool add = false; + int i; + + /* There is no use case currently for more than one action (e.g. pedit). + * when there will be, need to handle cleaning multiple actions on err. + */ + if (!flow_offload_has_one_action(&fl_act->action)) + return -EOPNOTSUPP; + + if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS) + ns_type = MLX5_FLOW_NAMESPACE_FDB; + else + ns_type = MLX5_FLOW_NAMESPACE_KERNEL; + + flow_action_for_each(i, action, &fl_act->action) { + act = mlx5e_tc_act_get(action->id, ns_type); + if (!act) + continue; + + if (!act->offload_action) + continue; + + if (!act->offload_action(priv, fl_act, action)) + add = true; + } + + return add ? 
0 : -EOPNOTSUPP; +} + +static int +mlx5e_rep_indr_destroy_act(struct mlx5e_rep_priv *rpriv, + struct flow_offload_action *fl_act) +{ + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + enum mlx5_flow_namespace_type ns_type; + struct mlx5e_tc_act *act; + + if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS) + ns_type = MLX5_FLOW_NAMESPACE_FDB; + else + ns_type = MLX5_FLOW_NAMESPACE_KERNEL; + + act = mlx5e_tc_act_get(fl_act->id, ns_type); + if (!act || !act->destroy_action) + return -EOPNOTSUPP; + + return act->destroy_action(priv, fl_act); +} + +static int +mlx5e_rep_indr_stats_act(struct mlx5e_rep_priv *rpriv, + struct flow_offload_action *fl_act) + +{ + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + enum mlx5_flow_namespace_type ns_type; + struct mlx5e_tc_act *act; + + if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS) + ns_type = MLX5_FLOW_NAMESPACE_FDB; + else + ns_type = MLX5_FLOW_NAMESPACE_KERNEL; + + act = mlx5e_tc_act_get(fl_act->id, ns_type); + if (!act || !act->stats_action) + return -EOPNOTSUPP; + + return act->stats_action(priv, fl_act); +} + +static int +mlx5e_rep_indr_setup_act(struct mlx5e_rep_priv *rpriv, + struct flow_offload_action *fl_act) +{ + switch (fl_act->command) { + case FLOW_ACT_REPLACE: + return mlx5e_rep_indr_replace_act(rpriv, fl_act); + case FLOW_ACT_DESTROY: + return mlx5e_rep_indr_destroy_act(rpriv, fl_act); + case FLOW_ACT_STATS: + return mlx5e_rep_indr_stats_act(rpriv, fl_act); + default: + return -EOPNOTSUPP; + } +} + +static int +mlx5e_rep_indr_no_dev_setup(struct mlx5e_rep_priv *rpriv, + enum tc_setup_type type, + void *data) +{ + if (!data) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_ACT: + return mlx5e_rep_indr_setup_act(rpriv, data); + default: + return -EOPNOTSUPP; + } +} + +static +int mlx5e_rep_indr_setup_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv, + enum tc_setup_type type, void *type_data, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) +{ + if (!netdev) + return mlx5e_rep_indr_no_dev_setup(cb_priv, type, data); + + switch (type) { + case TC_SETUP_BLOCK: + return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data, + mlx5e_rep_indr_setup_tc_cb, + data, cleanup); + case TC_SETUP_FT: + return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data, + mlx5e_rep_indr_setup_ft_cb, + data, cleanup); + default: + return -EOPNOTSUPP; + } +} + +int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + + /* init indirect block notifications */ + INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list); + + return flow_indr_dev_register(mlx5e_rep_indr_setup_cb, rpriv); +} + +void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv) +{ + flow_indr_dev_unregister(mlx5e_rep_indr_setup_cb, rpriv, + mlx5e_rep_indr_block_unbind); +} + +static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5e_tc_update_priv *tc_priv, + u32 tunnel_id) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct tunnel_match_enc_opts enc_opts = {}; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct metadata_dst *tun_dst; + struct tunnel_match_key key; + u32 tun_id, enc_opts_id; + struct net_device *dev; + int err; + + enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK; + tun_id = tunnel_id >> ENC_OPTS_BITS; + + if (!tun_id) + return 
true; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key); + if (err) { + WARN_ON_ONCE(true); + netdev_dbg(priv->netdev, + "Couldn't find tunnel for tun_id: %d, err: %d\n", + tun_id, err); + return false; + } + + if (enc_opts_id) { + err = mapping_find(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id, &enc_opts); + if (err) { + netdev_dbg(priv->netdev, + "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n", + enc_opts_id, err); + return false; + } + } + + if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst, + key.enc_ip.tos, key.enc_ip.ttl, + key.enc_tp.dst, TUNNEL_KEY, + key32_to_tunnel_id(key.enc_key_id.keyid), + enc_opts.key.len); + } else if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst, + key.enc_ip.tos, key.enc_ip.ttl, + key.enc_tp.dst, 0, TUNNEL_KEY, + key32_to_tunnel_id(key.enc_key_id.keyid), + enc_opts.key.len); + } else { + netdev_dbg(priv->netdev, + "Couldn't restore tunnel, unsupported addr_type: %d\n", + key.enc_control.addr_type); + return false; + } + + if (!tun_dst) { + netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n"); + return false; + } + + tun_dst->u.tun_info.key.tp_src = key.enc_tp.src; + + if (enc_opts.key.len) + ip_tunnel_info_opts_set(&tun_dst->u.tun_info, + enc_opts.key.data, + enc_opts.key.len, + enc_opts.key.dst_opt_type); + + skb_dst_set(skb, (struct dst_entry *)tun_dst); + dev = dev_get_by_index(&init_net, key.filter_ifindex); + if (!dev) { + netdev_dbg(priv->netdev, + "Couldn't find tunnel device with ifindex: %d\n", + key.filter_ifindex); + return false; + } + + /* Set fwd_dev so we do dev_put() after datapath */ + tc_priv->fwd_dev = dev; + + skb->dev = dev; + + return true; +} + +static bool mlx5e_restore_skb_chain(struct sk_buff *skb, u32 chain, u32 reg_c1, + struct mlx5e_tc_update_priv *tc_priv) +{ + struct mlx5e_priv *priv = netdev_priv(skb->dev); + u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK; + +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + if (chain) { + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct tc_skb_ext *tc_skb_ext; + struct mlx5_eswitch *esw; + u32 zone_restore_id; + + tc_skb_ext = tc_skb_ext_alloc(skb); + if (!tc_skb_ext) { + WARN_ON(1); + return false; + } + tc_skb_ext->chain = chain; + zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK; + esw = priv->mdev->priv.eswitch; + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + if (!mlx5e_tc_ct_restore_flow(uplink_priv->ct_priv, skb, + zone_restore_id)) + return false; + } +#endif /* CONFIG_NET_TC_SKB_EXT */ + + return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); +} + +static void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv) +{ + if (tc_priv->fwd_dev) + dev_put(tc_priv->fwd_dev); +} + +static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5_mapped_obj *mapped_obj, + struct mlx5e_tc_update_priv *tc_priv) +{ + if (!mlx5e_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) { + netdev_dbg(priv->netdev, + "Failed to restore tunnel info for sampled packet\n"); + return; + } + mlx5e_tc_sample_skb(skb, mapped_obj); + mlx5_rep_tc_post_napi_receive(tc_priv); +} + +static bool mlx5e_restore_skb_int_port(struct mlx5e_priv 
*priv, struct sk_buff *skb, + struct mlx5_mapped_obj *mapped_obj, + struct mlx5e_tc_update_priv *tc_priv, + bool *forward_tx, + u32 reg_c1) +{ + u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + + /* Tunnel restore takes precedence over int port restore */ + if (tunnel_id) + return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb, + mapped_obj->int_port_metadata, forward_tx)) { + /* Set fwd_dev for future dev_put */ + tc_priv->fwd_dev = skb->dev; + + return true; + } + + return false; +} + +void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, + struct sk_buff *skb) +{ + u32 reg_c1 = be32_to_cpu(cqe->ft_metadata); + struct mlx5e_tc_update_priv tc_priv = {}; + struct mlx5_mapped_obj mapped_obj; + struct mlx5_eswitch *esw; + bool forward_tx = false; + struct mlx5e_priv *priv; + u32 reg_c0; + int err; + + reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK); + if (!reg_c0 || reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG) + goto forward; + + /* If reg_c0 is not equal to the default flow tag then skb->mark + * is not supported and must be reset back to 0. + */ + skb->mark = 0; + + priv = netdev_priv(skb->dev); + esw = priv->mdev->priv.eswitch; + err = mapping_find(esw->offloads.reg_c0_obj_pool, reg_c0, &mapped_obj); + if (err) { + netdev_dbg(priv->netdev, + "Couldn't find mapped object for reg_c0: %d, err: %d\n", + reg_c0, err); + goto free_skb; + } + + if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) { + if (!mlx5e_restore_skb_chain(skb, mapped_obj.chain, reg_c1, &tc_priv) && + !mlx5_ipsec_is_rx_flow(cqe)) + goto free_skb; + } else if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) { + mlx5e_restore_skb_sample(priv, skb, &mapped_obj, &tc_priv); + goto free_skb; + } else if (mapped_obj.type == MLX5_MAPPED_OBJ_INT_PORT_METADATA) { + if (!mlx5e_restore_skb_int_port(priv, skb, &mapped_obj, &tc_priv, + &forward_tx, reg_c1)) + goto free_skb; + } else { + netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type); + goto free_skb; + } + +forward: + if (forward_tx) + dev_queue_xmit(skb); + else + napi_gro_receive(rq->cq.napi, skb); + + mlx5_rep_tc_post_napi_receive(&tc_priv); + + return; + +free_skb: + dev_kfree_skb_any(skb); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h new file mode 100644 index 000000000..7c9dd3a75 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_REP_TC_H__ +#define __MLX5_EN_REP_TC_H__ + +#include <linux/skbuff.h> +#include "en_tc.h" +#include "en_rep.h" + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + +int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv); +void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv); + +int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv); +void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv); + +void mlx5e_rep_tc_enable(struct mlx5e_priv *priv); +void mlx5e_rep_tc_disable(struct mlx5e_priv *priv); + +int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv); + +void mlx5e_rep_update_flows(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + bool neigh_connected, + unsigned char ha[ETH_ALEN]); + +int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct mlx5e_neigh *m_neigh, + struct net_device *neigh_dev); +void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); + +int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data); + +void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, + struct sk_buff *skb); + +#else /* CONFIG_MLX5_CLS_ACT */ + +struct mlx5e_rep_priv; +static inline int +mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv) { return 0; } +static inline void +mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv) {} + +static inline int +mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv) { return 0; } +static inline void +mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv) {} + +static inline void +mlx5e_rep_tc_enable(struct mlx5e_priv *priv) {} +static inline void +mlx5e_rep_tc_disable(struct mlx5e_priv *priv) {} + +static inline int +mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv) { return NOTIFY_DONE; } + +static inline int +mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) { return -EOPNOTSUPP; } + +static inline void +mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, + struct sk_buff *skb) { napi_gro_receive(rq->cq.napi, skb); } + +#endif /* CONFIG_MLX5_CLS_ACT */ + +#endif /* __MLX5_EN_REP_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c new file mode 100644 index 000000000..9b1f1369a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -0,0 +1,759 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Mellanox Technologies. 
+ +#include "health.h" +#include "params.h" +#include "txrx.h" +#include "devlink.h" +#include "ptp.h" +#include "lib/tout.h" + +static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state) +{ + int outlen = MLX5_ST_SZ_BYTES(query_rq_out); + void *out; + void *rqc; + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_core_query_rq(dev, rqn, out); + if (err) + goto out; + + rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context); + *state = MLX5_GET(rqc, rqc, state); + +out: + kvfree(out); + return err; +} + +static int mlx5e_wait_for_icosq_flush(struct mlx5e_icosq *icosq) +{ + struct mlx5_core_dev *dev = icosq->channel->mdev; + unsigned long exp_time; + + exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR)); + + while (time_before(jiffies, exp_time)) { + if (icosq->cc == icosq->pc) + return 0; + + msleep(20); + } + + netdev_err(icosq->channel->netdev, + "Wait for ICOSQ 0x%x flush timeout (cc = 0x%x, pc = 0x%x)\n", + icosq->sqn, icosq->cc, icosq->pc); + + return -ETIMEDOUT; +} + +static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq) +{ + WARN_ONCE(icosq->cc != icosq->pc, "ICOSQ 0x%x: cc (0x%x) != pc (0x%x)\n", + icosq->sqn, icosq->cc, icosq->pc); + icosq->cc = 0; + icosq->pc = 0; +} + +static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) +{ + struct mlx5e_rq *xskrq = NULL; + struct mlx5_core_dev *mdev; + struct mlx5e_icosq *icosq; + struct net_device *dev; + struct mlx5e_rq *rq; + u8 state; + int err; + + icosq = ctx; + + mutex_lock(&icosq->channel->icosq_recovery_lock); + + /* mlx5e_close_rq cancels this work before RQ and ICOSQ are killed. */ + rq = &icosq->channel->rq; + if (test_bit(MLX5E_RQ_STATE_ENABLED, &icosq->channel->xskrq.state)) + xskrq = &icosq->channel->xskrq; + mdev = icosq->channel->mdev; + dev = icosq->channel->netdev; + err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state); + if (err) { + netdev_err(dev, "Failed to query ICOSQ 0x%x state. 
err = %d\n", + icosq->sqn, err); + goto out; + } + + if (state != MLX5_SQC_STATE_ERR) + goto out; + + mlx5e_deactivate_rq(rq); + if (xskrq) + mlx5e_deactivate_rq(xskrq); + + err = mlx5e_wait_for_icosq_flush(icosq); + if (err) + goto out; + + mlx5e_deactivate_icosq(icosq); + + /* At this point, both the rq and the icosq are disabled */ + + err = mlx5e_health_sq_to_ready(mdev, dev, icosq->sqn); + if (err) + goto out; + + mlx5e_reset_icosq_cc_pc(icosq); + + mlx5e_free_rx_in_progress_descs(rq); + if (xskrq) + mlx5e_free_rx_in_progress_descs(xskrq); + + clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); + mlx5e_activate_icosq(icosq); + + mlx5e_activate_rq(rq); + rq->stats->recover++; + + if (xskrq) { + mlx5e_activate_rq(xskrq); + xskrq->stats->recover++; + } + + mlx5e_trigger_napi_icosq(icosq->channel); + + mutex_unlock(&icosq->channel->icosq_recovery_lock); + + return 0; +out: + clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); + mutex_unlock(&icosq->channel->icosq_recovery_lock); + return err; +} + +static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx) +{ + struct mlx5e_rq *rq = ctx; + int err; + + mlx5e_deactivate_rq(rq); + err = mlx5e_flush_rq(rq, MLX5_RQC_STATE_ERR); + clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state); + if (err) + return err; + + mlx5e_activate_rq(rq); + rq->stats->recover++; + if (rq->channel) + mlx5e_trigger_napi_icosq(rq->channel); + else + mlx5e_trigger_napi_sched(rq->cq.napi); + return 0; +} + +static int mlx5e_rx_reporter_timeout_recover(void *ctx) +{ + struct mlx5_eq_comp *eq; + struct mlx5e_rq *rq; + int err; + + rq = ctx; + eq = rq->cq.mcq.eq; + + err = mlx5e_health_channel_eq_recover(rq->netdev, eq, rq->cq.ch_stats); + if (err && rq->icosq) + clear_bit(MLX5E_SQ_STATE_ENABLED, &rq->icosq->state); + + return err; +} + +static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) +{ + return err_ctx->recover(err_ctx->ctx); +} + +static int mlx5e_rx_reporter_recover(struct devlink_health_reporter *reporter, + void *context, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_err_ctx *err_ctx = context; + + return err_ctx ? 
mlx5e_rx_reporter_recover_from_ctx(err_ctx) : + mlx5e_health_recover_channels(priv); +} + +static int mlx5e_reporter_icosq_diagnose(struct mlx5e_icosq *icosq, u8 hw_state, + struct devlink_fmsg *fmsg) +{ + int err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "ICOSQ"); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "sqn", icosq->sqn); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "HW state", hw_state); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "cc", icosq->cc); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "pc", icosq->pc); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "WQE size", + mlx5_wq_cyc_get_size(&icosq->wq)); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ"); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "cqn", icosq->cq.mcq.cqn); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "cc", icosq->cq.wq.cc); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&icosq->cq.wq)); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int +mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq, + struct devlink_fmsg *fmsg) +{ + u16 wqe_counter; + int wqes_sz; + u8 hw_state; + u16 wq_head; + int err; + + err = mlx5e_query_rq_state(rq->mdev, rq->rqn, &hw_state); + if (err) + return err; + + wqes_sz = mlx5e_rqwq_get_cur_sz(rq); + wq_head = mlx5e_rqwq_get_head(rq); + wqe_counter = mlx5e_rqwq_get_wqe_counter(rq); + + err = devlink_fmsg_u32_pair_put(fmsg, "rqn", rq->rqn); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "HW state", hw_state); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "SW state", rq->state); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "WQE counter", wqe_counter); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "posted WQEs", wqes_sz); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "cc", wq_head); + if (err) + return err; + + err = mlx5e_health_cq_diag_fmsg(&rq->cq, fmsg); + if (err) + return err; + + err = mlx5e_health_eq_diag_fmsg(rq->cq.mcq.eq, fmsg); + if (err) + return err; + + if (rq->icosq) { + struct mlx5e_icosq *icosq = rq->icosq; + u8 icosq_hw_state; + + err = mlx5_core_query_sq_state(rq->mdev, icosq->sqn, &icosq_hw_state); + if (err) + return err; + + err = mlx5e_reporter_icosq_diagnose(icosq, icosq_hw_state, fmsg); + if (err) + return err; + } + + return 0; +} + +static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, + struct devlink_fmsg *fmsg) +{ + int err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", rq->ix); + if (err) + return err; + + err = mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg); + if (err) + return err; + + return devlink_fmsg_obj_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_diagnose_generic_rq(struct mlx5e_rq *rq, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_priv *priv = rq->priv; + struct mlx5e_params *params; + u32 rq_stride, rq_sz; + bool real_time; + int err; + + params = &priv->channels.params; + rq_sz = mlx5e_rqwq_get_size(rq); + real_time = mlx5_is_real_time_rq(priv->mdev); + rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL)); + + err = 
mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ"); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "type", params->rq_wq_type); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "stride size", rq_stride); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", rq_sz); + if (err) + return err; + + err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC"); + if (err) + return err; + + err = mlx5e_health_cq_common_diag_fmsg(&rq->cq, fmsg); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int +mlx5e_rx_reporter_diagnose_common_ptp_config(struct mlx5e_priv *priv, struct mlx5e_ptp *ptp_ch, + struct devlink_fmsg *fmsg) +{ + int err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP"); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "filter_type", priv->tstamp.rx_filter); + if (err) + return err; + + err = mlx5e_rx_reporter_diagnose_generic_rq(&ptp_ch->rq, fmsg); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int +mlx5e_rx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_rq *generic_rq = &priv->channels.c[0]->rq; + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + int err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common config"); + if (err) + return err; + + err = mlx5e_rx_reporter_diagnose_generic_rq(generic_rq, fmsg); + if (err) + return err; + + if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) { + err = mlx5e_rx_reporter_diagnose_common_ptp_config(priv, ptp_ch, fmsg); + if (err) + return err; + } + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_build_diagnose_output_ptp_rq(struct mlx5e_rq *rq, + struct devlink_fmsg *fmsg) +{ + int err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp"); + if (err) + return err; + + err = mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg); + if (err) + return err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + int i, err = 0; + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + err = mlx5e_rx_reporter_diagnose_common_config(reporter, fmsg); + if (err) + goto unlock; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "RQs"); + if (err) + goto unlock; + + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; + struct mlx5e_rq *rq; + + rq = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state) ? 
+ &c->xskrq : &c->rq; + + err = mlx5e_rx_reporter_build_diagnose_output(rq, fmsg); + if (err) + goto unlock; + } + if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) { + err = mlx5e_rx_reporter_build_diagnose_output_ptp_rq(&ptp_ch->rq, fmsg); + if (err) + goto unlock; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static int mlx5e_rx_reporter_dump_icosq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5e_txqsq *icosq = ctx; + struct mlx5_rsc_key key = {}; + int err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "ICOSQ"); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_FULL_QPC; + key.index1 = icosq->sqn; + key.num_of_obj1 = 1; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_SND_BUFF; + key.num_of_obj2 = MLX5_RSC_DUMP_ALL; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5_rsc_key key = {}; + struct mlx5e_rq *rq = ctx; + int err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ"); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_FULL_QPC; + key.index1 = rq->rqn; + key.num_of_obj1 = 1; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "receive_buff"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_RCV_BUFF; + key.num_of_obj2 = MLX5_RSC_DUMP_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + struct mlx5_rsc_key key = {}; + int i, err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = 
MLX5_SGMT_TYPE_RX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "RQs"); + if (err) + return err; + + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_rq *rq = &priv->channels.c[i]->rq; + + err = mlx5e_health_queue_dump(priv, fmsg, rq->rqn, "RQ"); + if (err) + return err; + } + + if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) { + err = mlx5e_health_queue_dump(priv, fmsg, ptp_ch->rq.rqn, "PTP RQ"); + if (err) + return err; + } + + return devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_dump_from_ctx(struct mlx5e_priv *priv, + struct mlx5e_err_ctx *err_ctx, + struct devlink_fmsg *fmsg) +{ + return err_ctx->dump(priv, fmsg, err_ctx->ctx); +} + +static int mlx5e_rx_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *context, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_err_ctx *err_ctx = context; + + return err_ctx ? mlx5e_rx_reporter_dump_from_ctx(priv, err_ctx, fmsg) : + mlx5e_rx_reporter_dump_all_rqs(priv, fmsg); +} + +void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq) +{ + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_icosq *icosq = rq->icosq; + struct mlx5e_priv *priv = rq->priv; + struct mlx5e_err_ctx err_ctx = {}; + char icosq_str[32] = {}; + + err_ctx.ctx = rq; + err_ctx.recover = mlx5e_rx_reporter_timeout_recover; + err_ctx.dump = mlx5e_rx_reporter_dump_rq; + + if (icosq) + snprintf(icosq_str, sizeof(icosq_str), "ICOSQ: 0x%x, ", icosq->sqn); + snprintf(err_str, sizeof(err_str), + "RX timeout on channel: %d, %s RQ: 0x%x, CQ: 0x%x", + rq->ix, icosq_str, rq->rqn, rq->cq.mcq.cqn); + + mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); +} + +void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq) +{ + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_priv *priv = rq->priv; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = rq; + err_ctx.recover = mlx5e_rx_reporter_err_rq_cqe_recover; + err_ctx.dump = mlx5e_rx_reporter_dump_rq; + snprintf(err_str, sizeof(err_str), "ERR CQE on RQ: 0x%x", rq->rqn); + + mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); +} + +void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq) +{ + struct mlx5e_priv *priv = icosq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = icosq; + err_ctx.recover = mlx5e_rx_reporter_err_icosq_cqe_recover; + err_ctx.dump = mlx5e_rx_reporter_dump_icosq; + snprintf(err_str, sizeof(err_str), "ERR CQE on ICOSQ: 0x%x", icosq->sqn); + + mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); +} + +void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c) +{ + mutex_lock(&c->icosq_recovery_lock); +} + +void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c) +{ + mutex_unlock(&c->icosq_recovery_lock); +} + +static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { + .name = "rx", + .recover = mlx5e_rx_reporter_recover, + .diagnose = mlx5e_rx_reporter_diagnose, + .dump = mlx5e_rx_reporter_dump, +}; + +#define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500 + +void mlx5e_reporter_rx_create(struct mlx5e_priv *priv) +{ + struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); + struct devlink_health_reporter *reporter; + + reporter = 
devlink_port_health_reporter_create(dl_port, &mlx5_rx_reporter_ops, + MLX5E_REPORTER_RX_GRACEFUL_PERIOD, priv); + if (IS_ERR(reporter)) { + netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n", + PTR_ERR(reporter)); + return; + } + priv->rx_reporter = reporter; +} + +void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv) +{ + if (!priv->rx_reporter) + return; + + devlink_port_health_reporter_destroy(priv->rx_reporter); + priv->rx_reporter = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c new file mode 100644 index 000000000..60bc5b577 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -0,0 +1,614 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "health.h" +#include "en/ptp.h" +#include "en/devlink.h" +#include "lib/tout.h" + +static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) +{ + struct mlx5_core_dev *dev = sq->mdev; + unsigned long exp_time; + + exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR)); + + while (time_before(jiffies, exp_time)) { + if (sq->cc == sq->pc) + return 0; + + msleep(20); + } + + netdev_err(sq->netdev, + "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", + sq->sqn, sq->cc, sq->pc); + + return -ETIMEDOUT; +} + +static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) +{ + WARN_ONCE(sq->cc != sq->pc, + "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", + sq->sqn, sq->cc, sq->pc); + sq->cc = 0; + sq->dma_fifo_cc = 0; + sq->pc = 0; +} + +static int mlx5e_tx_reporter_err_cqe_recover(void *ctx) +{ + struct mlx5_core_dev *mdev; + struct net_device *dev; + struct mlx5e_txqsq *sq; + u8 state; + int err; + + sq = ctx; + mdev = sq->mdev; + dev = sq->netdev; + + if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) + return 0; + + err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); + if (err) { + netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", + sq->sqn, err); + goto out; + } + + if (state != MLX5_SQC_STATE_ERR) + goto out; + + mlx5e_tx_disable_queue(sq->txq); + + err = mlx5e_wait_for_sq_flush(sq); + if (err) + goto out; + + /* At this point, no new packets will arrive from the stack as TXQ is + * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all + * pending WQEs. SQ can safely reset the SQ. 
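+	 * (i.e. the driver can now reset the SQ: mlx5e_tx_disable_queue() stopped the
+	 * TXQ above and mlx5e_wait_for_sq_flush() confirmed cc == pc, so no WQEs are
+	 * in flight when mlx5e_health_sq_to_ready() is called below.)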
+ */ + + err = mlx5e_health_sq_to_ready(mdev, dev, sq->sqn); + if (err) + goto out; + + mlx5e_reset_txqsq_cc_pc(sq); + sq->stats->recover++; + clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); + mlx5e_activate_txqsq(sq); + + return 0; +out: + clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); + return err; +} + +struct mlx5e_tx_timeout_ctx { + struct mlx5e_txqsq *sq; + signed int status; +}; + +static int mlx5e_tx_reporter_timeout_recover(void *ctx) +{ + struct mlx5e_tx_timeout_ctx *to_ctx; + struct mlx5e_priv *priv; + struct mlx5_eq_comp *eq; + struct mlx5e_txqsq *sq; + int err; + + to_ctx = ctx; + sq = to_ctx->sq; + eq = sq->cq.mcq.eq; + priv = sq->priv; + err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats); + if (!err) { + to_ctx->status = 0; /* this sq recovered */ + return err; + } + + err = mlx5e_safe_reopen_channels(priv); + if (!err) { + to_ctx->status = 1; /* all channels recovered */ + return err; + } + + to_ctx->status = err; + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + netdev_err(priv->netdev, + "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n", + err); + + return err; +} + +/* state lock cannot be grabbed within this function. + * It can cause a dead lock or a read-after-free. + */ +static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) +{ + return err_ctx->recover(err_ctx->ctx); +} + +static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter, + void *context, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_err_ctx *err_ctx = context; + + return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) : + mlx5e_health_recover_channels(priv); +} + +static int +mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg, + struct mlx5e_txqsq *sq, int tc) +{ + bool stopped = netif_xmit_stopped(sq->txq); + struct mlx5e_priv *priv = sq->priv; + u8 state; + int err; + + err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state); + if (err) + return err; + + err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc); + if (err) + return err; + + err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg); + if (err) + return err; + + return mlx5e_health_eq_diag_fmsg(sq->cq.mcq.eq, fmsg); +} + +static int +mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, + struct mlx5e_txqsq *sq, int tc) +{ + int err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix); + if (err) + return err; + + err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, sq, tc); + if (err) + return err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +static int +mlx5e_tx_reporter_build_diagnose_output_ptpsq(struct devlink_fmsg *fmsg, + struct mlx5e_ptpsq *ptpsq, int tc) +{ + int err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp"); + 
if (err) + return err; + + err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, &ptpsq->txqsq, tc); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS"); + if (err) + return err; + + err = mlx5e_health_cq_diag_fmsg(&ptpsq->ts_cq, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +static int +mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg, + struct mlx5e_txqsq *txqsq) +{ + u32 sq_stride, sq_sz; + bool real_time; + int err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); + if (err) + return err; + + real_time = mlx5_is_real_time_sq(txqsq->mdev); + sq_sz = mlx5_wq_cyc_get_size(&txqsq->wq); + sq_stride = MLX5_SEND_WQE_BB; + + err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz); + if (err) + return err; + + err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC"); + if (err) + return err; + + err = mlx5e_health_cq_common_diag_fmsg(&txqsq->cq, fmsg); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int +mlx5e_tx_reporter_diagnose_generic_tx_port_ts(struct devlink_fmsg *fmsg, + struct mlx5e_ptpsq *ptpsq) +{ + int err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS"); + if (err) + return err; + + err = mlx5e_health_cq_common_diag_fmsg(&ptpsq->ts_cq, fmsg); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int +mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_txqsq *generic_sq = priv->txq2sq[0]; + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + struct mlx5e_ptpsq *generic_ptpsq; + int err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common Config"); + if (err) + return err; + + err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, generic_sq); + if (err) + return err; + + if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) + goto out; + + generic_ptpsq = &ptp_ch->ptpsq[0]; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP"); + if (err) + return err; + + err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, &generic_ptpsq->txqsq); + if (err) + return err; + + err = mlx5e_tx_reporter_diagnose_generic_tx_port_ts(fmsg, generic_ptpsq); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + +out: + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + + int i, tc, err = 0; + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + err = mlx5e_tx_reporter_diagnose_common_config(reporter, fmsg); + if (err) + goto unlock; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); + if (err) + goto unlock; + + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; + + for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { + struct mlx5e_txqsq *sq = &c->sq[tc]; + + err = 
mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc); + if (err) + goto unlock; + } + } + + if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) + goto close_sqs_nest; + + for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { + err = mlx5e_tx_reporter_build_diagnose_output_ptpsq(fmsg, + &ptp_ch->ptpsq[tc], + tc); + if (err) + goto unlock; + } + +close_sqs_nest: + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + goto unlock; + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5_rsc_key key = {}; + struct mlx5e_txqsq *sq = ctx; + int err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_FULL_QPC; + key.index1 = sq->sqn; + key.num_of_obj1 = 1; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_SND_BUFF; + key.num_of_obj2 = MLX5_RSC_DUMP_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5e_tx_timeout_ctx *to_ctx = ctx; + + return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq); +} + +static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + struct mlx5_rsc_key key = {}; + int i, tc, err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); + if (err) + return err; + + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; + + for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { + struct mlx5e_txqsq *sq = &c->sq[tc]; + + err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ"); + if (err) + return err; + } + } + + if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) { + for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { + struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq; + + err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "PTP SQ"); + if (err) + return err; + } + } + + return devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv, + struct mlx5e_err_ctx *err_ctx, + struct 
devlink_fmsg *fmsg) +{ + return err_ctx->dump(priv, fmsg, err_ctx->ctx); +} + +static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *context, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_err_ctx *err_ctx = context; + + return err_ctx ? mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg) : + mlx5e_tx_reporter_dump_all_sqs(priv, fmsg); +} + +void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq) +{ + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_priv *priv = sq->priv; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = sq; + err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; + err_ctx.dump = mlx5e_tx_reporter_dump_sq; + snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn); + + mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); +} + +int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) +{ + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_tx_timeout_ctx to_ctx = {}; + struct mlx5e_priv *priv = sq->priv; + struct mlx5e_err_ctx err_ctx = {}; + + to_ctx.sq = sq; + err_ctx.ctx = &to_ctx; + err_ctx.recover = mlx5e_tx_reporter_timeout_recover; + err_ctx.dump = mlx5e_tx_reporter_timeout_dump; + snprintf(err_str, sizeof(err_str), + "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u", + sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, + jiffies_to_usecs(jiffies - READ_ONCE(sq->txq->trans_start))); + + mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); + return to_ctx.status; +} + +static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { + .name = "tx", + .recover = mlx5e_tx_reporter_recover, + .diagnose = mlx5e_tx_reporter_diagnose, + .dump = mlx5e_tx_reporter_dump, +}; + +#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 + +void mlx5e_reporter_tx_create(struct mlx5e_priv *priv) +{ + struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); + struct devlink_health_reporter *reporter; + + reporter = devlink_port_health_reporter_create(dl_port, &mlx5_tx_reporter_ops, + MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv); + if (IS_ERR(reporter)) { + netdev_warn(priv->netdev, + "Failed to create tx reporter, err = %ld\n", + PTR_ERR(reporter)); + return; + } + priv->tx_reporter = reporter; +} + +void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv) +{ + if (!priv->tx_reporter) + return; + + devlink_port_health_reporter_destroy(priv->tx_reporter); + priv->tx_reporter = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c new file mode 100644 index 000000000..b915fb29d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. 
*/ + +#include "rqt.h" +#include <linux/mlx5/transobj.h> + +void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir, + unsigned int num_channels) +{ + unsigned int i; + + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) + indir->table[i] = i % num_channels; +} + +static int mlx5e_rqt_init(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, + u16 max_size, u32 *init_rqns, u16 init_size) +{ + void *rqtc; + int inlen; + int err; + u32 *in; + int i; + + rqt->mdev = mdev; + rqt->size = max_size; + + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * init_size; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); + + MLX5_SET(rqtc, rqtc, rqt_max_size, rqt->size); + + MLX5_SET(rqtc, rqtc, rqt_actual_size, init_size); + for (i = 0; i < init_size; i++) + MLX5_SET(rqtc, rqtc, rq_num[i], init_rqns[i]); + + err = mlx5_core_create_rqt(rqt->mdev, in, inlen, &rqt->rqtn); + + kvfree(in); + return err; +} + +int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, + bool indir_enabled, u32 init_rqn) +{ + u16 max_size = indir_enabled ? MLX5E_INDIR_RQT_SIZE : 1; + + return mlx5e_rqt_init(rqt, mdev, max_size, &init_rqn, 1); +} + +static int mlx5e_bits_invert(unsigned long a, int size) +{ + int inv = 0; + int i; + + for (i = 0; i < size; i++) + inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i; + + return inv; +} + +static int mlx5e_calc_indir_rqns(u32 *rss_rqns, u32 *rqns, unsigned int num_rqns, + u8 hfunc, struct mlx5e_rss_params_indir *indir) +{ + unsigned int i; + + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) { + unsigned int ix = i; + + if (hfunc == ETH_RSS_HASH_XOR) + ix = mlx5e_bits_invert(ix, ilog2(MLX5E_INDIR_RQT_SIZE)); + + ix = indir->table[ix]; + + if (WARN_ON(ix >= num_rqns)) + /* Could be a bug in the driver or in the kernel part of + * ethtool: indir table refers to non-existent RQs. 
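+			 * Returning -EINVAL here makes both callers skip the RQT
+			 * create/modify command, so an out-of-range RQ index never
+			 * reaches the hardware table.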
+ */ + return -EINVAL; + rss_rqns[i] = rqns[ix]; + } + + return 0; +} + +int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, + u32 *rqns, unsigned int num_rqns, + u8 hfunc, struct mlx5e_rss_params_indir *indir) +{ + u32 *rss_rqns; + int err; + + rss_rqns = kvmalloc_array(MLX5E_INDIR_RQT_SIZE, sizeof(*rss_rqns), GFP_KERNEL); + if (!rss_rqns) + return -ENOMEM; + + err = mlx5e_calc_indir_rqns(rss_rqns, rqns, num_rqns, hfunc, indir); + if (err) + goto out; + + err = mlx5e_rqt_init(rqt, mdev, MLX5E_INDIR_RQT_SIZE, rss_rqns, MLX5E_INDIR_RQT_SIZE); + +out: + kvfree(rss_rqns); + return err; +} + +void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt) +{ + mlx5_core_destroy_rqt(rqt->mdev, rqt->rqtn); +} + +static int mlx5e_rqt_redirect(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int size) +{ + unsigned int i; + void *rqtc; + int inlen; + u32 *in; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * size; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); + + MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1); + MLX5_SET(rqtc, rqtc, rqt_actual_size, size); + for (i = 0; i < size; i++) + MLX5_SET(rqtc, rqtc, rq_num[i], rqns[i]); + + err = mlx5_core_modify_rqt(rqt->mdev, rqt->rqtn, in, inlen); + + kvfree(in); + return err; +} + +int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn) +{ + return mlx5e_rqt_redirect(rqt, &rqn, 1); +} + +int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_rqns, + u8 hfunc, struct mlx5e_rss_params_indir *indir) +{ + u32 *rss_rqns; + int err; + + if (WARN_ON(rqt->size != MLX5E_INDIR_RQT_SIZE)) + return -EINVAL; + + rss_rqns = kvmalloc_array(MLX5E_INDIR_RQT_SIZE, sizeof(*rss_rqns), GFP_KERNEL); + if (!rss_rqns) + return -ENOMEM; + + err = mlx5e_calc_indir_rqns(rss_rqns, rqns, num_rqns, hfunc, indir); + if (err) + goto out; + + err = mlx5e_rqt_redirect(rqt, rss_rqns, MLX5E_INDIR_RQT_SIZE); + +out: + kvfree(rss_rqns); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h new file mode 100644 index 000000000..60c985a12 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. 
*/ + +#ifndef __MLX5_EN_RQT_H__ +#define __MLX5_EN_RQT_H__ + +#include <linux/kernel.h> + +#define MLX5E_INDIR_RQT_SIZE (1 << 8) + +struct mlx5_core_dev; + +struct mlx5e_rss_params_indir { + u32 table[MLX5E_INDIR_RQT_SIZE]; +}; + +void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir, + unsigned int num_channels); + +struct mlx5e_rqt { + struct mlx5_core_dev *mdev; + u32 rqtn; + u16 size; +}; + +int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, + bool indir_enabled, u32 init_rqn); +int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, + u32 *rqns, unsigned int num_rqns, + u8 hfunc, struct mlx5e_rss_params_indir *indir); +void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt); + +static inline u32 mlx5e_rqt_get_rqtn(struct mlx5e_rqt *rqt) +{ + return rqt->rqtn; +} + +int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn); +int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_rqns, + u8 hfunc, struct mlx5e_rss_params_indir *indir); + +#endif /* __MLX5_EN_RQT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c new file mode 100644 index 000000000..7f93426b8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c @@ -0,0 +1,606 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. + +#include "rss.h" + +#define mlx5e_rss_warn(__dev, format, ...) \ + dev_warn((__dev)->device, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +static const struct mlx5e_rss_params_traffic_type rss_default_config[MLX5E_NUM_INDIR_TIRS] = { + [MLX5_TT_IPV4_TCP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5_TT_IPV6_TCP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5_TT_IPV4_UDP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5_TT_IPV6_UDP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5_TT_IPV4_IPSEC_AH] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5_TT_IPV6_IPSEC_AH] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5_TT_IPV4_IPSEC_ESP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5_TT_IPV6_IPSEC_ESP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5_TT_IPV4] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP, + }, + [MLX5_TT_IPV6] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP, + }, +}; + +struct mlx5e_rss_params_traffic_type +mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt) +{ + return rss_default_config[tt]; +} + +struct mlx5e_rss { + struct mlx5e_rss_params_hash hash; + struct mlx5e_rss_params_indir indir; + u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_tir *tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_tir *inner_tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_rqt 
rqt; + struct mlx5_core_dev *mdev; + u32 drop_rqn; + bool inner_ft_support; + bool enabled; + refcount_t refcnt; +}; + +struct mlx5e_rss *mlx5e_rss_alloc(void) +{ + return kvzalloc(sizeof(struct mlx5e_rss), GFP_KERNEL); +} + +void mlx5e_rss_free(struct mlx5e_rss *rss) +{ + kvfree(rss); +} + +static void mlx5e_rss_params_init(struct mlx5e_rss *rss) +{ + enum mlx5_traffic_types tt; + + rss->hash.hfunc = ETH_RSS_HASH_TOP; + netdev_rss_key_fill(rss->hash.toeplitz_hash_key, + sizeof(rss->hash.toeplitz_hash_key)); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + rss->rx_hash_fields[tt] = + mlx5e_rss_get_default_tt_config(tt).rx_hash_fields; +} + +static struct mlx5e_tir **rss_get_tirp(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + bool inner) +{ + return inner ? &rss->inner_tir[tt] : &rss->tir[tt]; +} + +static struct mlx5e_tir *rss_get_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + bool inner) +{ + return *rss_get_tirp(rss, tt, inner); +} + +static struct mlx5e_rss_params_traffic_type +mlx5e_rss_get_tt_config(struct mlx5e_rss *rss, enum mlx5_traffic_types tt) +{ + struct mlx5e_rss_params_traffic_type rss_tt; + + rss_tt = mlx5e_rss_get_default_tt_config(tt); + rss_tt.rx_hash_fields = rss->rx_hash_fields[tt]; + return rss_tt; +} + +static int mlx5e_rss_create_tir(struct mlx5e_rss *rss, + enum mlx5_traffic_types tt, + const struct mlx5e_packet_merge_param *init_pkt_merge_param, + bool inner) +{ + struct mlx5e_rss_params_traffic_type rss_tt; + struct mlx5e_tir_builder *builder; + struct mlx5e_tir **tir_p; + struct mlx5e_tir *tir; + u32 rqtn; + int err; + + if (inner && !rss->inner_ft_support) { + mlx5e_rss_warn(rss->mdev, + "Cannot create inner indirect TIR[%d], RSS inner FT is not supported.\n", + tt); + return -EINVAL; + } + + tir_p = rss_get_tirp(rss, tt, inner); + if (*tir_p) + return -EINVAL; + + tir = kvzalloc(sizeof(*tir), GFP_KERNEL); + if (!tir) + return -ENOMEM; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) { + err = -ENOMEM; + goto free_tir; + } + + rqtn = mlx5e_rqt_get_rqtn(&rss->rqt); + mlx5e_tir_builder_build_rqt(builder, rss->mdev->mlx5e_res.hw_objs.td.tdn, + rqtn, rss->inner_ft_support); + mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param); + rss_tt = mlx5e_rss_get_tt_config(rss, tt); + mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner); + + err = mlx5e_tir_init(tir, builder, rss->mdev, true); + mlx5e_tir_builder_free(builder); + if (err) { + mlx5e_rss_warn(rss->mdev, "Failed to create %sindirect TIR: err = %d, tt = %d\n", + inner ? 
"inner " : "", err, tt); + goto free_tir; + } + + *tir_p = tir; + return 0; + +free_tir: + kvfree(tir); + return err; +} + +static void mlx5e_rss_destroy_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + bool inner) +{ + struct mlx5e_tir **tir_p; + struct mlx5e_tir *tir; + + tir_p = rss_get_tirp(rss, tt, inner); + if (!*tir_p) + return; + + tir = *tir_p; + mlx5e_tir_destroy(tir); + kvfree(tir); + *tir_p = NULL; +} + +static int mlx5e_rss_create_tirs(struct mlx5e_rss *rss, + const struct mlx5e_packet_merge_param *init_pkt_merge_param, + bool inner) +{ + enum mlx5_traffic_types tt, max_tt; + int err; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner); + if (err) + goto err_destroy_tirs; + } + + return 0; + +err_destroy_tirs: + max_tt = tt; + for (tt = 0; tt < max_tt; tt++) + mlx5e_rss_destroy_tir(rss, tt, inner); + return err; +} + +static void mlx5e_rss_destroy_tirs(struct mlx5e_rss *rss, bool inner) +{ + enum mlx5_traffic_types tt; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + mlx5e_rss_destroy_tir(rss, tt, inner); +} + +static int mlx5e_rss_update_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + bool inner) +{ + struct mlx5e_rss_params_traffic_type rss_tt; + struct mlx5e_tir_builder *builder; + struct mlx5e_tir *tir; + int err; + + tir = rss_get_tir(rss, tt, inner); + if (!tir) + return 0; + + builder = mlx5e_tir_builder_alloc(true); + if (!builder) + return -ENOMEM; + + rss_tt = mlx5e_rss_get_tt_config(rss, tt); + + mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner); + err = mlx5e_tir_modify(tir, builder); + + mlx5e_tir_builder_free(builder); + return err; +} + +static int mlx5e_rss_update_tirs(struct mlx5e_rss *rss) +{ + enum mlx5_traffic_types tt; + int err, retval; + + retval = 0; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + err = mlx5e_rss_update_tir(rss, tt, false); + if (err) { + retval = retval ? : err; + mlx5e_rss_warn(rss->mdev, + "Failed to update RSS hash of indirect TIR for traffic type %d: err = %d\n", + tt, err); + } + + if (!rss->inner_ft_support) + continue; + + err = mlx5e_rss_update_tir(rss, tt, true); + if (err) { + retval = retval ? 
: err; + mlx5e_rss_warn(rss->mdev, + "Failed to update RSS hash of inner indirect TIR for traffic type %d: err = %d\n", + tt, err); + } + } + return retval; +} + +int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev, + bool inner_ft_support, u32 drop_rqn) +{ + rss->mdev = mdev; + rss->inner_ft_support = inner_ft_support; + rss->drop_rqn = drop_rqn; + + mlx5e_rss_params_init(rss); + refcount_set(&rss->refcnt, 1); + + return mlx5e_rqt_init_direct(&rss->rqt, mdev, true, drop_rqn); +} + +int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev, + bool inner_ft_support, u32 drop_rqn, + const struct mlx5e_packet_merge_param *init_pkt_merge_param) +{ + int err; + + err = mlx5e_rss_init_no_tirs(rss, mdev, inner_ft_support, drop_rqn); + if (err) + goto err_out; + + err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, false); + if (err) + goto err_destroy_rqt; + + if (inner_ft_support) { + err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, true); + if (err) + goto err_destroy_tirs; + } + + return 0; + +err_destroy_tirs: + mlx5e_rss_destroy_tirs(rss, false); +err_destroy_rqt: + mlx5e_rqt_destroy(&rss->rqt); +err_out: + return err; +} + +int mlx5e_rss_cleanup(struct mlx5e_rss *rss) +{ + if (!refcount_dec_if_one(&rss->refcnt)) + return -EBUSY; + + mlx5e_rss_destroy_tirs(rss, false); + + if (rss->inner_ft_support) + mlx5e_rss_destroy_tirs(rss, true); + + mlx5e_rqt_destroy(&rss->rqt); + + return 0; +} + +void mlx5e_rss_refcnt_inc(struct mlx5e_rss *rss) +{ + refcount_inc(&rss->refcnt); +} + +void mlx5e_rss_refcnt_dec(struct mlx5e_rss *rss) +{ + refcount_dec(&rss->refcnt); +} + +unsigned int mlx5e_rss_refcnt_read(struct mlx5e_rss *rss) +{ + return refcount_read(&rss->refcnt); +} + +u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + bool inner) +{ + struct mlx5e_tir *tir; + + WARN_ON(inner && !rss->inner_ft_support); + tir = rss_get_tir(rss, tt, inner); + WARN_ON(!tir); + + return mlx5e_tir_get_tirn(tir); +} + +/* Fill the "tirn" output parameter. + * Create the requested TIR if it's its first usage. 
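+ * A TIR created on demand here is kept until mlx5e_rss_cleanup() destroys the
+ * TIRs. Hypothetical usage sketch (illustration only; pkt_merge_param is a
+ * placeholder for the caller's struct mlx5e_packet_merge_param):
+ *	u32 tirn;
+ *	int err = mlx5e_rss_obtain_tirn(rss, MLX5_TT_IPV4_TCP,
+ *					pkt_merge_param, false, &tirn);
+ *	if (!err)
+ *		... use tirn as the destination of an RX steering rule ...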
+ */ +int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss, + enum mlx5_traffic_types tt, + const struct mlx5e_packet_merge_param *init_pkt_merge_param, + bool inner, u32 *tirn) +{ + struct mlx5e_tir *tir; + + tir = rss_get_tir(rss, tt, inner); + if (!tir) { /* TIR doesn't exist, create one */ + int err; + + err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner); + if (err) + return err; + tir = rss_get_tir(rss, tt, inner); + } + + *tirn = mlx5e_tir_get_tirn(tir); + return 0; +} + +static int mlx5e_rss_apply(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns) +{ + int err; + + err = mlx5e_rqt_redirect_indir(&rss->rqt, rqns, num_rqns, rss->hash.hfunc, &rss->indir); + if (err) + mlx5e_rss_warn(rss->mdev, "Failed to redirect RQT %#x to channels: err = %d\n", + mlx5e_rqt_get_rqtn(&rss->rqt), err); + return err; +} + +void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns) +{ + rss->enabled = true; + mlx5e_rss_apply(rss, rqns, num_rqns); +} + +void mlx5e_rss_disable(struct mlx5e_rss *rss) +{ + int err; + + rss->enabled = false; + err = mlx5e_rqt_redirect_direct(&rss->rqt, rss->drop_rqn); + if (err) + mlx5e_rss_warn(rss->mdev, "Failed to redirect RQT %#x to drop RQ %#x: err = %d\n", + mlx5e_rqt_get_rqtn(&rss->rqt), rss->drop_rqn, err); +} + +int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss, + struct mlx5e_packet_merge_param *pkt_merge_param) +{ + struct mlx5e_tir_builder *builder; + enum mlx5_traffic_types tt; + int err, final_err; + + builder = mlx5e_tir_builder_alloc(true); + if (!builder) + return -ENOMEM; + + mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param); + + final_err = 0; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + struct mlx5e_tir *tir; + + tir = rss_get_tir(rss, tt, false); + if (!tir) + goto inner_tir; + err = mlx5e_tir_modify(tir, builder); + if (err) { + mlx5e_rss_warn(rss->mdev, "Failed to update packet merge state of indirect TIR %#x for traffic type %d: err = %d\n", + mlx5e_tir_get_tirn(tir), tt, err); + if (!final_err) + final_err = err; + } + +inner_tir: + if (!rss->inner_ft_support) + continue; + + tir = rss_get_tir(rss, tt, true); + if (!tir) + continue; + err = mlx5e_tir_modify(tir, builder); + if (err) { + mlx5e_rss_warn(rss->mdev, "Failed to update packet merge state of inner indirect TIR %#x for traffic type %d: err = %d\n", + mlx5e_tir_get_tirn(tir), tt, err); + if (!final_err) + final_err = err; + } + } + + mlx5e_tir_builder_free(builder); + return final_err; +} + +int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc) +{ + unsigned int i; + + if (indir) + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) + indir[i] = rss->indir.table[i]; + + if (key) + memcpy(key, rss->hash.toeplitz_hash_key, + sizeof(rss->hash.toeplitz_hash_key)); + + if (hfunc) + *hfunc = rss->hash.hfunc; + + return 0; +} + +int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir, + const u8 *key, const u8 *hfunc, + u32 *rqns, unsigned int num_rqns) +{ + bool changed_indir = false; + bool changed_hash = false; + struct mlx5e_rss *old_rss; + int err = 0; + + old_rss = mlx5e_rss_alloc(); + if (!old_rss) + return -ENOMEM; + + *old_rss = *rss; + + if (hfunc && *hfunc != rss->hash.hfunc) { + switch (*hfunc) { + case ETH_RSS_HASH_XOR: + case ETH_RSS_HASH_TOP: + break; + default: + err = -EINVAL; + goto out; + } + changed_hash = true; + changed_indir = true; + rss->hash.hfunc = *hfunc; + } + + if (key) { + if (rss->hash.hfunc == ETH_RSS_HASH_TOP) + changed_hash = true; + memcpy(rss->hash.toeplitz_hash_key, key, + 
sizeof(rss->hash.toeplitz_hash_key)); + } + + if (indir) { + unsigned int i; + + changed_indir = true; + + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) + rss->indir.table[i] = indir[i]; + } + + if (changed_indir && rss->enabled) { + err = mlx5e_rss_apply(rss, rqns, num_rqns); + if (err) { + *rss = *old_rss; + goto out; + } + } + + if (changed_hash) + mlx5e_rss_update_tirs(rss); + +out: + mlx5e_rss_free(old_rss); + return err; +} + +struct mlx5e_rss_params_hash mlx5e_rss_get_hash(struct mlx5e_rss *rss) +{ + return rss->hash; +} + +u8 mlx5e_rss_get_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt) +{ + return rss->rx_hash_fields[tt]; +} + +int mlx5e_rss_set_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + u8 rx_hash_fields) +{ + u8 old_rx_hash_fields; + int err; + + old_rx_hash_fields = rss->rx_hash_fields[tt]; + + if (old_rx_hash_fields == rx_hash_fields) + return 0; + + rss->rx_hash_fields[tt] = rx_hash_fields; + + err = mlx5e_rss_update_tir(rss, tt, false); + if (err) { + rss->rx_hash_fields[tt] = old_rx_hash_fields; + mlx5e_rss_warn(rss->mdev, + "Failed to update RSS hash fields of indirect TIR for traffic type %d: err = %d\n", + tt, err); + return err; + } + + if (!(rss->inner_ft_support)) + return 0; + + err = mlx5e_rss_update_tir(rss, tt, true); + if (err) { + /* Partial update happened. Try to revert - it may fail too, but + * there is nothing more we can do. + */ + rss->rx_hash_fields[tt] = old_rx_hash_fields; + mlx5e_rss_warn(rss->mdev, + "Failed to update RSS hash fields of inner indirect TIR for traffic type %d: err = %d\n", + tt, err); + if (mlx5e_rss_update_tir(rss, tt, false)) + mlx5e_rss_warn(rss->mdev, + "Partial update of RSS hash fields happened: failed to revert indirect TIR for traffic type %d to the old values\n", + tt); + } + + return err; +} + +void mlx5e_rss_set_indir_uniform(struct mlx5e_rss *rss, unsigned int nch) +{ + mlx5e_rss_params_indir_init_uniform(&rss->indir, nch); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h new file mode 100644 index 000000000..c6b216416 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. 
*/ + +#ifndef __MLX5_EN_RSS_H__ +#define __MLX5_EN_RSS_H__ + +#include "rqt.h" +#include "tir.h" +#include "fs.h" + +struct mlx5e_rss_params_traffic_type +mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt); + +struct mlx5e_rss; + +struct mlx5e_rss *mlx5e_rss_alloc(void); +void mlx5e_rss_free(struct mlx5e_rss *rss); +int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev, + bool inner_ft_support, u32 drop_rqn, + const struct mlx5e_packet_merge_param *init_pkt_merge_param); +int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev, + bool inner_ft_support, u32 drop_rqn); +int mlx5e_rss_cleanup(struct mlx5e_rss *rss); + +void mlx5e_rss_refcnt_inc(struct mlx5e_rss *rss); +void mlx5e_rss_refcnt_dec(struct mlx5e_rss *rss); +unsigned int mlx5e_rss_refcnt_read(struct mlx5e_rss *rss); + +u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + bool inner); +int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss, + enum mlx5_traffic_types tt, + const struct mlx5e_packet_merge_param *init_pkt_merge_param, + bool inner, u32 *tirn); + +void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns); +void mlx5e_rss_disable(struct mlx5e_rss *rss); + +int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss, + struct mlx5e_packet_merge_param *pkt_merge_param); +int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc); +int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir, + const u8 *key, const u8 *hfunc, + u32 *rqns, unsigned int num_rqns); +struct mlx5e_rss_params_hash mlx5e_rss_get_hash(struct mlx5e_rss *rss); +u8 mlx5e_rss_get_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt); +int mlx5e_rss_set_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + u8 rx_hash_fields); +void mlx5e_rss_set_indir_uniform(struct mlx5e_rss *rss, unsigned int nch); +#endif /* __MLX5_EN_RSS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c new file mode 100644 index 000000000..e1095bc36 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -0,0 +1,640 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. 
*/ + +#include "rx_res.h" +#include "channels.h" +#include "params.h" + +#define MLX5E_MAX_NUM_RSS 16 + +struct mlx5e_rx_res { + struct mlx5_core_dev *mdev; + enum mlx5e_rx_res_features features; + unsigned int max_nch; + u32 drop_rqn; + + struct mlx5e_packet_merge_param pkt_merge_param; + struct rw_semaphore pkt_merge_param_sem; + + struct mlx5e_rss *rss[MLX5E_MAX_NUM_RSS]; + bool rss_active; + u32 rss_rqns[MLX5E_INDIR_RQT_SIZE]; + unsigned int rss_nch; + + struct { + struct mlx5e_rqt direct_rqt; + struct mlx5e_tir direct_tir; + } *channels; + + struct { + struct mlx5e_rqt rqt; + struct mlx5e_tir tir; + } ptp; +}; + +/* API for rx_res_rss_* */ + +static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res, + unsigned int init_nch) +{ + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + struct mlx5e_rss *rss; + int err; + + if (WARN_ON(res->rss[0])) + return -EINVAL; + + rss = mlx5e_rss_alloc(); + if (!rss) + return -ENOMEM; + + err = mlx5e_rss_init(rss, res->mdev, inner_ft_support, res->drop_rqn, + &res->pkt_merge_param); + if (err) + goto err_rss_free; + + mlx5e_rss_set_indir_uniform(rss, init_nch); + + res->rss[0] = rss; + + return 0; + +err_rss_free: + mlx5e_rss_free(rss); + return err; +} + +int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch) +{ + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + struct mlx5e_rss *rss; + int err, i; + + for (i = 1; i < MLX5E_MAX_NUM_RSS; i++) + if (!res->rss[i]) + break; + + if (i == MLX5E_MAX_NUM_RSS) + return -ENOSPC; + + rss = mlx5e_rss_alloc(); + if (!rss) + return -ENOMEM; + + err = mlx5e_rss_init_no_tirs(rss, res->mdev, inner_ft_support, res->drop_rqn); + if (err) + goto err_rss_free; + + mlx5e_rss_set_indir_uniform(rss, init_nch); + if (res->rss_active) + mlx5e_rss_enable(rss, res->rss_rqns, res->rss_nch); + + res->rss[i] = rss; + *rss_idx = i; + + return 0; + +err_rss_free: + mlx5e_rss_free(rss); + return err; +} + +static int __mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx) +{ + struct mlx5e_rss *rss = res->rss[rss_idx]; + int err; + + err = mlx5e_rss_cleanup(rss); + if (err) + return err; + + mlx5e_rss_free(rss); + res->rss[rss_idx] = NULL; + + return 0; +} + +int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx) +{ + struct mlx5e_rss *rss; + + if (rss_idx >= MLX5E_MAX_NUM_RSS) + return -EINVAL; + + rss = res->rss[rss_idx]; + if (!rss) + return -EINVAL; + + return __mlx5e_rx_res_rss_destroy(res, rss_idx); +} + +static void mlx5e_rx_res_rss_destroy_all(struct mlx5e_rx_res *res) +{ + int i; + + for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) { + struct mlx5e_rss *rss = res->rss[i]; + int err; + + if (!rss) + continue; + + err = __mlx5e_rx_res_rss_destroy(res, i); + if (err) { + unsigned int refcount; + + refcount = mlx5e_rss_refcnt_read(rss); + mlx5_core_warn(res->mdev, + "Failed to destroy RSS context %d, refcount = %u, err = %d\n", + i, refcount, err); + } + } +} + +static void mlx5e_rx_res_rss_enable(struct mlx5e_rx_res *res) +{ + int i; + + res->rss_active = true; + + for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) { + struct mlx5e_rss *rss = res->rss[i]; + + if (!rss) + continue; + mlx5e_rss_enable(rss, res->rss_rqns, res->rss_nch); + } +} + +static void mlx5e_rx_res_rss_disable(struct mlx5e_rx_res *res) +{ + int i; + + res->rss_active = false; + + for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) { + struct mlx5e_rss *rss = res->rss[i]; + + if (!rss) + continue; + mlx5e_rss_disable(rss); + } +} + +/* Updates the indirection table SW shadow, does not 
update the HW resources yet */ +void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch) +{ + WARN_ON_ONCE(res->rss_active); + mlx5e_rss_set_indir_uniform(res->rss[0], nch); +} + +int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, + u32 *indir, u8 *key, u8 *hfunc) +{ + struct mlx5e_rss *rss; + + if (rss_idx >= MLX5E_MAX_NUM_RSS) + return -EINVAL; + + rss = res->rss[rss_idx]; + if (!rss) + return -ENOENT; + + return mlx5e_rss_get_rxfh(rss, indir, key, hfunc); +} + +int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, + const u32 *indir, const u8 *key, const u8 *hfunc) +{ + struct mlx5e_rss *rss; + + if (rss_idx >= MLX5E_MAX_NUM_RSS) + return -EINVAL; + + rss = res->rss[rss_idx]; + if (!rss) + return -ENOENT; + + return mlx5e_rss_set_rxfh(rss, indir, key, hfunc, res->rss_rqns, res->rss_nch); +} + +u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) +{ + struct mlx5e_rss *rss = res->rss[0]; + + return mlx5e_rss_get_hash_fields(rss, tt); +} + +int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt, + u8 rx_hash_fields) +{ + struct mlx5e_rss *rss = res->rss[0]; + + return mlx5e_rss_set_hash_fields(rss, tt, rx_hash_fields); +} + +int mlx5e_rx_res_rss_cnt(struct mlx5e_rx_res *res) +{ + int i, cnt; + + cnt = 0; + for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) + if (res->rss[i]) + cnt++; + + return cnt; +} + +int mlx5e_rx_res_rss_index(struct mlx5e_rx_res *res, struct mlx5e_rss *rss) +{ + int i; + + if (!rss) + return -EINVAL; + + for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) + if (rss == res->rss[i]) + return i; + + return -ENOENT; +} + +struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx) +{ + if (rss_idx >= MLX5E_MAX_NUM_RSS) + return NULL; + + return res->rss[rss_idx]; +} + +/* End of API rx_res_rss_* */ + +struct mlx5e_rx_res *mlx5e_rx_res_alloc(void) +{ + return kvzalloc(sizeof(struct mlx5e_rx_res), GFP_KERNEL); +} + +static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res) +{ + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + struct mlx5e_tir_builder *builder; + int err = 0; + int ix; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + res->channels = kvcalloc(res->max_nch, sizeof(*res->channels), GFP_KERNEL); + if (!res->channels) { + err = -ENOMEM; + goto out; + } + + for (ix = 0; ix < res->max_nch; ix++) { + err = mlx5e_rqt_init_direct(&res->channels[ix].direct_rqt, + res->mdev, false, res->drop_rqn); + if (err) { + mlx5_core_warn(res->mdev, "Failed to create a direct RQT: err = %d, ix = %u\n", + err, ix); + goto err_destroy_direct_rqts; + } + } + + for (ix = 0; ix < res->max_nch; ix++) { + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, + mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), + inner_ft_support); + mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param); + mlx5e_tir_builder_build_direct(builder); + + err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true); + if (err) { + mlx5_core_warn(res->mdev, "Failed to create a direct TIR: err = %d, ix = %u\n", + err, ix); + goto err_destroy_direct_tirs; + } + + mlx5e_tir_builder_clear(builder); + } + + goto out; + +err_destroy_direct_tirs: + while (--ix >= 0) + mlx5e_tir_destroy(&res->channels[ix].direct_tir); + + ix = res->max_nch; +err_destroy_direct_rqts: + while (--ix >= 0) + mlx5e_rqt_destroy(&res->channels[ix].direct_rqt); + + kvfree(res->channels); + +out: + 
mlx5e_tir_builder_free(builder); + + return err; +} + +static int mlx5e_rx_res_ptp_init(struct mlx5e_rx_res *res) +{ + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + struct mlx5e_tir_builder *builder; + int err; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + err = mlx5e_rqt_init_direct(&res->ptp.rqt, res->mdev, false, res->drop_rqn); + if (err) + goto out; + + /* Separated from the channels RQs, does not share pkt_merge state with them */ + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, + mlx5e_rqt_get_rqtn(&res->ptp.rqt), + inner_ft_support); + mlx5e_tir_builder_build_direct(builder); + + err = mlx5e_tir_init(&res->ptp.tir, builder, res->mdev, true); + if (err) + goto err_destroy_ptp_rqt; + + goto out; + +err_destroy_ptp_rqt: + mlx5e_rqt_destroy(&res->ptp.rqt); + +out: + mlx5e_tir_builder_free(builder); + return err; +} + +static void mlx5e_rx_res_channels_destroy(struct mlx5e_rx_res *res) +{ + unsigned int ix; + + for (ix = 0; ix < res->max_nch; ix++) { + mlx5e_tir_destroy(&res->channels[ix].direct_tir); + mlx5e_rqt_destroy(&res->channels[ix].direct_rqt); + } + + kvfree(res->channels); +} + +static void mlx5e_rx_res_ptp_destroy(struct mlx5e_rx_res *res) +{ + mlx5e_tir_destroy(&res->ptp.tir); + mlx5e_rqt_destroy(&res->ptp.rqt); +} + +int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev, + enum mlx5e_rx_res_features features, unsigned int max_nch, + u32 drop_rqn, const struct mlx5e_packet_merge_param *init_pkt_merge_param, + unsigned int init_nch) +{ + int err; + + res->mdev = mdev; + res->features = features; + res->max_nch = max_nch; + res->drop_rqn = drop_rqn; + + res->pkt_merge_param = *init_pkt_merge_param; + init_rwsem(&res->pkt_merge_param_sem); + + err = mlx5e_rx_res_rss_init_def(res, init_nch); + if (err) + goto err_out; + + err = mlx5e_rx_res_channels_init(res); + if (err) + goto err_rss_destroy; + + err = mlx5e_rx_res_ptp_init(res); + if (err) + goto err_channels_destroy; + + return 0; + +err_channels_destroy: + mlx5e_rx_res_channels_destroy(res); +err_rss_destroy: + __mlx5e_rx_res_rss_destroy(res, 0); +err_out: + return err; +} + +void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res) +{ + mlx5e_rx_res_ptp_destroy(res); + mlx5e_rx_res_channels_destroy(res); + mlx5e_rx_res_rss_destroy_all(res); +} + +void mlx5e_rx_res_free(struct mlx5e_rx_res *res) +{ + kvfree(res); +} + +u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix) +{ + return mlx5e_tir_get_tirn(&res->channels[ix].direct_tir); +} + +u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) +{ + struct mlx5e_rss *rss = res->rss[0]; + + return mlx5e_rss_get_tirn(rss, tt, false); +} + +u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) +{ + struct mlx5e_rss *rss = res->rss[0]; + + return mlx5e_rss_get_tirn(rss, tt, true); +} + +u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res) +{ + WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_PTP)); + return mlx5e_tir_get_tirn(&res->ptp.tir); +} + +static u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix) +{ + return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt); +} + +static void mlx5e_rx_res_channel_activate_direct(struct mlx5e_rx_res *res, + struct mlx5e_channels *chs, + unsigned int ix) +{ + u32 rqn = res->rss_rqns[ix]; + int err; + + err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to 
redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), + rqn, ix, err); +} + +static void mlx5e_rx_res_channel_deactivate_direct(struct mlx5e_rx_res *res, + unsigned int ix) +{ + int err; + + err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), + res->drop_rqn, ix, err); +} + +void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs) +{ + unsigned int nch, ix; + int err; + + nch = mlx5e_channels_get_num(chs); + + for (ix = 0; ix < chs->num; ix++) { + if (mlx5e_channels_is_xsk(chs, ix)) + mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]); + else + mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]); + } + res->rss_nch = chs->num; + + mlx5e_rx_res_rss_enable(res); + + for (ix = 0; ix < nch; ix++) + mlx5e_rx_res_channel_activate_direct(res, chs, ix); + for (ix = nch; ix < res->max_nch; ix++) + mlx5e_rx_res_channel_deactivate_direct(res, ix); + + if (res->features & MLX5E_RX_RES_FEATURE_PTP) { + u32 rqn; + + if (!mlx5e_channels_get_ptp_rqn(chs, &rqn)) + rqn = res->drop_rqn; + + err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (PTP): err = %d\n", + mlx5e_rqt_get_rqtn(&res->ptp.rqt), + rqn, err); + } +} + +void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res) +{ + unsigned int ix; + int err; + + mlx5e_rx_res_rss_disable(res); + + for (ix = 0; ix < res->max_nch; ix++) + mlx5e_rx_res_channel_deactivate_direct(res, ix); + + if (res->features & MLX5E_RX_RES_FEATURE_PTP) { + err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (PTP): err = %d\n", + mlx5e_rqt_get_rqtn(&res->ptp.rqt), + res->drop_rqn, err); + } +} + +void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *chs, + unsigned int ix, bool xsk) +{ + if (xsk) + mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]); + else + mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]); + + mlx5e_rx_res_rss_enable(res); + + mlx5e_rx_res_channel_activate_direct(res, chs, ix); +} + +int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res, + struct mlx5e_packet_merge_param *pkt_merge_param) +{ + struct mlx5e_tir_builder *builder; + int err, final_err; + unsigned int ix; + + builder = mlx5e_tir_builder_alloc(true); + if (!builder) + return -ENOMEM; + + down_write(&res->pkt_merge_param_sem); + res->pkt_merge_param = *pkt_merge_param; + + mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param); + + final_err = 0; + + for (ix = 0; ix < MLX5E_MAX_NUM_RSS; ix++) { + struct mlx5e_rss *rss = res->rss[ix]; + + if (!rss) + continue; + + err = mlx5e_rss_packet_merge_set_param(rss, pkt_merge_param); + if (err) + final_err = final_err ? 
: err; + } + + for (ix = 0; ix < res->max_nch; ix++) { + err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder); + if (err) { + mlx5_core_warn(res->mdev, "Failed to update packet merge state of direct TIR %#x for channel %u: err = %d\n", + mlx5e_tir_get_tirn(&res->channels[ix].direct_tir), ix, err); + if (!final_err) + final_err = err; + } + } + + up_write(&res->pkt_merge_param_sem); + mlx5e_tir_builder_free(builder); + return final_err; +} + +struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res) +{ + return mlx5e_rss_get_hash(res->rss[0]); +} + +int mlx5e_rx_res_tls_tir_create(struct mlx5e_rx_res *res, unsigned int rxq, + struct mlx5e_tir *tir) +{ + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + struct mlx5e_tir_builder *builder; + u32 rqtn; + int err; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + rqtn = mlx5e_rx_res_get_rqtn_direct(res, rxq); + + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, rqtn, + inner_ft_support); + mlx5e_tir_builder_build_direct(builder); + mlx5e_tir_builder_build_tls(builder); + down_read(&res->pkt_merge_param_sem); + mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param); + err = mlx5e_tir_init(tir, builder, res->mdev, false); + up_read(&res->pkt_merge_param_sem); + + mlx5e_tir_builder_free(builder); + + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h new file mode 100644 index 000000000..5d5f64fab --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. 
*/ + +#ifndef __MLX5_EN_RX_RES_H__ +#define __MLX5_EN_RX_RES_H__ + +#include <linux/kernel.h> +#include "rqt.h" +#include "tir.h" +#include "fs.h" +#include "rss.h" + +struct mlx5e_rx_res; + +struct mlx5e_channels; +struct mlx5e_rss_params_hash; + +enum mlx5e_rx_res_features { + MLX5E_RX_RES_FEATURE_INNER_FT = BIT(0), + MLX5E_RX_RES_FEATURE_PTP = BIT(1), +}; + +/* Setup */ +struct mlx5e_rx_res *mlx5e_rx_res_alloc(void); +int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev, + enum mlx5e_rx_res_features features, unsigned int max_nch, + u32 drop_rqn, const struct mlx5e_packet_merge_param *init_pkt_merge_param, + unsigned int init_nch); +void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res); +void mlx5e_rx_res_free(struct mlx5e_rx_res *res); + +/* TIRN getters for flow steering */ +u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix); +u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); +u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); +u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res); + +/* Activate/deactivate API */ +void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs); +void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res); +void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *chs, + unsigned int ix, bool xsk); + +/* Configuration API */ +void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch); +int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, + u32 *indir, u8 *key, u8 *hfunc); +int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, + const u32 *indir, const u8 *key, const u8 *hfunc); + +u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); +int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt, + u8 rx_hash_fields); +int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res, + struct mlx5e_packet_merge_param *pkt_merge_param); + +int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch); +int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx); +int mlx5e_rx_res_rss_cnt(struct mlx5e_rx_res *res); +int mlx5e_rx_res_rss_index(struct mlx5e_rx_res *res, struct mlx5e_rss *rss); +struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx); + +/* Workaround for hairpin */ +struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res); + +/* Accel TIRs */ +int mlx5e_rx_res_tls_tir_create(struct mlx5e_rx_res *res, unsigned int rxq, + struct mlx5e_tir *tir); +#endif /* __MLX5_EN_RX_RES_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c new file mode 100644 index 000000000..f675b1926 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c @@ -0,0 +1,266 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#include "selq.h" +#include <linux/slab.h> +#include <linux/netdevice.h> +#include <linux/rcupdate.h> +#include "en.h" +#include "en/ptp.h" +#include "en/htb.h" + +struct mlx5e_selq_params { + unsigned int num_regular_queues; + unsigned int num_channels; + unsigned int num_tcs; + union { + u8 is_special_queues; + struct { + bool is_htb : 1; + bool is_ptp : 1; + }; + }; + u16 htb_maj_id; + u16 htb_defcls; +}; + +int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock) +{ + struct mlx5e_selq_params *init_params; + + selq->state_lock = state_lock; + + selq->standby = kvzalloc(sizeof(*selq->standby), GFP_KERNEL); + if (!selq->standby) + return -ENOMEM; + + init_params = kvzalloc(sizeof(*selq->active), GFP_KERNEL); + if (!init_params) { + kvfree(selq->standby); + selq->standby = NULL; + return -ENOMEM; + } + /* Assign dummy values, so that mlx5e_select_queue won't crash. */ + *init_params = (struct mlx5e_selq_params) { + .num_regular_queues = 1, + .num_channels = 1, + .num_tcs = 1, + .is_htb = false, + .is_ptp = false, + .htb_maj_id = 0, + .htb_defcls = 0, + }; + rcu_assign_pointer(selq->active, init_params); + + return 0; +} + +void mlx5e_selq_cleanup(struct mlx5e_selq *selq) +{ + WARN_ON_ONCE(selq->is_prepared); + + kvfree(selq->standby); + selq->standby = NULL; + selq->is_prepared = true; + + mlx5e_selq_apply(selq); + + kvfree(selq->standby); + selq->standby = NULL; +} + +void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params) +{ + struct mlx5e_selq_params *selq_active; + + lockdep_assert_held(selq->state_lock); + WARN_ON_ONCE(selq->is_prepared); + + selq->is_prepared = true; + + selq_active = rcu_dereference_protected(selq->active, + lockdep_is_held(selq->state_lock)); + *selq->standby = *selq_active; + selq->standby->num_channels = params->num_channels; + selq->standby->num_tcs = mlx5e_get_dcb_num_tc(params); + selq->standby->num_regular_queues = + selq->standby->num_channels * selq->standby->num_tcs; + selq->standby->is_ptp = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_TX_PORT_TS); +} + +bool mlx5e_selq_is_htb_enabled(struct mlx5e_selq *selq) +{ + struct mlx5e_selq_params *selq_active = + rcu_dereference_protected(selq->active, lockdep_is_held(selq->state_lock)); + + return selq_active->htb_maj_id; +} + +void mlx5e_selq_prepare_htb(struct mlx5e_selq *selq, u16 htb_maj_id, u16 htb_defcls) +{ + struct mlx5e_selq_params *selq_active; + + lockdep_assert_held(selq->state_lock); + WARN_ON_ONCE(selq->is_prepared); + + selq->is_prepared = true; + + selq_active = rcu_dereference_protected(selq->active, + lockdep_is_held(selq->state_lock)); + *selq->standby = *selq_active; + selq->standby->is_htb = htb_maj_id; + selq->standby->htb_maj_id = htb_maj_id; + selq->standby->htb_defcls = htb_defcls; +} + +void mlx5e_selq_apply(struct mlx5e_selq *selq) +{ + struct mlx5e_selq_params *old_params; + + WARN_ON_ONCE(!selq->is_prepared); + + selq->is_prepared = false; + + old_params = rcu_replace_pointer(selq->active, selq->standby, + lockdep_is_held(selq->state_lock)); + synchronize_net(); /* Wait until ndo_select_queue starts emitting correct values. 
*/ + selq->standby = old_params; +} + +void mlx5e_selq_cancel(struct mlx5e_selq *selq) +{ + lockdep_assert_held(selq->state_lock); + WARN_ON_ONCE(!selq->is_prepared); + + selq->is_prepared = false; +} + +#ifdef CONFIG_MLX5_CORE_EN_DCB +static int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb) +{ + int dscp_cp = 0; + + if (skb->protocol == htons(ETH_P_IP)) + dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; + else if (skb->protocol == htons(ETH_P_IPV6)) + dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; + + return priv->dcbx_dp.dscp2prio[dscp_cp]; +} +#endif + +static int mlx5e_get_up(struct mlx5e_priv *priv, struct sk_buff *skb) +{ +#ifdef CONFIG_MLX5_CORE_EN_DCB + if (READ_ONCE(priv->dcbx_dp.trust_state) == MLX5_QPTS_TRUST_DSCP) + return mlx5e_get_dscp_up(priv, skb); +#endif + if (skb_vlan_tag_present(skb)) + return skb_vlan_tag_get_prio(skb); + return 0; +} + +static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb, + struct mlx5e_selq_params *selq) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int up; + + up = selq->num_tcs > 1 ? mlx5e_get_up(priv, skb) : 0; + + return selq->num_regular_queues + up; +} + +static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5e_selq_params *selq) +{ + u16 classid; + + /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */ + if ((TC_H_MAJ(skb->priority) >> 16) == selq->htb_maj_id) + classid = TC_H_MIN(skb->priority); + else + classid = selq->htb_defcls; + + if (!classid) + return 0; + + return mlx5e_htb_get_txq_by_classid(priv->htb, classid); +} + +u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_selq_params *selq; + int txq_ix, up; + + selq = rcu_dereference_bh(priv->selq.active); + + /* This is a workaround needed only for the mlx5e_netdev_change_profile + * flow that zeroes out the whole priv without unregistering the netdev + * and without preventing ndo_select_queue from being called. + */ + if (unlikely(!selq)) + return 0; + + if (likely(!selq->is_special_queues)) { + /* No special queues, netdev_pick_tx returns one of the regular ones. */ + + txq_ix = netdev_pick_tx(dev, skb, NULL); + + if (selq->num_tcs <= 1) + return txq_ix; + + up = mlx5e_get_up(priv, skb); + + /* Normalize any picked txq_ix to [0, num_channels), + * So we can return a txq_ix that matches the channel and + * packet UP. + */ + return mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels) + + up * selq->num_channels; + } + + if (unlikely(selq->htb_maj_id)) { + /* num_tcs == 1, shortcut for PTP */ + + txq_ix = mlx5e_select_htb_queue(priv, skb, selq); + if (txq_ix > 0) + return txq_ix; + + if (unlikely(selq->is_ptp && mlx5e_use_ptpsq(skb))) + return selq->num_channels; + + txq_ix = netdev_pick_tx(dev, skb, NULL); + + /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs. + * If they are selected, switch to regular queues. + * Driver to select these queues only at mlx5e_select_ptpsq() + * and mlx5e_select_htb_queue(). + */ + return mlx5e_txq_to_ch_ix_htb(txq_ix, selq->num_channels); + } + + /* PTP is enabled */ + + if (mlx5e_use_ptpsq(skb)) + return mlx5e_select_ptpsq(dev, skb, selq); + + txq_ix = netdev_pick_tx(dev, skb, NULL); + + /* Normalize any picked txq_ix to [0, num_channels). Queues in range + * [0, num_regular_queues) will be mapped to the corresponding channel + * index, so that we can apply the packet's UP (if num_tcs > 1). 
+ * If netdev_pick_tx() picks ptp_channel, switch to a regular queue, + * because driver should select the PTP only at mlx5e_select_ptpsq(). + */ + txq_ix = mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels); + + if (selq->num_tcs <= 1) + return txq_ix; + + up = mlx5e_get_up(priv, skb); + + return txq_ix + up * selq->num_channels; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h new file mode 100644 index 000000000..fd590f80e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_SELQ_H__ +#define __MLX5_EN_SELQ_H__ + +#include <linux/kernel.h> + +struct mlx5e_selq_params; + +struct mlx5e_selq { + struct mlx5e_selq_params __rcu *active; + struct mlx5e_selq_params *standby; + struct mutex *state_lock; /* points to priv->state_lock */ + bool is_prepared; +}; + +struct mlx5e_params; +struct net_device; +struct sk_buff; + +int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock); +void mlx5e_selq_cleanup(struct mlx5e_selq *selq); +void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params); +void mlx5e_selq_prepare_htb(struct mlx5e_selq *selq, u16 htb_maj_id, u16 htb_defcls); +bool mlx5e_selq_is_htb_enabled(struct mlx5e_selq *selq); +void mlx5e_selq_apply(struct mlx5e_selq *selq); +void mlx5e_selq_cancel(struct mlx5e_selq *selq); + +static inline u16 mlx5e_txq_to_ch_ix(u16 txq, u16 num_channels) +{ + while (unlikely(txq >= num_channels)) + txq -= num_channels; + return txq; +} + +static inline u16 mlx5e_txq_to_ch_ix_htb(u16 txq, u16 num_channels) +{ + if (unlikely(txq >= num_channels)) { + if (unlikely(txq >= num_channels << 3)) + txq %= num_channels; + else + do + txq -= num_channels; + while (txq >= num_channels); + } + return txq; +} + +u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev); + +#endif /* __MLX5_EN_SELQ_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c new file mode 100644 index 000000000..21aab9635 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_accept(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + return true; +} + +static int +tc_act_parse_accept(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->flags |= MLX5_ATTR_FLAG_ACCEPT; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_accept = { + .can_offload = tc_act_can_offload_accept, + .parse_action = tc_act_parse_accept, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c new file mode 100644 index 000000000..3337241cf --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. + +#include "act.h" +#include "en/tc/post_act.h" +#include "en/tc_priv.h" +#include "mlx5_core.h" + +static struct mlx5e_tc_act *tc_acts_fdb[NUM_FLOW_ACTIONS] = { + [FLOW_ACTION_ACCEPT] = &mlx5e_tc_act_accept, + [FLOW_ACTION_DROP] = &mlx5e_tc_act_drop, + [FLOW_ACTION_TRAP] = &mlx5e_tc_act_trap, + [FLOW_ACTION_GOTO] = &mlx5e_tc_act_goto, + [FLOW_ACTION_REDIRECT] = &mlx5e_tc_act_mirred, + [FLOW_ACTION_MIRRED] = &mlx5e_tc_act_mirred, + [FLOW_ACTION_REDIRECT_INGRESS] = &mlx5e_tc_act_redirect_ingress, + [FLOW_ACTION_VLAN_PUSH] = &mlx5e_tc_act_vlan, + [FLOW_ACTION_VLAN_POP] = &mlx5e_tc_act_vlan, + [FLOW_ACTION_VLAN_MANGLE] = &mlx5e_tc_act_vlan_mangle, + [FLOW_ACTION_TUNNEL_ENCAP] = &mlx5e_tc_act_tun_encap, + [FLOW_ACTION_TUNNEL_DECAP] = &mlx5e_tc_act_tun_decap, + [FLOW_ACTION_MANGLE] = &mlx5e_tc_act_pedit, + [FLOW_ACTION_ADD] = &mlx5e_tc_act_pedit, + [FLOW_ACTION_CSUM] = &mlx5e_tc_act_csum, + [FLOW_ACTION_PTYPE] = &mlx5e_tc_act_ptype, + [FLOW_ACTION_SAMPLE] = &mlx5e_tc_act_sample, + [FLOW_ACTION_POLICE] = &mlx5e_tc_act_police, + [FLOW_ACTION_CT] = &mlx5e_tc_act_ct, + [FLOW_ACTION_MPLS_PUSH] = &mlx5e_tc_act_mpls_push, + [FLOW_ACTION_MPLS_POP] = &mlx5e_tc_act_mpls_pop, + [FLOW_ACTION_VLAN_PUSH_ETH] = &mlx5e_tc_act_vlan, + [FLOW_ACTION_VLAN_POP_ETH] = &mlx5e_tc_act_vlan, +}; + +static struct mlx5e_tc_act *tc_acts_nic[NUM_FLOW_ACTIONS] = { + [FLOW_ACTION_ACCEPT] = &mlx5e_tc_act_accept, + [FLOW_ACTION_DROP] = &mlx5e_tc_act_drop, + [FLOW_ACTION_GOTO] = &mlx5e_tc_act_goto, + [FLOW_ACTION_REDIRECT] = &mlx5e_tc_act_mirred_nic, + [FLOW_ACTION_MANGLE] = &mlx5e_tc_act_pedit, + [FLOW_ACTION_ADD] = &mlx5e_tc_act_pedit, + [FLOW_ACTION_CSUM] = &mlx5e_tc_act_csum, + [FLOW_ACTION_MARK] = &mlx5e_tc_act_mark, + [FLOW_ACTION_CT] = &mlx5e_tc_act_ct, +}; + +/** + * mlx5e_tc_act_get() - Get an action parser for an action id. + * @act_id: Flow action id. + * @ns_type: flow namespace type. + */ +struct mlx5e_tc_act * +mlx5e_tc_act_get(enum flow_action_id act_id, + enum mlx5_flow_namespace_type ns_type) +{ + struct mlx5e_tc_act **tc_acts; + + tc_acts = ns_type == MLX5_FLOW_NAMESPACE_FDB ? tc_acts_fdb : tc_acts_nic; + + return tc_acts[act_id]; +} + +/** + * mlx5e_tc_act_init_parse_state() - Init a new parse_state. + * @parse_state: Parsing state. + * @flow: mlx5e tc flow being handled. + * @flow_action: flow action to parse. + * @extack: to set an error msg. + * + * The same parse_state should be passed to action parsers + * for tracking the current parsing state. + */ +void +mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_tc_flow *flow, + struct flow_action *flow_action, + struct netlink_ext_ack *extack) +{ + memset(parse_state, 0, sizeof(*parse_state)); + parse_state->flow = flow; + parse_state->extack = extack; + parse_state->flow_action = flow_action; +} + +void +mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action, + struct mlx5e_tc_flow_action *flow_action_reorder) +{ + struct flow_action_entry *act; + int i, j = 0; + + flow_action_for_each(i, act, flow_action) { + /* Add CT action to be first. 
*/ + if (act->id == FLOW_ACTION_CT) + flow_action_reorder->entries[j++] = act; + } + + flow_action_for_each(i, act, flow_action) { + if (act->id == FLOW_ACTION_CT) + continue; + flow_action_reorder->entries[j++] = act; + } +} + +int +mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state, + struct flow_action *flow_action, + struct mlx5_flow_attr *attr, + enum mlx5_flow_namespace_type ns_type) +{ + struct flow_action_entry *act; + struct mlx5e_tc_act *tc_act; + struct mlx5e_priv *priv; + int err = 0, i; + + priv = parse_state->flow->priv; + + flow_action_for_each(i, act, flow_action) { + tc_act = mlx5e_tc_act_get(act->id, ns_type); + if (!tc_act || !tc_act->post_parse) + continue; + + err = tc_act->post_parse(parse_state, priv, attr); + if (err) + goto out; + } + +out: + return err; +} + +int +mlx5e_tc_act_set_next_post_act(struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct mlx5_flow_attr *next_attr) +{ + struct mlx5_core_dev *mdev = flow->priv->mdev; + struct mlx5e_tc_mod_hdr_acts *mod_acts; + int err; + + mod_acts = &attr->parse_attr->mod_hdr_acts; + + /* Set handle on current post act rule to next post act rule. */ + err = mlx5e_tc_post_act_set_handle(mdev, next_attr->post_act_handle, mod_acts); + if (err) { + mlx5_core_warn(mdev, "Failed setting post action handle"); + return err; + } + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h new file mode 100644 index 000000000..e1570ff05 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#ifndef __MLX5_EN_TC_ACT_H__ +#define __MLX5_EN_TC_ACT_H__ + +#include <net/tc_act/tc_pedit.h> +#include <net/flow_offload.h> +#include <linux/netlink.h> +#include "eswitch.h" +#include "pedit.h" + +struct mlx5_flow_attr; + +struct mlx5e_tc_act_parse_state { + struct flow_action *flow_action; + struct mlx5e_tc_flow *flow; + struct netlink_ext_ack *extack; + u32 actions; + bool ct; + bool ct_clear; + bool encap; + bool decap; + bool mpls_push; + bool eth_push; + bool eth_pop; + bool ptype_host; + const struct ip_tunnel_info *tun_info; + struct mlx5e_mpls_info mpls_info; + int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; + int if_count; + struct mlx5_tc_ct_priv *ct_priv; +}; + +struct mlx5e_tc_act { + bool (*can_offload)(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr); + + int (*parse_action)(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr); + + int (*post_parse)(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr); + + bool (*is_multi_table_act)(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5_flow_attr *attr); + + int (*offload_action)(struct mlx5e_priv *priv, + struct flow_offload_action *fl_act, + struct flow_action_entry *act); + + int (*destroy_action)(struct mlx5e_priv *priv, + struct flow_offload_action *fl_act); + + int (*stats_action)(struct mlx5e_priv *priv, + struct flow_offload_action *fl_act); +}; + +struct mlx5e_tc_flow_action { + unsigned int num_entries; + struct flow_action_entry **entries; +}; + +extern struct mlx5e_tc_act mlx5e_tc_act_drop; +extern struct mlx5e_tc_act mlx5e_tc_act_trap; +extern struct mlx5e_tc_act mlx5e_tc_act_accept; +extern struct mlx5e_tc_act mlx5e_tc_act_mark; +extern struct mlx5e_tc_act mlx5e_tc_act_goto; +extern struct mlx5e_tc_act mlx5e_tc_act_tun_encap; +extern struct mlx5e_tc_act mlx5e_tc_act_tun_decap; +extern struct mlx5e_tc_act mlx5e_tc_act_csum; +extern struct mlx5e_tc_act mlx5e_tc_act_pedit; +extern struct mlx5e_tc_act mlx5e_tc_act_vlan; +extern struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle; +extern struct mlx5e_tc_act mlx5e_tc_act_mpls_push; +extern struct mlx5e_tc_act mlx5e_tc_act_mpls_pop; +extern struct mlx5e_tc_act mlx5e_tc_act_mirred; +extern struct mlx5e_tc_act mlx5e_tc_act_mirred_nic; +extern struct mlx5e_tc_act mlx5e_tc_act_ct; +extern struct mlx5e_tc_act mlx5e_tc_act_sample; +extern struct mlx5e_tc_act mlx5e_tc_act_ptype; +extern struct mlx5e_tc_act mlx5e_tc_act_redirect_ingress; +extern struct mlx5e_tc_act mlx5e_tc_act_police; + +struct mlx5e_tc_act * +mlx5e_tc_act_get(enum flow_action_id act_id, + enum mlx5_flow_namespace_type ns_type); + +void +mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_tc_flow *flow, + struct flow_action *flow_action, + struct netlink_ext_ack *extack); + +void +mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action, + struct mlx5e_tc_flow_action *flow_action_reorder); + +int +mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state, + struct flow_action *flow_action, + struct mlx5_flow_attr *attr, + enum mlx5_flow_namespace_type ns_type); + +int +mlx5e_tc_act_set_next_post_act(struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct mlx5_flow_attr *next_attr); + +#endif /* __MLX5_EN_TC_ACT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c new file mode 100644 index 000000000..c0f08ae6a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <linux/tc_act/tc_csum.h> +#include "act.h" +#include "en/tc_priv.h" + +static bool +csum_offload_supported(struct mlx5e_priv *priv, + u32 action, + u32 update_flags, + struct netlink_ext_ack *extack) +{ + u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP | + TCA_CSUM_UPDATE_FLAG_UDP; + + /* The HW recalcs checksums only if re-writing headers */ + if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) { + NL_SET_ERR_MSG_MOD(extack, + "TC csum action is only offloaded with pedit"); + netdev_warn(priv->netdev, + "TC csum action is only offloaded with pedit\n"); + return false; + } + + if (update_flags & ~prot_flags) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload TC csum action for some header/s"); + netdev_warn(priv->netdev, + "can't offload TC csum action for some header/s - flags %#x\n", + update_flags); + return false; + } + + return true; +} + +static bool +tc_act_can_offload_csum(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_flow *flow = parse_state->flow; + + return csum_offload_supported(flow->priv, attr->action, + act->csum_flags, parse_state->extack); +} + +static int +tc_act_parse_csum(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_csum = { + .can_offload = tc_act_can_offload_csum, + .parse_action = tc_act_parse_csum, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c new file mode 100644 index 000000000..a829c9428 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" +#include "en/tc_ct.h" + +static bool +tc_act_can_offload_ct(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR; + struct netlink_ext_ack *extack = parse_state->extack; + + if (parse_state->ct && !clear_action) { + NL_SET_ERR_MSG_MOD(extack, "Multiple CT actions are not supported"); + return false; + } + + return true; +} + +static int +tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR; + int err; + + /* It's redundant to do ct clear more than once. 
*/ + if (clear_action && parse_state->ct_clear) + return 0; + + err = mlx5_tc_ct_parse_action(parse_state->ct_priv, attr, + &attr->parse_attr->mod_hdr_acts, + act, parse_state->extack); + if (err) + return err; + + + if (mlx5e_is_eswitch_flow(parse_state->flow)) + attr->esw_attr->split_count = attr->esw_attr->out_count; + + if (clear_action) { + parse_state->ct_clear = true; + } else { + attr->flags |= MLX5_ATTR_FLAG_CT; + flow_flag_set(parse_state->flow, CT); + parse_state->ct = true; + } + + return 0; +} + +static int +tc_act_post_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_mod_hdr_acts *mod_acts = &attr->parse_attr->mod_hdr_acts; + int err; + + /* If ct action exist, we can ignore previous ct_clear actions */ + if (parse_state->ct) + return 0; + + if (parse_state->ct_clear) { + err = mlx5_tc_ct_set_ct_clear_regs(parse_state->ct_priv, mod_acts); + if (err) { + NL_SET_ERR_MSG_MOD(parse_state->extack, + "Failed to set registers for ct clear"); + return err; + } + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + /* Prevent handling of additional, redundant clear actions */ + parse_state->ct_clear = false; + } + + return 0; +} + +static bool +tc_act_is_multi_table_act_ct(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5_flow_attr *attr) +{ + if (act->ct.action & TCA_CT_ACT_CLEAR) + return false; + + return true; +} + +struct mlx5e_tc_act mlx5e_tc_act_ct = { + .can_offload = tc_act_can_offload_ct, + .parse_action = tc_act_parse_ct, + .is_multi_table_act = tc_act_is_multi_table_act_ct, + .post_parse = tc_act_post_parse_ct, +}; + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c new file mode 100644 index 000000000..dd025a95c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_drop(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + return true; +} + +static int +tc_act_parse_drop(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_drop = { + .can_offload = tc_act_can_offload_drop, + .parse_action = tc_act_parse_drop, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c new file mode 100644 index 000000000..25174f686 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
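The accept, drop and CT handlers above all plug into the per-action ops tables built in act.c (tc_acts_fdb/tc_acts_nic, indexed by flow_action_id, each entry exposing can_offload/parse_action and optional post_parse). The driver's actual parse loop lives outside this hunk, so the following is only a rough standalone sketch of that dispatch shape; every name in it is invented and none of it is driver code.

/* Illustrative sketch only, not part of the driver: ops looked up by
 * action id, with can_offload() gating parse_action(), mirroring the
 * tc_acts_fdb/tc_acts_nic tables above.
 */
#include <stdio.h>
#include <stdbool.h>

enum demo_act_id { DEMO_ACT_ACCEPT, DEMO_ACT_DROP, DEMO_NUM_ACTS };

struct demo_act_ops {
	bool (*can_offload)(int act_index);
	int (*parse_action)(unsigned int *attr_action);
};

static bool demo_can_offload(int act_index) { (void)act_index; return true; }
static int demo_parse_accept(unsigned int *attr_action) { *attr_action |= 0x1; return 0; }
static int demo_parse_drop(unsigned int *attr_action) { *attr_action |= 0x2; return 0; }

static const struct demo_act_ops demo_acts[DEMO_NUM_ACTS] = {
	[DEMO_ACT_ACCEPT] = { demo_can_offload, demo_parse_accept },
	[DEMO_ACT_DROP]   = { demo_can_offload, demo_parse_drop },
};

int main(void)
{
	enum demo_act_id actions[] = { DEMO_ACT_ACCEPT, DEMO_ACT_DROP };
	unsigned int attr_action = 0;
	int i;

	for (i = 0; i < 2; i++) {
		const struct demo_act_ops *ops = &demo_acts[actions[i]];

		if (!ops->can_offload(i) || ops->parse_action(&attr_action))
			return 1; /* reject the whole flow on the first failure */
	}
	printf("attr_action = %#x\n", attr_action); /* prints 0x3 */
	return 0;
}

The driver's real loop additionally threads parse_state and the flow attr through the callbacks and selects the table by namespace via mlx5e_tc_act_get(), but the lookup-then-callbacks structure is the same.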
+ +#include "act.h" +#include "en/tc_priv.h" +#include "eswitch.h" + +static int +validate_goto_chain(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + bool is_esw = mlx5e_is_eswitch_flow(flow); + bool ft_flow = mlx5e_is_ft_flow(flow); + u32 dest_chain = act->chain_index; + struct mlx5_fs_chains *chains; + struct mlx5_eswitch *esw; + u32 reformat_and_fwd; + u32 max_chain; + + esw = priv->mdev->priv.eswitch; + chains = is_esw ? esw_chains(esw) : mlx5e_nic_chains(tc); + max_chain = mlx5_chains_get_chain_range(chains); + reformat_and_fwd = is_esw ? + MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) : + MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table); + + if (ft_flow) { + NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported"); + return -EOPNOTSUPP; + } + + if (!mlx5_chains_backwards_supported(chains) && + dest_chain <= attr->chain) { + NL_SET_ERR_MSG_MOD(extack, "Goto lower numbered chain isn't supported"); + return -EOPNOTSUPP; + } + + if (dest_chain > max_chain) { + NL_SET_ERR_MSG_MOD(extack, + "Requested destination chain is out of supported range"); + return -EOPNOTSUPP; + } + + if (attr->action & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | + MLX5_FLOW_CONTEXT_ACTION_DECAP) && + !reformat_and_fwd) { + NL_SET_ERR_MSG_MOD(extack, + "Goto chain is not allowed if action has reformat or decap"); + return -EOPNOTSUPP; + } + + return 0; +} + +static bool +tc_act_can_offload_goto(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow *flow = parse_state->flow; + + if (validate_goto_chain(flow->priv, flow, attr, act, extack)) + return false; + + return true; +} + +static int +tc_act_parse_goto(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->dest_chain = act->chain_index; + + return 0; +} + +static int +tc_act_post_parse_goto(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow *flow = parse_state->flow; + + if (!attr->dest_chain) + return 0; + + if (parse_state->decap) { + /* It can be supported if we'll create a mapping for + * the tunnel device only (without tunnel), and set + * this tunnel id with this decap flow. + * + * On restore (miss), we'll just set this saved tunnel + * device. 
+ */ + + NL_SET_ERR_MSG_MOD(extack, "Decap with goto isn't supported"); + netdev_warn(priv->netdev, "Decap with goto isn't supported"); + return -EOPNOTSUPP; + } + + if (!mlx5e_is_eswitch_flow(flow) && parse_attr->mirred_ifindex[0]) { + NL_SET_ERR_MSG_MOD(extack, "Mirroring goto chain rules isn't supported"); + return -EOPNOTSUPP; + } + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_goto = { + .can_offload = tc_act_can_offload_goto, + .parse_action = tc_act_parse_goto, + .post_parse = tc_act_post_parse_goto, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c new file mode 100644 index 000000000..e8d227595 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en_tc.h" + +static bool +tc_act_can_offload_mark(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + if (act->mark & ~MLX5E_TC_FLOW_ID_MASK) { + NL_SET_ERR_MSG_MOD(parse_state->extack, "Bad flow mark, only 16 bit supported"); + return false; + } + + return true; +} + +static int +tc_act_parse_mark(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->nic_attr->flow_tag = act->mark; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_mark = { + .can_offload = tc_act_can_offload_mark, + .parse_action = tc_act_parse_mark, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c new file mode 100644 index 000000000..4ac7de3f6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c @@ -0,0 +1,337 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <linux/if_macvlan.h> +#include <linux/if_vlan.h> +#include <net/bareudp.h> +#include <net/bonding.h> +#include "act.h" +#include "vlan.h" +#include "en/tc_tun_encap.h" +#include "en/tc_priv.h" +#include "en_rep.h" +#include "lag/lag.h" + +static bool +same_vf_reps(struct mlx5e_priv *priv, struct net_device *out_dev) +{ + return mlx5e_eswitch_vf_rep(priv->netdev) && + priv->netdev == out_dev; +} + +static int +verify_uplink_forwarding(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct net_device *out_dev, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rep_priv; + + /* Forwarding non encapsulated traffic between + * uplink ports is allowed only if + * termination_table_raw_traffic cap is set. + * + * Input vport was stored attr->in_rep. + * In LAG case, *priv* is the private data of + * uplink which may be not the input vport. 
+ */ + rep_priv = mlx5e_rep_to_rep_priv(attr->esw_attr->in_rep); + + if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) && + mlx5e_eswitch_uplink_rep(out_dev))) + return 0; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, + termination_table_raw_traffic)) { + NL_SET_ERR_MSG_MOD(extack, + "devices are both uplink, can't offload forwarding"); + return -EOPNOTSUPP; + } else if (out_dev != rep_priv->netdev) { + NL_SET_ERR_MSG_MOD(extack, + "devices are not the same uplink, can't offload forwarding"); + return -EOPNOTSUPP; + } + return 0; +} + +static bool +is_duplicated_output_device(struct net_device *dev, + struct net_device *out_dev, + int *ifindexes, int if_count, + struct netlink_ext_ack *extack) +{ + int i; + + for (i = 0; i < if_count; i++) { + if (ifindexes[i] == out_dev->ifindex) { + NL_SET_ERR_MSG_MOD(extack, "can't duplicate output to same device"); + netdev_err(dev, "can't duplicate output to same device: %s\n", + out_dev->name); + return true; + } + } + + return false; +} + +static struct net_device * +get_fdb_out_dev(struct net_device *uplink_dev, struct net_device *out_dev) +{ + struct net_device *fdb_out_dev = out_dev; + struct net_device *uplink_upper; + + rcu_read_lock(); + uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev); + if (uplink_upper && netif_is_lag_master(uplink_upper) && + uplink_upper == out_dev) { + fdb_out_dev = uplink_dev; + } else if (netif_is_lag_master(out_dev)) { + fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev)); + if (fdb_out_dev && + (!mlx5e_eswitch_rep(fdb_out_dev) || + !netdev_port_same_parent_id(fdb_out_dev, uplink_dev))) + fdb_out_dev = NULL; + } + rcu_read_unlock(); + return fdb_out_dev; +} + +static bool +tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow *flow = parse_state->flow; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct net_device *out_dev = act->dev; + struct mlx5e_priv *priv = flow->priv; + struct mlx5_esw_flow_attr *esw_attr; + + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; + + if (!out_dev) { + /* out_dev is NULL when filters with + * non-existing mirred device are replayed to + * the driver. + */ + return false; + } + + if (parse_state->mpls_push && !netif_is_bareudp(out_dev)) { + NL_SET_ERR_MSG_MOD(extack, "mpls is supported only through a bareudp device"); + return false; + } + + if (parse_state->eth_pop && !parse_state->mpls_push) { + NL_SET_ERR_MSG_MOD(extack, "vlan pop eth is supported only with mpls push"); + return false; + } + + if (flow_flag_test(parse_state->flow, L3_TO_L2_DECAP) && !parse_state->eth_push) { + NL_SET_ERR_MSG_MOD(extack, "mpls pop is only supported with vlan eth push"); + return false; + } + + if (mlx5e_is_ft_flow(flow) && out_dev == priv->netdev) { + /* Ignore forward to self rules generated + * by adding both mlx5 devs to the flow table + * block on a normal nft offload setup. 
+ */ + return false; + } + + if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) { + NL_SET_ERR_MSG_MOD(extack, + "can't support more output ports, can't offload forwarding"); + netdev_warn(priv->netdev, + "can't support more than %d output ports, can't offload forwarding\n", + esw_attr->out_count); + return false; + } + + if (parse_state->encap || + netdev_port_same_parent_id(priv->netdev, out_dev) || + netif_is_ovs_master(out_dev)) + return true; + + if (parse_attr->filter_dev != priv->netdev) { + /* All mlx5 devices are called to configure + * high level device filters. Therefore, the + * *attempt* to install a filter on invalid + * eswitch should not trigger an explicit error + */ + return false; + } + + NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding"); + + return false; +} + +static int +parse_mirred_encap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct net_device *out_dev = act->dev; + + parse_attr->mirred_ifindex[esw_attr->out_count] = out_dev->ifindex; + parse_attr->tun_info[esw_attr->out_count] = + mlx5e_dup_tun_info(parse_state->tun_info); + + if (!parse_attr->tun_info[esw_attr->out_count]) + return -ENOMEM; + + parse_state->encap = false; + + if (parse_state->mpls_push) { + memcpy(&parse_attr->mpls_info[esw_attr->out_count], + &parse_state->mpls_info, sizeof(parse_state->mpls_info)); + parse_state->mpls_push = false; + } + esw_attr->dests[esw_attr->out_count].flags |= MLX5_ESW_DEST_ENCAP; + esw_attr->out_count++; + /* attr->dests[].rep is resolved when we handle encap */ + + return 0; +} + +static int +parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct net_device *out_dev = act->dev; + struct net_device *uplink_dev; + struct mlx5e_priv *out_priv; + struct mlx5_eswitch *esw; + bool is_uplink_rep; + int *ifindexes; + int if_count; + int err; + + esw = priv->mdev->priv.eswitch; + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + ifindexes = parse_state->ifindexes; + if_count = parse_state->if_count; + + if (is_duplicated_output_device(priv->netdev, out_dev, ifindexes, if_count, extack)) + return -EOPNOTSUPP; + + parse_state->ifindexes[if_count] = out_dev->ifindex; + parse_state->if_count++; + is_uplink_rep = mlx5e_eswitch_uplink_rep(out_dev); + err = mlx5_lag_do_mirred(priv->mdev, out_dev); + if (err) + return err; + + out_dev = get_fdb_out_dev(uplink_dev, out_dev); + if (!out_dev) + return -ENODEV; + + if (is_vlan_dev(out_dev)) { + err = mlx5e_tc_act_vlan_add_push_action(priv, attr, &out_dev, extack); + if (err) + return err; + } + + if (is_vlan_dev(parse_attr->filter_dev)) { + err = mlx5e_tc_act_vlan_add_pop_action(priv, attr, extack); + if (err) + return err; + } + + if (netif_is_macvlan(out_dev)) + out_dev = macvlan_dev_real_dev(out_dev); + + err = verify_uplink_forwarding(priv, attr, out_dev, extack); + if (err) + return err; + + if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "devices are not on same switch HW, can't offload forwarding"); + return 
-EOPNOTSUPP; + } + + if (same_vf_reps(priv, out_dev)) { + NL_SET_ERR_MSG_MOD(extack, "can't forward from a VF to itself"); + return -EOPNOTSUPP; + } + + out_priv = netdev_priv(out_dev); + rpriv = out_priv->ppriv; + esw_attr->dests[esw_attr->out_count].rep = rpriv->rep; + esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev; + + /* If output device is bond master then rules are not explicit + * so we don't attempt to count them. + */ + if (is_uplink_rep && MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) && + MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up)) + attr->lag.count = true; + + esw_attr->out_count++; + + return 0; +} + +static int +parse_mirred_ovs_master(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct net_device *out_dev = act->dev; + int err; + + err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex, + MLX5E_TC_INT_PORT_EGRESS, + &attr->action, esw_attr->out_count); + if (err) + return err; + + esw_attr->out_count++; + return 0; +} + +static int +tc_act_parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct net_device *out_dev = act->dev; + int err = -EOPNOTSUPP; + + if (parse_state->encap) + err = parse_mirred_encap(parse_state, act, attr); + else if (netdev_port_same_parent_id(priv->netdev, out_dev)) + err = parse_mirred(parse_state, act, priv, attr); + else if (netif_is_ovs_master(out_dev)) + err = parse_mirred_ovs_master(parse_state, act, priv, attr); + + if (err) + return err; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_mirred = { + .can_offload = tc_act_can_offload_mirred, + .parse_action = tc_act_parse_mirred, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c new file mode 100644 index 000000000..90b4c1b34 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
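parse_mirred() above records every mirred destination's ifindex in parse_state->ifindexes and rejects the rule when a destination repeats, while tc_act_can_offload_mirred() separately caps the number of outputs at MLX5_MAX_FLOW_FWD_VPORTS. A minimal standalone sketch of just that bookkeeping follows; the names are invented and this is not driver code.

/* Illustrative sketch only, not part of the driver: duplicate-output
 * detection over a small ifindex array, same idea as
 * is_duplicated_output_device() above.
 */
#include <stdbool.h>
#include <stdio.h>

#define DEMO_MAX_FWD_VPORTS 4 /* stands in for MLX5_MAX_FLOW_FWD_VPORTS */

static bool demo_is_duplicated_output(const int *ifindexes, int if_count, int ifindex)
{
	int i;

	for (i = 0; i < if_count; i++)
		if (ifindexes[i] == ifindex)
			return true;
	return false;
}

int main(void)
{
	int ifindexes[DEMO_MAX_FWD_VPORTS];
	int if_count = 0;
	int outputs[] = { 3, 7, 3 }; /* the third output repeats ifindex 3 */
	int i;

	for (i = 0; i < 3; i++) {
		if (demo_is_duplicated_output(ifindexes, if_count, outputs[i])) {
			printf("duplicate output ifindex %d, rejecting the rule\n", outputs[i]);
			return 1;
		}
		if (if_count >= DEMO_MAX_FWD_VPORTS) {
			printf("too many outputs, rejecting the rule\n");
			return 1;
		}
		ifindexes[if_count++] = outputs[i];
	}
	printf("accepted %d outputs\n", if_count);
	return 0;
}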
+ +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow *flow = parse_state->flow; + struct net_device *out_dev = act->dev; + struct mlx5e_priv *priv = flow->priv; + + if (act->id != FLOW_ACTION_REDIRECT) + return false; + + if (priv->netdev->netdev_ops != out_dev->netdev_ops || + !mlx5e_same_hw_devs(priv, netdev_priv(out_dev))) { + NL_SET_ERR_MSG_MOD(extack, + "devices are not on same switch HW, can't offload forwarding"); + netdev_warn(priv->netdev, + "devices %s %s not on same switch HW, can't offload forwarding\n", + netdev_name(priv->netdev), + out_dev->name); + return false; + } + + return true; +} + +static int +tc_act_parse_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->parse_attr->mirred_ifindex[0] = act->dev->ifindex; + flow_flag_set(parse_state->flow, HAIRPIN); + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_mirred_nic = { + .can_offload = tc_act_can_offload_mirred_nic, + .parse_action = tc_act_parse_mirred_nic, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c new file mode 100644 index 000000000..f106190bf --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <net/bareudp.h> +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_mpls_push(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_priv *priv = parse_state->flow->priv; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_l2_to_l3_tunnel) || + act->mpls_push.proto != htons(ETH_P_MPLS_UC)) { + NL_SET_ERR_MSG_MOD(extack, "mpls push is supported only for mpls_uc protocol"); + return false; + } + + return true; +} + +static void +copy_mpls_info(struct mlx5e_mpls_info *mpls_info, + const struct flow_action_entry *act) +{ + mpls_info->label = act->mpls_push.label; + mpls_info->tc = act->mpls_push.tc; + mpls_info->bos = act->mpls_push.bos; + mpls_info->ttl = act->mpls_push.ttl; +} + +static int +tc_act_parse_mpls_push(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + parse_state->mpls_push = true; + copy_mpls_info(&parse_state->mpls_info, act); + + return 0; +} + +static bool +tc_act_can_offload_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct net_device *filter_dev; + + filter_dev = attr->parse_attr->filter_dev; + + /* we only support mpls pop if it is the first action + * or it is second action after tunnel key unset + * and the filter net device is bareudp. Subsequent + * actions can be pedit and the last can be mirred + * egress redirect. 
+ */ + if ((act_index == 1 && !parse_state->decap) || act_index > 1) { + NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only as first action or with decap"); + return false; + } + + if (!netif_is_bareudp(filter_dev)) { + NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only on bareudp devices"); + return false; + } + + return true; +} + +static int +tc_act_parse_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->esw_attr->eth.h_proto = act->mpls_pop.proto; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_flag_set(parse_state->flow, L3_TO_L2_DECAP); + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_mpls_push = { + .can_offload = tc_act_can_offload_mpls_push, + .parse_action = tc_act_parse_mpls_push, +}; + +struct mlx5e_tc_act mlx5e_tc_act_mpls_pop = { + .can_offload = tc_act_can_offload_mpls_pop, + .parse_action = tc_act_parse_mpls_pop, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c new file mode 100644 index 000000000..47597c524 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <linux/if_vlan.h> +#include "act.h" +#include "pedit.h" +#include "en/tc_priv.h" +#include "en/mod_hdr.h" + +static int pedit_header_offsets[] = { + [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth), + [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4), + [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6), + [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp), + [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp), +}; + +#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype]) + +static int +set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset, + struct pedit_headers_action *hdrs, + struct netlink_ext_ack *extack) +{ + u32 *curr_pmask, *curr_pval; + + curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset); + curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset); + + if (*curr_pmask & mask) { /* disallow acting twice on the same location */ + NL_SET_ERR_MSG_MOD(extack, + "curr_pmask and new mask same. Acting twice on same location"); + goto out_err; + } + + *curr_pmask |= mask; + *curr_pval |= (val & mask); + + return 0; + +out_err: + return -EOPNOTSUPP; +} + +int +mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv, + const struct flow_action_entry *act, int namespace, + struct pedit_headers_action *hdrs, + struct netlink_ext_ack *extack) +{ + u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 
0 : 1; + u8 htype = act->mangle.htype; + int err = -EOPNOTSUPP; + u32 mask, val, offset; + + if (htype == FLOW_ACT_MANGLE_UNSPEC) { + NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded"); + goto out_err; + } + + if (!mlx5e_mod_hdr_max_actions(priv->mdev, namespace)) { + NL_SET_ERR_MSG_MOD(extack, "The pedit offload action is not supported"); + goto out_err; + } + + mask = act->mangle.mask; + val = act->mangle.val; + offset = act->mangle.offset; + + err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd], extack); + if (err) + goto out_err; + + hdrs[cmd].pedits++; + + return 0; +out_err: + return err; +} + +static bool +tc_act_can_offload_pedit(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + return true; +} + +static int +tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5e_tc_flow *flow = parse_state->flow; + enum mlx5_flow_namespace_type ns_type; + int err; + + ns_type = mlx5e_get_flow_namespace(flow); + + err = mlx5e_tc_act_pedit_parse_action(flow->priv, act, ns_type, attr->parse_attr->hdrs, + parse_state->extack); + if (err) + return err; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + if (ns_type == MLX5_FLOW_NAMESPACE_FDB) + esw_attr->split_count = esw_attr->out_count; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_pedit = { + .can_offload = tc_act_can_offload_pedit, + .parse_action = tc_act_parse_pedit, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h new file mode 100644 index 000000000..434c8bd71 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_TC_ACT_PEDIT_H__ +#define __MLX5_EN_TC_ACT_PEDIT_H__ + +#include "en_tc.h" + +struct pedit_headers { + struct ethhdr eth; + struct vlan_hdr vlan; + struct iphdr ip4; + struct ipv6hdr ip6; + struct tcphdr tcp; + struct udphdr udp; +}; + +struct pedit_headers_action { + struct pedit_headers vals; + struct pedit_headers masks; + u32 pedits; +}; + +int +mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv, + const struct flow_action_entry *act, int namespace, + struct pedit_headers_action *hdrs, + struct netlink_ext_ack *extack); + +#endif /* __MLX5_EN_TC_ACT_PEDIT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c new file mode 100644 index 000000000..c8e5ca65b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
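set_pedit_val() above ORs each rewrite's mask into curr_pmask so that a later pedit touching any of the same bits is refused, and accumulates the masked value into curr_pval (the caller hands it ~act->mangle.mask, i.e. the bits actually being rewritten). The same idea in a self-contained sketch, for illustration only and not driver code:

/* Illustrative sketch only, not part of the driver: remember which bits
 * of a field were already rewritten and refuse an overlapping rewrite.
 */
#include <stdint.h>
#include <stdio.h>

static int demo_set_pedit_val(uint32_t *curr_pmask, uint32_t *curr_pval,
			      uint32_t mask, uint32_t val)
{
	if (*curr_pmask & mask) /* acting twice on the same location */
		return -1;

	*curr_pmask |= mask;
	*curr_pval |= (val & mask);
	return 0;
}

int main(void)
{
	uint32_t pmask = 0, pval = 0;

	/* First rewrite touches the low 16 bits: accepted, prints 0. */
	printf("%d\n", demo_set_pedit_val(&pmask, &pval, 0x0000ffff, 0x1234));
	/* Second rewrite overlaps bits 0..7: rejected, prints -1. */
	printf("%d\n", demo_set_pedit_val(&pmask, &pval, 0x000000ff, 0x56));
	return 0;
}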
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_police(struct mlx5e_tc_act_parse_state *parse_state,
+			  const struct flow_action_entry *act,
+			  int act_index,
+			  struct mlx5_flow_attr *attr)
+{
+	if (act->police.notexceed.act_id != FLOW_ACTION_PIPE &&
+	    act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) {
+		NL_SET_ERR_MSG_MOD(parse_state->extack,
+				   "Offload not supported when conform action is not pipe or ok");
+		return false;
+	}
+	if (mlx5e_policer_validate(parse_state->flow_action, act,
+				   parse_state->extack))
+		return false;
+
+	return !!mlx5e_get_flow_meters(parse_state->flow->priv->mdev);
+}
+
+static int
+fill_meter_params_from_act(const struct flow_action_entry *act,
+			   struct mlx5e_flow_meter_params *params)
+{
+	params->index = act->hw_index;
+	if (act->police.rate_bytes_ps) {
+		params->mode = MLX5_RATE_LIMIT_BPS;
+		/* change rate to bits per second */
+		params->rate = act->police.rate_bytes_ps << 3;
+		params->burst = act->police.burst;
+	} else if (act->police.rate_pkt_ps) {
+		params->mode = MLX5_RATE_LIMIT_PPS;
+		params->rate = act->police.rate_pkt_ps;
+		params->burst = act->police.burst_pkt;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int
+tc_act_parse_police(struct mlx5e_tc_act_parse_state *parse_state,
+		    const struct flow_action_entry *act,
+		    struct mlx5e_priv *priv,
+		    struct mlx5_flow_attr *attr)
+{
+	int err;
+
+	err = fill_meter_params_from_act(act, &attr->meter_attr.params);
+	if (err)
+		return err;
+
+	attr->action |= MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO;
+	attr->exe_aso_type = MLX5_EXE_ASO_FLOW_METER;
+
+	return 0;
+}
+
+static bool
+tc_act_is_multi_table_act_police(struct mlx5e_priv *priv,
+				 const struct flow_action_entry *act,
+				 struct mlx5_flow_attr *attr)
+{
+	return true;
+}
+
+static int
+tc_act_police_offload(struct mlx5e_priv *priv,
+		      struct flow_offload_action *fl_act,
+		      struct flow_action_entry *act)
+{
+	struct mlx5e_flow_meter_params params = {};
+	struct mlx5e_flow_meter_handle *meter;
+	int err = 0;
+
+	err = mlx5e_policer_validate(&fl_act->action, act, fl_act->extack);
+	if (err)
+		return err;
+
+	err = fill_meter_params_from_act(act, &params);
+	if (err)
+		return err;
+
+	meter = mlx5e_tc_meter_get(priv->mdev, &params);
+	if (IS_ERR(meter) && PTR_ERR(meter) == -ENOENT) {
+		meter = mlx5e_tc_meter_replace(priv->mdev, &params);
+	} else if (!IS_ERR(meter)) {
+		err = mlx5e_tc_meter_update(meter, &params);
+		mlx5e_tc_meter_put(meter);
+	}
+
+	if (IS_ERR(meter)) {
+		NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter");
+		mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index);
+		err = PTR_ERR(meter);
+	}
+
+	return err;
+}
+
+static int
+tc_act_police_destroy(struct mlx5e_priv *priv,
+		      struct flow_offload_action *fl_act)
+{
+	struct mlx5e_flow_meter_params params = {};
+	struct mlx5e_flow_meter_handle *meter;
+
+	params.index = fl_act->index;
+	meter = mlx5e_tc_meter_get(priv->mdev, &params);
+	if (IS_ERR(meter)) {
+		NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter");
+		mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index);
+		return PTR_ERR(meter);
+	}
+	/* first put for the get and second for cleanup */
+	mlx5e_tc_meter_put(meter);
+	mlx5e_tc_meter_put(meter);
+	return 0;
+}
+
+static int
+tc_act_police_stats(struct mlx5e_priv *priv,
+		    struct flow_offload_action *fl_act)
+{
+	struct mlx5e_flow_meter_params params = {};
+	struct mlx5e_flow_meter_handle *meter;
+	u64 bytes, packets, drops, lastuse;
+
+	params.index = fl_act->index;
+	meter = mlx5e_tc_meter_get(priv->mdev, &params);
+	if (IS_ERR(meter)) {
+		NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter");
+		mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index);
+		return PTR_ERR(meter);
+	}
+
+	mlx5e_tc_meter_get_stats(meter, &bytes, &packets, &drops, &lastuse);
+	flow_stats_update(&fl_act->stats, bytes, packets, drops, lastuse,
+			  FLOW_ACTION_HW_STATS_DELAYED);
+	mlx5e_tc_meter_put(meter);
+	return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_police = {
+	.can_offload = tc_act_can_offload_police,
+	.parse_action = tc_act_parse_police,
+	.is_multi_table_act = tc_act_is_multi_table_act_police,
+	.offload_action = tc_act_police_offload,
+	.destroy_action = tc_act_police_destroy,
+	.stats_action = tc_act_police_stats,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c new file mode 100644 index 000000000..6454b031f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c @@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_ptype(struct mlx5e_tc_act_parse_state *parse_state,
+			 const struct flow_action_entry *act,
+			 int act_index,
+			 struct mlx5_flow_attr *attr)
+{
+	return true;
+}
+
+static int
+tc_act_parse_ptype(struct mlx5e_tc_act_parse_state *parse_state,
+		   const struct flow_action_entry *act,
+		   struct mlx5e_priv *priv,
+		   struct mlx5_flow_attr *attr)
+{
+	struct netlink_ext_ack *extack = parse_state->extack;
+
+	if (act->ptype != PACKET_HOST) {
+		NL_SET_ERR_MSG_MOD(extack, "skbedit ptype is only supported with type host");
+		return -EOPNOTSUPP;
+	}
+
+	parse_state->ptype_host = true;
+	return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_ptype = {
+	.can_offload = tc_act_can_offload_ptype,
+	.parse_action = tc_act_parse_ptype,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c new file mode 100644 index 000000000..ad09a8a5f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c @@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
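+/*
+ * Offload of "mirred ingress redirect" towards an OVS internal port.
+ * The filter must already carry a ptype host (skbedit) action, the
+ * destination netdev must be an OVS internal port and it must be the
+ * rule's only destination; see tc_act_can_offload_redirect_ingress().
+ */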
+ +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct net_device *out_dev = act->dev; + struct mlx5_esw_flow_attr *esw_attr; + + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; + + if (!out_dev) + return false; + + if (!netif_is_ovs_master(out_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "redirect to ingress is supported only for OVS internal ports"); + return false; + } + + if (netif_is_ovs_master(parse_attr->filter_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "redirect to ingress is not supported from internal port"); + return false; + } + + if (!parse_state->ptype_host) { + NL_SET_ERR_MSG_MOD(extack, + "redirect to int port ingress requires ptype=host action"); + return false; + } + + if (esw_attr->out_count) { + NL_SET_ERR_MSG_MOD(extack, + "redirect to int port ingress is supported only as single destination"); + return false; + } + + return true; +} + +static int +tc_act_parse_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct net_device *out_dev = act->dev; + int err; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex, + MLX5E_TC_INT_PORT_INGRESS, + &attr->action, esw_attr->out_count); + if (err) + return err; + + esw_attr->out_count++; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_redirect_ingress = { + .can_offload = tc_act_can_offload_redirect_ingress, + .parse_action = tc_act_parse_redirect_ingress, +}; + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c new file mode 100644 index 000000000..2c0196431 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
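+/*
+ * Offload of the psample based "sample" action: the sampling rate, the
+ * psample group number and an optional truncation size are recorded in
+ * the flow attribute and the flow is marked with MLX5_ATTR_FLAG_SAMPLE.
+ * Combining sampling with CT NAT is rejected.
+ */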
+ +#include <net/psample.h> +#include "act.h" +#include "en/tc_priv.h" +#include "en/tc/act/sample.h" + +static bool +tc_act_can_offload_sample(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + bool ct_nat; + + ct_nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; + + if (flow_flag_test(parse_state->flow, CT) && ct_nat) { + NL_SET_ERR_MSG_MOD(extack, "Sample action with CT NAT is not supported"); + return false; + } + + return true; +} + +static int +tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_sample_attr *sample_attr = &attr->sample_attr; + + sample_attr->rate = act->sample.rate; + sample_attr->group_num = act->sample.psample_group->group_num; + + if (act->sample.truncate) + sample_attr->trunc_size = act->sample.trunc_size; + + attr->flags |= MLX5_ATTR_FLAG_SAMPLE; + flow_flag_set(parse_state->flow, SAMPLE); + + return 0; +} + +bool +mlx5e_tc_act_sample_is_multi_table(struct mlx5_core_dev *mdev, + struct mlx5_flow_attr *attr) +{ + if (MLX5_CAP_GEN(mdev, reg_c_preserve) || + attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) + return true; + + return false; +} + +static bool +tc_act_is_multi_table_act_sample(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5_flow_attr *attr) +{ + return mlx5e_tc_act_sample_is_multi_table(priv->mdev, attr); +} + +struct mlx5e_tc_act mlx5e_tc_act_sample = { + .can_offload = tc_act_can_offload_sample, + .parse_action = tc_act_parse_sample, + .is_multi_table_act = tc_act_is_multi_table_act_sample, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h new file mode 100644 index 000000000..3efb3a15c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_TC_ACT_SAMPLE_H__ +#define __MLX5_EN_TC_ACT_SAMPLE_H__ + +#include <net/flow_offload.h> +#include "en/tc_priv.h" + +bool +mlx5e_tc_act_sample_is_multi_table(struct mlx5_core_dev *mdev, + struct mlx5_flow_attr *attr); + +#endif /* __MLX5_EN_TC_ACT_SAMPLE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c new file mode 100644 index 000000000..53b270f65 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
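+/*
+ * Offload of the "trap" action: the packet is forwarded on the slow path
+ * (MLX5_ATTR_FLAG_SLOW_PATH) so it is handled in software.  The action
+ * is accepted only when it is the sole entry of the filter.
+ */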
+ +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_trap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + + if (parse_state->flow_action->num_entries != 1) { + NL_SET_ERR_MSG_MOD(extack, "action trap is supported as a sole action only"); + return false; + } + + return true; +} + +static int +tc_act_parse_trap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_trap = { + .can_offload = tc_act_can_offload_trap, + .parse_action = tc_act_parse_trap, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c new file mode 100644 index 000000000..b4fa2de97 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_tun_encap.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_tun_encap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + if (!act->tunnel) { + NL_SET_ERR_MSG_MOD(parse_state->extack, + "Zero tunnel attributes is not supported"); + return false; + } + + return true; +} + +static int +tc_act_parse_tun_encap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + parse_state->tun_info = act->tunnel; + parse_state->encap = true; + + return 0; +} + +static bool +tc_act_can_offload_tun_decap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + return true; +} + +static int +tc_act_parse_tun_decap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + parse_state->decap = true; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_tun_encap = { + .can_offload = tc_act_can_offload_tun_encap, + .parse_action = tc_act_parse_tun_encap, +}; + +struct mlx5e_tc_act mlx5e_tc_act_tun_decap = { + .can_offload = tc_act_can_offload_tun_decap, + .parse_action = tc_act_parse_tun_decap, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c new file mode 100644 index 000000000..b86ac604d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
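+/*
+ * VLAN push/pop offload for eswitch rules.  Up to MLX5_FS_VLAN_DEPTH
+ * headers are handled; a pop followed by a push is collapsed into a VLAN
+ * header rewrite, and when the device requires a prio tag the pop is
+ * replaced by a prio rewrite in tc_act_post_parse_vlan().
+ */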
+ +#include <linux/if_vlan.h> +#include "act.h" +#include "vlan.h" +#include "en/tc_priv.h" + +static int +add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + u32 *action, struct netlink_ext_ack *extack) +{ + const struct flow_action_entry prio_tag_act = { + .vlan.vid = 0, + .vlan.prio = + MLX5_GET(fte_match_set_lyr_2_4, + mlx5e_get_match_headers_value(*action, + &parse_attr->spec), + first_prio) & + MLX5_GET(fte_match_set_lyr_2_4, + mlx5e_get_match_headers_criteria(*action, + &parse_attr->spec), + first_prio), + }; + + return mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, + &prio_tag_act, parse_attr, action, + extack); +} + +static int +parse_tc_vlan_action(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5_esw_flow_attr *attr, + u32 *action, + struct netlink_ext_ack *extack, + struct mlx5e_tc_act_parse_state *parse_state) +{ + u8 vlan_idx = attr->total_vlan; + + if (vlan_idx >= MLX5_FS_VLAN_DEPTH) { + NL_SET_ERR_MSG_MOD(extack, "Total vlans used is greater than supported"); + return -EOPNOTSUPP; + } + + switch (act->id) { + case FLOW_ACTION_VLAN_POP: + if (vlan_idx) { + if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, + MLX5_FS_VLAN_DEPTH)) { + NL_SET_ERR_MSG_MOD(extack, "vlan pop action is not supported"); + return -EOPNOTSUPP; + } + + *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2; + } else { + *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + } + break; + case FLOW_ACTION_VLAN_PUSH: + attr->vlan_vid[vlan_idx] = act->vlan.vid; + attr->vlan_prio[vlan_idx] = act->vlan.prio; + attr->vlan_proto[vlan_idx] = act->vlan.proto; + if (!attr->vlan_proto[vlan_idx]) + attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q); + + if (vlan_idx) { + if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, + MLX5_FS_VLAN_DEPTH)) { + NL_SET_ERR_MSG_MOD(extack, + "vlan push action is not supported for vlan depth > 1"); + return -EOPNOTSUPP; + } + + *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + } else { + if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) && + (act->vlan.proto != htons(ETH_P_8021Q) || + act->vlan.prio)) { + NL_SET_ERR_MSG_MOD(extack, "vlan push action is not supported"); + return -EOPNOTSUPP; + } + + *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + } + break; + case FLOW_ACTION_VLAN_POP_ETH: + parse_state->eth_pop = true; + break; + case FLOW_ACTION_VLAN_PUSH_ETH: + if (!flow_flag_test(parse_state->flow, L3_TO_L2_DECAP)) + return -EOPNOTSUPP; + parse_state->eth_push = true; + memcpy(attr->eth.h_dest, act->vlan_push_eth.dst, ETH_ALEN); + memcpy(attr->eth.h_source, act->vlan_push_eth.src, ETH_ALEN); + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unexpected action id for VLAN"); + return -EINVAL; + } + + attr->total_vlan = vlan_idx + 1; + + return 0; +} + +int +mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct net_device **out_dev, + struct netlink_ext_ack *extack) +{ + struct net_device *vlan_dev = *out_dev; + struct flow_action_entry vlan_act = { + .id = FLOW_ACTION_VLAN_PUSH, + .vlan.vid = vlan_dev_vlan_id(vlan_dev), + .vlan.proto = vlan_dev_vlan_proto(vlan_dev), + .vlan.prio = 0, + }; + int err; + + err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action, extack, NULL); + if (err) + return err; + + rcu_read_lock(); + *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev)); + rcu_read_unlock(); + if (!*out_dev) + return -ENODEV; + + if (is_vlan_dev(*out_dev)) + err = 
mlx5e_tc_act_vlan_add_push_action(priv, attr, out_dev, extack); + + return err; +} + +int +mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct netlink_ext_ack *extack) +{ + struct flow_action_entry vlan_act = { + .id = FLOW_ACTION_VLAN_POP, + }; + int nest_level, err = 0; + + nest_level = attr->parse_attr->filter_dev->lower_level - + priv->netdev->lower_level; + while (nest_level--) { + err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action, + extack, NULL); + if (err) + return err; + } + + return err; +} + +static bool +tc_act_can_offload_vlan(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + return true; +} + +static int +tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + int err; + + if (act->id == FLOW_ACTION_VLAN_PUSH && + (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) { + /* Replace vlan pop+push with vlan modify */ + attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + err = mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, act, + attr->parse_attr, &attr->action, + parse_state->extack); + } else { + err = parse_tc_vlan_action(priv, act, esw_attr, &attr->action, + parse_state->extack, parse_state); + } + + if (err) + return err; + + esw_attr->split_count = esw_attr->out_count; + + return 0; +} + +static int +tc_act_post_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + int err; + + if (MLX5_CAP_GEN(esw->dev, prio_tag_required) && + attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) { + /* For prio tag mode, replace vlan pop with rewrite vlan prio + * tag rewrite. + */ + attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, + &attr->action, extack); + if (err) + return err; + } + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_vlan = { + .can_offload = tc_act_can_offload_vlan, + .parse_action = tc_act_parse_vlan, + .post_parse = tc_act_post_parse_vlan, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h new file mode 100644 index 000000000..2fa58c6f4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#ifndef __MLX5_EN_TC_ACT_VLAN_H__ +#define __MLX5_EN_TC_ACT_VLAN_H__ + +#include <net/flow_offload.h> +#include "en/tc_priv.h" + +struct pedit_headers_action; + +int +mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct net_device **out_dev, + struct netlink_ext_ack *extack); + +int +mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct netlink_ext_ack *extack); + +int +mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace, + const struct flow_action_entry *act, + struct mlx5e_tc_flow_parse_attr *parse_attr, + u32 *action, struct netlink_ext_ack *extack); + +#endif /* __MLX5_EN_TC_ACT_VLAN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c new file mode 100644 index 000000000..9a8a1a6bd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <linux/if_vlan.h> +#include "act.h" +#include "vlan.h" +#include "en/tc_priv.h" + +struct pedit_headers_action; + +int +mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace, + const struct flow_action_entry *act, + struct mlx5e_tc_flow_parse_attr *parse_attr, + u32 *action, struct netlink_ext_ack *extack) +{ + u16 mask16 = VLAN_VID_MASK; + u16 val16 = act->vlan.vid & VLAN_VID_MASK; + const struct flow_action_entry pedit_act = { + .id = FLOW_ACTION_MANGLE, + .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH, + .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI), + .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16), + .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16), + }; + u8 match_prio_mask, match_prio_val; + void *headers_c, *headers_v; + int err; + + headers_c = mlx5e_get_match_headers_criteria(*action, &parse_attr->spec); + headers_v = mlx5e_get_match_headers_value(*action, &parse_attr->spec); + + if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) && + MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) { + NL_SET_ERR_MSG_MOD(extack, "VLAN rewrite action must have VLAN protocol match"); + return -EOPNOTSUPP; + } + + match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio); + match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio); + if (act->vlan.prio != (match_prio_val & match_prio_mask)) { + NL_SET_ERR_MSG_MOD(extack, "Changing VLAN prio is not supported"); + return -EOPNOTSUPP; + } + + err = mlx5e_tc_act_pedit_parse_action(priv, &pedit_act, namespace, parse_attr->hdrs, + extack); + *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + return err; +} + +static bool +tc_act_can_offload_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + return true; +} + +static int +tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + enum mlx5_flow_namespace_type ns_type; + int err; + + ns_type = mlx5e_get_flow_namespace(parse_state->flow); + err = mlx5e_tc_act_vlan_add_rewrite_action(priv, ns_type, act, attr->parse_attr, + &attr->action, parse_state->extack); + if (err) + return err; + + if (ns_type == MLX5_FLOW_NAMESPACE_FDB) + attr->esw_attr->split_count = attr->esw_attr->out_count; 
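+	/* The assignment above records how many forwarding destinations were
+	 * parsed before this header rewrite, so a mirroring rule can be split
+	 * and the rewrite is not applied to those earlier destinations.
+	 */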
+ + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle = { + .can_offload = tc_act_can_offload_vlan_mangle, + .parse_action = tc_act_parse_vlan_mangle, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h new file mode 100644 index 000000000..bb6b1a979 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */ + +#ifndef __MLX5_EN_TC_CT_FS_H__ +#define __MLX5_EN_TC_CT_FS_H__ + +struct mlx5_ct_fs { + const struct net_device *netdev; + struct mlx5_core_dev *dev; + + /* private data */ + void *priv_data[]; +}; + +struct mlx5_ct_fs_rule { +}; + +struct mlx5_ct_fs_ops { + int (*init)(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct, + struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct); + void (*destroy)(struct mlx5_ct_fs *fs); + + struct mlx5_ct_fs_rule * (*ct_rule_add)(struct mlx5_ct_fs *fs, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, + struct flow_rule *flow_rule); + void (*ct_rule_del)(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule); + + size_t priv_size; +}; + +static inline void *mlx5_ct_fs_priv(struct mlx5_ct_fs *fs) +{ + return &fs->priv_data; +} + +struct mlx5_ct_fs_ops *mlx5_ct_fs_dmfs_ops_get(void); + +#if IS_ENABLED(CONFIG_MLX5_SW_STEERING) +struct mlx5_ct_fs_ops *mlx5_ct_fs_smfs_ops_get(void); +#else +static inline struct mlx5_ct_fs_ops * +mlx5_ct_fs_smfs_ops_get(void) +{ + return NULL; +} +#endif /* IS_ENABLED(CONFIG_MLX5_SW_STEERING) */ + +#endif /* __MLX5_EN_TC_CT_FS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c new file mode 100644 index 000000000..ae4f55be4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. 
*/ + +#include "en_tc.h" +#include "en/tc_ct.h" +#include "en/tc/ct_fs.h" + +#define ct_dbg(fmt, args...)\ + netdev_dbg(fs->netdev, "ct_fs_dmfs debug: " fmt "\n", ##args) + +struct mlx5_ct_fs_dmfs_rule { + struct mlx5_ct_fs_rule fs_rule; + struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; +}; + +static int +mlx5_ct_fs_dmfs_init(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct, + struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct) +{ + return 0; +} + +static void +mlx5_ct_fs_dmfs_destroy(struct mlx5_ct_fs *fs) +{ +} + +static struct mlx5_ct_fs_rule * +mlx5_ct_fs_dmfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, struct flow_rule *flow_rule) +{ + struct mlx5e_priv *priv = netdev_priv(fs->netdev); + struct mlx5_ct_fs_dmfs_rule *dmfs_rule; + int err; + + dmfs_rule = kzalloc(sizeof(*dmfs_rule), GFP_KERNEL); + if (!dmfs_rule) + return ERR_PTR(-ENOMEM); + + dmfs_rule->rule = mlx5_tc_rule_insert(priv, spec, attr); + if (IS_ERR(dmfs_rule->rule)) { + err = PTR_ERR(dmfs_rule->rule); + ct_dbg("Failed to add ct entry fs rule"); + goto err_insert; + } + + dmfs_rule->attr = attr; + + return &dmfs_rule->fs_rule; + +err_insert: + kfree(dmfs_rule); + return ERR_PTR(err); +} + +static void +mlx5_ct_fs_dmfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule) +{ + struct mlx5_ct_fs_dmfs_rule *dmfs_rule = container_of(fs_rule, + struct mlx5_ct_fs_dmfs_rule, + fs_rule); + + mlx5_tc_rule_delete(netdev_priv(fs->netdev), dmfs_rule->rule, dmfs_rule->attr); + kfree(dmfs_rule); +} + +static struct mlx5_ct_fs_ops dmfs_ops = { + .ct_rule_add = mlx5_ct_fs_dmfs_ct_rule_add, + .ct_rule_del = mlx5_ct_fs_dmfs_ct_rule_del, + + .init = mlx5_ct_fs_dmfs_init, + .destroy = mlx5_ct_fs_dmfs_destroy, +}; + +struct mlx5_ct_fs_ops *mlx5_ct_fs_dmfs_ops_get(void) +{ + return &dmfs_ops; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c new file mode 100644 index 000000000..2b80fe735 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c @@ -0,0 +1,380 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. 
*/ + +#include <linux/refcount.h> + +#include "en_tc.h" +#include "en/tc_priv.h" +#include "en/tc_ct.h" +#include "en/tc/ct_fs.h" + +#include "lib/smfs.h" + +#define INIT_ERR_PREFIX "ct_fs_smfs init failed" +#define ct_dbg(fmt, args...)\ + netdev_dbg(fs->netdev, "ct_fs_smfs debug: " fmt "\n", ##args) +#define MLX5_CT_TCP_FLAGS_MASK cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16) + +struct mlx5_ct_fs_smfs_matcher { + struct mlx5dr_matcher *dr_matcher; + struct list_head list; + int prio; + refcount_t ref; +}; + +struct mlx5_ct_fs_smfs_matchers { + struct mlx5_ct_fs_smfs_matcher smfs_matchers[6]; + struct list_head used; +}; + +struct mlx5_ct_fs_smfs { + struct mlx5dr_table *ct_tbl, *ct_nat_tbl; + struct mlx5_ct_fs_smfs_matchers matchers; + struct mlx5_ct_fs_smfs_matchers matchers_nat; + struct mlx5dr_action *fwd_action; + struct mlx5_flow_table *ct_nat; + struct mutex lock; /* Guards matchers */ +}; + +struct mlx5_ct_fs_smfs_rule { + struct mlx5_ct_fs_rule fs_rule; + struct mlx5dr_rule *rule; + struct mlx5dr_action *count_action; + struct mlx5_ct_fs_smfs_matcher *smfs_matcher; +}; + +static inline void +mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bool ipv4, bool tcp, + bool gre) +{ + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); + + if (likely(MLX5_CAP_FLOWTABLE_NIC_RX(fs->dev, ft_field_support.outer_ip_version))) + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version); + else + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); + if (likely(ipv4)) { + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + } else { + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xFF, + MLX5_FLD_SZ_BYTES(fte_match_set_lyr_2_4, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6)); + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xFF, + MLX5_FLD_SZ_BYTES(fte_match_set_lyr_2_4, + src_ipv4_src_ipv6.ipv6_layout.ipv6)); + } + + if (likely(tcp)) { + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_sport); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_dport); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, + ntohs(MLX5_CT_TCP_FLAGS_MASK)); + } else if (!gre) { + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_sport); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_dport); + } + + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 0, MLX5_CT_ZONE_MASK); +} + +static struct mlx5dr_matcher * +mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl, bool ipv4, + bool tcp, bool gre, u32 priority) +{ + struct mlx5dr_matcher *dr_matcher; + struct mlx5_flow_spec *spec; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + mlx5_ct_fs_smfs_fill_mask(fs, spec, ipv4, tcp, gre); + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2 | MLX5_MATCH_OUTER_HEADERS; + + dr_matcher = mlx5_smfs_matcher_create(tbl, priority, spec); + kvfree(spec); + if (!dr_matcher) + return ERR_PTR(-EINVAL); + + return dr_matcher; +} + +static struct mlx5_ct_fs_smfs_matcher * +mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp, bool gre) +{ + struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs); + struct mlx5_ct_fs_smfs_matcher *m, 
*smfs_matcher; + struct mlx5_ct_fs_smfs_matchers *matchers; + struct mlx5dr_matcher *dr_matcher; + struct mlx5dr_table *tbl; + struct list_head *prev; + int prio; + + matchers = nat ? &fs_smfs->matchers_nat : &fs_smfs->matchers; + smfs_matcher = &matchers->smfs_matchers[ipv4 * 3 + tcp * 2 + gre]; + + if (refcount_inc_not_zero(&smfs_matcher->ref)) + return smfs_matcher; + + mutex_lock(&fs_smfs->lock); + + /* Retry with lock, as another thread might have already created the relevant matcher + * till we acquired the lock + */ + if (refcount_inc_not_zero(&smfs_matcher->ref)) + goto out_unlock; + + // Find next available priority in sorted used list + prio = 0; + prev = &matchers->used; + list_for_each_entry(m, &matchers->used, list) { + prev = &m->list; + + if (m->prio == prio) + prio = m->prio + 1; + else + break; + } + + tbl = nat ? fs_smfs->ct_nat_tbl : fs_smfs->ct_tbl; + dr_matcher = mlx5_ct_fs_smfs_matcher_create(fs, tbl, ipv4, tcp, gre, prio); + if (IS_ERR(dr_matcher)) { + netdev_warn(fs->netdev, + "ct_fs_smfs: failed to create matcher (nat %d, ipv4 %d, tcp %d, gre %d), err: %ld\n", + nat, ipv4, tcp, gre, PTR_ERR(dr_matcher)); + + smfs_matcher = ERR_CAST(dr_matcher); + goto out_unlock; + } + + smfs_matcher->dr_matcher = dr_matcher; + smfs_matcher->prio = prio; + list_add(&smfs_matcher->list, prev); + refcount_set(&smfs_matcher->ref, 1); + +out_unlock: + mutex_unlock(&fs_smfs->lock); + return smfs_matcher; +} + +static void +mlx5_ct_fs_smfs_matcher_put(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_smfs_matcher *smfs_matcher) +{ + struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs); + + if (!refcount_dec_and_mutex_lock(&smfs_matcher->ref, &fs_smfs->lock)) + return; + + mlx5_smfs_matcher_destroy(smfs_matcher->dr_matcher); + list_del(&smfs_matcher->list); + mutex_unlock(&fs_smfs->lock); +} + +static int +mlx5_ct_fs_smfs_init(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct, + struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct) +{ + struct mlx5dr_table *ct_tbl, *ct_nat_tbl, *post_ct_tbl; + struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs); + + post_ct_tbl = mlx5_smfs_table_get_from_fs_ft(post_ct); + ct_nat_tbl = mlx5_smfs_table_get_from_fs_ft(ct_nat); + ct_tbl = mlx5_smfs_table_get_from_fs_ft(ct); + fs_smfs->ct_nat = ct_nat; + + if (!ct_tbl || !ct_nat_tbl || !post_ct_tbl) { + netdev_warn(fs->netdev, "ct_fs_smfs: failed to init, missing backing dr tables"); + return -EOPNOTSUPP; + } + + ct_dbg("using smfs steering"); + + fs_smfs->fwd_action = mlx5_smfs_action_create_dest_table(post_ct_tbl); + if (!fs_smfs->fwd_action) { + return -EINVAL; + } + + fs_smfs->ct_tbl = ct_tbl; + fs_smfs->ct_nat_tbl = ct_nat_tbl; + mutex_init(&fs_smfs->lock); + INIT_LIST_HEAD(&fs_smfs->matchers.used); + INIT_LIST_HEAD(&fs_smfs->matchers_nat.used); + + return 0; +} + +static void +mlx5_ct_fs_smfs_destroy(struct mlx5_ct_fs *fs) +{ + struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs); + + mlx5_smfs_action_destroy(fs_smfs->fwd_action); +} + +static inline bool +mlx5_tc_ct_valid_used_dissector_keys(const u32 used_keys) +{ +#define DISS_BIT(name) BIT(FLOW_DISSECTOR_KEY_ ## name) + const u32 basic_keys = DISS_BIT(BASIC) | DISS_BIT(CONTROL) | DISS_BIT(META); + const u32 ipv4_tcp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP); + const u32 ipv6_tcp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP); + const u32 ipv4_udp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS); + const u32 ipv6_udp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS); + const u32 
ipv4_gre = basic_keys | DISS_BIT(IPV4_ADDRS); + const u32 ipv6_gre = basic_keys | DISS_BIT(IPV6_ADDRS); + + return (used_keys == ipv4_tcp || used_keys == ipv4_udp || used_keys == ipv6_tcp || + used_keys == ipv6_udp || used_keys == ipv4_gre || used_keys == ipv6_gre); +} + +static bool +mlx5_ct_fs_smfs_ct_validate_flow_rule(struct mlx5_ct_fs *fs, struct flow_rule *flow_rule) +{ + struct flow_match_ipv4_addrs ipv4_addrs; + struct flow_match_ipv6_addrs ipv6_addrs; + struct flow_match_control control; + struct flow_match_basic basic; + struct flow_match_ports ports; + struct flow_match_tcp tcp; + + if (!mlx5_tc_ct_valid_used_dissector_keys(flow_rule->match.dissector->used_keys)) { + ct_dbg("rule uses unexpected dissectors (0x%08x)", + flow_rule->match.dissector->used_keys); + return false; + } + + flow_rule_match_basic(flow_rule, &basic); + flow_rule_match_control(flow_rule, &control); + flow_rule_match_ipv4_addrs(flow_rule, &ipv4_addrs); + flow_rule_match_ipv6_addrs(flow_rule, &ipv6_addrs); + if (basic.key->ip_proto != IPPROTO_GRE) + flow_rule_match_ports(flow_rule, &ports); + if (basic.key->ip_proto == IPPROTO_TCP) + flow_rule_match_tcp(flow_rule, &tcp); + + if (basic.mask->n_proto != htons(0xFFFF) || + (basic.key->n_proto != htons(ETH_P_IP) && basic.key->n_proto != htons(ETH_P_IPV6)) || + basic.mask->ip_proto != 0xFF || + (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP && + basic.key->ip_proto != IPPROTO_GRE)) { + ct_dbg("rule uses unexpected basic match (n_proto 0x%04x/0x%04x, ip_proto 0x%02x/0x%02x)", + ntohs(basic.key->n_proto), ntohs(basic.mask->n_proto), + basic.key->ip_proto, basic.mask->ip_proto); + return false; + } + + if (basic.key->ip_proto != IPPROTO_GRE && + (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF))) { + ct_dbg("rule uses ports match (src 0x%04x, dst 0x%04x)", + ports.mask->src, ports.mask->dst); + return false; + } + + if (basic.key->ip_proto == IPPROTO_TCP && tcp.mask->flags != MLX5_CT_TCP_FLAGS_MASK) { + ct_dbg("rule uses unexpected tcp match (flags 0x%02x)", tcp.mask->flags); + return false; + } + + return true; +} + +static struct mlx5_ct_fs_rule * +mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, struct flow_rule *flow_rule) +{ + struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs); + struct mlx5_ct_fs_smfs_matcher *smfs_matcher; + struct mlx5_ct_fs_smfs_rule *smfs_rule; + struct mlx5dr_action *actions[5]; + struct mlx5dr_rule *rule; + int num_actions = 0, err; + bool nat, tcp, ipv4, gre; + + if (!mlx5_ct_fs_smfs_ct_validate_flow_rule(fs, flow_rule)) + return ERR_PTR(-EOPNOTSUPP); + + smfs_rule = kzalloc(sizeof(*smfs_rule), GFP_KERNEL); + if (!smfs_rule) + return ERR_PTR(-ENOMEM); + + smfs_rule->count_action = mlx5_smfs_action_create_flow_counter(mlx5_fc_id(attr->counter)); + if (!smfs_rule->count_action) { + err = -EINVAL; + goto err_count; + } + + actions[num_actions++] = smfs_rule->count_action; + actions[num_actions++] = attr->modify_hdr->action.dr_action; + actions[num_actions++] = fs_smfs->fwd_action; + + nat = (attr->ft == fs_smfs->ct_nat); + ipv4 = mlx5e_tc_get_ip_version(spec, true) == 4; + tcp = MLX5_GET(fte_match_param, spec->match_value, + outer_headers.ip_protocol) == IPPROTO_TCP; + gre = MLX5_GET(fte_match_param, spec->match_value, + outer_headers.ip_protocol) == IPPROTO_GRE; + + smfs_matcher = mlx5_ct_fs_smfs_matcher_get(fs, nat, ipv4, tcp, gre); + if (IS_ERR(smfs_matcher)) { + err = PTR_ERR(smfs_matcher); + goto err_matcher; + } + + rule = 
mlx5_smfs_rule_create(smfs_matcher->dr_matcher, spec, num_actions, actions, + spec->flow_context.flow_source); + if (!rule) { + err = -EINVAL; + goto err_create; + } + + smfs_rule->rule = rule; + smfs_rule->smfs_matcher = smfs_matcher; + + return &smfs_rule->fs_rule; + +err_create: + mlx5_ct_fs_smfs_matcher_put(fs, smfs_matcher); +err_matcher: + mlx5_smfs_action_destroy(smfs_rule->count_action); +err_count: + kfree(smfs_rule); + return ERR_PTR(err); +} + +static void +mlx5_ct_fs_smfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule) +{ + struct mlx5_ct_fs_smfs_rule *smfs_rule = container_of(fs_rule, + struct mlx5_ct_fs_smfs_rule, + fs_rule); + + mlx5_smfs_rule_destroy(smfs_rule->rule); + mlx5_ct_fs_smfs_matcher_put(fs, smfs_rule->smfs_matcher); + mlx5_smfs_action_destroy(smfs_rule->count_action); + kfree(smfs_rule); +} + +static struct mlx5_ct_fs_ops fs_smfs_ops = { + .ct_rule_add = mlx5_ct_fs_smfs_ct_rule_add, + .ct_rule_del = mlx5_ct_fs_smfs_ct_rule_del, + + .init = mlx5_ct_fs_smfs_init, + .destroy = mlx5_ct_fs_smfs_destroy, + + .priv_size = sizeof(struct mlx5_ct_fs_smfs), +}; + +struct mlx5_ct_fs_ops * +mlx5_ct_fs_smfs_ops_get(void) +{ + return &fs_smfs_ops; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c new file mode 100644 index 000000000..ca834bbcb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c @@ -0,0 +1,457 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include <linux/mlx5/fs.h> +#include "en/mapping.h" +#include "en/tc/int_port.h" +#include "en.h" +#include "en_rep.h" +#include "en_tc.h" + +struct mlx5e_tc_int_port { + enum mlx5e_tc_int_port_type type; + int ifindex; + u32 match_metadata; + u32 mapping; + struct list_head list; + struct mlx5_flow_handle *rx_rule; + refcount_t refcnt; + struct rcu_head rcu_head; +}; + +struct mlx5e_tc_int_port_priv { + struct mlx5_core_dev *dev; + struct mutex int_ports_lock; /* Protects int ports list */ + struct list_head int_ports; /* Uses int_ports_lock */ + u16 num_ports; + bool ul_rep_rx_ready; /* Set when uplink is performing teardown */ + struct mapping_ctx *metadata_mapping; /* Metadata for source port rewrite and matching */ +}; + +bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw) +{ + return mlx5_eswitch_vport_match_metadata_enabled(esw) && + MLX5_CAP_GEN(esw->dev, reg_c_preserve); +} + +u32 mlx5e_tc_int_port_get_metadata(struct mlx5e_tc_int_port *int_port) +{ + return int_port->match_metadata; +} + +int mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port) +{ + /* For egress forwarding we can have the case + * where the packet came from a vport and redirected + * to int port or it came from the uplink, going + * via internal port and hairpinned back to uplink + * so we set the source to any port in this case. + */ + return int_port->type == MLX5E_TC_INT_PORT_EGRESS ? 
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT : + MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; +} + +u32 mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port) +{ + return int_port->match_metadata << (32 - ESW_SOURCE_PORT_METADATA_BITS); +} + +static struct mlx5_flow_handle * +mlx5e_int_port_create_rx_rule(struct mlx5_eswitch *esw, + struct mlx5e_tc_int_port *int_port, + struct mlx5_flow_destination *dest) + +{ + struct mlx5_flow_context *flow_context; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_spec *spec; + void *misc; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5e_tc_int_port_get_metadata_for_match(int_port)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + + /* Overwrite flow tag with the int port metadata mapping + * instead of the chain mapping. + */ + flow_context = &spec->flow_context; + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; + flow_context->flow_tag = int_port->mapping; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, + &flow_act, dest, 1); + if (IS_ERR(flow_rule)) + mlx5_core_warn(esw->dev, "ft offloads: Failed to add internal vport rx rule err %ld\n", + PTR_ERR(flow_rule)); + + kvfree(spec); + + return flow_rule; +} + +static struct mlx5e_tc_int_port * +mlx5e_int_port_lookup(struct mlx5e_tc_int_port_priv *priv, + int ifindex, + enum mlx5e_tc_int_port_type type) +{ + struct mlx5e_tc_int_port *int_port; + + if (!priv->ul_rep_rx_ready) + goto not_found; + + list_for_each_entry(int_port, &priv->int_ports, list) + if (int_port->ifindex == ifindex && int_port->type == type) { + refcount_inc(&int_port->refcnt); + return int_port; + } + +not_found: + return NULL; +} + +static int mlx5e_int_port_metadata_alloc(struct mlx5e_tc_int_port_priv *priv, + int ifindex, enum mlx5e_tc_int_port_type type, + u32 *id) +{ + u32 mapped_key[2] = {type, ifindex}; + int err; + + err = mapping_add(priv->metadata_mapping, mapped_key, id); + if (err) + return err; + + /* Fill upper 4 bits of PFNUM with reserved value */ + *id |= 0xf << ESW_VPORT_BITS; + + return 0; +} + +static void mlx5e_int_port_metadata_free(struct mlx5e_tc_int_port_priv *priv, + u32 id) +{ + id &= (1 << ESW_VPORT_BITS) - 1; + mapping_remove(priv->metadata_mapping, id); +} + +/* Must be called with priv->int_ports_lock held */ +static struct mlx5e_tc_int_port * +mlx5e_int_port_add(struct mlx5e_tc_int_port_priv *priv, + int ifindex, + enum mlx5e_tc_int_port_type type) +{ + struct mlx5_eswitch *esw = priv->dev->priv.eswitch; + struct mlx5_mapped_obj mapped_obj = {}; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5e_tc_int_port *int_port; + struct mlx5_flow_destination dest; + struct mapping_ctx *ctx; + u32 match_metadata; + u32 mapping; + int err; + + if (priv->num_ports == MLX5E_TC_MAX_INT_PORT_NUM) { + mlx5_core_dbg(priv->dev, "Cannot add a new int port, max supported %d", + MLX5E_TC_MAX_INT_PORT_NUM); + return ERR_PTR(-ENOSPC); + } + + int_port = kzalloc(sizeof(*int_port), GFP_KERNEL); + if (!int_port) + return ERR_PTR(-ENOMEM); + + err = mlx5e_int_port_metadata_alloc(priv, ifindex, type, &match_metadata); + if (err) 
{ + mlx5_core_warn(esw->dev, "Cannot add a new internal port, metadata allocation failed for ifindex %d", + ifindex); + goto err_metadata; + } + + /* map metadata to reg_c0 object for miss handling */ + ctx = esw->offloads.reg_c0_obj_pool; + mapped_obj.type = MLX5_MAPPED_OBJ_INT_PORT_METADATA; + mapped_obj.int_port_metadata = match_metadata; + err = mapping_add(ctx, &mapped_obj, &mapping); + if (err) + goto err_map; + + int_port->type = type; + int_port->ifindex = ifindex; + int_port->match_metadata = match_metadata; + int_port->mapping = mapping; + + /* Create a match on internal vport metadata in vport table */ + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = uplink_rpriv->root_ft; + + int_port->rx_rule = mlx5e_int_port_create_rx_rule(esw, int_port, &dest); + if (IS_ERR(int_port->rx_rule)) { + err = PTR_ERR(int_port->rx_rule); + mlx5_core_warn(esw->dev, "Can't add internal port rx rule, err %d", err); + goto err_rx_rule; + } + + refcount_set(&int_port->refcnt, 1); + list_add_rcu(&int_port->list, &priv->int_ports); + priv->num_ports++; + + return int_port; + +err_rx_rule: + mapping_remove(ctx, int_port->mapping); + +err_map: + mlx5e_int_port_metadata_free(priv, match_metadata); + +err_metadata: + kfree(int_port); + + return ERR_PTR(err); +} + +/* Must be called with priv->int_ports_lock held */ +static void +mlx5e_int_port_remove(struct mlx5e_tc_int_port_priv *priv, + struct mlx5e_tc_int_port *int_port) +{ + struct mlx5_eswitch *esw = priv->dev->priv.eswitch; + struct mapping_ctx *ctx; + + ctx = esw->offloads.reg_c0_obj_pool; + + list_del_rcu(&int_port->list); + + /* The following parameters are not used by the + * rcu readers of this int_port object so it is + * safe to release them. 
+ */ + if (int_port->rx_rule) + mlx5_del_flow_rules(int_port->rx_rule); + mapping_remove(ctx, int_port->mapping); + mlx5e_int_port_metadata_free(priv, int_port->match_metadata); + kfree_rcu(int_port); + priv->num_ports--; +} + +/* Must be called with rcu_read_lock held */ +static struct mlx5e_tc_int_port * +mlx5e_int_port_get_from_metadata(struct mlx5e_tc_int_port_priv *priv, + u32 metadata) +{ + struct mlx5e_tc_int_port *int_port; + + list_for_each_entry_rcu(int_port, &priv->int_ports, list) + if (int_port->match_metadata == metadata) + return int_port; + + return NULL; +} + +struct mlx5e_tc_int_port * +mlx5e_tc_int_port_get(struct mlx5e_tc_int_port_priv *priv, + int ifindex, + enum mlx5e_tc_int_port_type type) +{ + struct mlx5e_tc_int_port *int_port; + + if (!priv) + return ERR_PTR(-EOPNOTSUPP); + + mutex_lock(&priv->int_ports_lock); + + /* Reject request if ul rep not ready */ + if (!priv->ul_rep_rx_ready) { + int_port = ERR_PTR(-EOPNOTSUPP); + goto done; + } + + int_port = mlx5e_int_port_lookup(priv, ifindex, type); + if (int_port) + goto done; + + /* Alloc and add new int port to list */ + int_port = mlx5e_int_port_add(priv, ifindex, type); + +done: + mutex_unlock(&priv->int_ports_lock); + + return int_port; +} + +void +mlx5e_tc_int_port_put(struct mlx5e_tc_int_port_priv *priv, + struct mlx5e_tc_int_port *int_port) +{ + if (!refcount_dec_and_mutex_lock(&int_port->refcnt, &priv->int_ports_lock)) + return; + + mlx5e_int_port_remove(priv, int_port); + mutex_unlock(&priv->int_ports_lock); +} + +struct mlx5e_tc_int_port_priv * +mlx5e_tc_int_port_init(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_int_port_priv *int_port_priv; + u64 mapping_id; + + if (!mlx5e_tc_int_port_supported(esw)) + return NULL; + + int_port_priv = kzalloc(sizeof(*int_port_priv), GFP_KERNEL); + if (!int_port_priv) + return NULL; + + mapping_id = mlx5_query_nic_system_image_guid(priv->mdev); + + int_port_priv->metadata_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_INT_PORT, + sizeof(u32) * 2, + (1 << ESW_VPORT_BITS) - 1, true); + if (IS_ERR(int_port_priv->metadata_mapping)) { + mlx5_core_warn(priv->mdev, "Can't allocate metadata mapping of int port offload, err=%ld\n", + PTR_ERR(int_port_priv->metadata_mapping)); + goto err_mapping; + } + + int_port_priv->dev = priv->mdev; + mutex_init(&int_port_priv->int_ports_lock); + INIT_LIST_HEAD(&int_port_priv->int_ports); + + return int_port_priv; + +err_mapping: + kfree(int_port_priv); + + return NULL; +} + +void +mlx5e_tc_int_port_cleanup(struct mlx5e_tc_int_port_priv *priv) +{ + if (!priv) + return; + + mutex_destroy(&priv->int_ports_lock); + mapping_destroy(priv->metadata_mapping); + kfree(priv); +} + +/* Int port rx rules reside in ul rep rx tables. + * It is possible the ul rep will go down while there are + * still int port rules in its rx table so proper cleanup + * is required to free resources. 
+ */ +void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_tc_int_port_priv *ppriv; + struct mlx5e_rep_priv *uplink_rpriv; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + ppriv = uplink_priv->int_port_priv; + + if (!ppriv) + return; + + mutex_lock(&ppriv->int_ports_lock); + ppriv->ul_rep_rx_ready = true; + mutex_unlock(&ppriv->int_ports_lock); +} + +void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_tc_int_port_priv *ppriv; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5e_tc_int_port *int_port; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + ppriv = uplink_priv->int_port_priv; + + if (!ppriv) + return; + + mutex_lock(&ppriv->int_ports_lock); + + ppriv->ul_rep_rx_ready = false; + + list_for_each_entry(int_port, &ppriv->int_ports, list) { + if (!IS_ERR_OR_NULL(int_port->rx_rule)) + mlx5_del_flow_rules(int_port->rx_rule); + + int_port->rx_rule = NULL; + } + + mutex_unlock(&ppriv->int_ports_lock); +} + +bool +mlx5e_tc_int_port_dev_fwd(struct mlx5e_tc_int_port_priv *priv, + struct sk_buff *skb, u32 int_vport_metadata, + bool *forward_tx) +{ + enum mlx5e_tc_int_port_type fwd_type; + struct mlx5e_tc_int_port *int_port; + struct net_device *dev; + int ifindex; + + if (!priv) + return false; + + rcu_read_lock(); + int_port = mlx5e_int_port_get_from_metadata(priv, int_vport_metadata); + if (!int_port) { + rcu_read_unlock(); + mlx5_core_dbg(priv->dev, "Unable to find int port with metadata 0x%.8x\n", + int_vport_metadata); + return false; + } + + ifindex = int_port->ifindex; + fwd_type = int_port->type; + rcu_read_unlock(); + + dev = dev_get_by_index(&init_net, ifindex); + if (!dev) { + mlx5_core_dbg(priv->dev, + "Couldn't find internal port device with ifindex: %d\n", + ifindex); + return false; + } + + skb->skb_iif = dev->ifindex; + skb->dev = dev; + + if (fwd_type == MLX5E_TC_INT_PORT_INGRESS) { + skb->pkt_type = PACKET_HOST; + skb_set_redirected(skb, true); + *forward_tx = false; + } else { + skb_reset_network_header(skb); + skb_push_rcsum(skb, skb->mac_len); + skb_set_redirected(skb, false); + *forward_tx = true; + } + + return true; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h new file mode 100644 index 000000000..e72c79d30 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#ifndef __MLX5_EN_TC_INT_PORT_H__ +#define __MLX5_EN_TC_INT_PORT_H__ + +#include "en.h" + +struct mlx5e_tc_int_port; +struct mlx5e_tc_int_port_priv; + +enum mlx5e_tc_int_port_type { + MLX5E_TC_INT_PORT_INGRESS, + MLX5E_TC_INT_PORT_EGRESS, +}; + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) +bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw); + +struct mlx5e_tc_int_port_priv * +mlx5e_tc_int_port_init(struct mlx5e_priv *priv); +void +mlx5e_tc_int_port_cleanup(struct mlx5e_tc_int_port_priv *priv); + +void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv); +void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv); + +bool +mlx5e_tc_int_port_dev_fwd(struct mlx5e_tc_int_port_priv *priv, + struct sk_buff *skb, u32 int_vport_metadata, + bool *forward_tx); +struct mlx5e_tc_int_port * +mlx5e_tc_int_port_get(struct mlx5e_tc_int_port_priv *priv, + int ifindex, + enum mlx5e_tc_int_port_type type); +void +mlx5e_tc_int_port_put(struct mlx5e_tc_int_port_priv *priv, + struct mlx5e_tc_int_port *int_port); + +u32 mlx5e_tc_int_port_get_metadata(struct mlx5e_tc_int_port *int_port); +u32 mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port); +int mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port); +#else /* CONFIG_MLX5_CLS_ACT */ +static inline u32 +mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port) +{ + return 0; +} + +static inline int +mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port) +{ + return 0; +} + +static inline bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw) +{ + return false; +} + +static inline void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv) {} +static inline void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv) {} + +#endif /* CONFIG_MLX5_CLS_ACT */ +#endif /* __MLX5_EN_TC_INT_PORT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c new file mode 100644 index 000000000..be74e1403 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c @@ -0,0 +1,585 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
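+/*
+ * ASO based hardware flow meters backing the offloaded police action.
+ * Each ASO object holds two meters; the committed rate and burst are
+ * encoded as mantissa/exponent pairs:
+ *
+ *   cir = 8 * 10^9 * cir_mantissa / 2^cir_exponent	(bits/s)
+ *   cbs = cbs_mantissa * 2^cbs_exponent
+ *
+ * For example, 100 Mbit/s can be encoded as mantissa 51 with exponent 12,
+ * which yields 99,609,375 bits/s; mlx5e_flow_meter_cir_calc() below
+ * searches for the closest such pair.
+ */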
+ +#include <linux/math64.h> +#include "lib/aso.h" +#include "en/tc/post_act.h" +#include "meter.h" +#include "en/tc_priv.h" + +#define MLX5_START_COLOR_SHIFT 28 +#define MLX5_METER_MODE_SHIFT 24 +#define MLX5_CBS_EXP_SHIFT 24 +#define MLX5_CBS_MAN_SHIFT 16 +#define MLX5_CIR_EXP_SHIFT 8 + +/* cir = 8*(10^9)*cir_mantissa/(2^cir_exponent)) bits/s */ +#define MLX5_CONST_CIR 8000000000ULL +#define MLX5_CALC_CIR(m, e) ((MLX5_CONST_CIR * (m)) >> (e)) +#define MLX5_MAX_CIR ((MLX5_CONST_CIR * 0x100) - 1) + +/* cbs = cbs_mantissa*2^cbs_exponent */ +#define MLX5_CALC_CBS(m, e) ((m) << (e)) +#define MLX5_MAX_CBS ((0x100ULL << 0x1F) - 1) +#define MLX5_MAX_HW_CBS 0x7FFFFFFF + +struct mlx5e_flow_meter_aso_obj { + struct list_head entry; + int base_id; + int total_meters; + + unsigned long meters_map[0]; /* must be at the end of this struct */ +}; + +struct mlx5e_flow_meters { + enum mlx5_flow_namespace_type ns_type; + struct mlx5_aso *aso; + struct mutex aso_lock; /* Protects aso operations */ + int log_granularity; + u32 pdn; + + DECLARE_HASHTABLE(hashtbl, 8); + + struct mutex sync_lock; /* protect flow meter operations */ + struct list_head partial_list; + struct list_head full_list; + + struct mlx5_core_dev *mdev; + struct mlx5e_post_act *post_act; +}; + +static void +mlx5e_flow_meter_cir_calc(u64 cir, u8 *man, u8 *exp) +{ + s64 _cir, _delta, delta = S64_MAX; + u8 e, _man = 0, _exp = 0; + u64 m; + + for (e = 0; e <= 0x1F; e++) { /* exp width 5bit */ + m = cir << e; + if ((s64)m < 0) /* overflow */ + break; + m = div64_u64(m, MLX5_CONST_CIR); + if (m > 0xFF) /* man width 8 bit */ + continue; + _cir = MLX5_CALC_CIR(m, e); + _delta = cir - _cir; + if (_delta < delta) { + _man = m; + _exp = e; + if (!_delta) + goto found; + delta = _delta; + } + } + +found: + *man = _man; + *exp = _exp; +} + +static void +mlx5e_flow_meter_cbs_calc(u64 cbs, u8 *man, u8 *exp) +{ + s64 _cbs, _delta, delta = S64_MAX; + u8 e, _man = 0, _exp = 0; + u64 m; + + for (e = 0; e <= 0x1F; e++) { /* exp width 5bit */ + m = cbs >> e; + if (m > 0xFF) /* man width 8 bit */ + continue; + _cbs = MLX5_CALC_CBS(m, e); + _delta = cbs - _cbs; + if (_delta < delta) { + _man = m; + _exp = e; + if (!_delta) + goto found; + delta = _delta; + } + } + +found: + *man = _man; + *exp = _exp; +} + +int +mlx5e_tc_meter_modify(struct mlx5_core_dev *mdev, + struct mlx5e_flow_meter_handle *meter, + struct mlx5e_flow_meter_params *meter_params) +{ + struct mlx5_wqe_aso_ctrl_seg *aso_ctrl; + struct mlx5_wqe_aso_data_seg *aso_data; + struct mlx5e_flow_meters *flow_meters; + u8 cir_man, cir_exp, cbs_man, cbs_exp; + struct mlx5_aso_wqe *aso_wqe; + unsigned long expires; + struct mlx5_aso *aso; + u64 rate, burst; + u8 ds_cnt; + int err; + + rate = meter_params->rate; + burst = meter_params->burst; + + /* HW treats each packet as 128 bytes in PPS mode */ + if (meter_params->mode == MLX5_RATE_LIMIT_PPS) { + rate <<= 10; + burst <<= 7; + } + + if (!rate || rate > MLX5_MAX_CIR || !burst || burst > MLX5_MAX_CBS) + return -EINVAL; + + /* HW has limitation of total 31 bits for cbs */ + if (burst > MLX5_MAX_HW_CBS) { + mlx5_core_warn(mdev, + "burst(%lld) is too large, use HW allowed value(%d)\n", + burst, MLX5_MAX_HW_CBS); + burst = MLX5_MAX_HW_CBS; + } + + mlx5_core_dbg(mdev, "meter mode=%d\n", meter_params->mode); + mlx5e_flow_meter_cir_calc(rate, &cir_man, &cir_exp); + mlx5_core_dbg(mdev, "rate=%lld, cir=%lld, exp=%d, man=%d\n", + rate, MLX5_CALC_CIR(cir_man, cir_exp), cir_exp, cir_man); + mlx5e_flow_meter_cbs_calc(burst, &cbs_man, &cbs_exp); + mlx5_core_dbg(mdev, 
"burst=%lld, cbs=%lld, exp=%d, man=%d\n", + burst, MLX5_CALC_CBS((u64)cbs_man, cbs_exp), cbs_exp, cbs_man); + + if (!cir_man || !cbs_man) + return -EINVAL; + + flow_meters = meter->flow_meters; + aso = flow_meters->aso; + + mutex_lock(&flow_meters->aso_lock); + aso_wqe = mlx5_aso_get_wqe(aso); + ds_cnt = DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe_data), MLX5_SEND_WQE_DS); + mlx5_aso_build_wqe(aso, ds_cnt, aso_wqe, meter->obj_id, + MLX5_ACCESS_ASO_OPC_MOD_FLOW_METER); + + aso_ctrl = &aso_wqe->aso_ctrl; + memset(aso_ctrl, 0, sizeof(*aso_ctrl)); + aso_ctrl->data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BYTEWISE_64BYTE << 6; + aso_ctrl->condition_1_0_operand = MLX5_ASO_ALWAYS_TRUE | + MLX5_ASO_ALWAYS_TRUE << 4; + aso_ctrl->data_offset_condition_operand = MLX5_ASO_LOGICAL_OR << 6; + aso_ctrl->data_mask = cpu_to_be64(0x80FFFFFFULL << (meter->idx ? 0 : 32)); + + aso_data = (struct mlx5_wqe_aso_data_seg *)(aso_wqe + 1); + memset(aso_data, 0, sizeof(*aso_data)); + aso_data->bytewise_data[meter->idx * 8] = cpu_to_be32((0x1 << 31) | /* valid */ + (MLX5_FLOW_METER_COLOR_GREEN << MLX5_START_COLOR_SHIFT)); + if (meter_params->mode == MLX5_RATE_LIMIT_PPS) + aso_data->bytewise_data[meter->idx * 8] |= + cpu_to_be32(MLX5_FLOW_METER_MODE_NUM_PACKETS << MLX5_METER_MODE_SHIFT); + else + aso_data->bytewise_data[meter->idx * 8] |= + cpu_to_be32(MLX5_FLOW_METER_MODE_BYTES_IP_LENGTH << MLX5_METER_MODE_SHIFT); + + aso_data->bytewise_data[meter->idx * 8 + 2] = cpu_to_be32((cbs_exp << MLX5_CBS_EXP_SHIFT) | + (cbs_man << MLX5_CBS_MAN_SHIFT) | + (cir_exp << MLX5_CIR_EXP_SHIFT) | + cir_man); + + mlx5_aso_post_wqe(aso, true, &aso_wqe->ctrl); + + /* With newer FW, the wait for the first ASO WQE is more than 2us, put the wait 10ms. */ + expires = jiffies + msecs_to_jiffies(10); + do { + err = mlx5_aso_poll_cq(aso, true); + if (err) + usleep_range(2, 10); + } while (err && time_is_after_jiffies(expires)); + mutex_unlock(&flow_meters->aso_lock); + + return err; +} + +static int +mlx5e_flow_meter_create_aso_obj(struct mlx5e_flow_meters *flow_meters, int *obj_id) +{ + u32 in[MLX5_ST_SZ_DW(create_flow_meter_aso_obj_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + struct mlx5_core_dev *mdev = flow_meters->mdev; + void *obj; + int err; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO); + MLX5_SET(general_obj_in_cmd_hdr, in, log_obj_range, flow_meters->log_granularity); + + obj = MLX5_ADDR_OF(create_flow_meter_aso_obj_in, in, flow_meter_aso_obj); + MLX5_SET(flow_meter_aso_obj, obj, meter_aso_access_pd, flow_meters->pdn); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (!err) { + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + mlx5_core_dbg(mdev, "flow meter aso obj(0x%x) created\n", *obj_id); + } + + return err; +} + +static void +mlx5e_flow_meter_destroy_aso_obj(struct mlx5_core_dev *mdev, u32 obj_id) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + mlx5_core_dbg(mdev, "flow meter aso obj(0x%x) destroyed\n", obj_id); +} + +static struct mlx5e_flow_meter_handle * +__mlx5e_flow_meter_alloc(struct mlx5e_flow_meters 
*flow_meters) +{ + struct mlx5_core_dev *mdev = flow_meters->mdev; + struct mlx5e_flow_meter_aso_obj *meters_obj; + struct mlx5e_flow_meter_handle *meter; + struct mlx5_fc *counter; + int err, pos, total; + u32 id; + + meter = kzalloc(sizeof(*meter), GFP_KERNEL); + if (!meter) + return ERR_PTR(-ENOMEM); + + counter = mlx5_fc_create(mdev, true); + if (IS_ERR(counter)) { + err = PTR_ERR(counter); + goto err_red_counter; + } + meter->red_counter = counter; + + counter = mlx5_fc_create(mdev, true); + if (IS_ERR(counter)) { + err = PTR_ERR(counter); + goto err_green_counter; + } + meter->green_counter = counter; + + meters_obj = list_first_entry_or_null(&flow_meters->partial_list, + struct mlx5e_flow_meter_aso_obj, + entry); + /* 2 meters in one object */ + total = 1 << (flow_meters->log_granularity + 1); + if (!meters_obj) { + err = mlx5e_flow_meter_create_aso_obj(flow_meters, &id); + if (err) { + mlx5_core_err(mdev, "Failed to create flow meter ASO object\n"); + goto err_create; + } + + meters_obj = kzalloc(sizeof(*meters_obj) + BITS_TO_BYTES(total), + GFP_KERNEL); + if (!meters_obj) { + err = -ENOMEM; + goto err_mem; + } + + meters_obj->base_id = id; + meters_obj->total_meters = total; + list_add(&meters_obj->entry, &flow_meters->partial_list); + pos = 0; + } else { + pos = find_first_zero_bit(meters_obj->meters_map, total); + if (bitmap_weight(meters_obj->meters_map, total) == total - 1) { + list_del(&meters_obj->entry); + list_add(&meters_obj->entry, &flow_meters->full_list); + } + } + + bitmap_set(meters_obj->meters_map, pos, 1); + meter->flow_meters = flow_meters; + meter->meters_obj = meters_obj; + meter->obj_id = meters_obj->base_id + pos / 2; + meter->idx = pos % 2; + + mlx5_core_dbg(mdev, "flow meter allocated, obj_id=0x%x, index=%d\n", + meter->obj_id, meter->idx); + + return meter; + +err_mem: + mlx5e_flow_meter_destroy_aso_obj(mdev, id); +err_create: + mlx5_fc_destroy(mdev, meter->green_counter); +err_green_counter: + mlx5_fc_destroy(mdev, meter->red_counter); +err_red_counter: + kfree(meter); + return ERR_PTR(err); +} + +static void +__mlx5e_flow_meter_free(struct mlx5e_flow_meter_handle *meter) +{ + struct mlx5e_flow_meters *flow_meters = meter->flow_meters; + struct mlx5_core_dev *mdev = flow_meters->mdev; + struct mlx5e_flow_meter_aso_obj *meters_obj; + int n, pos; + + mlx5_fc_destroy(mdev, meter->green_counter); + mlx5_fc_destroy(mdev, meter->red_counter); + + meters_obj = meter->meters_obj; + pos = (meter->obj_id - meters_obj->base_id) * 2 + meter->idx; + bitmap_clear(meters_obj->meters_map, pos, 1); + n = bitmap_weight(meters_obj->meters_map, meters_obj->total_meters); + if (n == 0) { + list_del(&meters_obj->entry); + mlx5e_flow_meter_destroy_aso_obj(mdev, meters_obj->base_id); + kfree(meters_obj); + } else if (n == meters_obj->total_meters - 1) { + list_del(&meters_obj->entry); + list_add(&meters_obj->entry, &flow_meters->partial_list); + } + + mlx5_core_dbg(mdev, "flow meter freed, obj_id=0x%x, index=%d\n", + meter->obj_id, meter->idx); + kfree(meter); +} + +static struct mlx5e_flow_meter_handle * +__mlx5e_tc_meter_get(struct mlx5e_flow_meters *flow_meters, u32 index) +{ + struct mlx5e_flow_meter_handle *meter; + + hash_for_each_possible(flow_meters->hashtbl, meter, hlist, index) + if (meter->params.index == index) + goto add_ref; + + return ERR_PTR(-ENOENT); + +add_ref: + meter->refcnt++; + + return meter; +} + +struct mlx5e_flow_meter_handle * +mlx5e_tc_meter_get(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params) +{ + struct mlx5e_flow_meters 
*flow_meters; + struct mlx5e_flow_meter_handle *meter; + + flow_meters = mlx5e_get_flow_meters(mdev); + if (!flow_meters) + return ERR_PTR(-EOPNOTSUPP); + + mutex_lock(&flow_meters->sync_lock); + meter = __mlx5e_tc_meter_get(flow_meters, params->index); + mutex_unlock(&flow_meters->sync_lock); + + return meter; +} + +static void +__mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter) +{ + if (--meter->refcnt == 0) { + hash_del(&meter->hlist); + __mlx5e_flow_meter_free(meter); + } +} + +void +mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter) +{ + struct mlx5e_flow_meters *flow_meters = meter->flow_meters; + + mutex_lock(&flow_meters->sync_lock); + __mlx5e_tc_meter_put(meter); + mutex_unlock(&flow_meters->sync_lock); +} + +static struct mlx5e_flow_meter_handle * +mlx5e_tc_meter_alloc(struct mlx5e_flow_meters *flow_meters, + struct mlx5e_flow_meter_params *params) +{ + struct mlx5e_flow_meter_handle *meter; + + meter = __mlx5e_flow_meter_alloc(flow_meters); + if (IS_ERR(meter)) + return meter; + + hash_add(flow_meters->hashtbl, &meter->hlist, params->index); + meter->params.index = params->index; + meter->refcnt++; + + return meter; +} + +static int +__mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter, + struct mlx5e_flow_meter_params *params) +{ + struct mlx5_core_dev *mdev = meter->flow_meters->mdev; + int err = 0; + + if (meter->params.mode != params->mode || meter->params.rate != params->rate || + meter->params.burst != params->burst) { + err = mlx5e_tc_meter_modify(mdev, meter, params); + if (err) + goto out; + + meter->params.mode = params->mode; + meter->params.rate = params->rate; + meter->params.burst = params->burst; + } + +out: + return err; +} + +int +mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter, + struct mlx5e_flow_meter_params *params) +{ + struct mlx5_core_dev *mdev = meter->flow_meters->mdev; + struct mlx5e_flow_meters *flow_meters; + int err; + + flow_meters = mlx5e_get_flow_meters(mdev); + if (!flow_meters) + return -EOPNOTSUPP; + + mutex_lock(&flow_meters->sync_lock); + err = __mlx5e_tc_meter_update(meter, params); + mutex_unlock(&flow_meters->sync_lock); + return err; +} + +struct mlx5e_flow_meter_handle * +mlx5e_tc_meter_replace(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params) +{ + struct mlx5e_flow_meters *flow_meters; + struct mlx5e_flow_meter_handle *meter; + int err; + + flow_meters = mlx5e_get_flow_meters(mdev); + if (!flow_meters) + return ERR_PTR(-EOPNOTSUPP); + + mutex_lock(&flow_meters->sync_lock); + meter = __mlx5e_tc_meter_get(flow_meters, params->index); + if (IS_ERR(meter)) { + meter = mlx5e_tc_meter_alloc(flow_meters, params); + if (IS_ERR(meter)) { + err = PTR_ERR(meter); + goto err_get; + } + } + + err = __mlx5e_tc_meter_update(meter, params); + if (err) + goto err_update; + + mutex_unlock(&flow_meters->sync_lock); + return meter; + +err_update: + __mlx5e_tc_meter_put(meter); +err_get: + mutex_unlock(&flow_meters->sync_lock); + return ERR_PTR(err); +} + +enum mlx5_flow_namespace_type +mlx5e_tc_meter_get_namespace(struct mlx5e_flow_meters *flow_meters) +{ + return flow_meters->ns_type; +} + +struct mlx5e_flow_meters * +mlx5e_flow_meters_init(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_flow_meters *flow_meters; + int err; + + if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) & + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_FLOW_METER_ASO)) + return ERR_PTR(-EOPNOTSUPP); + + if 
(IS_ERR_OR_NULL(post_act)) { + netdev_dbg(priv->netdev, + "flow meter offload is not supported, post action is missing\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + flow_meters = kzalloc(sizeof(*flow_meters), GFP_KERNEL); + if (!flow_meters) + return ERR_PTR(-ENOMEM); + + err = mlx5_core_alloc_pd(mdev, &flow_meters->pdn); + if (err) { + mlx5_core_err(mdev, "Failed to alloc pd for flow meter aso, err=%d\n", err); + goto err_out; + } + + flow_meters->aso = mlx5_aso_create(mdev, flow_meters->pdn); + if (IS_ERR(flow_meters->aso)) { + mlx5_core_warn(mdev, "Failed to create aso wqe for flow meter\n"); + err = PTR_ERR(flow_meters->aso); + goto err_sq; + } + + mutex_init(&flow_meters->sync_lock); + INIT_LIST_HEAD(&flow_meters->partial_list); + INIT_LIST_HEAD(&flow_meters->full_list); + + flow_meters->ns_type = ns_type; + flow_meters->mdev = mdev; + flow_meters->post_act = post_act; + mutex_init(&flow_meters->aso_lock); + flow_meters->log_granularity = min_t(int, 6, + MLX5_CAP_QOS(mdev, log_meter_aso_max_alloc)); + + return flow_meters; + +err_sq: + mlx5_core_dealloc_pd(mdev, flow_meters->pdn); +err_out: + kfree(flow_meters); + return ERR_PTR(err); +} + +void +mlx5e_flow_meters_cleanup(struct mlx5e_flow_meters *flow_meters) +{ + if (IS_ERR_OR_NULL(flow_meters)) + return; + + mlx5_aso_destroy(flow_meters->aso); + mlx5_core_dealloc_pd(flow_meters->mdev, flow_meters->pdn); + kfree(flow_meters); +} + +void +mlx5e_tc_meter_get_stats(struct mlx5e_flow_meter_handle *meter, + u64 *bytes, u64 *packets, u64 *drops, u64 *lastuse) +{ + u64 bytes1, packets1, lastuse1; + u64 bytes2, packets2, lastuse2; + + mlx5_fc_query_cached(meter->green_counter, &bytes1, &packets1, &lastuse1); + mlx5_fc_query_cached(meter->red_counter, &bytes2, &packets2, &lastuse2); + + *bytes = bytes1 + bytes2; + *packets = packets1 + packets2; + *drops = packets2; + *lastuse = max_t(u64, lastuse1, lastuse2); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h new file mode 100644 index 000000000..6de6e8a16 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#ifndef __MLX5_EN_FLOW_METER_H__ +#define __MLX5_EN_FLOW_METER_H__ + +struct mlx5e_post_meter_priv; +struct mlx5e_flow_meter_aso_obj; +struct mlx5e_flow_meters; +struct mlx5_flow_attr; + +enum mlx5e_flow_meter_mode { + MLX5_RATE_LIMIT_BPS, + MLX5_RATE_LIMIT_PPS, +}; + +struct mlx5e_flow_meter_params { + enum mlx5e_flow_meter_mode mode; + /* police action index */ + u32 index; + u64 rate; + u64 burst; +}; + +struct mlx5e_flow_meter_handle { + struct mlx5e_flow_meters *flow_meters; + struct mlx5e_flow_meter_aso_obj *meters_obj; + u32 obj_id; + u8 idx; + + int refcnt; + struct hlist_node hlist; + struct mlx5e_flow_meter_params params; + + struct mlx5_fc *green_counter; + struct mlx5_fc *red_counter; +}; + +struct mlx5e_meter_attr { + struct mlx5e_flow_meter_params params; + struct mlx5e_flow_meter_handle *meter; + struct mlx5e_post_meter_priv *post_meter; +}; + +int +mlx5e_tc_meter_modify(struct mlx5_core_dev *mdev, + struct mlx5e_flow_meter_handle *meter, + struct mlx5e_flow_meter_params *meter_params); + +struct mlx5e_flow_meter_handle * +mlx5e_tc_meter_get(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params); +void +mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter); +int +mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter, + struct mlx5e_flow_meter_params *params); +struct mlx5e_flow_meter_handle * +mlx5e_tc_meter_replace(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params); + +enum mlx5_flow_namespace_type +mlx5e_tc_meter_get_namespace(struct mlx5e_flow_meters *flow_meters); + +struct mlx5e_flow_meters * +mlx5e_flow_meters_init(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_action); +void +mlx5e_flow_meters_cleanup(struct mlx5e_flow_meters *flow_meters); + +void +mlx5e_tc_meter_get_stats(struct mlx5e_flow_meter_handle *meter, + u64 *bytes, u64 *packets, u64 *drops, u64 *lastuse); + +#endif /* __MLX5_EN_FLOW_METER_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c new file mode 100644 index 000000000..0290e0dea --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "en/tc_priv.h" +#include "en_tc.h" +#include "post_act.h" +#include "mlx5_core.h" +#include "fs_core.h" + +struct mlx5e_post_act { + enum mlx5_flow_namespace_type ns_type; + struct mlx5_fs_chains *chains; + struct mlx5_flow_table *ft; + struct mlx5e_priv *priv; + struct xarray ids; +}; + +struct mlx5e_post_act_handle { + enum mlx5_flow_namespace_type ns_type; + struct mlx5_flow_attr *attr; + struct mlx5_flow_handle *rule; + u32 id; +}; + +#define MLX5_POST_ACTION_BITS MLX5_REG_MAPPING_MBITS(FTEID_TO_REG) +#define MLX5_POST_ACTION_MASK MLX5_REG_MAPPING_MASK(FTEID_TO_REG) +#define MLX5_POST_ACTION_MAX MLX5_POST_ACTION_MASK + +struct mlx5e_post_act * +mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, + enum mlx5_flow_namespace_type ns_type) +{ + enum fs_flow_table_type table_type = ns_type == MLX5_FLOW_NAMESPACE_FDB ? 
+ FS_FT_FDB : FS_FT_NIC_RX; + struct mlx5e_post_act *post_act; + int err; + + if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ignore_flow_level, table_type)) { + if (priv->mdev->coredev_type == MLX5_COREDEV_PF) + mlx5_core_warn(priv->mdev, "firmware level support is missing\n"); + err = -EOPNOTSUPP; + goto err_check; + } + + post_act = kzalloc(sizeof(*post_act), GFP_KERNEL); + if (!post_act) { + err = -ENOMEM; + goto err_check; + } + post_act->ft = mlx5_chains_create_global_table(chains); + if (IS_ERR(post_act->ft)) { + err = PTR_ERR(post_act->ft); + mlx5_core_warn(priv->mdev, "failed to create post action table, err: %d\n", err); + goto err_ft; + } + post_act->chains = chains; + post_act->ns_type = ns_type; + post_act->priv = priv; + xa_init_flags(&post_act->ids, XA_FLAGS_ALLOC1); + return post_act; + +err_ft: + kfree(post_act); +err_check: + return ERR_PTR(err); +} + +void +mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act) +{ + if (IS_ERR_OR_NULL(post_act)) + return; + + xa_destroy(&post_act->ids); + mlx5_chains_destroy_global_table(post_act->chains, post_act->ft); + kfree(post_act); +} + +int +mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act, + struct mlx5e_post_act_handle *handle) +{ + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + /* Post action rule matches on fte_id and executes original rule's tc rule action */ + mlx5e_tc_match_to_reg_match(spec, FTEID_TO_REG, handle->id, MLX5_POST_ACTION_MASK); + + handle->rule = mlx5e_tc_rule_offload(post_act->priv, spec, handle->attr); + if (IS_ERR(handle->rule)) { + err = PTR_ERR(handle->rule); + netdev_warn(post_act->priv->netdev, "Failed to add post action rule"); + goto err_rule; + } + + kvfree(spec); + return 0; + +err_rule: + kvfree(spec); + return err; +} + +struct mlx5e_post_act_handle * +mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *post_attr) +{ + struct mlx5e_post_act_handle *handle; + int err; + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) { + kfree(handle); + return ERR_PTR(-ENOMEM); + } + + post_attr->chain = 0; + post_attr->prio = 0; + post_attr->ft = post_act->ft; + post_attr->inner_match_level = MLX5_MATCH_NONE; + post_attr->outer_match_level = MLX5_MATCH_NONE; + post_attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_DECAP; + post_attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT; + + handle->ns_type = post_act->ns_type; + /* Splits were handled before post action */ + if (handle->ns_type == MLX5_FLOW_NAMESPACE_FDB) + post_attr->esw_attr->split_count = 0; + + err = xa_alloc(&post_act->ids, &handle->id, post_attr, + XA_LIMIT(1, MLX5_POST_ACTION_MAX), GFP_KERNEL); + if (err) + goto err_xarray; + + handle->attr = post_attr; + + return handle; + +err_xarray: + kfree(handle); + return ERR_PTR(err); +} + +void +mlx5e_tc_post_act_unoffload(struct mlx5e_post_act *post_act, + struct mlx5e_post_act_handle *handle) +{ + mlx5e_tc_rule_unoffload(post_act->priv, handle->rule, handle->attr); + handle->rule = NULL; +} + +void +mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle) +{ + if (!IS_ERR_OR_NULL(handle->rule)) + mlx5e_tc_post_act_unoffload(post_act, handle); + xa_erase(&post_act->ids, handle->id); + kfree(handle); +} + +struct mlx5_flow_table * +mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act) +{ + return post_act->ft; +} + +/* Allocate a header modify action to write the post action handle fte id to a register. 
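 * The id written here is the same value that mlx5e_tc_post_act_offload()
 * matches on via FTEID_TO_REG, so packets carrying it are steered to the
 * matching post-action rule in the post-action table.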
*/ +int +mlx5e_tc_post_act_set_handle(struct mlx5_core_dev *dev, + struct mlx5e_post_act_handle *handle, + struct mlx5e_tc_mod_hdr_acts *acts) +{ + return mlx5e_tc_match_to_reg_set(dev, acts, handle->ns_type, FTEID_TO_REG, handle->id); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h new file mode 100644 index 000000000..40b8df184 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_POST_ACTION_H__ +#define __MLX5_POST_ACTION_H__ + +#include "en.h" +#include "lib/fs_chains.h" + +struct mlx5_flow_attr; +struct mlx5e_priv; +struct mlx5e_tc_mod_hdr_acts; + +struct mlx5e_post_act * +mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, + enum mlx5_flow_namespace_type ns_type); + +void +mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act); + +struct mlx5e_post_act_handle * +mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *post_attr); + +void +mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle); + +int +mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act, + struct mlx5e_post_act_handle *handle); + +void +mlx5e_tc_post_act_unoffload(struct mlx5e_post_act *post_act, + struct mlx5e_post_act_handle *handle); + +struct mlx5_flow_table * +mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act); + +int +mlx5e_tc_post_act_set_handle(struct mlx5_core_dev *dev, + struct mlx5e_post_act_handle *handle, + struct mlx5e_tc_mod_hdr_acts *acts); + +#endif /* __MLX5_POST_ACTION_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c new file mode 100644 index 000000000..8b77e8228 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
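/*
 * Editorial note (not part of the original patch): this file builds a small
 * two-rule "post meter" table keyed on the packet color that the flow meter
 * writes into metadata register C5 (packet_color_to_reg in post_meter.h:
 * offset 0, length 8 bits).  Green packets are forwarded to the post-action
 * table and counted; red packets are counted and dropped, and the red
 * counter is what mlx5e_tc_meter_get_stats() reports as drops.  A tiny
 * software model of the red/green decision follows (the real table installs
 * one exact-match rule per color); all names here are illustrative only.
 */
enum example_meter_verdict { EXAMPLE_METER_FORWARD, EXAMPLE_METER_DROP };

static enum example_meter_verdict
example_post_meter_classify(unsigned int reg_c_5, unsigned int red_color)
{
	unsigned int color = reg_c_5 & 0xff;	/* moffset = 0, mlen = 8 */

	return color == red_color ? EXAMPLE_METER_DROP : EXAMPLE_METER_FORWARD;
}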
+ +#include "en/tc_priv.h" +#include "post_meter.h" +#include "en/tc/post_act.h" + +#define MLX5_PACKET_COLOR_BITS MLX5_REG_MAPPING_MBITS(PACKET_COLOR_TO_REG) +#define MLX5_PACKET_COLOR_MASK MLX5_REG_MAPPING_MASK(PACKET_COLOR_TO_REG) + +struct mlx5e_post_meter_priv { + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct mlx5_flow_handle *fwd_green_rule; + struct mlx5_flow_handle *drop_red_rule; +}; + +struct mlx5_flow_table * +mlx5e_post_meter_get_ft(struct mlx5e_post_meter_priv *post_meter) +{ + return post_meter->ft; +} + +static int +mlx5e_post_meter_table_create(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_meter_priv *post_meter) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *root_ns; + + root_ns = mlx5_get_flow_namespace(priv->mdev, ns_type); + if (!root_ns) { + mlx5_core_warn(priv->mdev, "Failed to get namespace for flow meter\n"); + return -EOPNOTSUPP; + } + + ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.prio = FDB_SLOW_PATH; + ft_attr.max_fte = 2; + ft_attr.level = 1; + + post_meter->ft = mlx5_create_flow_table(root_ns, &ft_attr); + if (IS_ERR(post_meter->ft)) { + mlx5_core_warn(priv->mdev, "Failed to create post_meter table\n"); + return PTR_ERR(post_meter->ft); + } + + return 0; +} + +static int +mlx5e_post_meter_fg_create(struct mlx5e_priv *priv, + struct mlx5e_post_meter_priv *post_meter) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *misc2, *match_criteria; + u32 *flow_group_in; + int err = 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + misc2 = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_5, MLX5_PACKET_COLOR_MASK); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + + post_meter->fg = mlx5_create_flow_group(post_meter->ft, flow_group_in); + if (IS_ERR(post_meter->fg)) { + mlx5_core_warn(priv->mdev, "Failed to create post_meter flow group\n"); + err = PTR_ERR(post_meter->fg); + } + + kvfree(flow_group_in); + return err; +} + +static int +mlx5e_post_meter_rules_create(struct mlx5e_priv *priv, + struct mlx5e_post_meter_priv *post_meter, + struct mlx5e_post_act *post_act, + struct mlx5_fc *green_counter, + struct mlx5_fc *red_counter) +{ + struct mlx5_flow_destination dest[2] = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + mlx5e_tc_match_to_reg_match(spec, PACKET_COLOR_TO_REG, + MLX5_FLOW_METER_COLOR_RED, MLX5_PACKET_COLOR_MASK); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[0].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[0].counter_id = mlx5_fc_id(red_counter); + + rule = mlx5_add_flow_rules(post_meter->ft, spec, &flow_act, dest, 1); + if (IS_ERR(rule)) { + mlx5_core_warn(priv->mdev, "Failed to create post_meter flow drop rule\n"); + err = PTR_ERR(rule); + goto err_red; + } + post_meter->drop_red_rule = rule; + + mlx5e_tc_match_to_reg_match(spec, PACKET_COLOR_TO_REG, + MLX5_FLOW_METER_COLOR_GREEN, MLX5_PACKET_COLOR_MASK); + 
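/*
 * Editorial note: the spec above is re-keyed in place from RED to GREEN; for
 * green traffic flow_act switches from drop+count to forward+count, dest[0]
 * is reused for the post-action table and dest[1] carries the green counter,
 * which is why two destinations are passed to mlx5_add_flow_rules() below.
 */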
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[0].ft = mlx5e_tc_post_act_get_ft(post_act); + dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[1].counter_id = mlx5_fc_id(green_counter); + + rule = mlx5_add_flow_rules(post_meter->ft, spec, &flow_act, dest, 2); + if (IS_ERR(rule)) { + mlx5_core_warn(priv->mdev, "Failed to create post_meter flow fwd rule\n"); + err = PTR_ERR(rule); + goto err_green; + } + post_meter->fwd_green_rule = rule; + + kvfree(spec); + return 0; + +err_green: + mlx5_del_flow_rules(post_meter->drop_red_rule); +err_red: + kvfree(spec); + return err; +} + +static void +mlx5e_post_meter_rules_destroy(struct mlx5e_post_meter_priv *post_meter) +{ + mlx5_del_flow_rules(post_meter->drop_red_rule); + mlx5_del_flow_rules(post_meter->fwd_green_rule); +} + +static void +mlx5e_post_meter_fg_destroy(struct mlx5e_post_meter_priv *post_meter) +{ + mlx5_destroy_flow_group(post_meter->fg); +} + +static void +mlx5e_post_meter_table_destroy(struct mlx5e_post_meter_priv *post_meter) +{ + mlx5_destroy_flow_table(post_meter->ft); +} + +struct mlx5e_post_meter_priv * +mlx5e_post_meter_init(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act, + struct mlx5_fc *green_counter, + struct mlx5_fc *red_counter) +{ + struct mlx5e_post_meter_priv *post_meter; + int err; + + post_meter = kzalloc(sizeof(*post_meter), GFP_KERNEL); + if (!post_meter) + return ERR_PTR(-ENOMEM); + + err = mlx5e_post_meter_table_create(priv, ns_type, post_meter); + if (err) + goto err_ft; + + err = mlx5e_post_meter_fg_create(priv, post_meter); + if (err) + goto err_fg; + + err = mlx5e_post_meter_rules_create(priv, post_meter, post_act, green_counter, + red_counter); + if (err) + goto err_rules; + + return post_meter; + +err_rules: + mlx5e_post_meter_fg_destroy(post_meter); +err_fg: + mlx5e_post_meter_table_destroy(post_meter); +err_ft: + kfree(post_meter); + return ERR_PTR(err); +} + +void +mlx5e_post_meter_cleanup(struct mlx5e_post_meter_priv *post_meter) +{ + mlx5e_post_meter_rules_destroy(post_meter); + mlx5e_post_meter_fg_destroy(post_meter); + mlx5e_post_meter_table_destroy(post_meter); + kfree(post_meter); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h new file mode 100644 index 000000000..34d0e4b9f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#ifndef __MLX5_EN_POST_METER_H__ +#define __MLX5_EN_POST_METER_H__ + +#define packet_color_to_reg { \ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_5, \ + .moffset = 0, \ + .mlen = 8, \ + .soffset = MLX5_BYTE_OFF(fte_match_param, \ + misc_parameters_2.metadata_reg_c_5), \ +} + +struct mlx5e_post_meter_priv; + +struct mlx5_flow_table * +mlx5e_post_meter_get_ft(struct mlx5e_post_meter_priv *post_meter); + +struct mlx5e_post_meter_priv * +mlx5e_post_meter_init(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act, + struct mlx5_fc *green_counter, + struct mlx5_fc *red_counter); +void +mlx5e_post_meter_cleanup(struct mlx5e_post_meter_priv *post_meter); + +#endif /* __MLX5_EN_POST_METER_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c new file mode 100644 index 000000000..c57b09727 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c @@ -0,0 +1,655 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021 Mellanox Technologies. */ + +#include <linux/skbuff.h> +#include <net/psample.h> +#include "en/mapping.h" +#include "en/tc/post_act.h" +#include "en/tc/act/sample.h" +#include "en/mod_hdr.h" +#include "sample.h" +#include "eswitch.h" +#include "en_tc.h" +#include "fs_core.h" + +#define MLX5_ESW_VPORT_TBL_SIZE_SAMPLE (64 * 1024) + +static struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_sample_ns = { + .max_fte = MLX5_ESW_VPORT_TBL_SIZE_SAMPLE, + .max_num_groups = 0, /* default num of groups */ + .flags = 0, +}; + +struct mlx5e_tc_psample { + struct mlx5_eswitch *esw; + struct mlx5_flow_table *termtbl; + struct mlx5_flow_handle *termtbl_rule; + DECLARE_HASHTABLE(hashtbl, 8); + struct mutex ht_lock; /* protect hashtbl */ + DECLARE_HASHTABLE(restore_hashtbl, 8); + struct mutex restore_lock; /* protect restore_hashtbl */ + struct mlx5e_post_act *post_act; +}; + +struct mlx5e_sampler { + struct hlist_node hlist; + u32 sampler_id; + u32 sample_ratio; + u32 sample_table_id; + u32 default_table_id; + int count; +}; + +struct mlx5e_sample_flow { + struct mlx5e_sampler *sampler; + struct mlx5e_sample_restore *restore; + struct mlx5_flow_attr *pre_attr; + struct mlx5_flow_handle *pre_rule; + struct mlx5_flow_attr *post_attr; + struct mlx5_flow_handle *post_rule; +}; + +struct mlx5e_sample_restore { + struct hlist_node hlist; + struct mlx5_modify_hdr *modify_hdr; + struct mlx5_flow_handle *rule; + u32 obj_id; + int count; +}; + +static int +sampler_termtbl_create(struct mlx5e_tc_psample *tc_psample) +{ + struct mlx5_eswitch *esw = tc_psample->esw; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_destination dest = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_act act = {}; + int err; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, termination_table)) { + mlx5_core_warn(dev, "termination table is not supported\n"); + return -EOPNOTSUPP; + } + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + mlx5_core_warn(dev, "failed to get FDB flow namespace\n"); + return -EOPNOTSUPP; + } + + ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.autogroup.max_num_groups = 1; + ft_attr.prio = FDB_SLOW_PATH; + ft_attr.max_fte = 1; + ft_attr.level = 1; + tc_psample->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); + if (IS_ERR(tc_psample->termtbl)) { + err = PTR_ERR(tc_psample->termtbl); + 
mlx5_core_warn(dev, "failed to create termtbl, err: %d\n", err); + return err; + } + + act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.vport.num = esw->manager_vport; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + tc_psample->termtbl_rule = mlx5_add_flow_rules(tc_psample->termtbl, NULL, &act, &dest, 1); + if (IS_ERR(tc_psample->termtbl_rule)) { + err = PTR_ERR(tc_psample->termtbl_rule); + mlx5_core_warn(dev, "failed to create termtbl rule, err: %d\n", err); + mlx5_destroy_flow_table(tc_psample->termtbl); + return err; + } + + return 0; +} + +static void +sampler_termtbl_destroy(struct mlx5e_tc_psample *tc_psample) +{ + mlx5_del_flow_rules(tc_psample->termtbl_rule); + mlx5_destroy_flow_table(tc_psample->termtbl); +} + +static int +sampler_obj_create(struct mlx5_core_dev *mdev, struct mlx5e_sampler *sampler) +{ + u32 in[MLX5_ST_SZ_DW(create_sampler_obj_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + u64 general_obj_types; + void *obj; + int err; + + general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); + if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER)) + return -EOPNOTSUPP; + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level)) + return -EOPNOTSUPP; + + obj = MLX5_ADDR_OF(create_sampler_obj_in, in, sampler_object); + MLX5_SET(sampler_obj, obj, table_type, FS_FT_FDB); + MLX5_SET(sampler_obj, obj, ignore_flow_level, 1); + MLX5_SET(sampler_obj, obj, level, 1); + MLX5_SET(sampler_obj, obj, sample_ratio, sampler->sample_ratio); + MLX5_SET(sampler_obj, obj, sample_table_id, sampler->sample_table_id); + MLX5_SET(sampler_obj, obj, default_table_id, sampler->default_table_id); + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_SAMPLER); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (!err) + sampler->sampler_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return err; +} + +static void +sampler_obj_destroy(struct mlx5_core_dev *mdev, u32 sampler_id) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_SAMPLER); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sampler_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +static u32 +sampler_hash(u32 sample_ratio, u32 default_table_id) +{ + return jhash_2words(sample_ratio, default_table_id, 0); +} + +static int +sampler_cmp(u32 sample_ratio1, u32 default_table_id1, u32 sample_ratio2, u32 default_table_id2) +{ + return sample_ratio1 != sample_ratio2 || default_table_id1 != default_table_id2; +} + +static struct mlx5e_sampler * +sampler_get(struct mlx5e_tc_psample *tc_psample, u32 sample_ratio, u32 default_table_id) +{ + struct mlx5e_sampler *sampler; + u32 hash_key; + int err; + + mutex_lock(&tc_psample->ht_lock); + hash_key = sampler_hash(sample_ratio, default_table_id); + hash_for_each_possible(tc_psample->hashtbl, sampler, hlist, hash_key) + if (!sampler_cmp(sampler->sample_ratio, sampler->default_table_id, + sample_ratio, default_table_id)) + goto add_ref; + + sampler = kzalloc(sizeof(*sampler), GFP_KERNEL); + if (!sampler) { + err = -ENOMEM; + goto err_alloc; + } + + sampler->sample_table_id = tc_psample->termtbl->id; + sampler->default_table_id = default_table_id; + sampler->sample_ratio = sample_ratio; + + err = 
sampler_obj_create(tc_psample->esw->dev, sampler); + if (err) + goto err_create; + + hash_add(tc_psample->hashtbl, &sampler->hlist, hash_key); + +add_ref: + sampler->count++; + mutex_unlock(&tc_psample->ht_lock); + return sampler; + +err_create: + kfree(sampler); +err_alloc: + mutex_unlock(&tc_psample->ht_lock); + return ERR_PTR(err); +} + +static void +sampler_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sampler *sampler) +{ + mutex_lock(&tc_psample->ht_lock); + if (--sampler->count == 0) { + hash_del(&sampler->hlist); + sampler_obj_destroy(tc_psample->esw->dev, sampler->sampler_id); + kfree(sampler); + } + mutex_unlock(&tc_psample->ht_lock); +} + +/* obj_id is used to restore the sample parameters. + * Set fte_id in original flow table, then match it in the default table. + * Only set it for NICs can preserve reg_c or decap action. For other cases, + * use the same match in the default table. + * Use one header rewrite for both obj_id and fte_id. + */ +static struct mlx5_modify_hdr * +sample_modify_hdr_get(struct mlx5_core_dev *mdev, u32 obj_id, + struct mlx5e_tc_mod_hdr_acts *mod_acts) +{ + struct mlx5_modify_hdr *modify_hdr; + int err; + + err = mlx5e_tc_match_to_reg_set(mdev, mod_acts, MLX5_FLOW_NAMESPACE_FDB, + CHAIN_TO_REG, obj_id); + if (err) + goto err_set_regc0; + + modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB, + mod_acts->num_actions, + mod_acts->actions); + if (IS_ERR(modify_hdr)) { + err = PTR_ERR(modify_hdr); + goto err_modify_hdr; + } + + mlx5e_mod_hdr_dealloc(mod_acts); + return modify_hdr; + +err_modify_hdr: + mlx5e_mod_hdr_dealloc(mod_acts); +err_set_regc0: + return ERR_PTR(err); +} + +static struct mlx5e_sample_restore * +sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id, + struct mlx5e_tc_mod_hdr_acts *mod_acts) +{ + struct mlx5_eswitch *esw = tc_psample->esw; + struct mlx5_core_dev *mdev = esw->dev; + struct mlx5e_sample_restore *restore; + struct mlx5_modify_hdr *modify_hdr; + int err; + + mutex_lock(&tc_psample->restore_lock); + hash_for_each_possible(tc_psample->restore_hashtbl, restore, hlist, obj_id) + if (restore->obj_id == obj_id) + goto add_ref; + + restore = kzalloc(sizeof(*restore), GFP_KERNEL); + if (!restore) { + err = -ENOMEM; + goto err_alloc; + } + restore->obj_id = obj_id; + + modify_hdr = sample_modify_hdr_get(mdev, obj_id, mod_acts); + if (IS_ERR(modify_hdr)) { + err = PTR_ERR(modify_hdr); + goto err_modify_hdr; + } + restore->modify_hdr = modify_hdr; + + restore->rule = esw_add_restore_rule(esw, obj_id); + if (IS_ERR(restore->rule)) { + err = PTR_ERR(restore->rule); + goto err_restore; + } + + hash_add(tc_psample->restore_hashtbl, &restore->hlist, obj_id); +add_ref: + restore->count++; + mutex_unlock(&tc_psample->restore_lock); + return restore; + +err_restore: + mlx5_modify_header_dealloc(mdev, restore->modify_hdr); +err_modify_hdr: + kfree(restore); +err_alloc: + mutex_unlock(&tc_psample->restore_lock); + return ERR_PTR(err); +} + +static void +sample_restore_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sample_restore *restore) +{ + mutex_lock(&tc_psample->restore_lock); + if (--restore->count == 0) + hash_del(&restore->hlist); + mutex_unlock(&tc_psample->restore_lock); + + if (!restore->count) { + mlx5_del_flow_rules(restore->rule); + mlx5_modify_header_dealloc(tc_psample->esw->dev, restore->modify_hdr); + kfree(restore); + } +} + +void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj) +{ + u32 trunc_size = mapped_obj->sample.trunc_size; + struct psample_group 
psample_group = {}; + struct psample_metadata md = {}; + + md.trunc_size = trunc_size ? min(trunc_size, skb->len) : skb->len; + md.in_ifindex = skb->dev->ifindex; + psample_group.group_num = mapped_obj->sample.group_id; + psample_group.net = &init_net; + skb_push(skb, skb->mac_len); + + psample_sample_packet(&psample_group, skb, mapped_obj->sample.rate, &md); +} + +static int +add_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow, + struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr, + u32 *default_tbl_id) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + u32 attr_sz = ns_to_attr_sz(MLX5_FLOW_NAMESPACE_FDB); + struct mlx5_vport_tbl_attr per_vport_tbl_attr; + struct mlx5_flow_table *default_tbl; + struct mlx5_flow_attr *post_attr; + int err; + + /* Allocate default table per vport, chain and prio. Otherwise, there is + * only one default table for the same sampler object. Rules with different + * prio and chain may overlap. For CT sample action, per vport default + * table is needed to resotre the metadata. + */ + per_vport_tbl_attr.chain = attr->chain; + per_vport_tbl_attr.prio = attr->prio; + per_vport_tbl_attr.vport = esw_attr->in_rep->vport; + per_vport_tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns; + default_tbl = mlx5_esw_vporttbl_get(esw, &per_vport_tbl_attr); + if (IS_ERR(default_tbl)) { + err = PTR_ERR(default_tbl); + goto err_default_tbl; + } + *default_tbl_id = default_tbl->id; + + post_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); + if (!post_attr) { + err = -ENOMEM; + goto err_attr; + } + sample_flow->post_attr = post_attr; + memcpy(post_attr, attr, attr_sz); + /* Perform the original matches on the default table. + * Offload all actions except the sample action. + */ + post_attr->chain = 0; + post_attr->prio = 0; + post_attr->ft = default_tbl; + post_attr->flags = MLX5_ATTR_FLAG_NO_IN_PORT; + + /* When offloading sample and encap action, if there is no valid + * neigh data struct, a slow path rule is offloaded first. Source + * port metadata match is set at that time. A per vport table is + * already allocated. No need to match it again. So clear the source + * port metadata match. 
+ */ + mlx5_eswitch_clear_rule_source_port(esw, spec); + sample_flow->post_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, post_attr); + if (IS_ERR(sample_flow->post_rule)) { + err = PTR_ERR(sample_flow->post_rule); + goto err_rule; + } + return 0; + +err_rule: + kfree(post_attr); +err_attr: + mlx5_esw_vporttbl_put(esw, &per_vport_tbl_attr); +err_default_tbl: + return err; +} + +static void +del_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_vport_tbl_attr tbl_attr; + + mlx5_eswitch_del_offloaded_rule(esw, sample_flow->post_rule, sample_flow->post_attr); + kfree(sample_flow->post_attr); + tbl_attr.chain = attr->chain; + tbl_attr.prio = attr->prio; + tbl_attr.vport = esw_attr->in_rep->vport; + tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns; + mlx5_esw_vporttbl_put(esw, &tbl_attr); +} + +/* For the following typical flow table: + * + * +-------------------------------+ + * + original flow table + + * +-------------------------------+ + * + original match + + * +-------------------------------+ + * + sample action + other actions + + * +-------------------------------+ + * + * We translate the tc filter with sample action to the following HW model: + * + * +---------------------+ + * + original flow table + + * +---------------------+ + * + original match + + * +---------------------+ + * | set fte_id (if reg_c preserve cap) + * | do decap (if required) + * v + * +------------------------------------------------+ + * + Flow Sampler Object + + * +------------------------------------------------+ + * + sample ratio + + * +------------------------------------------------+ + * + sample table id | default table id + + * +------------------------------------------------+ + * | | + * v v + * +-----------------------------+ +-------------------+ + * + sample table + + default table + + * +-----------------------------+ +-------------------+ + * + forward to management vport + | + * +-----------------------------+ | + * +-------+------+ + * | |reg_c preserve cap + * | |or decap action + * v v + * +-----------------+ +-------------+ + * + per vport table + + post action + + * +-----------------+ +-------------+ + * + original match + + * +-----------------+ + * + other actions + + * +-----------------+ + */ +struct mlx5_flow_handle * +mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_esw_flow_attr *pre_esw_attr; + struct mlx5_mapped_obj restore_obj = {}; + struct mlx5e_tc_mod_hdr_acts *mod_acts; + struct mlx5e_sample_flow *sample_flow; + struct mlx5e_sample_attr *sample_attr; + struct mlx5_flow_attr *pre_attr; + struct mlx5_eswitch *esw; + u32 default_tbl_id; + u32 obj_id; + int err; + + if (IS_ERR_OR_NULL(tc_psample)) + return ERR_PTR(-EOPNOTSUPP); + + sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL); + if (!sample_flow) + return ERR_PTR(-ENOMEM); + sample_attr = &attr->sample_attr; + sample_attr->sample_flow = sample_flow; + + /* For NICs with reg_c_preserve support or decap action, use + * post action instead of the per vport, chain and prio table. + * Only match the fte id instead of the same match in the + * original flow table. 
+ */ + esw = tc_psample->esw; + if (mlx5e_tc_act_sample_is_multi_table(esw->dev, attr)) { + struct mlx5_flow_table *ft; + + ft = mlx5e_tc_post_act_get_ft(tc_psample->post_act); + default_tbl_id = ft->id; + } else { + err = add_post_rule(esw, sample_flow, spec, attr, &default_tbl_id); + if (err) + goto err_post_rule; + } + + /* Create sampler object. */ + sample_flow->sampler = sampler_get(tc_psample, sample_attr->rate, default_tbl_id); + if (IS_ERR(sample_flow->sampler)) { + err = PTR_ERR(sample_flow->sampler); + goto err_sampler; + } + sample_attr->sampler_id = sample_flow->sampler->sampler_id; + + /* Create an id mapping reg_c0 value to sample object. */ + restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE; + restore_obj.sample.group_id = sample_attr->group_num; + restore_obj.sample.rate = sample_attr->rate; + restore_obj.sample.trunc_size = sample_attr->trunc_size; + restore_obj.sample.tunnel_id = attr->tunnel_id; + err = mapping_add(esw->offloads.reg_c0_obj_pool, &restore_obj, &obj_id); + if (err) + goto err_obj_id; + sample_attr->restore_obj_id = obj_id; + + /* Create sample restore context. */ + mod_acts = &attr->parse_attr->mod_hdr_acts; + sample_flow->restore = sample_restore_get(tc_psample, obj_id, mod_acts); + if (IS_ERR(sample_flow->restore)) { + err = PTR_ERR(sample_flow->restore); + goto err_sample_restore; + } + + /* Perform the original matches on the original table. Offload the + * sample action. The destination is the sampler object. + */ + pre_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); + if (!pre_attr) { + err = -ENOMEM; + goto err_alloc_pre_flow_attr; + } + pre_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + /* For decap action, do decap in the original flow table instead of the + * default flow table. + */ + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) + pre_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + pre_attr->modify_hdr = sample_flow->restore->modify_hdr; + pre_attr->flags = MLX5_ATTR_FLAG_SAMPLE; + pre_attr->inner_match_level = attr->inner_match_level; + pre_attr->outer_match_level = attr->outer_match_level; + pre_attr->chain = attr->chain; + pre_attr->prio = attr->prio; + pre_attr->ft = attr->ft; + pre_attr->sample_attr = *sample_attr; + pre_esw_attr = pre_attr->esw_attr; + pre_esw_attr->in_mdev = esw_attr->in_mdev; + pre_esw_attr->in_rep = esw_attr->in_rep; + sample_flow->pre_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, pre_attr); + if (IS_ERR(sample_flow->pre_rule)) { + err = PTR_ERR(sample_flow->pre_rule); + goto err_pre_offload_rule; + } + sample_flow->pre_attr = pre_attr; + + return sample_flow->pre_rule; + +err_pre_offload_rule: + kfree(pre_attr); +err_alloc_pre_flow_attr: + sample_restore_put(tc_psample, sample_flow->restore); +err_sample_restore: + mapping_remove(esw->offloads.reg_c0_obj_pool, obj_id); +err_obj_id: + sampler_put(tc_psample, sample_flow->sampler); +err_sampler: + if (sample_flow->post_rule) + del_post_rule(esw, sample_flow, attr); +err_post_rule: + kfree(sample_flow); + return ERR_PTR(err); +} + +void +mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_sample_flow *sample_flow; + struct mlx5_eswitch *esw; + + if (IS_ERR_OR_NULL(tc_psample)) + return; + + /* The following delete order can't be changed, otherwise, + * will hit fw syndromes. 
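 * (The pre rule still references the sampler object and the restore modify
 * header, so those can only be released after the rule itself is deleted.)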
+ */ + esw = tc_psample->esw; + sample_flow = attr->sample_attr.sample_flow; + mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, sample_flow->pre_attr); + + sample_restore_put(tc_psample, sample_flow->restore); + mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr.restore_obj_id); + sampler_put(tc_psample, sample_flow->sampler); + if (sample_flow->post_rule) + del_post_rule(esw, sample_flow, attr); + + kfree(sample_flow->pre_attr); + kfree(sample_flow); +} + +struct mlx5e_tc_psample * +mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act) +{ + struct mlx5e_tc_psample *tc_psample; + int err; + + tc_psample = kzalloc(sizeof(*tc_psample), GFP_KERNEL); + if (!tc_psample) + return ERR_PTR(-ENOMEM); + if (IS_ERR_OR_NULL(post_act)) { + err = PTR_ERR(post_act); + goto err_post_act; + } + tc_psample->post_act = post_act; + tc_psample->esw = esw; + err = sampler_termtbl_create(tc_psample); + if (err) + goto err_post_act; + + mutex_init(&tc_psample->ht_lock); + mutex_init(&tc_psample->restore_lock); + + return tc_psample; + +err_post_act: + kfree(tc_psample); + return ERR_PTR(err); +} + +void +mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample) +{ + if (IS_ERR_OR_NULL(tc_psample)) + return; + + mutex_destroy(&tc_psample->restore_lock); + mutex_destroy(&tc_psample->ht_lock); + sampler_termtbl_destroy(tc_psample); + kfree(tc_psample); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h new file mode 100644 index 000000000..a569367ea --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TC_SAMPLE_H__ +#define __MLX5_EN_TC_SAMPLE_H__ + +#include "eswitch.h" + +struct mlx5_flow_attr; +struct mlx5e_tc_psample; +struct mlx5e_post_act; + +struct mlx5e_sample_attr { + u32 group_num; + u32 rate; + u32 trunc_size; + u32 restore_obj_id; + u32 sampler_id; + struct mlx5e_sample_flow *sample_flow; +}; + +#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) + +void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj); + +struct mlx5_flow_handle * +mlx5e_tc_sample_offload(struct mlx5e_tc_psample *sample_priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); + +void +mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *sample_priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + +struct mlx5e_tc_psample * +mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act); + +void +mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample); + +#else /* CONFIG_MLX5_TC_SAMPLE */ + +static inline struct mlx5_flow_handle * +mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr) +{ return ERR_PTR(-EOPNOTSUPP); } + +static inline void +mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr) {} + +static inline struct mlx5e_tc_psample * +mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act) +{ return ERR_PTR(-EOPNOTSUPP); } + +static inline void +mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample) {} + +static inline void +mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj) {} + +#endif /* CONFIG_MLX5_TC_SAMPLE */ +#endif /* __MLX5_EN_TC_SAMPLE_H__ */ diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c new file mode 100644 index 000000000..f01f7dfdb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -0,0 +1,2272 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_zones.h> +#include <net/netfilter/nf_conntrack_labels.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_acct.h> +#include <uapi/linux/tc_act/tc_pedit.h> +#include <net/tc_act/tc_ct.h> +#include <net/flow_offload.h> +#include <net/netfilter/nf_flow_table.h> +#include <linux/workqueue.h> +#include <linux/refcount.h> +#include <linux/xarray.h> +#include <linux/if_macvlan.h> +#include <linux/debugfs.h> + +#include "lib/fs_chains.h" +#include "en/tc_ct.h" +#include "en/tc/ct_fs.h" +#include "en/tc_priv.h" +#include "en/mod_hdr.h" +#include "en/mapping.h" +#include "en/tc/post_act.h" +#include "en.h" +#include "en_tc.h" +#include "en_rep.h" +#include "fs_core.h" + +#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1) +#define MLX5_CT_STATE_TRK_BIT BIT(2) +#define MLX5_CT_STATE_NAT_BIT BIT(3) +#define MLX5_CT_STATE_REPLY_BIT BIT(4) +#define MLX5_CT_STATE_RELATED_BIT BIT(5) +#define MLX5_CT_STATE_INVALID_BIT BIT(6) + +#define MLX5_CT_LABELS_BITS MLX5_REG_MAPPING_MBITS(LABELS_TO_REG) +#define MLX5_CT_LABELS_MASK MLX5_REG_MAPPING_MASK(LABELS_TO_REG) + +/* Statically allocate modify actions for + * ipv6 and port nat (5) + tuple fields (4) + nic mode zone restore (1) = 10. + * This will be increased dynamically if needed (for the ipv6 snat + dnat). + */ +#define MLX5_CT_MIN_MOD_ACTS 10 + +#define ct_dbg(fmt, args...)\ + netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args) + +struct mlx5_tc_ct_debugfs { + struct { + atomic_t offloaded; + atomic_t rx_dropped; + } stats; + + struct dentry *root; +}; + +struct mlx5_tc_ct_priv { + struct mlx5_core_dev *dev; + const struct net_device *netdev; + struct mod_hdr_tbl *mod_hdr_tbl; + struct xarray tuple_ids; + struct rhashtable zone_ht; + struct rhashtable ct_tuples_ht; + struct rhashtable ct_tuples_nat_ht; + struct mlx5_flow_table *ct; + struct mlx5_flow_table *ct_nat; + struct mlx5e_post_act *post_act; + struct mutex control_lock; /* guards parallel adds/dels */ + struct mapping_ctx *zone_mapping; + struct mapping_ctx *labels_mapping; + enum mlx5_flow_namespace_type ns_type; + struct mlx5_fs_chains *chains; + struct mlx5_ct_fs *fs; + struct mlx5_ct_fs_ops *fs_ops; + spinlock_t ht_lock; /* protects ft entries */ + struct workqueue_struct *wq; + + struct mlx5_tc_ct_debugfs debugfs; +}; + +struct mlx5_ct_flow { + struct mlx5_flow_attr *pre_ct_attr; + struct mlx5_flow_handle *pre_ct_rule; + struct mlx5_ct_ft *ft; + u32 chain_mapping; +}; + +struct mlx5_ct_zone_rule { + struct mlx5_ct_fs_rule *rule; + struct mlx5e_mod_hdr_handle *mh; + struct mlx5_flow_attr *attr; + bool nat; +}; + +struct mlx5_tc_ct_pre { + struct mlx5_flow_table *ft; + struct mlx5_flow_group *flow_grp; + struct mlx5_flow_group *miss_grp; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_handle *miss_rule; + struct mlx5_modify_hdr *modify_hdr; +}; + +struct mlx5_ct_ft { + struct rhash_head node; + u16 zone; + u32 zone_restore_id; + refcount_t refcount; + struct nf_flowtable *nf_ft; + struct mlx5_tc_ct_priv *ct_priv; + struct rhashtable ct_entries_ht; + struct mlx5_tc_ct_pre pre_ct; + struct 
mlx5_tc_ct_pre pre_ct_nat; +}; + +struct mlx5_ct_tuple { + u16 addr_type; + __be16 n_proto; + u8 ip_proto; + struct { + union { + __be32 src_v4; + struct in6_addr src_v6; + }; + union { + __be32 dst_v4; + struct in6_addr dst_v6; + }; + } ip; + struct { + __be16 src; + __be16 dst; + } port; + + u16 zone; +}; + +struct mlx5_ct_counter { + struct mlx5_fc *counter; + refcount_t refcount; + bool is_shared; +}; + +enum { + MLX5_CT_ENTRY_FLAG_VALID, +}; + +struct mlx5_ct_entry { + struct rhash_head node; + struct rhash_head tuple_node; + struct rhash_head tuple_nat_node; + struct mlx5_ct_counter *counter; + unsigned long cookie; + unsigned long restore_cookie; + struct mlx5_ct_tuple tuple; + struct mlx5_ct_tuple tuple_nat; + struct mlx5_ct_zone_rule zone_rules[2]; + + struct mlx5_tc_ct_priv *ct_priv; + struct work_struct work; + + refcount_t refcnt; + unsigned long flags; +}; + +static void +mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_flow_attr *attr, + struct mlx5e_mod_hdr_handle *mh); + +static const struct rhashtable_params cts_ht_params = { + .head_offset = offsetof(struct mlx5_ct_entry, node), + .key_offset = offsetof(struct mlx5_ct_entry, cookie), + .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie), + .automatic_shrinking = true, + .min_size = 16 * 1024, +}; + +static const struct rhashtable_params zone_params = { + .head_offset = offsetof(struct mlx5_ct_ft, node), + .key_offset = offsetof(struct mlx5_ct_ft, zone), + .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone), + .automatic_shrinking = true, +}; + +static const struct rhashtable_params tuples_ht_params = { + .head_offset = offsetof(struct mlx5_ct_entry, tuple_node), + .key_offset = offsetof(struct mlx5_ct_entry, tuple), + .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple), + .automatic_shrinking = true, + .min_size = 16 * 1024, +}; + +static const struct rhashtable_params tuples_nat_ht_params = { + .head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node), + .key_offset = offsetof(struct mlx5_ct_entry, tuple_nat), + .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat), + .automatic_shrinking = true, + .min_size = 16 * 1024, +}; + +static bool +mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry) +{ + return !!(entry->tuple_nat_node.next); +} + +static int +mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv, + u32 *labels, u32 *id) +{ + if (!memchr_inv(labels, 0, sizeof(u32) * 4)) { + *id = 0; + return 0; + } + + if (mapping_add(ct_priv->labels_mapping, labels, id)) + return -EOPNOTSUPP; + + return 0; +} + +static void +mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id) +{ + if (id) + mapping_remove(ct_priv->labels_mapping, id); +} + +static int +mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule) +{ + struct flow_match_control control; + struct flow_match_basic basic; + + flow_rule_match_basic(rule, &basic); + flow_rule_match_control(rule, &control); + + tuple->n_proto = basic.key->n_proto; + tuple->ip_proto = basic.key->ip_proto; + tuple->addr_type = control.key->addr_type; + + if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_ipv4_addrs(rule, &match); + tuple->ip.src_v4 = match.key->src; + tuple->ip.dst_v4 = match.key->dst; + } else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_ipv6_addrs(rule, &match); + tuple->ip.src_v6 = match.key->src; + tuple->ip.dst_v6 = match.key->dst; + } else { + return 
-EOPNOTSUPP; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_ports(rule, &match); + switch (tuple->ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + tuple->port.src = match.key->src; + tuple->port.dst = match.key->dst; + break; + default: + return -EOPNOTSUPP; + } + } else { + if (tuple->ip_proto != IPPROTO_GRE) + return -EOPNOTSUPP; + } + + return 0; +} + +static int +mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple, + struct flow_rule *rule) +{ + struct flow_action *flow_action = &rule->action; + struct flow_action_entry *act; + u32 offset, val, ip6_offset; + int i; + + flow_action_for_each(i, act, flow_action) { + if (act->id != FLOW_ACTION_MANGLE) + continue; + + offset = act->mangle.offset; + val = act->mangle.val; + switch (act->mangle.htype) { + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: + if (offset == offsetof(struct iphdr, saddr)) + tuple->ip.src_v4 = cpu_to_be32(val); + else if (offset == offsetof(struct iphdr, daddr)) + tuple->ip.dst_v4 = cpu_to_be32(val); + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_IP6: + ip6_offset = (offset - offsetof(struct ipv6hdr, saddr)); + ip6_offset /= 4; + if (ip6_offset < 4) + tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val); + else if (ip6_offset < 8) + tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val); + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_TCP: + if (offset == offsetof(struct tcphdr, source)) + tuple->port.src = cpu_to_be16(val); + else if (offset == offsetof(struct tcphdr, dest)) + tuple->port.dst = cpu_to_be16(val); + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_UDP: + if (offset == offsetof(struct udphdr, source)) + tuple->port.src = cpu_to_be16(val); + else if (offset == offsetof(struct udphdr, dest)) + tuple->port.dst = cpu_to_be16(val); + else + return -EOPNOTSUPP; + break; + + default: + return -EOPNOTSUPP; + } + } + + return 0; +} + +static int +mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv, + struct net_device *ndev) +{ + struct mlx5e_priv *other_priv = netdev_priv(ndev); + struct mlx5_core_dev *mdev = ct_priv->dev; + bool vf_rep, uplink_rep; + + vf_rep = mlx5e_eswitch_vf_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev); + uplink_rep = mlx5e_eswitch_uplink_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev); + + if (vf_rep) + return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; + if (uplink_rep) + return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + if (is_vlan_dev(ndev)) + return mlx5_tc_ct_get_flow_source_match(ct_priv, vlan_dev_real_dev(ndev)); + if (netif_is_macvlan(ndev)) + return mlx5_tc_ct_get_flow_source_match(ct_priv, macvlan_dev_real_dev(ndev)); + if (mlx5e_get_tc_tun(ndev) || netif_is_lag_master(ndev)) + return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + + return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT; +} + +static int +mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_flow_spec *spec, + struct flow_rule *rule) +{ + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + u16 addr_type = 0; + u8 ip_proto = 0; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + + mlx5e_tc_set_ethertype(ct_priv->dev, &match, true, headers_c, headers_v); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, + 
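
A worked example of the ip6_offset arithmetic in mlx5_tc_ct_rule_to_tuple_nat() above, assuming the standard struct ipv6hdr layout (saddr at byte offset 8, daddr at byte offset 24):

/* pedit offset 12: ip6_offset = (12 - 8) / 4 = 1, which is < 4,
 * so the 32-bit value lands in tuple->ip.src_v6.s6_addr32[1].
 *
 * pedit offset 28: ip6_offset = (28 - 8) / 4 = 5, which falls in 4..7,
 * so the value lands in tuple->ip.dst_v6.s6_addr32[5 - 4], i.e. the
 * second word of the rewritten destination address.
 */
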
match.mask->ip_proto); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, + match.key->ip_proto); + + ip_proto = match.key->ip_proto; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_ipv4_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &match.mask->src, sizeof(match.mask->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &match.key->src, sizeof(match.key->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &match.mask->dst, sizeof(match.mask->dst)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &match.key->dst, sizeof(match.key->dst)); + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_ipv6_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.mask->src, sizeof(match.mask->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.key->src, sizeof(match.key->src)); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.mask->dst, sizeof(match.mask->dst)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.key->dst, sizeof(match.key->dst)); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_ports(rule, &match); + switch (ip_proto) { + case IPPROTO_TCP: + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + tcp_sport, ntohs(match.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + tcp_sport, ntohs(match.key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + tcp_dport, ntohs(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + tcp_dport, ntohs(match.key->dst)); + break; + + case IPPROTO_UDP: + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_sport, ntohs(match.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_sport, ntohs(match.key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_dport, ntohs(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_dport, ntohs(match.key->dst)); + break; + default: + break; + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { + struct flow_match_tcp match; + + flow_rule_match_tcp(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, + ntohs(match.mask->flags)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, + ntohs(match.key->flags)); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) { + struct flow_match_meta match; + + flow_rule_match_meta(rule, &match); + + if (match.key->ingress_ifindex & match.mask->ingress_ifindex) { + struct net_device *dev; + + dev = dev_get_by_index(&init_net, match.key->ingress_ifindex); + if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source)) + spec->flow_context.flow_source = + mlx5_tc_ct_get_flow_source_match(ct_priv, dev); + + dev_put(dev); + } + } + + return 0; +} + +static void +mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry) +{ + if 
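
mlx5_tc_ct_set_tuple_match() above always writes each field twice: a mask into the match criteria (headers_c) and a value into the match value (headers_v). An illustrative fragment of that pairing for an exact match on TCP destination port 80 (the port number here is only an example):

/* mask: all 16 bits of tcp_dport are significant */
MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport, 0xffff);
/* value: the destination port to match */
MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport, 80);
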
(entry->counter->is_shared && + !refcount_dec_and_test(&entry->counter->refcount)) + return; + + mlx5_fc_destroy(ct_priv->dev, entry->counter->counter); + kfree(entry->counter); +} + +static void +mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_ct_entry *entry, + bool nat) +{ + struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; + struct mlx5_flow_attr *attr = zone_rule->attr; + + ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone); + + ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule); + mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh); + mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); + kfree(attr); +} + +static void +mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_ct_entry *entry) +{ + mlx5_tc_ct_entry_del_rule(ct_priv, entry, true); + mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); + + atomic_dec(&ct_priv->debugfs.stats.offloaded); +} + +static struct flow_action_entry * +mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule) +{ + struct flow_action *flow_action = &flow_rule->action; + struct flow_action_entry *act; + int i; + + flow_action_for_each(i, act, flow_action) { + if (act->id == FLOW_ACTION_CT_METADATA) + return act; + } + + return NULL; +} + +static int +mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5e_tc_mod_hdr_acts *mod_acts, + u8 ct_state, + u32 mark, + u32 labels_id, + u8 zone_restore_id) +{ + enum mlx5_flow_namespace_type ns = ct_priv->ns_type; + struct mlx5_core_dev *dev = ct_priv->dev; + int err; + + err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, + CTSTATE_TO_REG, ct_state); + if (err) + return err; + + err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, + MARK_TO_REG, mark); + if (err) + return err; + + err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, + LABELS_TO_REG, labels_id); + if (err) + return err; + + err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, + ZONE_RESTORE_TO_REG, zone_restore_id); + if (err) + return err; + + /* Make another copy of zone id in reg_b for + * NIC rx flows since we don't copy reg_c1 to + * reg_b upon miss. 
+ */ + if (ns != MLX5_FLOW_NAMESPACE_FDB) { + err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, + NIC_ZONE_RESTORE_TO_REG, zone_restore_id); + if (err) + return err; + } + return 0; +} + +int mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv, + struct mlx5e_tc_mod_hdr_acts *mod_acts) +{ + return mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0); +} + +static int +mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act, + char *modact) +{ + u32 offset = act->mangle.offset, field; + + switch (act->mangle.htype) { + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: + MLX5_SET(set_action_in, modact, length, 0); + if (offset == offsetof(struct iphdr, saddr)) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV4; + else if (offset == offsetof(struct iphdr, daddr)) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV4; + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_IP6: + MLX5_SET(set_action_in, modact, length, 0); + if (offset == offsetof(struct ipv6hdr, saddr) + 12) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0; + else if (offset == offsetof(struct ipv6hdr, saddr) + 8) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32; + else if (offset == offsetof(struct ipv6hdr, saddr) + 4) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64; + else if (offset == offsetof(struct ipv6hdr, saddr)) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96; + else if (offset == offsetof(struct ipv6hdr, daddr) + 12) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0; + else if (offset == offsetof(struct ipv6hdr, daddr) + 8) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32; + else if (offset == offsetof(struct ipv6hdr, daddr) + 4) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64; + else if (offset == offsetof(struct ipv6hdr, daddr)) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96; + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_TCP: + MLX5_SET(set_action_in, modact, length, 16); + if (offset == offsetof(struct tcphdr, source)) + field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT; + else if (offset == offsetof(struct tcphdr, dest)) + field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT; + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_UDP: + MLX5_SET(set_action_in, modact, length, 16); + if (offset == offsetof(struct udphdr, source)) + field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT; + else if (offset == offsetof(struct udphdr, dest)) + field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT; + else + return -EOPNOTSUPP; + break; + + default: + return -EOPNOTSUPP; + } + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, offset, 0); + MLX5_SET(set_action_in, modact, field, field); + MLX5_SET(set_action_in, modact, data, act->mangle.val); + + return 0; +} + +static int +mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5e_tc_mod_hdr_acts *mod_acts) +{ + struct flow_action *flow_action = &flow_rule->action; + struct mlx5_core_dev *mdev = ct_priv->dev; + struct flow_action_entry *act; + char *modact; + int err, i; + + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_MANGLE: { + modact = mlx5e_mod_hdr_alloc(mdev, ct_priv->ns_type, mod_acts); + if (IS_ERR(modact)) + return PTR_ERR(modact); + + err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact); + if (err) + return err; + + mod_acts->num_actions++; + } + break; + + case FLOW_ACTION_CT_METADATA: + /* Handled earlier */ + continue; + default: + return -EOPNOTSUPP; + } + } + + return 0; +} + +static int 
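
To illustrate the translation done by mlx5_tc_ct_parse_mangle_to_mod_act() above (field names taken from the surrounding code; the scenario itself is only an example):

/* A pedit rewrite of the IPv4 source address, i.e. a mangle entry with
 * htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4 and
 * offset == offsetof(struct iphdr, saddr), becomes a single set_action_in:
 *
 *   action_type = MLX5_ACTION_TYPE_SET
 *   field       = MLX5_ACTION_IN_FIELD_OUT_SIPV4
 *   length      = 0   (the encoding used above for full 32-bit fields)
 *   offset      = 0
 *   data        = act->mangle.val
 *
 * Port rewrites follow the same shape but use length 16 and the
 * TCP/UDP SPORT/DPORT fields.
 */
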
+mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_flow_attr *attr, + struct flow_rule *flow_rule, + struct mlx5e_mod_hdr_handle **mh, + u8 zone_restore_id, bool nat_table, bool has_nat) +{ + DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS); + DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr); + struct flow_action_entry *meta; + u16 ct_state = 0; + int err; + + meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule); + if (!meta) + return -EOPNOTSUPP; + + err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels, + &attr->ct_attr.ct_labels_id); + if (err) + return -EOPNOTSUPP; + if (nat_table) { + if (has_nat) { + err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, &mod_acts); + if (err) + goto err_mapping; + } + + ct_state |= MLX5_CT_STATE_NAT_BIT; + } + + ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT; + ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT; + err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts, + ct_state, + meta->ct_metadata.mark, + attr->ct_attr.ct_labels_id, + zone_restore_id); + if (err) + goto err_mapping; + + if (nat_table && has_nat) { + attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type, + mod_acts.num_actions, + mod_acts.actions); + if (IS_ERR(attr->modify_hdr)) { + err = PTR_ERR(attr->modify_hdr); + goto err_mapping; + } + + *mh = NULL; + } else { + *mh = mlx5e_mod_hdr_attach(ct_priv->dev, + ct_priv->mod_hdr_tbl, + ct_priv->ns_type, + &mod_acts); + if (IS_ERR(*mh)) { + err = PTR_ERR(*mh); + goto err_mapping; + } + attr->modify_hdr = mlx5e_mod_hdr_get(*mh); + } + + mlx5e_mod_hdr_dealloc(&mod_acts); + return 0; + +err_mapping: + mlx5e_mod_hdr_dealloc(&mod_acts); + mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); + return err; +} + +static void +mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_flow_attr *attr, + struct mlx5e_mod_hdr_handle *mh) +{ + if (mh) + mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, mh); + else + mlx5_modify_header_dealloc(ct_priv->dev, attr->modify_hdr); +} + +static int +mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5_ct_entry *entry, + bool nat, u8 zone_restore_id) +{ + struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; + struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); + struct mlx5_flow_spec *spec = NULL; + struct mlx5_flow_attr *attr; + int err; + + zone_rule->nat = nat; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + attr = mlx5_alloc_flow_attr(ct_priv->ns_type); + if (!attr) { + err = -ENOMEM; + goto err_attr; + } + + err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, + &zone_rule->mh, + zone_restore_id, + nat, + mlx5_tc_ct_entry_has_nat(entry)); + if (err) { + ct_dbg("Failed to create ct entry mod hdr"); + goto err_mod_hdr; + } + + attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->dest_chain = 0; + attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act); + attr->ft = nat ? 
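
As a concrete reading of the ct_state composition in mlx5_tc_ct_entry_create_mod_hdr() above:

/* For a rule installed in the NAT table (nat_table == true) whose
 * metadata marks the reply direction (orig_dir == false), the value
 * written to CTSTATE_TO_REG is
 *
 *   MLX5_CT_STATE_NAT_BIT | MLX5_CT_STATE_ESTABLISHED_BIT |
 *   MLX5_CT_STATE_TRK_BIT | MLX5_CT_STATE_REPLY_BIT
 *
 * i.e. BIT(3) | BIT(1) | BIT(2) | BIT(4) == 0x1e.
 */
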
ct_priv->ct_nat : ct_priv->ct; + if (entry->tuple.ip_proto == IPPROTO_TCP || + entry->tuple.ip_proto == IPPROTO_UDP) + attr->outer_match_level = MLX5_MATCH_L4; + else + attr->outer_match_level = MLX5_MATCH_L3; + attr->counter = entry->counter->counter; + attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT; + if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) + attr->esw_attr->in_mdev = priv->mdev; + + mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule); + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK); + + zone_rule->rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule); + if (IS_ERR(zone_rule->rule)) { + err = PTR_ERR(zone_rule->rule); + ct_dbg("Failed to add ct entry rule, nat: %d", nat); + goto err_rule; + } + + zone_rule->attr = attr; + + kvfree(spec); + ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone); + + return 0; + +err_rule: + mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh); + mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); +err_mod_hdr: + kfree(attr); +err_attr: + kvfree(spec); + return err; +} + +static bool +mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry) +{ + return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags); +} + +static struct mlx5_ct_entry * +mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple) +{ + struct mlx5_ct_entry *entry; + + entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple, + tuples_ht_params); + if (entry && mlx5_tc_ct_entry_valid(entry) && + refcount_inc_not_zero(&entry->refcnt)) { + return entry; + } else if (!entry) { + entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht, + tuple, tuples_nat_ht_params); + if (entry && mlx5_tc_ct_entry_valid(entry) && + refcount_inc_not_zero(&entry->refcnt)) + return entry; + } + + return entry ? 
ERR_PTR(-EINVAL) : NULL; +} + +static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry) +{ + struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv; + + rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, + &entry->tuple_nat_node, + tuples_nat_ht_params); + rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node, + tuples_ht_params); +} + +static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry) +{ + struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv; + + mlx5_tc_ct_entry_del_rules(ct_priv, entry); + + spin_lock_bh(&ct_priv->ht_lock); + mlx5_tc_ct_entry_remove_from_tuples(entry); + spin_unlock_bh(&ct_priv->ht_lock); + + mlx5_tc_ct_counter_put(ct_priv, entry); + kfree(entry); +} + +static void +mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) +{ + if (!refcount_dec_and_test(&entry->refcnt)) + return; + + mlx5_tc_ct_entry_del(entry); +} + +static void mlx5_tc_ct_entry_del_work(struct work_struct *work) +{ + struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work); + + mlx5_tc_ct_entry_del(entry); +} + +static void +__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) +{ + if (!refcount_dec_and_test(&entry->refcnt)) + return; + + INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work); + queue_work(entry->ct_priv->wq, &entry->work); +} + +static struct mlx5_ct_counter * +mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv) +{ + struct mlx5_ct_counter *counter; + int ret; + + counter = kzalloc(sizeof(*counter), GFP_KERNEL); + if (!counter) + return ERR_PTR(-ENOMEM); + + counter->is_shared = false; + counter->counter = mlx5_fc_create_ex(ct_priv->dev, true); + if (IS_ERR(counter->counter)) { + ct_dbg("Failed to create counter for ct entry"); + ret = PTR_ERR(counter->counter); + kfree(counter); + return ERR_PTR(ret); + } + + return counter; +} + +static struct mlx5_ct_counter * +mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_ct_entry *entry) +{ + struct mlx5_ct_tuple rev_tuple = entry->tuple; + struct mlx5_ct_counter *shared_counter; + struct mlx5_ct_entry *rev_entry; + + /* get the reversed tuple */ + swap(rev_tuple.port.src, rev_tuple.port.dst); + + if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + __be32 tmp_addr = rev_tuple.ip.src_v4; + + rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4; + rev_tuple.ip.dst_v4 = tmp_addr; + } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct in6_addr tmp_addr = rev_tuple.ip.src_v6; + + rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6; + rev_tuple.ip.dst_v6 = tmp_addr; + } else { + return ERR_PTR(-EOPNOTSUPP); + } + + /* Use the same counter as the reverse direction */ + spin_lock_bh(&ct_priv->ht_lock); + rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple); + + if (IS_ERR(rev_entry)) { + spin_unlock_bh(&ct_priv->ht_lock); + goto create_counter; + } + + if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) { + ct_dbg("Using shared counter entry=0x%p rev=0x%p", entry, rev_entry); + shared_counter = rev_entry->counter; + spin_unlock_bh(&ct_priv->ht_lock); + + mlx5_tc_ct_entry_put(rev_entry); + return shared_counter; + } + + spin_unlock_bh(&ct_priv->ht_lock); + +create_counter: + + shared_counter = mlx5_tc_ct_counter_create(ct_priv); + if (IS_ERR(shared_counter)) + return shared_counter; + + shared_counter->is_shared = true; + refcount_set(&shared_counter->refcount, 1); + return shared_counter; +} + +static int +mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5_ct_entry *entry, + u8 
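
The shared-counter lookup in mlx5_tc_ct_shared_counter_get() above swaps the tuple around before searching the tuple tables; an illustrative TCP/IPv4 case (addresses and ports are examples only):

/* original direction:   10.0.0.1:1234 -> 10.0.0.2:80    (zone Z)
 * reversed lookup key:  10.0.0.2:80   -> 10.0.0.1:1234  (zone Z, same proto)
 *
 * If an entry for the reversed tuple is already offloaded and its
 * counter refcount can be taken, both directions of the connection
 * account into that one hardware counter; otherwise a fresh shared
 * counter is created.
 */
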
zone_restore_id) +{ + int err; + + if (nf_ct_acct_enabled(dev_net(ct_priv->netdev))) + entry->counter = mlx5_tc_ct_counter_create(ct_priv); + else + entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry); + + if (IS_ERR(entry->counter)) { + err = PTR_ERR(entry->counter); + return err; + } + + err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false, + zone_restore_id); + if (err) + goto err_orig; + + err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true, + zone_restore_id); + if (err) + goto err_nat; + + atomic_inc(&ct_priv->debugfs.stats.offloaded); + return 0; + +err_nat: + mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); +err_orig: + mlx5_tc_ct_counter_put(ct_priv, entry); + return err; +} + +static int +mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, + struct flow_cls_offload *flow) +{ + struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow); + struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; + struct flow_action_entry *meta_action; + unsigned long cookie = flow->cookie; + struct mlx5_ct_entry *entry; + int err; + + meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule); + if (!meta_action) + return -EOPNOTSUPP; + + spin_lock_bh(&ct_priv->ht_lock); + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); + if (entry && refcount_inc_not_zero(&entry->refcnt)) { + spin_unlock_bh(&ct_priv->ht_lock); + mlx5_tc_ct_entry_put(entry); + return -EEXIST; + } + spin_unlock_bh(&ct_priv->ht_lock); + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->tuple.zone = ft->zone; + entry->cookie = flow->cookie; + entry->restore_cookie = meta_action->ct_metadata.cookie; + refcount_set(&entry->refcnt, 2); + entry->ct_priv = ct_priv; + + err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule); + if (err) + goto err_set; + + memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple)); + err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule); + if (err) + goto err_set; + + spin_lock_bh(&ct_priv->ht_lock); + + err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node, + cts_ht_params); + if (err) + goto err_entries; + + err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht, + &entry->tuple_node, + tuples_ht_params); + if (err) + goto err_tuple; + + if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) { + err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht, + &entry->tuple_nat_node, + tuples_nat_ht_params); + if (err) + goto err_tuple_nat; + } + spin_unlock_bh(&ct_priv->ht_lock); + + err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry, + ft->zone_restore_id); + if (err) + goto err_rules; + + set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags); + mlx5_tc_ct_entry_put(entry); /* this function reference */ + + return 0; + +err_rules: + spin_lock_bh(&ct_priv->ht_lock); + if (mlx5_tc_ct_entry_has_nat(entry)) + rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, + &entry->tuple_nat_node, tuples_nat_ht_params); +err_tuple_nat: + rhashtable_remove_fast(&ct_priv->ct_tuples_ht, + &entry->tuple_node, + tuples_ht_params); +err_tuple: + rhashtable_remove_fast(&ft->ct_entries_ht, + &entry->node, + cts_ht_params); +err_entries: + spin_unlock_bh(&ct_priv->ht_lock); +err_set: + kfree(entry); + if (err != -EEXIST) + netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err); + return err; +} + +static int +mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft, + struct flow_cls_offload *flow) +{ + struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; + 
unsigned long cookie = flow->cookie; + struct mlx5_ct_entry *entry; + + spin_lock_bh(&ct_priv->ht_lock); + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); + if (!entry) { + spin_unlock_bh(&ct_priv->ht_lock); + return -ENOENT; + } + + if (!mlx5_tc_ct_entry_valid(entry)) { + spin_unlock_bh(&ct_priv->ht_lock); + return -EINVAL; + } + + rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params); + spin_unlock_bh(&ct_priv->ht_lock); + + mlx5_tc_ct_entry_put(entry); + + return 0; +} + +static int +mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft, + struct flow_cls_offload *f) +{ + struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; + unsigned long cookie = f->cookie; + struct mlx5_ct_entry *entry; + u64 lastuse, packets, bytes; + + spin_lock_bh(&ct_priv->ht_lock); + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); + if (!entry) { + spin_unlock_bh(&ct_priv->ht_lock); + return -ENOENT; + } + + if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) { + spin_unlock_bh(&ct_priv->ht_lock); + return -EINVAL; + } + + spin_unlock_bh(&ct_priv->ht_lock); + + mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse); + flow_stats_update(&f->stats, bytes, packets, 0, lastuse, + FLOW_ACTION_HW_STATS_DELAYED); + + mlx5_tc_ct_entry_put(entry); + return 0; +} + +static int +mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct flow_cls_offload *f = type_data; + struct mlx5_ct_ft *ft = cb_priv; + + if (type != TC_SETUP_CLSFLOWER) + return -EOPNOTSUPP; + + switch (f->command) { + case FLOW_CLS_REPLACE: + return mlx5_tc_ct_block_flow_offload_add(ft, f); + case FLOW_CLS_DESTROY: + return mlx5_tc_ct_block_flow_offload_del(ft, f); + case FLOW_CLS_STATS: + return mlx5_tc_ct_block_flow_offload_stats(ft, f); + default: + break; + } + + return -EOPNOTSUPP; +} + +static bool +mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple, + u16 zone) +{ + struct flow_keys flow_keys; + + skb_reset_network_header(skb); + skb_flow_dissect_flow_keys(skb, &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP); + + tuple->zone = zone; + + if (flow_keys.basic.ip_proto != IPPROTO_TCP && + flow_keys.basic.ip_proto != IPPROTO_UDP && + flow_keys.basic.ip_proto != IPPROTO_GRE) + return false; + + if (flow_keys.basic.ip_proto == IPPROTO_TCP || + flow_keys.basic.ip_proto == IPPROTO_UDP) { + tuple->port.src = flow_keys.ports.src; + tuple->port.dst = flow_keys.ports.dst; + } + tuple->n_proto = flow_keys.basic.n_proto; + tuple->ip_proto = flow_keys.basic.ip_proto; + + switch (flow_keys.basic.n_proto) { + case htons(ETH_P_IP): + tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src; + tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst; + break; + + case htons(ETH_P_IPV6): + tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src; + tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst; + break; + default: + goto out; + } + + return true; + +out: + return false; +} + +int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec) +{ + u32 ctstate = 0, ctstate_mask = 0; + + mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG, + &ctstate, &ctstate_mask); + + if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT) + return -EOPNOTSUPP; + + ctstate_mask |= MLX5_CT_STATE_TRK_BIT; + mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, + ctstate, ctstate_mask); + + return 0; +} + +void mlx5_tc_ct_match_del(struct 
mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr) +{ + if (!priv || !ct_attr->ct_labels_id) + return; + + mlx5_put_label_mapping(priv, ct_attr->ct_labels_id); +} + +int +mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct mlx5_ct_attr *ct_attr, + struct netlink_ext_ack *extack) +{ + bool trk, est, untrk, unest, new, rpl, unrpl, rel, unrel, inv, uninv; + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_dissector_key_ct *mask, *key; + u32 ctstate = 0, ctstate_mask = 0; + u16 ct_state_on, ct_state_off; + u16 ct_state, ct_state_mask; + struct flow_match_ct match; + u32 ct_labels[4]; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) + return 0; + + if (!priv) { + NL_SET_ERR_MSG_MOD(extack, + "offload of ct matching isn't available"); + return -EOPNOTSUPP; + } + + flow_rule_match_ct(rule, &match); + + key = match.key; + mask = match.mask; + + ct_state = key->ct_state; + ct_state_mask = mask->ct_state; + + if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED | + TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED | + TCA_FLOWER_KEY_CT_FLAGS_NEW | + TCA_FLOWER_KEY_CT_FLAGS_REPLY | + TCA_FLOWER_KEY_CT_FLAGS_RELATED | + TCA_FLOWER_KEY_CT_FLAGS_INVALID)) { + NL_SET_ERR_MSG_MOD(extack, + "only ct_state trk, est, new and rpl are supported for offload"); + return -EOPNOTSUPP; + } + + ct_state_on = ct_state & ct_state_mask; + ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask; + trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; + new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW; + est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; + rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY; + rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED; + inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID; + untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; + unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; + unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY; + unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED; + uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID; + + ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0; + ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; + ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0; + ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0; + ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; + ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0; + ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0; + ctstate_mask |= uninv ? 
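
A worked example of the on/off split computed in mlx5_tc_ct_match_add() above, for the tc match ct_state +trk+est-new (using the TCA_FLOWER_KEY_CT_FLAGS_* names):

/* key  = TRACKED | ESTABLISHED
 * mask = TRACKED | ESTABLISHED | NEW
 *
 * ct_state_on  = key & mask           = TRACKED | ESTABLISHED
 * ct_state_off = (key & mask) ^ mask  = NEW
 *
 * trk and est therefore contribute MLX5_CT_STATE_TRK_BIT and
 * MLX5_CT_STATE_ESTABLISHED_BIT to both ctstate and ctstate_mask,
 * while the cleared NEW flag has no register bit of its own; only a
 * +new match (NEW set in the key) is rejected a few lines further down.
 */
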
MLX5_CT_STATE_INVALID_BIT : 0; + + if (rel) { + NL_SET_ERR_MSG_MOD(extack, + "matching on ct_state +rel isn't supported"); + return -EOPNOTSUPP; + } + + if (inv) { + NL_SET_ERR_MSG_MOD(extack, + "matching on ct_state +inv isn't supported"); + return -EOPNOTSUPP; + } + + if (new) { + NL_SET_ERR_MSG_MOD(extack, + "matching on ct_state +new isn't supported"); + return -EOPNOTSUPP; + } + + if (mask->ct_zone) + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, + key->ct_zone, MLX5_CT_ZONE_MASK); + if (ctstate_mask) + mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, + ctstate, ctstate_mask); + if (mask->ct_mark) + mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG, + key->ct_mark, mask->ct_mark); + if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] || + mask->ct_labels[3]) { + ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0]; + ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1]; + ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2]; + ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3]; + if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id)) + return -EOPNOTSUPP; + mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id, + MLX5_CT_LABELS_MASK); + } + + return 0; +} + +int +mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_acts, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (!priv) { + NL_SET_ERR_MSG_MOD(extack, + "offload of ct action isn't available"); + return -EOPNOTSUPP; + } + + attr->ct_attr.zone = act->ct.zone; + attr->ct_attr.ct_action = act->ct.action; + attr->ct_attr.nf_ft = act->ct.flow_table; + + return 0; +} + +static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, + struct mlx5_tc_ct_pre *pre_ct, + bool nat) +{ + struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; + struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; + struct mlx5_core_dev *dev = ct_priv->dev; + struct mlx5_flow_table *ft = pre_ct->ft; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_modify_hdr *mod_hdr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + u32 ctstate; + u16 zone; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + zone = ct_ft->zone & MLX5_CT_ZONE_MASK; + err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type, + ZONE_TO_REG, zone); + if (err) { + ct_dbg("Failed to set zone register mapping"); + goto err_mapping; + } + + mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type, + pre_mod_acts.num_actions, + pre_mod_acts.actions); + + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + ct_dbg("Failed to create pre ct mod hdr"); + goto err_mapping; + } + pre_ct->modify_hdr = mod_hdr; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + flow_act.modify_hdr = mod_hdr; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + + /* add flow rule */ + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, + zone, MLX5_CT_ZONE_MASK); + ctstate = MLX5_CT_STATE_TRK_BIT; + if (nat) + ctstate |= MLX5_CT_STATE_NAT_BIT; + mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate); + + dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act); + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + ct_dbg("Failed to add pre ct flow rule zone %d", zone); + goto err_flow_rule; + } + pre_ct->flow_rule = rule; + + /* 
add miss rule */ + dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct; + rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + ct_dbg("Failed to add pre ct miss rule zone %d", zone); + goto err_miss_rule; + } + pre_ct->miss_rule = rule; + + mlx5e_mod_hdr_dealloc(&pre_mod_acts); + kvfree(spec); + return 0; + +err_miss_rule: + mlx5_del_flow_rules(pre_ct->flow_rule); +err_flow_rule: + mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); +err_mapping: + mlx5e_mod_hdr_dealloc(&pre_mod_acts); + kvfree(spec); + return err; +} + +static void +tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft, + struct mlx5_tc_ct_pre *pre_ct) +{ + struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; + struct mlx5_core_dev *dev = ct_priv->dev; + + mlx5_del_flow_rules(pre_ct->flow_rule); + mlx5_del_flow_rules(pre_ct->miss_rule); + mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); +} + +static int +mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft, + struct mlx5_tc_ct_pre *pre_ct, + bool nat) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; + struct mlx5_core_dev *dev = ct_priv->dev; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *g; + u32 metadata_reg_c_2_mask; + u32 *flow_group_in; + void *misc; + int err; + + ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type); + if (!ns) { + err = -EOPNOTSUPP; + ct_dbg("Failed to get flow namespace"); + return err; + } + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ? + FDB_TC_OFFLOAD : MLX5E_TC_PRIO; + ft_attr.max_fte = 2; + ft_attr.level = 1; + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + ct_dbg("Failed to create pre ct table"); + goto out_free; + } + pre_ct->ft = ft; + + /* create flow group */ + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + + misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria.misc_parameters_2); + + metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK; + metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16); + if (nat) + metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16); + + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2, + metadata_reg_c_2_mask); + + g = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + ct_dbg("Failed to create pre ct group"); + goto err_flow_grp; + } + pre_ct->flow_grp = g; + + /* create miss group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + g = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + ct_dbg("Failed to create pre ct miss group"); + goto err_miss_grp; + } + pre_ct->miss_grp = g; + + err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat); + if (err) + goto err_add_rules; + + kvfree(flow_group_in); + return 0; + +err_add_rules: + mlx5_destroy_flow_group(pre_ct->miss_grp); +err_miss_grp: + mlx5_destroy_flow_group(pre_ct->flow_grp); +err_flow_grp: + mlx5_destroy_flow_table(ft); +out_free: + kvfree(flow_group_in); + return err; +} + +static void 
+mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft, + struct mlx5_tc_ct_pre *pre_ct) +{ + tc_ct_pre_ct_del_rules(ct_ft, pre_ct); + mlx5_destroy_flow_group(pre_ct->miss_grp); + mlx5_destroy_flow_group(pre_ct->flow_grp); + mlx5_destroy_flow_table(pre_ct->ft); +} + +static int +mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft) +{ + int err; + + err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false); + if (err) + return err; + + err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true); + if (err) + goto err_pre_ct_nat; + + return 0; + +err_pre_ct_nat: + mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); + return err; +} + +static void +mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft) +{ + mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat); + mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); +} + +/* To avoid false lock dependency warning set the ct_entries_ht lock + * class different than the lock class of the ht being used when deleting + * last flow from a group and then deleting a group, we get into del_sw_flow_group() + * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but + * it's different than the ht->mutex here. + */ +static struct lock_class_key ct_entries_ht_lock_key; + +static struct mlx5_ct_ft * +mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, + struct nf_flowtable *nf_ft) +{ + struct mlx5_ct_ft *ft; + int err; + + ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params); + if (ft) { + refcount_inc(&ft->refcount); + return ft; + } + + ft = kzalloc(sizeof(*ft), GFP_KERNEL); + if (!ft) + return ERR_PTR(-ENOMEM); + + err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id); + if (err) + goto err_mapping; + + ft->zone = zone; + ft->nf_ft = nf_ft; + ft->ct_priv = ct_priv; + refcount_set(&ft->refcount, 1); + + err = mlx5_tc_ct_alloc_pre_ct_tables(ft); + if (err) + goto err_alloc_pre_ct; + + err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params); + if (err) + goto err_init; + + lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key); + + err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node, + zone_params); + if (err) + goto err_insert; + + err = nf_flow_table_offload_add_cb(ft->nf_ft, + mlx5_tc_ct_block_flow_offload, ft); + if (err) + goto err_add_cb; + + return ft; + +err_add_cb: + rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); +err_insert: + rhashtable_destroy(&ft->ct_entries_ht); +err_init: + mlx5_tc_ct_free_pre_ct_tables(ft); +err_alloc_pre_ct: + mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id); +err_mapping: + kfree(ft); + return ERR_PTR(err); +} + +static void +mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg) +{ + struct mlx5_ct_entry *entry = ptr; + + mlx5_tc_ct_entry_put(entry); +} + +static void +mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) +{ + if (!refcount_dec_and_test(&ft->refcount)) + return; + + flush_workqueue(ct_priv->wq); + nf_flow_table_offload_del_cb(ft->nf_ft, + mlx5_tc_ct_block_flow_offload, ft); + rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); + rhashtable_free_and_destroy(&ft->ct_entries_ht, + mlx5_tc_ct_flush_ft_entry, + ct_priv); + mlx5_tc_ct_free_pre_ct_tables(ft); + mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id); + kfree(ft); +} + +/* We translate the tc filter with CT action to the following HW model: + * + * +---------------------+ + * + ft prio (tc chain) + + * + original match + + * +---------------------+ + * | set chain miss mapping + * | set fte_id + * | set tunnel_id + * | do decap + * v + * 
+---------------------+ + * + pre_ct/pre_ct_nat + if matches +-------------------------+ + * + zone+nat match +---------------->+ post_act (see below) + + * +---------------------+ set zone +-------------------------+ + * | set zone + * v + * +--------------------+ + * + CT (nat or no nat) + + * + tuple + zone match + + * +--------------------+ + * | set mark + * | set labels_id + * | set established + * | set zone_restore + * | do nat (if needed) + * v + * +--------------+ + * + post_act + original filter actions + * + fte_id match +------------------------> + * +--------------+ + */ +static struct mlx5_flow_handle * +__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_flow_spec *orig_spec, + struct mlx5_flow_attr *attr) +{ + bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; + struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); + struct mlx5e_tc_mod_hdr_acts *pre_mod_acts; + u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type); + struct mlx5_flow_attr *pre_ct_attr; + struct mlx5_modify_hdr *mod_hdr; + struct mlx5_ct_flow *ct_flow; + int chain_mapping = 0, err; + struct mlx5_ct_ft *ft; + + ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); + if (!ct_flow) { + return ERR_PTR(-ENOMEM); + } + + /* Register for CT established events */ + ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone, + attr->ct_attr.nf_ft); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + ct_dbg("Failed to register to ft callback"); + goto err_ft; + } + ct_flow->ft = ft; + + /* Base flow attributes of both rules on original rule attribute */ + ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); + if (!ct_flow->pre_ct_attr) { + err = -ENOMEM; + goto err_alloc_pre; + } + + pre_ct_attr = ct_flow->pre_ct_attr; + memcpy(pre_ct_attr, attr, attr_sz); + pre_mod_acts = &pre_ct_attr->parse_attr->mod_hdr_acts; + + /* Modify the original rule's action to fwd and modify, leave decap */ + pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP; + pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + /* Write chain miss tag for miss in ct table as we + * don't go though all prios of this chain as normal tc rules + * miss. + */ + err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain, + &chain_mapping); + if (err) { + ct_dbg("Failed to get chain register mapping for chain"); + goto err_get_chain; + } + ct_flow->chain_mapping = chain_mapping; + + err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type, + CHAIN_TO_REG, chain_mapping); + if (err) { + ct_dbg("Failed to set chain register mapping"); + goto err_mapping; + } + + /* If original flow is decap, we do it before going into ct table + * so add a rewrite for the tunnel match_id. + */ + if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) && + attr->chain == 0) { + err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, + ct_priv->ns_type, + TUNNEL_TO_REG, + attr->tunnel_id); + if (err) { + ct_dbg("Failed to set tunnel register mapping"); + goto err_mapping; + } + } + + mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type, + pre_mod_acts->num_actions, + pre_mod_acts->actions); + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + ct_dbg("Failed to create pre ct mod hdr"); + goto err_mapping; + } + pre_ct_attr->modify_hdr = mod_hdr; + + /* Change original rule point to ct table */ + pre_ct_attr->dest_chain = 0; + pre_ct_attr->dest_ft = nat ? 
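
Condensing the redirect that __mlx5_tc_ct_flow_offload() above installs for the original rule (names as in the code):

/* nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
 * pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
 *
 * A tc ct action that requested NAT therefore enters the zone's
 * pre_ct_nat table and any other ct action enters pre_ct, matching the
 * pre_ct/pre_ct_nat hop drawn in the diagram above.
 */
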
ft->pre_ct_nat.ft : ft->pre_ct.ft; + ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec, + pre_ct_attr); + if (IS_ERR(ct_flow->pre_ct_rule)) { + err = PTR_ERR(ct_flow->pre_ct_rule); + ct_dbg("Failed to add pre ct rule"); + goto err_insert_orig; + } + + attr->ct_attr.ct_flow = ct_flow; + mlx5e_mod_hdr_dealloc(pre_mod_acts); + + return ct_flow->pre_ct_rule; + +err_insert_orig: + mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); +err_mapping: + mlx5e_mod_hdr_dealloc(pre_mod_acts); + mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); +err_get_chain: + kfree(ct_flow->pre_ct_attr); +err_alloc_pre: + mlx5_tc_ct_del_ft_cb(ct_priv, ft); +err_ft: + kfree(ct_flow); + netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err); + return ERR_PTR(err); +} + +struct mlx5_flow_handle * +mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +{ + struct mlx5_flow_handle *rule; + + if (!priv) + return ERR_PTR(-EOPNOTSUPP); + + mutex_lock(&priv->control_lock); + rule = __mlx5_tc_ct_flow_offload(priv, spec, attr); + mutex_unlock(&priv->control_lock); + + return rule; +} + +static void +__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_ct_flow *ct_flow, + struct mlx5_flow_attr *attr) +{ + struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr; + struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); + + mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr); + mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); + + mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); + mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft); + + kfree(ct_flow->pre_ct_attr); + kfree(ct_flow); +} + +void +mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow; + + /* We are called on error to clean up stuff from parsing + * but we don't have anything for now + */ + if (!ct_flow) + return; + + mutex_lock(&priv->control_lock); + __mlx5_tc_ct_delete_flow(priv, ct_flow, attr); + mutex_unlock(&priv->control_lock); +} + +static int +mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv) +{ + struct mlx5_flow_table *post_ct = mlx5e_tc_post_act_get_ft(ct_priv->post_act); + struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get(); + int err; + + if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB && + ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) { + ct_dbg("Using SMFS ct flow steering provider"); + fs_ops = mlx5_ct_fs_smfs_ops_get(); + } + + ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL); + if (!ct_priv->fs) + return -ENOMEM; + + ct_priv->fs->netdev = ct_priv->netdev; + ct_priv->fs->dev = ct_priv->dev; + ct_priv->fs_ops = fs_ops; + + err = ct_priv->fs_ops->init(ct_priv->fs, ct_priv->ct, ct_priv->ct_nat, post_ct); + if (err) + goto err_init; + + return 0; + +err_init: + kfree(ct_priv->fs); + return err; +} + +static int +mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw, + const char **err_msg) +{ + if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) { + /* vlan workaround should be avoided for multi chain rules. 
+ * This is just a sanity check as pop vlan action should + * be supported by any FW that supports ignore_flow_level + */ + + *err_msg = "firmware vlan actions support is missing"; + return -EOPNOTSUPP; + } + + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, + fdb_modify_header_fwd_to_table)) { + /* CT always writes to registers which are mod header actions. + * Therefore, mod header and goto is required + */ + + *err_msg = "firmware fwd and modify support is missing"; + return -EOPNOTSUPP; + } + + if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { + *err_msg = "register loopback isn't supported"; + return -EOPNOTSUPP; + } + + return 0; +} + +static int +mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + const char *err_msg = NULL; + int err = 0; + +#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + /* cannot restore chain ID on HW miss */ + + err_msg = "tc skb extension missing"; + err = -EOPNOTSUPP; + goto out_err; +#endif + if (IS_ERR_OR_NULL(post_act)) { + /* Ignore_flow_level support isn't supported by default for VFs and so post_act + * won't be supported. Skip showing error msg. + */ + if (priv->mdev->coredev_type == MLX5_COREDEV_PF) + err_msg = "post action is missing"; + err = -EOPNOTSUPP; + goto out_err; + } + + if (ns_type == MLX5_FLOW_NAMESPACE_FDB) + err = mlx5_tc_ct_init_check_esw_support(esw, &err_msg); + +out_err: + if (err && err_msg) + netdev_dbg(priv->netdev, "tc ct offload not supported, %s\n", err_msg); + return err; +} + +static void +mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv) +{ + struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs; + + ct_dbgfs->root = debugfs_create_dir("ct", mlx5_debugfs_get_dev_root(ct_priv->dev)); + debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root, + &ct_dbgfs->stats.offloaded); + debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root, + &ct_dbgfs->stats.rx_dropped); +} + +static void +mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv) +{ + debugfs_remove_recursive(ct_priv->debugfs.root); +} + +#define INIT_ERR_PREFIX "tc ct offload init failed" + +struct mlx5_tc_ct_priv * +mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, + struct mod_hdr_tbl *mod_hdr, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act) +{ + struct mlx5_tc_ct_priv *ct_priv; + struct mlx5_core_dev *dev; + u64 mapping_id; + int err; + + dev = priv->mdev; + err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act); + if (err) + goto err_support; + + ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL); + if (!ct_priv) + goto err_alloc; + + mapping_id = mlx5_query_nic_system_image_guid(dev); + + ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE, + sizeof(u16), 0, true); + if (IS_ERR(ct_priv->zone_mapping)) { + err = PTR_ERR(ct_priv->zone_mapping); + goto err_mapping_zone; + } + + ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS, + sizeof(u32) * 4, 0, true); + if (IS_ERR(ct_priv->labels_mapping)) { + err = PTR_ERR(ct_priv->labels_mapping); + goto err_mapping_labels; + } + + spin_lock_init(&ct_priv->ht_lock); + ct_priv->ns_type = ns_type; + ct_priv->chains = chains; + ct_priv->netdev = priv->netdev; + ct_priv->dev = priv->mdev; + ct_priv->mod_hdr_tbl = mod_hdr; + ct_priv->ct = mlx5_chains_create_global_table(chains); + if (IS_ERR(ct_priv->ct)) { + err = PTR_ERR(ct_priv->ct); + mlx5_core_warn(dev, + "%s, failed to create ct table 
err: %d\n", + INIT_ERR_PREFIX, err); + goto err_ct_tbl; + } + + ct_priv->ct_nat = mlx5_chains_create_global_table(chains); + if (IS_ERR(ct_priv->ct_nat)) { + err = PTR_ERR(ct_priv->ct_nat); + mlx5_core_warn(dev, + "%s, failed to create ct nat table err: %d\n", + INIT_ERR_PREFIX, err); + goto err_ct_nat_tbl; + } + + ct_priv->post_act = post_act; + mutex_init(&ct_priv->control_lock); + if (rhashtable_init(&ct_priv->zone_ht, &zone_params)) + goto err_ct_zone_ht; + if (rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params)) + goto err_ct_tuples_ht; + if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params)) + goto err_ct_tuples_nat_ht; + + ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq", 0); + if (!ct_priv->wq) { + err = -ENOMEM; + goto err_wq; + } + + err = mlx5_tc_ct_fs_init(ct_priv); + if (err) + goto err_init_fs; + + mlx5_ct_tc_create_dbgfs(ct_priv); + return ct_priv; + +err_init_fs: + destroy_workqueue(ct_priv->wq); +err_wq: + rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); +err_ct_tuples_nat_ht: + rhashtable_destroy(&ct_priv->ct_tuples_ht); +err_ct_tuples_ht: + rhashtable_destroy(&ct_priv->zone_ht); +err_ct_zone_ht: + mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); +err_ct_nat_tbl: + mlx5_chains_destroy_global_table(chains, ct_priv->ct); +err_ct_tbl: + mapping_destroy(ct_priv->labels_mapping); +err_mapping_labels: + mapping_destroy(ct_priv->zone_mapping); +err_mapping_zone: + kfree(ct_priv); +err_alloc: +err_support: + + return NULL; +} + +void +mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) +{ + struct mlx5_fs_chains *chains; + + if (!ct_priv) + return; + + destroy_workqueue(ct_priv->wq); + mlx5_ct_tc_remove_dbgfs(ct_priv); + chains = ct_priv->chains; + + ct_priv->fs_ops->destroy(ct_priv->fs); + kfree(ct_priv->fs); + + mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); + mlx5_chains_destroy_global_table(chains, ct_priv->ct); + mapping_destroy(ct_priv->zone_mapping); + mapping_destroy(ct_priv->labels_mapping); + + rhashtable_destroy(&ct_priv->ct_tuples_ht); + rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); + rhashtable_destroy(&ct_priv->zone_ht); + mutex_destroy(&ct_priv->control_lock); + kfree(ct_priv); +} + +bool +mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, + struct sk_buff *skb, u8 zone_restore_id) +{ + struct mlx5_ct_tuple tuple = {}; + struct mlx5_ct_entry *entry; + u16 zone; + + if (!ct_priv || !zone_restore_id) + return true; + + if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone)) + goto out_inc_drop; + + if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone)) + goto out_inc_drop; + + spin_lock(&ct_priv->ht_lock); + + entry = mlx5_tc_ct_entry_get(ct_priv, &tuple); + if (!entry) { + spin_unlock(&ct_priv->ht_lock); + goto out_inc_drop; + } + + if (IS_ERR(entry)) { + spin_unlock(&ct_priv->ht_lock); + goto out_inc_drop; + } + spin_unlock(&ct_priv->ht_lock); + + tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie); + __mlx5_tc_ct_entry_put(entry); + + return true; + +out_inc_drop: + atomic_inc(&ct_priv->debugfs.stats.rx_dropped); + return false; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h new file mode 100644 index 000000000..5bbd6b928 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -0,0 +1,220 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. 
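
A short summary of the software-miss recovery path implemented by mlx5e_tc_ct_restore_flow() above:

/* 1. the zone_restore_id carried in packet metadata is mapped back to
 *    the conntrack zone via ct_priv->zone_mapping;
 * 2. the skb is dissected into an mlx5_ct_tuple;
 * 3. the tuple is looked up in the driver's ct_tuples_ht /
 *    ct_tuples_nat_ht hashtables;
 * 4. tcf_ct_flow_table_restore_skb() is called with the entry's
 *    restore_cookie so conntrack state is reattached before the packet
 *    continues in software; any failure bumps the rx_dropped debugfs
 *    counter and the function returns false.
 */
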
*/ + +#ifndef __MLX5_EN_TC_CT_H__ +#define __MLX5_EN_TC_CT_H__ + +#include <net/pkt_cls.h> +#include <linux/mlx5/fs.h> +#include <net/tc_act/tc_ct.h> + +#include "en.h" + +struct mlx5_flow_attr; +struct mlx5e_tc_mod_hdr_acts; +struct mlx5_rep_uplink_priv; +struct mlx5e_tc_flow; +struct mlx5e_priv; + +struct mlx5_fs_chains; +struct mlx5_tc_ct_priv; +struct mlx5_ct_flow; + +struct nf_flowtable; + +struct mlx5_ct_attr { + u16 zone; + u16 ct_action; + struct mlx5_ct_flow *ct_flow; + struct nf_flowtable *nf_ft; + u32 ct_labels_id; +}; + +#define zone_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_2,\ + .moffset = 0,\ + .mlen = 16,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_2),\ +} + +#define ctstate_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_2,\ + .moffset = 16,\ + .mlen = 16,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_2),\ +} + +#define mark_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_3,\ + .moffset = 0,\ + .mlen = 32,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_3),\ +} + +#define labels_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_4,\ + .moffset = 0,\ + .mlen = 32,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_4),\ +} + +/* 8 LSB of metadata C5 are reserved for packet color */ +#define fteid_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_5,\ + .moffset = 8,\ + .mlen = 24,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_5),\ +} + +#define zone_restore_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,\ + .moffset = 0,\ + .mlen = ESW_ZONE_ID_BITS,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_1),\ +} + +#define nic_zone_restore_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,\ + .moffset = 16,\ + .mlen = ESW_ZONE_ID_BITS,\ +} + +#define MLX5_CT_ZONE_BITS MLX5_REG_MAPPING_MBITS(ZONE_TO_REG) +#define MLX5_CT_ZONE_MASK MLX5_REG_MAPPING_MASK(ZONE_TO_REG) + +#if IS_ENABLED(CONFIG_MLX5_TC_CT) + +struct mlx5_tc_ct_priv * +mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, + struct mod_hdr_tbl *mod_hdr, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act); +void +mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv); + +void +mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr); + +int +mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct mlx5_ct_attr *ct_attr, + struct netlink_ext_ack *extack); +int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec); +int +mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_acts, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack); + +struct mlx5_flow_handle * +mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); +void +mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_attr *attr); + +bool +mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, + struct sk_buff *skb, u8 zone_restore_id); + +int +mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv, + struct mlx5e_tc_mod_hdr_acts *mod_acts); + +#else /* CONFIG_MLX5_TC_CT */ + +static inline struct mlx5_tc_ct_priv * +mlx5_tc_ct_init(struct 
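
The register mappings above imply the following packing of metadata register C_2, which also matches the pre-CT group mask built earlier in tc_ct.c (MLX5_CT_ZONE_MASK | (MLX5_CT_STATE_TRK_BIT << 16)):

/* reg_c_2 layout as used by these mappings:
 *
 *   bits  0..15 : conntrack zone   (zone_to_reg_ct, moffset 0)
 *   bits 16..31 : conntrack state  (ctstate_to_reg_ct, moffset 16)
 *
 * i.e. conceptually reg_c_2 == (ctstate << 16) | (zone & MLX5_CT_ZONE_MASK).
 */
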
mlx5e_priv *priv, struct mlx5_fs_chains *chains, + struct mod_hdr_tbl *mod_hdr, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act) +{ + return NULL; +} + +static inline void +mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) +{ +} + +static inline void +mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr) {} + +static inline int +mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct mlx5_ct_attr *ct_attr, + struct netlink_ext_ack *extack) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) + return 0; + + NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled."); + return -EOPNOTSUPP; +} + +static inline int +mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec) +{ + return 0; +} + +static inline int +mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv, + struct mlx5e_tc_mod_hdr_acts *mod_acts) +{ + return -EOPNOTSUPP; +} + +static inline int +mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_acts, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled."); + return -EOPNOTSUPP; +} + +static inline struct mlx5_flow_handle * +mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void +mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_attr *attr) +{ +} + +static inline bool +mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, + struct sk_buff *skb, u8 zone_restore_id) +{ + if (!zone_restore_id) + return true; + + return false; +} + +#endif /* !IS_ENABLED(CONFIG_MLX5_TC_CT) */ +#endif /* __MLX5_EN_TC_CT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h new file mode 100644 index 000000000..2e42d7c54 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -0,0 +1,218 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_TC_PRIV_H__ +#define __MLX5_EN_TC_PRIV_H__ + +#include "en_tc.h" +#include "en/tc/act/act.h" + +#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1) + +#define MLX5E_TC_MAX_SPLITS 1 + + +enum { + MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT, + MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT, + MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_FT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE, + MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1, + MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS = MLX5E_TC_FLOW_BASE + 2, + MLX5E_TC_FLOW_FLAG_SLOW = MLX5E_TC_FLOW_BASE + 3, + MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4, + MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5, + MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6, + MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7, + MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8, + MLX5E_TC_FLOW_FLAG_TUN_RX = MLX5E_TC_FLOW_BASE + 9, + MLX5E_TC_FLOW_FLAG_FAILED = MLX5E_TC_FLOW_BASE + 10, + MLX5E_TC_FLOW_FLAG_SAMPLE = MLX5E_TC_FLOW_BASE + 11, +}; + +struct mlx5e_tc_flow_parse_attr { + const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; + struct mlx5e_mpls_info mpls_info[MLX5_MAX_FLOW_FWD_VPORTS]; + struct net_device *filter_dev; + struct mlx5_flow_spec spec; + struct pedit_headers_action hdrs[__PEDIT_CMD_MAX]; + struct mlx5e_tc_mod_hdr_acts mod_hdr_acts; + int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; + struct mlx5e_tc_act_parse_state parse_state; +}; + +struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc); + +/* Helper struct for accessing a struct containing list_head array. + * Containing struct + * |- Helper array + * [0] Helper item 0 + * |- list_head item 0 + * |- index (0) + * [1] Helper item 1 + * |- list_head item 1 + * |- index (1) + * To access the containing struct from one of the list_head items: + * 1. Get the helper item from the list_head item using + * helper item = + * container_of(list_head item, helper struct type, list_head field) + * 2. Get the contining struct from the helper item and its index in the array: + * containing struct = + * container_of(helper item, containing struct type, helper field[index]) + */ +struct encap_flow_item { + struct mlx5e_encap_entry *e; /* attached encap instance */ + struct list_head list; + int index; +}; + +struct encap_route_flow_item { + struct mlx5e_route_entry *r; /* attached route instance */ + int index; +}; + +struct mlx5e_tc_flow { + struct rhash_head node; + struct mlx5e_priv *priv; + u64 cookie; + unsigned long flags; + struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1]; + + /* flows sharing the same reformat object - currently mpls decap */ + struct list_head l3_to_l2_reformat; + struct mlx5e_decap_entry *decap_reformat; + + /* flows sharing same route entry */ + struct list_head decap_routes; + struct mlx5e_route_entry *decap_route; + struct encap_route_flow_item encap_routes[MLX5_MAX_FLOW_FWD_VPORTS]; + + /* Flow can be associated with multiple encap IDs. + * The number of encaps is bounded by the number of supported + * destinations. 
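+ * Each encaps[] slot is an encap_flow_item whose embedded list_head
+ * links this flow into the encap entry's flow list; the stored index
+ * lets container_of() recover the containing flow, as described in the
+ * helper struct comment above.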
+ */ + struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS]; + struct mlx5e_tc_flow *peer_flow; + struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */ + struct mlx5e_mod_hdr_handle *slow_mh; /* attached mod header instance for slow path */ + struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */ + struct list_head hairpin; /* flows sharing the same hairpin */ + struct list_head peer; /* flows with peer flow */ + struct list_head unready; /* flows not ready to be offloaded (e.g + * due to missing route) + */ + struct net_device *orig_dev; /* netdev adding flow first */ + int tmp_entry_index; + struct list_head tmp_list; /* temporary flow list used by neigh update */ + refcount_t refcnt; + struct rcu_head rcu_head; + struct completion init_done; + struct completion del_hw_done; + struct mlx5_flow_attr *attr; + struct list_head attrs; + u32 chain_mapping; +}; + +struct mlx5_flow_handle * +mlx5e_tc_rule_offload(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); + +void +mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + +u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer); + +struct mlx5_flow_handle * +mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); + +struct mlx5_flow_attr * +mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow); + +void mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow); +int mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow); + +bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow); +bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow); +bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow); +int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow); +bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv); + +static inline void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag) +{ + /* Complete all memory stores before setting bit. */ + smp_mb__before_atomic(); + set_bit(flag, &flow->flags); +} + +#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag) + +static inline bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow, + unsigned long flag) +{ + /* test_and_set_bit() provides all necessary barriers */ + return test_and_set_bit(flag, &flow->flags); +} + +#define flow_flag_test_and_set(flow, flag) \ + __flow_flag_test_and_set(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + +static inline void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag) +{ + /* Complete all memory stores before clearing bit. */ + smp_mb__before_atomic(); + clear_bit(flag, &flow->flags); +} + +#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + +static inline bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag) +{ + bool ret = test_bit(flag, &flow->flags); + + /* Read fields of flow structure only after checking flags. 
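+ * This pairs with the smp_mb__before_atomic() in __flow_flag_set() and
+ * __flow_flag_clear(), so stores made by the writer before flipping the
+ * bit are visible once the bit is observed here.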
*/ + smp_mb__after_atomic(); + return ret; +} + +#define flow_flag_test(flow, flag) __flow_flag_test(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + +void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow); +struct mlx5_flow_handle * +mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec); + +void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr); + +struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow); +void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow); + +struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow); + +struct mlx5e_tc_int_port_priv * +mlx5e_get_int_port_priv(struct mlx5e_priv *priv); + +struct mlx5e_flow_meters *mlx5e_get_flow_meters(struct mlx5_core_dev *dev); + +void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec); +void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec); + +int mlx5e_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack); + +#endif /* __MLX5_EN_TC_PRIV_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c new file mode 100644 index 000000000..83bb0811e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -0,0 +1,991 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include <net/inet_ecn.h> +#include <net/vxlan.h> +#include <net/gre.h> +#include <net/geneve.h> +#include <net/bareudp.h> +#include "en/tc_tun.h" +#include "en/tc_priv.h" +#include "en_tc.h" +#include "rep/tc.h" +#include "rep/neigh.h" +#include "lag/lag.h" +#include "lag/mp.h" + +struct mlx5e_tc_tun_route_attr { + struct net_device *out_dev; + struct net_device *route_dev; + union { + struct flowi4 fl4; + struct flowi6 fl6; + } fl; + struct neighbour *n; + u8 ttl; +}; + +#define TC_TUN_ROUTE_ATTR_INIT(name) struct mlx5e_tc_tun_route_attr name = {} + +static void mlx5e_tc_tun_route_attr_cleanup(struct mlx5e_tc_tun_route_attr *attr) +{ + if (attr->n) + neigh_release(attr->n); + if (attr->route_dev) + dev_put(attr->route_dev); +} + +struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev) +{ + if (netif_is_vxlan(tunnel_dev)) + return &vxlan_tunnel; + else if (netif_is_geneve(tunnel_dev)) + return &geneve_tunnel; + else if (netif_is_gretap(tunnel_dev) || + netif_is_ip6gretap(tunnel_dev)) + return &gre_tunnel; + else if (netif_is_bareudp(tunnel_dev)) + return &mplsoudp_tunnel; + else + return NULL; +} + +static int get_route_and_out_devs(struct mlx5e_priv *priv, + struct net_device *dev, + struct net_device **route_dev, + struct net_device **out_dev) +{ + struct net_device *uplink_dev, *uplink_upper, *real_dev; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + bool dst_is_lag_dev; + + real_dev = is_vlan_dev(dev) ? vlan_dev_real_dev(dev) : dev; + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + + rcu_read_lock(); + uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev); + /* mlx5_lag_is_sriov() is a blocking function which can't be called + * while holding rcu read lock. Take the net_device for correctness + * sake. 
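+ * The extra reference is dropped again right after the LAG check
+ * below.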
+ */ + if (uplink_upper) + dev_hold(uplink_upper); + rcu_read_unlock(); + + dst_is_lag_dev = (uplink_upper && + netif_is_lag_master(uplink_upper) && + real_dev == uplink_upper && + mlx5_lag_is_sriov(priv->mdev)); + if (uplink_upper) + dev_put(uplink_upper); + + /* if the egress device isn't on the same HW e-switch or + * it's a LAG device, use the uplink + */ + *route_dev = dev; + if (!netdev_port_same_parent_id(priv->netdev, real_dev) || + dst_is_lag_dev || is_vlan_dev(*route_dev) || + netif_is_ovs_master(*route_dev)) + *out_dev = uplink_dev; + else if (mlx5e_eswitch_rep(dev) && + mlx5e_is_valid_eswitch_fwd_dev(priv, dev)) + *out_dev = *route_dev; + else + return -EOPNOTSUPP; + + if (!(mlx5e_eswitch_rep(*out_dev) && + mlx5e_is_uplink_rep(netdev_priv(*out_dev)))) + return -EOPNOTSUPP; + + if (mlx5e_eswitch_uplink_rep(priv->netdev) && *out_dev != priv->netdev) + return -EOPNOTSUPP; + + return 0; +} + +static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv, + struct net_device *dev, + struct mlx5e_tc_tun_route_attr *attr) +{ + struct net_device *route_dev; + struct net_device *out_dev; + struct neighbour *n; + struct rtable *rt; + +#if IS_ENABLED(CONFIG_INET) + struct mlx5_core_dev *mdev = priv->mdev; + struct net_device *uplink_dev; + int ret; + + if (mlx5_lag_is_multipath(mdev)) { + struct mlx5_eswitch *esw = mdev->priv.eswitch; + + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + attr->fl.fl4.flowi4_oif = uplink_dev->ifindex; + } else { + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev); + + if (tunnel && tunnel->get_remote_ifindex) + attr->fl.fl4.flowi4_oif = tunnel->get_remote_ifindex(dev); + } + + rt = ip_route_output_key(dev_net(dev), &attr->fl.fl4); + if (IS_ERR(rt)) + return PTR_ERR(rt); + + if (rt->rt_type != RTN_UNICAST) { + ret = -ENETUNREACH; + goto err_rt_release; + } + + if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) { + ret = -ENETUNREACH; + goto err_rt_release; + } +#else + return -EOPNOTSUPP; +#endif + + ret = get_route_and_out_devs(priv, rt->dst.dev, &route_dev, &out_dev); + if (ret < 0) + goto err_rt_release; + dev_hold(route_dev); + + if (!attr->ttl) + attr->ttl = ip4_dst_hoplimit(&rt->dst); + n = dst_neigh_lookup(&rt->dst, &attr->fl.fl4.daddr); + if (!n) { + ret = -ENOMEM; + goto err_dev_release; + } + + ip_rt_put(rt); + attr->route_dev = route_dev; + attr->out_dev = out_dev; + attr->n = n; + return 0; + +err_dev_release: + dev_put(route_dev); +err_rt_release: + ip_rt_put(rt); + return ret; +} + +static void mlx5e_route_lookup_ipv4_put(struct mlx5e_tc_tun_route_attr *attr) +{ + mlx5e_tc_tun_route_attr_cleanup(attr); +} + +static const char *mlx5e_netdev_kind(struct net_device *dev) +{ + if (dev->rtnl_link_ops) + return dev->rtnl_link_ops->kind; + else + return "unknown"; +} + +static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + if (!e->tunnel) { + pr_warn("mlx5: Cannot generate tunnel header for this tunnel\n"); + return -EOPNOTSUPP; + } + + return e->tunnel->generate_ip_tun_hdr(buf, ip_proto, e); +} + +static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev, + struct mlx5e_encap_entry *e, + u16 proto) +{ + struct ethhdr *eth = (struct ethhdr *)buf; + char *ip; + + ether_addr_copy(eth->h_dest, e->h_dest); + ether_addr_copy(eth->h_source, dev->dev_addr); + if (is_vlan_dev(dev)) { + struct vlan_hdr *vlan = (struct vlan_hdr *) + ((char *)eth + ETH_HLEN); + ip = (char *)vlan + VLAN_HLEN; + eth->h_proto = vlan_dev_vlan_proto(dev); + vlan->h_vlan_TCI = 
htons(vlan_dev_vlan_id(dev)); + vlan->h_vlan_encapsulated_proto = htons(proto); + } else { + eth->h_proto = htons(proto); + ip = (char *)eth + ETH_HLEN; + } + + return ip; +} + +int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + struct mlx5_pkt_reformat_params reformat_params; + struct mlx5e_neigh m_neigh = {}; + TC_TUN_ROUTE_ATTR_INIT(attr); + int ipv4_encap_size; + char *encap_header; + struct iphdr *ip; + u8 nud_state; + int err; + + /* add the IP fields */ + attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK; + attr.fl.fl4.daddr = tun_key->u.ipv4.dst; + attr.fl.fl4.saddr = tun_key->u.ipv4.src; + attr.ttl = tun_key->ttl; + + err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr); + if (err) + return err; + + ipv4_encap_size = + (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + + sizeof(struct iphdr) + + e->tunnel->calc_hlen(e); + + if (max_encap_size < ipv4_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv4_encap_size, max_encap_size); + err = -EOPNOTSUPP; + goto release_neigh; + } + + encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL); + if (!encap_header) { + err = -ENOMEM; + goto release_neigh; + } + + m_neigh.family = attr.n->ops->family; + memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len); + e->out_dev = attr.out_dev; + e->route_dev_ifindex = attr.route_dev->ifindex; + + /* It's important to add the neigh to the hash table before checking + * the neigh validity state. So if we'll get a notification, in case the + * neigh changes it's validity state, we would find the relevant neigh + * in the hash. + */ + err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev); + if (err) + goto free_encap; + + read_lock_bh(&attr.n->lock); + nud_state = attr.n->nud_state; + ether_addr_copy(e->h_dest, attr.n->ha); + read_unlock_bh(&attr.n->lock); + + /* add ethernet header */ + ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e, + ETH_P_IP); + + /* add ip header */ + ip->tos = tun_key->tos; + ip->version = 0x4; + ip->ihl = 0x5; + ip->ttl = attr.ttl; + ip->daddr = attr.fl.fl4.daddr; + ip->saddr = attr.fl.fl4.saddr; + + /* add tunneling protocol header */ + err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr), + &ip->protocol, e); + if (err) + goto destroy_neigh_entry; + + e->encap_size = ipv4_encap_size; + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(attr.n, NULL); + /* the encap entry will be made valid on neigh update event + * and not used before that. 
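+ * Once the neighbour becomes valid, the cached encap_header is turned
+ * into a packet reformat and the waiting flows are moved off the slow
+ * path (see mlx5e_tc_encap_flows_add()).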
+ */ + goto release_neigh; + } + + memset(&reformat_params, 0, sizeof(reformat_params)); + reformat_params.type = e->reformat_type; + reformat_params.size = ipv4_encap_size; + reformat_params.data = encap_header; + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + err = PTR_ERR(e->pkt_reformat); + goto destroy_neigh_entry; + } + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); + mlx5e_route_lookup_ipv4_put(&attr); + return err; + +destroy_neigh_entry: + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); +free_encap: + kfree(encap_header); +release_neigh: + mlx5e_route_lookup_ipv4_put(&attr); + return err; +} + +int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + struct mlx5_pkt_reformat_params reformat_params; + TC_TUN_ROUTE_ATTR_INIT(attr); + int ipv4_encap_size; + char *encap_header; + struct iphdr *ip; + u8 nud_state; + int err; + + /* add the IP fields */ + attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK; + attr.fl.fl4.daddr = tun_key->u.ipv4.dst; + attr.fl.fl4.saddr = tun_key->u.ipv4.src; + attr.ttl = tun_key->ttl; + + err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr); + if (err) + return err; + + ipv4_encap_size = + (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + + sizeof(struct iphdr) + + e->tunnel->calc_hlen(e); + + if (max_encap_size < ipv4_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv4_encap_size, max_encap_size); + err = -EOPNOTSUPP; + goto release_neigh; + } + + encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL); + if (!encap_header) { + err = -ENOMEM; + goto release_neigh; + } + + e->route_dev_ifindex = attr.route_dev->ifindex; + + read_lock_bh(&attr.n->lock); + nud_state = attr.n->nud_state; + ether_addr_copy(e->h_dest, attr.n->ha); + WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev); + read_unlock_bh(&attr.n->lock); + + /* add ethernet header */ + ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e, + ETH_P_IP); + + /* add ip header */ + ip->tos = tun_key->tos; + ip->version = 0x4; + ip->ihl = 0x5; + ip->ttl = attr.ttl; + ip->daddr = attr.fl.fl4.daddr; + ip->saddr = attr.fl.fl4.saddr; + + /* add tunneling protocol header */ + err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr), + &ip->protocol, e); + if (err) + goto free_encap; + + e->encap_size = ipv4_encap_size; + kfree(e->encap_header); + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(attr.n, NULL); + /* the encap entry will be made valid on neigh update event + * and not used before that. 
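+ * The refreshed encap_header stored above has already replaced the
+ * stale one and is reused once the entry is re-validated.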
+ */ + goto release_neigh; + } + + memset(&reformat_params, 0, sizeof(reformat_params)); + reformat_params.type = e->reformat_type; + reformat_params.size = ipv4_encap_size; + reformat_params.data = encap_header; + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + err = PTR_ERR(e->pkt_reformat); + goto free_encap; + } + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); + mlx5e_route_lookup_ipv4_put(&attr); + return err; + +free_encap: + kfree(encap_header); +release_neigh: + mlx5e_route_lookup_ipv4_put(&attr); + return err; +} + +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) +static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv, + struct net_device *dev, + struct mlx5e_tc_tun_route_attr *attr) +{ + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev); + struct net_device *route_dev; + struct net_device *out_dev; + struct dst_entry *dst; + struct neighbour *n; + int ret; + + if (tunnel && tunnel->get_remote_ifindex) + attr->fl.fl6.flowi6_oif = tunnel->get_remote_ifindex(dev); + dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(dev), NULL, &attr->fl.fl6, + NULL); + if (IS_ERR(dst)) + return PTR_ERR(dst); + + if (!attr->ttl) + attr->ttl = ip6_dst_hoplimit(dst); + + ret = get_route_and_out_devs(priv, dst->dev, &route_dev, &out_dev); + if (ret < 0) + goto err_dst_release; + + dev_hold(route_dev); + n = dst_neigh_lookup(dst, &attr->fl.fl6.daddr); + if (!n) { + ret = -ENOMEM; + goto err_dev_release; + } + + dst_release(dst); + attr->out_dev = out_dev; + attr->route_dev = route_dev; + attr->n = n; + return 0; + +err_dev_release: + dev_put(route_dev); +err_dst_release: + dst_release(dst); + return ret; +} + +static void mlx5e_route_lookup_ipv6_put(struct mlx5e_tc_tun_route_attr *attr) +{ + mlx5e_tc_tun_route_attr_cleanup(attr); +} + +int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + struct mlx5_pkt_reformat_params reformat_params; + struct mlx5e_neigh m_neigh = {}; + TC_TUN_ROUTE_ATTR_INIT(attr); + struct ipv6hdr *ip6h; + int ipv6_encap_size; + char *encap_header; + u8 nud_state; + int err; + + attr.ttl = tun_key->ttl; + attr.fl.fl6.flowlabel = ip6_make_flowinfo(tun_key->tos, tun_key->label); + attr.fl.fl6.daddr = tun_key->u.ipv6.dst; + attr.fl.fl6.saddr = tun_key->u.ipv6.src; + + err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr); + if (err) + return err; + + ipv6_encap_size = + (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + + sizeof(struct ipv6hdr) + + e->tunnel->calc_hlen(e); + + if (max_encap_size < ipv6_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv6_encap_size, max_encap_size); + err = -EOPNOTSUPP; + goto release_neigh; + } + + encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL); + if (!encap_header) { + err = -ENOMEM; + goto release_neigh; + } + + m_neigh.family = attr.n->ops->family; + memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len); + e->out_dev = attr.out_dev; + e->route_dev_ifindex = attr.route_dev->ifindex; + + /* It's important to add the neigh to the hash table before checking + * the neigh validity state. So if we'll get a notification, in case the + * neigh changes it's validity state, we would find the relevant neigh + * in the hash. 
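+ * The IPv4 variants above follow the same attach-then-check ordering;
+ * the attached entry is what the neigh update walk later finds on the
+ * nhe encap list.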
+ */ + err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev); + if (err) + goto free_encap; + + read_lock_bh(&attr.n->lock); + nud_state = attr.n->nud_state; + ether_addr_copy(e->h_dest, attr.n->ha); + read_unlock_bh(&attr.n->lock); + + /* add ethernet header */ + ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e, + ETH_P_IPV6); + + /* add ip header */ + ip6_flow_hdr(ip6h, tun_key->tos, 0); + /* the HW fills up ipv6 payload len */ + ip6h->hop_limit = attr.ttl; + ip6h->daddr = attr.fl.fl6.daddr; + ip6h->saddr = attr.fl.fl6.saddr; + + /* add tunneling protocol header */ + err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr), + &ip6h->nexthdr, e); + if (err) + goto destroy_neigh_entry; + + e->encap_size = ipv6_encap_size; + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(attr.n, NULL); + /* the encap entry will be made valid on neigh update event + * and not used before that. + */ + goto release_neigh; + } + + memset(&reformat_params, 0, sizeof(reformat_params)); + reformat_params.type = e->reformat_type; + reformat_params.size = ipv6_encap_size; + reformat_params.data = encap_header; + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + err = PTR_ERR(e->pkt_reformat); + goto destroy_neigh_entry; + } + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); + mlx5e_route_lookup_ipv6_put(&attr); + return err; + +destroy_neigh_entry: + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); +free_encap: + kfree(encap_header); +release_neigh: + mlx5e_route_lookup_ipv6_put(&attr); + return err; +} + +int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + struct mlx5_pkt_reformat_params reformat_params; + TC_TUN_ROUTE_ATTR_INIT(attr); + struct ipv6hdr *ip6h; + int ipv6_encap_size; + char *encap_header; + u8 nud_state; + int err; + + attr.ttl = tun_key->ttl; + + attr.fl.fl6.flowlabel = ip6_make_flowinfo(tun_key->tos, tun_key->label); + attr.fl.fl6.daddr = tun_key->u.ipv6.dst; + attr.fl.fl6.saddr = tun_key->u.ipv6.src; + + err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr); + if (err) + return err; + + ipv6_encap_size = + (is_vlan_dev(attr.route_dev) ? 
VLAN_ETH_HLEN : ETH_HLEN) + + sizeof(struct ipv6hdr) + + e->tunnel->calc_hlen(e); + + if (max_encap_size < ipv6_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv6_encap_size, max_encap_size); + err = -EOPNOTSUPP; + goto release_neigh; + } + + encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL); + if (!encap_header) { + err = -ENOMEM; + goto release_neigh; + } + + e->route_dev_ifindex = attr.route_dev->ifindex; + + read_lock_bh(&attr.n->lock); + nud_state = attr.n->nud_state; + ether_addr_copy(e->h_dest, attr.n->ha); + WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev); + read_unlock_bh(&attr.n->lock); + + /* add ethernet header */ + ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e, + ETH_P_IPV6); + + /* add ip header */ + ip6_flow_hdr(ip6h, tun_key->tos, 0); + /* the HW fills up ipv6 payload len */ + ip6h->hop_limit = attr.ttl; + ip6h->daddr = attr.fl.fl6.daddr; + ip6h->saddr = attr.fl.fl6.saddr; + + /* add tunneling protocol header */ + err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr), + &ip6h->nexthdr, e); + if (err) + goto free_encap; + + e->encap_size = ipv6_encap_size; + kfree(e->encap_header); + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(attr.n, NULL); + /* the encap entry will be made valid on neigh update event + * and not used before that. + */ + goto release_neigh; + } + + memset(&reformat_params, 0, sizeof(reformat_params)); + reformat_params.type = e->reformat_type; + reformat_params.size = ipv6_encap_size; + reformat_params.data = encap_header; + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + err = PTR_ERR(e->pkt_reformat); + goto free_encap; + } + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); + mlx5e_route_lookup_ipv6_put(&attr); + return err; + +free_encap: + kfree(encap_header); +release_neigh: + mlx5e_route_lookup_ipv6_put(&attr); + return err; +} +#endif + +int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *flow_attr, + struct net_device *filter_dev) +{ + struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_int_port *int_port; + TC_TUN_ROUTE_ATTR_INIT(attr); + u16 vport_num; + int err = 0; + + if (flow_attr->tun_ip_version == 4) { + /* Addresses are swapped for decap */ + attr.fl.fl4.saddr = esw_attr->rx_tun_attr->dst_ip.v4; + attr.fl.fl4.daddr = esw_attr->rx_tun_attr->src_ip.v4; + err = mlx5e_route_lookup_ipv4_get(priv, filter_dev, &attr); + } +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + else if (flow_attr->tun_ip_version == 6) { + /* Addresses are swapped for decap */ + attr.fl.fl6.saddr = esw_attr->rx_tun_attr->dst_ip.v6; + attr.fl.fl6.daddr = esw_attr->rx_tun_attr->src_ip.v6; + err = mlx5e_route_lookup_ipv6_get(priv, filter_dev, &attr); + } +#endif + else + return 0; + + if (err) + return err; + + if (attr.route_dev->netdev_ops == &mlx5e_netdev_ops && + mlx5e_tc_is_vf_tunnel(attr.out_dev, attr.route_dev)) { + err = mlx5e_tc_query_route_vport(attr.out_dev, attr.route_dev, &vport_num); + if (err) + goto out; + + esw_attr->rx_tun_attr->decap_vport = vport_num; + } else if (netif_is_ovs_master(attr.route_dev) && mlx5e_tc_int_port_supported(esw)) { + int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv), + attr.route_dev->ifindex, + 
MLX5E_TC_INT_PORT_INGRESS); + if (IS_ERR(int_port)) { + err = PTR_ERR(int_port); + goto out; + } + esw_attr->int_port = int_port; + } + +out: + if (flow_attr->tun_ip_version == 4) + mlx5e_route_lookup_ipv4_put(&attr); +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + else if (flow_attr->tun_ip_version == 6) + mlx5e_route_lookup_ipv6_put(&attr); +#endif + return err; +} + +bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, + struct net_device *netdev) +{ + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(netdev); + + if (tunnel && tunnel->can_offload(priv)) + return true; + else + return false; +} + +int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev); + + if (!tunnel) { + e->reformat_type = -1; + return -EOPNOTSUPP; + } + + return tunnel->init_encap_attr(tunnel_dev, priv, e, extack); +} + +int mlx5e_tc_tun_parse(struct net_device *filter_dev, + struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + u8 *match_level) +{ + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + struct netlink_ext_ack *extack = f->common.extack; + int err = 0; + + if (!tunnel) { + netdev_warn(priv->netdev, + "decapsulation offload is not supported for %s net device\n", + mlx5e_netdev_kind(filter_dev)); + err = -EOPNOTSUPP; + goto out; + } + + *match_level = tunnel->match_level; + + if (tunnel->parse_udp_ports) { + err = tunnel->parse_udp_ports(priv, spec, f, + headers_c, headers_v); + if (err) + goto out; + } + + if (tunnel->parse_tunnel) { + err = tunnel->parse_tunnel(priv, spec, f, + headers_c, headers_v); + if (err) + goto out; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_dissector_key_basic key_basic = {}; + struct flow_dissector_key_basic mask_basic = { + .n_proto = htons(0xFFFF), + }; + struct flow_match_basic match_basic = { + .key = &key_basic, .mask = &mask_basic, + }; + struct flow_match_control match; + u16 addr_type; + + flow_rule_match_enc_control(rule, &match); + addr_type = match.key->addr_type; + + /* For tunnel addr_type used same key id`s as for non-tunnel */ + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_enc_ipv4_addrs(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4, + ntohl(match.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4, + ntohl(match.key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4, + ntohl(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4, + ntohl(match.key->dst)); + + key_basic.n_proto = htons(ETH_P_IP); + mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true, + headers_c, headers_v); + } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_enc_ipv6_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, 
headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + + key_basic.n_proto = htons(ETH_P_IPV6); + mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true, + headers_c, headers_v); + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + struct flow_match_ip match; + + flow_rule_match_enc_ip(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, + match.mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, + match.key->tos & 0x3); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, + match.mask->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, + match.key->tos >> 2); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, + match.mask->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, + match.key->ttl); + + if (match.mask->ttl && + !MLX5_CAP_ESW_FLOWTABLE_FDB + (priv->mdev, + ft_field_support.outer_ipv4_ttl)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on TTL is not supported"); + err = -EOPNOTSUPP; + goto out; + } + } + + /* let software handle IP fragments */ + MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0); + + return 0; + +out: + return err; +} + +int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_ports enc_ports; + + /* Full udp dst port must be given */ + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + NL_SET_ERR_MSG_MOD(extack, + "UDP tunnel decap filter must include enc_dst_port condition"); + netdev_warn(priv->netdev, + "UDP tunnel decap filter must include enc_dst_port condition\n"); + return -EOPNOTSUPP; + } + + flow_rule_match_enc_ports(rule, &enc_ports); + + if (memchr_inv(&enc_ports.mask->dst, 0xff, + sizeof(enc_ports.mask->dst))) { + NL_SET_ERR_MSG_MOD(extack, + "UDP tunnel decap filter must match enc_dst_port fully"); + netdev_warn(priv->netdev, + "UDP tunnel decap filter must match enc_dst_port fully\n"); + return -EOPNOTSUPP; + } + + /* match on UDP protocol and dst port number */ + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, + ntohs(enc_ports.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, + ntohs(enc_ports.key->dst)); + + /* UDP src port on outer header is generated by HW, + * so it is probably a bad idea to request matching it. + * Nonetheless, it is allowed. 
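+ * The source-port mask from the filter is copied as-is below, so an
+ * all-zero mask simply leaves the outer UDP source port unmatched.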
+ */ + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, + ntohs(enc_ports.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, + ntohs(enc_ports.key->src)); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h new file mode 100644 index 000000000..b38f693bb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TC_TUNNEL_H__ +#define __MLX5_EN_TC_TUNNEL_H__ + +#include <linux/netdevice.h> +#include <linux/mlx5/fs.h> +#include <net/pkt_cls.h> +#include <linux/netlink.h> +#include "en.h" +#include "en_rep.h" + +#ifdef CONFIG_MLX5_ESWITCH + +enum { + MLX5E_TC_TUNNEL_TYPE_UNKNOWN, + MLX5E_TC_TUNNEL_TYPE_VXLAN, + MLX5E_TC_TUNNEL_TYPE_GENEVE, + MLX5E_TC_TUNNEL_TYPE_GRETAP, + MLX5E_TC_TUNNEL_TYPE_MPLSOUDP, +}; + +struct mlx5e_encap_key { + const struct ip_tunnel_key *ip_tun_key; + struct mlx5e_tc_tunnel *tc_tunnel; +}; + +struct mlx5e_tc_tunnel { + int tunnel_type; + enum mlx5_flow_match_level match_level; + + bool (*can_offload)(struct mlx5e_priv *priv); + int (*calc_hlen)(struct mlx5e_encap_entry *e); + int (*init_encap_attr)(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack); + int (*generate_ip_tun_hdr)(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e); + int (*parse_udp_ports)(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v); + int (*parse_tunnel)(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v); + bool (*encap_info_equal)(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b); + int (*get_remote_ifindex)(struct net_device *mirred_dev); +}; + +extern struct mlx5e_tc_tunnel vxlan_tunnel; +extern struct mlx5e_tc_tunnel geneve_tunnel; +extern struct mlx5e_tc_tunnel gre_tunnel; +extern struct mlx5e_tc_tunnel mplsoudp_tunnel; + +struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev); + +int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack); + +int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e); +int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e); + +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) +int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e); +int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e); +#else +static inline int +mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ return -EOPNOTSUPP; } +static inline int +mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ return -EOPNOTSUPP; } +#endif +int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, + struct net_device *filter_dev); + +bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, + struct net_device *netdev); + +int 
mlx5e_tc_tun_parse(struct net_device *filter_dev, + struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + u8 *match_level); + +int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v); + +bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b); + +#endif /* CONFIG_MLX5_ESWITCH */ + +#endif //__MLX5_EN_TC_TUNNEL_H__ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c new file mode 100644 index 000000000..907ad6ffe --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -0,0 +1,1766 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021 Mellanox Technologies. */ + +#include <net/fib_notifier.h> +#include <net/nexthop.h> +#include "tc_tun_encap.h" +#include "en_tc.h" +#include "tc_tun.h" +#include "rep/tc.h" +#include "diag/en_tc_tracepoint.h" + +enum { + MLX5E_ROUTE_ENTRY_VALID = BIT(0), +}; + +static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct mlx5e_encap_entry *e, + int out_index) +{ + struct net_device *route_dev; + int err = 0; + + route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex); + + if (!route_dev || !netif_is_ovs_master(route_dev) || + attr->parse_attr->filter_dev == e->out_dev) + goto out; + + err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex, + MLX5E_TC_INT_PORT_EGRESS, + &attr->action, out_index); + +out: + if (route_dev) + dev_put(route_dev); + + return err; +} + +struct mlx5e_route_key { + int ip_version; + union { + __be32 v4; + struct in6_addr v6; + } endpoint_ip; +}; + +struct mlx5e_route_entry { + struct mlx5e_route_key key; + struct list_head encap_entries; + struct list_head decap_flows; + u32 flags; + struct hlist_node hlist; + refcount_t refcnt; + int tunnel_dev_index; + struct rcu_head rcu; +}; + +struct mlx5e_tc_tun_encap { + struct mlx5e_priv *priv; + struct notifier_block fib_nb; + spinlock_t route_lock; /* protects route_tbl */ + unsigned long route_tbl_last_update; + DECLARE_HASHTABLE(route_tbl, 8); +}; + +static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r) +{ + return r->flags & MLX5E_ROUTE_ENTRY_VALID; +} + +int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec) +{ + struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr; + struct mlx5_rx_tun_attr *tun_attr; + void *daddr, *saddr; + u8 ip_version; + + tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL); + if (!tun_attr) + return -ENOMEM; + + esw_attr->rx_tun_attr = tun_attr; + ip_version = mlx5e_tc_get_ip_version(spec, true); + + if (ip_version == 4) { + daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); + tun_attr->dst_ip.v4 = *(__be32 *)daddr; + tun_attr->src_ip.v4 = *(__be32 *)saddr; + if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4) + return 0; + } +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + else if (ip_version == 6) { + int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6); + + daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6); + saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value, + 
outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6); + memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size); + memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size); + if (ipv6_addr_any(&tun_attr->dst_ip.v6) || + ipv6_addr_any(&tun_attr->src_ip.v6)) + return 0; + } +#endif + /* Only set the flag if both src and dst ip addresses exist. They are + * required to establish routing. + */ + flow_flag_set(flow, TUN_RX); + flow->attr->tun_ip_version = ip_version; + return 0; +} + +static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr) +{ + bool all_flow_encaps_valid = true; + int i; + + /* Flow can be associated with multiple encap entries. + * Before offloading the flow verify that all of them have + * a valid neighbour. + */ + for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { + if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)) + continue; + if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) { + all_flow_encaps_valid = false; + break; + } + } + + return all_flow_encaps_valid; +} + +void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct list_head *flow_list) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_pkt_reformat_params reformat_params; + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; + struct mlx5_flow_spec *spec; + struct mlx5e_tc_flow *flow; + int err; + + if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE) + return; + + memset(&reformat_params, 0, sizeof(reformat_params)); + reformat_params.type = e->reformat_type; + reformat_params.size = e->encap_size; + reformat_params.data = e->encap_header; + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + &reformat_params, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n", + PTR_ERR(e->pkt_reformat)); + return; + } + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(priv); + + list_for_each_entry(flow, flow_list, tmp_list) { + if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW)) + continue; + + spec = &flow->attr->parse_attr->spec; + + attr = mlx5e_tc_get_encap_attr(flow); + esw_attr = attr->esw_attr; + esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat; + esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; + + /* Do not offload flows with unresolved neighbors */ + if (!mlx5e_tc_flow_all_encaps_valid(esw_attr)) + continue; + + err = mlx5e_tc_offload_flow_post_acts(flow); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n", + err); + continue; + } + + /* update from slow path rule to encap rule */ + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr); + if (IS_ERR(rule)) { + mlx5e_tc_unoffload_flow_post_acts(flow); + err = PTR_ERR(rule); + mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", + err); + continue; + } + + mlx5e_tc_unoffload_from_slow_path(esw, flow); + flow->rule[0] = rule; + /* was unset when slow path rule removed */ + flow_flag_set(flow, OFFLOADED); + } +} + +void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct list_head *flow_list) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; + struct mlx5_flow_spec *spec; + struct mlx5e_tc_flow *flow; + int err; + + list_for_each_entry(flow, flow_list, tmp_list) { + if 
(!mlx5e_is_offloaded_flow(flow)) + continue; + + attr = mlx5e_tc_get_encap_attr(flow); + esw_attr = attr->esw_attr; + /* mark the flow's encap dest as non-valid */ + esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; + esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL; + + /* Clear pkt_reformat before checking slow path flag. Because + * in next iteration, the same flow is already set slow path + * flag, but still need to clear the pkt_reformat. + */ + if (flow_flag_test(flow, SLOW)) + continue; + + /* update from encap rule to slow path rule */ + spec = &flow->attr->parse_attr->spec; + rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); + + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n", + err); + continue; + } + + mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr); + mlx5e_tc_unoffload_flow_post_acts(flow); + flow->rule[0] = rule; + /* was unset when fast path rule removed */ + flow_flag_set(flow, OFFLOADED); + } + + /* we know that the encap is valid */ + e->flags &= ~MLX5_ENCAP_ENTRY_VALID; + mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); + e->pkt_reformat = NULL; +} + +static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow, + struct list_head *flow_list, + int index) +{ + if (IS_ERR(mlx5e_flow_get(flow))) { + /* Flow is being deleted concurrently. Wait for it to be + * unoffloaded from hardware, otherwise deleting encap will + * fail. + */ + wait_for_completion(&flow->del_hw_done); + return; + } + wait_for_completion(&flow->init_done); + + flow->tmp_entry_index = index; + list_add(&flow->tmp_list, flow_list); +} + +/* Takes reference to all flows attached to encap and adds the flows to + * flow_list using 'tmp_list' list_head in mlx5e_tc_flow. + */ +void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list) +{ + struct encap_flow_item *efi; + struct mlx5e_tc_flow *flow; + + list_for_each_entry(efi, &e->flows, list) { + flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); + mlx5e_take_tmp_flow(flow, flow_list, efi->index); + } +} + +/* Takes reference to all flows attached to route and adds the flows to + * flow_list using 'tmp_list' list_head in mlx5e_tc_flow. + */ +static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r, + struct list_head *flow_list) +{ + struct mlx5e_tc_flow *flow; + + list_for_each_entry(flow, &r->decap_flows, decap_routes) + mlx5e_take_tmp_flow(flow, flow_list, 0); +} + +typedef bool (match_cb)(struct mlx5e_encap_entry *); + +static struct mlx5e_encap_entry * +mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e, + match_cb match) +{ + struct mlx5e_encap_entry *next = NULL; + +retry: + rcu_read_lock(); + + /* find encap with non-zero reference counter value */ + for (next = e ? 
+ list_next_or_null_rcu(&nhe->encap_list, + &e->encap_list, + struct mlx5e_encap_entry, + encap_list) : + list_first_or_null_rcu(&nhe->encap_list, + struct mlx5e_encap_entry, + encap_list); + next; + next = list_next_or_null_rcu(&nhe->encap_list, + &next->encap_list, + struct mlx5e_encap_entry, + encap_list)) + if (mlx5e_encap_take(next)) + break; + + rcu_read_unlock(); + + /* release starting encap */ + if (e) + mlx5e_encap_put(netdev_priv(e->out_dev), e); + if (!next) + return next; + + /* wait for encap to be fully initialized */ + wait_for_completion(&next->res_ready); + /* continue searching if encap entry is not in valid state after completion */ + if (!match(next)) { + e = next; + goto retry; + } + + return next; +} + +static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e) +{ + return e->flags & MLX5_ENCAP_ENTRY_VALID; +} + +static struct mlx5e_encap_entry * +mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e) +{ + return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid); +} + +static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e) +{ + return e->compl_result >= 0; +} + +struct mlx5e_encap_entry * +mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e) +{ + return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized); +} + +void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_neigh *m_neigh = &nhe->m_neigh; + struct mlx5e_encap_entry *e = NULL; + struct mlx5e_tc_flow *flow; + struct mlx5_fc *counter; + struct neigh_table *tbl; + bool neigh_used = false; + struct neighbour *n; + u64 lastuse; + + if (m_neigh->family == AF_INET) + tbl = &arp_tbl; +#if IS_ENABLED(CONFIG_IPV6) + else if (m_neigh->family == AF_INET6) + tbl = ipv6_stub->nd_tbl; +#endif + else + return; + + /* mlx5e_get_next_valid_encap() releases previous encap before returning + * next one. 
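+ * Hence only the encap we break out of the loop on needs the explicit
+ * mlx5e_encap_put() below.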
+ */ + while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) { + struct mlx5e_priv *priv = netdev_priv(e->out_dev); + struct encap_flow_item *efi, *tmp; + struct mlx5_eswitch *esw; + LIST_HEAD(flow_list); + + esw = priv->mdev->priv.eswitch; + mutex_lock(&esw->offloads.encap_tbl_lock); + list_for_each_entry_safe(efi, tmp, &e->flows, list) { + flow = container_of(efi, struct mlx5e_tc_flow, + encaps[efi->index]); + if (IS_ERR(mlx5e_flow_get(flow))) + continue; + list_add(&flow->tmp_list, &flow_list); + + if (mlx5e_is_offloaded_flow(flow)) { + counter = mlx5e_tc_get_counter(flow); + lastuse = mlx5_fc_query_lastuse(counter); + if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) { + neigh_used = true; + break; + } + } + } + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_put_flow_list(priv, &flow_list); + if (neigh_used) { + /* release current encap before breaking the loop */ + mlx5e_encap_put(priv, e); + break; + } + } + + trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used); + + if (neigh_used) { + nhe->reported_lastuse = jiffies; + + /* find the relevant neigh according to the cached device and + * dst ip pair + */ + n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev)); + if (!n) + return; + + neigh_event_send(n, NULL); + neigh_release(n); + } +} + +static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) +{ + WARN_ON(!list_empty(&e->flows)); + + if (e->compl_result > 0) { + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); + + if (e->flags & MLX5_ENCAP_ENTRY_VALID) + mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); + } + + kfree(e->tun_info); + kfree(e->encap_header); + kfree_rcu(e, rcu); +} + +static void mlx5e_decap_dealloc(struct mlx5e_priv *priv, + struct mlx5e_decap_entry *d) +{ + WARN_ON(!list_empty(&d->flows)); + + if (!d->compl_result) + mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat); + + kfree_rcu(d, rcu); +} + +void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock)) + return; + list_del(&e->route_list); + hash_del_rcu(&e->encap_hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_encap_dealloc(priv, e); +} + +static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock)) + return; + hash_del_rcu(&d->hlist); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + mlx5e_decap_dealloc(priv, d); +} + +static void mlx5e_detach_encap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + int out_index); + +void mlx5e_detach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + int out_index) +{ + struct mlx5e_encap_entry *e = flow->encaps[out_index].e; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (!mlx5e_is_eswitch_flow(flow)) + return; + + if (attr->esw_attr->dests[out_index].flags & + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) + mlx5e_detach_encap_route(priv, flow, out_index); + + /* flow wasn't fully initialized */ + if (!e) + return; + + mutex_lock(&esw->offloads.encap_tbl_lock); + list_del(&flow->encaps[out_index].list); + flow->encaps[out_index].e = NULL; + if (!refcount_dec_and_test(&e->refcnt)) { + mutex_unlock(&esw->offloads.encap_tbl_lock); + return; + } + list_del(&e->route_list); + 
hash_del_rcu(&e->encap_hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_encap_dealloc(priv, e); +} + +void mlx5e_detach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_decap_entry *d = flow->decap_reformat; + + if (!d) + return; + + mutex_lock(&esw->offloads.decap_tbl_lock); + list_del(&flow->l3_to_l2_reformat); + flow->decap_reformat = NULL; + + if (!refcount_dec_and_test(&d->refcnt)) { + mutex_unlock(&esw->offloads.decap_tbl_lock); + return; + } + hash_del_rcu(&d->hlist); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + mlx5e_decap_dealloc(priv, d); +} + +bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b) +{ + return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 && + a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type; +} + +static int cmp_decap_info(struct mlx5e_decap_key *a, + struct mlx5e_decap_key *b) +{ + return memcmp(&a->key, &b->key, sizeof(b->key)); +} + +static int hash_encap_info(struct mlx5e_encap_key *key) +{ + return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), + key->tc_tunnel->tunnel_type); +} + +static int hash_decap_info(struct mlx5e_decap_key *key) +{ + return jhash(&key->key, sizeof(key->key), 0); +} + +bool mlx5e_encap_take(struct mlx5e_encap_entry *e) +{ + return refcount_inc_not_zero(&e->refcnt); +} + +static bool mlx5e_decap_take(struct mlx5e_decap_entry *e) +{ + return refcount_inc_not_zero(&e->refcnt); +} + +static struct mlx5e_encap_entry * +mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key, + uintptr_t hash_key) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_encap_key e_key; + struct mlx5e_encap_entry *e; + + hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, + encap_hlist, hash_key) { + e_key.ip_tun_key = &e->tun_info->key; + e_key.tc_tunnel = e->tunnel; + if (e->tunnel->encap_info_equal(&e_key, key) && + mlx5e_encap_take(e)) + return e; + } + + return NULL; +} + +static struct mlx5e_decap_entry * +mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key, + uintptr_t hash_key) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_decap_key r_key; + struct mlx5e_decap_entry *e; + + hash_for_each_possible_rcu(esw->offloads.decap_tbl, e, + hlist, hash_key) { + r_key = e->key; + if (!cmp_decap_info(&r_key, key) && + mlx5e_decap_take(e)) + return e; + } + return NULL; +} + +struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info) +{ + size_t tun_size = sizeof(*tun_info) + tun_info->options_len; + + return kmemdup(tun_info, tun_size, GFP_KERNEL); +} + +static bool is_duplicated_encap_entry(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + int out_index, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + int i; + + for (i = 0; i < out_index; i++) { + if (flow->encaps[i].e != e) + continue; + NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action"); + netdev_err(priv->netdev, "can't duplicate encap action\n"); + return true; + } + + return false; +} + +static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + struct net_device *out_dev, + int route_dev_ifindex, + int out_index) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct net_device *route_dev; + u16 vport_num; + int err = 0; + u32 data; + + route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex); + + 
if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops || + !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) + goto out; + + err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num); + if (err) + goto out; + + attr->dest_chain = 0; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE; + data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch, + vport_num); + err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts, + MLX5_FLOW_NAMESPACE_FDB, + VPORT_TO_REG, data); + if (err >= 0) { + esw_attr->dests[out_index].src_port_rewrite_act_id = err; + err = 0; + } + +out: + if (route_dev) + dev_put(route_dev); + return err; +} + +static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + struct net_device *out_dev, + int route_dev_ifindex, + int out_index) +{ + int act_id = attr->dests[out_index].src_port_rewrite_act_id; + struct net_device *route_dev; + u16 vport_num; + int err = 0; + u32 data; + + route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex); + + if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops || + !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) { + err = -ENODEV; + goto out; + } + + err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num); + if (err) + goto out; + + data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch, + vport_num); + mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data); + +out: + if (route_dev) + dev_put(route_dev); + return err; +} + +static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5e_tc_tun_encap *encap; + unsigned int ret; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + encap = uplink_priv->encap; + + spin_lock_bh(&encap->route_lock); + ret = encap->route_tbl_last_update; + spin_unlock_bh(&encap->route_lock); + return ret; +} + +static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct mlx5e_encap_entry *e, + bool new_encap_entry, + unsigned long tbl_time_before, + int out_index); + +int mlx5e_attach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct net_device *mirred_dev, + int out_index, + struct netlink_ext_ack *extack, + struct net_device **encap_dev) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_flow_parse_attr *parse_attr; + const struct ip_tunnel_info *tun_info; + const struct mlx5e_mpls_info *mpls_info; + unsigned long tbl_time_before = 0; + struct mlx5e_encap_entry *e; + struct mlx5e_encap_key key; + bool entry_created = false; + unsigned short family; + uintptr_t hash_key; + int err = 0; + + parse_attr = attr->parse_attr; + tun_info = parse_attr->tun_info[out_index]; + mpls_info = &parse_attr->mpls_info[out_index]; + family = ip_tunnel_info_af(tun_info); + key.ip_tun_key = &tun_info->key; + key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev); + if (!key.tc_tunnel) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel"); + return -EOPNOTSUPP; + } + + hash_key = hash_encap_info(&key); + + mutex_lock(&esw->offloads.encap_tbl_lock); + e = mlx5e_encap_get(priv, &key, hash_key); + + /* must verify 
if encap is valid or not */ + if (e) { + /* Check that entry was not already attached to this flow */ + if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) { + err = -EOPNOTSUPP; + goto out_err; + } + + mutex_unlock(&esw->offloads.encap_tbl_lock); + wait_for_completion(&e->res_ready); + + /* Protect against concurrent neigh update. */ + mutex_lock(&esw->offloads.encap_tbl_lock); + if (e->compl_result < 0) { + err = -EREMOTEIO; + goto out_err; + } + goto attach_flow; + } + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) { + err = -ENOMEM; + goto out_err; + } + + refcount_set(&e->refcnt, 1); + init_completion(&e->res_ready); + entry_created = true; + INIT_LIST_HEAD(&e->route_list); + + tun_info = mlx5e_dup_tun_info(tun_info); + if (!tun_info) { + err = -ENOMEM; + goto out_err_init; + } + e->tun_info = tun_info; + memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info)); + err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); + if (err) + goto out_err_init; + + INIT_LIST_HEAD(&e->flows); + hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); + tbl_time_before = mlx5e_route_tbl_get_last_update(priv); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + if (family == AF_INET) + err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e); + else if (family == AF_INET6) + err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e); + + /* Protect against concurrent neigh update. */ + mutex_lock(&esw->offloads.encap_tbl_lock); + complete_all(&e->res_ready); + if (err) { + e->compl_result = err; + goto out_err; + } + e->compl_result = 1; + +attach_flow: + err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created, + tbl_time_before, out_index); + if (err) + goto out_err; + + err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index); + if (err == -EOPNOTSUPP) { + /* If device doesn't support int port offload, + * redirect to uplink vport. 
+ */ + mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n"); + err = 0; + } else if (err) { + goto out_err; + } + + flow->encaps[out_index].e = e; + list_add(&flow->encaps[out_index].list, &e->flows); + flow->encaps[out_index].index = out_index; + *encap_dev = e->out_dev; + if (e->flags & MLX5_ENCAP_ENTRY_VALID) { + attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat; + attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; + } else { + flow_flag_set(flow, SLOW); + } + mutex_unlock(&esw->offloads.encap_tbl_lock); + + return err; + +out_err: + mutex_unlock(&esw->offloads.encap_tbl_lock); + if (e) + mlx5e_encap_put(priv, e); + return err; + +out_err_init: + mutex_unlock(&esw->offloads.encap_tbl_lock); + kfree(tun_info); + kfree(e); + return err; +} + +int mlx5e_attach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; + struct mlx5_pkt_reformat_params reformat_params; + struct mlx5e_decap_entry *d; + struct mlx5e_decap_key key; + uintptr_t hash_key; + int err = 0; + + if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) { + NL_SET_ERR_MSG_MOD(extack, + "encap header larger than max supported"); + return -EOPNOTSUPP; + } + + key.key = attr->eth; + hash_key = hash_decap_info(&key); + mutex_lock(&esw->offloads.decap_tbl_lock); + d = mlx5e_decap_get(priv, &key, hash_key); + if (d) { + mutex_unlock(&esw->offloads.decap_tbl_lock); + wait_for_completion(&d->res_ready); + mutex_lock(&esw->offloads.decap_tbl_lock); + if (d->compl_result) { + err = -EREMOTEIO; + goto out_free; + } + goto found; + } + + d = kzalloc(sizeof(*d), GFP_KERNEL); + if (!d) { + err = -ENOMEM; + goto out_err; + } + + d->key = key; + refcount_set(&d->refcnt, 1); + init_completion(&d->res_ready); + INIT_LIST_HEAD(&d->flows); + hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + memset(&reformat_params, 0, sizeof(reformat_params)); + reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; + reformat_params.size = sizeof(attr->eth); + reformat_params.data = &attr->eth; + d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + &reformat_params, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(d->pkt_reformat)) { + err = PTR_ERR(d->pkt_reformat); + d->compl_result = err; + } + mutex_lock(&esw->offloads.decap_tbl_lock); + complete_all(&d->res_ready); + if (err) + goto out_free; + +found: + flow->decap_reformat = d; + attr->decap_pkt_reformat = d->pkt_reformat; + list_add(&flow->l3_to_l2_reformat, &d->flows); + mutex_unlock(&esw->offloads.decap_tbl_lock); + return 0; + +out_free: + mutex_unlock(&esw->offloads.decap_tbl_lock); + mlx5e_decap_put(priv, d); + return err; + +out_err: + mutex_unlock(&esw->offloads.decap_tbl_lock); + return err; +} + +static int cmp_route_info(struct mlx5e_route_key *a, + struct mlx5e_route_key *b) +{ + if (a->ip_version == 4 && b->ip_version == 4) + return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4, + sizeof(a->endpoint_ip.v4)); + else if (a->ip_version == 6 && b->ip_version == 6) + return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6, + sizeof(a->endpoint_ip.v6)); + return 1; +} + +static u32 hash_route_info(struct mlx5e_route_key *key) +{ + if (key->ip_version == 4) + return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0); + return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0); +} + 
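cmp_route_info() and hash_route_info() above form a matched pair: the hash is computed over the endpoint address only (IPv4 or IPv6, chosen by ip_version), and cmp_route_info() resolves any remaining collisions, so two keys that compare equal always hash to the same bucket. The sketch below is a minimal, userspace-compilable illustration of the same pairing; the struct is a simplified stand-in for struct mlx5e_route_key, and FNV-1a replaces the kernel's jhash(), which is not available outside the kernel.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Simplified stand-in for struct mlx5e_route_key. */
struct route_key {
	int ip_version;			/* 4 or 6 */
	union {
		uint32_t v4;		/* IPv4 endpoint */
		uint8_t  v6[16];	/* IPv6 endpoint */
	} endpoint_ip;
};

/* FNV-1a, used here only as a portable replacement for jhash(). */
static uint32_t fnv1a(const void *data, size_t len)
{
	const uint8_t *p = data;
	uint32_t h = 2166136261u;

	while (len--)
		h = (h ^ *p++) * 16777619u;
	return h;
}

/* Hash exactly the bytes that route_key_equal() compares. */
static uint32_t route_key_hash(const struct route_key *k)
{
	if (k->ip_version == 4)
		return fnv1a(&k->endpoint_ip.v4, sizeof(k->endpoint_ip.v4));
	return fnv1a(k->endpoint_ip.v6, sizeof(k->endpoint_ip.v6));
}

static int route_key_equal(const struct route_key *a, const struct route_key *b)
{
	if (a->ip_version != b->ip_version)
		return 0;
	if (a->ip_version == 4)
		return a->endpoint_ip.v4 == b->endpoint_ip.v4;
	return memcmp(a->endpoint_ip.v6, b->endpoint_ip.v6, 16) == 0;
}

int main(void)
{
	struct route_key a = { .ip_version = 4, .endpoint_ip.v4 = 0x0a000001 };
	struct route_key b = a;

	/* Keys that compare equal must hash identically for bucket lookup to work. */
	printf("equal=%d, same bucket=%d\n",
	       route_key_equal(&a, &b),
	       route_key_hash(&a) == route_key_hash(&b));
	return 0;
}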
+static void mlx5e_route_dealloc(struct mlx5e_priv *priv, + struct mlx5e_route_entry *r) +{ + WARN_ON(!list_empty(&r->decap_flows)); + WARN_ON(!list_empty(&r->encap_entries)); + + kfree_rcu(r, rcu); +} + +static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock)) + return; + + hash_del_rcu(&r->hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_route_dealloc(priv, r); +} + +static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + lockdep_assert_held(&esw->offloads.encap_tbl_lock); + + if (!refcount_dec_and_test(&r->refcnt)) + return; + hash_del_rcu(&r->hlist); + mlx5e_route_dealloc(priv, r); +} + +static struct mlx5e_route_entry * +mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key, + u32 hash_key) +{ + struct mlx5e_route_key r_key; + struct mlx5e_route_entry *r; + + hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) { + r_key = r->key; + if (!cmp_route_info(&r_key, key) && + refcount_inc_not_zero(&r->refcnt)) + return r; + } + return NULL; +} + +static struct mlx5e_route_entry * +mlx5e_route_get_create(struct mlx5e_priv *priv, + struct mlx5e_route_key *key, + int tunnel_dev_index, + unsigned long *route_tbl_change_time) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5e_tc_tun_encap *encap; + struct mlx5e_route_entry *r; + u32 hash_key; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + encap = uplink_priv->encap; + + hash_key = hash_route_info(key); + spin_lock_bh(&encap->route_lock); + r = mlx5e_route_get(encap, key, hash_key); + spin_unlock_bh(&encap->route_lock); + if (r) { + if (!mlx5e_route_entry_valid(r)) { + mlx5e_route_put_locked(priv, r); + return ERR_PTR(-EINVAL); + } + return r; + } + + r = kzalloc(sizeof(*r), GFP_KERNEL); + if (!r) + return ERR_PTR(-ENOMEM); + + r->key = *key; + r->flags |= MLX5E_ROUTE_ENTRY_VALID; + r->tunnel_dev_index = tunnel_dev_index; + refcount_set(&r->refcnt, 1); + INIT_LIST_HEAD(&r->decap_flows); + INIT_LIST_HEAD(&r->encap_entries); + + spin_lock_bh(&encap->route_lock); + *route_tbl_change_time = encap->route_tbl_last_update; + hash_add(encap->route_tbl, &r->hlist, hash_key); + spin_unlock_bh(&encap->route_lock); + + return r; +} + +static struct mlx5e_route_entry * +mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key) +{ + u32 hash_key = hash_route_info(key); + struct mlx5e_route_entry *r; + + spin_lock_bh(&encap->route_lock); + encap->route_tbl_last_update = jiffies; + r = mlx5e_route_get(encap, key, hash_key); + spin_unlock_bh(&encap->route_lock); + + return r; +} + +struct mlx5e_tc_fib_event_data { + struct work_struct work; + unsigned long event; + struct mlx5e_route_entry *r; + struct net_device *ul_dev; +}; + +static void mlx5e_tc_fib_event_work(struct work_struct *work); +static struct mlx5e_tc_fib_event_data * +mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags) +{ + struct mlx5e_tc_fib_event_data *fib_work; + + fib_work = kzalloc(sizeof(*fib_work), flags); + if (WARN_ON(!fib_work)) + return NULL; + + INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work); + fib_work->event = event; + fib_work->ul_dev = ul_dev; + + 
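+	/* Callers take their own references on ul_dev and on the route entry before queueing the work. */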
return fib_work; +} + +static int +mlx5e_route_enqueue_update(struct mlx5e_priv *priv, + struct mlx5e_route_entry *r, + unsigned long event) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_fib_event_data *fib_work; + struct mlx5e_rep_priv *uplink_rpriv; + struct net_device *ul_dev; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + ul_dev = uplink_rpriv->netdev; + + fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL); + if (!fib_work) + return -ENOMEM; + + dev_hold(ul_dev); + refcount_inc(&r->refcnt); + fib_work->r = r; + queue_work(priv->wq, &fib_work->work); + + return 0; +} + +int mlx5e_attach_decap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + unsigned long tbl_time_before, tbl_time_after; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5e_route_entry *r; + struct mlx5e_route_key key; + int err = 0; + + esw_attr = attr->esw_attr; + parse_attr = attr->parse_attr; + mutex_lock(&esw->offloads.encap_tbl_lock); + if (!esw_attr->rx_tun_attr) + goto out; + + tbl_time_before = mlx5e_route_tbl_get_last_update(priv); + tbl_time_after = tbl_time_before; + err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev); + if (err || !esw_attr->rx_tun_attr->decap_vport) + goto out; + + key.ip_version = attr->tun_ip_version; + if (key.ip_version == 4) + key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4; + else + key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6; + + r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex, + &tbl_time_after); + if (IS_ERR(r)) { + err = PTR_ERR(r); + goto out; + } + /* Routing changed concurrently. FIB event handler might have missed new + * entry, schedule update. 
+ */ + if (tbl_time_before != tbl_time_after) { + err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE); + if (err) { + mlx5e_route_put_locked(priv, r); + goto out; + } + } + + flow->decap_route = r; + list_add(&flow->decap_routes, &r->decap_flows); + mutex_unlock(&esw->offloads.encap_tbl_lock); + return 0; + +out: + mutex_unlock(&esw->offloads.encap_tbl_lock); + return err; +} + +static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct mlx5e_encap_entry *e, + bool new_encap_entry, + unsigned long tbl_time_before, + int out_index) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + unsigned long tbl_time_after = tbl_time_before; + struct mlx5e_tc_flow_parse_attr *parse_attr; + const struct ip_tunnel_info *tun_info; + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5e_route_entry *r; + struct mlx5e_route_key key; + unsigned short family; + int err = 0; + + esw_attr = attr->esw_attr; + parse_attr = attr->parse_attr; + tun_info = parse_attr->tun_info[out_index]; + family = ip_tunnel_info_af(tun_info); + + if (family == AF_INET) { + key.endpoint_ip.v4 = tun_info->key.u.ipv4.src; + key.ip_version = 4; + } else if (family == AF_INET6) { + key.endpoint_ip.v6 = tun_info->key.u.ipv6.src; + key.ip_version = 6; + } + + err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev, + e->route_dev_ifindex, out_index); + if (err || !(esw_attr->dests[out_index].flags & + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)) + return err; + + r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index], + &tbl_time_after); + if (IS_ERR(r)) + return PTR_ERR(r); + /* Routing changed concurrently. FIB event handler might have missed new + * entry, schedule update. 
+ */ + if (tbl_time_before != tbl_time_after) { + err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE); + if (err) { + mlx5e_route_put_locked(priv, r); + return err; + } + } + + flow->encap_routes[out_index].r = r; + if (new_encap_entry) + list_add(&e->route_list, &r->encap_entries); + flow->encap_routes[out_index].index = out_index; + return 0; +} + +void mlx5e_detach_decap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_route_entry *r = flow->decap_route; + + if (!r) + return; + + mutex_lock(&esw->offloads.encap_tbl_lock); + list_del(&flow->decap_routes); + flow->decap_route = NULL; + + if (!refcount_dec_and_test(&r->refcnt)) { + mutex_unlock(&esw->offloads.encap_tbl_lock); + return; + } + hash_del_rcu(&r->hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_route_dealloc(priv, r); +} + +static void mlx5e_detach_encap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + int out_index) +{ + struct mlx5e_route_entry *r = flow->encap_routes[out_index].r; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_encap_entry *e, *tmp; + + if (!r) + return; + + mutex_lock(&esw->offloads.encap_tbl_lock); + flow->encap_routes[out_index].r = NULL; + + if (!refcount_dec_and_test(&r->refcnt)) { + mutex_unlock(&esw->offloads.encap_tbl_lock); + return; + } + list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list) + list_del_init(&e->route_list); + hash_del_rcu(&r->hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_route_dealloc(priv, r); +} + +static void mlx5e_invalidate_encap(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct list_head *encap_flows) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_flow *flow; + + list_for_each_entry(flow, encap_flows, tmp_list) { + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_flow_attr *attr; + + if (!mlx5e_is_offloaded_flow(flow)) + continue; + + attr = mlx5e_tc_get_encap_attr(flow); + esw_attr = attr->esw_attr; + + if (flow_flag_test(flow, SLOW)) + mlx5e_tc_unoffload_from_slow_path(esw, flow); + else + mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr); + mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr); + attr->modify_hdr = NULL; + + esw_attr->dests[flow->tmp_entry_index].flags &= + ~MLX5_ESW_DEST_ENCAP_VALID; + esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL; + } + + e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE; + if (e->flags & MLX5_ENCAP_ENTRY_VALID) { + e->flags &= ~MLX5_ENCAP_ENTRY_VALID; + mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); + e->pkt_reformat = NULL; + } +} + +static void mlx5e_reoffload_encap(struct mlx5e_priv *priv, + struct net_device *tunnel_dev, + struct mlx5e_encap_entry *e, + struct list_head *encap_flows) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_flow *flow; + int err; + + err = ip_tunnel_info_af(e->tun_info) == AF_INET ? 
+ mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) : + mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e); + if (err) + mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err); + e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE; + + list_for_each_entry(flow, encap_flows, tmp_list) { + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; + struct mlx5_flow_spec *spec; + + if (flow_flag_test(flow, FAILED)) + continue; + + spec = &flow->attr->parse_attr->spec; + + attr = mlx5e_tc_get_encap_attr(flow); + esw_attr = attr->esw_attr; + parse_attr = attr->parse_attr; + + err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts, + e->out_dev, e->route_dev_ifindex, + flow->tmp_entry_index); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err); + continue; + } + + err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d", + err); + continue; + } + + if (e->flags & MLX5_ENCAP_ENTRY_VALID) { + esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat; + esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; + if (!mlx5e_tc_flow_all_encaps_valid(esw_attr)) + goto offload_to_slow_path; + + err = mlx5e_tc_offload_flow_post_acts(flow); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n", + err); + goto offload_to_slow_path; + } + + /* update from slow path rule to encap rule */ + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr); + if (IS_ERR(rule)) { + mlx5e_tc_unoffload_flow_post_acts(flow); + err = PTR_ERR(rule); + mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", + err); + } else { + flow->rule[0] = rule; + } + } else { +offload_to_slow_path: + rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); + /* mark the flow's encap dest as non-valid */ + esw_attr->dests[flow->tmp_entry_index].flags &= + ~MLX5_ESW_DEST_ENCAP_VALID; + + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n", + err); + } else { + flow->rule[0] = rule; + } + } + flow_flag_set(flow, OFFLOADED); + } +} + +static int mlx5e_update_route_encaps(struct mlx5e_priv *priv, + struct mlx5e_route_entry *r, + struct list_head *flow_list, + bool replace) +{ + struct net_device *tunnel_dev; + struct mlx5e_encap_entry *e; + + tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index); + if (!tunnel_dev) + return -ENODEV; + + list_for_each_entry(e, &r->encap_entries, route_list) { + LIST_HEAD(encap_flows); + + mlx5e_take_all_encap_flows(e, &encap_flows); + if (list_empty(&encap_flows)) + continue; + + if (mlx5e_route_entry_valid(r)) + mlx5e_invalidate_encap(priv, e, &encap_flows); + + if (!replace) { + list_splice(&encap_flows, flow_list); + continue; + } + + mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows); + list_splice(&encap_flows, flow_list); + } + + return 0; +} + +static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv, + struct list_head *flow_list) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_flow *flow; + + list_for_each_entry(flow, flow_list, tmp_list) + if (mlx5e_is_offloaded_flow(flow)) + mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr); +} + +static void mlx5e_reoffload_decap(struct mlx5e_priv *priv, + struct list_head *decap_flows) +{ + struct mlx5_eswitch *esw = 
priv->mdev->priv.eswitch; + struct mlx5e_tc_flow *flow; + + list_for_each_entry(flow, decap_flows, tmp_list) { + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err; + + if (flow_flag_test(flow, FAILED)) + continue; + + parse_attr = attr->parse_attr; + spec = &parse_attr->spec; + err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n", + err); + continue; + } + + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n", + err); + } else { + flow->rule[0] = rule; + flow_flag_set(flow, OFFLOADED); + } + } +} + +static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv, + struct mlx5e_route_entry *r, + struct list_head *flow_list, + bool replace) +{ + struct net_device *tunnel_dev; + LIST_HEAD(decap_flows); + + tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index); + if (!tunnel_dev) + return -ENODEV; + + mlx5e_take_all_route_decap_flows(r, &decap_flows); + if (mlx5e_route_entry_valid(r)) + mlx5e_unoffload_flow_list(priv, &decap_flows); + if (replace) + mlx5e_reoffload_decap(priv, &decap_flows); + + list_splice(&decap_flows, flow_list); + + return 0; +} + +static void mlx5e_tc_fib_event_work(struct work_struct *work) +{ + struct mlx5e_tc_fib_event_data *event_data = + container_of(work, struct mlx5e_tc_fib_event_data, work); + struct net_device *ul_dev = event_data->ul_dev; + struct mlx5e_priv *priv = netdev_priv(ul_dev); + struct mlx5e_route_entry *r = event_data->r; + struct mlx5_eswitch *esw; + LIST_HEAD(flow_list); + bool replace; + int err; + + /* sync with concurrent neigh updates */ + rtnl_lock(); + esw = priv->mdev->priv.eswitch; + mutex_lock(&esw->offloads.encap_tbl_lock); + replace = event_data->event == FIB_EVENT_ENTRY_REPLACE; + + if (!mlx5e_route_entry_valid(r) && !replace) + goto out; + + err = mlx5e_update_route_encaps(priv, r, &flow_list, replace); + if (err) + mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n", + err); + + err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace); + if (err) + mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n", + err); + + if (replace) + r->flags |= MLX5E_ROUTE_ENTRY_VALID; +out: + mutex_unlock(&esw->offloads.encap_tbl_lock); + rtnl_unlock(); + + mlx5e_put_flow_list(priv, &flow_list); + mlx5e_route_put(priv, event_data->r); + dev_put(event_data->ul_dev); + kfree(event_data); +} + +static struct mlx5e_tc_fib_event_data * +mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv, + struct net_device *ul_dev, + struct mlx5e_tc_tun_encap *encap, + unsigned long event, + struct fib_notifier_info *info) +{ + struct fib_entry_notifier_info *fen_info; + struct mlx5e_tc_fib_event_data *fib_work; + struct mlx5e_route_entry *r; + struct mlx5e_route_key key; + struct net_device *fib_dev; + + fen_info = container_of(info, struct fib_entry_notifier_info, info); + if (fen_info->fi->nh) + return NULL; + fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; + if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops || + fen_info->dst_len != 32) + return NULL; + + fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC); + if (!fib_work) + return ERR_PTR(-ENOMEM); + + key.endpoint_ip.v4 = htonl(fen_info->dst); + key.ip_version = 4; + + /* Can't fail after this point 
because releasing reference to r + * requires obtaining sleeping mutex which we can't do in atomic + * context. + */ + r = mlx5e_route_lookup_for_update(encap, &key); + if (!r) + goto out; + fib_work->r = r; + dev_hold(ul_dev); + + return fib_work; + +out: + kfree(fib_work); + return NULL; +} + +static struct mlx5e_tc_fib_event_data * +mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv, + struct net_device *ul_dev, + struct mlx5e_tc_tun_encap *encap, + unsigned long event, + struct fib_notifier_info *info) +{ + struct fib6_entry_notifier_info *fen_info; + struct mlx5e_tc_fib_event_data *fib_work; + struct mlx5e_route_entry *r; + struct mlx5e_route_key key; + struct net_device *fib_dev; + + fen_info = container_of(info, struct fib6_entry_notifier_info, info); + fib_dev = fib6_info_nh_dev(fen_info->rt); + if (fib_dev->netdev_ops != &mlx5e_netdev_ops || + fen_info->rt->fib6_dst.plen != 128) + return NULL; + + fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC); + if (!fib_work) + return ERR_PTR(-ENOMEM); + + memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr, + sizeof(fen_info->rt->fib6_dst.addr)); + key.ip_version = 6; + + /* Can't fail after this point because releasing reference to r + * requires obtaining sleeping mutex which we can't do in atomic + * context. + */ + r = mlx5e_route_lookup_for_update(encap, &key); + if (!r) + goto out; + fib_work->r = r; + dev_hold(ul_dev); + + return fib_work; + +out: + kfree(fib_work); + return NULL; +} + +static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr) +{ + struct mlx5e_tc_fib_event_data *fib_work; + struct fib_notifier_info *info = ptr; + struct mlx5e_tc_tun_encap *encap; + struct net_device *ul_dev; + struct mlx5e_priv *priv; + + encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb); + priv = encap->priv; + ul_dev = priv->netdev; + priv = netdev_priv(ul_dev); + + switch (event) { + case FIB_EVENT_ENTRY_REPLACE: + case FIB_EVENT_ENTRY_DEL: + if (info->family == AF_INET) + fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info); + else if (info->family == AF_INET6) + fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info); + else + return NOTIFY_DONE; + + if (!IS_ERR_OR_NULL(fib_work)) { + queue_work(priv->wq, &fib_work->work); + } else if (IS_ERR(fib_work)) { + NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work"); + mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n", + PTR_ERR(fib_work)); + } + + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_DONE; +} + +struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_tun_encap *encap; + int err; + + encap = kvzalloc(sizeof(*encap), GFP_KERNEL); + if (!encap) + return ERR_PTR(-ENOMEM); + + encap->priv = priv; + encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event; + spin_lock_init(&encap->route_lock); + hash_init(encap->route_tbl); + err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb, + NULL, NULL); + if (err) { + kvfree(encap); + return ERR_PTR(err); + } + + return encap; +} + +void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap) +{ + if (!encap) + return; + + unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb); + flush_workqueue(encap->priv->wq); /* flush fib event works */ + kvfree(encap); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h new file mode 100644 index 000000000..8ad273dde --- /dev/null +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TC_TUN_ENCAP_H__ +#define __MLX5_EN_TC_TUN_ENCAP_H__ + +#include "tc_priv.h" + +void mlx5e_detach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + int out_index); + +int mlx5e_attach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct net_device *mirred_dev, + int out_index, + struct netlink_ext_ack *extack, + struct net_device **encap_dev); + +int mlx5e_attach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack); +void mlx5e_detach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); + +int mlx5e_attach_decap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); +void mlx5e_detach_decap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); + +struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info); + +int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec); + +struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv); +void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap); + +#endif /* __MLX5_EN_TC_TUN_ENCAP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c new file mode 100644 index 000000000..054d80c4e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c @@ -0,0 +1,375 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include <net/geneve.h> +#include "lib/geneve.h" +#include "en/tc_tun.h" + +#define MLX5E_GENEVE_VER 0 + +static bool mlx5e_tc_tun_can_offload_geneve(struct mlx5e_priv *priv) +{ + return !!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_GENEVE); +} + +static int mlx5e_tc_tun_calc_hlen_geneve(struct mlx5e_encap_entry *e) +{ + return sizeof(struct udphdr) + + sizeof(struct genevehdr) + + e->tun_info->options_len; +} + +static int mlx5e_tc_tun_check_udp_dport_geneve(struct mlx5e_priv *priv, + struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_ports enc_ports; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) + return -EOPNOTSUPP; + + flow_rule_match_enc_ports(rule, &enc_ports); + + /* Currently we support only default GENEVE + * port, so udp dst port must match. 
+ */ + if (be16_to_cpu(enc_ports.key->dst) != GENEVE_UDP_PORT) { + NL_SET_ERR_MSG_MOD(extack, + "Matched UDP dst port is not registered as a GENEVE port"); + netdev_warn(priv->netdev, + "UDP port %d is not registered as a GENEVE port\n", + be16_to_cpu(enc_ports.key->dst)); + return -EOPNOTSUPP; + } + + return 0; +} + +static int mlx5e_tc_tun_parse_udp_ports_geneve(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + int err; + + err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v); + if (err) + return err; + + return mlx5e_tc_tun_check_udp_dport_geneve(priv, f); +} + +static int mlx5e_tc_tun_init_encap_attr_geneve(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + e->tunnel = &geneve_tunnel; + + /* Reformat type for GENEVE encap is similar to VXLAN: + * in both cases the HW adds in the same place a + * defined encapsulation header that the SW provides. + */ + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; + return 0; +} + +static void mlx5e_tunnel_id_to_vni(__be64 tun_id, __u8 *vni) +{ +#ifdef __BIG_ENDIAN + vni[0] = (__force __u8)(tun_id >> 16); + vni[1] = (__force __u8)(tun_id >> 8); + vni[2] = (__force __u8)tun_id; +#else + vni[0] = (__force __u8)((__force u64)tun_id >> 40); + vni[1] = (__force __u8)((__force u64)tun_id >> 48); + vni[2] = (__force __u8)((__force u64)tun_id >> 56); +#endif +} + +static int mlx5e_gen_ip_tunnel_header_geneve(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + const struct ip_tunnel_info *tun_info = e->tun_info; + struct udphdr *udp = (struct udphdr *)(buf); + struct genevehdr *geneveh; + + geneveh = (struct genevehdr *)((char *)udp + sizeof(struct udphdr)); + + *ip_proto = IPPROTO_UDP; + + udp->dest = tun_info->key.tp_dst; + + memset(geneveh, 0, sizeof(*geneveh)); + geneveh->ver = MLX5E_GENEVE_VER; + geneveh->opt_len = tun_info->options_len / 4; + geneveh->oam = !!(tun_info->key.tun_flags & TUNNEL_OAM); + geneveh->critical = !!(tun_info->key.tun_flags & TUNNEL_CRIT_OPT); + mlx5e_tunnel_id_to_vni(tun_info->key.tun_id, geneveh->vni); + geneveh->proto_type = htons(ETH_P_TEB); + + if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT) { + if (!geneveh->opt_len) + return -EOPNOTSUPP; + ip_tunnel_info_opts_get(geneveh->options, tun_info); + } + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve_vni(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_enc_keyid enc_keyid; + void *misc_c, *misc_v; + + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) + return 0; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + + if (!enc_keyid.mask->keyid) + return 0; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_vni)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE VNI is not supported"); + netdev_warn(priv->netdev, "Matching on GENEVE VNI is not supported\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(fte_match_set_misc, misc_c, geneve_vni, be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, geneve_vni, be32_to_cpu(enc_keyid.key->keyid)); + + return 0; +} + +static int 
mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f) +{ + u8 max_tlv_option_data_len = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_option_data_len); + u8 max_tlv_options = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_options); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + void *misc_c, *misc_v, *misc_3_c, *misc_3_v; + struct geneve_opt *option_key, *option_mask; + __be32 opt_data_key = 0, opt_data_mask = 0; + struct flow_match_enc_opts enc_opts; + int res = 0; + + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + misc_3_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_3); + misc_3_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_3); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) + return 0; + + flow_rule_match_enc_opts(rule, &enc_opts); + + if (memchr_inv(&enc_opts.mask->data, 0, sizeof(enc_opts.mask->data)) && + !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.geneve_tlv_option_0_data)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options is not supported"); + netdev_warn(priv->netdev, + "Matching on GENEVE options is not supported\n"); + return -EOPNOTSUPP; + } + + /* make sure that we're talking about GENEVE options */ + + if (enc_opts.key->dst_opt_type != TUNNEL_GENEVE_OPT) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: option type is not GENEVE"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: option type is not GENEVE\n"); + return -EOPNOTSUPP; + } + + if (enc_opts.mask->len && + !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_geneve_opt_len)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE options len is not supported"); + netdev_warn(priv->netdev, + "Matching on GENEVE options len is not supported\n"); + return -EOPNOTSUPP; + } + + /* max_geneve_tlv_option_data_len comes in multiples of 4 bytes, and it + * doesn't include the TLV option header. 'geneve_opt_len' is a total + * len of all the options, including the headers, also multiples of 4 + * bytes. Len that comes from the dissector is in bytes. 
+ */ + + if ((enc_opts.key->len / 4) > ((max_tlv_option_data_len + 1) * max_tlv_options)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: unsupported options len"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: unsupported options len (len=%d)\n", + enc_opts.key->len); + return -EOPNOTSUPP; + } + + MLX5_SET(fte_match_set_misc, misc_c, geneve_opt_len, enc_opts.mask->len / 4); + MLX5_SET(fte_match_set_misc, misc_v, geneve_opt_len, enc_opts.key->len / 4); + + /* we support matching on one option only, so just get it */ + option_key = (struct geneve_opt *)&enc_opts.key->data[0]; + option_mask = (struct geneve_opt *)&enc_opts.mask->data[0]; + + if (option_mask->opt_class == 0 && option_mask->type == 0 && + !memchr_inv(option_mask->opt_data, 0, option_mask->length * 4)) + return 0; + + if (option_key->length > max_tlv_option_data_len) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: unsupported option len"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: unsupported option len (key=%d, mask=%d)\n", + option_key->length, option_mask->length); + return -EOPNOTSUPP; + } + + /* data can't be all 0 - fail to offload such rule */ + if (!memchr_inv(option_key->opt_data, 0, option_key->length * 4)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: can't match on 0 data field"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: can't match on 0 data field\n"); + return -EOPNOTSUPP; + } + + /* add new GENEVE TLV options object */ + res = mlx5_geneve_tlv_option_add(priv->mdev->geneve, option_key); + if (res) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: failed creating TLV opt object"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: failed creating TLV opt object (class:type:len = 0x%x:0x%x:%d)\n", + be16_to_cpu(option_key->opt_class), + option_key->type, option_key->length); + return res; + } + + /* In general, after creating the object, need to query it + * in order to check which option data to set in misc3. + * But we support only geneve_tlv_option_0_data, so no + * point querying at this stage. 
+ */ + + memcpy(&opt_data_key, option_key->opt_data, option_key->length * 4); + memcpy(&opt_data_mask, option_mask->opt_data, option_mask->length * 4); + MLX5_SET(fte_match_set_misc3, misc_3_v, + geneve_tlv_option_0_data, be32_to_cpu(opt_data_key)); + MLX5_SET(fte_match_set_misc3, misc_3_c, + geneve_tlv_option_0_data, be32_to_cpu(opt_data_mask)); + if (MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.geneve_tlv_option_0_exist)) { + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_tlv_option_0_exist); + MLX5_SET_TO_ONES(fte_match_set_misc, misc_v, geneve_tlv_option_0_exist); + } + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3; + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve_params(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f) +{ + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + struct netlink_ext_ack *extack = f->common.extack; + + /* match on OAM - packets with OAM bit on should NOT be offloaded */ + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_oam)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE OAM is not supported"); + netdev_warn(priv->netdev, "Matching on GENEVE OAM is not supported\n"); + return -EOPNOTSUPP; + } + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_oam); + MLX5_SET(fte_match_set_misc, misc_v, geneve_oam, 0); + + /* Match on GENEVE protocol. We support only Transparent Eth Bridge. */ + + if (MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_geneve_protocol_type)) { + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_protocol_type); + MLX5_SET(fte_match_set_misc, misc_v, geneve_protocol_type, ETH_P_TEB); + } + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + int err; + + err = mlx5e_tc_tun_parse_geneve_params(priv, spec, f); + if (err) + return err; + + err = mlx5e_tc_tun_parse_geneve_vni(priv, spec, f); + if (err) + return err; + + return mlx5e_tc_tun_parse_geneve_options(priv, spec, f); +} + +static bool mlx5e_tc_tun_encap_info_equal_geneve(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b) +{ + struct ip_tunnel_info *a_info; + struct ip_tunnel_info *b_info; + bool a_has_opts, b_has_opts; + + if (!mlx5e_tc_tun_encap_info_equal_generic(a, b)) + return false; + + a_has_opts = !!(a->ip_tun_key->tun_flags & TUNNEL_GENEVE_OPT); + b_has_opts = !!(b->ip_tun_key->tun_flags & TUNNEL_GENEVE_OPT); + + /* keys are equal when both don't have any options attached */ + if (!a_has_opts && !b_has_opts) + return true; + + if (a_has_opts != b_has_opts) + return false; + + /* geneve options stored in memory next to ip_tunnel_info struct */ + a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key); + b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key); + + return a_info->options_len == b_info->options_len && + memcmp(a_info + 1, b_info + 1, a_info->options_len) == 0; +} + +struct mlx5e_tc_tunnel geneve_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GENEVE, + .match_level = MLX5_MATCH_L4, + .can_offload = mlx5e_tc_tun_can_offload_geneve, + .calc_hlen = mlx5e_tc_tun_calc_hlen_geneve, + .init_encap_attr = mlx5e_tc_tun_init_encap_attr_geneve, + .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_geneve, + .parse_udp_ports = 
mlx5e_tc_tun_parse_udp_ports_geneve, + .parse_tunnel = mlx5e_tc_tun_parse_geneve, + .encap_info_equal = mlx5e_tc_tun_encap_info_equal_geneve, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c new file mode 100644 index 000000000..ada14f057 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include <net/gre.h> +#include "en/tc_tun.h" + +static bool mlx5e_tc_tun_can_offload_gretap(struct mlx5e_priv *priv) +{ + return !!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap); +} + +static int mlx5e_tc_tun_calc_hlen_gretap(struct mlx5e_encap_entry *e) +{ + return gre_calc_hlen(e->tun_info->key.tun_flags); +} + +static int mlx5e_tc_tun_init_encap_attr_gretap(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + e->tunnel = &gre_tunnel; + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE; + return 0; +} + +static int mlx5e_gen_ip_tunnel_header_gretap(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf); + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + int hdr_len; + + *ip_proto = IPPROTO_GRE; + + /* the HW does not calculate GRE csum or sequences */ + if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ)) + return -EOPNOTSUPP; + + greh->protocol = htons(ETH_P_TEB); + + /* GRE key */ + hdr_len = mlx5e_tc_tun_calc_hlen_gretap(e); + greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags); + if (tun_key->tun_flags & TUNNEL_KEY) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); + *ptr = tun_id; + } + + return 0; +} + +static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_GRE); + + /* gre protocol */ + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol); + MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB); + + /* gre key */ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid enc_keyid; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + MLX5_SET(fte_match_set_misc, misc_c, + gre_key.key, be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, + gre_key.key, be32_to_cpu(enc_keyid.key->keyid)); + } + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + + return 0; +} + +struct mlx5e_tc_tunnel gre_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GRETAP, + .match_level = MLX5_MATCH_L3, + .can_offload = mlx5e_tc_tun_can_offload_gretap, + .calc_hlen = mlx5e_tc_tun_calc_hlen_gretap, + .init_encap_attr = mlx5e_tc_tun_init_encap_attr_gretap, + .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_gretap, + .parse_udp_ports = NULL, + .parse_tunnel = mlx5e_tc_tun_parse_gretap, + .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c new file mode 100644 index 000000000..c5b1617d5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include <net/bareudp.h> +#include <net/mpls.h> +#include "en/tc_tun.h" + +static bool can_offload(struct mlx5e_priv *priv) +{ + return MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_l3_tunnel_to_l2); +} + +static int calc_hlen(struct mlx5e_encap_entry *e) +{ + return sizeof(struct udphdr) + MPLS_HLEN; +} + +static int init_encap_attr(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + e->tunnel = &mplsoudp_tunnel; + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL; + return 0; +} + +static int generate_ip_tun_hdr(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *r) +{ + const struct ip_tunnel_key *tun_key = &r->tun_info->key; + const struct mlx5e_mpls_info *mpls_info = &r->mpls_info; + struct udphdr *udp = (struct udphdr *)(buf); + struct mpls_shim_hdr *mpls; + + mpls = (struct mpls_shim_hdr *)(udp + 1); + *ip_proto = IPPROTO_UDP; + + udp->dest = tun_key->tp_dst; + *mpls = mpls_entry_encode(mpls_info->label, mpls_info->ttl, mpls_info->tc, mpls_info->bos); + + return 0; +} + +static int parse_udp_ports(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + return mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v); +} + +static int parse_tunnel(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_match_mpls match; + void *misc2_c; + void *misc2_v; + + if (!MLX5_CAP_ETH(priv->mdev, tunnel_stateless_mpls_over_udp) && + !(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_CW_MPLS_UDP)) + return -EOPNOTSUPP; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) + return -EOPNOTSUPP; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) + return 0; + + flow_rule_match_mpls(rule, &match); + + /* Only support matching the first LSE */ + if (match.mask->used_lses != 1) + return -EOPNOTSUPP; + + misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + + MLX5_SET(fte_match_set_misc2, misc2_c, + outer_first_mpls_over_udp.mpls_label, + match.mask->ls[0].mpls_label); + MLX5_SET(fte_match_set_misc2, misc2_v, + outer_first_mpls_over_udp.mpls_label, + match.key->ls[0].mpls_label); + + MLX5_SET(fte_match_set_misc2, misc2_c, + outer_first_mpls_over_udp.mpls_exp, + match.mask->ls[0].mpls_tc); + MLX5_SET(fte_match_set_misc2, misc2_v, + outer_first_mpls_over_udp.mpls_exp, match.key->ls[0].mpls_tc); + + MLX5_SET(fte_match_set_misc2, misc2_c, + outer_first_mpls_over_udp.mpls_s_bos, + match.mask->ls[0].mpls_bos); + MLX5_SET(fte_match_set_misc2, misc2_v, + outer_first_mpls_over_udp.mpls_s_bos, + match.key->ls[0].mpls_bos); + + MLX5_SET(fte_match_set_misc2, misc2_c, + outer_first_mpls_over_udp.mpls_ttl, + match.mask->ls[0].mpls_ttl); + MLX5_SET(fte_match_set_misc2, misc2_v, + outer_first_mpls_over_udp.mpls_ttl, + match.key->ls[0].mpls_ttl); + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + + return 0; +} + +struct mlx5e_tc_tunnel 
mplsoudp_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_MPLSOUDP, + .match_level = MLX5_MATCH_L4, + .can_offload = can_offload, + .calc_hlen = calc_hlen, + .init_encap_attr = init_encap_attr, + .generate_ip_tun_hdr = generate_ip_tun_hdr, + .parse_udp_ports = parse_udp_ports, + .parse_tunnel = parse_tunnel, + .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c new file mode 100644 index 000000000..1f62c702b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include <net/vxlan.h> +#include "lib/vxlan.h" +#include "en/tc_tun.h" + +static bool mlx5e_tc_tun_can_offload_vxlan(struct mlx5e_priv *priv) +{ + return !!MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap); +} + +static int mlx5e_tc_tun_calc_hlen_vxlan(struct mlx5e_encap_entry *e) +{ + return VXLAN_HLEN; +} + +static int mlx5e_tc_tun_check_udp_dport_vxlan(struct mlx5e_priv *priv, + struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_ports enc_ports; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) + return -EOPNOTSUPP; + + flow_rule_match_enc_ports(rule, &enc_ports); + + /* check the UDP destination port validity */ + + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, + be16_to_cpu(enc_ports.key->dst))) { + NL_SET_ERR_MSG_MOD(extack, + "Matched UDP dst port is not registered as a VXLAN port"); + netdev_warn(priv->netdev, + "UDP port %d is not registered as a VXLAN port\n", + be16_to_cpu(enc_ports.key->dst)); + return -EOPNOTSUPP; + } + + return 0; +} + +static int mlx5e_tc_tun_parse_udp_ports_vxlan(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + int err = 0; + + err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v); + if (err) + return err; + + return mlx5e_tc_tun_check_udp_dport_vxlan(priv, f); +} + +static int mlx5e_tc_tun_init_encap_attr_vxlan(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + int dst_port = be16_to_cpu(e->tun_info->key.tp_dst); + + e->tunnel = &vxlan_tunnel; + + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) { + NL_SET_ERR_MSG_MOD(extack, + "vxlan udp dport was not registered with the HW"); + netdev_warn(priv->netdev, + "%d isn't an offloaded vxlan udp dport\n", + dst_port); + return -EOPNOTSUPP; + } + + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; + return 0; +} + +static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + struct udphdr *udp = (struct udphdr *)(buf); + struct vxlanhdr *vxh; + + if (tun_key->tun_flags & TUNNEL_VXLAN_OPT) + return -EOPNOTSUPP; + vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); + *ip_proto = IPPROTO_UDP; + + udp->dest = tun_key->tp_dst; + vxh->vx_flags = VXLAN_HF_VNI; + vxh->vx_vni = vxlan_vni_field(tun_id); + + return 0; +} + +static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + struct flow_rule *rule = 
flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_enc_keyid enc_keyid; + void *misc_c, *misc_v; + + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) + return 0; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + + if (!enc_keyid.mask->keyid) + return 0; + + /* match on VNI is required */ + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_vxlan_vni)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on VXLAN VNI is not supported"); + netdev_warn(priv->netdev, + "Matching on VXLAN VNI is not supported\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, + be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, + be32_to_cpu(enc_keyid.key->keyid)); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + + return 0; +} + +static int mlx5e_tc_tun_get_remote_ifindex(struct net_device *mirred_dev) +{ + const struct vxlan_dev *vxlan = netdev_priv(mirred_dev); + const struct vxlan_rdst *dst = &vxlan->default_dst; + + return dst->remote_ifindex; +} + +struct mlx5e_tc_tunnel vxlan_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_VXLAN, + .match_level = MLX5_MATCH_L4, + .can_offload = mlx5e_tc_tun_can_offload_vxlan, + .calc_hlen = mlx5e_tc_tun_calc_hlen_vxlan, + .init_encap_attr = mlx5e_tc_tun_init_encap_attr_vxlan, + .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_vxlan, + .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_vxlan, + .parse_tunnel = mlx5e_tc_tun_parse_vxlan, + .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic, + .get_remote_ifindex = mlx5e_tc_tun_get_remote_ifindex, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c new file mode 100644 index 000000000..d4239e3b3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#include "tir.h" +#include "params.h" +#include <linux/mlx5/transobj.h> + +#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) + +/* max() doesn't work inside square brackets. */ +#define MLX5E_TIR_CMD_IN_SZ_DW ( \ + MLX5_ST_SZ_DW(create_tir_in) > MLX5_ST_SZ_DW(modify_tir_in) ? 
\ + MLX5_ST_SZ_DW(create_tir_in) : MLX5_ST_SZ_DW(modify_tir_in) \ +) + +struct mlx5e_tir_builder { + u32 in[MLX5E_TIR_CMD_IN_SZ_DW]; + bool modify; +}; + +struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify) +{ + struct mlx5e_tir_builder *builder; + + builder = kvzalloc(sizeof(*builder), GFP_KERNEL); + builder->modify = modify; + + return builder; +} + +void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder) +{ + kvfree(builder); +} + +void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder) +{ + memset(builder->in, 0, sizeof(builder->in)); +} + +static void *mlx5e_tir_builder_get_tirc(struct mlx5e_tir_builder *builder) +{ + if (builder->modify) + return MLX5_ADDR_OF(modify_tir_in, builder->in, ctx); + return MLX5_ADDR_OF(create_tir_in, builder->in, ctx); +} + +void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + + WARN_ON(builder->modify); + + MLX5_SET(tirc, tirc, transport_domain, tdn); + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); + MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_NONE); + MLX5_SET(tirc, tirc, inline_rqn, rqn); +} + +void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn, + u32 rqtn, bool inner_ft_support) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + + WARN_ON(builder->modify); + + MLX5_SET(tirc, tirc, transport_domain, tdn); + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, rqtn); + MLX5_SET(tirc, tirc, tunneled_offload_en, inner_ft_support); +} + +void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder, + const struct mlx5e_packet_merge_param *pkt_merge_param) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + const unsigned int rough_max_l2_l3_hdr_sz = 256; + + if (builder->modify) + MLX5_SET(modify_tir_in, builder->in, bitmask.packet_merge, 1); + + switch (pkt_merge_param->type) { + case MLX5E_PACKET_MERGE_LRO: + MLX5_SET(tirc, tirc, packet_merge_mask, + MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO | + MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO); + MLX5_SET(tirc, tirc, lro_max_ip_payload_size, + (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8); + MLX5_SET(tirc, tirc, lro_timeout_period_usecs, pkt_merge_param->timeout); + break; + default: + break; + } +} + +static int mlx5e_hfunc_to_hw(u8 hfunc) +{ + switch (hfunc) { + case ETH_RSS_HASH_TOP: + return MLX5_RX_HASH_FN_TOEPLITZ; + case ETH_RSS_HASH_XOR: + return MLX5_RX_HASH_FN_INVERTED_XOR8; + default: + return MLX5_RX_HASH_FN_NONE; + } +} + +void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder, + const struct mlx5e_rss_params_hash *rss_hash, + const struct mlx5e_rss_params_traffic_type *rss_tt, + bool inner) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + void *hfso; + + if (builder->modify) + MLX5_SET(modify_tir_in, builder->in, bitmask.hash, 1); + + MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_hfunc_to_hw(rss_hash->hfunc)); + if (rss_hash->hfunc == ETH_RSS_HASH_TOP) { + const size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key); + void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); + + MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); + memcpy(rss_key, rss_hash->toeplitz_hash_key, len); + } + + if (inner) + hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner); + else + hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, rss_tt->l3_prot_type); + 
MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, rss_tt->l4_prot_type); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, rss_tt->rx_hash_fields); +} + +void mlx5e_tir_builder_build_direct(struct mlx5e_tir_builder *builder) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + + WARN_ON(builder->modify); + + MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); +} + +void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + + WARN_ON(builder->modify); + + MLX5_SET(tirc, tirc, tls_en, 1); + MLX5_SET(tirc, tirc, self_lb_block, + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST | + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST); +} + +int mlx5e_tir_init(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder, + struct mlx5_core_dev *mdev, bool reg) +{ + int err; + + tir->mdev = mdev; + + err = mlx5_core_create_tir(tir->mdev, builder->in, &tir->tirn); + if (err) + return err; + + if (reg) { + struct mlx5e_hw_objs *res = &tir->mdev->mlx5e_res.hw_objs; + + mutex_lock(&res->td.list_lock); + list_add(&tir->list, &res->td.tirs_list); + mutex_unlock(&res->td.list_lock); + } else { + INIT_LIST_HEAD(&tir->list); + } + + return 0; +} + +void mlx5e_tir_destroy(struct mlx5e_tir *tir) +{ + struct mlx5e_hw_objs *res = &tir->mdev->mlx5e_res.hw_objs; + + /* Skip mutex if list_del is no-op (the TIR wasn't registered in the + * list). list_empty will never return true for an item of tirs_list, + * and READ_ONCE/WRITE_ONCE in list_empty/list_del guarantee consistency + * of the list->next value. + */ + if (!list_empty(&tir->list)) { + mutex_lock(&res->td.list_lock); + list_del(&tir->list); + mutex_unlock(&res->td.list_lock); + } + + mlx5_core_destroy_tir(tir->mdev, tir->tirn); +} + +int mlx5e_tir_modify(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder) +{ + return mlx5_core_modify_tir(tir->mdev, tir->tirn, builder->in); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h new file mode 100644 index 000000000..857a84bcd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. 
*/ + +#ifndef __MLX5_EN_TIR_H__ +#define __MLX5_EN_TIR_H__ + +#include <linux/kernel.h> + +struct mlx5e_rss_params_hash { + u8 hfunc; + u8 toeplitz_hash_key[40]; +}; + +struct mlx5e_rss_params_traffic_type { + u8 l3_prot_type; + u8 l4_prot_type; + u32 rx_hash_fields; +}; + +struct mlx5e_tir_builder; +struct mlx5e_packet_merge_param; + +struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify); +void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder); +void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder); + +void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn); +void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn, + u32 rqtn, bool inner_ft_support); +void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder, + const struct mlx5e_packet_merge_param *pkt_merge_param); +void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder, + const struct mlx5e_rss_params_hash *rss_hash, + const struct mlx5e_rss_params_traffic_type *rss_tt, + bool inner); +void mlx5e_tir_builder_build_direct(struct mlx5e_tir_builder *builder); +void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder); + +struct mlx5_core_dev; + +struct mlx5e_tir { + struct mlx5_core_dev *mdev; + u32 tirn; + struct list_head list; +}; + +int mlx5e_tir_init(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder, + struct mlx5_core_dev *mdev, bool reg); +void mlx5e_tir_destroy(struct mlx5e_tir *tir); + +static inline u32 mlx5e_tir_get_tirn(struct mlx5e_tir *tir) +{ + return tir->tirn; +} + +int mlx5e_tir_modify(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder); + +#endif /* __MLX5_EN_TIR_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c new file mode 100644 index 000000000..201ac7dd3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -0,0 +1,331 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies */ + +#include <net/page_pool.h> +#include "en/txrx.h" +#include "en/params.h" +#include "en/trap.h" + +static int mlx5e_trap_napi_poll(struct napi_struct *napi, int budget) +{ + struct mlx5e_trap *trap_ctx = container_of(napi, struct mlx5e_trap, napi); + struct mlx5e_ch_stats *ch_stats = trap_ctx->stats; + struct mlx5e_rq *rq = &trap_ctx->rq; + bool busy = false; + int work_done = 0; + + rcu_read_lock(); + + ch_stats->poll++; + + work_done = mlx5e_poll_rx_cq(&rq->cq, budget); + busy |= work_done == budget; + busy |= rq->post_wqes(rq); + + if (busy) { + work_done = budget; + goto out; + } + + if (unlikely(!napi_complete_done(napi, work_done))) + goto out; + + mlx5e_cq_arm(&rq->cq); + +out: + rcu_read_unlock(); + return work_done; +} + +static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params, + struct mlx5e_rq *rq) +{ + struct mlx5_core_dev *mdev = t->mdev; + struct mlx5e_priv *priv = t->priv; + + rq->wq_type = params->rq_wq_type; + rq->pdev = t->pdev; + rq->netdev = priv->netdev; + rq->priv = priv; + rq->clock = &mdev->clock; + rq->tstamp = &priv->tstamp; + rq->mdev = mdev; + rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + rq->stats = &priv->trap_stats.rq; + rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); + xdp_rxq_info_unused(&rq->xdp_rxq); + mlx5e_rq_set_trap_handlers(rq, params); +} + +static int mlx5e_open_trap_rq(struct mlx5e_priv *priv, struct mlx5e_trap *t) +{ + struct mlx5e_rq_param *rq_param = &t->rq_param; + struct mlx5_core_dev *mdev = 
priv->mdev; + struct mlx5e_create_cq_param ccp = {}; + struct dim_cq_moder trap_moder = {}; + struct mlx5e_rq *rq = &t->rq; + int node; + int err; + + node = dev_to_node(mdev->device); + + ccp.node = node; + ccp.ch_stats = t->stats; + ccp.napi = &t->napi; + ccp.ix = 0; + err = mlx5e_open_cq(priv, trap_moder, &rq_param->cqp, &ccp, &rq->cq); + if (err) + return err; + + mlx5e_init_trap_rq(t, &t->params, rq); + err = mlx5e_open_rq(&t->params, rq_param, NULL, node, rq); + if (err) + goto err_destroy_cq; + + return 0; + +err_destroy_cq: + mlx5e_close_cq(&rq->cq); + + return err; +} + +static void mlx5e_close_trap_rq(struct mlx5e_rq *rq) +{ + mlx5e_close_rq(rq); + mlx5e_close_cq(&rq->cq); +} + +static int mlx5e_create_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, + u32 rqn) +{ + struct mlx5e_tir_builder *builder; + int err; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + mlx5e_tir_builder_build_inline(builder, mdev->mlx5e_res.hw_objs.td.tdn, rqn); + err = mlx5e_tir_init(tir, builder, mdev, true); + + mlx5e_tir_builder_free(builder); + + return err; +} + +static void mlx5e_build_trap_params(struct mlx5_core_dev *mdev, + int max_mtu, u16 q_counter, + struct mlx5e_trap *t) +{ + struct mlx5e_params *params = &t->params; + + params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC; + mlx5e_init_rq_type_params(mdev, params); + params->sw_mtu = max_mtu; + mlx5e_build_rq_param(mdev, params, NULL, q_counter, &t->rq_param); +} + +static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv) +{ + int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, 0)); + struct net_device *netdev = priv->netdev; + struct mlx5e_trap *t; + int err; + + t = kvzalloc_node(sizeof(*t), GFP_KERNEL, cpu_to_node(cpu)); + if (!t) + return ERR_PTR(-ENOMEM); + + mlx5e_build_trap_params(priv->mdev, netdev->max_mtu, priv->q_counter, t); + + t->priv = priv; + t->mdev = priv->mdev; + t->tstamp = &priv->tstamp; + t->pdev = mlx5_core_dma_dev(priv->mdev); + t->netdev = priv->netdev; + t->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey); + t->stats = &priv->trap_stats.ch; + + netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll); + + err = mlx5e_open_trap_rq(priv, t); + if (unlikely(err)) + goto err_napi_del; + + err = mlx5e_create_trap_direct_rq_tir(t->mdev, &t->tir, t->rq.rqn); + if (err) + goto err_close_trap_rq; + + return t; + +err_close_trap_rq: + mlx5e_close_trap_rq(&t->rq); +err_napi_del: + netif_napi_del(&t->napi); + kvfree(t); + return ERR_PTR(err); +} + +void mlx5e_close_trap(struct mlx5e_trap *trap) +{ + mlx5e_tir_destroy(&trap->tir); + mlx5e_close_trap_rq(&trap->rq); + netif_napi_del(&trap->napi); + kvfree(trap); +} + +static void mlx5e_activate_trap(struct mlx5e_trap *trap) +{ + napi_enable(&trap->napi); + mlx5e_activate_rq(&trap->rq); + mlx5e_trigger_napi_sched(&trap->napi); +} + +void mlx5e_deactivate_trap(struct mlx5e_priv *priv) +{ + struct mlx5e_trap *trap = priv->en_trap; + + mlx5e_deactivate_rq(&trap->rq); + napi_disable(&trap->napi); +} + +static struct mlx5e_trap *mlx5e_add_trap_queue(struct mlx5e_priv *priv) +{ + struct mlx5e_trap *trap; + + trap = mlx5e_open_trap(priv); + if (IS_ERR(trap)) + goto out; + + mlx5e_activate_trap(trap); +out: + return trap; +} + +static void mlx5e_del_trap_queue(struct mlx5e_priv *priv) +{ + mlx5e_deactivate_trap(priv); + mlx5e_close_trap(priv->en_trap); + priv->en_trap = NULL; +} + +static int mlx5e_trap_get_tirn(struct mlx5e_trap *en_trap) +{ + return en_trap->tir.tirn; +} + +static int 
mlx5e_handle_action_trap(struct mlx5e_priv *priv, int trap_id) +{ + bool open_queue = !priv->en_trap; + struct mlx5e_trap *trap; + int err; + + if (open_queue) { + trap = mlx5e_add_trap_queue(priv); + if (IS_ERR(trap)) + return PTR_ERR(trap); + priv->en_trap = trap; + } + + switch (trap_id) { + case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER: + err = mlx5e_add_vlan_trap(priv->fs, trap_id, mlx5e_trap_get_tirn(priv->en_trap)); + if (err) + goto err_out; + break; + case DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER: + err = mlx5e_add_mac_trap(priv->fs, trap_id, mlx5e_trap_get_tirn(priv->en_trap)); + if (err) + goto err_out; + break; + default: + netdev_warn(priv->netdev, "%s: Unknown trap id %d\n", __func__, trap_id); + err = -EINVAL; + goto err_out; + } + return 0; + +err_out: + if (open_queue) + mlx5e_del_trap_queue(priv); + return err; +} + +static int mlx5e_handle_action_drop(struct mlx5e_priv *priv, int trap_id) +{ + switch (trap_id) { + case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER: + mlx5e_remove_vlan_trap(priv->fs); + break; + case DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER: + mlx5e_remove_mac_trap(priv->fs); + break; + default: + netdev_warn(priv->netdev, "%s: Unknown trap id %d\n", __func__, trap_id); + return -EINVAL; + } + if (priv->en_trap && !mlx5_devlink_trap_get_num_active(priv->mdev)) + mlx5e_del_trap_queue(priv); + + return 0; +} + +int mlx5e_handle_trap_event(struct mlx5e_priv *priv, struct mlx5_trap_ctx *trap_ctx) +{ + int err = 0; + + /* Traps are unarmed when interface is down, no need to update + * them. The configuration is saved in the core driver, + * queried and applied upon interface up operation in + * mlx5e_open_locked(). + */ + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + switch (trap_ctx->action) { + case DEVLINK_TRAP_ACTION_TRAP: + err = mlx5e_handle_action_trap(priv, trap_ctx->id); + break; + case DEVLINK_TRAP_ACTION_DROP: + err = mlx5e_handle_action_drop(priv, trap_ctx->id); + break; + default: + netdev_warn(priv->netdev, "%s: Unsupported action %d\n", __func__, + trap_ctx->action); + err = -EINVAL; + } + return err; +} + +static int mlx5e_apply_trap(struct mlx5e_priv *priv, int trap_id, bool enable) +{ + enum devlink_trap_action action; + int err; + + err = mlx5_devlink_traps_get_action(priv->mdev, trap_id, &action); + if (err) + return err; + if (action == DEVLINK_TRAP_ACTION_TRAP) + err = enable ? 
mlx5e_handle_action_trap(priv, trap_id) : + mlx5e_handle_action_drop(priv, trap_id); + return err; +} + +static const int mlx5e_traps_arr[] = { + DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER, + DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER, +}; + +int mlx5e_apply_traps(struct mlx5e_priv *priv, bool enable) +{ + int err; + int i; + + for (i = 0; i < ARRAY_SIZE(mlx5e_traps_arr); i++) { + err = mlx5e_apply_trap(priv, mlx5e_traps_arr[i], enable); + if (err) + return err; + } + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h new file mode 100644 index 000000000..aa3f17658 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies */ + +#ifndef __MLX5E_TRAP_H__ +#define __MLX5E_TRAP_H__ + +#include "../en.h" +#include "../devlink.h" + +struct mlx5e_trap { + /* data path */ + struct mlx5e_rq rq; + struct mlx5e_tir tir; + struct napi_struct napi; + struct device *pdev; + struct net_device *netdev; + __be32 mkey_be; + + /* data path - accessed per napi poll */ + struct mlx5e_ch_stats *stats; + + /* control */ + struct mlx5e_priv *priv; + struct mlx5_core_dev *mdev; + struct hwtstamp_config *tstamp; + DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); + + struct mlx5e_params params; + struct mlx5e_rq_param rq_param; +}; + +void mlx5e_close_trap(struct mlx5e_trap *trap); +void mlx5e_deactivate_trap(struct mlx5e_priv *priv); +int mlx5e_handle_trap_event(struct mlx5e_priv *priv, struct mlx5_trap_ctx *trap_ctx); +int mlx5e_apply_traps(struct mlx5e_priv *priv, bool enable); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h new file mode 100644 index 000000000..344245c01 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -0,0 +1,494 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TXRX_H___ +#define __MLX5_EN_TXRX_H___ + +#include "en.h" +#include <linux/indirect_call_wrapper.h> + +#define MLX5E_TX_WQE_EMPTY_DS_COUNT (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + +#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) + +/* IPSEC inline data includes: + * 1. ESP trailer: up to 255 bytes of padding, 1 byte for pad length, 1 byte for + * next header. + * 2. ESP authentication data: 16 bytes for ICV. + */ +#define MLX5E_MAX_TX_IPSEC_DS DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + \ + 255 + 1 + 1 + 16, MLX5_SEND_WQE_DS) + +/* 366 should be big enough to cover all L2, L3 and L4 headers with possible + * encapsulations. + */ +#define MLX5E_MAX_TX_INLINE_DS DIV_ROUND_UP(366 - INL_HDR_START_SZ + VLAN_HLEN, \ + MLX5_SEND_WQE_DS) + +/* Sync the calculation with mlx5e_sq_calc_wqe_attr. 
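As illustrative arithmetic only, using typical values not spelled out in this header (16-byte data segments, 64-byte WQEBBs, a 32-byte empty TX WQE, a 2-byte inline_hdr.start and MAX_SKB_FRAGS = 17): 2 empty-WQE DS + 23 inline DS + 18 IPsec DS + 18 fragment DS = 61 DS, so MLX5E_MAX_TX_WQEBBS works out to DIV_ROUND_UP(61, 4) = 16.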
*/ +#define MLX5E_MAX_TX_WQEBBS DIV_ROUND_UP(MLX5E_TX_WQE_EMPTY_DS_COUNT + \ + MLX5E_MAX_TX_INLINE_DS + \ + MLX5E_MAX_TX_IPSEC_DS + \ + MAX_SKB_FRAGS + 1, \ + MLX5_SEND_WQEBB_NUM_DS) + +#define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND) + +static inline +ktime_t mlx5e_cqe_ts_to_ns(cqe_ts_to_ns func, struct mlx5_clock *clock, u64 cqe_ts) +{ + return INDIRECT_CALL_2(func, mlx5_real_time_cyc2time, mlx5_timecounter_cyc2time, + clock, cqe_ts); +} + +enum mlx5e_icosq_wqe_type { + MLX5E_ICOSQ_WQE_NOP, + MLX5E_ICOSQ_WQE_UMR_RX, + MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR, +#ifdef CONFIG_MLX5_EN_TLS + MLX5E_ICOSQ_WQE_UMR_TLS, + MLX5E_ICOSQ_WQE_SET_PSV_TLS, + MLX5E_ICOSQ_WQE_GET_PSV_TLS, +#endif +}; + +/* General */ +static inline bool mlx5e_skb_is_multicast(struct sk_buff *skb) +{ + return skb->pkt_type == PACKET_MULTICAST || skb->pkt_type == PACKET_BROADCAST; +} + +void mlx5e_trigger_irq(struct mlx5e_icosq *sq); +void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe); +void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); +int mlx5e_napi_poll(struct napi_struct *napi, int budget); +int mlx5e_poll_ico_cq(struct mlx5e_cq *cq); + +/* RX */ +void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page); +void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle); +INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)); +INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)); +int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); +void mlx5e_free_rx_descs(struct mlx5e_rq *rq); +void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq); + +/* TX */ +netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); +bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); +void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); + +static inline bool +mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo) +{ + return (u16)(*fifo->pc - *fifo->cc) < fifo->mask; +} + +static inline bool +mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n) +{ + return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc); +} + +static inline void *mlx5e_fetch_wqe(struct mlx5_wq_cyc *wq, u16 pi, size_t wqe_size) +{ + void *wqe; + + wqe = mlx5_wq_cyc_get_wqe(wq, pi); + memset(wqe, 0, wqe_size); + + return wqe; +} + +#define MLX5E_TX_FETCH_WQE(sq, pi) \ + ((struct mlx5e_tx_wqe *)mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_tx_wqe))) + +static inline struct mlx5e_tx_wqe * +mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) +{ + u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc); + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + + memset(cseg, 0, sizeof(*cseg)); + + cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP); + cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01); + + (*pc)++; + + return wqe; +} + +static inline struct mlx5e_tx_wqe * +mlx5e_post_nop_fence(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) +{ + u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc); + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + + memset(cseg, 0, sizeof(*cseg)); + + cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP); + cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01); + cseg->fm_ce_se = MLX5_FENCE_MODE_INITIATOR_SMALL; + + (*pc)++; + + return wqe; +} + +struct mlx5e_tx_wqe_info { + struct sk_buff *skb; + u32 num_bytes; + u8 num_wqebbs; + u8 num_dma; + u8 num_fifo_pkts; +#ifdef 
CONFIG_MLX5_EN_TLS + struct page *resync_dump_frag_page; +#endif +}; + +static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi, contig_wqebbs; + + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs < size)) { + struct mlx5e_tx_wqe_info *wi, *edge_wi; + + wi = &sq->db.wqe_info[pi]; + edge_wi = wi + contig_wqebbs; + + /* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */ + for (; wi < edge_wi; wi++) { + *wi = (struct mlx5e_tx_wqe_info) { + .num_wqebbs = 1, + }; + mlx5e_post_nop(wq, sq->sqn, &sq->pc); + } + sq->stats->nop += contig_wqebbs; + + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + } + + return pi; +} + +void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq); + +static inline u16 mlx5e_shampo_get_cqe_header_index(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + return be16_to_cpu(cqe->shampo.header_entry_index) & (rq->mpwqe.shampo->hd_per_wq - 1); +} + +struct mlx5e_shampo_umr { + u16 len; +}; + +struct mlx5e_icosq_wqe_info { + u8 wqe_type; + u8 num_wqebbs; + + /* Auxiliary data for different wqe types. */ + union { + struct { + struct mlx5e_rq *rq; + } umr; + struct mlx5e_shampo_umr shampo; +#ifdef CONFIG_MLX5_EN_TLS + struct { + struct mlx5e_ktls_offload_context_rx *priv_rx; + } tls_set_params; + struct { + struct mlx5e_ktls_rx_resync_buf *buf; + } tls_get_params; +#endif + }; +}; + +void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq); + +static inline u16 mlx5e_icosq_get_next_pi(struct mlx5e_icosq *sq, u16 size) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi, contig_wqebbs; + + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs < size)) { + struct mlx5e_icosq_wqe_info *wi, *edge_wi; + + wi = &sq->db.wqe_info[pi]; + edge_wi = wi + contig_wqebbs; + + /* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. 
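The padding slots are stamped as one-WQEBB MLX5E_ICOSQ_WQE_NOP entries in wqe_info, so the ICOSQ completion path can account for them when it advances the consumer counter.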
*/ + for (; wi < edge_wi; wi++) { + *wi = (struct mlx5e_icosq_wqe_info) { + .wqe_type = MLX5E_ICOSQ_WQE_NOP, + .num_wqebbs = 1, + }; + mlx5e_post_nop(wq, sq->sqn, &sq->pc); + } + + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + } + + return pi; +} + +static inline void +mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map, + struct mlx5_wqe_ctrl_seg *ctrl) +{ + ctrl->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE; + /* ensure wqe is visible to device before updating doorbell record */ + dma_wmb(); + + *wq->db = cpu_to_be32(pc); + + /* ensure doorbell record is visible to device before ringing the + * doorbell + */ + wmb(); + + mlx5_write64((__be32 *)ctrl, uar_map); +} + +static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) +{ + struct mlx5_core_cq *mcq; + + mcq = &cq->mcq; + mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc); +} + +static inline struct mlx5e_sq_dma * +mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i) +{ + return &sq->db.dma_fifo[i & sq->dma_fifo_mask]; +} + +static inline void +mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size, + enum mlx5e_dma_map_type map_type) +{ + struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++); + + dma->addr = addr; + dma->size = size; + dma->type = map_type; +} + +static inline +struct sk_buff **mlx5e_skb_fifo_get(struct mlx5e_skb_fifo *fifo, u16 i) +{ + return &fifo->fifo[i & fifo->mask]; +} + +static inline +void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo *fifo, struct sk_buff *skb) +{ + struct sk_buff **skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++); + + *skb_item = skb; +} + +static inline +struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_skb_fifo *fifo) +{ + WARN_ON_ONCE(*fifo->pc == *fifo->cc); + + return *mlx5e_skb_fifo_get(fifo, (*fifo->cc)++); +} + +static inline void +mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) +{ + switch (dma->type) { + case MLX5E_DMA_MAP_SINGLE: + dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE); + break; + case MLX5E_DMA_MAP_PAGE: + dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE); + break; + default: + WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n"); + } +} + +void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more); +void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq); + +static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs) +{ + return session->ds_count == max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS; +} + +static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq) +{ + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + mlx5_wq_ll_reset(&rq->mpwqe.wq); + rq->mpwqe.actual_wq_head = 0; + } else { + mlx5_wq_cyc_reset(&rq->wqe.wq); + } +} + +static inline void mlx5e_dump_error_cqe(struct mlx5e_cq *cq, u32 qn, + struct mlx5_err_cqe *err_cqe) +{ + struct mlx5_cqwq *wq = &cq->wq; + u32 ci; + + ci = mlx5_cqwq_ctr2ix(wq, wq->cc - 1); + + netdev_err(cq->netdev, + "Error cqe on cqn 0x%x, ci 0x%x, qn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n", + cq->mcq.cqn, ci, qn, + get_cqe_opcode((struct mlx5_cqe64 *)err_cqe), + err_cqe->syndrome, err_cqe->vendor_err_synd); + mlx5_dump_err_cqe(cq->mdev, err_cqe); +} + +static inline u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq) +{ + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return mlx5_wq_ll_get_size(&rq->mpwqe.wq); + default: + return mlx5_wq_cyc_get_size(&rq->wqe.wq); + } +} + +static inline u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq) +{ + switch (rq->wq_type) { + case 
MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return rq->mpwqe.wq.cur_sz; + default: + return rq->wqe.wq.cur_sz; + } +} + +static inline u16 mlx5e_rqwq_get_head(struct mlx5e_rq *rq) +{ + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return mlx5_wq_ll_get_head(&rq->mpwqe.wq); + default: + return mlx5_wq_cyc_get_head(&rq->wqe.wq); + } +} + +static inline u16 mlx5e_rqwq_get_wqe_counter(struct mlx5e_rq *rq) +{ + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return mlx5_wq_ll_get_counter(&rq->mpwqe.wq); + default: + return mlx5_wq_cyc_get_counter(&rq->wqe.wq); + } +} + +/* SW parser related functions */ + +struct mlx5e_swp_spec { + __be16 l3_proto; + u8 l4_proto; + u8 is_tun; + __be16 tun_l3_proto; + u8 tun_l4_proto; +}; + +static inline void mlx5e_eseg_swp_offsets_add_vlan(struct mlx5_wqe_eth_seg *eseg) +{ + /* SWP offsets are in 2-bytes words */ + eseg->swp_outer_l3_offset += VLAN_HLEN / 2; + eseg->swp_outer_l4_offset += VLAN_HLEN / 2; + eseg->swp_inner_l3_offset += VLAN_HLEN / 2; + eseg->swp_inner_l4_offset += VLAN_HLEN / 2; +} + +static inline void +mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, + struct mlx5e_swp_spec *swp_spec) +{ + /* SWP offsets are in 2-bytes words */ + eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2; + if (swp_spec->l3_proto == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6; + if (swp_spec->l4_proto) { + eseg->swp_outer_l4_offset = skb_transport_offset(skb) / 2; + if (swp_spec->l4_proto == IPPROTO_UDP) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP; + } + + if (swp_spec->is_tun) { + eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; + if (swp_spec->tun_l3_proto == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + } else { /* typically for ipsec when xfrm mode != XFRM_MODE_TUNNEL */ + eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2; + if (swp_spec->l3_proto == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + } + switch (swp_spec->tun_l4_proto) { + case IPPROTO_UDP: + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; + fallthrough; + case IPPROTO_TCP: + eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; + break; + } +} + +#define MLX5E_STOP_ROOM(wqebbs) ((wqebbs) * 2 - 1) + +static inline u16 mlx5e_stop_room_for_wqe(struct mlx5_core_dev *mdev, u16 wqe_size) +{ + WARN_ON_ONCE(PAGE_SIZE / MLX5_SEND_WQE_BB < mlx5e_get_max_sq_wqebbs(mdev)); + + /* A WQE must not cross the page boundary, hence two conditions: + * 1. Its size must not exceed the page size. + * 2. If the WQE size is X, and the space remaining in a page is less + * than X, this space needs to be padded with NOPs. So, one WQE of + * size X may require up to X-1 WQEBBs of padding, which makes the + * stop room of X-1 + X. + * WQE size is also limited by the hardware limit. 
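+	 * As a worked example (illustrative, not part of the original comment):
+	 * for a WQE of size X = 4 WQEBBs, the reserved stop room is
+	 * MLX5E_STOP_ROOM(4) = 2 * 4 - 1 = 7 WQEBBs.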
+ */ + WARN_ONCE(wqe_size > mlx5e_get_max_sq_wqebbs(mdev), + "wqe_size %u is greater than max SQ WQEBBs %u", + wqe_size, mlx5e_get_max_sq_wqebbs(mdev)); + + return MLX5E_STOP_ROOM(wqe_size); +} + +static inline u16 mlx5e_stop_room_for_max_wqe(struct mlx5_core_dev *mdev) +{ + return MLX5E_STOP_ROOM(mlx5e_get_max_sq_wqebbs(mdev)); +} + +static inline u16 mlx5e_stop_room_for_mpwqe(struct mlx5_core_dev *mdev) +{ + u8 mpwqe_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); + + return mlx5e_stop_room_for_wqe(mdev, mpwqe_wqebbs); +} + +static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size) +{ + u16 room = sq->reserved_room + MLX5E_STOP_ROOM(wqe_size); + + return mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room); +} + +static inline struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i) +{ + size_t isz = struct_size(rq->mpwqe.info, alloc_units, rq->mpwqe.pages_per_wqe); + + return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz)); +} +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c new file mode 100644 index 000000000..20507ef2f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -0,0 +1,691 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/bpf_trace.h> +#include <net/xdp_sock_drv.h> +#include "en/xdp.h" +#include "en/params.h" + +int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) +{ + int hr = mlx5e_get_linear_rq_headroom(params, xsk); + + /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)). + * The condition checked in mlx5e_rx_is_linear_skb is: + * SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE (1) + * (Note that hw_mtu == sw_mtu + hard_mtu.) + * What is returned from this function is: + * max_mtu = PAGE_SIZE - S - hr - hard_mtu (2) + * After assigning sw_mtu := max_mtu, the left side of (1) turns to + * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE, + * because both PAGE_SIZE and S are already aligned. 
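(As an illustration with typical values not taken from this file: PAGE_SIZE = 4096, S = 320, hr = 256 and hard_mtu = 22 give max_mtu = 4096 - 320 - 256 - 22 = 3498 by formula (2).)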
Any number greater + * than max_mtu would make the left side of (1) greater than PAGE_SIZE, + * so max_mtu is the maximum MTU allowed. + */ + + return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr)); +} + +static inline bool +mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, + struct page *page, struct xdp_buff *xdp) +{ + struct skb_shared_info *sinfo = NULL; + struct mlx5e_xmit_data xdptxd; + struct mlx5e_xdp_info xdpi; + struct xdp_frame *xdpf; + dma_addr_t dma_addr; + int i; + + xdpf = xdp_convert_buff_to_frame(xdp); + if (unlikely(!xdpf)) + return false; + + xdptxd.data = xdpf->data; + xdptxd.len = xdpf->len; + + if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) { + /* The xdp_buff was in the UMEM and was copied into a newly + * allocated page. The UMEM page was returned via the ZCA, and + * this new page has to be mapped at this point and has to be + * unmapped and returned via xdp_return_frame on completion. + */ + + /* Prevent double recycling of the UMEM page. Even in case this + * function returns false, the xdp_buff shouldn't be recycled, + * as it was already done in xdp_convert_zc_to_xdp_frame. + */ + __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */ + + xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME; + + dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len, + DMA_TO_DEVICE); + if (dma_mapping_error(sq->pdev, dma_addr)) { + xdp_return_frame(xdpf); + return false; + } + + xdptxd.dma_addr = dma_addr; + xdpi.frame.xdpf = xdpf; + xdpi.frame.dma_addr = dma_addr; + + if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, + mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0))) + return false; + + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi); + return true; + } + + /* Driver assumes that xdp_convert_buff_to_frame returns an xdp_frame + * that points to the same memory region as the original xdp_buff. It + * allows to map the memory only once and to use the DMA_BIDIRECTIONAL + * mode. 
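+	 * Since that memory is part of a page from the RQ page pool, which is
+	 * DMA-mapped bidirectionally when XDP is enabled, the code below only
+	 * issues a dma_sync_single_for_device() on the used region instead of
+	 * a new dma_map_single().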
+ */ + + xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE; + xdpi.page.rq = rq; + + dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf); + dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, DMA_BIDIRECTIONAL); + + if (unlikely(xdp_frame_has_frags(xdpf))) { + sinfo = xdp_get_shared_info_from_frame(xdpf); + + for (i = 0; i < sinfo->nr_frags; i++) { + skb_frag_t *frag = &sinfo->frags[i]; + dma_addr_t addr; + u32 len; + + addr = page_pool_get_dma_addr(skb_frag_page(frag)) + + skb_frag_off(frag); + len = skb_frag_size(frag); + dma_sync_single_for_device(sq->pdev, addr, len, + DMA_BIDIRECTIONAL); + } + } + + xdptxd.dma_addr = dma_addr; + + if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, + mlx5e_xmit_xdp_frame, sq, &xdptxd, sinfo, 0))) + return false; + + xdpi.page.page = page; + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi); + + if (unlikely(xdp_frame_has_frags(xdpf))) { + for (i = 0; i < sinfo->nr_frags; i++) { + skb_frag_t *frag = &sinfo->frags[i]; + + xdpi.page.page = skb_frag_page(frag); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi); + } + } + + return true; +} + +/* returns true if packet was consumed by xdp */ +bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page, + struct bpf_prog *prog, struct xdp_buff *xdp) +{ + u32 act; + int err; + + act = bpf_prog_run_xdp(prog, xdp); + switch (act) { + case XDP_PASS: + return false; + case XDP_TX: + if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, page, xdp))) + goto xdp_abort; + __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */ + return true; + case XDP_REDIRECT: + /* When XDP enabled then page-refcnt==1 here */ + err = xdp_do_redirect(rq->netdev, xdp, prog); + if (unlikely(err)) + goto xdp_abort; + __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); + __set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags); + if (xdp->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL) + mlx5e_page_dma_unmap(rq, page); + rq->stats->xdp_redirect++; + return true; + default: + bpf_warn_invalid_xdp_action(rq->netdev, prog, act); + fallthrough; + case XDP_ABORTED: +xdp_abort: + trace_xdp_exception(rq->netdev, prog, act); + fallthrough; + case XDP_DROP: + rq->stats->xdp_drop++; + return true; + } +} + +static u16 mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq *sq, u16 size) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi, contig_wqebbs; + + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs < size)) { + struct mlx5e_xdp_wqe_info *wi, *edge_wi; + + wi = &sq->db.wqe_info[pi]; + edge_wi = wi + contig_wqebbs; + + /* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. 
*/ + for (; wi < edge_wi; wi++) { + *wi = (struct mlx5e_xdp_wqe_info) { + .num_wqebbs = 1, + .num_pkts = 0, + }; + mlx5e_post_nop(wq, sq->sqn, &sq->pc); + } + sq->stats->nops += contig_wqebbs; + + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + } + + return pi; +} + +static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) +{ + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + struct mlx5e_xdpsq_stats *stats = sq->stats; + struct mlx5e_tx_wqe *wqe; + u16 pi; + + pi = mlx5e_xdpsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs); + wqe = MLX5E_TX_FETCH_WQE(sq, pi); + net_prefetchw(wqe->data); + + *session = (struct mlx5e_tx_mpwqe) { + .wqe = wqe, + .bytes_count = 0, + .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT, + .pkt_count = 0, + .inline_on = mlx5e_xdp_get_inline_state(sq, session->inline_on), + }; + + stats->mpwqe++; +} + +void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl; + u16 ds_count = session->ds_count; + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi]; + + cseg->opmod_idx_opcode = + cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count); + + wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS); + wi->num_pkts = session->pkt_count; + + sq->pc += wi->num_wqebbs; + + sq->doorbell_cseg = cseg; + + session->wqe = NULL; /* Close session */ +} + +enum { + MLX5E_XDP_CHECK_OK = 1, + MLX5E_XDP_CHECK_START_MPWQE = 2, +}; + +INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq) +{ + if (unlikely(!sq->mpwqe.wqe)) { + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, + sq->stop_room))) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + sq->stats->full++; + return -EBUSY; + } + + return MLX5E_XDP_CHECK_START_MPWQE; + } + + return MLX5E_XDP_CHECK_OK; +} + +INDIRECT_CALLABLE_SCOPE bool +mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, + struct skb_shared_info *sinfo, int check_result); + +INDIRECT_CALLABLE_SCOPE bool +mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, + struct skb_shared_info *sinfo, int check_result) +{ + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + struct mlx5e_xdpsq_stats *stats = sq->stats; + + if (unlikely(sinfo)) { + /* MPWQE is enabled, but a multi-buffer packet is queued for + * transmission. MPWQE can't send fragmented packets, so close + * the current session and fall back to a regular WQE. + */ + if (unlikely(sq->mpwqe.wqe)) + mlx5e_xdp_mpwqe_complete(sq); + return mlx5e_xmit_xdp_frame(sq, xdptxd, sinfo, 0); + } + + if (unlikely(xdptxd->len > sq->hw_mtu)) { + stats->err++; + return false; + } + + if (!check_result) + check_result = mlx5e_xmit_xdp_frame_check_mpwqe(sq); + if (unlikely(check_result < 0)) + return false; + + if (check_result == MLX5E_XDP_CHECK_START_MPWQE) { + /* Start the session when nothing can fail, so it's guaranteed + * that if there is an active session, it has at least one dseg, + * and it's safe to complete it at any time. 
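+		 * Once open, the session is closed either when it fills up (the
+		 * mlx5e_xdp_mpwqe_is_full() check below) or right before the
+		 * doorbell is rung, via mlx5e_xdp_mpwqe_complete() in the flush
+		 * and poll-completion paths.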
+ */ + mlx5e_xdp_mpwqe_session_start(sq); + } + + mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats); + + if (unlikely(mlx5e_xdp_mpwqe_is_full(session, sq->max_sq_mpw_wqebbs))) + mlx5e_xdp_mpwqe_complete(sq); + + stats->xmit++; + return true; +} + +static int mlx5e_xmit_xdp_frame_check_stop_room(struct mlx5e_xdpsq *sq, int stop_room) +{ + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, stop_room))) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + sq->stats->full++; + return -EBUSY; + } + + return MLX5E_XDP_CHECK_OK; +} + +INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq) +{ + return mlx5e_xmit_xdp_frame_check_stop_room(sq, 1); +} + +INDIRECT_CALLABLE_SCOPE bool +mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, + struct skb_shared_info *sinfo, int check_result) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5_wqe_data_seg *dseg; + struct mlx5_wqe_eth_seg *eseg; + struct mlx5e_tx_wqe *wqe; + + dma_addr_t dma_addr = xdptxd->dma_addr; + u32 dma_len = xdptxd->len; + u16 ds_cnt, inline_hdr_sz; + u8 num_wqebbs = 1; + int num_frags = 0; + u16 pi; + + struct mlx5e_xdpsq_stats *stats = sq->stats; + + if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) { + stats->err++; + return false; + } + + ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1; + if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) + ds_cnt++; + + /* check_result must be 0 if sinfo is passed. */ + if (!check_result) { + int stop_room = 1; + + if (unlikely(sinfo)) { + ds_cnt += sinfo->nr_frags; + num_frags = sinfo->nr_frags; + num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); + /* Assuming MLX5_CAP_GEN(mdev, max_wqe_sz_sq) is big + * enough to hold all fragments. 
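+			 * The stop room requested just below grows accordingly,
+			 * from a single WQEBB to MLX5E_STOP_ROOM(num_wqebbs), so
+			 * the whole multi-buffer WQE is known to fit before it
+			 * is built.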
+ */ + stop_room = MLX5E_STOP_ROOM(num_wqebbs); + } + + check_result = mlx5e_xmit_xdp_frame_check_stop_room(sq, stop_room); + } + if (unlikely(check_result < 0)) + return false; + + pi = mlx5e_xdpsq_get_next_pi(sq, num_wqebbs); + wqe = mlx5_wq_cyc_get_wqe(wq, pi); + net_prefetchw(wqe); + + cseg = &wqe->ctrl; + eseg = &wqe->eth; + dseg = wqe->data; + + inline_hdr_sz = 0; + + /* copy the inline part if required */ + if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { + memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start)); + memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start), + MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start)); + dma_len -= MLX5E_XDP_MIN_INLINE; + dma_addr += MLX5E_XDP_MIN_INLINE; + inline_hdr_sz = MLX5E_XDP_MIN_INLINE; + dseg++; + } + + /* write the dma part */ + dseg->addr = cpu_to_be64(dma_addr); + dseg->byte_count = cpu_to_be32(dma_len); + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); + + if (unlikely(test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state))) { + u8 num_pkts = 1 + num_frags; + int i; + + memset(&cseg->trailer, 0, sizeof(cseg->trailer)); + memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer)); + + eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); + dseg->lkey = sq->mkey_be; + + for (i = 0; i < num_frags; i++) { + skb_frag_t *frag = &sinfo->frags[i]; + dma_addr_t addr; + + addr = page_pool_get_dma_addr(skb_frag_page(frag)) + + skb_frag_off(frag); + + dseg++; + dseg->addr = cpu_to_be64(addr); + dseg->byte_count = cpu_to_be32(skb_frag_size(frag)); + dseg->lkey = sq->mkey_be; + } + + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + + sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) { + .num_wqebbs = num_wqebbs, + .num_pkts = num_pkts, + }; + + sq->pc += num_wqebbs; + } else { + cseg->fm_ce_se = 0; + + sq->pc++; + } + + sq->doorbell_cseg = cseg; + + stats->xmit++; + return true; +} + +static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_wqe_info *wi, + u32 *xsk_frames, + bool recycle, + struct xdp_frame_bulk *bq) +{ + struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; + u16 i; + + for (i = 0; i < wi->num_pkts; i++) { + struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); + + switch (xdpi.mode) { + case MLX5E_XDP_XMIT_MODE_FRAME: + /* XDP_TX from the XSK RQ and XDP_REDIRECT */ + dma_unmap_single(sq->pdev, xdpi.frame.dma_addr, + xdpi.frame.xdpf->len, DMA_TO_DEVICE); + xdp_return_frame_bulk(xdpi.frame.xdpf, bq); + break; + case MLX5E_XDP_XMIT_MODE_PAGE: + /* XDP_TX from the regular RQ */ + mlx5e_page_release_dynamic(xdpi.page.rq, xdpi.page.page, recycle); + break; + case MLX5E_XDP_XMIT_MODE_XSK: + /* AF_XDP send */ + (*xsk_frames)++; + break; + default: + WARN_ON_ONCE(true); + } + } +} + +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) +{ + struct xdp_frame_bulk bq; + struct mlx5e_xdpsq *sq; + struct mlx5_cqe64 *cqe; + u32 xsk_frames = 0; + u16 sqcc; + int i; + + xdp_frame_bulk_init(&bq); + + sq = container_of(cq, struct mlx5e_xdpsq, cq); + + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + return false; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) + return false; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + i = 0; + do { + struct mlx5e_xdp_wqe_info *wi; + u16 wqe_counter, ci; + bool last_wqe; + + mlx5_cqwq_pop(&cq->wq); + + wqe_counter = be16_to_cpu(cqe->wqe_counter); + + do { + last_wqe = (sqcc == wqe_counter); + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); + 
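/* wqe_info[ci] was stamped at transmit time with the number of WQEBBs and packets posted for that WQE, so the consumer counter can be advanced without re-parsing the WQE itself. */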
wi = &sq->db.wqe_info[ci]; + + sqcc += wi->num_wqebbs; + + mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true, &bq); + } while (!last_wqe); + + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { + netdev_WARN_ONCE(sq->channel->netdev, + "Bad OP in XDPSQ CQE: 0x%x\n", + get_cqe_opcode(cqe)); + mlx5e_dump_error_cqe(&sq->cq, sq->sqn, + (struct mlx5_err_cqe *)cqe); + mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs); + } + } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); + + xdp_flush_frame_bulk(&bq); + + if (xsk_frames) + xsk_tx_completed(sq->xsk_pool, xsk_frames); + + sq->stats->cqes += i; + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->cc = sqcc; + return (i == MLX5E_TX_CQ_POLL_BUDGET); +} + +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) +{ + struct xdp_frame_bulk bq; + u32 xsk_frames = 0; + + xdp_frame_bulk_init(&bq); + + rcu_read_lock(); /* need for xdp_return_frame_bulk */ + + while (sq->cc != sq->pc) { + struct mlx5e_xdp_wqe_info *wi; + u16 ci; + + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc); + wi = &sq->db.wqe_info[ci]; + + sq->cc += wi->num_wqebbs; + + mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false, &bq); + } + + xdp_flush_frame_bulk(&bq); + rcu_read_unlock(); + + if (xsk_frames) + xsk_tx_completed(sq->xsk_pool, xsk_frames); +} + +int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_xdpsq *sq; + int nxmit = 0; + int sq_num; + int i; + + /* this flag is sufficient, no need to test internal sq state */ + if (unlikely(!mlx5e_xdp_tx_is_enabled(priv))) + return -ENETDOWN; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + sq_num = smp_processor_id(); + + if (unlikely(sq_num >= priv->channels.num)) + return -ENXIO; + + sq = &priv->channels.c[sq_num]->xdpsq; + + for (i = 0; i < n; i++) { + struct xdp_frame *xdpf = frames[i]; + struct mlx5e_xmit_data xdptxd; + struct mlx5e_xdp_info xdpi; + bool ret; + + xdptxd.data = xdpf->data; + xdptxd.len = xdpf->len; + xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data, + xdptxd.len, DMA_TO_DEVICE); + + if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr))) + break; + + xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME; + xdpi.frame.xdpf = xdpf; + xdpi.frame.dma_addr = xdptxd.dma_addr; + + ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, + mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0); + if (unlikely(!ret)) { + dma_unmap_single(sq->pdev, xdptxd.dma_addr, + xdptxd.len, DMA_TO_DEVICE); + break; + } + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi); + nxmit++; + } + + if (flags & XDP_XMIT_FLUSH) { + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + mlx5e_xmit_xdp_doorbell(sq); + } + + return nxmit; +} + +void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq) +{ + struct mlx5e_xdpsq *xdpsq = rq->xdpsq; + + if (xdpsq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(xdpsq); + + mlx5e_xmit_xdp_doorbell(xdpsq); + + if (test_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags)) { + xdp_do_flush_map(); + __clear_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags); + } +} + +void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw) +{ + sq->xmit_xdp_frame_check = is_mpw ? + mlx5e_xmit_xdp_frame_check_mpwqe : mlx5e_xmit_xdp_frame_check; + sq->xmit_xdp_frame = is_mpw ? 
+ mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h new file mode 100644 index 000000000..bc2d9034a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __MLX5_EN_XDP_H__ +#define __MLX5_EN_XDP_H__ + +#include <linux/indirect_call_wrapper.h> + +#include "en.h" +#include "en/txrx.h" + +#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) + +#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT 16 +#define MLX5E_XDP_INLINE_WQE_SZ_THRSD \ + (MLX5E_XDP_INLINE_WQE_MAX_DS_CNT * MLX5_SEND_WQE_DS - \ + sizeof(struct mlx5_wqe_inline_seg)) + +struct mlx5e_xsk_param; +int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); +bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page, + struct bpf_prog *prog, struct xdp_buff *xdp); +void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq); +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq); +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq); +void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw); +void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq); +int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags); + +INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, + struct mlx5e_xmit_data *xdptxd, + struct skb_shared_info *sinfo, + int check_result)); +INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, + struct mlx5e_xmit_data *xdptxd, + struct skb_shared_info *sinfo, + int check_result)); +INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)); +INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)); + +static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv) +{ + set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); + + if (priv->channels.params.xdp_prog) + set_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state); +} + +static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv) +{ + if (priv->channels.params.xdp_prog) + clear_bit(MLX5E_STATE_XDP_ACTIVE, 
&priv->state); + + clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); + /* Let other device's napi(s) and XSK wakeups see our new state. */ + synchronize_net(); +} + +static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv) +{ + return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); +} + +static inline bool mlx5e_xdp_is_active(struct mlx5e_priv *priv) +{ + return test_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state); +} + +static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) +{ + if (sq->doorbell_cseg) { + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, sq->doorbell_cseg); + sq->doorbell_cseg = NULL; + } +} + +/* Enable inline WQEs to shift some load from a congested HCA (HW) to + * a less congested cpu (SW). + */ +static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur) +{ + u16 outstanding = sq->xdpi_fifo_pc - sq->xdpi_fifo_cc; + +#define MLX5E_XDP_INLINE_WATERMARK_LOW 10 +#define MLX5E_XDP_INLINE_WATERMARK_HIGH 128 + + if (cur && outstanding <= MLX5E_XDP_INLINE_WATERMARK_LOW) + return false; + + if (!cur && outstanding >= MLX5E_XDP_INLINE_WATERMARK_HIGH) + return true; + + return cur; +} + +static inline bool mlx5e_xdp_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs) +{ + if (session->inline_on) + return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > + max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS; + + return mlx5e_tx_mpwqe_is_full(session, max_sq_mpw_wqebbs); +} + +struct mlx5e_xdp_wqe_info { + u8 num_wqebbs; + u8 num_pkts; +}; + +static inline void +mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, + struct mlx5e_xmit_data *xdptxd, + struct mlx5e_xdpsq_stats *stats) +{ + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + struct mlx5_wqe_data_seg *dseg = + (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count; + u32 dma_len = xdptxd->len; + + session->pkt_count++; + session->bytes_count += dma_len; + + if (session->inline_on && dma_len <= MLX5E_XDP_INLINE_WQE_SZ_THRSD) { + struct mlx5_wqe_inline_seg *inline_dseg = + (struct mlx5_wqe_inline_seg *)dseg; + u16 ds_len = sizeof(*inline_dseg) + dma_len; + u16 ds_cnt = DIV_ROUND_UP(ds_len, MLX5_SEND_WQE_DS); + + inline_dseg->byte_count = cpu_to_be32(dma_len | MLX5_INLINE_SEG); + memcpy(inline_dseg->data, xdptxd->data, dma_len); + + session->ds_count += ds_cnt; + stats->inlnw++; + return; + } + + dseg->addr = cpu_to_be64(xdptxd->dma_addr); + dseg->byte_count = cpu_to_be32(dma_len); + dseg->lkey = sq->mkey_be; + session->ds_count++; +} + +static inline void +mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo, + struct mlx5e_xdp_info *xi) +{ + u32 i = (*fifo->pc)++ & fifo->mask; + + fifo->xi[i] = *xi; +} + +static inline struct mlx5e_xdp_info +mlx5e_xdpi_fifo_pop(struct mlx5e_xdp_info_fifo *fifo) +{ + return fifo->xi[(*fifo->cc)++ & fifo->mask]; +} +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c new file mode 100644 index 000000000..ebada0c5a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. 
*/ + +#include <net/xdp_sock_drv.h> +#include "pool.h" +#include "setup.h" +#include "en/params.h" + +static int mlx5e_xsk_map_pool(struct mlx5e_priv *priv, + struct xsk_buff_pool *pool) +{ + struct device *dev = mlx5_core_dma_dev(priv->mdev); + + return xsk_pool_dma_map(pool, dev, DMA_ATTR_SKIP_CPU_SYNC); +} + +static void mlx5e_xsk_unmap_pool(struct mlx5e_priv *priv, + struct xsk_buff_pool *pool) +{ + return xsk_pool_dma_unmap(pool, DMA_ATTR_SKIP_CPU_SYNC); +} + +static int mlx5e_xsk_get_pools(struct mlx5e_xsk *xsk) +{ + if (!xsk->pools) { + xsk->pools = kcalloc(MLX5E_MAX_NUM_CHANNELS, + sizeof(*xsk->pools), GFP_KERNEL); + if (unlikely(!xsk->pools)) + return -ENOMEM; + } + + xsk->refcnt++; + xsk->ever_used = true; + + return 0; +} + +static void mlx5e_xsk_put_pools(struct mlx5e_xsk *xsk) +{ + if (!--xsk->refcnt) { + kfree(xsk->pools); + xsk->pools = NULL; + } +} + +static int mlx5e_xsk_add_pool(struct mlx5e_xsk *xsk, struct xsk_buff_pool *pool, u16 ix) +{ + int err; + + err = mlx5e_xsk_get_pools(xsk); + if (unlikely(err)) + return err; + + xsk->pools[ix] = pool; + return 0; +} + +static void mlx5e_xsk_remove_pool(struct mlx5e_xsk *xsk, u16 ix) +{ + xsk->pools[ix] = NULL; + + mlx5e_xsk_put_pools(xsk); +} + +static bool mlx5e_xsk_is_pool_sane(struct xsk_buff_pool *pool) +{ + return xsk_pool_get_headroom(pool) <= 0xffff && + xsk_pool_get_chunk_size(pool) <= 0xffff; +} + +void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk) +{ + xsk->headroom = xsk_pool_get_headroom(pool); + xsk->chunk_size = xsk_pool_get_chunk_size(pool); + xsk->unaligned = pool->unaligned; +} + +static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, + struct xsk_buff_pool *pool, u16 ix) +{ + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_xsk_param xsk; + struct mlx5e_channel *c; + int err; + + if (unlikely(mlx5e_xsk_get_pool(&priv->channels.params, &priv->xsk, ix))) + return -EBUSY; + + if (unlikely(!mlx5e_xsk_is_pool_sane(pool))) + return -EINVAL; + + err = mlx5e_xsk_map_pool(priv, pool); + if (unlikely(err)) + return err; + + err = mlx5e_xsk_add_pool(&priv->xsk, pool, ix); + if (unlikely(err)) + goto err_unmap_pool; + + mlx5e_build_xsk_param(pool, &xsk); + + if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + mlx5e_mpwrq_umr_mode(priv->mdev, &xsk) == MLX5E_MPWRQ_UMR_MODE_OVERSIZED) { + const char *recommendation = is_power_of_2(xsk.chunk_size) ? + "Upgrade firmware" : "Disable striding RQ"; + + mlx5_core_warn(priv->mdev, "Expected slowdown with XSK frame size %u. %s for better performance.\n", + xsk.chunk_size, recommendation); + } + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + /* XSK objects will be created on open. */ + goto validate_closed; + } + + if (!params->xdp_prog) { + /* XSK objects will be created when an XDP program is set, + * and the channels are reopened. + */ + goto validate_closed; + } + + c = priv->channels.c[ix]; + + err = mlx5e_open_xsk(priv, params, &xsk, pool, c); + if (unlikely(err)) + goto err_remove_pool; + + mlx5e_activate_xsk(c); + mlx5e_trigger_napi_icosq(c); + + /* Don't wait for WQEs, because the newer xdpsock sample doesn't provide + * any Fill Ring entries at the setup stage. 
+ */ + + mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, true); + + mlx5e_deactivate_rq(&c->rq); + mlx5e_flush_rq(&c->rq, MLX5_RQC_STATE_RDY); + + return 0; + +err_remove_pool: + mlx5e_xsk_remove_pool(&priv->xsk, ix); + +err_unmap_pool: + mlx5e_xsk_unmap_pool(priv, pool); + + return err; + +validate_closed: + /* Check the configuration in advance, rather than fail at a later stage + * (in mlx5e_xdp_set or on open) and end up with no channels. + */ + if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) { + err = -EINVAL; + goto err_remove_pool; + } + + return 0; +} + +static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix) +{ + struct xsk_buff_pool *pool = mlx5e_xsk_get_pool(&priv->channels.params, + &priv->xsk, ix); + struct mlx5e_channel *c; + + if (unlikely(!pool)) + return -EINVAL; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto remove_pool; + + /* XSK RQ and SQ are only created if XDP program is set. */ + if (!priv->channels.params.xdp_prog) + goto remove_pool; + + c = priv->channels.c[ix]; + + mlx5e_activate_rq(&c->rq); + mlx5e_trigger_napi_icosq(c); + mlx5e_wait_for_min_rx_wqes(&c->rq, MLX5E_RQ_WQES_TIMEOUT); + + mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, false); + + mlx5e_deactivate_xsk(c); + mlx5e_close_xsk(c); + +remove_pool: + mlx5e_xsk_remove_pool(&priv->xsk, ix); + mlx5e_xsk_unmap_pool(priv, pool); + + return 0; +} + +static int mlx5e_xsk_enable_pool(struct mlx5e_priv *priv, struct xsk_buff_pool *pool, + u16 ix) +{ + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_xsk_enable_locked(priv, pool, ix); + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_xsk_disable_pool(struct mlx5e_priv *priv, u16 ix) +{ + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_xsk_disable_locked(priv, ix); + mutex_unlock(&priv->state_lock); + + return err; +} + +int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_params *params = &priv->channels.params; + + if (unlikely(qid >= params->num_channels)) + return -EINVAL; + + return pool ? mlx5e_xsk_enable_pool(priv, pool, qid) : + mlx5e_xsk_disable_pool(priv, qid); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h new file mode 100644 index 000000000..dca0010a0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_EN_XSK_POOL_H__ +#define __MLX5_EN_XSK_POOL_H__ + +#include "en.h" + +static inline struct xsk_buff_pool *mlx5e_xsk_get_pool(struct mlx5e_params *params, + struct mlx5e_xsk *xsk, u16 ix) +{ + if (!xsk || !xsk->pools) + return NULL; + + if (unlikely(ix >= params->num_channels)) + return NULL; + + return xsk->pools[ix]; +} + +struct mlx5e_xsk_param; +void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk); + +/* .ndo_bpf callback. 
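+ *
+ * A non-NULL pool enables AF_XDP zero-copy on queue qid, a NULL pool
+ * disables it; qid must be below the current number of channels.
+ * Rough usage sketch, assuming a caller that already has the netdev and
+ * queue index at hand:
+ *
+ *	enable:  err = mlx5e_xsk_setup_pool(dev, pool, qid);
+ *	disable: err = mlx5e_xsk_setup_pool(dev, NULL, qid);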
*/ +int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid); + +#endif /* __MLX5_EN_XSK_POOL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c new file mode 100644 index 000000000..c91b54d9f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -0,0 +1,311 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "rx.h" +#include "en/xdp.h" +#include <net/xdp_sock_drv.h> +#include <linux/filter.h> + +/* RX data path */ + +int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) +{ + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix); + struct mlx5e_icosq *icosq = rq->icosq; + struct mlx5_wq_cyc *wq = &icosq->wq; + struct mlx5e_umr_wqe *umr_wqe; + int batch, i; + u32 offset; /* 17-bit value with MTT. */ + u16 pi; + + if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe))) + goto err; + + BUILD_BUG_ON(sizeof(wi->alloc_units[0]) != sizeof(wi->alloc_units[0].xsk)); + batch = xsk_buff_alloc_batch(rq->xsk_pool, (struct xdp_buff **)wi->alloc_units, + rq->mpwqe.pages_per_wqe); + + /* If batch < pages_per_wqe, either: + * 1. Some (or all) descriptors were invalid. + * 2. dma_need_sync is true, and it fell back to allocating one frame. + * In either case, try to continue allocating frames one by one, until + * the first error, which will mean there are no more valid descriptors. + */ + for (; batch < rq->mpwqe.pages_per_wqe; batch++) { + wi->alloc_units[batch].xsk = xsk_buff_alloc(rq->xsk_pool); + if (unlikely(!wi->alloc_units[batch].xsk)) + goto err_reuse_batch; + } + + pi = mlx5e_icosq_get_next_pi(icosq, rq->mpwqe.umr_wqebbs); + umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); + memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe)); + + if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) { + for (i = 0; i < batch; i++) { + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); + + umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { + .ptag = cpu_to_be64(addr | MLX5_EN_WR), + }; + } + } else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) { + for (i = 0; i < batch; i++) { + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); + + umr_wqe->inline_ksms[i] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr), + }; + } + } else if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) { + u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2); + + for (i = 0; i < batch; i++) { + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); + + umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr), + }; + umr_wqe->inline_ksms[(i << 2) + 1] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr + mapping_size), + }; + umr_wqe->inline_ksms[(i << 2) + 2] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr + mapping_size * 2), + }; + umr_wqe->inline_ksms[(i << 2) + 3] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(rq->wqe_overflow.addr), + }; + } + } else { + __be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) - + rq->xsk_pool->chunk_size); + __be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size); + + for (i = 0; i < batch; i++) { + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); + + umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr), + .bcount = 
frame_size, + }; + umr_wqe->inline_klms[(i << 1) + 1] = (struct mlx5_klm) { + .key = rq->mkey_be, + .va = cpu_to_be64(rq->wqe_overflow.addr), + .bcount = pad_size, + }; + } + } + + bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe); + wi->consumed_strides = 0; + + umr_wqe->ctrl.opmod_idx_opcode = + cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); + + /* Optimized for speed: keep in sync with mlx5e_mpwrq_umr_entry_size. */ + offset = ix * rq->mpwqe.mtts_per_wqe; + if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) + offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD; + else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED)) + offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD; + else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) + offset = offset * sizeof(struct mlx5_ksm) * 4 / MLX5_OCTWORD; + umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset); + + icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { + .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX, + .num_wqebbs = rq->mpwqe.umr_wqebbs, + .umr.rq = rq, + }; + + icosq->pc += rq->mpwqe.umr_wqebbs; + + icosq->doorbell_cseg = &umr_wqe->ctrl; + + return 0; + +err_reuse_batch: + while (--batch >= 0) + xsk_buff_free(wi->alloc_units[batch].xsk); + +err: + rq->stats->buff_alloc_err++; + return -ENOMEM; +} + +int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + struct xdp_buff **buffs; + u32 contig, alloc; + int i; + + /* mlx5e_init_frags_partition creates a 1:1 mapping between + * rq->wqe.frags and rq->wqe.alloc_units, which allows us to + * allocate XDP buffers straight into alloc_units. + */ + BUILD_BUG_ON(sizeof(rq->wqe.alloc_units[0]) != + sizeof(rq->wqe.alloc_units[0].xsk)); + buffs = (struct xdp_buff **)rq->wqe.alloc_units; + contig = mlx5_wq_cyc_get_size(wq) - ix; + if (wqe_bulk <= contig) { + alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk); + } else { + alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, contig); + if (likely(alloc == contig)) + alloc += xsk_buff_alloc_batch(rq->xsk_pool, buffs, wqe_bulk - contig); + } + + for (i = 0; i < alloc; i++) { + int j = mlx5_wq_cyc_ctr2ix(wq, ix + i); + struct mlx5e_wqe_frag_info *frag; + struct mlx5e_rx_wqe_cyc *wqe; + dma_addr_t addr; + + wqe = mlx5_wq_cyc_get_wqe(wq, j); + /* Assumes log_num_frags == 0. */ + frag = &rq->wqe.frags[j]; + + addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk); + wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom); + } + + return alloc; +} + +int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + int i; + + for (i = 0; i < wqe_bulk; i++) { + int j = mlx5_wq_cyc_ctr2ix(wq, ix + i); + struct mlx5e_wqe_frag_info *frag; + struct mlx5e_rx_wqe_cyc *wqe; + dma_addr_t addr; + + wqe = mlx5_wq_cyc_get_wqe(wq, j); + /* Assumes log_num_frags == 0. 
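+ * With one fragment per WQE, frags[j] corresponds 1:1 to its
+ * allocation unit, so the freshly allocated XSK buffer is stored
+ * straight into frag->au->xsk below.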
*/ + frag = &rq->wqe.frags[j]; + + frag->au->xsk = xsk_buff_alloc(rq->xsk_pool); + if (unlikely(!frag->au->xsk)) + return i; + + addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk); + wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom); + } + + return wqe_bulk; +} + +static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, struct xdp_buff *xdp) +{ + u32 totallen = xdp->data_end - xdp->data_meta; + u32 metalen = xdp->data - xdp->data_meta; + struct sk_buff *skb; + + skb = napi_alloc_skb(rq->cq.napi, totallen); + if (unlikely(!skb)) { + rq->stats->buff_alloc_err++; + return NULL; + } + + skb_put_data(skb, xdp->data_meta, totallen); + + if (metalen) { + skb_metadata_set(skb, metalen); + __skb_pull(skb, metalen); + } + + return skb; +} + +struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, + u32 head_offset, + u32 page_idx) +{ + struct xdp_buff *xdp = wi->alloc_units[page_idx].xsk; + struct bpf_prog *prog; + + /* Check packet size. Note LRO doesn't use linear SKB */ + if (unlikely(cqe_bcnt > rq->hw_mtu)) { + rq->stats->oversize_pkts_sw_drop++; + return NULL; + } + + /* head_offset is not used in this function, because xdp->data and the + * DMA address point directly to the necessary place. Furthermore, in + * the current implementation, UMR pages are mapped to XSK frames, so + * head_offset should always be 0. + */ + WARN_ON_ONCE(head_offset); + + xsk_buff_set_size(xdp, cqe_bcnt); + xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool); + net_prefetch(xdp->data); + + /* Possible flows: + * - XDP_REDIRECT to XSKMAP: + * The page is owned by the userspace from now. + * - XDP_TX and other XDP_REDIRECTs: + * The page was returned by ZCA and recycled. + * - XDP_DROP: + * Recycle the page. + * - XDP_PASS: + * Allocate an SKB, copy the data and recycle the page. + * + * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its + * size is the same as the Driver RX Ring's size, and pages for WQEs are + * allocated first from the Reuse Ring, so it has enough space. + */ + + prog = rcu_dereference(rq->xdp_prog); + if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp))) { + if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) + __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ + return NULL; /* page/packet was consumed by XDP */ + } + + /* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the + * frame. On SKB allocation failure, NULL is returned. + */ + return mlx5e_xsk_construct_skb(rq, xdp); +} + +struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, + struct mlx5e_wqe_frag_info *wi, + u32 cqe_bcnt) +{ + struct xdp_buff *xdp = wi->au->xsk; + struct bpf_prog *prog; + + /* wi->offset is not used in this function, because xdp->data and the + * DMA address point directly to the necessary place. Furthermore, the + * XSK allocator allocates frames per packet, instead of pages, so + * wi->offset should always be 0. + */ + WARN_ON_ONCE(wi->offset); + + xsk_buff_set_size(xdp, cqe_bcnt); + xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool); + net_prefetch(xdp->data); + + prog = rcu_dereference(rq->xdp_prog); + if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp))) + return NULL; /* page/packet was consumed by XDP */ + + /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse + * will be handled by mlx5e_free_rx_wqe. + * On SKB allocation failure, NULL is returned. 
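+ *
+ * mlx5e_xsk_construct_skb() allocates an SKB of data_end - data_meta
+ * bytes, copies the payload together with any XDP metadata, and records
+ * the metadata length via skb_metadata_set() before returning the SKB.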
+ */ + return mlx5e_xsk_construct_skb(rq, xdp); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h new file mode 100644 index 000000000..087c943bd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_XSK_RX_H__ +#define __MLX5_EN_XSK_RX_H__ + +#include "en.h" + +/* RX data path */ + +int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); +int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk); +int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk); +struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, + u32 head_offset, + u32 page_idx); +struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, + struct mlx5e_wqe_frag_info *wi, + u32 cqe_bcnt); + +#endif /* __MLX5_EN_XSK_RX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c new file mode 100644 index 000000000..ff03c4383 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "setup.h" +#include "en/params.h" +#include "en/txrx.h" +#include "en/health.h" +#include <net/xdp_sock_drv.h> + +/* The limitation of 2048 can be altered, but shouldn't go beyond the minimal + * stride size of striding RQ. + */ +#define MLX5E_MIN_XSK_CHUNK_SIZE max(2048, XDP_UMEM_MIN_CHUNK_SIZE) + +bool mlx5e_validate_xsk_param(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5_core_dev *mdev) +{ + /* AF_XDP doesn't support frames larger than PAGE_SIZE. */ + if (xsk->chunk_size > PAGE_SIZE || xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE) + return false; + + /* frag_sz is different for regular and XSK RQs, so ensure that linear + * SKB mode is possible. 
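+ *
+ * For example, on a system with 4 KiB pages and the default 2048 byte
+ * minimum above, only chunk sizes in the 2048..4096 byte range get past
+ * the size check; the per-WQ-type checks below then decide whether a
+ * linear SKB still fits.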
+ */ + switch (params->rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return !mlx5e_mpwrq_validate_xsk(mdev, params, xsk); + default: /* MLX5_WQ_TYPE_CYCLIC */ + return mlx5e_rx_is_linear_skb(mdev, params, xsk); + } +} + +static void mlx5e_build_xsk_cparam(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + u16 q_counter, + struct mlx5e_channel_param *cparam) +{ + mlx5e_build_rq_param(mdev, params, xsk, q_counter, &cparam->rq); + mlx5e_build_xdpsq_param(mdev, params, xsk, &cparam->xdp_sq); +} + +static int mlx5e_init_xsk_rq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct xsk_buff_pool *pool, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq *rq) +{ + struct mlx5_core_dev *mdev = c->mdev; + int rq_xdp_ix; + int err; + + rq->wq_type = params->rq_wq_type; + rq->pdev = c->pdev; + rq->netdev = c->netdev; + rq->priv = c->priv; + rq->tstamp = c->tstamp; + rq->clock = &mdev->clock; + rq->icosq = &c->icosq; + rq->ix = c->ix; + rq->channel = c; + rq->mdev = mdev; + rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + rq->xdpsq = &c->rq_xdpsq; + rq->xsk_pool = pool; + rq->stats = &c->priv->channel_stats[c->ix]->xskrq; + rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); + rq_xdp_ix = c->ix; + err = mlx5e_rq_set_handlers(rq, params, xsk); + if (err) + return err; + + return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, 0); +} + +static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq_param *rq_params, struct xsk_buff_pool *pool, + struct mlx5e_xsk_param *xsk) +{ + int err; + + err = mlx5e_init_xsk_rq(c, params, pool, xsk, &c->xskrq); + if (err) + return err; + + return mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), &c->xskrq); +} + +int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool, + struct mlx5e_channel *c) +{ + struct mlx5e_channel_param *cparam; + struct mlx5e_create_cq_param ccp; + int err; + + mlx5e_build_create_cq_param(&ccp, c); + + if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev)) + return -EINVAL; + + cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL); + if (!cparam) + return -ENOMEM; + + mlx5e_build_xsk_cparam(priv->mdev, params, xsk, priv->q_counter, cparam); + + err = mlx5e_open_cq(c->priv, params->rx_cq_moderation, &cparam->rq.cqp, &ccp, + &c->xskrq.cq); + if (unlikely(err)) + goto err_free_cparam; + + err = mlx5e_open_xsk_rq(c, params, &cparam->rq, pool, xsk); + if (unlikely(err)) + goto err_close_rx_cq; + + err = mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->xdp_sq.cqp, &ccp, + &c->xsksq.cq); + if (unlikely(err)) + goto err_close_rq; + + /* Create a separate SQ, so that when the buff pool is disabled, we could + * close this SQ safely and stop receiving CQEs. In other case, e.g., if + * the XDPSQ was used instead, we might run into trouble when the buff pool + * is disabled and then re-enabled, but the SQ continues receiving CQEs + * from the old buff pool. 
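+ *
+ * On failure the unwind below releases everything in reverse order of
+ * creation: the xsksq CQ, then the xskrq, then the xskrq CQ, before
+ * freeing cparam.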
+ */ + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, pool, &c->xsksq, true); + if (unlikely(err)) + goto err_close_tx_cq; + + kvfree(cparam); + + set_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + + return 0; + +err_close_tx_cq: + mlx5e_close_cq(&c->xsksq.cq); + +err_close_rq: + mlx5e_close_rq(&c->xskrq); + +err_close_rx_cq: + mlx5e_close_cq(&c->xskrq.cq); + +err_free_cparam: + kvfree(cparam); + + return err; +} + +void mlx5e_close_xsk(struct mlx5e_channel *c) +{ + clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + synchronize_net(); /* Sync with NAPI. */ + + mlx5e_close_rq(&c->xskrq); + mlx5e_close_cq(&c->xskrq.cq); + mlx5e_close_xdpsq(&c->xsksq); + mlx5e_close_cq(&c->xsksq.cq); + + memset(&c->xskrq, 0, sizeof(c->xskrq)); + memset(&c->xsksq, 0, sizeof(c->xsksq)); +} + +void mlx5e_activate_xsk(struct mlx5e_channel *c) +{ + /* ICOSQ recovery deactivates RQs. Suspend the recovery to avoid + * activating XSKRQ in the middle of recovery. + */ + mlx5e_reporter_icosq_suspend_recovery(c); + set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); + mlx5e_reporter_icosq_resume_recovery(c); + + /* TX queue is created active. */ +} + +void mlx5e_deactivate_xsk(struct mlx5e_channel *c) +{ + /* ICOSQ recovery may reactivate XSKRQ if clear_bit is called in the + * middle of recovery. Suspend the recovery to avoid it. + */ + mlx5e_reporter_icosq_suspend_recovery(c); + clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); + mlx5e_reporter_icosq_resume_recovery(c); + synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */ + + /* TX queue is disabled on close. */ +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h new file mode 100644 index 000000000..50e111b85 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_XSK_SETUP_H__ +#define __MLX5_EN_XSK_SETUP_H__ + +#include "en.h" + +struct mlx5e_xsk_param; + +bool mlx5e_validate_xsk_param(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5_core_dev *mdev); +int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool, + struct mlx5e_channel *c); +void mlx5e_close_xsk(struct mlx5e_channel *c); +void mlx5e_activate_xsk(struct mlx5e_channel *c); +void mlx5e_deactivate_xsk(struct mlx5e_channel *c); + +#endif /* __MLX5_EN_XSK_SETUP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c new file mode 100644 index 000000000..367a9505c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "tx.h" +#include "pool.h" +#include "en/xdp.h" +#include "en/params.h" +#include <net/xdp_sock_drv.h> + +int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_channel *c; + + if (unlikely(!mlx5e_xdp_is_active(priv))) + return -ENETDOWN; + + if (unlikely(qid >= params->num_channels)) + return -EINVAL; + + c = priv->channels.c[qid]; + + if (!napi_if_scheduled_mark_missed(&c->napi)) { + /* To avoid WQE overrun, don't post a NOP if async_icosq is not + * active and not polled by NAPI. 
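+ * (This branch runs only when NAPI was not already scheduled for this
+ * channel.)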
Return 0, because the upcoming + * activate will trigger the IRQ for us. + */ + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &c->async_icosq.state))) + return 0; + + if (test_and_set_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state)) + return 0; + + mlx5e_trigger_napi_icosq(c); + } + + return 0; +} + +/* When TX fails (because of the size of the packet), we need to get completions + * in order, so post a NOP to get a CQE. Since AF_XDP doesn't distinguish + * between successful TX and errors, handling in mlx5e_poll_xdpsq_cq is the + * same. + */ +static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_info *xdpi) +{ + u16 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi]; + struct mlx5e_tx_wqe *nopwqe; + + wi->num_wqebbs = 1; + wi->num_pkts = 1; + + nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); + sq->doorbell_cseg = &nopwqe->ctrl; +} + +bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) +{ + struct xsk_buff_pool *pool = sq->xsk_pool; + struct mlx5e_xmit_data xdptxd; + struct mlx5e_xdp_info xdpi; + bool work_done = true; + bool flush = false; + + xdpi.mode = MLX5E_XDP_XMIT_MODE_XSK; + + for (; budget; budget--) { + int check_result = INDIRECT_CALL_2(sq->xmit_xdp_frame_check, + mlx5e_xmit_xdp_frame_check_mpwqe, + mlx5e_xmit_xdp_frame_check, + sq); + struct xdp_desc desc; + bool ret; + + if (unlikely(check_result < 0)) { + work_done = false; + break; + } + + if (!xsk_tx_peek_desc(pool, &desc)) { + /* TX will get stuck until something wakes it up by + * triggering NAPI. Currently it's expected that the + * application calls sendto() if there are consumed, but + * not completed frames. + */ + break; + } + + xdptxd.dma_addr = xsk_buff_raw_get_dma(pool, desc.addr); + xdptxd.data = xsk_buff_raw_get_data(pool, desc.addr); + xdptxd.len = desc.len; + + xsk_buff_raw_dma_sync_for_device(pool, xdptxd.dma_addr, xdptxd.len); + + ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, + mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, + check_result); + if (unlikely(!ret)) { + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + + mlx5e_xsk_tx_post_err(sq, &xdpi); + } else { + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi); + } + + flush = true; + } + + if (flush) { + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + mlx5e_xmit_xdp_doorbell(sq); + + xsk_tx_release(pool); + } + + return !(budget && work_done); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h new file mode 100644 index 000000000..9c505158b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_XSK_TX_H__ +#define __MLX5_EN_XSK_TX_H__ + +#include "en.h" + +/* TX data path */ + +int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags); + +bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget); + +#endif /* __MLX5_EN_XSK_TX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h new file mode 100644 index 000000000..07187028f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5E_EN_ACCEL_H__ +#define __MLX5E_EN_ACCEL_H__ + +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include "en_accel/ipsec_rxtx.h" +#include "en_accel/ktls.h" +#include "en_accel/ktls_txrx.h" +#include <en_accel/macsec.h> +#include "en.h" +#include "en/txrx.h" + +#if IS_ENABLED(CONFIG_GENEVE) +#include <net/geneve.h> + +static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev *mdev) +{ + return mlx5_tx_swp_supported(mdev); +} + +static inline void +mlx5e_tx_tunnel_accel(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, u16 ihs) +{ + struct mlx5e_swp_spec swp_spec = {}; + unsigned int offset = 0; + __be16 l3_proto; + u8 l4_proto; + + l3_proto = vlan_get_protocol(skb); + switch (l3_proto) { + case htons(ETH_P_IP): + l4_proto = ip_hdr(skb)->protocol; + break; + case htons(ETH_P_IPV6): + l4_proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL); + break; + default: + return; + } + + if (l4_proto != IPPROTO_UDP || + udp_hdr(skb)->dest != cpu_to_be16(GENEVE_UDP_PORT)) + return; + swp_spec.l3_proto = l3_proto; + swp_spec.l4_proto = l4_proto; + swp_spec.is_tun = true; + if (inner_ip_hdr(skb)->version == 6) { + swp_spec.tun_l3_proto = htons(ETH_P_IPV6); + swp_spec.tun_l4_proto = inner_ipv6_hdr(skb)->nexthdr; + } else { + swp_spec.tun_l3_proto = htons(ETH_P_IP); + swp_spec.tun_l4_proto = inner_ip_hdr(skb)->protocol; + } + + mlx5e_set_eseg_swp(skb, eseg, &swp_spec); + if (skb_vlan_tag_present(skb) && ihs) + mlx5e_eseg_swp_offsets_add_vlan(eseg); +} + +#else +static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev *mdev) +{ + return false; +} + +#endif /* CONFIG_GENEVE */ + +static inline void +mlx5e_udp_gso_handle_tx_skb(struct sk_buff *skb) +{ + int payload_len = skb_shinfo(skb)->gso_size + sizeof(struct udphdr); + + udp_hdr(skb)->len = htons(payload_len); +} + +struct mlx5e_accel_tx_state { +#ifdef CONFIG_MLX5_EN_TLS + struct mlx5e_accel_tx_tls_state tls; +#endif +#ifdef CONFIG_MLX5_EN_IPSEC + struct mlx5e_accel_tx_ipsec_state ipsec; +#endif +}; + +static inline bool mlx5e_accel_tx_begin(struct net_device *dev, + struct mlx5e_txqsq *sq, + struct sk_buff *skb, + struct mlx5e_accel_tx_state *state) +{ + if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type 
& SKB_GSO_UDP_L4) + mlx5e_udp_gso_handle_tx_skb(skb); + +#ifdef CONFIG_MLX5_EN_TLS + /* May send SKBs and WQEs. */ + if (mlx5e_ktls_skb_offloaded(skb)) + if (unlikely(!mlx5e_ktls_handle_tx_skb(dev, sq, skb, + &state->tls))) + return false; +#endif + +#ifdef CONFIG_MLX5_EN_IPSEC + if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state) && xfrm_offload(skb)) { + if (unlikely(!mlx5e_ipsec_handle_tx_skb(dev, skb, &state->ipsec))) + return false; + } +#endif + +#ifdef CONFIG_MLX5_EN_MACSEC + if (unlikely(mlx5e_macsec_skb_is_offload(skb))) { + struct mlx5e_priv *priv = netdev_priv(dev); + + if (unlikely(!mlx5e_macsec_handle_tx_skb(priv->macsec, skb))) + return false; + } +#endif + + return true; +} + +static inline unsigned int mlx5e_accel_tx_ids_len(struct mlx5e_txqsq *sq, + struct mlx5e_accel_tx_state *state) +{ +#ifdef CONFIG_MLX5_EN_IPSEC + if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state)) + return mlx5e_ipsec_tx_ids_len(&state->ipsec); +#endif + + return 0; +} + +/* Part of the eseg touched by TX offloads */ +#define MLX5E_ACCEL_ESEG_LEN offsetof(struct mlx5_wqe_eth_seg, mss) + +static inline void mlx5e_accel_tx_eseg(struct mlx5e_priv *priv, + struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg, u16 ihs) +{ +#ifdef CONFIG_MLX5_EN_IPSEC + if (xfrm_offload(skb)) + mlx5e_ipsec_tx_build_eseg(priv, skb, eseg); +#endif + +#ifdef CONFIG_MLX5_EN_MACSEC + if (unlikely(mlx5e_macsec_skb_is_offload(skb))) + mlx5e_macsec_tx_build_eseg(priv->macsec, skb, eseg); +#endif + +#if IS_ENABLED(CONFIG_GENEVE) + if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL) + mlx5e_tx_tunnel_accel(skb, eseg, ihs); +#endif +} + +static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe *wqe, + struct mlx5e_accel_tx_state *state, + struct mlx5_wqe_inline_seg *inlseg) +{ +#ifdef CONFIG_MLX5_EN_TLS + mlx5e_ktls_handle_tx_wqe(&wqe->ctrl, &state->tls); +#endif + +#ifdef CONFIG_MLX5_EN_IPSEC + if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state) && + state->ipsec.xo && state->ipsec.tailen) + mlx5e_ipsec_handle_tx_wqe(wqe, &state->ipsec, inlseg); +#endif +} + +static inline int mlx5e_accel_init_rx(struct mlx5e_priv *priv) +{ + return mlx5e_ktls_init_rx(priv); +} + +static inline void mlx5e_accel_cleanup_rx(struct mlx5e_priv *priv) +{ + mlx5e_ktls_cleanup_rx(priv); +} + +static inline int mlx5e_accel_init_tx(struct mlx5e_priv *priv) +{ + return mlx5e_ktls_init_tx(priv); +} + +static inline void mlx5e_accel_cleanup_tx(struct mlx5e_priv *priv) +{ + mlx5e_ktls_cleanup_tx(priv); +} +#endif /* __MLX5E_EN_ACCEL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c new file mode 100644 index 000000000..06c474049 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -0,0 +1,404 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. 
*/ + +#include <mlx5_core.h> +#include "en_accel/fs_tcp.h" +#include "fs_core.h" + +enum accel_fs_tcp_type { + ACCEL_FS_IPV4_TCP, + ACCEL_FS_IPV6_TCP, + ACCEL_FS_TCP_NUM_TYPES, +}; + +struct mlx5e_accel_fs_tcp { + struct mlx5e_flow_table tables[ACCEL_FS_TCP_NUM_TYPES]; + struct mlx5_flow_handle *default_rules[ACCEL_FS_TCP_NUM_TYPES]; +}; + +static enum mlx5_traffic_types fs_accel2tt(enum accel_fs_tcp_type i) +{ + switch (i) { + case ACCEL_FS_IPV4_TCP: + return MLX5_TT_IPV4_TCP; + default: /* ACCEL_FS_IPV6_TCP */ + return MLX5_TT_IPV6_TCP; + } +} + +static void accel_fs_tcp_set_ipv4_flow(struct mlx5_flow_spec *spec, struct sock *sk) +{ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_TCP); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, 4); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), + &inet_sk(sk)->inet_daddr, 4); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &inet_sk(sk)->inet_rcv_saddr, 4); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); +} + +#if IS_ENABLED(CONFIG_IPV6) +static void accel_fs_tcp_set_ipv6_flow(struct mlx5_flow_spec *spec, struct sock *sk) +{ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_TCP); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, 6); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + &sk->sk_v6_daddr, 16); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &inet6_sk(sk)->saddr, 16); + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xff, 16); + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, 16); +} +#endif + +void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule) +{ + mlx5_del_flow_rules(rule); +} + +struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs, + struct sock *sk, u32 tirn, + uint32_t flow_tag) +{ + struct mlx5e_accel_fs_tcp *fs_tcp = mlx5e_fs_get_accel_tcp(fs); + struct mlx5_flow_destination dest = {}; + struct mlx5e_flow_table *ft = NULL; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *flow; + struct mlx5_flow_spec *spec; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + switch (sk->sk_family) { + case AF_INET: + accel_fs_tcp_set_ipv4_flow(spec, sk); + ft = &fs_tcp->tables[ACCEL_FS_IPV4_TCP]; + fs_dbg(fs, "%s flow is %pI4:%d -> %pI4:%d\n", __func__, + &inet_sk(sk)->inet_rcv_saddr, + inet_sk(sk)->inet_sport, + &inet_sk(sk)->inet_daddr, + inet_sk(sk)->inet_dport); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + if (!ipv6_only_sock(sk) && + ipv6_addr_type(&sk->sk_v6_daddr) == 
IPV6_ADDR_MAPPED) { + accel_fs_tcp_set_ipv4_flow(spec, sk); + ft = &fs_tcp->tables[ACCEL_FS_IPV4_TCP]; + } else { + accel_fs_tcp_set_ipv6_flow(spec, sk); + ft = &fs_tcp->tables[ACCEL_FS_IPV6_TCP]; + } + break; +#endif + default: + break; + } + + if (!ft) { + flow = ERR_PTR(-EINVAL); + goto out; + } + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.tcp_dport); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.tcp_sport); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_dport, + ntohs(inet_sk(sk)->inet_sport)); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_sport, + ntohs(inet_sk(sk)->inet_dport)); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = tirn; + if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG) { + spec->flow_context.flow_tag = flow_tag; + spec->flow_context.flags = FLOW_CONTEXT_HAS_TAG; + } + + flow = mlx5_add_flow_rules(ft->t, spec, &flow_act, &dest, 1); + + if (IS_ERR(flow)) + fs_err(fs, "mlx5_add_flow_rules() failed, flow is %ld\n", PTR_ERR(flow)); + +out: + kvfree(spec); + return flow; +} + +static int accel_fs_tcp_add_default_rule(struct mlx5e_flow_steering *fs, + enum accel_fs_tcp_type type) +{ + struct mlx5e_accel_fs_tcp *fs_tcp = mlx5e_fs_get_accel_tcp(fs); + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + struct mlx5e_flow_table *accel_fs_t; + struct mlx5_flow_destination dest; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + int err = 0; + + accel_fs_t = &fs_tcp->tables[type]; + + dest = mlx5_ttc_get_default_dest(ttc, fs_accel2tt(type)); + rule = mlx5_add_flow_rules(accel_fs_t->t, NULL, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + fs_err(fs, "%s: add default rule failed, accel_fs type=%d, err %d\n", + __func__, type, err); + return err; + } + + fs_tcp->default_rules[type] = rule; + return 0; +} + +#define MLX5E_ACCEL_FS_TCP_NUM_GROUPS (2) +#define MLX5E_ACCEL_FS_TCP_GROUP1_SIZE (BIT(16) - 1) +#define MLX5E_ACCEL_FS_TCP_GROUP2_SIZE (BIT(0)) +#define MLX5E_ACCEL_FS_TCP_TABLE_SIZE (MLX5E_ACCEL_FS_TCP_GROUP1_SIZE +\ + MLX5E_ACCEL_FS_TCP_GROUP2_SIZE) +static int accel_fs_tcp_create_groups(struct mlx5e_flow_table *ft, + enum accel_fs_tcp_type type) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *outer_headers_c; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_ACCEL_FS_TCP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in || !ft->g) { + kfree(ft->g); + ft->g = NULL; + kvfree(in); + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_version); + + switch (type) { + case ACCEL_FS_IPV4_TCP: + case ACCEL_FS_IPV6_TCP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport); + break; + default: + err = -EINVAL; + goto out; + } + + switch (type) { + case ACCEL_FS_IPV4_TCP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + break; + case ACCEL_FS_IPV6_TCP: + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xff, 16); + 
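+ /* Likewise let rules in this group match on the full 128-bit destination address. */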
memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, 16); + break; + default: + err = -EINVAL; + goto out; + } + + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_ACCEL_FS_TCP_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Default Flow Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_ACCEL_FS_TCP_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; +out: + kvfree(in); + + return err; +} + +static int accel_fs_tcp_create_table(struct mlx5e_flow_steering *fs, enum accel_fs_tcp_type type) +{ + struct mlx5e_accel_fs_tcp *accel_tcp = mlx5e_fs_get_accel_tcp(fs); + struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false); + struct mlx5e_flow_table *ft = &accel_tcp->tables[type]; + struct mlx5_flow_table_attr ft_attr = {}; + int err; + + ft->num_groups = 0; + + ft_attr.max_fte = MLX5E_ACCEL_FS_TCP_TABLE_SIZE; + ft_attr.level = MLX5E_ACCEL_FS_TCP_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + fs_dbg(fs, "Created fs accel table id %u level %u\n", + ft->t->id, ft->t->level); + + err = accel_fs_tcp_create_groups(ft, type); + if (err) + goto err; + + err = accel_fs_tcp_add_default_rule(fs, type); + if (err) + goto err; + + return 0; +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +static int accel_fs_tcp_disable(struct mlx5e_flow_steering *fs) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + int err, i; + + for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) { + /* Modify ttc rules destination to point back to the indir TIRs */ + err = mlx5_ttc_fwd_default_dest(ttc, fs_accel2tt(i)); + if (err) { + fs_err(fs, + "%s: modify ttc[%d] default destination failed, err(%d)\n", + __func__, fs_accel2tt(i), err); + return err; + } + } + + return 0; +} + +static int accel_fs_tcp_enable(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_accel_fs_tcp *accel_tcp = mlx5e_fs_get_accel_tcp(fs); + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + struct mlx5_flow_destination dest = {}; + int err, i; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) { + dest.ft = accel_tcp->tables[i].t; + + /* Modify ttc rules destination to point on the accel_fs FTs */ + err = mlx5_ttc_fwd_dest(ttc, fs_accel2tt(i), &dest); + if (err) { + fs_err(fs, "%s: modify ttc[%d] destination to accel failed, err(%d)\n", + __func__, fs_accel2tt(i), err); + return err; + } + } + return 0; +} + +static void accel_fs_tcp_destroy_table(struct mlx5e_flow_steering *fs, int i) +{ + struct mlx5e_accel_fs_tcp *fs_tcp = mlx5e_fs_get_accel_tcp(fs); + + if (IS_ERR_OR_NULL(fs_tcp->tables[i].t)) + return; + + mlx5_del_flow_rules(fs_tcp->default_rules[i]); + mlx5e_destroy_flow_table(&fs_tcp->tables[i]); + fs_tcp->tables[i].t = NULL; +} + +void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_accel_fs_tcp *accel_tcp = mlx5e_fs_get_accel_tcp(fs); + int i; + + if (!accel_tcp) + 
return; + + accel_fs_tcp_disable(fs); + + for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) + accel_fs_tcp_destroy_table(fs, i); + + kvfree(accel_tcp); + mlx5e_fs_set_accel_tcp(fs, NULL); +} + +int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_accel_fs_tcp *accel_tcp; + int i, err; + + if (!MLX5_CAP_FLOWTABLE_NIC_RX(mlx5e_fs_get_mdev(fs), ft_field_support.outer_ip_version)) + return -EOPNOTSUPP; + + accel_tcp = kvzalloc(sizeof(*accel_tcp), GFP_KERNEL); + if (!accel_tcp) + return -ENOMEM; + mlx5e_fs_set_accel_tcp(fs, accel_tcp); + + for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) { + err = accel_fs_tcp_create_table(fs, i); + if (err) + goto err_destroy_tables; + } + + err = accel_fs_tcp_enable(fs); + if (err) + goto err_destroy_tables; + + return 0; + +err_destroy_tables: + while (--i >= 0) + accel_fs_tcp_destroy_table(fs, i); + kvfree(accel_tcp); + mlx5e_fs_set_accel_tcp(fs, NULL); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h new file mode 100644 index 000000000..a032bff48 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5E_ACCEL_FS_TCP_H__ +#define __MLX5E_ACCEL_FS_TCP_H__ + +#include "en/fs.h" + +#ifdef CONFIG_MLX5_EN_TLS +int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs); +void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs); +struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs, + struct sock *sk, u32 tirn, + uint32_t flow_tag); +void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule); +#else +static inline int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs) { return 0; } +static inline void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs) {} +static inline struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs, + struct sock *sk, u32 tirn, + uint32_t flow_tag) +{ return ERR_PTR(-EOPNOTSUPP); } +static inline void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule) {} +#endif + +#endif /* __MLX5E_ACCEL_FS_TCP_H__ */ + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c new file mode 100644 index 000000000..a71560186 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -0,0 +1,469 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <crypto/internal/geniv.h> +#include <crypto/aead.h> +#include <linux/inetdevice.h> +#include <linux/netdevice.h> + +#include "en.h" +#include "ipsec.h" +#include "ipsec_rxtx.h" + +static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x) +{ + return (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; +} + +struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec, + unsigned int handle) +{ + struct mlx5e_ipsec_sa_entry *sa_entry; + struct xfrm_state *ret = NULL; + + rcu_read_lock(); + hash_for_each_possible_rcu(ipsec->sadb_rx, sa_entry, hlist, handle) + if (sa_entry->handle == handle) { + ret = sa_entry->x; + xfrm_state_hold(ret); + break; + } + rcu_read_unlock(); + + return ret; +} + +static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + unsigned int handle = sa_entry->ipsec_obj_id; + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + struct mlx5e_ipsec_sa_entry *_sa_entry; + unsigned long flags; + + rcu_read_lock(); + hash_for_each_possible_rcu(ipsec->sadb_rx, _sa_entry, hlist, handle) + if (_sa_entry->handle == handle) { + rcu_read_unlock(); + return -EEXIST; + } + rcu_read_unlock(); + + spin_lock_irqsave(&ipsec->sadb_rx_lock, flags); + sa_entry->handle = handle; + hash_add_rcu(ipsec->sadb_rx, &sa_entry->hlist, sa_entry->handle); + spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags); + + return 0; +} + +static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + unsigned long flags; + + spin_lock_irqsave(&ipsec->sadb_rx_lock, flags); + hash_del_rcu(&sa_entry->hlist); + spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags); +} + +static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct xfrm_replay_state_esn *replay_esn; + u32 seq_bottom = 0; + u8 overlap; + + if (!(sa_entry->x->props.flags & XFRM_STATE_ESN)) { + sa_entry->esn_state.trigger = 0; + return false; + } + + replay_esn = sa_entry->x->replay_esn; + if (replay_esn->seq >= replay_esn->replay_window) + seq_bottom = replay_esn->seq - replay_esn->replay_window + 1; + + overlap = sa_entry->esn_state.overlap; + + sa_entry->esn_state.esn = xfrm_replay_seqhi(sa_entry->x, + htonl(seq_bottom)); + + sa_entry->esn_state.trigger = 1; + if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) { + sa_entry->esn_state.overlap = 0; + return true; + } else if (unlikely(!overlap && + (seq_bottom >= MLX5E_IPSEC_ESN_SCOPE_MID))) { + sa_entry->esn_state.overlap = 1; + return true; + } + + return false; +} + +static void +mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + struct xfrm_state *x = sa_entry->x; + struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm; + struct aead_geniv_ctx *geniv_ctx; + struct crypto_aead *aead; + unsigned int crypto_data_len, key_len; + int ivsize; + + memset(attrs, 0, sizeof(*attrs)); + + /* key */ + crypto_data_len = (x->aead->alg_key_len + 7) / 8; + key_len = crypto_data_len - 4; /* 4 
bytes salt at end */ + + memcpy(aes_gcm->aes_key, x->aead->alg_key, key_len); + aes_gcm->key_len = key_len * 8; + + /* salt and seq_iv */ + aead = x->data; + geniv_ctx = crypto_aead_ctx(aead); + ivsize = crypto_aead_ivsize(aead); + memcpy(&aes_gcm->seq_iv, &geniv_ctx->salt, ivsize); + memcpy(&aes_gcm->salt, x->aead->alg_key + key_len, + sizeof(aes_gcm->salt)); + + /* iv len */ + aes_gcm->icv_len = x->aead->alg_icv_len; + + /* esn */ + if (sa_entry->esn_state.trigger) { + attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED; + attrs->esn = sa_entry->esn_state.esn; + if (sa_entry->esn_state.overlap) + attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; + } + + /* action */ + attrs->action = (x->xso.dir == XFRM_DEV_OFFLOAD_OUT) ? + MLX5_ACCEL_ESP_ACTION_ENCRYPT : + MLX5_ACCEL_ESP_ACTION_DECRYPT; + /* flags */ + attrs->flags |= (x->props.mode == XFRM_MODE_TRANSPORT) ? + MLX5_ACCEL_ESP_FLAGS_TRANSPORT : + MLX5_ACCEL_ESP_FLAGS_TUNNEL; + + /* spi */ + attrs->spi = be32_to_cpu(x->id.spi); + + /* source , destination ips */ + memcpy(&attrs->saddr, x->props.saddr.a6, sizeof(attrs->saddr)); + memcpy(&attrs->daddr, x->id.daddr.a6, sizeof(attrs->daddr)); + attrs->is_ipv6 = (x->props.family != AF_INET); +} + +static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x) +{ + struct net_device *netdev = x->xso.real_dev; + struct mlx5e_priv *priv; + + priv = netdev_priv(netdev); + + if (x->props.aalgo != SADB_AALG_NONE) { + netdev_info(netdev, "Cannot offload authenticated xfrm states\n"); + return -EINVAL; + } + if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) { + netdev_info(netdev, "Only AES-GCM-ICV16 xfrm state may be offloaded\n"); + return -EINVAL; + } + if (x->props.calgo != SADB_X_CALG_NONE) { + netdev_info(netdev, "Cannot offload compressed xfrm states\n"); + return -EINVAL; + } + if (x->props.flags & XFRM_STATE_ESN && + !(mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_ESN)) { + netdev_info(netdev, "Cannot offload ESN xfrm states\n"); + return -EINVAL; + } + if (x->props.family != AF_INET && + x->props.family != AF_INET6) { + netdev_info(netdev, "Only IPv4/6 xfrm states may be offloaded\n"); + return -EINVAL; + } + if (x->props.mode != XFRM_MODE_TRANSPORT && + x->props.mode != XFRM_MODE_TUNNEL) { + dev_info(&netdev->dev, "Only transport and tunnel xfrm states may be offloaded\n"); + return -EINVAL; + } + if (x->id.proto != IPPROTO_ESP) { + netdev_info(netdev, "Only ESP xfrm state may be offloaded\n"); + return -EINVAL; + } + if (x->encap) { + netdev_info(netdev, "Encapsulated xfrm state may not be offloaded\n"); + return -EINVAL; + } + if (!x->aead) { + netdev_info(netdev, "Cannot offload xfrm states without aead\n"); + return -EINVAL; + } + if (x->aead->alg_icv_len != 128) { + netdev_info(netdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n"); + return -EINVAL; + } + if ((x->aead->alg_key_len != 128 + 32) && + (x->aead->alg_key_len != 256 + 32)) { + netdev_info(netdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n"); + return -EINVAL; + } + if (x->tfcpad) { + netdev_info(netdev, "Cannot offload xfrm states with tfc padding\n"); + return -EINVAL; + } + if (!x->geniv) { + netdev_info(netdev, "Cannot offload xfrm states without geniv\n"); + return -EINVAL; + } + if (strcmp(x->geniv, "seqiv")) { + netdev_info(netdev, "Cannot offload xfrm states with geniv other than seqiv\n"); + return -EINVAL; + } + return 0; +} + +static void _update_xfrm_state(struct work_struct *work) +{ + struct mlx5e_ipsec_modify_state_work *modify_work = + 
container_of(work, struct mlx5e_ipsec_modify_state_work, work); + struct mlx5e_ipsec_sa_entry *sa_entry = container_of( + modify_work, struct mlx5e_ipsec_sa_entry, modify_work); + + mlx5_accel_esp_modify_xfrm(sa_entry, &modify_work->attrs); +} + +static int mlx5e_xfrm_add_state(struct xfrm_state *x) +{ + struct mlx5e_ipsec_sa_entry *sa_entry = NULL; + struct net_device *netdev = x->xso.real_dev; + struct mlx5e_priv *priv; + int err; + + priv = netdev_priv(netdev); + if (!priv->ipsec) + return -EOPNOTSUPP; + + err = mlx5e_xfrm_validate_state(x); + if (err) + return err; + + sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL); + if (!sa_entry) { + err = -ENOMEM; + goto out; + } + + sa_entry->x = x; + sa_entry->ipsec = priv->ipsec; + + /* check esn */ + mlx5e_ipsec_update_esn_state(sa_entry); + + mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs); + /* create hw context */ + err = mlx5_ipsec_create_sa_ctx(sa_entry); + if (err) + goto err_xfrm; + + err = mlx5e_accel_ipsec_fs_add_rule(priv, sa_entry); + if (err) + goto err_hw_ctx; + + if (x->xso.dir == XFRM_DEV_OFFLOAD_IN) { + err = mlx5e_ipsec_sadb_rx_add(sa_entry); + if (err) + goto err_add_rule; + } else { + sa_entry->set_iv_op = (x->props.flags & XFRM_STATE_ESN) ? + mlx5e_ipsec_set_iv_esn : mlx5e_ipsec_set_iv; + } + + INIT_WORK(&sa_entry->modify_work.work, _update_xfrm_state); + x->xso.offload_handle = (unsigned long)sa_entry; + goto out; + +err_add_rule: + mlx5e_accel_ipsec_fs_del_rule(priv, sa_entry); +err_hw_ctx: + mlx5_ipsec_free_sa_ctx(sa_entry); +err_xfrm: + kfree(sa_entry); +out: + return err; +} + +static void mlx5e_xfrm_del_state(struct xfrm_state *x) +{ + struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); + + if (x->xso.dir == XFRM_DEV_OFFLOAD_IN) + mlx5e_ipsec_sadb_rx_del(sa_entry); +} + +static void mlx5e_xfrm_free_state(struct xfrm_state *x) +{ + struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); + struct mlx5e_priv *priv = netdev_priv(x->xso.dev); + + cancel_work_sync(&sa_entry->modify_work.work); + mlx5e_accel_ipsec_fs_del_rule(priv, sa_entry); + mlx5_ipsec_free_sa_ctx(sa_entry); + kfree(sa_entry); +} + +int mlx5e_ipsec_init(struct mlx5e_priv *priv) +{ + struct mlx5e_ipsec *ipsec; + int ret; + + if (!mlx5_ipsec_device_caps(priv->mdev)) { + netdev_dbg(priv->netdev, "Not an IPSec offload device\n"); + return 0; + } + + ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL); + if (!ipsec) + return -ENOMEM; + + hash_init(ipsec->sadb_rx); + spin_lock_init(&ipsec->sadb_rx_lock); + ipsec->mdev = priv->mdev; + ipsec->wq = alloc_ordered_workqueue("mlx5e_ipsec: %s", 0, + priv->netdev->name); + if (!ipsec->wq) { + ret = -ENOMEM; + goto err_wq; + } + + ret = mlx5e_accel_ipsec_fs_init(ipsec); + if (ret) + goto err_fs_init; + + priv->ipsec = ipsec; + netdev_dbg(priv->netdev, "IPSec attached to netdevice\n"); + return 0; + +err_fs_init: + destroy_workqueue(ipsec->wq); +err_wq: + kfree(ipsec); + return (ret != -EOPNOTSUPP) ? 
ret : 0; +} + +void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5e_ipsec *ipsec = priv->ipsec; + + if (!ipsec) + return; + + mlx5e_accel_ipsec_fs_cleanup(ipsec); + destroy_workqueue(ipsec->wq); + kfree(ipsec); + priv->ipsec = NULL; +} + +static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x) +{ + if (x->props.family == AF_INET) { + /* Offload with IPv4 options is not supported yet */ + if (ip_hdr(skb)->ihl > 5) + return false; + } else { + /* Offload with IPv6 extension headers is not support yet */ + if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr)) + return false; + } + + return true; +} + +static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x) +{ + struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); + struct mlx5e_ipsec_modify_state_work *modify_work = + &sa_entry->modify_work; + bool need_update; + + need_update = mlx5e_ipsec_update_esn_state(sa_entry); + if (!need_update) + return; + + mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &modify_work->attrs); + queue_work(sa_entry->ipsec->wq, &modify_work->work); +} + +static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = { + .xdo_dev_state_add = mlx5e_xfrm_add_state, + .xdo_dev_state_delete = mlx5e_xfrm_del_state, + .xdo_dev_state_free = mlx5e_xfrm_free_state, + .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok, + .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state, +}; + +void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct net_device *netdev = priv->netdev; + + if (!mlx5_ipsec_device_caps(mdev)) + return; + + mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n"); + netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops; + netdev->features |= NETIF_F_HW_ESP; + netdev->hw_enc_features |= NETIF_F_HW_ESP; + + if (!MLX5_CAP_ETH(mdev, swp_csum)) { + mlx5_core_dbg(mdev, "mlx5e: SWP checksum not supported\n"); + return; + } + + netdev->features |= NETIF_F_HW_ESP_TX_CSUM; + netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM; + + if (!MLX5_CAP_ETH(mdev, swp_lso)) { + mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n"); + return; + } + + netdev->gso_partial_features |= NETIF_F_GSO_ESP; + mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n"); + netdev->features |= NETIF_F_GSO_ESP; + netdev->hw_features |= NETIF_F_GSO_ESP; + netdev->hw_enc_features |= NETIF_F_GSO_ESP; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h new file mode 100644 index 000000000..785f18814 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5E_IPSEC_H__ +#define __MLX5E_IPSEC_H__ + + +#include <linux/mlx5/device.h> +#include <net/xfrm.h> +#include <linux/idr.h> + +#define MLX5E_IPSEC_SADB_RX_BITS 10 +#define MLX5E_IPSEC_ESN_SCOPE_MID 0x80000000L + +enum mlx5_accel_esp_flags { + MLX5_ACCEL_ESP_FLAGS_TUNNEL = 0, /* Default */ + MLX5_ACCEL_ESP_FLAGS_TRANSPORT = 1UL << 0, + MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED = 1UL << 1, + MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP = 1UL << 2, +}; + +enum mlx5_accel_esp_action { + MLX5_ACCEL_ESP_ACTION_DECRYPT, + MLX5_ACCEL_ESP_ACTION_ENCRYPT, +}; + +struct aes_gcm_keymat { + u64 seq_iv; + + u32 salt; + u32 icv_len; + + u32 key_len; + u32 aes_key[256 / 32]; +}; + +struct mlx5_accel_esp_xfrm_attrs { + enum mlx5_accel_esp_action action; + u32 esn; + u32 spi; + u32 flags; + struct aes_gcm_keymat aes_gcm; + + union { + __be32 a4; + __be32 a6[4]; + } saddr; + + union { + __be32 a4; + __be32 a6[4]; + } daddr; + + u8 is_ipv6; +}; + +enum mlx5_ipsec_cap { + MLX5_IPSEC_CAP_CRYPTO = 1 << 0, + MLX5_IPSEC_CAP_ESN = 1 << 1, +}; + +struct mlx5e_priv; + +struct mlx5e_ipsec_sw_stats { + atomic64_t ipsec_rx_drop_sp_alloc; + atomic64_t ipsec_rx_drop_sadb_miss; + atomic64_t ipsec_rx_drop_syndrome; + atomic64_t ipsec_tx_drop_bundle; + atomic64_t ipsec_tx_drop_no_state; + atomic64_t ipsec_tx_drop_not_ip; + atomic64_t ipsec_tx_drop_trailer; +}; + +struct mlx5e_accel_fs_esp; +struct mlx5e_ipsec_tx; + +struct mlx5e_ipsec { + struct mlx5_core_dev *mdev; + DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS); + spinlock_t sadb_rx_lock; /* Protects sadb_rx */ + struct mlx5e_ipsec_sw_stats sw_stats; + struct workqueue_struct *wq; + struct mlx5e_accel_fs_esp *rx_fs; + struct mlx5e_ipsec_tx *tx_fs; +}; + +struct mlx5e_ipsec_esn_state { + u32 esn; + u8 trigger: 1; + u8 overlap: 1; +}; + +struct mlx5e_ipsec_rule { + struct mlx5_flow_handle *rule; + struct mlx5_modify_hdr *set_modify_hdr; +}; + +struct mlx5e_ipsec_modify_state_work { + struct work_struct work; + struct mlx5_accel_esp_xfrm_attrs attrs; +}; + +struct mlx5e_ipsec_sa_entry { + struct hlist_node hlist; /* Item in SADB_RX hashtable */ + struct mlx5e_ipsec_esn_state esn_state; + unsigned int handle; /* Handle in SADB_RX */ + struct xfrm_state *x; + struct mlx5e_ipsec *ipsec; + struct mlx5_accel_esp_xfrm_attrs attrs; + void (*set_iv_op)(struct sk_buff *skb, struct xfrm_state *x, + struct xfrm_offload *xo); + u32 ipsec_obj_id; + u32 enc_key_id; + struct mlx5e_ipsec_rule ipsec_rule; + struct mlx5e_ipsec_modify_state_work modify_work; +}; + +#ifdef CONFIG_MLX5_EN_IPSEC +int mlx5e_ipsec_init(struct mlx5e_priv *priv); +void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv); +void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv); + +struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *dev, + unsigned int handle); + +void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec); +int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec); +int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv, + struct mlx5e_ipsec_sa_entry *sa_entry); +void 
mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv, + struct mlx5e_ipsec_sa_entry *sa_entry); + +int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry); +void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry); + +u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev); + +void mlx5_accel_esp_modify_xfrm(struct mlx5e_ipsec_sa_entry *sa_entry, + const struct mlx5_accel_esp_xfrm_attrs *attrs); + +static inline struct mlx5_core_dev * +mlx5e_ipsec_sa2dev(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + return sa_entry->ipsec->mdev; +} +#else +static inline int mlx5e_ipsec_init(struct mlx5e_priv *priv) +{ + return 0; +} + +static inline void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) +{ +} + +static inline void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) +{ +} + +static inline u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) +{ + return 0; +} +#endif + +#endif /* __MLX5E_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c new file mode 100644 index 000000000..b859e4a4c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -0,0 +1,606 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#include <linux/netdevice.h> +#include "en.h" +#include "en/fs.h" +#include "ipsec.h" +#include "fs_core.h" + +#define NUM_IPSEC_FTE BIT(15) + +enum accel_fs_esp_type { + ACCEL_FS_ESP4, + ACCEL_FS_ESP6, + ACCEL_FS_ESP_NUM_TYPES, +}; + +struct mlx5e_ipsec_rx_err { + struct mlx5_flow_table *ft; + struct mlx5_flow_handle *rule; + struct mlx5_modify_hdr *copy_modify_hdr; +}; + +struct mlx5e_accel_fs_esp_prot { + struct mlx5_flow_table *ft; + struct mlx5_flow_group *miss_group; + struct mlx5_flow_handle *miss_rule; + struct mlx5_flow_destination default_dest; + struct mlx5e_ipsec_rx_err rx_err; + u32 refcnt; + struct mutex prot_mutex; /* protect ESP4/ESP6 protocol */ +}; + +struct mlx5e_accel_fs_esp { + struct mlx5e_accel_fs_esp_prot fs_prot[ACCEL_FS_ESP_NUM_TYPES]; +}; + +struct mlx5e_ipsec_tx { + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + struct mutex mutex; /* Protect IPsec TX steering */ + u32 refcnt; +}; + +/* IPsec RX flow steering */ +static enum mlx5_traffic_types fs_esp2tt(enum accel_fs_esp_type i) +{ + if (i == ACCEL_FS_ESP4) + return MLX5_TT_IPV4_IPSEC_ESP; + return MLX5_TT_IPV6_IPSEC_ESP; +} + +static int rx_err_add_rule(struct mlx5e_priv *priv, + struct mlx5e_accel_fs_esp_prot *fs_prot, + struct mlx5e_ipsec_rx_err *rx_err) +{ + u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_flow_act flow_act = {}; + struct mlx5_modify_hdr *modify_hdr; + struct mlx5_flow_handle *fte; + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + /* Action to copy 7 bit ipsec_syndrome to regB[24:30] */ + MLX5_SET(copy_action_in, action, action_type, MLX5_ACTION_TYPE_COPY); + MLX5_SET(copy_action_in, action, src_field, MLX5_ACTION_IN_FIELD_IPSEC_SYNDROME); + MLX5_SET(copy_action_in, action, src_offset, 0); + MLX5_SET(copy_action_in, action, length, 7); + MLX5_SET(copy_action_in, action, dst_field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); + MLX5_SET(copy_action_in, action, dst_offset, 24); + + modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_KERNEL, + 1, action); + + if (IS_ERR(modify_hdr)) { + err = PTR_ERR(modify_hdr); + 
netdev_err(priv->netdev, + "fail to alloc ipsec copy modify_header_id err=%d\n", err); + goto out_spec; + } + + /* create fte */ + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_act.modify_hdr = modify_hdr; + fte = mlx5_add_flow_rules(rx_err->ft, spec, &flow_act, + &fs_prot->default_dest, 1); + if (IS_ERR(fte)) { + err = PTR_ERR(fte); + netdev_err(priv->netdev, "fail to add ipsec rx err copy rule err=%d\n", err); + goto out; + } + + kvfree(spec); + rx_err->rule = fte; + rx_err->copy_modify_hdr = modify_hdr; + return 0; + +out: + mlx5_modify_header_dealloc(mdev, modify_hdr); +out_spec: + kvfree(spec); + return err; +} + +static int rx_fs_create(struct mlx5e_priv *priv, + struct mlx5e_accel_fs_esp_prot *fs_prot) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table *ft = fs_prot->ft; + struct mlx5_flow_group *miss_group; + struct mlx5_flow_handle *miss_rule; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_spec *spec; + u32 *flow_group_in; + int err = 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!flow_group_in || !spec) { + err = -ENOMEM; + goto out; + } + + /* Create miss_group */ + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ft->max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft->max_fte - 1); + miss_group = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(miss_group)) { + err = PTR_ERR(miss_group); + netdev_err(priv->netdev, "fail to create ipsec rx miss_group err=%d\n", err); + goto out; + } + fs_prot->miss_group = miss_group; + + /* Create miss rule */ + miss_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &fs_prot->default_dest, 1); + if (IS_ERR(miss_rule)) { + mlx5_destroy_flow_group(fs_prot->miss_group); + err = PTR_ERR(miss_rule); + netdev_err(priv->netdev, "fail to create ipsec rx miss_rule err=%d\n", err); + goto out; + } + fs_prot->miss_rule = miss_rule; +out: + kvfree(flow_group_in); + kvfree(spec); + return err; +} + +static void rx_destroy(struct mlx5e_priv *priv, enum accel_fs_esp_type type) +{ + struct mlx5e_accel_fs_esp_prot *fs_prot; + struct mlx5e_accel_fs_esp *accel_esp; + + accel_esp = priv->ipsec->rx_fs; + + /* The netdev unreg already happened, so all offloaded rule are already removed */ + fs_prot = &accel_esp->fs_prot[type]; + + mlx5_del_flow_rules(fs_prot->miss_rule); + mlx5_destroy_flow_group(fs_prot->miss_group); + mlx5_destroy_flow_table(fs_prot->ft); + + mlx5_del_flow_rules(fs_prot->rx_err.rule); + mlx5_modify_header_dealloc(priv->mdev, fs_prot->rx_err.copy_modify_hdr); + mlx5_destroy_flow_table(fs_prot->rx_err.ft); +} + +static int rx_create(struct mlx5e_priv *priv, enum accel_fs_esp_type type) +{ + struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(priv->fs, false); + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(priv->fs, false); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5e_accel_fs_esp_prot *fs_prot; + struct mlx5e_accel_fs_esp *accel_esp; + struct mlx5_flow_table *ft; + int err; + + accel_esp = priv->ipsec->rx_fs; + fs_prot = &accel_esp->fs_prot[type]; + fs_prot->default_dest = + mlx5_ttc_get_default_dest(ttc, fs_esp2tt(type)); + + ft_attr.max_fte = 1; + ft_attr.autogroup.max_num_groups = 1; + ft_attr.level = MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) + return PTR_ERR(ft); + + fs_prot->rx_err.ft = ft; + err = rx_err_add_rule(priv, fs_prot, 
&fs_prot->rx_err); + if (err) + goto err_add; + + /* Create FT */ + ft_attr.max_fte = NUM_IPSEC_FTE; + ft_attr.level = MLX5E_ACCEL_FS_ESP_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + ft_attr.autogroup.num_reserved_entries = 1; + ft_attr.autogroup.max_num_groups = 1; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_fs_ft; + } + fs_prot->ft = ft; + + err = rx_fs_create(priv, fs_prot); + if (err) + goto err_fs; + + return 0; + +err_fs: + mlx5_destroy_flow_table(fs_prot->ft); +err_fs_ft: + mlx5_del_flow_rules(fs_prot->rx_err.rule); + mlx5_modify_header_dealloc(priv->mdev, fs_prot->rx_err.copy_modify_hdr); +err_add: + mlx5_destroy_flow_table(fs_prot->rx_err.ft); + return err; +} + +static int rx_ft_get(struct mlx5e_priv *priv, enum accel_fs_esp_type type) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(priv->fs, false); + struct mlx5e_accel_fs_esp_prot *fs_prot; + struct mlx5_flow_destination dest = {}; + struct mlx5e_accel_fs_esp *accel_esp; + int err = 0; + + accel_esp = priv->ipsec->rx_fs; + fs_prot = &accel_esp->fs_prot[type]; + mutex_lock(&fs_prot->prot_mutex); + if (fs_prot->refcnt) + goto skip; + + /* create FT */ + err = rx_create(priv, type); + if (err) + goto out; + + /* connect */ + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = fs_prot->ft; + mlx5_ttc_fwd_dest(ttc, fs_esp2tt(type), &dest); + +skip: + fs_prot->refcnt++; +out: + mutex_unlock(&fs_prot->prot_mutex); + return err; +} + +static void rx_ft_put(struct mlx5e_priv *priv, enum accel_fs_esp_type type) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(priv->fs, false); + struct mlx5e_accel_fs_esp_prot *fs_prot; + struct mlx5e_accel_fs_esp *accel_esp; + + accel_esp = priv->ipsec->rx_fs; + fs_prot = &accel_esp->fs_prot[type]; + mutex_lock(&fs_prot->prot_mutex); + fs_prot->refcnt--; + if (fs_prot->refcnt) + goto out; + + /* disconnect */ + mlx5_ttc_fwd_default_dest(ttc, fs_esp2tt(type)); + + /* remove FT */ + rx_destroy(priv, type); + +out: + mutex_unlock(&fs_prot->prot_mutex); +} + +/* IPsec TX flow steering */ +static int tx_create(struct mlx5e_priv *priv) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5e_ipsec *ipsec = priv->ipsec; + struct mlx5_flow_table *ft; + int err; + + ft_attr.max_fte = NUM_IPSEC_FTE; + ft_attr.autogroup.max_num_groups = 1; + ft = mlx5_create_auto_grouped_flow_table(ipsec->tx_fs->ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + netdev_err(priv->netdev, "fail to create ipsec tx ft err=%d\n", err); + return err; + } + ipsec->tx_fs->ft = ft; + return 0; +} + +static int tx_ft_get(struct mlx5e_priv *priv) +{ + struct mlx5e_ipsec_tx *tx_fs = priv->ipsec->tx_fs; + int err = 0; + + mutex_lock(&tx_fs->mutex); + if (tx_fs->refcnt) + goto skip; + + err = tx_create(priv); + if (err) + goto out; +skip: + tx_fs->refcnt++; +out: + mutex_unlock(&tx_fs->mutex); + return err; +} + +static void tx_ft_put(struct mlx5e_priv *priv) +{ + struct mlx5e_ipsec_tx *tx_fs = priv->ipsec->tx_fs; + + mutex_lock(&tx_fs->mutex); + tx_fs->refcnt--; + if (tx_fs->refcnt) + goto out; + + mlx5_destroy_flow_table(tx_fs->ft); +out: + mutex_unlock(&tx_fs->mutex); +} + +static void setup_fte_common(struct mlx5_accel_esp_xfrm_attrs *attrs, + u32 ipsec_obj_id, + struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act) +{ + u8 ip_version = attrs->is_ipv6 ? 
6 : 4; + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS; + + /* ip_version */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ip_version); + + /* Non fragmented */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.frag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.frag, 0); + + /* ESP header */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_ESP); + + /* SPI number */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters.outer_esp_spi); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters.outer_esp_spi, attrs->spi); + + if (ip_version == 4) { + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), + &attrs->saddr.a4, 4); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &attrs->daddr.a4, 4); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + } else { + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + &attrs->saddr.a6, 16); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &attrs->daddr.a6, 16); + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xff, 16); + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, 16); + } + + flow_act->crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC; + flow_act->crypto.obj_id = ipsec_obj_id; + flow_act->flags |= FLOW_ACT_NO_APPEND; +} + +static int rx_add_rule(struct mlx5e_priv *priv, + struct mlx5e_ipsec_sa_entry *sa_entry) +{ + u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule; + struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; + u32 ipsec_obj_id = sa_entry->ipsec_obj_id; + struct mlx5_modify_hdr *modify_hdr = NULL; + struct mlx5e_accel_fs_esp_prot *fs_prot; + struct mlx5_flow_destination dest = {}; + struct mlx5e_accel_fs_esp *accel_esp; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + enum accel_fs_esp_type type; + struct mlx5_flow_spec *spec; + int err = 0; + + accel_esp = priv->ipsec->rx_fs; + type = attrs->is_ipv6 ? 
ACCEL_FS_ESP6 : ACCEL_FS_ESP4; + fs_prot = &accel_esp->fs_prot[type]; + + err = rx_ft_get(priv, type); + if (err) + return err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto out_err; + } + + setup_fte_common(attrs, ipsec_obj_id, spec, &flow_act); + + /* Set bit[31] ipsec marker */ + /* Set bit[23-0] ipsec_obj_id */ + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); + MLX5_SET(set_action_in, action, data, (ipsec_obj_id | BIT(31))); + MLX5_SET(set_action_in, action, offset, 0); + MLX5_SET(set_action_in, action, length, 32); + + modify_hdr = mlx5_modify_header_alloc(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL, + 1, action); + if (IS_ERR(modify_hdr)) { + err = PTR_ERR(modify_hdr); + netdev_err(priv->netdev, + "fail to alloc ipsec set modify_header_id err=%d\n", err); + modify_hdr = NULL; + goto out_err; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + flow_act.modify_hdr = modify_hdr; + dest.ft = fs_prot->rx_err.ft; + rule = mlx5_add_flow_rules(fs_prot->ft, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "fail to add ipsec rule attrs->action=0x%x, err=%d\n", + attrs->action, err); + goto out_err; + } + + ipsec_rule->rule = rule; + ipsec_rule->set_modify_hdr = modify_hdr; + goto out; + +out_err: + if (modify_hdr) + mlx5_modify_header_dealloc(priv->mdev, modify_hdr); + rx_ft_put(priv, type); + +out: + kvfree(spec); + return err; +} + +static int tx_add_rule(struct mlx5e_priv *priv, + struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + + err = tx_ft_get(priv); + if (err) + return err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto out; + } + + setup_fte_common(&sa_entry->attrs, sa_entry->ipsec_obj_id, spec, + &flow_act); + + /* Add IPsec indicator in metadata_reg_a */ + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_a, + MLX5_ETH_WQE_FT_META_IPSEC); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_a, + MLX5_ETH_WQE_FT_META_IPSEC); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW | + MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT; + rule = mlx5_add_flow_rules(priv->ipsec->tx_fs->ft, spec, &flow_act, NULL, 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "fail to add ipsec rule attrs->action=0x%x, err=%d\n", + sa_entry->attrs.action, err); + goto out; + } + + sa_entry->ipsec_rule.rule = rule; + +out: + kvfree(spec); + if (err) + tx_ft_put(priv); + return err; +} + +int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv, + struct mlx5e_ipsec_sa_entry *sa_entry) +{ + if (sa_entry->attrs.action == MLX5_ACCEL_ESP_ACTION_ENCRYPT) + return tx_add_rule(priv, sa_entry); + + return rx_add_rule(priv, sa_entry); +} + +void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv, + struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule; + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + + mlx5_del_flow_rules(ipsec_rule->rule); + + if (sa_entry->attrs.action == MLX5_ACCEL_ESP_ACTION_ENCRYPT) { + tx_ft_put(priv); + return; + } + + 
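+	/* RX (decrypt) rule: release the metadata modify header and drop
+	 * this protocol's RX steering table reference.
+	 */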
mlx5_modify_header_dealloc(mdev, ipsec_rule->set_modify_hdr); + rx_ft_put(priv, + sa_entry->attrs.is_ipv6 ? ACCEL_FS_ESP6 : ACCEL_FS_ESP4); +} + +void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec) +{ + struct mlx5e_accel_fs_esp_prot *fs_prot; + struct mlx5e_accel_fs_esp *accel_esp; + enum accel_fs_esp_type i; + + if (!ipsec->rx_fs) + return; + + mutex_destroy(&ipsec->tx_fs->mutex); + WARN_ON(ipsec->tx_fs->refcnt); + kfree(ipsec->tx_fs); + + accel_esp = ipsec->rx_fs; + for (i = 0; i < ACCEL_FS_ESP_NUM_TYPES; i++) { + fs_prot = &accel_esp->fs_prot[i]; + mutex_destroy(&fs_prot->prot_mutex); + WARN_ON(fs_prot->refcnt); + } + kfree(ipsec->rx_fs); +} + +int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec) +{ + struct mlx5e_accel_fs_esp_prot *fs_prot; + struct mlx5e_accel_fs_esp *accel_esp; + struct mlx5_flow_namespace *ns; + enum accel_fs_esp_type i; + int err = -ENOMEM; + + ns = mlx5_get_flow_namespace(ipsec->mdev, + MLX5_FLOW_NAMESPACE_EGRESS_IPSEC); + if (!ns) + return -EOPNOTSUPP; + + ipsec->tx_fs = kzalloc(sizeof(*ipsec->tx_fs), GFP_KERNEL); + if (!ipsec->tx_fs) + return -ENOMEM; + + ipsec->rx_fs = kzalloc(sizeof(*ipsec->rx_fs), GFP_KERNEL); + if (!ipsec->rx_fs) + goto err_rx; + + mutex_init(&ipsec->tx_fs->mutex); + ipsec->tx_fs->ns = ns; + + accel_esp = ipsec->rx_fs; + for (i = 0; i < ACCEL_FS_ESP_NUM_TYPES; i++) { + fs_prot = &accel_esp->fs_prot[i]; + mutex_init(&fs_prot->prot_mutex); + } + + return 0; + +err_rx: + kfree(ipsec->tx_fs); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c new file mode 100644 index 000000000..792724ce7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. */ + +#include "mlx5_core.h" +#include "ipsec.h" +#include "lib/mlx5.h" + +u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) +{ + u32 caps = 0; + + if (!MLX5_CAP_GEN(mdev, ipsec_offload)) + return 0; + + if (!MLX5_CAP_GEN(mdev, log_max_dek)) + return 0; + + if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) & + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC)) + return 0; + + if (!MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ipsec_encrypt) || + !MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ipsec_decrypt)) + return 0; + + if (!MLX5_CAP_IPSEC(mdev, ipsec_crypto_esp_aes_gcm_128_encrypt) || + !MLX5_CAP_IPSEC(mdev, ipsec_crypto_esp_aes_gcm_128_decrypt)) + return 0; + + if (MLX5_CAP_IPSEC(mdev, ipsec_crypto_offload) && + MLX5_CAP_ETH(mdev, insert_trailer) && MLX5_CAP_ETH(mdev, swp)) + caps |= MLX5_IPSEC_CAP_CRYPTO; + + if (!caps) + return 0; + + if (MLX5_CAP_IPSEC(mdev, ipsec_esn)) + caps |= MLX5_IPSEC_CAP_ESN; + + /* We can accommodate up to 2^24 different IPsec objects + * because we use up to 24 bit in flow table metadata + * to hold the IPsec Object unique handle. 
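+	 * The WARN below catches a device reporting a handle space larger
+	 * than these 24 bits.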
+ */ + WARN_ON_ONCE(MLX5_CAP_IPSEC(mdev, log_max_ipsec_offload) > 24); + return caps; +} +EXPORT_SYMBOL_GPL(mlx5_ipsec_device_caps); + +static int mlx5_create_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + u32 in[MLX5_ST_SZ_DW(create_ipsec_obj_in)] = {}; + void *obj, *salt_p, *salt_iv_p; + int err; + + obj = MLX5_ADDR_OF(create_ipsec_obj_in, in, ipsec_object); + + /* salt and seq_iv */ + salt_p = MLX5_ADDR_OF(ipsec_obj, obj, salt); + memcpy(salt_p, &aes_gcm->salt, sizeof(aes_gcm->salt)); + + MLX5_SET(ipsec_obj, obj, icv_length, MLX5_IPSEC_OBJECT_ICV_LEN_16B); + salt_iv_p = MLX5_ADDR_OF(ipsec_obj, obj, implicit_iv); + memcpy(salt_iv_p, &aes_gcm->seq_iv, sizeof(aes_gcm->seq_iv)); + /* esn */ + if (attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) { + MLX5_SET(ipsec_obj, obj, esn_en, 1); + MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn); + if (attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) + MLX5_SET(ipsec_obj, obj, esn_overlap, 1); + } + + MLX5_SET(ipsec_obj, obj, dekn, sa_entry->enc_key_id); + + /* general object fields set */ + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_IPSEC); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (!err) + sa_entry->ipsec_obj_id = + MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return err; +} + +static void mlx5_destroy_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_IPSEC); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sa_entry->ipsec_obj_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct aes_gcm_keymat *aes_gcm = &sa_entry->attrs.aes_gcm; + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + int err; + + /* key */ + err = mlx5_create_encryption_key(mdev, aes_gcm->aes_key, + aes_gcm->key_len / BITS_PER_BYTE, + MLX5_ACCEL_OBJ_IPSEC_KEY, + &sa_entry->enc_key_id); + if (err) { + mlx5_core_dbg(mdev, "Failed to create encryption key (err = %d)\n", err); + return err; + } + + err = mlx5_create_ipsec_obj(sa_entry); + if (err) { + mlx5_core_dbg(mdev, "Failed to create IPsec object (err = %d)\n", err); + goto err_enc_key; + } + + return 0; + +err_enc_key: + mlx5_destroy_encryption_key(mdev, sa_entry->enc_key_id); + return err; +} + +void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + + mlx5_destroy_ipsec_obj(sa_entry); + mlx5_destroy_encryption_key(mdev, sa_entry->enc_key_id); +} + +static int mlx5_modify_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry, + const struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + u32 in[MLX5_ST_SZ_DW(modify_ipsec_obj_in)] = {}; + u32 out[MLX5_ST_SZ_DW(query_ipsec_obj_out)]; + u64 modify_field_select = 0; + u64 general_obj_types; + void *obj; + int err; + + if (!(attrs->flags & 
MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED)) + return 0; + + general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); + if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC)) + return -EINVAL; + + /* general object fields set */ + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_IPSEC); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sa_entry->ipsec_obj_id); + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) { + mlx5_core_err(mdev, "Query IPsec object failed (Object id %d), err = %d\n", + sa_entry->ipsec_obj_id, err); + return err; + } + + obj = MLX5_ADDR_OF(query_ipsec_obj_out, out, ipsec_object); + modify_field_select = MLX5_GET64(ipsec_obj, obj, modify_field_select); + + /* esn */ + if (!(modify_field_select & MLX5_MODIFY_IPSEC_BITMASK_ESN_OVERLAP) || + !(modify_field_select & MLX5_MODIFY_IPSEC_BITMASK_ESN_MSB)) + return -EOPNOTSUPP; + + obj = MLX5_ADDR_OF(modify_ipsec_obj_in, in, ipsec_object); + MLX5_SET64(ipsec_obj, obj, modify_field_select, + MLX5_MODIFY_IPSEC_BITMASK_ESN_OVERLAP | + MLX5_MODIFY_IPSEC_BITMASK_ESN_MSB); + MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn); + if (attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) + MLX5_SET(ipsec_obj, obj, esn_overlap, 1); + + /* general object fields set */ + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +void mlx5_accel_esp_modify_xfrm(struct mlx5e_ipsec_sa_entry *sa_entry, + const struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + int err; + + err = mlx5_modify_ipsec_obj(sa_entry, attrs); + if (err) + return; + + memcpy(&sa_entry->attrs, attrs, sizeof(sa_entry->attrs)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c new file mode 100644 index 000000000..c4a84f0a3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -0,0 +1,356 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#include <crypto/aead.h> +#include <net/xfrm.h> +#include <net/esp.h> +#include "ipsec.h" +#include "ipsec_rxtx.h" +#include "en.h" + +enum { + MLX5E_IPSEC_TX_SYNDROME_OFFLOAD = 0x8, + MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP = 0x9, +}; + +static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x) +{ + unsigned int alen = crypto_aead_authsize(x->data); + struct ipv6hdr *ipv6hdr = ipv6_hdr(skb); + struct iphdr *ipv4hdr = ip_hdr(skb); + unsigned int trailer_len; + u8 plen; + int ret; + + ret = skb_copy_bits(skb, skb->len - alen - 2, &plen, 1); + if (unlikely(ret)) + return ret; + + trailer_len = alen + plen + 2; + + ret = pskb_trim(skb, skb->len - trailer_len); + if (unlikely(ret)) + return ret; + if (skb->protocol == htons(ETH_P_IP)) { + ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len); + ip_send_check(ipv4hdr); + } else { + ipv6hdr->payload_len = htons(ntohs(ipv6hdr->payload_len) - + trailer_len); + } + return 0; +} + +static void mlx5e_ipsec_set_swp(struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg, u8 mode, + struct xfrm_offload *xo) +{ + /* Tunnel Mode: + * SWP: OutL3 InL3 InL4 + * Pkt: MAC IP ESP IP L4 + * + * Transport Mode: + * SWP: OutL3 OutL4 + * Pkt: MAC IP ESP L4 + * + * Tunnel(VXLAN TCP/UDP) over Transport Mode + * SWP: OutL3 InL3 InL4 + * Pkt: MAC IP ESP UDP VXLAN IP L4 + */ + + /* Shared settings */ + eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2; + if (skb->protocol == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6; + + /* Tunnel mode */ + if (mode == XFRM_MODE_TUNNEL) { + eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; + if (xo->proto == IPPROTO_IPV6) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + + switch (xo->inner_ipproto) { + case IPPROTO_UDP: + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; + fallthrough; + case IPPROTO_TCP: + /* IP | ESP | IP | [TCP | UDP] */ + eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; + break; + default: + break; + } + return; + } + + /* Transport mode */ + if (mode != XFRM_MODE_TRANSPORT) + return; + + if (!xo->inner_ipproto) { + switch (xo->proto) { + case IPPROTO_UDP: + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP; + fallthrough; + case IPPROTO_TCP: + /* IP | ESP | TCP */ + eseg->swp_outer_l4_offset = skb_inner_transport_offset(skb) / 2; + break; + default: + break; + } + } else { + /* Tunnel(VXLAN TCP/UDP) over Transport Mode */ + switch (xo->inner_ipproto) { + case IPPROTO_UDP: + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; + fallthrough; + case IPPROTO_TCP: + eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; + eseg->swp_inner_l4_offset = + (skb->csum_start + skb->head - skb->data) / 2; + if (inner_ip_hdr(skb)->version == 6) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + break; + default: + break; + } + } + +} + +void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x, + struct xfrm_offload *xo) +{ + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + __u32 oseq = replay_esn->oseq; + int iv_offset; + __be64 seqno; + u32 seq_hi; + + if (unlikely(skb_is_gso(skb) && oseq < MLX5E_IPSEC_ESN_SCOPE_MID && + MLX5E_IPSEC_ESN_SCOPE_MID < (oseq - skb_shinfo(skb)->gso_segs))) { + seq_hi = xo->seq.hi - 1; + } else { + seq_hi = xo->seq.hi; + } + + /* Place the SN in the IV field */ + seqno = cpu_to_be64(xo->seq.low + ((u64)seq_hi << 32)); + iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr); + skb_store_bits(skb, iv_offset, &seqno, 8); +} + +void 
mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x, + struct xfrm_offload *xo) +{ + int iv_offset; + __be64 seqno; + + /* Place the SN in the IV field */ + seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); + iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr); + skb_store_bits(skb, iv_offset, &seqno, 8); +} + +void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe, + struct mlx5e_accel_tx_ipsec_state *ipsec_st, + struct mlx5_wqe_inline_seg *inlseg) +{ + inlseg->byte_count = cpu_to_be32(ipsec_st->tailen | MLX5_INLINE_SEG); + esp_output_fill_trailer((u8 *)inlseg->data, 0, ipsec_st->plen, ipsec_st->xo->proto); +} + +static int mlx5e_ipsec_set_state(struct mlx5e_priv *priv, + struct sk_buff *skb, + struct xfrm_state *x, + struct xfrm_offload *xo, + struct mlx5e_accel_tx_ipsec_state *ipsec_st) +{ + unsigned int blksize, clen, alen, plen; + struct crypto_aead *aead; + unsigned int tailen; + + ipsec_st->x = x; + ipsec_st->xo = xo; + aead = x->data; + alen = crypto_aead_authsize(aead); + blksize = ALIGN(crypto_aead_blocksize(aead), 4); + clen = ALIGN(skb->len + 2, blksize); + plen = max_t(u32, clen - skb->len, 4); + tailen = plen + alen; + ipsec_st->plen = plen; + ipsec_st->tailen = tailen; + + return 0; +} + +void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg) +{ + struct xfrm_offload *xo = xfrm_offload(skb); + struct xfrm_encap_tmpl *encap; + struct xfrm_state *x; + struct sec_path *sp; + u8 l3_proto; + + sp = skb_sec_path(skb); + if (unlikely(sp->len != 1)) + return; + + x = xfrm_input_state(skb); + if (unlikely(!x)) + return; + + if (unlikely(!x->xso.offload_handle || + (skb->protocol != htons(ETH_P_IP) && + skb->protocol != htons(ETH_P_IPV6)))) + return; + + mlx5e_ipsec_set_swp(skb, eseg, x->props.mode, xo); + + l3_proto = (x->props.family == AF_INET) ? + ((struct iphdr *)skb_network_header(skb))->protocol : + ((struct ipv6hdr *)skb_network_header(skb))->nexthdr; + + eseg->flow_table_metadata |= cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC); + eseg->trailer |= cpu_to_be32(MLX5_ETH_WQE_INSERT_TRAILER); + encap = x->encap; + if (!encap) { + eseg->trailer |= (l3_proto == IPPROTO_ESP) ? + cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_IP_ASSOC) : + cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_L4_ASSOC); + } else if (encap->encap_type == UDP_ENCAP_ESPINUDP) { + eseg->trailer |= (l3_proto == IPPROTO_ESP) ? 
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_IP_ASSOC) : + cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_L4_ASSOC); + } +} + +bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, + struct sk_buff *skb, + struct mlx5e_accel_tx_ipsec_state *ipsec_st) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct xfrm_offload *xo = xfrm_offload(skb); + struct mlx5e_ipsec_sa_entry *sa_entry; + struct xfrm_state *x; + struct sec_path *sp; + + sp = skb_sec_path(skb); + if (unlikely(sp->len != 1)) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle); + goto drop; + } + + x = xfrm_input_state(skb); + if (unlikely(!x)) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_no_state); + goto drop; + } + + if (unlikely(!x->xso.offload_handle || + (skb->protocol != htons(ETH_P_IP) && + skb->protocol != htons(ETH_P_IPV6)))) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_not_ip); + goto drop; + } + + if (!skb_is_gso(skb)) + if (unlikely(mlx5e_ipsec_remove_trailer(skb, x))) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_trailer); + goto drop; + } + + sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; + sa_entry->set_iv_op(skb, x, xo); + mlx5e_ipsec_set_state(priv, skb, x, xo, ipsec_st); + + return true; + +drop: + kfree_skb(skb); + return false; +} + +enum { + MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED, + MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_AUTH_FAILED, + MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_BAD_TRAILER, +}; + +void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, + struct sk_buff *skb, + struct mlx5_cqe64 *cqe) +{ + u32 ipsec_meta_data = be32_to_cpu(cqe->ft_metadata); + struct mlx5e_priv *priv; + struct xfrm_offload *xo; + struct xfrm_state *xs; + struct sec_path *sp; + u32 sa_handle; + + sa_handle = MLX5_IPSEC_METADATA_HANDLE(ipsec_meta_data); + priv = netdev_priv(netdev); + sp = secpath_set(skb); + if (unlikely(!sp)) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc); + return; + } + + xs = mlx5e_ipsec_sadb_rx_lookup(priv->ipsec, sa_handle); + if (unlikely(!xs)) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sadb_miss); + return; + } + + sp->xvec[sp->len++] = xs; + sp->olen++; + + xo = xfrm_offload(skb); + xo->flags = CRYPTO_DONE; + + switch (MLX5_IPSEC_METADATA_SYNDROM(ipsec_meta_data)) { + case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED: + xo->status = CRYPTO_SUCCESS; + break; + case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_AUTH_FAILED: + xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED; + break; + case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_BAD_TRAILER: + xo->status = CRYPTO_INVALID_PACKET_SYNTAX; + break; + default: + atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome); + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h new file mode 100644 index 000000000..1878a70b9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5E_IPSEC_RXTX_H__ +#define __MLX5E_IPSEC_RXTX_H__ + +#include <linux/skbuff.h> +#include <net/xfrm.h> +#include "en.h" +#include "en/txrx.h" + +/* Bit31: IPsec marker, Bit30: reserved, Bit29-24: IPsec syndrome, Bit23-0: IPsec obj id */ +#define MLX5_IPSEC_METADATA_MARKER(metadata) (((metadata) >> 31) & 0x1) +#define MLX5_IPSEC_METADATA_SYNDROM(metadata) (((metadata) >> 24) & GENMASK(5, 0)) +#define MLX5_IPSEC_METADATA_HANDLE(metadata) ((metadata) & GENMASK(23, 0)) + +struct mlx5e_accel_tx_ipsec_state { + struct xfrm_offload *xo; + struct xfrm_state *x; + u32 tailen; + u32 plen; +}; + +#ifdef CONFIG_MLX5_EN_IPSEC + +void mlx5e_ipsec_inverse_table_init(void); +void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x, + struct xfrm_offload *xo); +void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x, + struct xfrm_offload *xo); +bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, + struct sk_buff *skb, + struct mlx5e_accel_tx_ipsec_state *ipsec_st); +void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe, + struct mlx5e_accel_tx_ipsec_state *ipsec_st, + struct mlx5_wqe_inline_seg *inlseg); +void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, + struct sk_buff *skb, + struct mlx5_cqe64 *cqe); +static inline unsigned int mlx5e_ipsec_tx_ids_len(struct mlx5e_accel_tx_ipsec_state *ipsec_st) +{ + return ipsec_st->tailen; +} + +static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe) +{ + return MLX5_IPSEC_METADATA_MARKER(be32_to_cpu(cqe->ft_metadata)); +} + +static inline bool mlx5e_ipsec_eseg_meta(struct mlx5_wqe_eth_seg *eseg) +{ + return eseg->flow_table_metadata & cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC); +} + +void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg); + +static inline netdev_features_t +mlx5e_ipsec_feature_check(struct sk_buff *skb, netdev_features_t features) +{ + struct xfrm_offload *xo = xfrm_offload(skb); + struct sec_path *sp = skb_sec_path(skb); + + if (sp && sp->len && xo) { + struct xfrm_state *x = sp->xvec[0]; + + if (!x || !x->xso.offload_handle) + goto out_disable; + + if (xo->inner_ipproto) { + /* Cannot support tunnel packet over IPsec tunnel mode + * because we cannot offload three IP header csum + */ + if 
(x->props.mode == XFRM_MODE_TUNNEL) + goto out_disable; + + /* Only support UDP or TCP L4 checksum */ + if (xo->inner_ipproto != IPPROTO_UDP && + xo->inner_ipproto != IPPROTO_TCP) + goto out_disable; + } + + return features; + + } + + /* Disable CSUM and GSO for software IPsec */ +out_disable: + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); +} + +static inline bool +mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg) +{ + u8 inner_ipproto; + + if (!mlx5e_ipsec_eseg_meta(eseg)) + return false; + + eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; + inner_ipproto = xfrm_offload(skb)->inner_ipproto; + if (inner_ipproto) { + eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM; + if (inner_ipproto == IPPROTO_TCP || inner_ipproto == IPPROTO_UDP) + eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM; + } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; + sq->stats->csum_partial_inner++; + } + + return true; +} +#else +static inline +void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, + struct sk_buff *skb, + struct mlx5_cqe64 *cqe) +{} + +static inline bool mlx5e_ipsec_eseg_meta(struct mlx5_wqe_eth_seg *eseg) +{ + return false; +} + +static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe) { return false; } +static inline netdev_features_t +mlx5e_ipsec_feature_check(struct sk_buff *skb, netdev_features_t features) +{ return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } + +static inline bool +mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg) +{ + return false; +} +#endif /* CONFIG_MLX5_EN_IPSEC */ + +#endif /* __MLX5E_IPSEC_RXTX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c new file mode 100644 index 000000000..9de84821d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#include <linux/ethtool.h> +#include <net/sock.h> + +#include "en.h" +#include "ipsec.h" + +static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sp_alloc) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sadb_miss) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_syndrome) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_bundle) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_no_state) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_not_ip) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_trailer) }, +}; + +#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \ + atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset)) + +#define NUM_IPSEC_SW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_sw_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_sw) +{ + return priv->ipsec ? NUM_IPSEC_SW_COUNTERS : 0; +} + +static inline MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ipsec_sw) {} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ipsec_sw) +{ + unsigned int i; + + if (priv->ipsec) + for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + mlx5e_ipsec_sw_stats_desc[i].format); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_sw) +{ + int i; + + if (priv->ipsec) + for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->ipsec->sw_stats, + mlx5e_ipsec_sw_stats_desc, i); + return idx; +} + +MLX5E_DEFINE_STATS_GRP(ipsec_sw, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c new file mode 100644 index 000000000..da2184c94 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. 
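+// Control-path glue for kTLS offload: encryption key create/destroy helpers,
+// the tlsdev_ops callbacks (add/del/resync), netdev feature setup and the
+// RX workqueue/steering init and cleanup.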
+ +#include "en.h" +#include "lib/mlx5.h" +#include "en_accel/ktls.h" +#include "en_accel/ktls_utils.h" +#include "en_accel/fs_tcp.h" + +int mlx5_ktls_create_key(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info, + u32 *p_key_id) +{ + u32 sz_bytes; + void *key; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = + (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + + key = info->key; + sz_bytes = sizeof(info->key); + break; + } + case TLS_CIPHER_AES_GCM_256: { + struct tls12_crypto_info_aes_gcm_256 *info = + (struct tls12_crypto_info_aes_gcm_256 *)crypto_info; + + key = info->key; + sz_bytes = sizeof(info->key); + break; + } + default: + return -EINVAL; + } + + return mlx5_create_encryption_key(mdev, key, sz_bytes, + MLX5_ACCEL_OBJ_TLS_KEY, + p_key_id); +} + +void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id) +{ + mlx5_destroy_encryption_key(mdev, key_id); +} + +static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk, + enum tls_offload_ctx_dir direction, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + if (!mlx5e_ktls_type_check(mdev, crypto_info)) + return -EOPNOTSUPP; + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) + err = mlx5e_ktls_add_tx(netdev, sk, crypto_info, start_offload_tcp_sn); + else + err = mlx5e_ktls_add_rx(netdev, sk, crypto_info, start_offload_tcp_sn); + + return err; +} + +static void mlx5e_ktls_del(struct net_device *netdev, + struct tls_context *tls_ctx, + enum tls_offload_ctx_dir direction) +{ + if (direction == TLS_OFFLOAD_CTX_DIR_TX) + mlx5e_ktls_del_tx(netdev, tls_ctx); + else + mlx5e_ktls_del_rx(netdev, tls_ctx); +} + +static int mlx5e_ktls_resync(struct net_device *netdev, + struct sock *sk, u32 seq, u8 *rcd_sn, + enum tls_offload_ctx_dir direction) +{ + if (unlikely(direction != TLS_OFFLOAD_CTX_DIR_RX)) + return -EOPNOTSUPP; + + mlx5e_ktls_rx_resync(netdev, sk, seq, rcd_sn); + return 0; +} + +static const struct tlsdev_ops mlx5e_ktls_ops = { + .tls_dev_add = mlx5e_ktls_add, + .tls_dev_del = mlx5e_ktls_del, + .tls_dev_resync = mlx5e_ktls_resync, +}; + +bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev) +{ + u8 max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev); + + if (is_kdump_kernel() || !MLX5_CAP_GEN(mdev, tls_rx)) + return false; + + /* Check the possibility to post the required ICOSQ WQEs. 
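+	 * Static params, progress params and GET_PSV must each fit in the
+	 * maximum SQ WQEBB budget reported by the device.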
*/ + if (WARN_ON_ONCE(max_sq_wqebbs < MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS)) + return false; + if (WARN_ON_ONCE(max_sq_wqebbs < MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS)) + return false; + if (WARN_ON_ONCE(max_sq_wqebbs < MLX5E_KTLS_GET_PROGRESS_WQEBBS)) + return false; + + return true; +} + +void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + + if (!mlx5e_is_ktls_tx(mdev) && !mlx5e_is_ktls_rx(mdev)) + return; + + if (mlx5e_is_ktls_tx(mdev)) { + netdev->hw_features |= NETIF_F_HW_TLS_TX; + netdev->features |= NETIF_F_HW_TLS_TX; + } + + if (mlx5e_is_ktls_rx(mdev)) + netdev->hw_features |= NETIF_F_HW_TLS_RX; + + netdev->tlsdev_ops = &mlx5e_ktls_ops; +} + +int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err = 0; + + mutex_lock(&priv->state_lock); + if (enable) + err = mlx5e_accel_fs_tcp_create(priv->fs); + else + mlx5e_accel_fs_tcp_destroy(priv->fs); + mutex_unlock(&priv->state_lock); + + return err; +} + +int mlx5e_ktls_init_rx(struct mlx5e_priv *priv) +{ + int err; + + if (!mlx5e_is_ktls_rx(priv->mdev)) + return 0; + + priv->tls->rx_wq = create_singlethread_workqueue("mlx5e_tls_rx"); + if (!priv->tls->rx_wq) + return -ENOMEM; + + if (priv->netdev->features & NETIF_F_HW_TLS_RX) { + err = mlx5e_accel_fs_tcp_create(priv->fs); + if (err) { + destroy_workqueue(priv->tls->rx_wq); + return err; + } + } + + return 0; +} + +void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv) +{ + if (!mlx5e_is_ktls_rx(priv->mdev)) + return; + + if (priv->netdev->features & NETIF_F_HW_TLS_RX) + mlx5e_accel_fs_tcp_destroy(priv->fs); + + destroy_workqueue(priv->tls->rx_wq); +} + +int mlx5e_ktls_init(struct mlx5e_priv *priv) +{ + struct mlx5e_tls *tls; + + if (!mlx5e_is_ktls_device(priv->mdev)) + return 0; + + tls = kzalloc(sizeof(*tls), GFP_KERNEL); + if (!tls) + return -ENOMEM; + + priv->tls = tls; + return 0; +} + +void mlx5e_ktls_cleanup(struct mlx5e_priv *priv) +{ + kfree(priv->tls); + priv->tls = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h new file mode 100644 index 000000000..1c35045e4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5E_KTLS_H__ +#define __MLX5E_KTLS_H__ + +#include <linux/tls.h> +#include <net/tls.h> +#include "en.h" + +#ifdef CONFIG_MLX5_EN_TLS +int mlx5_ktls_create_key(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info, + u32 *p_key_id); +void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id); + +static inline bool mlx5e_is_ktls_device(struct mlx5_core_dev *mdev) +{ + if (is_kdump_kernel()) + return false; + + if (!MLX5_CAP_GEN(mdev, tls_tx) && !MLX5_CAP_GEN(mdev, tls_rx)) + return false; + + if (!MLX5_CAP_GEN(mdev, log_max_dek)) + return false; + + return (MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128) || + MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_256)); +} + +static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info) +{ + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + if (crypto_info->version == TLS_1_2_VERSION) + return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128); + break; + case TLS_CIPHER_AES_GCM_256: + if (crypto_info->version == TLS_1_2_VERSION) + return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_256); + break; + } + + return false; +} + +void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv); +int mlx5e_ktls_init_tx(struct mlx5e_priv *priv); +void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv); +int mlx5e_ktls_init_rx(struct mlx5e_priv *priv); +void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv); +int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable); +struct mlx5e_ktls_resync_resp * +mlx5e_ktls_rx_resync_create_resp_list(void); +void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list); + +static inline bool mlx5e_is_ktls_tx(struct mlx5_core_dev *mdev) +{ + return !is_kdump_kernel() && MLX5_CAP_GEN(mdev, tls_tx); +} + +bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev); + +struct mlx5e_tls_sw_stats { + atomic64_t tx_tls_ctx; + atomic64_t tx_tls_del; + atomic64_t tx_tls_pool_alloc; + atomic64_t tx_tls_pool_free; + atomic64_t rx_tls_ctx; + atomic64_t rx_tls_del; +}; + +struct mlx5e_tls { + struct mlx5e_tls_sw_stats sw_stats; + struct workqueue_struct *rx_wq; + struct mlx5e_tls_tx_pool *tx_pool; +}; + +int mlx5e_ktls_init(struct mlx5e_priv *priv); +void mlx5e_ktls_cleanup(struct mlx5e_priv *priv); + +int mlx5e_ktls_get_count(struct mlx5e_priv *priv); +int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data); +int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data); + +#else +static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) +{ +} + +static inline int mlx5e_ktls_init_tx(struct mlx5e_priv *priv) +{ + return 0; +} + +static inline void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv) +{ +} + +static inline int mlx5e_ktls_init_rx(struct mlx5e_priv *priv) +{ + return 0; +} + +static inline void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv) +{ +} + +static inline int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable) +{ + netdev_warn(netdev, "kTLS is not supported\n"); + return -EOPNOTSUPP; +} + +static inline struct mlx5e_ktls_resync_resp * +mlx5e_ktls_rx_resync_create_resp_list(void) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void +mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list) {} + +static inline bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev) +{ + return false; +} + +static inline int mlx5e_ktls_init(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_ktls_cleanup(struct mlx5e_priv *priv) { } +static inline int mlx5e_ktls_get_count(struct mlx5e_priv *priv) { 
return 0; } +static inline int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data) +{ + return 0; +} + +static inline int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data) +{ + return 0; +} +#endif + +#endif /* __MLX5E_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c new file mode 100644 index 000000000..3e5483474 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -0,0 +1,782 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. + +#include <net/inet6_hashtables.h> +#include "en_accel/en_accel.h" +#include "en_accel/ktls.h" +#include "en_accel/ktls_txrx.h" +#include "en_accel/ktls_utils.h" +#include "en_accel/fs_tcp.h" + +struct accel_rule { + struct work_struct work; + struct mlx5e_priv *priv; + struct mlx5_flow_handle *rule; +}; + +#define PROGRESS_PARAMS_WRITE_UNIT 64 +#define PROGRESS_PARAMS_PADDED_SIZE \ + (ALIGN(sizeof(struct mlx5_wqe_tls_progress_params_seg), \ + PROGRESS_PARAMS_WRITE_UNIT)) + +struct mlx5e_ktls_rx_resync_buf { + union { + struct mlx5_wqe_tls_progress_params_seg progress; + u8 pad[PROGRESS_PARAMS_PADDED_SIZE]; + } ____cacheline_aligned_in_smp; + dma_addr_t dma_addr; + struct mlx5e_ktls_offload_context_rx *priv_rx; +}; + +enum { + MLX5E_PRIV_RX_FLAG_DELETING, + MLX5E_NUM_PRIV_RX_FLAGS, +}; + +struct mlx5e_ktls_rx_resync_ctx { + struct tls_offload_resync_async core; + struct work_struct work; + struct mlx5e_priv *priv; + refcount_t refcnt; + __be64 sw_rcd_sn_be; + u32 seq; +}; + +struct mlx5e_ktls_offload_context_rx { + union mlx5e_crypto_info crypto_info; + struct accel_rule rule; + struct sock *sk; + struct mlx5e_rq_stats *rq_stats; + struct mlx5e_tls_sw_stats *sw_stats; + struct completion add_ctx; + struct mlx5e_tir tir; + u32 key_id; + u32 rxq; + DECLARE_BITMAP(flags, MLX5E_NUM_PRIV_RX_FLAGS); + + /* resync */ + spinlock_t lock; /* protects resync fields */ + struct mlx5e_ktls_rx_resync_ctx resync; + struct list_head list; +}; + +static bool mlx5e_ktls_priv_rx_put(struct mlx5e_ktls_offload_context_rx *priv_rx) +{ + if (!refcount_dec_and_test(&priv_rx->resync.refcnt)) + return false; + + kfree(priv_rx); + return true; +} + +static void mlx5e_ktls_priv_rx_get(struct mlx5e_ktls_offload_context_rx *priv_rx) +{ + refcount_inc(&priv_rx->resync.refcnt); +} + +struct mlx5e_ktls_resync_resp { + /* protects list changes */ + spinlock_t lock; + struct list_head list; +}; + +void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list) +{ + kvfree(resp_list); +} + +struct mlx5e_ktls_resync_resp * +mlx5e_ktls_rx_resync_create_resp_list(void) +{ + struct mlx5e_ktls_resync_resp *resp_list; + + resp_list = kvzalloc(sizeof(*resp_list), GFP_KERNEL); + if (!resp_list) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&resp_list->list); + spin_lock_init(&resp_list->lock); + + return resp_list; +} + +static void accel_rule_handle_work(struct work_struct *work) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx; + struct accel_rule *accel_rule; + struct mlx5_flow_handle *rule; + + accel_rule = container_of(work, struct accel_rule, work); + priv_rx = container_of(accel_rule, struct mlx5e_ktls_offload_context_rx, rule); + if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) + goto out; + + rule = mlx5e_accel_fs_add_sk(accel_rule->priv->fs, priv_rx->sk, + mlx5e_tir_get_tirn(&priv_rx->tir), + MLX5_FS_DEFAULT_FLOW_TAG); + if (!IS_ERR_OR_NULL(rule)) + 
accel_rule->rule = rule; +out: + complete(&priv_rx->add_ctx); +} + +static void accel_rule_init(struct accel_rule *rule, struct mlx5e_priv *priv) +{ + INIT_WORK(&rule->work, accel_rule_handle_work); + rule->priv = priv; +} + +static void icosq_fill_wi(struct mlx5e_icosq *sq, u16 pi, + struct mlx5e_icosq_wqe_info *wi) +{ + sq->db.wqe_info[pi] = *wi; +} + +static struct mlx5_wqe_ctrl_seg * +post_static_params(struct mlx5e_icosq *sq, + struct mlx5e_ktls_offload_context_rx *priv_rx) +{ + struct mlx5e_set_tls_static_params_wqe *wqe; + struct mlx5e_icosq_wqe_info wi; + u16 pi, num_wqebbs; + + num_wqebbs = MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS; + if (unlikely(!mlx5e_icosq_can_post_wqe(sq, num_wqebbs))) + return ERR_PTR(-ENOSPC); + + pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs); + wqe = MLX5E_TLS_FETCH_SET_STATIC_PARAMS_WQE(sq, pi); + mlx5e_ktls_build_static_params(wqe, sq->pc, sq->sqn, &priv_rx->crypto_info, + mlx5e_tir_get_tirn(&priv_rx->tir), + priv_rx->key_id, priv_rx->resync.seq, false, + TLS_OFFLOAD_CTX_DIR_RX); + wi = (struct mlx5e_icosq_wqe_info) { + .wqe_type = MLX5E_ICOSQ_WQE_UMR_TLS, + .num_wqebbs = num_wqebbs, + .tls_set_params.priv_rx = priv_rx, + }; + icosq_fill_wi(sq, pi, &wi); + sq->pc += num_wqebbs; + + return &wqe->ctrl; +} + +static struct mlx5_wqe_ctrl_seg * +post_progress_params(struct mlx5e_icosq *sq, + struct mlx5e_ktls_offload_context_rx *priv_rx, + u32 next_record_tcp_sn) +{ + struct mlx5e_set_tls_progress_params_wqe *wqe; + struct mlx5e_icosq_wqe_info wi; + u16 pi, num_wqebbs; + + num_wqebbs = MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS; + if (unlikely(!mlx5e_icosq_can_post_wqe(sq, num_wqebbs))) + return ERR_PTR(-ENOSPC); + + pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs); + wqe = MLX5E_TLS_FETCH_SET_PROGRESS_PARAMS_WQE(sq, pi); + mlx5e_ktls_build_progress_params(wqe, sq->pc, sq->sqn, + mlx5e_tir_get_tirn(&priv_rx->tir), + false, next_record_tcp_sn, + TLS_OFFLOAD_CTX_DIR_RX); + wi = (struct mlx5e_icosq_wqe_info) { + .wqe_type = MLX5E_ICOSQ_WQE_SET_PSV_TLS, + .num_wqebbs = num_wqebbs, + .tls_set_params.priv_rx = priv_rx, + }; + + icosq_fill_wi(sq, pi, &wi); + sq->pc += num_wqebbs; + + return &wqe->ctrl; +} + +static int post_rx_param_wqes(struct mlx5e_channel *c, + struct mlx5e_ktls_offload_context_rx *priv_rx, + u32 next_record_tcp_sn) +{ + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5e_icosq *sq; + int err; + + err = 0; + sq = &c->async_icosq; + spin_lock_bh(&c->async_icosq_lock); + + cseg = post_static_params(sq, priv_rx); + if (IS_ERR(cseg)) + goto err_out; + cseg = post_progress_params(sq, priv_rx, next_record_tcp_sn); + if (IS_ERR(cseg)) + goto err_out; + + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); +unlock: + spin_unlock_bh(&c->async_icosq_lock); + + return err; + +err_out: + priv_rx->rq_stats->tls_resync_req_skip++; + err = PTR_ERR(cseg); + complete(&priv_rx->add_ctx); + goto unlock; +} + +static void +mlx5e_set_ktls_rx_priv_ctx(struct tls_context *tls_ctx, + struct mlx5e_ktls_offload_context_rx *priv_rx) +{ + struct mlx5e_ktls_offload_context_rx **ctx = + __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_RX); + + BUILD_BUG_ON(sizeof(priv_rx) > TLS_DRIVER_STATE_SIZE_RX); + + *ctx = priv_rx; +} + +static struct mlx5e_ktls_offload_context_rx * +mlx5e_get_ktls_rx_priv_ctx(struct tls_context *tls_ctx) +{ + struct mlx5e_ktls_offload_context_rx **ctx = + __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_RX); + + return *ctx; +} + +/* Re-sync */ +/* Runs in work context */ +static int +resync_post_get_progress_params(struct mlx5e_icosq *sq, + struct mlx5e_ktls_offload_context_rx 
*priv_rx) +{ + struct mlx5e_get_tls_progress_params_wqe *wqe; + struct mlx5e_ktls_rx_resync_buf *buf; + struct mlx5e_icosq_wqe_info wi; + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5_seg_get_psv *psv; + struct device *pdev; + int err; + u16 pi; + + buf = kzalloc(sizeof(*buf), GFP_KERNEL); + if (unlikely(!buf)) { + err = -ENOMEM; + goto err_out; + } + + pdev = mlx5_core_dma_dev(sq->channel->priv->mdev); + buf->dma_addr = dma_map_single(pdev, &buf->progress, + PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(pdev, buf->dma_addr))) { + err = -ENOMEM; + goto err_free; + } + + buf->priv_rx = priv_rx; + + spin_lock_bh(&sq->channel->async_icosq_lock); + + if (unlikely(!mlx5e_icosq_can_post_wqe(sq, MLX5E_KTLS_GET_PROGRESS_WQEBBS))) { + spin_unlock_bh(&sq->channel->async_icosq_lock); + err = -ENOSPC; + goto err_dma_unmap; + } + + pi = mlx5e_icosq_get_next_pi(sq, MLX5E_KTLS_GET_PROGRESS_WQEBBS); + wqe = MLX5E_TLS_FETCH_GET_PROGRESS_PARAMS_WQE(sq, pi); + +#define GET_PSV_DS_CNT (DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS)) + + cseg = &wqe->ctrl; + cseg->opmod_idx_opcode = + cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_GET_PSV | + (MLX5_OPC_MOD_TLS_TIR_PROGRESS_PARAMS << 24)); + cseg->qpn_ds = + cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | GET_PSV_DS_CNT); + + psv = &wqe->psv; + psv->num_psv = 1 << 4; + psv->l_key = sq->channel->mkey_be; + psv->psv_index[0] = cpu_to_be32(mlx5e_tir_get_tirn(&priv_rx->tir)); + psv->va = cpu_to_be64(buf->dma_addr); + + wi = (struct mlx5e_icosq_wqe_info) { + .wqe_type = MLX5E_ICOSQ_WQE_GET_PSV_TLS, + .num_wqebbs = MLX5E_KTLS_GET_PROGRESS_WQEBBS, + .tls_get_params.buf = buf, + }; + icosq_fill_wi(sq, pi, &wi); + sq->pc++; + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); + spin_unlock_bh(&sq->channel->async_icosq_lock); + + return 0; + +err_dma_unmap: + dma_unmap_single(pdev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); +err_free: + kfree(buf); +err_out: + priv_rx->rq_stats->tls_resync_req_skip++; + return err; +} + +/* Function is called with elevated refcount. + * It decreases it only if no WQE is posted. + */ +static void resync_handle_work(struct work_struct *work) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx; + struct mlx5e_ktls_rx_resync_ctx *resync; + struct mlx5e_channel *c; + struct mlx5e_icosq *sq; + + resync = container_of(work, struct mlx5e_ktls_rx_resync_ctx, work); + priv_rx = container_of(resync, struct mlx5e_ktls_offload_context_rx, resync); + + if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) { + mlx5e_ktls_priv_rx_put(priv_rx); + return; + } + + c = resync->priv->channels.c[priv_rx->rxq]; + sq = &c->async_icosq; + + if (resync_post_get_progress_params(sq, priv_rx)) + mlx5e_ktls_priv_rx_put(priv_rx); +} + +static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync, + struct mlx5e_priv *priv) +{ + INIT_WORK(&resync->work, resync_handle_work); + resync->priv = priv; + refcount_set(&resync->refcnt, 1); +} + +/* Function can be called with the refcount being either elevated or not. + * It does not affect the refcount. 
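+ * On a sequence-number match it copies the SW record sequence number into
+ * the cached crypto info and queues the RX context on the channel's kTLS
+ * resync list, kicking NAPI if it is not already scheduled.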
+ */ +static void resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_rx, + struct mlx5e_channel *c) +{ + struct mlx5e_ktls_resync_resp *ktls_resync; + struct mlx5e_icosq *sq; + bool trigger_poll; + + sq = &c->async_icosq; + ktls_resync = sq->ktls_resync; + trigger_poll = false; + + spin_lock_bh(&ktls_resync->lock); + spin_lock_bh(&priv_rx->lock); + switch (priv_rx->crypto_info.crypto_info.cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = + &priv_rx->crypto_info.crypto_info_128; + + memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be, + sizeof(info->rec_seq)); + break; + } + case TLS_CIPHER_AES_GCM_256: { + struct tls12_crypto_info_aes_gcm_256 *info = + &priv_rx->crypto_info.crypto_info_256; + + memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be, + sizeof(info->rec_seq)); + break; + } + default: + WARN_ONCE(1, "Unsupported cipher type %u\n", + priv_rx->crypto_info.crypto_info.cipher_type); + spin_unlock_bh(&priv_rx->lock); + spin_unlock_bh(&ktls_resync->lock); + return; + } + + if (list_empty(&priv_rx->list)) { + list_add_tail(&priv_rx->list, &ktls_resync->list); + trigger_poll = !test_and_set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state); + } + spin_unlock_bh(&priv_rx->lock); + spin_unlock_bh(&ktls_resync->lock); + + if (!trigger_poll) + return; + + if (!napi_if_scheduled_mark_missed(&c->napi)) { + spin_lock_bh(&c->async_icosq_lock); + mlx5e_trigger_irq(sq); + spin_unlock_bh(&c->async_icosq_lock); + } +} + +/* Function can be called with the refcount being either elevated or not. + * It decreases the refcount and may free the kTLS priv context. + * Refcount is not elevated only if tls_dev_del has been called, but GET_PSV was + * already in flight. + */ +void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, + struct mlx5e_icosq *sq) +{ + struct mlx5e_ktls_rx_resync_buf *buf = wi->tls_get_params.buf; + struct mlx5e_ktls_offload_context_rx *priv_rx; + struct mlx5e_ktls_rx_resync_ctx *resync; + u8 tracker_state, auth_state, *ctx; + struct device *dev; + u32 hw_seq; + + priv_rx = buf->priv_rx; + resync = &priv_rx->resync; + dev = mlx5_core_dma_dev(resync->priv->mdev); + if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) + goto out; + + dma_sync_single_for_cpu(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, + DMA_FROM_DEVICE); + + ctx = buf->progress.ctx; + tracker_state = MLX5_GET(tls_progress_params, ctx, record_tracker_state); + auth_state = MLX5_GET(tls_progress_params, ctx, auth_state); + if (tracker_state != MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING || + auth_state != MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD) { + priv_rx->rq_stats->tls_resync_req_skip++; + goto out; + } + + hw_seq = MLX5_GET(tls_progress_params, ctx, hw_resync_tcp_sn); + tls_offload_rx_resync_async_request_end(priv_rx->sk, cpu_to_be32(hw_seq)); + priv_rx->rq_stats->tls_resync_req_end++; +out: + mlx5e_ktls_priv_rx_put(priv_rx); + dma_unmap_single(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); + kfree(buf); +} + +/* Runs in NAPI. + * Function elevates the refcount, unless no work is queued. 
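+ * Takes a reference on the RX context and schedules work that posts a
+ * GET_PSV WQE to fetch the HW progress parameters for this TIR.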
+ */ +static bool resync_queue_get_psv(struct sock *sk) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx; + struct mlx5e_ktls_rx_resync_ctx *resync; + + priv_rx = mlx5e_get_ktls_rx_priv_ctx(tls_get_ctx(sk)); + if (unlikely(!priv_rx)) + return false; + + if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) + return false; + + resync = &priv_rx->resync; + mlx5e_ktls_priv_rx_get(priv_rx); + if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work))) + mlx5e_ktls_priv_rx_put(priv_rx); + + return true; +} + +/* Runs in NAPI */ +static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb) +{ + struct ethhdr *eth = (struct ethhdr *)(skb->data); + struct net_device *netdev = rq->netdev; + struct net *net = dev_net(netdev); + struct sock *sk = NULL; + unsigned int datalen; + struct iphdr *iph; + struct tcphdr *th; + __be32 seq; + int depth = 0; + + __vlan_get_protocol(skb, eth->h_proto, &depth); + iph = (struct iphdr *)(skb->data + depth); + + if (iph->version == 4) { + depth += sizeof(struct iphdr); + th = (void *)iph + sizeof(struct iphdr); + + sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, + iph->saddr, th->source, iph->daddr, + th->dest, netdev->ifindex); +#if IS_ENABLED(CONFIG_IPV6) + } else { + struct ipv6hdr *ipv6h = (struct ipv6hdr *)iph; + + depth += sizeof(struct ipv6hdr); + th = (void *)ipv6h + sizeof(struct ipv6hdr); + + sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, + &ipv6h->saddr, th->source, + &ipv6h->daddr, ntohs(th->dest), + netdev->ifindex, 0); +#endif + } + + depth += sizeof(struct tcphdr); + + if (unlikely(!sk)) + return; + + if (unlikely(sk->sk_state == TCP_TIME_WAIT)) + goto unref; + + if (unlikely(!resync_queue_get_psv(sk))) + goto unref; + + seq = th->seq; + datalen = skb->len - depth; + tls_offload_rx_resync_async_request_start(sk, seq, datalen); + rq->stats->tls_resync_req_start++; + +unref: + sock_gen_put(sk); +} + +void mlx5e_ktls_rx_resync(struct net_device *netdev, struct sock *sk, + u32 seq, u8 *rcd_sn) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx; + struct mlx5e_ktls_rx_resync_ctx *resync; + struct mlx5e_priv *priv; + struct mlx5e_channel *c; + + priv_rx = mlx5e_get_ktls_rx_priv_ctx(tls_get_ctx(sk)); + if (unlikely(!priv_rx)) + return; + + resync = &priv_rx->resync; + resync->sw_rcd_sn_be = *(__be64 *)rcd_sn; + resync->seq = seq; + + priv = netdev_priv(netdev); + c = priv->channels.c[priv_rx->rxq]; + + resync_handle_seq_match(priv_rx, c); +} + +/* End of resync section */ + +void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb, + struct mlx5_cqe64 *cqe, u32 *cqe_bcnt) +{ + struct mlx5e_rq_stats *stats = rq->stats; + + switch (get_cqe_tls_offload(cqe)) { + case CQE_TLS_OFFLOAD_DECRYPTED: + skb->decrypted = 1; + stats->tls_decrypted_packets++; + stats->tls_decrypted_bytes += *cqe_bcnt; + break; + case CQE_TLS_OFFLOAD_RESYNC: + stats->tls_resync_req_pkt++; + resync_update_sn(rq, skb); + break; + default: /* CQE_TLS_OFFLOAD_ERROR: */ + stats->tls_err++; + break; + } +} + +void mlx5e_ktls_handle_ctx_completion(struct mlx5e_icosq_wqe_info *wi) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx = wi->tls_set_params.priv_rx; + struct accel_rule *rule = &priv_rx->rule; + + if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) { + complete(&priv_rx->add_ctx); + return; + } + queue_work(rule->priv->tls->rx_wq, &rule->work); +} + +static int mlx5e_ktls_sk_get_rxq(struct sock *sk) +{ + int rxq = sk_rx_queue_get(sk); + + if (unlikely(rxq == -1)) + rxq = 0; + + 
return rxq; +} + +int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx; + struct mlx5e_ktls_rx_resync_ctx *resync; + struct tls_context *tls_ctx; + struct mlx5_core_dev *mdev; + struct mlx5e_priv *priv; + int rxq, err; + + tls_ctx = tls_get_ctx(sk); + priv = netdev_priv(netdev); + mdev = priv->mdev; + priv_rx = kzalloc(sizeof(*priv_rx), GFP_KERNEL); + if (unlikely(!priv_rx)) + return -ENOMEM; + + err = mlx5_ktls_create_key(mdev, crypto_info, &priv_rx->key_id); + if (err) + goto err_create_key; + + INIT_LIST_HEAD(&priv_rx->list); + spin_lock_init(&priv_rx->lock); + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + priv_rx->crypto_info.crypto_info_128 = + *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + break; + case TLS_CIPHER_AES_GCM_256: + priv_rx->crypto_info.crypto_info_256 = + *(struct tls12_crypto_info_aes_gcm_256 *)crypto_info; + break; + default: + WARN_ONCE(1, "Unsupported cipher type %u\n", + crypto_info->cipher_type); + return -EOPNOTSUPP; + } + + rxq = mlx5e_ktls_sk_get_rxq(sk); + priv_rx->rxq = rxq; + priv_rx->sk = sk; + + priv_rx->rq_stats = &priv->channel_stats[rxq]->rq; + priv_rx->sw_stats = &priv->tls->sw_stats; + mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx); + + err = mlx5e_rx_res_tls_tir_create(priv->rx_res, rxq, &priv_rx->tir); + if (err) + goto err_create_tir; + + init_completion(&priv_rx->add_ctx); + + accel_rule_init(&priv_rx->rule, priv); + resync = &priv_rx->resync; + resync_init(resync, priv); + tls_offload_ctx_rx(tls_ctx)->resync_async = &resync->core; + tls_offload_rx_resync_set_type(sk, TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ_ASYNC); + + err = post_rx_param_wqes(priv->channels.c[rxq], priv_rx, start_offload_tcp_sn); + if (err) + goto err_post_wqes; + + atomic64_inc(&priv_rx->sw_stats->rx_tls_ctx); + + return 0; + +err_post_wqes: + mlx5e_tir_destroy(&priv_rx->tir); +err_create_tir: + mlx5_ktls_destroy_key(mdev, priv_rx->key_id); +err_create_key: + kfree(priv_rx); + return err; +} + +void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx; + struct mlx5e_ktls_rx_resync_ctx *resync; + struct mlx5_core_dev *mdev; + struct mlx5e_priv *priv; + + priv = netdev_priv(netdev); + mdev = priv->mdev; + + priv_rx = mlx5e_get_ktls_rx_priv_ctx(tls_ctx); + set_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags); + mlx5e_set_ktls_rx_priv_ctx(tls_ctx, NULL); + synchronize_net(); /* Sync with NAPI */ + if (!cancel_work_sync(&priv_rx->rule.work)) + /* completion is needed, as the priv_rx in the add flow + * is maintained on the wqe info (wi), not on the socket. + */ + wait_for_completion(&priv_rx->add_ctx); + resync = &priv_rx->resync; + if (cancel_work_sync(&resync->work)) + mlx5e_ktls_priv_rx_put(priv_rx); + + atomic64_inc(&priv_rx->sw_stats->rx_tls_del); + if (priv_rx->rule.rule) + mlx5e_accel_fs_del_sk(priv_rx->rule.rule); + + mlx5e_tir_destroy(&priv_rx->tir); + mlx5_ktls_destroy_key(mdev, priv_rx->key_id); + /* priv_rx should normally be freed here, but if there is an outstanding + * GET_PSV, deallocation will be delayed until the CQE for GET_PSV is + * processed. 
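+ * The final reference is then dropped either by the put below or by the
+ * GET_PSV completion handler, whichever runs last.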
+ */ + mlx5e_ktls_priv_rx_put(priv_rx); +} + +bool mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx, *tmp; + struct mlx5e_ktls_resync_resp *ktls_resync; + struct mlx5_wqe_ctrl_seg *db_cseg; + struct mlx5e_icosq *sq; + LIST_HEAD(local_list); + int i, j; + + sq = &c->async_icosq; + + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + return false; + + ktls_resync = sq->ktls_resync; + db_cseg = NULL; + i = 0; + + spin_lock(&ktls_resync->lock); + list_for_each_entry_safe(priv_rx, tmp, &ktls_resync->list, list) { + list_move(&priv_rx->list, &local_list); + if (++i == budget) + break; + } + if (list_empty(&ktls_resync->list)) + clear_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state); + spin_unlock(&ktls_resync->lock); + + spin_lock(&c->async_icosq_lock); + for (j = 0; j < i; j++) { + struct mlx5_wqe_ctrl_seg *cseg; + + priv_rx = list_first_entry(&local_list, + struct mlx5e_ktls_offload_context_rx, + list); + spin_lock(&priv_rx->lock); + cseg = post_static_params(sq, priv_rx); + if (IS_ERR(cseg)) { + spin_unlock(&priv_rx->lock); + break; + } + list_del_init(&priv_rx->list); + spin_unlock(&priv_rx->lock); + db_cseg = cseg; + } + if (db_cseg) + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, db_cseg); + spin_unlock(&c->async_icosq_lock); + + priv_rx->rq_stats->tls_resync_res_ok += j; + + if (!list_empty(&local_list)) { + /* This happens only if ICOSQ is full. + * There is no need to mark busy or explicitly ask for a NAPI cycle, + * it will be triggered by the outstanding ICOSQ completions. + */ + spin_lock(&ktls_resync->lock); + list_splice(&local_list, &ktls_resync->list); + set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state); + spin_unlock(&ktls_resync->lock); + priv_rx->rq_stats->tls_resync_res_retry++; + } + + return i == budget; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c new file mode 100644 index 000000000..7c1c0eb16 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#include <linux/ethtool.h> +#include <net/sock.h> + +#include "en.h" +#include "fpga/sdk.h" +#include "en_accel/ktls.h" + +static const struct counter_desc mlx5e_ktls_sw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_ctx) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_del) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_pool_alloc) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_pool_free) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_ctx) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_del) }, +}; + +#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \ + atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset)) + +int mlx5e_ktls_get_count(struct mlx5e_priv *priv) +{ + if (!priv->tls) + return 0; + + return ARRAY_SIZE(mlx5e_ktls_sw_stats_desc); +} + +int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data) +{ + unsigned int i, n, idx = 0; + + if (!priv->tls) + return 0; + + n = mlx5e_ktls_get_count(priv); + + for (i = 0; i < n; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + mlx5e_ktls_sw_stats_desc[i].format); + + return n; +} + +int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data) +{ + unsigned int i, n, idx = 0; + + if (!priv->tls) + return 0; + + n = mlx5e_ktls_get_count(priv); + + for (i = 0; i < n; i++) + data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats, + mlx5e_ktls_sw_stats_desc, + i); + + return n; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c new file mode 100644 index 000000000..2e0335246 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -0,0 +1,921 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. + +#include "en_accel/ktls.h" +#include "en_accel/ktls_txrx.h" +#include "en_accel/ktls_utils.h" + +struct mlx5e_dump_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_data_seg data; +}; + +#define MLX5E_KTLS_DUMP_WQEBBS \ + (DIV_ROUND_UP(sizeof(struct mlx5e_dump_wqe), MLX5_SEND_WQE_BB)) + +static u8 +mlx5e_ktls_dumps_num_wqes(struct mlx5e_params *params, unsigned int nfrags, + unsigned int sync_len) +{ + /* Given the MTU and sync_len, calculates an upper bound for the + * number of DUMP WQEs needed for the TX resync of a record. 
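+ * Worst case is one DUMP WQE per frag plus one per MTU-sized chunk of the
+ * resync payload, e.g. roughly 11 extra WQEs for a full 16 KB TLS record
+ * at a ~1500 byte MTU.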
+ */ + return nfrags + DIV_ROUND_UP(sync_len, MLX5E_SW2HW_MTU(params, params->sw_mtu)); +} + +u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + u16 num_dumps, stop_room = 0; + + if (!mlx5e_is_ktls_tx(mdev)) + return 0; + + num_dumps = mlx5e_ktls_dumps_num_wqes(params, MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE); + + stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS); + stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS); + stop_room += num_dumps * mlx5e_stop_room_for_wqe(mdev, MLX5E_KTLS_DUMP_WQEBBS); + stop_room += 1; /* fence nop */ + + return stop_room; +} + +static void mlx5e_ktls_set_tisc(struct mlx5_core_dev *mdev, void *tisc) +{ + MLX5_SET(tisc, tisc, tls_en, 1); + MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn); + MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn); +} + +static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn) +{ + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + + mlx5e_ktls_set_tisc(mdev, MLX5_ADDR_OF(create_tis_in, in, ctx)); + + return mlx5_core_create_tis(mdev, in, tisn); +} + +static int mlx5e_ktls_create_tis_cb(struct mlx5_core_dev *mdev, + struct mlx5_async_ctx *async_ctx, + u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context) +{ + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + + mlx5e_ktls_set_tisc(mdev, MLX5_ADDR_OF(create_tis_in, in, ctx)); + MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS); + + return mlx5_cmd_exec_cb(async_ctx, in, sizeof(in), + out, outlen, callback, context); +} + +static int mlx5e_ktls_destroy_tis_cb(struct mlx5_core_dev *mdev, u32 tisn, + struct mlx5_async_ctx *async_ctx, + u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {}; + + MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, in, tisn, tisn); + + return mlx5_cmd_exec_cb(async_ctx, in, sizeof(in), + out, outlen, callback, context); +} + +struct mlx5e_ktls_offload_context_tx { + /* fast path */ + u32 expected_seq; + u32 tisn; + bool ctx_post_pending; + /* control / resync */ + struct list_head list_node; /* member of the pool */ + union mlx5e_crypto_info crypto_info; + struct tls_offload_context_tx *tx_ctx; + struct mlx5_core_dev *mdev; + struct mlx5e_tls_sw_stats *sw_stats; + u32 key_id; + u8 create_err : 1; +}; + +static void +mlx5e_set_ktls_tx_priv_ctx(struct tls_context *tls_ctx, + struct mlx5e_ktls_offload_context_tx *priv_tx) +{ + struct mlx5e_ktls_offload_context_tx **ctx = + __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_TX); + + BUILD_BUG_ON(sizeof(priv_tx) > TLS_DRIVER_STATE_SIZE_TX); + + *ctx = priv_tx; +} + +static struct mlx5e_ktls_offload_context_tx * +mlx5e_get_ktls_tx_priv_ctx(struct tls_context *tls_ctx) +{ + struct mlx5e_ktls_offload_context_tx **ctx = + __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_TX); + + return *ctx; +} + +/* struct for callback API management */ +struct mlx5e_async_ctx { + struct mlx5_async_work context; + struct mlx5_async_ctx async_ctx; + struct work_struct work; + struct mlx5e_ktls_offload_context_tx *priv_tx; + struct completion complete; + int err; + union { + u32 out_create[MLX5_ST_SZ_DW(create_tis_out)]; + u32 out_destroy[MLX5_ST_SZ_DW(destroy_tis_out)]; + }; +}; + +static struct mlx5e_async_ctx *mlx5e_bulk_async_init(struct mlx5_core_dev *mdev, int n) +{ + struct mlx5e_async_ctx *bulk_async; + int i; + + bulk_async = kvcalloc(n, sizeof(struct 
mlx5e_async_ctx), GFP_KERNEL); + if (!bulk_async) + return NULL; + + for (i = 0; i < n; i++) { + struct mlx5e_async_ctx *async = &bulk_async[i]; + + mlx5_cmd_init_async_ctx(mdev, &async->async_ctx); + init_completion(&async->complete); + } + + return bulk_async; +} + +static void mlx5e_bulk_async_cleanup(struct mlx5e_async_ctx *bulk_async, int n) +{ + int i; + + for (i = 0; i < n; i++) { + struct mlx5e_async_ctx *async = &bulk_async[i]; + + mlx5_cmd_cleanup_async_ctx(&async->async_ctx); + } + kvfree(bulk_async); +} + +static void create_tis_callback(int status, struct mlx5_async_work *context) +{ + struct mlx5e_async_ctx *async = + container_of(context, struct mlx5e_async_ctx, context); + struct mlx5e_ktls_offload_context_tx *priv_tx = async->priv_tx; + + if (status) { + async->err = status; + priv_tx->create_err = 1; + goto out; + } + + priv_tx->tisn = MLX5_GET(create_tis_out, async->out_create, tisn); +out: + complete(&async->complete); +} + +static void destroy_tis_callback(int status, struct mlx5_async_work *context) +{ + struct mlx5e_async_ctx *async = + container_of(context, struct mlx5e_async_ctx, context); + struct mlx5e_ktls_offload_context_tx *priv_tx = async->priv_tx; + + complete(&async->complete); + kfree(priv_tx); +} + +static struct mlx5e_ktls_offload_context_tx * +mlx5e_tls_priv_tx_init(struct mlx5_core_dev *mdev, struct mlx5e_tls_sw_stats *sw_stats, + struct mlx5e_async_ctx *async) +{ + struct mlx5e_ktls_offload_context_tx *priv_tx; + int err; + + priv_tx = kzalloc(sizeof(*priv_tx), GFP_KERNEL); + if (!priv_tx) + return ERR_PTR(-ENOMEM); + + priv_tx->mdev = mdev; + priv_tx->sw_stats = sw_stats; + + if (!async) { + err = mlx5e_ktls_create_tis(mdev, &priv_tx->tisn); + if (err) + goto err_out; + } else { + async->priv_tx = priv_tx; + err = mlx5e_ktls_create_tis_cb(mdev, &async->async_ctx, + async->out_create, sizeof(async->out_create), + create_tis_callback, &async->context); + if (err) + goto err_out; + } + + return priv_tx; + +err_out: + kfree(priv_tx); + return ERR_PTR(err); +} + +static void mlx5e_tls_priv_tx_cleanup(struct mlx5e_ktls_offload_context_tx *priv_tx, + struct mlx5e_async_ctx *async) +{ + if (priv_tx->create_err) { + complete(&async->complete); + kfree(priv_tx); + return; + } + async->priv_tx = priv_tx; + mlx5e_ktls_destroy_tis_cb(priv_tx->mdev, priv_tx->tisn, + &async->async_ctx, + async->out_destroy, sizeof(async->out_destroy), + destroy_tis_callback, &async->context); +} + +static void mlx5e_tls_priv_tx_list_cleanup(struct mlx5_core_dev *mdev, + struct list_head *list, int size) +{ + struct mlx5e_ktls_offload_context_tx *obj, *n; + struct mlx5e_async_ctx *bulk_async; + int i; + + bulk_async = mlx5e_bulk_async_init(mdev, size); + if (!bulk_async) + return; + + i = 0; + list_for_each_entry_safe(obj, n, list, list_node) { + mlx5e_tls_priv_tx_cleanup(obj, &bulk_async[i]); + i++; + } + + for (i = 0; i < size; i++) { + struct mlx5e_async_ctx *async = &bulk_async[i]; + + wait_for_completion(&async->complete); + } + mlx5e_bulk_async_cleanup(bulk_async, size); +} + +/* Recycling pool API */ + +#define MLX5E_TLS_TX_POOL_BULK (16) +#define MLX5E_TLS_TX_POOL_HIGH (4 * 1024) +#define MLX5E_TLS_TX_POOL_LOW (MLX5E_TLS_TX_POOL_HIGH / 4) + +struct mlx5e_tls_tx_pool { + struct mlx5_core_dev *mdev; + struct mlx5e_tls_sw_stats *sw_stats; + struct mutex lock; /* Protects access to the pool */ + struct list_head list; + size_t size; + + struct workqueue_struct *wq; + struct work_struct create_work; + struct work_struct destroy_work; +}; + +static void create_work(struct 
work_struct *work) +{ + struct mlx5e_tls_tx_pool *pool = + container_of(work, struct mlx5e_tls_tx_pool, create_work); + struct mlx5e_ktls_offload_context_tx *obj; + struct mlx5e_async_ctx *bulk_async; + LIST_HEAD(local_list); + int i, j, err = 0; + + bulk_async = mlx5e_bulk_async_init(pool->mdev, MLX5E_TLS_TX_POOL_BULK); + if (!bulk_async) + return; + + for (i = 0; i < MLX5E_TLS_TX_POOL_BULK; i++) { + obj = mlx5e_tls_priv_tx_init(pool->mdev, pool->sw_stats, &bulk_async[i]); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + break; + } + list_add(&obj->list_node, &local_list); + } + + for (j = 0; j < i; j++) { + struct mlx5e_async_ctx *async = &bulk_async[j]; + + wait_for_completion(&async->complete); + if (!err && async->err) + err = async->err; + } + atomic64_add(i, &pool->sw_stats->tx_tls_pool_alloc); + mlx5e_bulk_async_cleanup(bulk_async, MLX5E_TLS_TX_POOL_BULK); + if (err) + goto err_out; + + mutex_lock(&pool->lock); + if (pool->size + MLX5E_TLS_TX_POOL_BULK >= MLX5E_TLS_TX_POOL_HIGH) { + mutex_unlock(&pool->lock); + goto err_out; + } + list_splice(&local_list, &pool->list); + pool->size += MLX5E_TLS_TX_POOL_BULK; + if (pool->size <= MLX5E_TLS_TX_POOL_LOW) + queue_work(pool->wq, work); + mutex_unlock(&pool->lock); + return; + +err_out: + mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, i); + atomic64_add(i, &pool->sw_stats->tx_tls_pool_free); +} + +static void destroy_work(struct work_struct *work) +{ + struct mlx5e_tls_tx_pool *pool = + container_of(work, struct mlx5e_tls_tx_pool, destroy_work); + struct mlx5e_ktls_offload_context_tx *obj; + LIST_HEAD(local_list); + int i = 0; + + mutex_lock(&pool->lock); + if (pool->size < MLX5E_TLS_TX_POOL_HIGH) { + mutex_unlock(&pool->lock); + return; + } + + list_for_each_entry(obj, &pool->list, list_node) + if (++i == MLX5E_TLS_TX_POOL_BULK) + break; + + list_cut_position(&local_list, &pool->list, &obj->list_node); + pool->size -= MLX5E_TLS_TX_POOL_BULK; + if (pool->size >= MLX5E_TLS_TX_POOL_HIGH) + queue_work(pool->wq, work); + mutex_unlock(&pool->lock); + + mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, MLX5E_TLS_TX_POOL_BULK); + atomic64_add(MLX5E_TLS_TX_POOL_BULK, &pool->sw_stats->tx_tls_pool_free); +} + +static struct mlx5e_tls_tx_pool *mlx5e_tls_tx_pool_init(struct mlx5_core_dev *mdev, + struct mlx5e_tls_sw_stats *sw_stats) +{ + struct mlx5e_tls_tx_pool *pool; + + BUILD_BUG_ON(MLX5E_TLS_TX_POOL_LOW + MLX5E_TLS_TX_POOL_BULK >= MLX5E_TLS_TX_POOL_HIGH); + + pool = kvzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return NULL; + + pool->wq = create_singlethread_workqueue("mlx5e_tls_tx_pool"); + if (!pool->wq) + goto err_free; + + INIT_LIST_HEAD(&pool->list); + mutex_init(&pool->lock); + + INIT_WORK(&pool->create_work, create_work); + INIT_WORK(&pool->destroy_work, destroy_work); + + pool->mdev = mdev; + pool->sw_stats = sw_stats; + + return pool; + +err_free: + kvfree(pool); + return NULL; +} + +static void mlx5e_tls_tx_pool_list_cleanup(struct mlx5e_tls_tx_pool *pool) +{ + while (pool->size > MLX5E_TLS_TX_POOL_BULK) { + struct mlx5e_ktls_offload_context_tx *obj; + LIST_HEAD(local_list); + int i = 0; + + list_for_each_entry(obj, &pool->list, list_node) + if (++i == MLX5E_TLS_TX_POOL_BULK) + break; + + list_cut_position(&local_list, &pool->list, &obj->list_node); + mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, MLX5E_TLS_TX_POOL_BULK); + atomic64_add(MLX5E_TLS_TX_POOL_BULK, &pool->sw_stats->tx_tls_pool_free); + pool->size -= MLX5E_TLS_TX_POOL_BULK; + } + if (pool->size) { + mlx5e_tls_priv_tx_list_cleanup(pool->mdev, 
&pool->list, pool->size); + atomic64_add(pool->size, &pool->sw_stats->tx_tls_pool_free); + } +} + +static void mlx5e_tls_tx_pool_cleanup(struct mlx5e_tls_tx_pool *pool) +{ + mlx5e_tls_tx_pool_list_cleanup(pool); + destroy_workqueue(pool->wq); + kvfree(pool); +} + +static void pool_push(struct mlx5e_tls_tx_pool *pool, struct mlx5e_ktls_offload_context_tx *obj) +{ + mutex_lock(&pool->lock); + list_add(&obj->list_node, &pool->list); + if (++pool->size == MLX5E_TLS_TX_POOL_HIGH) + queue_work(pool->wq, &pool->destroy_work); + mutex_unlock(&pool->lock); +} + +static struct mlx5e_ktls_offload_context_tx *pool_pop(struct mlx5e_tls_tx_pool *pool) +{ + struct mlx5e_ktls_offload_context_tx *obj; + + mutex_lock(&pool->lock); + if (unlikely(pool->size == 0)) { + /* pool is empty: + * - trigger the populating work, and + * - serve the current context via the regular blocking api. + */ + queue_work(pool->wq, &pool->create_work); + mutex_unlock(&pool->lock); + obj = mlx5e_tls_priv_tx_init(pool->mdev, pool->sw_stats, NULL); + if (!IS_ERR(obj)) + atomic64_inc(&pool->sw_stats->tx_tls_pool_alloc); + return obj; + } + + obj = list_first_entry(&pool->list, struct mlx5e_ktls_offload_context_tx, + list_node); + list_del(&obj->list_node); + if (--pool->size == MLX5E_TLS_TX_POOL_LOW) + queue_work(pool->wq, &pool->create_work); + mutex_unlock(&pool->lock); + return obj; +} + +/* End of pool API */ + +int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk, + struct tls_crypto_info *crypto_info, u32 start_offload_tcp_sn) +{ + struct mlx5e_ktls_offload_context_tx *priv_tx; + struct mlx5e_tls_tx_pool *pool; + struct tls_context *tls_ctx; + struct mlx5e_priv *priv; + int err; + + tls_ctx = tls_get_ctx(sk); + priv = netdev_priv(netdev); + pool = priv->tls->tx_pool; + + priv_tx = pool_pop(pool); + if (IS_ERR(priv_tx)) + return PTR_ERR(priv_tx); + + err = mlx5_ktls_create_key(pool->mdev, crypto_info, &priv_tx->key_id); + if (err) + goto err_create_key; + + priv_tx->expected_seq = start_offload_tcp_sn; + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + priv_tx->crypto_info.crypto_info_128 = + *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + break; + case TLS_CIPHER_AES_GCM_256: + priv_tx->crypto_info.crypto_info_256 = + *(struct tls12_crypto_info_aes_gcm_256 *)crypto_info; + break; + default: + WARN_ONCE(1, "Unsupported cipher type %u\n", + crypto_info->cipher_type); + return -EOPNOTSUPP; + } + priv_tx->tx_ctx = tls_offload_ctx_tx(tls_ctx); + + mlx5e_set_ktls_tx_priv_ctx(tls_ctx, priv_tx); + + priv_tx->ctx_post_pending = true; + atomic64_inc(&priv_tx->sw_stats->tx_tls_ctx); + + return 0; + +err_create_key: + pool_push(pool, priv_tx); + return err; +} + +void mlx5e_ktls_del_tx(struct net_device *netdev, struct tls_context *tls_ctx) +{ + struct mlx5e_ktls_offload_context_tx *priv_tx; + struct mlx5e_tls_tx_pool *pool; + struct mlx5e_priv *priv; + + priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx); + priv = netdev_priv(netdev); + pool = priv->tls->tx_pool; + + atomic64_inc(&priv_tx->sw_stats->tx_tls_del); + mlx5_ktls_destroy_key(priv_tx->mdev, priv_tx->key_id); + pool_push(pool, priv_tx); +} + +static void tx_fill_wi(struct mlx5e_txqsq *sq, + u16 pi, u8 num_wqebbs, u32 num_bytes, + struct page *page) +{ + struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi]; + + *wi = (struct mlx5e_tx_wqe_info) { + .num_wqebbs = num_wqebbs, + .num_bytes = num_bytes, + .resync_dump_frag_page = page, + }; +} + +static bool +mlx5e_ktls_tx_offload_test_and_clear_pending(struct mlx5e_ktls_offload_context_tx *priv_tx) 
+{ + bool ret = priv_tx->ctx_post_pending; + + priv_tx->ctx_post_pending = false; + + return ret; +} + +static void +post_static_params(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool fence) +{ + struct mlx5e_set_tls_static_params_wqe *wqe; + u16 pi, num_wqebbs; + + num_wqebbs = MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS; + pi = mlx5e_txqsq_get_next_pi(sq, num_wqebbs); + wqe = MLX5E_TLS_FETCH_SET_STATIC_PARAMS_WQE(sq, pi); + mlx5e_ktls_build_static_params(wqe, sq->pc, sq->sqn, &priv_tx->crypto_info, + priv_tx->tisn, priv_tx->key_id, 0, fence, + TLS_OFFLOAD_CTX_DIR_TX); + tx_fill_wi(sq, pi, num_wqebbs, 0, NULL); + sq->pc += num_wqebbs; +} + +static void +post_progress_params(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool fence) +{ + struct mlx5e_set_tls_progress_params_wqe *wqe; + u16 pi, num_wqebbs; + + num_wqebbs = MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS; + pi = mlx5e_txqsq_get_next_pi(sq, num_wqebbs); + wqe = MLX5E_TLS_FETCH_SET_PROGRESS_PARAMS_WQE(sq, pi); + mlx5e_ktls_build_progress_params(wqe, sq->pc, sq->sqn, priv_tx->tisn, fence, 0, + TLS_OFFLOAD_CTX_DIR_TX); + tx_fill_wi(sq, pi, num_wqebbs, 0, NULL); + sq->pc += num_wqebbs; +} + +static void tx_post_fence_nop(struct mlx5e_txqsq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + + tx_fill_wi(sq, pi, 1, 0, NULL); + + mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc); +} + +static void +mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool skip_static_post, bool fence_first_post) +{ + bool progress_fence = skip_static_post || !fence_first_post; + + if (!skip_static_post) + post_static_params(sq, priv_tx, fence_first_post); + + post_progress_params(sq, priv_tx, progress_fence); + tx_post_fence_nop(sq); +} + +struct tx_sync_info { + u64 rcd_sn; + u32 sync_len; + int nr_frags; + skb_frag_t frags[MAX_SKB_FRAGS]; +}; + +enum mlx5e_ktls_sync_retval { + MLX5E_KTLS_SYNC_DONE, + MLX5E_KTLS_SYNC_FAIL, + MLX5E_KTLS_SYNC_SKIP_NO_DATA, +}; + +static enum mlx5e_ktls_sync_retval +tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx, + u32 tcp_seq, int datalen, struct tx_sync_info *info) +{ + struct tls_offload_context_tx *tx_ctx = priv_tx->tx_ctx; + enum mlx5e_ktls_sync_retval ret = MLX5E_KTLS_SYNC_DONE; + struct tls_record_info *record; + int remaining, i = 0; + unsigned long flags; + bool ends_before; + + spin_lock_irqsave(&tx_ctx->lock, flags); + record = tls_get_record(tx_ctx, tcp_seq, &info->rcd_sn); + + if (unlikely(!record)) { + ret = MLX5E_KTLS_SYNC_FAIL; + goto out; + } + + /* There are the following cases: + * 1. packet ends before start marker: bypass offload. + * 2. packet starts before start marker and ends after it: drop, + * not supported, breaks contract with kernel. + * 3. packet ends before tls record info starts: drop, + * this packet was already acknowledged and its record info + * was released. + */ + ends_before = before(tcp_seq + datalen - 1, tls_record_start_seq(record)); + + if (unlikely(tls_record_is_start_marker(record))) { + ret = ends_before ? 
MLX5E_KTLS_SYNC_SKIP_NO_DATA : MLX5E_KTLS_SYNC_FAIL; + goto out; + } else if (ends_before) { + ret = MLX5E_KTLS_SYNC_FAIL; + goto out; + } + + info->sync_len = tcp_seq - tls_record_start_seq(record); + remaining = info->sync_len; + while (remaining > 0) { + skb_frag_t *frag = &record->frags[i]; + + get_page(skb_frag_page(frag)); + remaining -= skb_frag_size(frag); + info->frags[i++] = *frag; + } + /* reduce the part which will be sent with the original SKB */ + if (remaining < 0) + skb_frag_size_add(&info->frags[i - 1], remaining); + info->nr_frags = i; +out: + spin_unlock_irqrestore(&tx_ctx->lock, flags); + return ret; +} + +static void +tx_post_resync_params(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + u64 rcd_sn) +{ + __be64 rn_be = cpu_to_be64(rcd_sn); + bool skip_static_post; + u16 rec_seq_sz; + char *rec_seq; + + switch (priv_tx->crypto_info.crypto_info.cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = &priv_tx->crypto_info.crypto_info_128; + + rec_seq = info->rec_seq; + rec_seq_sz = sizeof(info->rec_seq); + break; + } + case TLS_CIPHER_AES_GCM_256: { + struct tls12_crypto_info_aes_gcm_256 *info = &priv_tx->crypto_info.crypto_info_256; + + rec_seq = info->rec_seq; + rec_seq_sz = sizeof(info->rec_seq); + break; + } + default: + WARN_ONCE(1, "Unsupported cipher type %u\n", + priv_tx->crypto_info.crypto_info.cipher_type); + return; + } + + skip_static_post = !memcmp(rec_seq, &rn_be, rec_seq_sz); + if (!skip_static_post) + memcpy(rec_seq, &rn_be, rec_seq_sz); + + mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, skip_static_post, true); +} + +static int +tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn) +{ + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5_wqe_data_seg *dseg; + struct mlx5e_dump_wqe *wqe; + dma_addr_t dma_addr = 0; + u16 ds_cnt; + int fsz; + u16 pi; + + BUILD_BUG_ON(MLX5E_KTLS_DUMP_WQEBBS != 1); + pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + wqe = MLX5E_TLS_FETCH_DUMP_WQE(sq, pi); + + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; + + cseg = &wqe->ctrl; + dseg = &wqe->data; + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_DUMP); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + cseg->tis_tir_num = cpu_to_be32(tisn << 8); + + fsz = skb_frag_size(frag); + dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) + return -ENOMEM; + + dseg->addr = cpu_to_be64(dma_addr); + dseg->lkey = sq->mkey_be; + dseg->byte_count = cpu_to_be32(fsz); + mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); + + tx_fill_wi(sq, pi, MLX5E_KTLS_DUMP_WQEBBS, fsz, skb_frag_page(frag)); + sq->pc += MLX5E_KTLS_DUMP_WQEBBS; + + return 0; +} + +void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe_info *wi, + u32 *dma_fifo_cc) +{ + struct mlx5e_sq_stats *stats; + struct mlx5e_sq_dma *dma; + + dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++); + stats = sq->stats; + + mlx5e_tx_dma_unmap(sq->pdev, dma); + put_page(wi->resync_dump_frag_page); + stats->tls_dump_packets++; + stats->tls_dump_bytes += wi->num_bytes; +} + +static enum mlx5e_ktls_sync_retval +mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, + struct mlx5e_txqsq *sq, + int datalen, + u32 seq) +{ + enum mlx5e_ktls_sync_retval ret; + struct tx_sync_info info = {}; + int i; + + ret = tx_sync_info_get(priv_tx, seq, datalen, &info); + if (unlikely(ret != MLX5E_KTLS_SYNC_DONE)) + /* We might get here with ret == FAIL if a 
retransmission + * reaches the driver after the relevant record is acked. + * It should be safe to drop the packet in this case + */ + return ret; + + tx_post_resync_params(sq, priv_tx, info.rcd_sn); + + for (i = 0; i < info.nr_frags; i++) { + unsigned int orig_fsz, frag_offset = 0, n = 0; + skb_frag_t *f = &info.frags[i]; + + orig_fsz = skb_frag_size(f); + + do { + unsigned int fsz; + + n++; + fsz = min_t(unsigned int, sq->hw_mtu, orig_fsz - frag_offset); + skb_frag_size_set(f, fsz); + if (tx_post_resync_dump(sq, f, priv_tx->tisn)) { + page_ref_add(skb_frag_page(f), n - 1); + goto err_out; + } + + skb_frag_off_add(f, fsz); + frag_offset += fsz; + } while (frag_offset < orig_fsz); + + page_ref_add(skb_frag_page(f), n - 1); + } + + return MLX5E_KTLS_SYNC_DONE; + +err_out: + for (; i < info.nr_frags; i++) + /* The put_page() here undoes the page ref obtained in tx_sync_info_get(). + * Page refs obtained for the DUMP WQEs above (by page_ref_add) will be + * released only upon their completions (or in mlx5e_free_txqsq_descs, + * if channel closes). + */ + put_page(skb_frag_page(&info.frags[i])); + + return MLX5E_KTLS_SYNC_FAIL; +} + +bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, + struct sk_buff *skb, + struct mlx5e_accel_tx_tls_state *state) +{ + struct mlx5e_ktls_offload_context_tx *priv_tx; + struct mlx5e_sq_stats *stats = sq->stats; + struct net_device *tls_netdev; + struct tls_context *tls_ctx; + int datalen; + u32 seq; + + datalen = skb->len - skb_tcp_all_headers(skb); + if (!datalen) + return true; + + mlx5e_tx_mpwqe_ensure_complete(sq); + + tls_ctx = tls_get_ctx(skb->sk); + tls_netdev = rcu_dereference_bh(tls_ctx->netdev); + /* Don't WARN on NULL: if tls_device_down is running in parallel, + * netdev might become NULL, even if tls_is_sk_tx_device_offloaded was + * true. Rather continue processing this packet. + */ + if (WARN_ON_ONCE(tls_netdev && tls_netdev != netdev)) + goto err_out; + + priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx); + + if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) + mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, false, false); + + seq = ntohl(tcp_hdr(skb)->seq); + if (unlikely(priv_tx->expected_seq != seq)) { + enum mlx5e_ktls_sync_retval ret = + mlx5e_ktls_tx_handle_ooo(priv_tx, sq, datalen, seq); + + stats->tls_ooo++; + + switch (ret) { + case MLX5E_KTLS_SYNC_DONE: + break; + case MLX5E_KTLS_SYNC_SKIP_NO_DATA: + stats->tls_skip_no_sync_data++; + if (likely(!skb->decrypted)) + goto out; + WARN_ON_ONCE(1); + goto err_out; + case MLX5E_KTLS_SYNC_FAIL: + stats->tls_drop_no_sync_data++; + goto err_out; + } + } + + priv_tx->expected_seq = seq + datalen; + + state->tls_tisn = priv_tx->tisn; + + stats->tls_encrypted_packets += skb_is_gso(skb) ? 
skb_shinfo(skb)->gso_segs : 1; + stats->tls_encrypted_bytes += datalen; + +out: + return true; + +err_out: + dev_kfree_skb_any(skb); + return false; +} + +int mlx5e_ktls_init_tx(struct mlx5e_priv *priv) +{ + if (!mlx5e_is_ktls_tx(priv->mdev)) + return 0; + + priv->tls->tx_pool = mlx5e_tls_tx_pool_init(priv->mdev, &priv->tls->sw_stats); + if (!priv->tls->tx_pool) + return -ENOMEM; + + return 0; +} + +void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv) +{ + if (!mlx5e_is_ktls_tx(priv->mdev)) + return; + + mlx5e_tls_tx_pool_cleanup(priv->tls->tx_pool); + priv->tls->tx_pool = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c new file mode 100644 index 000000000..570a912dd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#include "en_accel/ktls_txrx.h" +#include "en_accel/ktls_utils.h" + +enum { + MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2 = 0x2, +}; + +enum { + MLX5E_ENCRYPTION_STANDARD_TLS = 0x1, +}; + +#define EXTRACT_INFO_FIELDS do { \ + salt = info->salt; \ + rec_seq = info->rec_seq; \ + salt_sz = sizeof(info->salt); \ + rec_seq_sz = sizeof(info->rec_seq); \ +} while (0) + +static void +fill_static_params(struct mlx5_wqe_tls_static_params_seg *params, + union mlx5e_crypto_info *crypto_info, + u32 key_id, u32 resync_tcp_sn) +{ + char *initial_rn, *gcm_iv; + u16 salt_sz, rec_seq_sz; + char *salt, *rec_seq; + u8 tls_version; + u8 *ctx; + + ctx = params->ctx; + + switch (crypto_info->crypto_info.cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = + &crypto_info->crypto_info_128; + + EXTRACT_INFO_FIELDS; + break; + } + case TLS_CIPHER_AES_GCM_256: { + struct tls12_crypto_info_aes_gcm_256 *info = + &crypto_info->crypto_info_256; + + EXTRACT_INFO_FIELDS; + break; + } + default: + WARN_ONCE(1, "Unsupported cipher type %u\n", + crypto_info->crypto_info.cipher_type); + return; + } + + gcm_iv = MLX5_ADDR_OF(tls_static_params, ctx, gcm_iv); + initial_rn = MLX5_ADDR_OF(tls_static_params, ctx, initial_record_number); + + memcpy(gcm_iv, salt, salt_sz); + memcpy(initial_rn, rec_seq, rec_seq_sz); + + tls_version = MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2; + + MLX5_SET(tls_static_params, ctx, tls_version, tls_version); + MLX5_SET(tls_static_params, ctx, const_1, 1); + MLX5_SET(tls_static_params, ctx, const_2, 2); + MLX5_SET(tls_static_params, ctx, encryption_standard, + MLX5E_ENCRYPTION_STANDARD_TLS); + MLX5_SET(tls_static_params, ctx, resync_tcp_sn, resync_tcp_sn); + MLX5_SET(tls_static_params, ctx, dek_index, key_id); +} + +void +mlx5e_ktls_build_static_params(struct mlx5e_set_tls_static_params_wqe *wqe, + u16 pc, u32 sqn, + union mlx5e_crypto_info *crypto_info, + u32 tis_tir_num, u32 key_id, u32 resync_tcp_sn, + bool fence, enum tls_offload_ctx_dir direction) +{ + struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + u8 opmod = direction == TLS_OFFLOAD_CTX_DIR_TX ? + MLX5_OPC_MOD_TLS_TIS_STATIC_PARAMS : + MLX5_OPC_MOD_TLS_TIR_STATIC_PARAMS; + +#define STATIC_PARAMS_DS_CNT DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS) + + cseg->opmod_idx_opcode = cpu_to_be32((pc << 8) | MLX5_OPCODE_UMR | (opmod << 24)); + cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + STATIC_PARAMS_DS_CNT); + cseg->fm_ce_se = fence ? 
MLX5_FENCE_MODE_INITIATOR_SMALL : 0; + cseg->tis_tir_num = cpu_to_be32(tis_tir_num << 8); + + ucseg->flags = MLX5_UMR_INLINE; + ucseg->bsf_octowords = cpu_to_be16(MLX5_ST_SZ_BYTES(tls_static_params) / 16); + + fill_static_params(&wqe->params, crypto_info, key_id, resync_tcp_sn); +} + +static void +fill_progress_params(struct mlx5_wqe_tls_progress_params_seg *params, u32 tis_tir_num, + u32 next_record_tcp_sn) +{ + u8 *ctx = params->ctx; + + params->tis_tir_num = cpu_to_be32(tis_tir_num); + + MLX5_SET(tls_progress_params, ctx, next_record_tcp_sn, + next_record_tcp_sn); + MLX5_SET(tls_progress_params, ctx, record_tracker_state, + MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START); + MLX5_SET(tls_progress_params, ctx, auth_state, + MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD); +} + +void +mlx5e_ktls_build_progress_params(struct mlx5e_set_tls_progress_params_wqe *wqe, + u16 pc, u32 sqn, + u32 tis_tir_num, bool fence, + u32 next_record_tcp_sn, + enum tls_offload_ctx_dir direction) +{ + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + u8 opmod = direction == TLS_OFFLOAD_CTX_DIR_TX ? + MLX5_OPC_MOD_TLS_TIS_PROGRESS_PARAMS : + MLX5_OPC_MOD_TLS_TIR_PROGRESS_PARAMS; + +#define PROGRESS_PARAMS_DS_CNT DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS) + + cseg->opmod_idx_opcode = + cpu_to_be32((pc << 8) | MLX5_OPCODE_SET_PSV | (opmod << 24)); + cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + PROGRESS_PARAMS_DS_CNT); + cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; + + fill_progress_params(&wqe->params, tis_tir_num, next_record_tcp_sn); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h new file mode 100644 index 000000000..2dd78dd4a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. 
*/ + +#ifndef __MLX5E_KTLS_TXRX_H__ +#define __MLX5E_KTLS_TXRX_H__ + +#ifdef CONFIG_MLX5_EN_TLS + +#include <net/tls.h> +#include "en.h" +#include "en/txrx.h" + +struct mlx5e_accel_tx_tls_state { + u32 tls_tisn; +}; + +u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params); + +bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, + struct sk_buff *skb, + struct mlx5e_accel_tx_tls_state *state); +void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb, + struct mlx5_cqe64 *cqe, u32 *cqe_bcnt); + +void mlx5e_ktls_handle_ctx_completion(struct mlx5e_icosq_wqe_info *wi); +void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, + struct mlx5e_icosq *sq); + +void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe_info *wi, + u32 *dma_fifo_cc); +static inline bool +mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe_info *wi, + u32 *dma_fifo_cc) +{ + if (unlikely(wi->resync_dump_frag_page)) { + mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma_fifo_cc); + return true; + } + return false; +} + +bool mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget); + +static inline bool +mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget) +{ + return budget && test_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &c->async_icosq.state); +} + +static inline bool mlx5e_ktls_skb_offloaded(struct sk_buff *skb) +{ + return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk); +} + +static inline void +mlx5e_ktls_handle_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg, + struct mlx5e_accel_tx_tls_state *state) +{ + cseg->tis_tir_num = cpu_to_be32(state->tls_tisn << 8); +} +#else +static inline bool +mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe_info *wi, + u32 *dma_fifo_cc) +{ + return false; +} + +static inline bool +mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget) +{ + return false; +} + +static inline bool +mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget) +{ + return false; +} + +static inline u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + return 0; +} + +static inline void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, + struct sk_buff *skb, + struct mlx5_cqe64 *cqe, + u32 *cqe_bcnt) +{ +} +#endif /* CONFIG_MLX5_EN_TLS */ + +#endif /* __MLX5E_TLS_TXRX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h new file mode 100644 index 000000000..3d79cd379 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. 
*/ + +#ifndef __MLX5E_KTLS_UTILS_H__ +#define __MLX5E_KTLS_UTILS_H__ + +#include <net/tls.h> +#include "en.h" + +enum { + MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD = 0, + MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_OFFLOAD = 1, + MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_AUTHENTICATION = 2, +}; + +enum { + MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START = 0, + MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING = 1, + MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING = 2, +}; + +int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk, + struct tls_crypto_info *crypto_info, u32 start_offload_tcp_sn); +void mlx5e_ktls_del_tx(struct net_device *netdev, struct tls_context *tls_ctx); +int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, + struct tls_crypto_info *crypto_info, u32 start_offload_tcp_sn); +void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx); +void mlx5e_ktls_rx_resync(struct net_device *netdev, struct sock *sk, u32 seq, u8 *rcd_sn); + +union mlx5e_crypto_info { + struct tls_crypto_info crypto_info; + struct tls12_crypto_info_aes_gcm_128 crypto_info_128; + struct tls12_crypto_info_aes_gcm_256 crypto_info_256; +}; + +struct mlx5e_set_tls_static_params_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_umr_ctrl_seg uctrl; + struct mlx5_mkey_seg mkc; + struct mlx5_wqe_tls_static_params_seg params; +}; + +struct mlx5e_set_tls_progress_params_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_tls_progress_params_seg params; +}; + +struct mlx5e_get_tls_progress_params_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_seg_get_psv psv; +}; + +#define MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS \ + (DIV_ROUND_UP(sizeof(struct mlx5e_set_tls_static_params_wqe), MLX5_SEND_WQE_BB)) + +#define MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS \ + (DIV_ROUND_UP(sizeof(struct mlx5e_set_tls_progress_params_wqe), MLX5_SEND_WQE_BB)) + +#define MLX5E_KTLS_GET_PROGRESS_WQEBBS \ + (DIV_ROUND_UP(sizeof(struct mlx5e_get_tls_progress_params_wqe), MLX5_SEND_WQE_BB)) + +#define MLX5E_TLS_FETCH_SET_STATIC_PARAMS_WQE(sq, pi) \ + ((struct mlx5e_set_tls_static_params_wqe *)\ + mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_set_tls_static_params_wqe))) + +#define MLX5E_TLS_FETCH_SET_PROGRESS_PARAMS_WQE(sq, pi) \ + ((struct mlx5e_set_tls_progress_params_wqe *)\ + mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_set_tls_progress_params_wqe))) + +#define MLX5E_TLS_FETCH_GET_PROGRESS_PARAMS_WQE(sq, pi) \ + ((struct mlx5e_get_tls_progress_params_wqe *)\ + mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_get_tls_progress_params_wqe))) + +#define MLX5E_TLS_FETCH_DUMP_WQE(sq, pi) \ + ((struct mlx5e_dump_wqe *)\ + mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_dump_wqe))) + +void +mlx5e_ktls_build_static_params(struct mlx5e_set_tls_static_params_wqe *wqe, + u16 pc, u32 sqn, + union mlx5e_crypto_info *crypto_info, + u32 tis_tir_num, u32 key_id, u32 resync_tcp_sn, + bool fence, enum tls_offload_ctx_dir direction); +void +mlx5e_ktls_build_progress_params(struct mlx5e_set_tls_progress_params_wqe *wqe, + u16 pc, u32 sqn, + u32 tis_tir_num, bool fence, + u32 next_record_tcp_sn, + enum tls_offload_ctx_dir direction); + +#endif /* __MLX5E_TLS_UTILS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c new file mode 100644 index 000000000..a7832a018 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -0,0 +1,1866 @@ +// SPDX-License-Identifier: GPL-2.0 OR 
Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include <linux/mlx5/device.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/xarray.h> + +#include "en.h" +#include "lib/aso.h" +#include "lib/mlx5.h" +#include "en_accel/macsec.h" +#include "en_accel/macsec_fs.h" + +#define MLX5_MACSEC_EPN_SCOPE_MID 0x80000000L +#define MLX5E_MACSEC_ASO_CTX_SZ MLX5_ST_SZ_BYTES(macsec_aso) + +enum mlx5_macsec_aso_event_arm { + MLX5E_ASO_EPN_ARM = BIT(0), +}; + +enum { + MLX5_MACSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET, +}; + +struct mlx5e_macsec_handle { + struct mlx5e_macsec *macsec; + u32 obj_id; + u8 idx; +}; + +enum { + MLX5_MACSEC_EPN, +}; + +struct mlx5e_macsec_aso_out { + u8 event_arm; + u32 mode_param; +}; + +struct mlx5e_macsec_aso_in { + u8 mode; + u32 obj_id; +}; + +struct mlx5e_macsec_epn_state { + u32 epn_msb; + u8 epn_enabled; + u8 overlap; +}; + +struct mlx5e_macsec_async_work { + struct mlx5e_macsec *macsec; + struct mlx5_core_dev *mdev; + struct work_struct work; + u32 obj_id; +}; + +struct mlx5e_macsec_sa { + bool active; + u8 assoc_num; + u32 macsec_obj_id; + u32 enc_key_id; + u32 next_pn; + sci_t sci; + ssci_t ssci; + salt_t salt; + + struct rhash_head hash; + u32 fs_id; + union mlx5e_macsec_rule *macsec_rule; + struct rcu_head rcu_head; + struct mlx5e_macsec_epn_state epn_state; +}; + +struct mlx5e_macsec_rx_sc; +struct mlx5e_macsec_rx_sc_xarray_element { + u32 fs_id; + struct mlx5e_macsec_rx_sc *rx_sc; +}; + +struct mlx5e_macsec_rx_sc { + bool active; + sci_t sci; + struct mlx5e_macsec_sa *rx_sa[MACSEC_NUM_AN]; + struct list_head rx_sc_list_element; + struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element; + struct metadata_dst *md_dst; + struct rcu_head rcu_head; +}; + +struct mlx5e_macsec_umr { + u8 __aligned(64) ctx[MLX5_ST_SZ_BYTES(macsec_aso)]; + dma_addr_t dma_addr; + u32 mkey; +}; + +struct mlx5e_macsec_aso { + /* ASO */ + struct mlx5_aso *maso; + /* Protects macsec ASO */ + struct mutex aso_lock; + /* UMR */ + struct mlx5e_macsec_umr *umr; + + u32 pdn; +}; + +static const struct rhashtable_params rhash_sci = { + .key_len = sizeof_field(struct mlx5e_macsec_sa, sci), + .key_offset = offsetof(struct mlx5e_macsec_sa, sci), + .head_offset = offsetof(struct mlx5e_macsec_sa, hash), + .automatic_shrinking = true, + .min_size = 1, +}; + +struct mlx5e_macsec_device { + const struct net_device *netdev; + struct mlx5e_macsec_sa *tx_sa[MACSEC_NUM_AN]; + struct list_head macsec_rx_sc_list_head; + unsigned char *dev_addr; + struct list_head macsec_device_list_element; +}; + +struct mlx5e_macsec { + struct list_head macsec_device_list_head; + int num_of_devices; + struct mlx5e_macsec_fs *macsec_fs; + struct mutex lock; /* Protects mlx5e_macsec internal contexts */ + + /* Tx sci -> fs id mapping handling */ + struct rhashtable sci_hash; /* sci -> mlx5e_macsec_sa */ + + /* Rx fs_id -> rx_sc mapping */ + struct xarray sc_xarray; + + struct mlx5_core_dev *mdev; + + /* Stats manage */ + struct mlx5e_macsec_stats stats; + + /* ASO */ + struct mlx5e_macsec_aso aso; + + struct notifier_block nb; + struct workqueue_struct *wq; +}; + +struct mlx5_macsec_obj_attrs { + u32 aso_pdn; + u32 next_pn; + __be64 sci; + u32 enc_key_id; + bool encrypt; + struct mlx5e_macsec_epn_state epn_state; + salt_t salt; + __be32 ssci; + bool replay_protect; + u32 replay_window; +}; + +struct mlx5_aso_ctrl_param { + u8 data_mask_mode; + u8 condition_0_operand; + u8 condition_1_operand; + u8 condition_0_offset; + u8 condition_1_offset; + u8 data_offset; + u8 condition_operand; 
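+ /* The condition/data operands below are converted to big endian and
+ * copied into the matching struct mlx5_wqe_aso_ctrl_seg fields by
+ * macsec_aso_build_wqe_ctrl_seg().
+ */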
+ u32 condition_0_data; + u32 condition_0_mask; + u32 condition_1_data; + u32 condition_1_mask; + u64 bitwise_data; + u64 data_mask; +}; + +static int mlx5e_macsec_aso_reg_mr(struct mlx5_core_dev *mdev, struct mlx5e_macsec_aso *aso) +{ + struct mlx5e_macsec_umr *umr; + struct device *dma_device; + dma_addr_t dma_addr; + int err; + + umr = kzalloc(sizeof(*umr), GFP_KERNEL); + if (!umr) { + err = -ENOMEM; + return err; + } + + dma_device = &mdev->pdev->dev; + dma_addr = dma_map_single(dma_device, umr->ctx, sizeof(umr->ctx), DMA_BIDIRECTIONAL); + err = dma_mapping_error(dma_device, dma_addr); + if (err) { + mlx5_core_err(mdev, "Can't map dma device, err=%d\n", err); + goto out_dma; + } + + err = mlx5e_create_mkey(mdev, aso->pdn, &umr->mkey); + if (err) { + mlx5_core_err(mdev, "Can't create mkey, err=%d\n", err); + goto out_mkey; + } + + umr->dma_addr = dma_addr; + + aso->umr = umr; + + return 0; + +out_mkey: + dma_unmap_single(dma_device, dma_addr, sizeof(umr->ctx), DMA_BIDIRECTIONAL); +out_dma: + kfree(umr); + return err; +} + +static void mlx5e_macsec_aso_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5e_macsec_aso *aso) +{ + struct mlx5e_macsec_umr *umr = aso->umr; + + mlx5_core_destroy_mkey(mdev, umr->mkey); + dma_unmap_single(&mdev->pdev->dev, umr->dma_addr, sizeof(umr->ctx), DMA_BIDIRECTIONAL); + kfree(umr); +} + +static int macsec_set_replay_protection(struct mlx5_macsec_obj_attrs *attrs, void *aso_ctx) +{ + u8 window_sz; + + if (!attrs->replay_protect) + return 0; + + switch (attrs->replay_window) { + case 256: + window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_256BIT; + break; + case 128: + window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_128BIT; + break; + case 64: + window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_64BIT; + break; + case 32: + window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_32BIT; + break; + default: + return -EINVAL; + } + MLX5_SET(macsec_aso, aso_ctx, window_size, window_sz); + MLX5_SET(macsec_aso, aso_ctx, mode, MLX5_MACSEC_ASO_REPLAY_PROTECTION); + + return 0; +} + +static int mlx5e_macsec_create_object(struct mlx5_core_dev *mdev, + struct mlx5_macsec_obj_attrs *attrs, + bool is_tx, + u32 *macsec_obj_id) +{ + u32 in[MLX5_ST_SZ_DW(create_macsec_obj_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + void *aso_ctx; + void *obj; + int err; + + obj = MLX5_ADDR_OF(create_macsec_obj_in, in, macsec_object); + aso_ctx = MLX5_ADDR_OF(macsec_offload_obj, obj, macsec_aso); + + MLX5_SET(macsec_offload_obj, obj, confidentiality_en, attrs->encrypt); + MLX5_SET(macsec_offload_obj, obj, dekn, attrs->enc_key_id); + MLX5_SET(macsec_offload_obj, obj, aso_return_reg, MLX5_MACSEC_ASO_REG_C_4_5); + MLX5_SET(macsec_offload_obj, obj, macsec_aso_access_pd, attrs->aso_pdn); + MLX5_SET(macsec_aso, aso_ctx, mode_parameter, attrs->next_pn); + + /* Epn */ + if (attrs->epn_state.epn_enabled) { + void *salt_p; + int i; + + MLX5_SET(macsec_aso, aso_ctx, epn_event_arm, 1); + MLX5_SET(macsec_offload_obj, obj, epn_en, 1); + MLX5_SET(macsec_offload_obj, obj, epn_msb, attrs->epn_state.epn_msb); + MLX5_SET(macsec_offload_obj, obj, epn_overlap, attrs->epn_state.overlap); + MLX5_SET64(macsec_offload_obj, obj, sci, (__force u64)attrs->ssci); + salt_p = MLX5_ADDR_OF(macsec_offload_obj, obj, salt); + for (i = 0; i < 3 ; i++) + memcpy((u32 *)salt_p + i, &attrs->salt.bytes[4 * (2 - i)], 4); + } else { + MLX5_SET64(macsec_offload_obj, obj, sci, (__force u64)(attrs->sci)); + } + + MLX5_SET(macsec_aso, aso_ctx, valid, 0x1); + if (is_tx) { + MLX5_SET(macsec_aso, aso_ctx, mode, MLX5_MACSEC_ASO_INC_SN); + } else { + err = 
macsec_set_replay_protection(attrs, aso_ctx); + if (err) + return err; + } + + /* general object fields set */ + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_MACSEC); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) { + mlx5_core_err(mdev, + "MACsec offload: Failed to create MACsec object (err = %d)\n", + err); + return err; + } + + *macsec_obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return err; +} + +static void mlx5e_macsec_destroy_object(struct mlx5_core_dev *mdev, u32 macsec_obj_id) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_MACSEC); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, macsec_obj_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec, + struct mlx5e_macsec_sa *sa, + bool is_tx) +{ + int action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT : + MLX5_ACCEL_MACSEC_ACTION_DECRYPT; + + if ((is_tx) && sa->fs_id) { + /* Make sure ongoing datapath readers sees a valid SA */ + rhashtable_remove_fast(&macsec->sci_hash, &sa->hash, rhash_sci); + sa->fs_id = 0; + } + + if (!sa->macsec_rule) + return; + + mlx5e_macsec_fs_del_rule(macsec->macsec_fs, sa->macsec_rule, action); + mlx5e_macsec_destroy_object(macsec->mdev, sa->macsec_obj_id); + sa->macsec_rule = NULL; +} + +static int mlx5e_macsec_init_sa(struct macsec_context *ctx, + struct mlx5e_macsec_sa *sa, + bool encrypt, + bool is_tx) +{ + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_macsec *macsec = priv->macsec; + struct mlx5_macsec_rule_attrs rule_attrs; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_macsec_obj_attrs obj_attrs; + union mlx5e_macsec_rule *macsec_rule; + int err; + + obj_attrs.next_pn = sa->next_pn; + obj_attrs.sci = cpu_to_be64((__force u64)sa->sci); + obj_attrs.enc_key_id = sa->enc_key_id; + obj_attrs.encrypt = encrypt; + obj_attrs.aso_pdn = macsec->aso.pdn; + obj_attrs.epn_state = sa->epn_state; + + if (sa->epn_state.epn_enabled) { + obj_attrs.ssci = cpu_to_be32((__force u32)sa->ssci); + memcpy(&obj_attrs.salt, &sa->salt, sizeof(sa->salt)); + } + + obj_attrs.replay_window = ctx->secy->replay_window; + obj_attrs.replay_protect = ctx->secy->replay_protect; + + err = mlx5e_macsec_create_object(mdev, &obj_attrs, is_tx, &sa->macsec_obj_id); + if (err) + return err; + + rule_attrs.macsec_obj_id = sa->macsec_obj_id; + rule_attrs.sci = sa->sci; + rule_attrs.assoc_num = sa->assoc_num; + rule_attrs.action = (is_tx) ? 
MLX5_ACCEL_MACSEC_ACTION_ENCRYPT : + MLX5_ACCEL_MACSEC_ACTION_DECRYPT; + + macsec_rule = mlx5e_macsec_fs_add_rule(macsec->macsec_fs, ctx, &rule_attrs, &sa->fs_id); + if (!macsec_rule) { + err = -ENOMEM; + goto destroy_macsec_object; + } + + sa->macsec_rule = macsec_rule; + + if (is_tx) { + err = rhashtable_insert_fast(&macsec->sci_hash, &sa->hash, rhash_sci); + if (err) + goto destroy_macsec_object_and_rule; + } + + return 0; + +destroy_macsec_object_and_rule: + mlx5e_macsec_cleanup_sa(macsec, sa, is_tx); +destroy_macsec_object: + mlx5e_macsec_destroy_object(mdev, sa->macsec_obj_id); + + return err; +} + +static struct mlx5e_macsec_rx_sc * +mlx5e_macsec_get_rx_sc_from_sc_list(const struct list_head *list, sci_t sci) +{ + struct mlx5e_macsec_rx_sc *iter; + + list_for_each_entry_rcu(iter, list, rx_sc_list_element) { + if (iter->sci == sci) + return iter; + } + + return NULL; +} + +static int macsec_rx_sa_active_update(struct macsec_context *ctx, + struct mlx5e_macsec_sa *rx_sa, + bool active) +{ + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_macsec *macsec = priv->macsec; + int err = 0; + + if (rx_sa->active == active) + return 0; + + rx_sa->active = active; + if (!active) { + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); + return 0; + } + + err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false); + if (err) + rx_sa->active = false; + + return err; +} + +static bool mlx5e_macsec_secy_features_validate(struct macsec_context *ctx) +{ + const struct net_device *netdev = ctx->netdev; + const struct macsec_secy *secy = ctx->secy; + + if (secy->validate_frames != MACSEC_VALIDATE_STRICT) { + netdev_err(netdev, + "MACsec offload is supported only when validate_frame is in strict mode\n"); + return false; + } + + if (secy->icv_len != MACSEC_DEFAULT_ICV_LEN) { + netdev_err(netdev, "MACsec offload is supported only when icv_len is %d\n", + MACSEC_DEFAULT_ICV_LEN); + return false; + } + + if (!secy->protect_frames) { + netdev_err(netdev, + "MACsec offload is supported only when protect_frames is set\n"); + return false; + } + + if (!ctx->secy->tx_sc.encrypt) { + netdev_err(netdev, "MACsec offload: encrypt off isn't supported\n"); + return false; + } + + return true; +} + +static struct mlx5e_macsec_device * +mlx5e_macsec_get_macsec_device_context(const struct mlx5e_macsec *macsec, + const struct macsec_context *ctx) +{ + struct mlx5e_macsec_device *iter; + const struct list_head *list; + + list = &macsec->macsec_device_list_head; + list_for_each_entry_rcu(iter, list, macsec_device_list_element) { + if (iter->netdev == ctx->secy->netdev) + return iter; + } + + return NULL; +} + +static void update_macsec_epn(struct mlx5e_macsec_sa *sa, const struct macsec_key *key, + const pn_t *next_pn_halves, ssci_t ssci) +{ + struct mlx5e_macsec_epn_state *epn_state = &sa->epn_state; + + sa->ssci = ssci; + sa->salt = key->salt; + epn_state->epn_enabled = 1; + epn_state->epn_msb = next_pn_halves->upper; + epn_state->overlap = next_pn_halves->lower < MLX5_MACSEC_EPN_SCOPE_MID ? 
0 : 1; +} + +static int mlx5e_macsec_add_txsa(struct macsec_context *ctx) +{ + const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc; + const struct macsec_tx_sa *ctx_tx_sa = ctx->sa.tx_sa; + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + const struct macsec_secy *secy = ctx->secy; + struct mlx5e_macsec_device *macsec_device; + struct mlx5_core_dev *mdev = priv->mdev; + u8 assoc_num = ctx->sa.assoc_num; + struct mlx5e_macsec_sa *tx_sa; + struct mlx5e_macsec *macsec; + int err = 0; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EEXIST; + goto out; + } + + if (macsec_device->tx_sa[assoc_num]) { + netdev_err(ctx->netdev, "MACsec offload tx_sa: %d already exist\n", assoc_num); + err = -EEXIST; + goto out; + } + + tx_sa = kzalloc(sizeof(*tx_sa), GFP_KERNEL); + if (!tx_sa) { + err = -ENOMEM; + goto out; + } + + tx_sa->active = ctx_tx_sa->active; + tx_sa->next_pn = ctx_tx_sa->next_pn_halves.lower; + tx_sa->sci = secy->sci; + tx_sa->assoc_num = assoc_num; + + if (secy->xpn) + update_macsec_epn(tx_sa, &ctx_tx_sa->key, &ctx_tx_sa->next_pn_halves, + ctx_tx_sa->ssci); + + err = mlx5_create_encryption_key(mdev, ctx->sa.key, secy->key_len, + MLX5_ACCEL_OBJ_MACSEC_KEY, + &tx_sa->enc_key_id); + if (err) + goto destroy_sa; + + macsec_device->tx_sa[assoc_num] = tx_sa; + if (!secy->operational || + assoc_num != tx_sc->encoding_sa || + !tx_sa->active) + goto out; + + err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true); + if (err) + goto destroy_encryption_key; + + mutex_unlock(&macsec->lock); + + return 0; + +destroy_encryption_key: + macsec_device->tx_sa[assoc_num] = NULL; + mlx5_destroy_encryption_key(mdev, tx_sa->enc_key_id); +destroy_sa: + kfree(tx_sa); +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) +{ + const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc; + const struct macsec_tx_sa *ctx_tx_sa = ctx->sa.tx_sa; + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_macsec_device *macsec_device; + u8 assoc_num = ctx->sa.assoc_num; + struct mlx5e_macsec_sa *tx_sa; + struct mlx5e_macsec *macsec; + struct net_device *netdev; + int err = 0; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + netdev = ctx->netdev; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + tx_sa = macsec_device->tx_sa[assoc_num]; + if (!tx_sa) { + netdev_err(netdev, "MACsec offload: TX sa 0x%x doesn't exist\n", assoc_num); + err = -EEXIST; + goto out; + } + + if (ctx->sa.update_pn) { + netdev_err(netdev, "MACsec offload: update TX sa %d PN isn't supported\n", + assoc_num); + err = -EINVAL; + goto out; + } + + if (tx_sa->active == ctx_tx_sa->active) + goto out; + + tx_sa->active = ctx_tx_sa->active; + if (tx_sa->assoc_num != tx_sc->encoding_sa) + goto out; + + if (ctx_tx_sa->active) { + err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true); + if (err) + goto out; + } else { + if (!tx_sa->macsec_rule) { + err = -EINVAL; + goto out; + } + + mlx5e_macsec_cleanup_sa(macsec, tx_sa, true); + } +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_del_txsa(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct 
mlx5e_macsec_device *macsec_device; + u8 assoc_num = ctx->sa.assoc_num; + struct mlx5e_macsec_sa *tx_sa; + struct mlx5e_macsec *macsec; + int err = 0; + + mutex_lock(&priv->macsec->lock); + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + tx_sa = macsec_device->tx_sa[assoc_num]; + if (!tx_sa) { + netdev_err(ctx->netdev, "MACsec offload: TX sa 0x%x doesn't exist\n", assoc_num); + err = -EEXIST; + goto out; + } + + mlx5e_macsec_cleanup_sa(macsec, tx_sa, true); + mlx5_destroy_encryption_key(macsec->mdev, tx_sa->enc_key_id); + kfree_rcu(tx_sa); + macsec_device->tx_sa[assoc_num] = NULL; + +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static u32 mlx5e_macsec_get_sa_from_hashtable(struct rhashtable *sci_hash, sci_t *sci) +{ + struct mlx5e_macsec_sa *macsec_sa; + u32 fs_id = 0; + + rcu_read_lock(); + macsec_sa = rhashtable_lookup(sci_hash, sci, rhash_sci); + if (macsec_sa) + fs_id = macsec_sa->fs_id; + rcu_read_unlock(); + + return fs_id; +} + +static int mlx5e_macsec_add_rxsc(struct macsec_context *ctx) +{ + struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element; + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + const struct macsec_rx_sc *ctx_rx_sc = ctx->rx_sc; + struct mlx5e_macsec_device *macsec_device; + struct mlx5e_macsec_rx_sc *rx_sc; + struct list_head *rx_sc_list; + struct mlx5e_macsec *macsec; + int err = 0; + + mutex_lock(&priv->macsec->lock); + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + rx_sc_list = &macsec_device->macsec_rx_sc_list_head; + rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(rx_sc_list, ctx_rx_sc->sci); + if (rx_sc) { + netdev_err(ctx->netdev, "MACsec offload: rx_sc (sci %lld) already exists\n", + ctx_rx_sc->sci); + err = -EEXIST; + goto out; + } + + rx_sc = kzalloc(sizeof(*rx_sc), GFP_KERNEL); + if (!rx_sc) { + err = -ENOMEM; + goto out; + } + + sc_xarray_element = kzalloc(sizeof(*sc_xarray_element), GFP_KERNEL); + if (!sc_xarray_element) { + err = -ENOMEM; + goto destroy_rx_sc; + } + + sc_xarray_element->rx_sc = rx_sc; + err = xa_alloc(&macsec->sc_xarray, &sc_xarray_element->fs_id, sc_xarray_element, + XA_LIMIT(1, MLX5_MACEC_RX_FS_ID_MAX), GFP_KERNEL); + if (err) { + if (err == -EBUSY) + netdev_err(ctx->netdev, + "MACsec offload: unable to create entry for RX SC (%d Rx SCs already allocated)\n", + MLX5_MACEC_RX_FS_ID_MAX); + goto destroy_sc_xarray_elemenet; + } + + rx_sc->md_dst = metadata_dst_alloc(0, METADATA_MACSEC, GFP_KERNEL); + if (!rx_sc->md_dst) { + err = -ENOMEM; + goto erase_xa_alloc; + } + + rx_sc->sci = ctx_rx_sc->sci; + rx_sc->active = ctx_rx_sc->active; + list_add_rcu(&rx_sc->rx_sc_list_element, rx_sc_list); + + rx_sc->sc_xarray_element = sc_xarray_element; + rx_sc->md_dst->u.macsec_info.sci = rx_sc->sci; + mutex_unlock(&macsec->lock); + + return 0; + +erase_xa_alloc: + xa_erase(&macsec->sc_xarray, sc_xarray_element->fs_id); +destroy_sc_xarray_elemenet: + kfree(sc_xarray_element); +destroy_rx_sc: + kfree(rx_sc); + +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_upd_rxsc(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + const struct macsec_rx_sc *ctx_rx_sc = ctx->rx_sc; + struct mlx5e_macsec_device 
*macsec_device; + struct mlx5e_macsec_rx_sc *rx_sc; + struct mlx5e_macsec_sa *rx_sa; + struct mlx5e_macsec *macsec; + struct list_head *list; + int i; + int err = 0; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + list = &macsec_device->macsec_rx_sc_list_head; + rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, ctx_rx_sc->sci); + if (!rx_sc) { + err = -EINVAL; + goto out; + } + + if (rx_sc->active == ctx_rx_sc->active) + goto out; + + rx_sc->active = ctx_rx_sc->active; + for (i = 0; i < MACSEC_NUM_AN; ++i) { + rx_sa = rx_sc->rx_sa[i]; + if (!rx_sa) + continue; + + err = macsec_rx_sa_active_update(ctx, rx_sa, rx_sa->active && ctx_rx_sc->active); + if (err) + goto out; + } + +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static void macsec_del_rxsc_ctx(struct mlx5e_macsec *macsec, struct mlx5e_macsec_rx_sc *rx_sc) +{ + struct mlx5e_macsec_sa *rx_sa; + int i; + + for (i = 0; i < MACSEC_NUM_AN; ++i) { + rx_sa = rx_sc->rx_sa[i]; + if (!rx_sa) + continue; + + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); + mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); + + kfree(rx_sa); + rx_sc->rx_sa[i] = NULL; + } + + /* At this point the relevant MACsec offload Rx rule already removed at + * mlx5e_macsec_cleanup_sa need to wait for datapath to finish current + * Rx related data propagating using xa_erase which uses rcu to sync, + * once fs_id is erased then this rx_sc is hidden from datapath. + */ + list_del_rcu(&rx_sc->rx_sc_list_element); + xa_erase(&macsec->sc_xarray, rx_sc->sc_xarray_element->fs_id); + metadata_dst_free(rx_sc->md_dst); + kfree(rx_sc->sc_xarray_element); + kfree_rcu(rx_sc); +} + +static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_macsec_device *macsec_device; + struct mlx5e_macsec_rx_sc *rx_sc; + struct mlx5e_macsec *macsec; + struct list_head *list; + int err = 0; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + list = &macsec_device->macsec_rx_sc_list_head; + rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, ctx->rx_sc->sci); + if (!rx_sc) { + netdev_err(ctx->netdev, + "MACsec offload rx_sc sci %lld doesn't exist\n", + ctx->sa.rx_sa->sc->sci); + err = -EINVAL; + goto out; + } + + macsec_del_rxsc_ctx(macsec, rx_sc); +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_add_rxsa(struct macsec_context *ctx) +{ + const struct macsec_rx_sa *ctx_rx_sa = ctx->sa.rx_sa; + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_macsec_device *macsec_device; + struct mlx5_core_dev *mdev = priv->mdev; + u8 assoc_num = ctx->sa.assoc_num; + struct mlx5e_macsec_rx_sc *rx_sc; + sci_t sci = ctx_rx_sa->sc->sci; + struct mlx5e_macsec_sa *rx_sa; + struct mlx5e_macsec *macsec; + struct list_head *list; + int err = 0; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + list = 
&macsec_device->macsec_rx_sc_list_head; + rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, sci); + if (!rx_sc) { + netdev_err(ctx->netdev, + "MACsec offload rx_sc sci %lld doesn't exist\n", + ctx->sa.rx_sa->sc->sci); + err = -EINVAL; + goto out; + } + + if (rx_sc->rx_sa[assoc_num]) { + netdev_err(ctx->netdev, + "MACsec offload rx_sc sci %lld rx_sa %d already exist\n", + sci, assoc_num); + err = -EEXIST; + goto out; + } + + rx_sa = kzalloc(sizeof(*rx_sa), GFP_KERNEL); + if (!rx_sa) { + err = -ENOMEM; + goto out; + } + + rx_sa->active = ctx_rx_sa->active; + rx_sa->next_pn = ctx_rx_sa->next_pn; + rx_sa->sci = sci; + rx_sa->assoc_num = assoc_num; + rx_sa->fs_id = rx_sc->sc_xarray_element->fs_id; + + if (ctx->secy->xpn) + update_macsec_epn(rx_sa, &ctx_rx_sa->key, &ctx_rx_sa->next_pn_halves, + ctx_rx_sa->ssci); + + err = mlx5_create_encryption_key(mdev, ctx->sa.key, ctx->secy->key_len, + MLX5_ACCEL_OBJ_MACSEC_KEY, + &rx_sa->enc_key_id); + if (err) + goto destroy_sa; + + rx_sc->rx_sa[assoc_num] = rx_sa; + if (!rx_sa->active) + goto out; + + //TODO - add support for both authentication and encryption flows + err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false); + if (err) + goto destroy_encryption_key; + + goto out; + +destroy_encryption_key: + rx_sc->rx_sa[assoc_num] = NULL; + mlx5_destroy_encryption_key(mdev, rx_sa->enc_key_id); +destroy_sa: + kfree(rx_sa); +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_upd_rxsa(struct macsec_context *ctx) +{ + const struct macsec_rx_sa *ctx_rx_sa = ctx->sa.rx_sa; + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_macsec_device *macsec_device; + u8 assoc_num = ctx->sa.assoc_num; + struct mlx5e_macsec_rx_sc *rx_sc; + sci_t sci = ctx_rx_sa->sc->sci; + struct mlx5e_macsec_sa *rx_sa; + struct mlx5e_macsec *macsec; + struct list_head *list; + int err = 0; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + list = &macsec_device->macsec_rx_sc_list_head; + rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, sci); + if (!rx_sc) { + netdev_err(ctx->netdev, + "MACsec offload rx_sc sci %lld doesn't exist\n", + ctx->sa.rx_sa->sc->sci); + err = -EINVAL; + goto out; + } + + rx_sa = rx_sc->rx_sa[assoc_num]; + if (!rx_sa) { + netdev_err(ctx->netdev, + "MACsec offload rx_sc sci %lld rx_sa %d doesn't exist\n", + sci, assoc_num); + err = -EINVAL; + goto out; + } + + if (ctx->sa.update_pn) { + netdev_err(ctx->netdev, + "MACsec offload update RX sa %d PN isn't supported\n", + assoc_num); + err = -EINVAL; + goto out; + } + + err = macsec_rx_sa_active_update(ctx, rx_sa, ctx_rx_sa->active); +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_del_rxsa(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_macsec_device *macsec_device; + sci_t sci = ctx->sa.rx_sa->sc->sci; + struct mlx5e_macsec_rx_sc *rx_sc; + u8 assoc_num = ctx->sa.assoc_num; + struct mlx5e_macsec_sa *rx_sa; + struct mlx5e_macsec *macsec; + struct list_head *list; + int err = 0; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + list = 
&macsec_device->macsec_rx_sc_list_head; + rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, sci); + if (!rx_sc) { + netdev_err(ctx->netdev, + "MACsec offload rx_sc sci %lld doesn't exist\n", + ctx->sa.rx_sa->sc->sci); + err = -EINVAL; + goto out; + } + + rx_sa = rx_sc->rx_sa[assoc_num]; + if (!rx_sa) { + netdev_err(ctx->netdev, + "MACsec offload rx_sc sci %lld rx_sa %d doesn't exist\n", + sci, assoc_num); + err = -EINVAL; + goto out; + } + + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); + mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); + kfree(rx_sa); + rx_sc->rx_sa[assoc_num] = NULL; + +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_add_secy(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + const struct net_device *dev = ctx->secy->netdev; + const struct net_device *netdev = ctx->netdev; + struct mlx5e_macsec_device *macsec_device; + struct mlx5e_macsec *macsec; + int err = 0; + + if (!mlx5e_macsec_secy_features_validate(ctx)) + return -EINVAL; + + mutex_lock(&priv->macsec->lock); + macsec = priv->macsec; + if (mlx5e_macsec_get_macsec_device_context(macsec, ctx)) { + netdev_err(netdev, "MACsec offload: MACsec net_device already exist\n"); + goto out; + } + + if (macsec->num_of_devices >= MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES) { + netdev_err(netdev, "Currently, only %d MACsec offload devices can be set\n", + MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES); + err = -EBUSY; + goto out; + } + + macsec_device = kzalloc(sizeof(*macsec_device), GFP_KERNEL); + if (!macsec_device) { + err = -ENOMEM; + goto out; + } + + macsec_device->dev_addr = kmemdup(dev->dev_addr, dev->addr_len, GFP_KERNEL); + if (!macsec_device->dev_addr) { + kfree(macsec_device); + err = -ENOMEM; + goto out; + } + + macsec_device->netdev = dev; + + INIT_LIST_HEAD_RCU(&macsec_device->macsec_rx_sc_list_head); + list_add_rcu(&macsec_device->macsec_device_list_element, &macsec->macsec_device_list_head); + + ++macsec->num_of_devices; +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int macsec_upd_secy_hw_address(struct macsec_context *ctx, + struct mlx5e_macsec_device *macsec_device) +{ + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + const struct net_device *dev = ctx->secy->netdev; + struct mlx5e_macsec *macsec = priv->macsec; + struct mlx5e_macsec_rx_sc *rx_sc, *tmp; + struct mlx5e_macsec_sa *rx_sa; + struct list_head *list; + int i, err = 0; + + + list = &macsec_device->macsec_rx_sc_list_head; + list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) { + for (i = 0; i < MACSEC_NUM_AN; ++i) { + rx_sa = rx_sc->rx_sa[i]; + if (!rx_sa || !rx_sa->macsec_rule) + continue; + + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); + } + } + + list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) { + for (i = 0; i < MACSEC_NUM_AN; ++i) { + rx_sa = rx_sc->rx_sa[i]; + if (!rx_sa) + continue; + + if (rx_sa->active) { + err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false); + if (err) + goto out; + } + } + } + + memcpy(macsec_device->dev_addr, dev->dev_addr, dev->addr_len); +out: + return err; +} + +/* this function is called from 2 macsec ops functions: + * macsec_set_mac_address – MAC address was changed, therefore we need to destroy + * and create new Tx contexts(macsec object + steering). 
+ * macsec_changelink – in this case the tx SC or SecY may be changed, therefore need to + * destroy Tx and Rx contexts(macsec object + steering) + */ +static int mlx5e_macsec_upd_secy(struct macsec_context *ctx) +{ + const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc; + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + const struct net_device *dev = ctx->secy->netdev; + struct mlx5e_macsec_device *macsec_device; + struct mlx5e_macsec_sa *tx_sa; + struct mlx5e_macsec *macsec; + int i, err = 0; + + if (!mlx5e_macsec_secy_features_validate(ctx)) + return -EINVAL; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + /* if the dev_addr hasn't change, it mean the callback is from macsec_changelink */ + if (!memcmp(macsec_device->dev_addr, dev->dev_addr, dev->addr_len)) { + err = macsec_upd_secy_hw_address(ctx, macsec_device); + if (err) + goto out; + } + + for (i = 0; i < MACSEC_NUM_AN; ++i) { + tx_sa = macsec_device->tx_sa[i]; + if (!tx_sa) + continue; + + mlx5e_macsec_cleanup_sa(macsec, tx_sa, true); + } + + for (i = 0; i < MACSEC_NUM_AN; ++i) { + tx_sa = macsec_device->tx_sa[i]; + if (!tx_sa) + continue; + + if (tx_sa->assoc_num == tx_sc->encoding_sa && tx_sa->active) { + err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true); + if (err) + goto out; + } + } + +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_del_secy(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_macsec_device *macsec_device; + struct mlx5e_macsec_rx_sc *rx_sc, *tmp; + struct mlx5e_macsec_sa *tx_sa; + struct mlx5e_macsec *macsec; + struct list_head *list; + int err = 0; + int i; + + mutex_lock(&priv->macsec->lock); + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + + goto out; + } + + for (i = 0; i < MACSEC_NUM_AN; ++i) { + tx_sa = macsec_device->tx_sa[i]; + if (!tx_sa) + continue; + + mlx5e_macsec_cleanup_sa(macsec, tx_sa, true); + mlx5_destroy_encryption_key(macsec->mdev, tx_sa->enc_key_id); + kfree(tx_sa); + macsec_device->tx_sa[i] = NULL; + } + + list = &macsec_device->macsec_rx_sc_list_head; + list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) + macsec_del_rxsc_ctx(macsec, rx_sc); + + kfree(macsec_device->dev_addr); + macsec_device->dev_addr = NULL; + + list_del_rcu(&macsec_device->macsec_device_list_element); + --macsec->num_of_devices; + kfree(macsec_device); + +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static void macsec_build_accel_attrs(struct mlx5e_macsec_sa *sa, + struct mlx5_macsec_obj_attrs *attrs) +{ + attrs->epn_state.epn_msb = sa->epn_state.epn_msb; + attrs->epn_state.overlap = sa->epn_state.overlap; +} + +static void macsec_aso_build_wqe_ctrl_seg(struct mlx5e_macsec_aso *macsec_aso, + struct mlx5_wqe_aso_ctrl_seg *aso_ctrl, + struct mlx5_aso_ctrl_param *param) +{ + memset(aso_ctrl, 0, sizeof(*aso_ctrl)); + if (macsec_aso->umr->dma_addr) { + aso_ctrl->va_l = cpu_to_be32(macsec_aso->umr->dma_addr | ASO_CTRL_READ_EN); + aso_ctrl->va_h = cpu_to_be32((u64)macsec_aso->umr->dma_addr >> 32); + aso_ctrl->l_key = cpu_to_be32(macsec_aso->umr->mkey); + } + + if (!param) + return; + + aso_ctrl->data_mask_mode = 
param->data_mask_mode << 6; + aso_ctrl->condition_1_0_operand = param->condition_1_operand | + param->condition_0_operand << 4; + aso_ctrl->condition_1_0_offset = param->condition_1_offset | + param->condition_0_offset << 4; + aso_ctrl->data_offset_condition_operand = param->data_offset | + param->condition_operand << 6; + aso_ctrl->condition_0_data = cpu_to_be32(param->condition_0_data); + aso_ctrl->condition_0_mask = cpu_to_be32(param->condition_0_mask); + aso_ctrl->condition_1_data = cpu_to_be32(param->condition_1_data); + aso_ctrl->condition_1_mask = cpu_to_be32(param->condition_1_mask); + aso_ctrl->bitwise_data = cpu_to_be64(param->bitwise_data); + aso_ctrl->data_mask = cpu_to_be64(param->data_mask); +} + +static int mlx5e_macsec_modify_obj(struct mlx5_core_dev *mdev, struct mlx5_macsec_obj_attrs *attrs, + u32 macsec_id) +{ + u32 in[MLX5_ST_SZ_DW(modify_macsec_obj_in)] = {}; + u32 out[MLX5_ST_SZ_DW(query_macsec_obj_out)]; + u64 modify_field_select = 0; + void *obj; + int err; + + /* General object fields set */ + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_MACSEC); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, macsec_id); + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) { + mlx5_core_err(mdev, "Query MACsec object failed (Object id %d), err = %d\n", + macsec_id, err); + return err; + } + + obj = MLX5_ADDR_OF(query_macsec_obj_out, out, macsec_object); + modify_field_select = MLX5_GET64(macsec_offload_obj, obj, modify_field_select); + + /* EPN */ + if (!(modify_field_select & MLX5_MODIFY_MACSEC_BITMASK_EPN_OVERLAP) || + !(modify_field_select & MLX5_MODIFY_MACSEC_BITMASK_EPN_MSB)) { + mlx5_core_dbg(mdev, "MACsec object field is not modifiable (Object id %d)\n", + macsec_id); + return -EOPNOTSUPP; + } + + obj = MLX5_ADDR_OF(modify_macsec_obj_in, in, macsec_object); + MLX5_SET64(macsec_offload_obj, obj, modify_field_select, + MLX5_MODIFY_MACSEC_BITMASK_EPN_OVERLAP | MLX5_MODIFY_MACSEC_BITMASK_EPN_MSB); + MLX5_SET(macsec_offload_obj, obj, epn_msb, attrs->epn_state.epn_msb); + MLX5_SET(macsec_offload_obj, obj, epn_overlap, attrs->epn_state.overlap); + + /* General object fields set */ + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +static void macsec_aso_build_ctrl(struct mlx5e_macsec_aso *aso, + struct mlx5_wqe_aso_ctrl_seg *aso_ctrl, + struct mlx5e_macsec_aso_in *in) +{ + struct mlx5_aso_ctrl_param param = {}; + + param.data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BITWISE_64BIT; + param.condition_0_operand = MLX5_ASO_ALWAYS_TRUE; + param.condition_1_operand = MLX5_ASO_ALWAYS_TRUE; + if (in->mode == MLX5_MACSEC_EPN) { + param.data_offset = MLX5_MACSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET; + param.bitwise_data = BIT_ULL(54); + param.data_mask = param.bitwise_data; + } + macsec_aso_build_wqe_ctrl_seg(aso, aso_ctrl, ¶m); +} + +static int macsec_aso_set_arm_event(struct mlx5_core_dev *mdev, struct mlx5e_macsec *macsec, + struct mlx5e_macsec_aso_in *in) +{ + struct mlx5e_macsec_aso *aso; + struct mlx5_aso_wqe *aso_wqe; + struct mlx5_aso *maso; + int err; + + aso = &macsec->aso; + maso = aso->maso; + + mutex_lock(&aso->aso_lock); + aso_wqe = mlx5_aso_get_wqe(maso); + mlx5_aso_build_wqe(maso, MLX5_MACSEC_ASO_DS_CNT, aso_wqe, in->obj_id, + MLX5_ACCESS_ASO_OPC_MOD_MACSEC); + macsec_aso_build_ctrl(aso, &aso_wqe->aso_ctrl, in); + mlx5_aso_post_wqe(maso, false, 
&aso_wqe->ctrl); + err = mlx5_aso_poll_cq(maso, false); + mutex_unlock(&aso->aso_lock); + + return err; +} + +static int macsec_aso_query(struct mlx5_core_dev *mdev, struct mlx5e_macsec *macsec, + struct mlx5e_macsec_aso_in *in, struct mlx5e_macsec_aso_out *out) +{ + struct mlx5e_macsec_aso *aso; + struct mlx5_aso_wqe *aso_wqe; + struct mlx5_aso *maso; + unsigned long expires; + int err; + + aso = &macsec->aso; + maso = aso->maso; + + mutex_lock(&aso->aso_lock); + + aso_wqe = mlx5_aso_get_wqe(maso); + mlx5_aso_build_wqe(maso, MLX5_MACSEC_ASO_DS_CNT, aso_wqe, in->obj_id, + MLX5_ACCESS_ASO_OPC_MOD_MACSEC); + macsec_aso_build_wqe_ctrl_seg(aso, &aso_wqe->aso_ctrl, NULL); + + mlx5_aso_post_wqe(maso, false, &aso_wqe->ctrl); + expires = jiffies + msecs_to_jiffies(10); + do { + err = mlx5_aso_poll_cq(maso, false); + if (err) + usleep_range(2, 10); + } while (err && time_is_after_jiffies(expires)); + + if (err) + goto err_out; + + if (MLX5_GET(macsec_aso, aso->umr->ctx, epn_event_arm)) + out->event_arm |= MLX5E_ASO_EPN_ARM; + + out->mode_param = MLX5_GET(macsec_aso, aso->umr->ctx, mode_parameter); + +err_out: + mutex_unlock(&aso->aso_lock); + return err; +} + +static struct mlx5e_macsec_sa *get_macsec_tx_sa_from_obj_id(const struct mlx5e_macsec *macsec, + const u32 obj_id) +{ + const struct list_head *device_list; + struct mlx5e_macsec_sa *macsec_sa; + struct mlx5e_macsec_device *iter; + int i; + + device_list = &macsec->macsec_device_list_head; + + list_for_each_entry(iter, device_list, macsec_device_list_element) { + for (i = 0; i < MACSEC_NUM_AN; ++i) { + macsec_sa = iter->tx_sa[i]; + if (!macsec_sa || !macsec_sa->active) + continue; + if (macsec_sa->macsec_obj_id == obj_id) + return macsec_sa; + } + } + + return NULL; +} + +static struct mlx5e_macsec_sa *get_macsec_rx_sa_from_obj_id(const struct mlx5e_macsec *macsec, + const u32 obj_id) +{ + const struct list_head *device_list, *sc_list; + struct mlx5e_macsec_rx_sc *mlx5e_rx_sc; + struct mlx5e_macsec_sa *macsec_sa; + struct mlx5e_macsec_device *iter; + int i; + + device_list = &macsec->macsec_device_list_head; + + list_for_each_entry(iter, device_list, macsec_device_list_element) { + sc_list = &iter->macsec_rx_sc_list_head; + list_for_each_entry(mlx5e_rx_sc, sc_list, rx_sc_list_element) { + for (i = 0; i < MACSEC_NUM_AN; ++i) { + macsec_sa = mlx5e_rx_sc->rx_sa[i]; + if (!macsec_sa || !macsec_sa->active) + continue; + if (macsec_sa->macsec_obj_id == obj_id) + return macsec_sa; + } + } + } + + return NULL; +} + +static void macsec_epn_update(struct mlx5e_macsec *macsec, struct mlx5_core_dev *mdev, + struct mlx5e_macsec_sa *sa, u32 obj_id, u32 mode_param) +{ + struct mlx5_macsec_obj_attrs attrs = {}; + struct mlx5e_macsec_aso_in in = {}; + + /* When the bottom of the replay protection window (mode_param) crosses 2^31 (half sequence + * number wraparound) hence mode_param > MLX5_MACSEC_EPN_SCOPE_MID the SW should update the + * esn_overlap to OLD (1). + * When the bottom of the replay protection window (mode_param) crosses 2^32 (full sequence + * number wraparound) hence mode_param < MLX5_MACSEC_EPN_SCOPE_MID since it did a + * wraparound, the SW should update the esn_overlap to NEW (0), and increment the esn_msb. 
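+ * For example (illustrative values): an event reporting mode_param
+ * 0xC0000000 only sets overlap to 1, while a later event reporting
+ * mode_param 0x00010000 means the window wrapped past 2^32, so epn_msb
+ * is incremented and overlap is cleared back to 0.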
+ */ + + if (mode_param < MLX5_MACSEC_EPN_SCOPE_MID) { + sa->epn_state.epn_msb++; + sa->epn_state.overlap = 0; + } else { + sa->epn_state.overlap = 1; + } + + macsec_build_accel_attrs(sa, &attrs); + mlx5e_macsec_modify_obj(mdev, &attrs, obj_id); + + /* Re-set EPN arm event */ + in.obj_id = obj_id; + in.mode = MLX5_MACSEC_EPN; + macsec_aso_set_arm_event(mdev, macsec, &in); +} + +static void macsec_async_event(struct work_struct *work) +{ + struct mlx5e_macsec_async_work *async_work; + struct mlx5e_macsec_aso_out out = {}; + struct mlx5e_macsec_aso_in in = {}; + struct mlx5e_macsec_sa *macsec_sa; + struct mlx5e_macsec *macsec; + struct mlx5_core_dev *mdev; + u32 obj_id; + + async_work = container_of(work, struct mlx5e_macsec_async_work, work); + macsec = async_work->macsec; + mutex_lock(&macsec->lock); + + mdev = async_work->mdev; + obj_id = async_work->obj_id; + macsec_sa = get_macsec_tx_sa_from_obj_id(macsec, obj_id); + if (!macsec_sa) { + macsec_sa = get_macsec_rx_sa_from_obj_id(macsec, obj_id); + if (!macsec_sa) { + mlx5_core_dbg(mdev, "MACsec SA is not found (SA object id %d)\n", obj_id); + goto out_async_work; + } + } + + /* Query MACsec ASO context */ + in.obj_id = obj_id; + macsec_aso_query(mdev, macsec, &in, &out); + + /* EPN case */ + if (macsec_sa->epn_state.epn_enabled && !(out.event_arm & MLX5E_ASO_EPN_ARM)) + macsec_epn_update(macsec, mdev, macsec_sa, obj_id, out.mode_param); + +out_async_work: + kfree(async_work); + mutex_unlock(&macsec->lock); +} + +static int macsec_obj_change_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5e_macsec *macsec = container_of(nb, struct mlx5e_macsec, nb); + struct mlx5e_macsec_async_work *async_work; + struct mlx5_eqe_obj_change *obj_change; + struct mlx5_eqe *eqe = data; + u16 obj_type; + u32 obj_id; + + if (event != MLX5_EVENT_TYPE_OBJECT_CHANGE) + return NOTIFY_DONE; + + obj_change = &eqe->data.obj_change; + obj_type = be16_to_cpu(obj_change->obj_type); + obj_id = be32_to_cpu(obj_change->obj_id); + + if (obj_type != MLX5_GENERAL_OBJECT_TYPES_MACSEC) + return NOTIFY_DONE; + + async_work = kzalloc(sizeof(*async_work), GFP_ATOMIC); + if (!async_work) + return NOTIFY_DONE; + + async_work->macsec = macsec; + async_work->mdev = macsec->mdev; + async_work->obj_id = obj_id; + + INIT_WORK(&async_work->work, macsec_async_event); + + WARN_ON(!queue_work(macsec->wq, &async_work->work)); + + return NOTIFY_OK; +} + +static int mlx5e_macsec_aso_init(struct mlx5e_macsec_aso *aso, struct mlx5_core_dev *mdev) +{ + struct mlx5_aso *maso; + int err; + + err = mlx5_core_alloc_pd(mdev, &aso->pdn); + if (err) { + mlx5_core_err(mdev, + "MACsec offload: Failed to alloc pd for MACsec ASO, err=%d\n", + err); + return err; + } + + maso = mlx5_aso_create(mdev, aso->pdn); + if (IS_ERR(maso)) { + err = PTR_ERR(maso); + goto err_aso; + } + + err = mlx5e_macsec_aso_reg_mr(mdev, aso); + if (err) + goto err_aso_reg; + + mutex_init(&aso->aso_lock); + + aso->maso = maso; + + return 0; + +err_aso_reg: + mlx5_aso_destroy(maso); +err_aso: + mlx5_core_dealloc_pd(mdev, aso->pdn); + return err; +} + +static void mlx5e_macsec_aso_cleanup(struct mlx5e_macsec_aso *aso, struct mlx5_core_dev *mdev) +{ + if (!aso) + return; + + mlx5e_macsec_aso_dereg_mr(mdev, aso); + + mlx5_aso_destroy(aso->maso); + + mlx5_core_dealloc_pd(mdev, aso->pdn); +} + +bool mlx5e_is_macsec_device(const struct mlx5_core_dev *mdev) +{ + if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD)) + return false; + + if (!MLX5_CAP_GEN(mdev, 
log_max_dek)) + return false; + + if (!MLX5_CAP_MACSEC(mdev, log_max_macsec_offload)) + return false; + + if (!MLX5_CAP_FLOWTABLE_NIC_RX(mdev, macsec_decrypt) || + !MLX5_CAP_FLOWTABLE_NIC_RX(mdev, reformat_remove_macsec)) + return false; + + if (!MLX5_CAP_FLOWTABLE_NIC_TX(mdev, macsec_encrypt) || + !MLX5_CAP_FLOWTABLE_NIC_TX(mdev, reformat_add_macsec)) + return false; + + if (!MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_128_encrypt) && + !MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_256_encrypt)) + return false; + + if (!MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_128_decrypt) && + !MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_256_decrypt)) + return false; + + return true; +} + +void mlx5e_macsec_get_stats_fill(struct mlx5e_macsec *macsec, void *macsec_stats) +{ + mlx5e_macsec_fs_get_stats_fill(macsec->macsec_fs, macsec_stats); +} + +struct mlx5e_macsec_stats *mlx5e_macsec_get_stats(struct mlx5e_macsec *macsec) +{ + if (!macsec) + return NULL; + + return &macsec->stats; +} + +static const struct macsec_ops macsec_offload_ops = { + .mdo_add_txsa = mlx5e_macsec_add_txsa, + .mdo_upd_txsa = mlx5e_macsec_upd_txsa, + .mdo_del_txsa = mlx5e_macsec_del_txsa, + .mdo_add_rxsc = mlx5e_macsec_add_rxsc, + .mdo_upd_rxsc = mlx5e_macsec_upd_rxsc, + .mdo_del_rxsc = mlx5e_macsec_del_rxsc, + .mdo_add_rxsa = mlx5e_macsec_add_rxsa, + .mdo_upd_rxsa = mlx5e_macsec_upd_rxsa, + .mdo_del_rxsa = mlx5e_macsec_del_rxsa, + .mdo_add_secy = mlx5e_macsec_add_secy, + .mdo_upd_secy = mlx5e_macsec_upd_secy, + .mdo_del_secy = mlx5e_macsec_del_secy, +}; + +bool mlx5e_macsec_handle_tx_skb(struct mlx5e_macsec *macsec, struct sk_buff *skb) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + u32 fs_id; + + fs_id = mlx5e_macsec_get_sa_from_hashtable(&macsec->sci_hash, &md_dst->u.macsec_info.sci); + if (!fs_id) + goto err_out; + + return true; + +err_out: + dev_kfree_skb_any(skb); + return false; +} + +void mlx5e_macsec_tx_build_eseg(struct mlx5e_macsec *macsec, + struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + u32 fs_id; + + fs_id = mlx5e_macsec_get_sa_from_hashtable(&macsec->sci_hash, &md_dst->u.macsec_info.sci); + if (!fs_id) + return; + + eseg->flow_table_metadata = cpu_to_be32(MLX5_ETH_WQE_FT_META_MACSEC | fs_id << 2); +} + +void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev, + struct sk_buff *skb, + struct mlx5_cqe64 *cqe) +{ + struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element; + u32 macsec_meta_data = be32_to_cpu(cqe->ft_metadata); + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_macsec_rx_sc *rx_sc; + struct mlx5e_macsec *macsec; + u32 fs_id; + + macsec = priv->macsec; + if (!macsec) + return; + + fs_id = MLX5_MACSEC_RX_METADAT_HANDLE(macsec_meta_data); + + rcu_read_lock(); + sc_xarray_element = xa_load(&macsec->sc_xarray, fs_id); + rx_sc = sc_xarray_element->rx_sc; + if (rx_sc) { + dst_hold(&rx_sc->md_dst->dst); + skb_dst_set(skb, &rx_sc->md_dst->dst); + } + + rcu_read_unlock(); +} + +void mlx5e_macsec_build_netdev(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + + if (!mlx5e_is_macsec_device(priv->mdev)) + return; + + /* Enable MACsec */ + mlx5_core_dbg(priv->mdev, "mlx5e: MACsec acceleration enabled\n"); + netdev->macsec_ops = &macsec_offload_ops; + netdev->features |= NETIF_F_HW_MACSEC; + netif_keep_dst(netdev); +} + +int mlx5e_macsec_init(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_macsec *macsec = NULL; + struct mlx5e_macsec_fs 
*macsec_fs; + int err; + + if (!mlx5e_is_macsec_device(priv->mdev)) { + mlx5_core_dbg(mdev, "Not a MACsec offload device\n"); + return 0; + } + + macsec = kzalloc(sizeof(*macsec), GFP_KERNEL); + if (!macsec) + return -ENOMEM; + + INIT_LIST_HEAD(&macsec->macsec_device_list_head); + mutex_init(&macsec->lock); + + err = rhashtable_init(&macsec->sci_hash, &rhash_sci); + if (err) { + mlx5_core_err(mdev, "MACsec offload: Failed to init SCI hash table, err=%d\n", + err); + goto err_hash; + } + + err = mlx5e_macsec_aso_init(&macsec->aso, priv->mdev); + if (err) { + mlx5_core_err(mdev, "MACsec offload: Failed to init aso, err=%d\n", err); + goto err_aso; + } + + macsec->wq = alloc_ordered_workqueue("mlx5e_macsec_%s", 0, priv->netdev->name); + if (!macsec->wq) { + err = -ENOMEM; + goto err_wq; + } + + xa_init_flags(&macsec->sc_xarray, XA_FLAGS_ALLOC1); + + priv->macsec = macsec; + + macsec->mdev = mdev; + + macsec_fs = mlx5e_macsec_fs_init(mdev, priv->netdev); + if (!macsec_fs) { + err = -ENOMEM; + goto err_out; + } + + macsec->macsec_fs = macsec_fs; + + macsec->nb.notifier_call = macsec_obj_change_event; + mlx5_notifier_register(mdev, &macsec->nb); + + mlx5_core_dbg(mdev, "MACsec attached to netdevice\n"); + + return 0; + +err_out: + destroy_workqueue(macsec->wq); +err_wq: + mlx5e_macsec_aso_cleanup(&macsec->aso, priv->mdev); +err_aso: + rhashtable_destroy(&macsec->sci_hash); +err_hash: + kfree(macsec); + priv->macsec = NULL; + return err; +} + +void mlx5e_macsec_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5e_macsec *macsec = priv->macsec; + struct mlx5_core_dev *mdev = priv->mdev; + + if (!macsec) + return; + + mlx5_notifier_unregister(mdev, &macsec->nb); + mlx5e_macsec_fs_cleanup(macsec->macsec_fs); + destroy_workqueue(macsec->wq); + mlx5e_macsec_aso_cleanup(&macsec->aso, mdev); + rhashtable_destroy(&macsec->sci_hash); + mutex_destroy(&macsec->lock); + kfree(macsec); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h new file mode 100644 index 000000000..347380a2c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#ifndef __MLX5_EN_ACCEL_MACSEC_H__ +#define __MLX5_EN_ACCEL_MACSEC_H__ + +#ifdef CONFIG_MLX5_EN_MACSEC + +#include <linux/mlx5/driver.h> +#include <net/macsec.h> +#include <net/dst_metadata.h> + +/* Bit31 - 30: MACsec marker, Bit15-0: MACsec id */ +#define MLX5_MACEC_RX_FS_ID_MAX USHRT_MAX /* Must be power of two */ +#define MLX5_MACSEC_RX_FS_ID_MASK MLX5_MACEC_RX_FS_ID_MAX +#define MLX5_MACSEC_METADATA_MARKER(metadata) ((((metadata) >> 30) & 0x3) == 0x1) +#define MLX5_MACSEC_RX_METADAT_HANDLE(metadata) ((metadata) & MLX5_MACSEC_RX_FS_ID_MASK) + +struct mlx5e_priv; +struct mlx5e_macsec; + +struct mlx5e_macsec_stats { + u64 macsec_rx_pkts; + u64 macsec_rx_bytes; + u64 macsec_rx_pkts_drop; + u64 macsec_rx_bytes_drop; + u64 macsec_tx_pkts; + u64 macsec_tx_bytes; + u64 macsec_tx_pkts_drop; + u64 macsec_tx_bytes_drop; +}; + +void mlx5e_macsec_build_netdev(struct mlx5e_priv *priv); +int mlx5e_macsec_init(struct mlx5e_priv *priv); +void mlx5e_macsec_cleanup(struct mlx5e_priv *priv); +bool mlx5e_macsec_handle_tx_skb(struct mlx5e_macsec *macsec, struct sk_buff *skb); +void mlx5e_macsec_tx_build_eseg(struct mlx5e_macsec *macsec, + struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg); + +static inline bool mlx5e_macsec_skb_is_offload(struct sk_buff *skb) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + + return md_dst && (md_dst->type == METADATA_MACSEC); +} + +static inline bool mlx5e_macsec_is_rx_flow(struct mlx5_cqe64 *cqe) +{ + return MLX5_MACSEC_METADATA_MARKER(be32_to_cpu(cqe->ft_metadata)); +} + +void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb, + struct mlx5_cqe64 *cqe); +bool mlx5e_is_macsec_device(const struct mlx5_core_dev *mdev); +void mlx5e_macsec_get_stats_fill(struct mlx5e_macsec *macsec, void *macsec_stats); +struct mlx5e_macsec_stats *mlx5e_macsec_get_stats(struct mlx5e_macsec *macsec); + +#else + +static inline void mlx5e_macsec_build_netdev(struct mlx5e_priv *priv) {} +static inline int mlx5e_macsec_init(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_macsec_cleanup(struct mlx5e_priv *priv) {} +static inline bool mlx5e_macsec_skb_is_offload(struct sk_buff *skb) { return false; } +static inline bool mlx5e_macsec_is_rx_flow(struct mlx5_cqe64 *cqe) { return false; } +static inline void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev, + struct sk_buff *skb, + struct mlx5_cqe64 *cqe) +{} +static inline bool mlx5e_is_macsec_device(const struct mlx5_core_dev *mdev) { return false; } +#endif /* CONFIG_MLX5_EN_MACSEC */ + +#endif /* __MLX5_ACCEL_EN_MACSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c new file mode 100644 index 000000000..6ecf0bf23 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c @@ -0,0 +1,1390 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#include <net/macsec.h> +#include <linux/netdevice.h> +#include <linux/mlx5/qp.h> +#include "fs_core.h" +#include "en/fs.h" +#include "en_accel/macsec_fs.h" +#include "mlx5_core.h" + +/* MACsec TX flow steering */ +#define CRYPTO_NUM_MAXSEC_FTE BIT(15) +#define CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE 1 + +#define TX_CRYPTO_TABLE_LEVEL 0 +#define TX_CRYPTO_TABLE_NUM_GROUPS 3 +#define TX_CRYPTO_TABLE_MKE_GROUP_SIZE 1 +#define TX_CRYPTO_TABLE_SA_GROUP_SIZE \ + (CRYPTO_NUM_MAXSEC_FTE - (TX_CRYPTO_TABLE_MKE_GROUP_SIZE + \ + CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE)) +#define TX_CHECK_TABLE_LEVEL 1 +#define TX_CHECK_TABLE_NUM_FTE 2 +#define RX_CRYPTO_TABLE_LEVEL 0 +#define RX_CHECK_TABLE_LEVEL 1 +#define RX_CHECK_TABLE_NUM_FTE 3 +#define RX_CRYPTO_TABLE_NUM_GROUPS 3 +#define RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE \ + ((CRYPTO_NUM_MAXSEC_FTE - CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE) / 2) +#define RX_CRYPTO_TABLE_SA_RULE_WITHOUT_SCI_GROUP_SIZE \ + (CRYPTO_NUM_MAXSEC_FTE - RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE) +#define RX_NUM_OF_RULES_PER_SA 2 + +#define MLX5_MACSEC_TAG_LEN 8 /* SecTAG length with ethertype and without the optional SCI */ +#define MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK 0x23 +#define MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET 0x8 +#define MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET 0x5 +#define MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT (0x1 << MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET) +#define MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI 0x8 +#define MLX5_SECTAG_HEADER_SIZE_WITH_SCI (MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI + MACSEC_SCI_LEN) + +/* MACsec RX flow steering */ +#define MLX5_ETH_WQE_FT_META_MACSEC_MASK 0x3E + +struct mlx5_sectag_header { + __be16 ethertype; + u8 tci_an; + u8 sl; + u32 pn; + u8 sci[MACSEC_SCI_LEN]; /* optional */ +} __packed; + +struct mlx5e_macsec_tx_rule { + struct mlx5_flow_handle *rule; + struct mlx5_pkt_reformat *pkt_reformat; + u32 fs_id; +}; + +struct mlx5e_macsec_tables { + struct mlx5e_flow_table ft_crypto; + struct mlx5_flow_handle *crypto_miss_rule; + + struct mlx5_flow_table *ft_check; + struct mlx5_flow_group *ft_check_group; + struct mlx5_fc *check_miss_rule_counter; + struct mlx5_flow_handle *check_miss_rule; + struct mlx5_fc *check_rule_counter; + + u32 refcnt; +}; + +struct mlx5e_macsec_tx { + struct mlx5_flow_handle *crypto_mke_rule; + struct mlx5_flow_handle *check_rule; + + struct ida tx_halloc; + + struct mlx5e_macsec_tables tables; +}; + +struct mlx5e_macsec_rx_rule { + struct mlx5_flow_handle *rule[RX_NUM_OF_RULES_PER_SA]; + struct mlx5_modify_hdr *meta_modhdr; +}; + +struct mlx5e_macsec_rx { + struct mlx5_flow_handle *check_rule[2]; + struct mlx5_pkt_reformat *check_rule_pkt_reformat[2]; + + struct mlx5e_macsec_tables tables; +}; + +union mlx5e_macsec_rule { + struct mlx5e_macsec_tx_rule tx_rule; + struct mlx5e_macsec_rx_rule rx_rule; +}; + +struct mlx5e_macsec_fs { + struct mlx5_core_dev *mdev; + struct net_device *netdev; + struct mlx5e_macsec_tx *tx_fs; + struct mlx5e_macsec_rx *rx_fs; +}; + +static void macsec_fs_tx_destroy(struct mlx5e_macsec_fs *macsec_fs) +{ + struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs; + struct mlx5e_macsec_tables *tx_tables; + + tx_tables = &tx_fs->tables; + + /* Tx check table */ + if (tx_fs->check_rule) { + mlx5_del_flow_rules(tx_fs->check_rule); + tx_fs->check_rule = NULL; + } + + if (tx_tables->check_miss_rule) { + mlx5_del_flow_rules(tx_tables->check_miss_rule); + tx_tables->check_miss_rule = NULL; + } + + if (tx_tables->ft_check_group) { + mlx5_destroy_flow_group(tx_tables->ft_check_group); + 
tx_tables->ft_check_group = NULL; + } + + if (tx_tables->ft_check) { + mlx5_destroy_flow_table(tx_tables->ft_check); + tx_tables->ft_check = NULL; + } + + /* Tx crypto table */ + if (tx_fs->crypto_mke_rule) { + mlx5_del_flow_rules(tx_fs->crypto_mke_rule); + tx_fs->crypto_mke_rule = NULL; + } + + if (tx_tables->crypto_miss_rule) { + mlx5_del_flow_rules(tx_tables->crypto_miss_rule); + tx_tables->crypto_miss_rule = NULL; + } + + mlx5e_destroy_flow_table(&tx_tables->ft_crypto); +} + +static int macsec_fs_tx_create_crypto_table_groups(struct mlx5e_flow_table *ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int mclen = MLX5_ST_SZ_BYTES(fte_match_param); + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(TX_CRYPTO_TABLE_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + + if (!in) { + kfree(ft->g); + ft->g = NULL; + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + /* Flow Group for MKE match */ + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + + MLX5_SET_CFG(in, start_flow_index, ix); + ix += TX_CRYPTO_TABLE_MKE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Flow Group for SA rules */ + memset(in, 0, inlen); + memset(mc, 0, mclen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2); + MLX5_SET(fte_match_param, mc, misc_parameters_2.metadata_reg_a, + MLX5_ETH_WQE_FT_META_MACSEC_MASK); + + MLX5_SET_CFG(in, start_flow_index, ix); + ix += TX_CRYPTO_TABLE_SA_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Flow Group for l2 traps */ + memset(in, 0, inlen); + memset(mc, 0, mclen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + kvfree(in); + + return err; +} + +static struct mlx5_flow_table + *macsec_fs_auto_group_table_create(struct mlx5_flow_namespace *ns, int flags, + int level, int max_fte) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_table *fdb = NULL; + + /* reserve entry for the match all miss group and rule */ + ft_attr.autogroup.num_reserved_entries = 1; + ft_attr.autogroup.max_num_groups = 1; + ft_attr.prio = 0; + ft_attr.flags = flags; + ft_attr.level = level; + ft_attr.max_fte = max_fte; + + fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + + return fdb; +} + +static int macsec_fs_tx_create(struct mlx5e_macsec_fs *macsec_fs) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs; + struct net_device *netdev = macsec_fs->netdev; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_destination dest = {}; + struct mlx5e_macsec_tables *tx_tables; + struct mlx5_flow_act flow_act = {}; + struct mlx5e_flow_table *ft_crypto; + struct mlx5_flow_table *flow_table; + struct mlx5_flow_group *flow_group; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_handle *rule; + 
struct mlx5_flow_spec *spec; + u32 *flow_group_in; + int err; + + ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_EGRESS_MACSEC); + if (!ns) + return -ENOMEM; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) { + err = -ENOMEM; + goto out_spec; + } + + tx_tables = &tx_fs->tables; + ft_crypto = &tx_tables->ft_crypto; + + /* Tx crypto table */ + ft_attr.flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + ft_attr.level = TX_CRYPTO_TABLE_LEVEL; + ft_attr.max_fte = CRYPTO_NUM_MAXSEC_FTE; + + flow_table = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(flow_table)) { + err = PTR_ERR(flow_table); + netdev_err(netdev, "Failed to create MACsec Tx crypto table err(%d)\n", err); + goto out_flow_group; + } + ft_crypto->t = flow_table; + + /* Tx crypto table groups */ + err = macsec_fs_tx_create_crypto_table_groups(ft_crypto); + if (err) { + netdev_err(netdev, + "Failed to create default flow group for MACsec Tx crypto table err(%d)\n", + err); + goto err; + } + + /* Tx crypto table MKE rule - MKE packets shouldn't be offloaded */ + memset(&flow_act, 0, sizeof(flow_act)); + memset(spec, 0, sizeof(*spec)); + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ETH_P_PAE); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + + rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, NULL, 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(netdev, "Failed to add MACsec TX MKE rule, err=%d\n", err); + goto err; + } + tx_fs->crypto_mke_rule = rule; + + /* Tx crypto table Default miss rule */ + memset(&flow_act, 0, sizeof(flow_act)); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + rule = mlx5_add_flow_rules(ft_crypto->t, NULL, &flow_act, NULL, 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(netdev, "Failed to add MACsec Tx table default miss rule %d\n", err); + goto err; + } + tx_tables->crypto_miss_rule = rule; + + /* Tx check table */ + flow_table = macsec_fs_auto_group_table_create(ns, 0, TX_CHECK_TABLE_LEVEL, + TX_CHECK_TABLE_NUM_FTE); + if (IS_ERR(flow_table)) { + err = PTR_ERR(flow_table); + netdev_err(netdev, "fail to create MACsec TX check table, err(%d)\n", err); + goto err; + } + tx_tables->ft_check = flow_table; + + /* Tx check table Default miss group/rule */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_table->max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_table->max_fte - 1); + flow_group = mlx5_create_flow_group(tx_tables->ft_check, flow_group_in); + if (IS_ERR(flow_group)) { + err = PTR_ERR(flow_group); + netdev_err(netdev, + "Failed to create default flow group for MACsec Tx crypto table err(%d)\n", + err); + goto err; + } + tx_tables->ft_check_group = flow_group; + + /* Tx check table default drop rule */ + memset(&dest, 0, sizeof(struct mlx5_flow_destination)); + memset(&flow_act, 0, sizeof(flow_act)); + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter_id = mlx5_fc_id(tx_tables->check_miss_rule_counter); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; + rule = mlx5_add_flow_rules(tx_tables->ft_check, NULL, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(netdev, "Failed to added MACsec tx check drop rule, 
err(%d)\n", err);
+ goto err;
+ }
+ tx_tables->check_miss_rule = rule;
+
+ /* Tx check table rule */
+ memset(spec, 0, sizeof(struct mlx5_flow_spec));
+ memset(&dest, 0, sizeof(struct mlx5_flow_destination));
+ memset(&flow_act, 0, sizeof(flow_act));
+
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_4);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_4, 0);
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+
+ flow_act.flags = FLOW_ACT_NO_APPEND;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW | MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest.counter_id = mlx5_fc_id(tx_tables->check_rule_counter);
+ rule = mlx5_add_flow_rules(tx_tables->ft_check, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to add MACsec check rule, err=%d\n", err);
+ goto err;
+ }
+ tx_fs->check_rule = rule;
+
+ goto out_flow_group;
+
+err:
+ macsec_fs_tx_destroy(macsec_fs);
+out_flow_group:
+ kvfree(flow_group_in);
+out_spec:
+ kvfree(spec);
+ return err;
+}
+
+static int macsec_fs_tx_ft_get(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs;
+ struct mlx5e_macsec_tables *tx_tables;
+ int err = 0;
+
+ tx_tables = &tx_fs->tables;
+ if (tx_tables->refcnt)
+ goto out;
+
+ err = macsec_fs_tx_create(macsec_fs);
+ if (err)
+ return err;
+
+out:
+ tx_tables->refcnt++;
+ return err;
+}
+
+static void macsec_fs_tx_ft_put(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_tables *tx_tables = &macsec_fs->tx_fs->tables;
+
+ if (--tx_tables->refcnt)
+ return;
+
+ macsec_fs_tx_destroy(macsec_fs);
+}
+
+static int macsec_fs_tx_setup_fte(struct mlx5e_macsec_fs *macsec_fs,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_act *flow_act,
+ u32 macsec_obj_id,
+ u32 *fs_id)
+{
+ struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs;
+ int err = 0;
+ u32 id;
+
+ err = ida_alloc_range(&tx_fs->tx_halloc, 1,
+ MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES,
+ GFP_KERNEL);
+ if (err < 0)
+ return err;
+
+ id = err;
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+
+ /* Metadata match */
+ MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_a,
+ MLX5_ETH_WQE_FT_META_MACSEC_MASK);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_a,
+ MLX5_ETH_WQE_FT_META_MACSEC | id << 2);
+
+ *fs_id = id;
+ flow_act->crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC;
+ flow_act->crypto.obj_id = macsec_obj_id;
+
+ mlx5_core_dbg(macsec_fs->mdev, "Tx fte: macsec obj_id %u, fs_id %u\n", macsec_obj_id, id);
+ return 0;
+}
+
+static void macsec_fs_tx_create_sectag_header(const struct macsec_context *ctx,
+ char *reformatbf,
+ size_t *reformat_size)
+{
+ const struct macsec_secy *secy = ctx->secy;
+ bool sci_present = macsec_send_sci(secy);
+ struct mlx5_sectag_header sectag = {};
+ const struct macsec_tx_sc *tx_sc;
+
+ tx_sc = &secy->tx_sc;
+ sectag.ethertype = htons(ETH_P_MACSEC);
+
+ if (sci_present) {
+ sectag.tci_an |= MACSEC_TCI_SC;
+ memcpy(&sectag.sci, &secy->sci,
+ sizeof(sectag.sci));
+ } else {
+ if (tx_sc->end_station)
+ sectag.tci_an |= MACSEC_TCI_ES;
+ if (tx_sc->scb)
+ sectag.tci_an |= MACSEC_TCI_SCB;
+ }
+
+ /* With GCM, C/E clear for !encrypt, both set for encrypt */
+ if (tx_sc->encrypt)
+ sectag.tci_an |= MACSEC_TCI_CONFID;
+ else if (secy->icv_len != MACSEC_DEFAULT_ICV_LEN)
+ sectag.tci_an |= MACSEC_TCI_C;
+
+ sectag.tci_an |= tx_sc->encoding_sa;
+
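/* Annotation (illustrative sketch, not part of the original patch): the
 * template assembled above is what the ADD_MACSEC packet-reformat prepends
 * after the Ethernet header. Assuming the standard IEEE 802.1AE TCI bit
 * values (ES=0x40, SC=0x20, SCB=0x10, E=0x08, C=0x04, AN in the two low
 * bits), an encrypting SecY that sends the SCI with encoding_sa 0 would
 * yield:
 *
 *   ethertype = 0x88E5, tci_an = MACSEC_TCI_SC | MACSEC_TCI_CONFID = 0x2C,
 *   sl = 0, pn = 0, then the 8-byte SCI
 *
 * i.e. MLX5_MACSEC_TAG_LEN bytes (2 + 1 + 1 + 4 = 8) plus MACSEC_SCI_LEN.
 * The short-length and packet-number fields are left zero in the template
 * and are expected to be maintained per packet by the device.
 */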
+ *reformat_size = MLX5_MACSEC_TAG_LEN + (sci_present ? MACSEC_SCI_LEN : 0);
+
+ memcpy(reformatbf, &sectag, *reformat_size);
+}
+
+static void macsec_fs_tx_del_rule(struct mlx5e_macsec_fs *macsec_fs,
+ struct mlx5e_macsec_tx_rule *tx_rule)
+{
+ if (tx_rule->rule) {
+ mlx5_del_flow_rules(tx_rule->rule);
+ tx_rule->rule = NULL;
+ }
+
+ if (tx_rule->pkt_reformat) {
+ mlx5_packet_reformat_dealloc(macsec_fs->mdev, tx_rule->pkt_reformat);
+ tx_rule->pkt_reformat = NULL;
+ }
+
+ if (tx_rule->fs_id) {
+ ida_free(&macsec_fs->tx_fs->tx_halloc, tx_rule->fs_id);
+ tx_rule->fs_id = 0;
+ }
+
+ kfree(tx_rule);
+
+ macsec_fs_tx_ft_put(macsec_fs);
+}
+
+static union mlx5e_macsec_rule *
+macsec_fs_tx_add_rule(struct mlx5e_macsec_fs *macsec_fs,
+ const struct macsec_context *macsec_ctx,
+ struct mlx5_macsec_rule_attrs *attrs,
+ u32 *sa_fs_id)
+{
+ char reformatbf[MLX5_MACSEC_TAG_LEN + MACSEC_SCI_LEN];
+ struct mlx5_pkt_reformat_params reformat_params = {};
+ struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs;
+ struct net_device *netdev = macsec_fs->netdev;
+ union mlx5e_macsec_rule *macsec_rule = NULL;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5e_macsec_tables *tx_tables;
+ struct mlx5e_macsec_tx_rule *tx_rule;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ size_t reformat_size;
+ int err = 0;
+ u32 fs_id;
+
+ tx_tables = &tx_fs->tables;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return NULL;
+
+ err = macsec_fs_tx_ft_get(macsec_fs);
+ if (err)
+ goto out_spec;
+
+ macsec_rule = kzalloc(sizeof(*macsec_rule), GFP_KERNEL);
+ if (!macsec_rule) {
+ macsec_fs_tx_ft_put(macsec_fs);
+ goto out_spec;
+ }
+
+ tx_rule = &macsec_rule->tx_rule;
+
+ /* Tx crypto table crypto rule */
+ macsec_fs_tx_create_sectag_header(macsec_ctx, reformatbf, &reformat_size);
+
+ reformat_params.type = MLX5_REFORMAT_TYPE_ADD_MACSEC;
+ reformat_params.size = reformat_size;
+ reformat_params.data = reformatbf;
+ flow_act.pkt_reformat = mlx5_packet_reformat_alloc(macsec_fs->mdev,
+ &reformat_params,
+ MLX5_FLOW_NAMESPACE_EGRESS_MACSEC);
+ if (IS_ERR(flow_act.pkt_reformat)) {
+ err = PTR_ERR(flow_act.pkt_reformat);
+ netdev_err(netdev, "Failed to allocate MACsec Tx reformat context err=%d\n", err);
+ goto err;
+ }
+ tx_rule->pkt_reformat = flow_act.pkt_reformat;
+
+ err = macsec_fs_tx_setup_fte(macsec_fs, spec, &flow_act, attrs->macsec_obj_id, &fs_id);
+ if (err) {
+ netdev_err(netdev,
+ "Failed to add packet reformat for MACsec TX crypto rule, err=%d\n",
+ err);
+ goto err;
+ }
+
+ tx_rule->fs_id = fs_id;
+ *sa_fs_id = fs_id;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = tx_tables->ft_check;
+ rule = mlx5_add_flow_rules(tx_tables->ft_crypto.t, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to add MACsec TX crypto rule, err=%d\n", err);
+ goto err;
+ }
+ tx_rule->rule = rule;
+
+ goto out_spec;
+
+err:
+ macsec_fs_tx_del_rule(macsec_fs, tx_rule);
+ macsec_rule = NULL;
+out_spec:
+ kvfree(spec);
+
+ return macsec_rule;
+}
+
+static void macsec_fs_tx_cleanup(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs;
+ struct mlx5_core_dev *mdev = macsec_fs->mdev;
+ struct mlx5e_macsec_tables *tx_tables;
+
+ if (!tx_fs)
+ return;
+
+ tx_tables = &tx_fs->tables;
+ if (tx_tables->refcnt) {
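/* Annotation (not part of the original patch): the Tx datapath reaches these
 * tables through the send-WQE flow-table metadata. macsec_fs_tx_setup_fte()
 * above allocates an fs_id in 1..MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES from
 * tx_halloc and matches metadata_reg_a against
 * MLX5_ETH_WQE_FT_META_MACSEC | fs_id << 2 under the 0x3E mask; the Tx path
 * is expected to stamp the same value into the send WQE, so the packet lands
 * on its SA rule, is encrypted, gets the SecTAG prepended and is forwarded to
 * the check table. There, a zero syndrome in metadata_reg_c_4 is counted as
 * macsec_tx_pkts, while anything else falls through to the default drop rule
 * behind macsec_tx_pkts_drop.
 */
+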
netdev_err(macsec_fs->netdev, + "Can't destroy MACsec offload tx_fs, refcnt(%u) isn't 0\n", + tx_tables->refcnt); + return; + } + + ida_destroy(&tx_fs->tx_halloc); + + if (tx_tables->check_miss_rule_counter) { + mlx5_fc_destroy(mdev, tx_tables->check_miss_rule_counter); + tx_tables->check_miss_rule_counter = NULL; + } + + if (tx_tables->check_rule_counter) { + mlx5_fc_destroy(mdev, tx_tables->check_rule_counter); + tx_tables->check_rule_counter = NULL; + } + + kfree(tx_fs); + macsec_fs->tx_fs = NULL; +} + +static int macsec_fs_tx_init(struct mlx5e_macsec_fs *macsec_fs) +{ + struct net_device *netdev = macsec_fs->netdev; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5e_macsec_tables *tx_tables; + struct mlx5e_macsec_tx *tx_fs; + struct mlx5_fc *flow_counter; + int err; + + tx_fs = kzalloc(sizeof(*tx_fs), GFP_KERNEL); + if (!tx_fs) + return -ENOMEM; + + tx_tables = &tx_fs->tables; + + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + netdev_err(netdev, + "Failed to create MACsec Tx encrypt flow counter, err(%d)\n", + err); + goto err_encrypt_counter; + } + tx_tables->check_rule_counter = flow_counter; + + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + netdev_err(netdev, + "Failed to create MACsec Tx drop flow counter, err(%d)\n", + err); + goto err_drop_counter; + } + tx_tables->check_miss_rule_counter = flow_counter; + + ida_init(&tx_fs->tx_halloc); + + macsec_fs->tx_fs = tx_fs; + + return 0; + +err_drop_counter: + mlx5_fc_destroy(mdev, tx_tables->check_rule_counter); + tx_tables->check_rule_counter = NULL; + +err_encrypt_counter: + kfree(tx_fs); + macsec_fs->tx_fs = NULL; + + return err; +} + +static void macsec_fs_rx_destroy(struct mlx5e_macsec_fs *macsec_fs) +{ + struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct mlx5e_macsec_tables *rx_tables; + int i; + + /* Rx check table */ + for (i = 1; i >= 0; --i) { + if (rx_fs->check_rule[i]) { + mlx5_del_flow_rules(rx_fs->check_rule[i]); + rx_fs->check_rule[i] = NULL; + } + + if (rx_fs->check_rule_pkt_reformat[i]) { + mlx5_packet_reformat_dealloc(macsec_fs->mdev, + rx_fs->check_rule_pkt_reformat[i]); + rx_fs->check_rule_pkt_reformat[i] = NULL; + } + } + + rx_tables = &rx_fs->tables; + + if (rx_tables->check_miss_rule) { + mlx5_del_flow_rules(rx_tables->check_miss_rule); + rx_tables->check_miss_rule = NULL; + } + + if (rx_tables->ft_check_group) { + mlx5_destroy_flow_group(rx_tables->ft_check_group); + rx_tables->ft_check_group = NULL; + } + + if (rx_tables->ft_check) { + mlx5_destroy_flow_table(rx_tables->ft_check); + rx_tables->ft_check = NULL; + } + + /* Rx crypto table */ + if (rx_tables->crypto_miss_rule) { + mlx5_del_flow_rules(rx_tables->crypto_miss_rule); + rx_tables->crypto_miss_rule = NULL; + } + + mlx5e_destroy_flow_table(&rx_tables->ft_crypto); +} + +static int macsec_fs_rx_create_crypto_table_groups(struct mlx5e_flow_table *ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int mclen = MLX5_ST_SZ_BYTES(fte_match_param); + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(RX_CRYPTO_TABLE_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ft->g); + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + /* Flow group for SA rule with SCI */ + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS | + MLX5_MATCH_MISC_PARAMETERS_5); + 
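/* Annotation (not part of the original patch): Rx SAs are matched on the
 * parsed SecTAG exposed through misc_parameters_5. The TCI/AN byte is matched
 * via macsec_tag_0 (shifted by MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET) under
 * the 0x23 bitmask, i.e. only the SC-present bit (0x20) and the two AN bits
 * are significant. Each SA therefore installs up to RX_NUM_OF_RULES_PER_SA
 * rules: one in the first group below, keyed on an explicit SCI carried in
 * macsec_tag_2/macsec_tag_3, and one in the second group for frames sent
 * without an SCI, where the source MAC plus the default port number stand in
 * for the SCI (see macsec_fs_rx_setup_fte()).
 */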
MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + + MLX5_SET(fte_match_param, mc, misc_parameters_5.macsec_tag_0, + MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << + MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + MLX5_SET_TO_ONES(fte_match_param, mc, misc_parameters_5.macsec_tag_2); + MLX5_SET_TO_ONES(fte_match_param, mc, misc_parameters_5.macsec_tag_3); + + MLX5_SET_CFG(in, start_flow_index, ix); + ix += RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Flow group for SA rule without SCI */ + memset(in, 0, inlen); + memset(mc, 0, mclen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS | + MLX5_MATCH_MISC_PARAMETERS_5); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.smac_15_0); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + + MLX5_SET(fte_match_param, mc, misc_parameters_5.macsec_tag_0, + MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + + MLX5_SET_CFG(in, start_flow_index, ix); + ix += RX_CRYPTO_TABLE_SA_RULE_WITHOUT_SCI_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Flow Group for l2 traps */ + memset(in, 0, inlen); + memset(mc, 0, mclen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + kvfree(in); + + return err; +} + +static int macsec_fs_rx_create_check_decap_rule(struct mlx5e_macsec_fs *macsec_fs, + struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_spec *spec, + int reformat_param_size) +{ + int rule_index = (reformat_param_size == MLX5_SECTAG_HEADER_SIZE_WITH_SCI) ? 
0 : 1; + u8 mlx5_reformat_buf[MLX5_SECTAG_HEADER_SIZE_WITH_SCI]; + struct mlx5_pkt_reformat_params reformat_params = {}; + struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct net_device *netdev = macsec_fs->netdev; + struct mlx5e_macsec_tables *rx_tables; + struct mlx5_flow_handle *rule; + int err = 0; + + rx_tables = &rx_fs->tables; + + /* Rx check table decap 16B rule */ + memset(dest, 0, sizeof(*dest)); + memset(flow_act, 0, sizeof(*flow_act)); + memset(spec, 0, sizeof(*spec)); + + reformat_params.type = MLX5_REFORMAT_TYPE_DEL_MACSEC; + reformat_params.size = reformat_param_size; + reformat_params.data = mlx5_reformat_buf; + flow_act->pkt_reformat = mlx5_packet_reformat_alloc(macsec_fs->mdev, + &reformat_params, + MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC); + if (IS_ERR(flow_act->pkt_reformat)) { + err = PTR_ERR(flow_act->pkt_reformat); + netdev_err(netdev, "Failed to allocate MACsec Rx reformat context err=%d\n", err); + return err; + } + rx_fs->check_rule_pkt_reformat[rule_index] = flow_act->pkt_reformat; + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + /* MACsec syndrome match */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.macsec_syndrome); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.macsec_syndrome, 0); + /* ASO return reg syndrome match */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_4); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_4, 0); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5; + /* Sectag TCI SC present bit*/ + MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_5.macsec_tag_0, + MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + + if (reformat_param_size == MLX5_SECTAG_HEADER_SIZE_WITH_SCI) + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_0, + MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT << + MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + + flow_act->flags = FLOW_ACT_NO_APPEND; + flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO | + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest->type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest->counter_id = mlx5_fc_id(rx_tables->check_rule_counter); + rule = mlx5_add_flow_rules(rx_tables->ft_check, spec, flow_act, dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(netdev, "Failed to add MACsec Rx check rule, err=%d\n", err); + return err; + } + + rx_fs->check_rule[rule_index] = rule; + + return 0; +} + +static int macsec_fs_rx_create(struct mlx5e_macsec_fs *macsec_fs) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct net_device *netdev = macsec_fs->netdev; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_destination dest = {}; + struct mlx5e_macsec_tables *rx_tables; + struct mlx5e_flow_table *ft_crypto; + struct mlx5_flow_table *flow_table; + struct mlx5_flow_group *flow_group; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + u32 *flow_group_in; + int err; + + ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC); + if (!ns) + return -ENOMEM; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) { + err = -ENOMEM; + goto free_spec; + } + + rx_tables = 
&rx_fs->tables; + ft_crypto = &rx_tables->ft_crypto; + + /* Rx crypto table */ + ft_attr.level = RX_CRYPTO_TABLE_LEVEL; + ft_attr.max_fte = CRYPTO_NUM_MAXSEC_FTE; + + flow_table = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(flow_table)) { + err = PTR_ERR(flow_table); + netdev_err(netdev, "Failed to create MACsec Rx crypto table err(%d)\n", err); + goto out_flow_group; + } + ft_crypto->t = flow_table; + + /* Rx crypto table groups */ + err = macsec_fs_rx_create_crypto_table_groups(ft_crypto); + if (err) { + netdev_err(netdev, + "Failed to create default flow group for MACsec Tx crypto table err(%d)\n", + err); + goto err; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + rule = mlx5_add_flow_rules(ft_crypto->t, NULL, &flow_act, NULL, 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(netdev, + "Failed to add MACsec Rx crypto table default miss rule %d\n", + err); + goto err; + } + rx_tables->crypto_miss_rule = rule; + + /* Rx check table */ + flow_table = macsec_fs_auto_group_table_create(ns, + MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT, + RX_CHECK_TABLE_LEVEL, + RX_CHECK_TABLE_NUM_FTE); + if (IS_ERR(flow_table)) { + err = PTR_ERR(flow_table); + netdev_err(netdev, "fail to create MACsec RX check table, err(%d)\n", err); + goto err; + } + rx_tables->ft_check = flow_table; + + /* Rx check table Default miss group/rule */ + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_table->max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_table->max_fte - 1); + flow_group = mlx5_create_flow_group(rx_tables->ft_check, flow_group_in); + if (IS_ERR(flow_group)) { + err = PTR_ERR(flow_group); + netdev_err(netdev, + "Failed to create default flow group for MACsec Rx check table err(%d)\n", + err); + goto err; + } + rx_tables->ft_check_group = flow_group; + + /* Rx check table default drop rule */ + memset(&flow_act, 0, sizeof(flow_act)); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter_id = mlx5_fc_id(rx_tables->check_miss_rule_counter); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; + rule = mlx5_add_flow_rules(rx_tables->ft_check, NULL, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(netdev, "Failed to added MACsec Rx check drop rule, err(%d)\n", err); + goto err; + } + rx_tables->check_miss_rule = rule; + + /* Rx check table decap rules */ + err = macsec_fs_rx_create_check_decap_rule(macsec_fs, &dest, &flow_act, spec, + MLX5_SECTAG_HEADER_SIZE_WITH_SCI); + if (err) + goto err; + + err = macsec_fs_rx_create_check_decap_rule(macsec_fs, &dest, &flow_act, spec, + MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI); + if (err) + goto err; + + goto out_flow_group; + +err: + macsec_fs_rx_destroy(macsec_fs); +out_flow_group: + kvfree(flow_group_in); +free_spec: + kvfree(spec); + return err; +} + +static int macsec_fs_rx_ft_get(struct mlx5e_macsec_fs *macsec_fs) +{ + struct mlx5e_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables; + int err = 0; + + if (rx_tables->refcnt) + goto out; + + err = macsec_fs_rx_create(macsec_fs); + if (err) + return err; + +out: + rx_tables->refcnt++; + return err; +} + +static void macsec_fs_rx_ft_put(struct mlx5e_macsec_fs *macsec_fs) +{ + struct mlx5e_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables; + + if (--rx_tables->refcnt) + return; + + macsec_fs_rx_destroy(macsec_fs); +} + +static void macsec_fs_rx_del_rule(struct mlx5e_macsec_fs *macsec_fs, + struct mlx5e_macsec_rx_rule *rx_rule) +{ + int i; + + for (i = 0; i < 
RX_NUM_OF_RULES_PER_SA; ++i) { + if (rx_rule->rule[i]) { + mlx5_del_flow_rules(rx_rule->rule[i]); + rx_rule->rule[i] = NULL; + } + } + + if (rx_rule->meta_modhdr) { + mlx5_modify_header_dealloc(macsec_fs->mdev, rx_rule->meta_modhdr); + rx_rule->meta_modhdr = NULL; + } + + kfree(rx_rule); + + macsec_fs_rx_ft_put(macsec_fs); +} + +static void macsec_fs_rx_setup_fte(struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_macsec_rule_attrs *attrs, + bool sci_present) +{ + u8 tci_an = (sci_present << MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET) | attrs->assoc_num; + struct mlx5_flow_act_crypto_params *crypto_params = &flow_act->crypto; + __be32 *sci_p = (__be32 *)(&attrs->sci); + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + /* MACsec ethertype */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ETH_P_MACSEC); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5; + + /* Sectag AN + TCI SC present bit*/ + MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_5.macsec_tag_0, + MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_0, + tci_an << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + + if (sci_present) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_5.macsec_tag_2); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_2, + be32_to_cpu(sci_p[0])); + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_5.macsec_tag_3); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_3, + be32_to_cpu(sci_p[1])); + } else { + /* When SCI isn't present in the Sectag, need to match the source */ + /* MAC address only if the SCI contains the default MACsec PORT */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.smac_47_16), + sci_p, ETH_ALEN); + } + + crypto_params->type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC; + crypto_params->obj_id = attrs->macsec_obj_id; +} + +static union mlx5e_macsec_rule * +macsec_fs_rx_add_rule(struct mlx5e_macsec_fs *macsec_fs, + const struct macsec_context *macsec_ctx, + struct mlx5_macsec_rule_attrs *attrs, + u32 fs_id) +{ + u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct net_device *netdev = macsec_fs->netdev; + union mlx5e_macsec_rule *macsec_rule = NULL; + struct mlx5_modify_hdr *modify_hdr = NULL; + struct mlx5_flow_destination dest = {}; + struct mlx5e_macsec_tables *rx_tables; + struct mlx5e_macsec_rx_rule *rx_rule; + struct mlx5_flow_act flow_act = {}; + struct mlx5e_flow_table *ft_crypto; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return NULL; + + err = macsec_fs_rx_ft_get(macsec_fs); + if (err) + goto out_spec; + + macsec_rule = kzalloc(sizeof(*macsec_rule), GFP_KERNEL); + if (!macsec_rule) { + macsec_fs_rx_ft_put(macsec_fs); + goto out_spec; + } + + rx_rule = &macsec_rule->rx_rule; + rx_tables = &rx_fs->tables; + ft_crypto = &rx_tables->ft_crypto; + + /* Set bit[31 - 30] macsec marker - 0x01 */ + /* Set bit[15-0] fs id */ + MLX5_SET(set_action_in, 
action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); + MLX5_SET(set_action_in, action, data, MLX5_MACSEC_RX_METADAT_HANDLE(fs_id) | BIT(30)); + MLX5_SET(set_action_in, action, offset, 0); + MLX5_SET(set_action_in, action, length, 32); + + modify_hdr = mlx5_modify_header_alloc(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC, + 1, action); + if (IS_ERR(modify_hdr)) { + err = PTR_ERR(modify_hdr); + netdev_err(netdev, "fail to alloc MACsec set modify_header_id err=%d\n", err); + modify_hdr = NULL; + goto err; + } + rx_rule->meta_modhdr = modify_hdr; + + /* Rx crypto table with SCI rule */ + macsec_fs_rx_setup_fte(spec, &flow_act, attrs, true); + + flow_act.modify_hdr = modify_hdr; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = rx_tables->ft_check; + rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(netdev, + "Failed to add SA with SCI rule to Rx crypto rule, err=%d\n", + err); + goto err; + } + rx_rule->rule[0] = rule; + + /* Rx crypto table without SCI rule */ + if ((cpu_to_be64((__force u64)attrs->sci) & 0xFFFF) == ntohs(MACSEC_PORT_ES)) { + memset(spec, 0, sizeof(struct mlx5_flow_spec)); + memset(&dest, 0, sizeof(struct mlx5_flow_destination)); + memset(&flow_act, 0, sizeof(flow_act)); + + macsec_fs_rx_setup_fte(spec, &flow_act, attrs, false); + + flow_act.modify_hdr = modify_hdr; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = rx_tables->ft_check; + rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(netdev, + "Failed to add SA without SCI rule to Rx crypto rule, err=%d\n", + err); + goto err; + } + rx_rule->rule[1] = rule; + } + + kvfree(spec); + return macsec_rule; + +err: + macsec_fs_rx_del_rule(macsec_fs, rx_rule); + macsec_rule = NULL; +out_spec: + kvfree(spec); + return macsec_rule; +} + +static int macsec_fs_rx_init(struct mlx5e_macsec_fs *macsec_fs) +{ + struct net_device *netdev = macsec_fs->netdev; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5e_macsec_tables *rx_tables; + struct mlx5e_macsec_rx *rx_fs; + struct mlx5_fc *flow_counter; + int err; + + rx_fs = kzalloc(sizeof(*rx_fs), GFP_KERNEL); + if (!rx_fs) + return -ENOMEM; + + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + netdev_err(netdev, + "Failed to create MACsec Rx encrypt flow counter, err(%d)\n", + err); + goto err_encrypt_counter; + } + + rx_tables = &rx_fs->tables; + rx_tables->check_rule_counter = flow_counter; + + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + netdev_err(netdev, + "Failed to create MACsec Rx drop flow counter, err(%d)\n", + err); + goto err_drop_counter; + } + rx_tables->check_miss_rule_counter = flow_counter; + + macsec_fs->rx_fs = rx_fs; + + return 0; + +err_drop_counter: + mlx5_fc_destroy(mdev, rx_tables->check_rule_counter); + rx_tables->check_rule_counter = NULL; + +err_encrypt_counter: + kfree(rx_fs); + macsec_fs->rx_fs = NULL; + + return err; +} + +static void macsec_fs_rx_cleanup(struct mlx5e_macsec_fs *macsec_fs) +{ + struct 
mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5e_macsec_tables *rx_tables; + + if (!rx_fs) + return; + + rx_tables = &rx_fs->tables; + + if (rx_tables->refcnt) { + netdev_err(macsec_fs->netdev, + "Can't destroy MACsec offload rx_fs, refcnt(%u) isn't 0\n", + rx_tables->refcnt); + return; + } + + if (rx_tables->check_miss_rule_counter) { + mlx5_fc_destroy(mdev, rx_tables->check_miss_rule_counter); + rx_tables->check_miss_rule_counter = NULL; + } + + if (rx_tables->check_rule_counter) { + mlx5_fc_destroy(mdev, rx_tables->check_rule_counter); + rx_tables->check_rule_counter = NULL; + } + + kfree(rx_fs); + macsec_fs->rx_fs = NULL; +} + +void mlx5e_macsec_fs_get_stats_fill(struct mlx5e_macsec_fs *macsec_fs, void *macsec_stats) +{ + struct mlx5e_macsec_stats *stats = (struct mlx5e_macsec_stats *)macsec_stats; + struct mlx5e_macsec_tables *tx_tables = &macsec_fs->tx_fs->tables; + struct mlx5e_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + + if (tx_tables->check_rule_counter) + mlx5_fc_query(mdev, tx_tables->check_rule_counter, + &stats->macsec_tx_pkts, &stats->macsec_tx_bytes); + + if (tx_tables->check_miss_rule_counter) + mlx5_fc_query(mdev, tx_tables->check_miss_rule_counter, + &stats->macsec_tx_pkts_drop, &stats->macsec_tx_bytes_drop); + + if (rx_tables->check_rule_counter) + mlx5_fc_query(mdev, rx_tables->check_rule_counter, + &stats->macsec_rx_pkts, &stats->macsec_rx_bytes); + + if (rx_tables->check_miss_rule_counter) + mlx5_fc_query(mdev, rx_tables->check_miss_rule_counter, + &stats->macsec_rx_pkts_drop, &stats->macsec_rx_bytes_drop); +} + +union mlx5e_macsec_rule * +mlx5e_macsec_fs_add_rule(struct mlx5e_macsec_fs *macsec_fs, + const struct macsec_context *macsec_ctx, + struct mlx5_macsec_rule_attrs *attrs, + u32 *sa_fs_id) +{ + return (attrs->action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) ? + macsec_fs_tx_add_rule(macsec_fs, macsec_ctx, attrs, sa_fs_id) : + macsec_fs_rx_add_rule(macsec_fs, macsec_ctx, attrs, *sa_fs_id); +} + +void mlx5e_macsec_fs_del_rule(struct mlx5e_macsec_fs *macsec_fs, + union mlx5e_macsec_rule *macsec_rule, + int action) +{ + (action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) ? 
+ macsec_fs_tx_del_rule(macsec_fs, &macsec_rule->tx_rule) : + macsec_fs_rx_del_rule(macsec_fs, &macsec_rule->rx_rule); +} + +void mlx5e_macsec_fs_cleanup(struct mlx5e_macsec_fs *macsec_fs) +{ + macsec_fs_rx_cleanup(macsec_fs); + macsec_fs_tx_cleanup(macsec_fs); + kfree(macsec_fs); +} + +struct mlx5e_macsec_fs * +mlx5e_macsec_fs_init(struct mlx5_core_dev *mdev, + struct net_device *netdev) +{ + struct mlx5e_macsec_fs *macsec_fs; + int err; + + macsec_fs = kzalloc(sizeof(*macsec_fs), GFP_KERNEL); + if (!macsec_fs) + return NULL; + + macsec_fs->mdev = mdev; + macsec_fs->netdev = netdev; + + err = macsec_fs_tx_init(macsec_fs); + if (err) { + netdev_err(netdev, "MACsec offload: Failed to init tx_fs, err=%d\n", err); + goto err; + } + + err = macsec_fs_rx_init(macsec_fs); + if (err) { + netdev_err(netdev, "MACsec offload: Failed to init tx_fs, err=%d\n", err); + goto tx_cleanup; + } + + return macsec_fs; + +tx_cleanup: + macsec_fs_tx_cleanup(macsec_fs); +err: + kfree(macsec_fs); + return NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.h new file mode 100644 index 000000000..b429648d4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_MACSEC_STEERING_H__ +#define __MLX5_MACSEC_STEERING_H__ + +#ifdef CONFIG_MLX5_EN_MACSEC + +#include "en_accel/macsec.h" + +#define MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES 16 + +struct mlx5e_macsec_fs; +union mlx5e_macsec_rule; + +struct mlx5_macsec_rule_attrs { + sci_t sci; + u32 macsec_obj_id; + u8 assoc_num; + int action; +}; + +enum mlx5_macsec_action { + MLX5_ACCEL_MACSEC_ACTION_ENCRYPT, + MLX5_ACCEL_MACSEC_ACTION_DECRYPT, +}; + +void mlx5e_macsec_fs_cleanup(struct mlx5e_macsec_fs *macsec_fs); + +struct mlx5e_macsec_fs * +mlx5e_macsec_fs_init(struct mlx5_core_dev *mdev, struct net_device *netdev); + +union mlx5e_macsec_rule * +mlx5e_macsec_fs_add_rule(struct mlx5e_macsec_fs *macsec_fs, + const struct macsec_context *ctx, + struct mlx5_macsec_rule_attrs *attrs, + u32 *sa_fs_id); + +void mlx5e_macsec_fs_del_rule(struct mlx5e_macsec_fs *macsec_fs, + union mlx5e_macsec_rule *macsec_rule, + int action); + +void mlx5e_macsec_fs_get_stats_fill(struct mlx5e_macsec_fs *macsec_fs, void *macsec_stats); + +#endif + +#endif /* __MLX5_MACSEC_STEERING_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c new file mode 100644 index 000000000..e50a2e3f3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#include <linux/ethtool.h> +#include <net/sock.h> + +#include "en.h" +#include "en_accel/macsec.h" + +static const struct counter_desc mlx5e_macsec_hw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_pkts_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_bytes_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_pkts_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_bytes_drop) }, +}; + +#define NUM_MACSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_macsec_hw_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(macsec_hw) +{ + if (!priv->macsec) + return 0; + + if (mlx5e_is_macsec_device(priv->mdev)) + return NUM_MACSEC_HW_COUNTERS; + + return 0; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(macsec_hw) {} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(macsec_hw) +{ + unsigned int i; + + if (!priv->macsec) + return idx; + + if (!mlx5e_is_macsec_device(priv->mdev)) + return idx; + + for (i = 0; i < NUM_MACSEC_HW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + mlx5e_macsec_hw_stats_desc[i].format); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(macsec_hw) +{ + int i; + + if (!priv->macsec) + return idx; + + if (!mlx5e_is_macsec_device(priv->mdev)) + return idx; + + mlx5e_macsec_get_stats_fill(priv->macsec, mlx5e_macsec_get_stats(priv->macsec)); + for (i = 0; i < NUM_MACSEC_HW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(mlx5e_macsec_get_stats(priv->macsec), + mlx5e_macsec_hw_stats_desc, + i); + + return idx; +} + +MLX5E_DEFINE_STATS_GRP(macsec_hw, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c new file mode 100644 index 000000000..58eacba6d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -0,0 +1,765 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/hash.h> +#include <linux/mlx5/fs.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include "en.h" + +#define ARFS_HASH_SHIFT BITS_PER_BYTE +#define ARFS_HASH_SIZE BIT(BITS_PER_BYTE) + +struct arfs_table { + struct mlx5e_flow_table ft; + struct mlx5_flow_handle *default_rule; + struct hlist_head rules_hash[ARFS_HASH_SIZE]; +}; + +enum arfs_type { + ARFS_IPV4_TCP, + ARFS_IPV6_TCP, + ARFS_IPV4_UDP, + ARFS_IPV6_UDP, + ARFS_NUM_TYPES, +}; + +struct mlx5e_arfs_tables { + struct arfs_table arfs_tables[ARFS_NUM_TYPES]; + /* Protect aRFS rules list */ + spinlock_t arfs_lock; + struct list_head rules; + int last_filter_id; + struct workqueue_struct *wq; +}; + +struct arfs_tuple { + __be16 etype; + u8 ip_proto; + union { + __be32 src_ipv4; + struct in6_addr src_ipv6; + }; + union { + __be32 dst_ipv4; + struct in6_addr dst_ipv6; + }; + __be16 src_port; + __be16 dst_port; +}; + +struct arfs_rule { + struct mlx5e_priv *priv; + struct work_struct arfs_work; + struct mlx5_flow_handle *rule; + struct hlist_node hlist; + int rxq; + /* Flow ID passed to ndo_rx_flow_steer */ + int flow_id; + /* Filter ID returned by ndo_rx_flow_steer */ + int filter_id; + struct arfs_tuple tuple; +}; + +#define mlx5e_for_each_arfs_rule(hn, tmp, arfs_tables, i, j) \ + for (i = 0; i < ARFS_NUM_TYPES; i++) \ + mlx5e_for_each_hash_arfs_rule(hn, tmp, arfs_tables[i].rules_hash, j) + +#define mlx5e_for_each_hash_arfs_rule(hn, tmp, hash, j) \ + for (j = 0; j < ARFS_HASH_SIZE; j++) \ + hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist) + +static enum mlx5_traffic_types arfs_get_tt(enum arfs_type type) +{ + switch (type) { + case ARFS_IPV4_TCP: + return MLX5_TT_IPV4_TCP; + case ARFS_IPV4_UDP: + return MLX5_TT_IPV4_UDP; + case ARFS_IPV6_TCP: + return MLX5_TT_IPV6_TCP; + case ARFS_IPV6_UDP: + return MLX5_TT_IPV6_UDP; + default: + return -EINVAL; + } +} + +static int arfs_disable(struct mlx5e_flow_steering *fs) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + int err, i; + + for (i = 0; i < ARFS_NUM_TYPES; i++) { + /* Modify ttc rules destination back to their default */ + err = mlx5_ttc_fwd_default_dest(ttc, arfs_get_tt(i)); + if (err) { + fs_err(fs, + "%s: modify ttc[%d] default destination failed, err(%d)\n", + __func__, arfs_get_tt(i), err); + return err; + } + } + return 0; +} + +static void arfs_del_rules(struct mlx5e_flow_steering *fs); + +int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs) +{ + /* Moving to switchdev mode, fs->arfs is freed by mlx5e_nic_profile + * cleanup_rx callback and it is not recreated when + * mlx5e_uplink_rep_profile is loaded as mlx5e_create_flow_steering() + * is not called by the uplink_rep profile init_rx callback. Thus, if + * ntuple is set, moving to switchdev flow will enter this function + * with fs->arfs nullified. 
+ */ + if (!mlx5e_fs_get_arfs(fs)) + return 0; + + arfs_del_rules(fs); + + return arfs_disable(fs); +} + +int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs); + struct mlx5_flow_destination dest = {}; + int err, i; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + for (i = 0; i < ARFS_NUM_TYPES; i++) { + dest.ft = arfs->arfs_tables[i].ft.t; + /* Modify ttc rules destination to point on the aRFS FTs */ + err = mlx5_ttc_fwd_dest(ttc, arfs_get_tt(i), &dest); + if (err) { + fs_err(fs, "%s: modify ttc[%d] dest to arfs, failed err(%d)\n", + __func__, arfs_get_tt(i), err); + arfs_disable(fs); + return err; + } + } + return 0; +} + +static void arfs_destroy_table(struct arfs_table *arfs_t) +{ + mlx5_del_flow_rules(arfs_t->default_rule); + mlx5e_destroy_flow_table(&arfs_t->ft); +} + +static void _mlx5e_cleanup_tables(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs); + int i; + + arfs_del_rules(fs); + destroy_workqueue(arfs->wq); + for (i = 0; i < ARFS_NUM_TYPES; i++) { + if (!IS_ERR_OR_NULL(arfs->arfs_tables[i].ft.t)) + arfs_destroy_table(&arfs->arfs_tables[i]); + } +} + +void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple) +{ + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs); + + if (!ntuple) + return; + + _mlx5e_cleanup_tables(fs); + mlx5e_fs_set_arfs(fs, NULL); + kvfree(arfs); +} + +static int arfs_add_default_rule(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, + enum arfs_type type) +{ + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs); + struct arfs_table *arfs_t = &arfs->arfs_tables[type]; + struct mlx5_flow_destination dest = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + enum mlx5_traffic_types tt; + int err = 0; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + tt = arfs_get_tt(type); + if (tt == -EINVAL) { + fs_err(fs, "%s: bad arfs_type: %d\n", __func__, type); + return -EINVAL; + } + + /* FIXME: Must use mlx5_ttc_get_default_dest(), + * but can't since TTC default is not setup yet ! 
+ */ + dest.tir_num = mlx5e_rx_res_get_tirn_rss(rx_res, tt); + arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, NULL, + &flow_act, + &dest, 1); + if (IS_ERR(arfs_t->default_rule)) { + err = PTR_ERR(arfs_t->default_rule); + arfs_t->default_rule = NULL; + fs_err(fs, "%s: add rule failed, arfs type=%d\n", __func__, type); + } + + return err; +} + +#define MLX5E_ARFS_NUM_GROUPS 2 +#define MLX5E_ARFS_GROUP1_SIZE (BIT(16) - 1) +#define MLX5E_ARFS_GROUP2_SIZE BIT(0) +#define MLX5E_ARFS_TABLE_SIZE (MLX5E_ARFS_GROUP1_SIZE +\ + MLX5E_ARFS_GROUP2_SIZE) +static int arfs_create_groups(struct mlx5e_flow_table *ft, + enum arfs_type type) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *outer_headers_c; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_ARFS_NUM_GROUPS, + sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_free_g; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, + outer_headers); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ethertype); + switch (type) { + case ARFS_IPV4_TCP: + case ARFS_IPV6_TCP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport); + break; + case ARFS_IPV4_UDP: + case ARFS_IPV6_UDP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_sport); + break; + default: + err = -EINVAL; + goto err_free_in; + } + + switch (type) { + case ARFS_IPV4_TCP: + case ARFS_IPV4_UDP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + break; + case ARFS_IPV6_TCP: + case ARFS_IPV6_UDP: + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xff, 16); + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, 16); + break; + default: + err = -EINVAL; + goto err_free_in; + } + + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_ARFS_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_clean_group; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_ARFS_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_clean_group; + ft->num_groups++; + + kvfree(in); + return 0; + +err_clean_group: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; +err_free_in: + kvfree(in); +err_free_g: + kfree(ft->g); + ft->g = NULL; + return err; +} + +static int arfs_create_table(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, + enum arfs_type type) +{ + struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false); + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs); + struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft; + struct mlx5_flow_table_attr ft_attr = {}; + int err; + + ft->num_groups = 0; + + ft_attr.max_fte = MLX5E_ARFS_TABLE_SIZE; + ft_attr.level = MLX5E_ARFS_FT_LEVEL; + ft_attr.prio = 
MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + err = arfs_create_groups(ft, type); + if (err) + goto err; + + err = arfs_add_default_rule(fs, rx_res, type); + if (err) + goto err; + + return 0; +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, bool ntuple) +{ + struct mlx5e_arfs_tables *arfs; + int err = -ENOMEM; + int i; + + if (!ntuple) + return 0; + + arfs = kvzalloc(sizeof(*arfs), GFP_KERNEL); + if (!arfs) + return -ENOMEM; + + spin_lock_init(&arfs->arfs_lock); + INIT_LIST_HEAD(&arfs->rules); + arfs->wq = create_singlethread_workqueue("mlx5e_arfs"); + if (!arfs->wq) + goto err; + + mlx5e_fs_set_arfs(fs, arfs); + + for (i = 0; i < ARFS_NUM_TYPES; i++) { + err = arfs_create_table(fs, rx_res, i); + if (err) + goto err_des; + } + return 0; + +err_des: + _mlx5e_cleanup_tables(fs); +err: + mlx5e_fs_set_arfs(fs, NULL); + kvfree(arfs); + return err; +} + +#define MLX5E_ARFS_EXPIRY_QUOTA 60 + +static void arfs_may_expire_flow(struct mlx5e_priv *priv) +{ + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(priv->fs); + struct arfs_rule *arfs_rule; + struct hlist_node *htmp; + HLIST_HEAD(del_list); + int quota = 0; + int i; + int j; + + spin_lock_bh(&arfs->arfs_lock); + mlx5e_for_each_arfs_rule(arfs_rule, htmp, arfs->arfs_tables, i, j) { + if (!work_pending(&arfs_rule->arfs_work) && + rps_may_expire_flow(priv->netdev, + arfs_rule->rxq, arfs_rule->flow_id, + arfs_rule->filter_id)) { + hlist_del_init(&arfs_rule->hlist); + hlist_add_head(&arfs_rule->hlist, &del_list); + if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA) + break; + } + } + spin_unlock_bh(&arfs->arfs_lock); + hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) { + if (arfs_rule->rule) + mlx5_del_flow_rules(arfs_rule->rule); + hlist_del(&arfs_rule->hlist); + kfree(arfs_rule); + } +} + +static void arfs_del_rules(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs); + struct hlist_node *htmp; + struct arfs_rule *rule; + HLIST_HEAD(del_list); + int i; + int j; + + spin_lock_bh(&arfs->arfs_lock); + mlx5e_for_each_arfs_rule(rule, htmp, arfs->arfs_tables, i, j) { + hlist_del_init(&rule->hlist); + hlist_add_head(&rule->hlist, &del_list); + } + spin_unlock_bh(&arfs->arfs_lock); + + hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) { + cancel_work_sync(&rule->arfs_work); + if (rule->rule) + mlx5_del_flow_rules(rule->rule); + hlist_del(&rule->hlist); + kfree(rule); + } +} + +static struct hlist_head * +arfs_hash_bucket(struct arfs_table *arfs_t, __be16 src_port, + __be16 dst_port) +{ + unsigned long l; + int bucket_idx; + + l = (__force unsigned long)src_port | + ((__force unsigned long)dst_port << 2); + + bucket_idx = hash_long(l, ARFS_HASH_SHIFT); + + return &arfs_t->rules_hash[bucket_idx]; +} + +static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs, + u8 ip_proto, __be16 etype) +{ + if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_TCP) + return &arfs->arfs_tables[ARFS_IPV4_TCP]; + if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_UDP) + return &arfs->arfs_tables[ARFS_IPV4_UDP]; + if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_TCP) + return &arfs->arfs_tables[ARFS_IPV6_TCP]; + if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_UDP) + return &arfs->arfs_tables[ARFS_IPV6_UDP]; + + return NULL; +} + +static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, + 
struct arfs_rule *arfs_rule) +{ + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(priv->fs); + struct arfs_tuple *tuple = &arfs_rule->tuple; + struct mlx5_flow_handle *rule = NULL; + struct mlx5_flow_destination dest = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct arfs_table *arfs_table; + struct mlx5_flow_spec *spec; + struct mlx5_flow_table *ft; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto out; + } + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, + ntohs(tuple->etype)); + arfs_table = arfs_get_table(arfs, tuple->ip_proto, tuple->etype); + if (!arfs_table) { + err = -EINVAL; + goto out; + } + + ft = arfs_table->ft.t; + if (tuple->ip_proto == IPPROTO_TCP) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.tcp_dport); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.tcp_sport); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_dport, + ntohs(tuple->dst_port)); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_sport, + ntohs(tuple->src_port)); + } else { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.udp_dport); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.udp_sport); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, + ntohs(tuple->dst_port)); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_sport, + ntohs(tuple->src_port)); + } + if (tuple->etype == htons(ETH_P_IP)) { + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), + &tuple->src_ipv4, + 4); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &tuple->dst_ipv4, + 4); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + } else { + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + &tuple->src_ipv6, + 16); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &tuple->dst_ipv6, + 16); + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xff, + 16); + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, + 16); + } + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, arfs_rule->rxq); + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + priv->channel_stats[arfs_rule->rxq]->rq.arfs_err++; + mlx5e_dbg(HW, priv, + "%s: add rule(filter id=%d, rq idx=%d, ip proto=0x%x) failed,err=%d\n", + __func__, arfs_rule->filter_id, arfs_rule->rxq, + tuple->ip_proto, err); + } + +out: + kvfree(spec); + return err ? 
ERR_PTR(err) : rule; +} + +static void arfs_modify_rule_rq(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, u16 rxq) +{ + struct mlx5_flow_destination dst = {}; + int err = 0; + + dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dst.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, rxq); + err = mlx5_modify_rule_destination(rule, &dst, NULL); + if (err) + netdev_warn(priv->netdev, + "Failed to modify aRFS rule destination to rq=%d\n", rxq); +} + +static void arfs_handle_work(struct work_struct *work) +{ + struct arfs_rule *arfs_rule = container_of(work, + struct arfs_rule, + arfs_work); + struct mlx5e_priv *priv = arfs_rule->priv; + struct mlx5e_arfs_tables *arfs; + struct mlx5_flow_handle *rule; + + arfs = mlx5e_fs_get_arfs(priv->fs); + mutex_lock(&priv->state_lock); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + spin_lock_bh(&arfs->arfs_lock); + hlist_del(&arfs_rule->hlist); + spin_unlock_bh(&arfs->arfs_lock); + + mutex_unlock(&priv->state_lock); + kfree(arfs_rule); + goto out; + } + mutex_unlock(&priv->state_lock); + + if (!arfs_rule->rule) { + rule = arfs_add_rule(priv, arfs_rule); + if (IS_ERR(rule)) + goto out; + arfs_rule->rule = rule; + } else { + arfs_modify_rule_rq(priv, arfs_rule->rule, + arfs_rule->rxq); + } +out: + arfs_may_expire_flow(priv); +} + +static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv, + struct arfs_table *arfs_t, + const struct flow_keys *fk, + u16 rxq, u32 flow_id) +{ + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(priv->fs); + struct arfs_rule *rule; + struct arfs_tuple *tuple; + + rule = kzalloc(sizeof(*rule), GFP_ATOMIC); + if (!rule) + return NULL; + + rule->priv = priv; + rule->rxq = rxq; + INIT_WORK(&rule->arfs_work, arfs_handle_work); + + tuple = &rule->tuple; + tuple->etype = fk->basic.n_proto; + tuple->ip_proto = fk->basic.ip_proto; + if (tuple->etype == htons(ETH_P_IP)) { + tuple->src_ipv4 = fk->addrs.v4addrs.src; + tuple->dst_ipv4 = fk->addrs.v4addrs.dst; + } else { + memcpy(&tuple->src_ipv6, &fk->addrs.v6addrs.src, + sizeof(struct in6_addr)); + memcpy(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst, + sizeof(struct in6_addr)); + } + tuple->src_port = fk->ports.src; + tuple->dst_port = fk->ports.dst; + + rule->flow_id = flow_id; + rule->filter_id = arfs->last_filter_id++ % RPS_NO_FILTER; + + hlist_add_head(&rule->hlist, + arfs_hash_bucket(arfs_t, tuple->src_port, + tuple->dst_port)); + return rule; +} + +static bool arfs_cmp(const struct arfs_tuple *tuple, const struct flow_keys *fk) +{ + if (tuple->src_port != fk->ports.src || tuple->dst_port != fk->ports.dst) + return false; + if (tuple->etype != fk->basic.n_proto) + return false; + if (tuple->etype == htons(ETH_P_IP)) + return tuple->src_ipv4 == fk->addrs.v4addrs.src && + tuple->dst_ipv4 == fk->addrs.v4addrs.dst; + if (tuple->etype == htons(ETH_P_IPV6)) + return !memcmp(&tuple->src_ipv6, &fk->addrs.v6addrs.src, + sizeof(struct in6_addr)) && + !memcmp(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst, + sizeof(struct in6_addr)); + return false; +} + +static struct arfs_rule *arfs_find_rule(struct arfs_table *arfs_t, + const struct flow_keys *fk) +{ + struct arfs_rule *arfs_rule; + struct hlist_head *head; + + head = arfs_hash_bucket(arfs_t, fk->ports.src, fk->ports.dst); + hlist_for_each_entry(arfs_rule, head, hlist) { + if (arfs_cmp(&arfs_rule->tuple, fk)) + return arfs_rule; + } + + return NULL; +} + +int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, + u16 rxq_index, u32 flow_id) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct 
mlx5e_arfs_tables *arfs; + struct arfs_rule *arfs_rule; + struct arfs_table *arfs_t; + struct flow_keys fk; + + arfs = mlx5e_fs_get_arfs(priv->fs); + if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) + return -EPROTONOSUPPORT; + + if (fk.basic.n_proto != htons(ETH_P_IP) && + fk.basic.n_proto != htons(ETH_P_IPV6)) + return -EPROTONOSUPPORT; + + if (skb->encapsulation) + return -EPROTONOSUPPORT; + + arfs_t = arfs_get_table(arfs, fk.basic.ip_proto, fk.basic.n_proto); + if (!arfs_t) + return -EPROTONOSUPPORT; + + spin_lock_bh(&arfs->arfs_lock); + arfs_rule = arfs_find_rule(arfs_t, &fk); + if (arfs_rule) { + if (arfs_rule->rxq == rxq_index) { + spin_unlock_bh(&arfs->arfs_lock); + return arfs_rule->filter_id; + } + arfs_rule->rxq = rxq_index; + } else { + arfs_rule = arfs_alloc_rule(priv, arfs_t, &fk, rxq_index, flow_id); + if (!arfs_rule) { + spin_unlock_bh(&arfs->arfs_lock); + return -ENOMEM; + } + } + queue_work(arfs->wq, &arfs_rule->arfs_work); + spin_unlock_bh(&arfs->arfs_lock); + return arfs_rule->filter_id; +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c new file mode 100644 index 000000000..03a99918a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "en.h" + +/* mlx5e global resources should be placed in this file. + * Global resources are common to all the netdevices created on the same nic. 
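+ * As created by mlx5e_create_mdev_resources() below, these shared objects
+ * are the protection domain (PD), the transport domain (TD), a PA memory
+ * key (mkey) and a blue-flame register (bfreg);
+ * mlx5e_destroy_mdev_resources() releases them in the reverse order.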
+ */ + +void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc) +{ + bool ro_pci_enable = pcie_relaxed_ordering_enabled(mdev->pdev); + bool ro_write = MLX5_CAP_GEN(mdev, relaxed_ordering_write); + bool ro_read = MLX5_CAP_GEN(mdev, relaxed_ordering_read); + + MLX5_SET(mkc, mkc, relaxed_ordering_read, ro_pci_enable && ro_read); + MLX5_SET(mkc, mkc, relaxed_ordering_write, ro_pci_enable && ro_write); +} + +int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey) +{ + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + void *mkc; + u32 *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + mlx5e_mkey_set_relaxed_ordering(mdev, mkc); + MLX5_SET(mkc, mkc, pd, pdn); + MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + + err = mlx5_core_create_mkey(mdev, mkey, in, inlen); + + kvfree(in); + return err; +} + +int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) +{ + struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs; + int err; + + err = mlx5_core_alloc_pd(mdev, &res->pdn); + if (err) { + mlx5_core_err(mdev, "alloc pd failed, %d\n", err); + return err; + } + + err = mlx5_core_alloc_transport_domain(mdev, &res->td.tdn); + if (err) { + mlx5_core_err(mdev, "alloc td failed, %d\n", err); + goto err_dealloc_pd; + } + + err = mlx5e_create_mkey(mdev, res->pdn, &res->mkey); + if (err) { + mlx5_core_err(mdev, "create mkey failed, %d\n", err); + goto err_dealloc_transport_domain; + } + + err = mlx5_alloc_bfreg(mdev, &res->bfreg, false, false); + if (err) { + mlx5_core_err(mdev, "alloc bfreg failed, %d\n", err); + goto err_destroy_mkey; + } + + INIT_LIST_HEAD(&res->td.tirs_list); + mutex_init(&res->td.list_lock); + + return 0; + +err_destroy_mkey: + mlx5_core_destroy_mkey(mdev, res->mkey); +err_dealloc_transport_domain: + mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); +err_dealloc_pd: + mlx5_core_dealloc_pd(mdev, res->pdn); + return err; +} + +void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) +{ + struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs; + + mlx5_free_bfreg(mdev, &res->bfreg); + mlx5_core_destroy_mkey(mdev, res->mkey); + mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); + mlx5_core_dealloc_pd(mdev, res->pdn); + memset(res, 0, sizeof(*res)); +} + +int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, + bool enable_mc_lb) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_tir *tir; + u8 lb_flags = 0; + int err = 0; + u32 tirn = 0; + int inlen; + void *in; + + inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + if (enable_uc_lb) + lb_flags = MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; + + if (enable_mc_lb) + lb_flags |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST; + + if (lb_flags) + MLX5_SET(modify_tir_in, in, ctx.self_lb_block, lb_flags); + + MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); + + mutex_lock(&mdev->mlx5e_res.hw_objs.td.list_lock); + list_for_each_entry(tir, &mdev->mlx5e_res.hw_objs.td.tirs_list, list) { + tirn = tir->tirn; + err = mlx5_core_modify_tir(mdev, tirn, in); + if (err) + break; + } + mutex_unlock(&mdev->mlx5e_res.hw_objs.td.list_lock); + + kvfree(in); + if (err) + netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err); + + return err; +} diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c new file mode 100644 index 000000000..89de92d06 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -0,0 +1,1256 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/device.h> +#include <linux/netdevice.h> +#include "en.h" +#include "en/port.h" +#include "en/port_buffer.h" + +#define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ + +#define MLX5E_100MB (100000) +#define MLX5E_1GB (1000000) + +#define MLX5E_CEE_STATE_UP 1 +#define MLX5E_CEE_STATE_DOWN 0 + +/* Max supported cable length is 1000 meters */ +#define MLX5E_MAX_CABLE_LENGTH 1000 + +enum { + MLX5E_VENDOR_TC_GROUP_NUM = 7, + MLX5E_LOWEST_PRIO_GROUP = 0, +}; + +enum { + MLX5_DCB_CHG_RESET, + MLX5_DCB_NO_CHG, + MLX5_DCB_CHG_NO_RESET, +}; + +#define MLX5_DSCP_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, qcam_reg) && \ + MLX5_CAP_QCAM_REG(mdev, qpts) && \ + MLX5_CAP_QCAM_REG(mdev, qpdpm)) + +static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state); +static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio); + +/* If dcbx mode is non-host set the dcbx mode to host. 
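+ * In host mode the OS owns the DCB configuration through the dcbnl
+ * interface; in the non-host (AUTO) mode it is firmware controlled and
+ * negotiated over DCBX, see mlx5e_dcbnl_query_dcbx_mode() and
+ * mlx5e_dcbnl_setdcbx() below.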
+ */ +static int mlx5e_dcbnl_set_dcbx_mode(struct mlx5e_priv *priv, + enum mlx5_dcbx_oper_mode mode) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 param[MLX5_ST_SZ_DW(dcbx_param)]; + int err; + + err = mlx5_query_port_dcbx_param(mdev, param); + if (err) + return err; + + MLX5_SET(dcbx_param, param, version_admin, mode); + if (mode != MLX5E_DCBX_PARAM_VER_OPER_HOST) + MLX5_SET(dcbx_param, param, willing_admin, 1); + + return mlx5_set_port_dcbx_param(mdev, param); +} + +static int mlx5e_dcbnl_switch_to_host_mode(struct mlx5e_priv *priv) +{ + struct mlx5e_dcbx *dcbx = &priv->dcbx; + int err; + + if (!MLX5_CAP_GEN(priv->mdev, dcbx)) + return 0; + + if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_HOST) + return 0; + + err = mlx5e_dcbnl_set_dcbx_mode(priv, MLX5E_DCBX_PARAM_VER_OPER_HOST); + if (err) + return err; + + dcbx->mode = MLX5E_DCBX_PARAM_VER_OPER_HOST; + return 0; +} + +static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev, + struct ieee_ets *ets) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 tc_group[IEEE_8021QAZ_MAX_TCS]; + bool is_tc_group_6_exist = false; + bool is_zero_bw_ets_tc = false; + int err = 0; + int i; + + if (!MLX5_CAP_GEN(priv->mdev, ets)) + return -EOPNOTSUPP; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlx5_query_port_prio_tc(mdev, i, &ets->prio_tc[i]); + if (err) + return err; + } + + ets->ets_cap = mlx5_max_tc(priv->mdev) + 1; + for (i = 0; i < ets->ets_cap; i++) { + err = mlx5_query_port_tc_group(mdev, i, &tc_group[i]); + if (err) + return err; + + err = mlx5_query_port_tc_bw_alloc(mdev, i, &ets->tc_tx_bw[i]); + if (err) + return err; + + if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC && + tc_group[i] == (MLX5E_LOWEST_PRIO_GROUP + 1)) + is_zero_bw_ets_tc = true; + + if (tc_group[i] == (MLX5E_VENDOR_TC_GROUP_NUM - 1)) + is_tc_group_6_exist = true; + } + + /* Report 0% ets tc if exits*/ + if (is_zero_bw_ets_tc) { + for (i = 0; i < ets->ets_cap; i++) + if (tc_group[i] == MLX5E_LOWEST_PRIO_GROUP) + ets->tc_tx_bw[i] = 0; + } + + /* Update tc_tsa based on fw setting*/ + for (i = 0; i < ets->ets_cap; i++) { + if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC) + priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS; + else if (tc_group[i] == MLX5E_VENDOR_TC_GROUP_NUM && + !is_tc_group_6_exist) + priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR; + } + memcpy(ets->tc_tsa, priv->dcbx.tc_tsa, sizeof(ets->tc_tsa)); + + return err; +} + +static void mlx5e_build_tc_group(struct ieee_ets *ets, u8 *tc_group, int max_tc) +{ + bool any_tc_mapped_to_ets = false; + bool ets_zero_bw = false; + int strict_group; + int i; + + for (i = 0; i <= max_tc; i++) { + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) { + any_tc_mapped_to_ets = true; + if (!ets->tc_tx_bw[i]) + ets_zero_bw = true; + } + } + + /* strict group has higher priority than ets group */ + strict_group = MLX5E_LOWEST_PRIO_GROUP; + if (any_tc_mapped_to_ets) + strict_group++; + if (ets_zero_bw) + strict_group++; + + for (i = 0; i <= max_tc; i++) { + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_VENDOR: + tc_group[i] = MLX5E_VENDOR_TC_GROUP_NUM; + break; + case IEEE_8021QAZ_TSA_STRICT: + tc_group[i] = strict_group++; + break; + case IEEE_8021QAZ_TSA_ETS: + tc_group[i] = MLX5E_LOWEST_PRIO_GROUP; + if (ets->tc_tx_bw[i] && ets_zero_bw) + tc_group[i] = MLX5E_LOWEST_PRIO_GROUP + 1; + break; + } + } +} + +static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw, + u8 *tc_group, int max_tc) +{ + int bw_for_ets_zero_bw_tc = 0; + int last_ets_zero_bw_tc = -1; + int 
num_ets_zero_bw = 0; + int i; + + for (i = 0; i <= max_tc; i++) { + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS && + !ets->tc_tx_bw[i]) { + num_ets_zero_bw++; + last_ets_zero_bw_tc = i; + } + } + + if (num_ets_zero_bw) + bw_for_ets_zero_bw_tc = MLX5E_MAX_BW_ALLOC / num_ets_zero_bw; + + for (i = 0; i <= max_tc; i++) { + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_VENDOR: + tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC; + break; + case IEEE_8021QAZ_TSA_STRICT: + tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC; + break; + case IEEE_8021QAZ_TSA_ETS: + tc_tx_bw[i] = ets->tc_tx_bw[i] ? + ets->tc_tx_bw[i] : + bw_for_ets_zero_bw_tc; + break; + } + } + + /* Make sure the total bw for ets zero bw group is 100% */ + if (last_ets_zero_bw_tc != -1) + tc_tx_bw[last_ets_zero_bw_tc] += + MLX5E_MAX_BW_ALLOC % num_ets_zero_bw; +} + +/* If there are ETS BW 0, + * Set ETS group # to 1 for all ETS non zero BW tcs. Their sum must be 100%. + * Set group #0 to all the ETS BW 0 tcs and + * equally splits the 100% BW between them + * Report both group #0 and #1 as ETS type. + * All the tcs in group #0 will be reported with 0% BW. + */ +static int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u8 tc_tx_bw[IEEE_8021QAZ_MAX_TCS]; + u8 tc_group[IEEE_8021QAZ_MAX_TCS]; + int max_tc = mlx5_max_tc(mdev); + int err, i; + + mlx5e_build_tc_group(ets, tc_group, max_tc); + mlx5e_build_tc_tx_bw(ets, tc_tx_bw, tc_group, max_tc); + + err = mlx5_set_port_prio_tc(mdev, ets->prio_tc); + if (err) + return err; + + err = mlx5_set_port_tc_group(mdev, tc_group); + if (err) + return err; + + err = mlx5_set_port_tc_bw_alloc(mdev, tc_tx_bw); + + if (err) + return err; + + memcpy(priv->dcbx.tc_tsa, ets->tc_tsa, sizeof(ets->tc_tsa)); + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + mlx5e_dbg(HW, priv, "%s: prio_%d <=> tc_%d\n", + __func__, i, ets->prio_tc[i]); + mlx5e_dbg(HW, priv, "%s: tc_%d <=> tx_bw_%d%%, group_%d\n", + __func__, i, tc_tx_bw[i], tc_group[i]); + } + + return err; +} + +static int mlx5e_dbcnl_validate_ets(struct net_device *netdev, + struct ieee_ets *ets, + bool zero_sum_allowed) +{ + bool have_ets_tc = false; + int bw_sum = 0; + int i; + + /* Validate Priority */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (ets->prio_tc[i] >= MLX5E_MAX_PRIORITY) { + netdev_err(netdev, + "Failed to validate ETS: priority value greater than max(%d)\n", + MLX5E_MAX_PRIORITY); + return -EINVAL; + } + } + + /* Validate Bandwidth Sum */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) { + have_ets_tc = true; + bw_sum += ets->tc_tx_bw[i]; + } + } + + if (have_ets_tc && bw_sum != 100) { + if (bw_sum || (!bw_sum && !zero_sum_allowed)) + netdev_err(netdev, + "Failed to validate ETS: BW sum is illegal\n"); + return -EINVAL; + } + return 0; +} + +static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev, + struct ieee_ets *ets) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + if (!MLX5_CAP_GEN(priv->mdev, ets)) + return -EOPNOTSUPP; + + err = mlx5e_dbcnl_validate_ets(netdev, ets, false); + if (err) + return err; + + err = mlx5e_dcbnl_ieee_setets_core(priv, ets); + if (err) + return err; + + return 0; +} + +static int mlx5e_dcbnl_ieee_getpfc(struct net_device *dev, + struct ieee_pfc *pfc) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + int i; + + pfc->pfc_cap = mlx5_max_tc(mdev) + 1; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; 
i++) { + pfc->requests[i] = PPORT_PER_PRIO_GET(pstats, i, tx_pause); + pfc->indications[i] = PPORT_PER_PRIO_GET(pstats, i, rx_pause); + } + + if (MLX5_BUFFER_SUPPORTED(mdev)) + pfc->delay = priv->dcbx.cable_len; + + return mlx5_query_port_pfc(mdev, &pfc->pfc_en, NULL); +} + +static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev, + struct ieee_pfc *pfc) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + u32 old_cable_len = priv->dcbx.cable_len; + struct ieee_pfc pfc_new; + u32 changed = 0; + u8 curr_pfc_en; + int ret = 0; + + /* pfc_en */ + mlx5_query_port_pfc(mdev, &curr_pfc_en, NULL); + if (pfc->pfc_en != curr_pfc_en) { + ret = mlx5_set_port_pfc(mdev, pfc->pfc_en, pfc->pfc_en); + if (ret) + return ret; + mlx5_toggle_port_link(mdev); + changed |= MLX5E_PORT_BUFFER_PFC; + } + + if (pfc->delay && + pfc->delay < MLX5E_MAX_CABLE_LENGTH && + pfc->delay != priv->dcbx.cable_len) { + priv->dcbx.cable_len = pfc->delay; + changed |= MLX5E_PORT_BUFFER_CABLE_LEN; + } + + if (MLX5_BUFFER_SUPPORTED(mdev)) { + pfc_new.pfc_en = (changed & MLX5E_PORT_BUFFER_PFC) ? pfc->pfc_en : curr_pfc_en; + if (priv->dcbx.manual_buffer) + ret = mlx5e_port_manual_buffer_config(priv, changed, + dev->mtu, &pfc_new, + NULL, NULL); + + if (ret && (changed & MLX5E_PORT_BUFFER_CABLE_LEN)) + priv->dcbx.cable_len = old_cable_len; + } + + if (!ret) { + mlx5e_dbg(HW, priv, + "%s: PFC per priority bit mask: 0x%x\n", + __func__, pfc->pfc_en); + } + return ret; +} + +static u8 mlx5e_dcbnl_getdcbx(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return priv->dcbx.cap; +} + +static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_dcbx *dcbx = &priv->dcbx; + + if (mode & DCB_CAP_DCBX_LLD_MANAGED) + return 1; + + if ((!mode) && MLX5_CAP_GEN(priv->mdev, dcbx)) { + if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_AUTO) + return 0; + + /* set dcbx to fw controlled */ + if (!mlx5e_dcbnl_set_dcbx_mode(priv, MLX5E_DCBX_PARAM_VER_OPER_AUTO)) { + dcbx->mode = MLX5E_DCBX_PARAM_VER_OPER_AUTO; + dcbx->cap &= ~DCB_CAP_DCBX_HOST; + return 0; + } + + return 1; + } + + if (!(mode & DCB_CAP_DCBX_HOST)) + return 1; + + if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev))) + return 1; + + dcbx->cap = mode; + + return 0; +} + +static int mlx5e_dcbnl_ieee_setapp(struct net_device *dev, struct dcb_app *app) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct dcb_app temp; + bool is_new; + int err; + + if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager) || + !MLX5_DSCP_SUPPORTED(priv->mdev)) + return -EOPNOTSUPP; + + if ((app->selector != IEEE_8021QAZ_APP_SEL_DSCP) || + (app->protocol >= MLX5E_MAX_DSCP)) + return -EINVAL; + + /* Save the old entry info */ + temp.selector = IEEE_8021QAZ_APP_SEL_DSCP; + temp.protocol = app->protocol; + temp.priority = priv->dcbx_dp.dscp2prio[app->protocol]; + + /* Check if need to switch to dscp trust state */ + if (!priv->dcbx.dscp_app_cnt) { + err = mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_DSCP); + if (err) + return err; + } + + /* Skip the fw command if new and old mapping are the same */ + if (app->priority != priv->dcbx_dp.dscp2prio[app->protocol]) { + err = mlx5e_set_dscp2prio(priv, app->protocol, app->priority); + if (err) + goto fw_err; + } + + /* Delete the old entry if exists */ + is_new = false; + err = dcb_ieee_delapp(dev, &temp); + if (err) + is_new = true; + + /* Add new entry and update counter */ + err = dcb_ieee_setapp(dev, app); + if (err) + return err; + + if 
(is_new) + priv->dcbx.dscp_app_cnt++; + + return err; + +fw_err: + mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP); + return err; +} + +static int mlx5e_dcbnl_ieee_delapp(struct net_device *dev, struct dcb_app *app) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int err; + + if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager) || + !MLX5_DSCP_SUPPORTED(priv->mdev)) + return -EOPNOTSUPP; + + if ((app->selector != IEEE_8021QAZ_APP_SEL_DSCP) || + (app->protocol >= MLX5E_MAX_DSCP)) + return -EINVAL; + + /* Skip if no dscp app entry */ + if (!priv->dcbx.dscp_app_cnt) + return -ENOENT; + + /* Check if the entry matches fw setting */ + if (app->priority != priv->dcbx_dp.dscp2prio[app->protocol]) + return -ENOENT; + + /* Delete the app entry */ + err = dcb_ieee_delapp(dev, app); + if (err) + return err; + + /* Reset the priority mapping back to zero */ + err = mlx5e_set_dscp2prio(priv, app->protocol, 0); + if (err) + goto fw_err; + + priv->dcbx.dscp_app_cnt--; + + /* Check if need to switch to pcp trust state */ + if (!priv->dcbx.dscp_app_cnt) + err = mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP); + + return err; + +fw_err: + mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP); + return err; +} + +static int mlx5e_dcbnl_ieee_getmaxrate(struct net_device *netdev, + struct ieee_maxrate *maxrate) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 max_bw_value[IEEE_8021QAZ_MAX_TCS]; + u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS]; + int err; + int i; + + err = mlx5_query_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit); + if (err) + return err; + + memset(maxrate->tc_maxrate, 0, sizeof(maxrate->tc_maxrate)); + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + switch (max_bw_unit[i]) { + case MLX5_100_MBPS_UNIT: + maxrate->tc_maxrate[i] = max_bw_value[i] * MLX5E_100MB; + break; + case MLX5_GBPS_UNIT: + maxrate->tc_maxrate[i] = max_bw_value[i] * MLX5E_1GB; + break; + case MLX5_BW_NO_LIMIT: + break; + default: + WARN(true, "non-supported BW unit"); + break; + } + } + + return 0; +} + +static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev, + struct ieee_maxrate *maxrate) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 max_bw_value[IEEE_8021QAZ_MAX_TCS]; + u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS]; + __u64 upper_limit_mbps = roundup(255 * MLX5E_100MB, MLX5E_1GB); + int i; + + memset(max_bw_value, 0, sizeof(max_bw_value)); + memset(max_bw_unit, 0, sizeof(max_bw_unit)); + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + if (!maxrate->tc_maxrate[i]) { + max_bw_unit[i] = MLX5_BW_NO_LIMIT; + continue; + } + if (maxrate->tc_maxrate[i] < upper_limit_mbps) { + max_bw_value[i] = div_u64(maxrate->tc_maxrate[i], + MLX5E_100MB); + max_bw_value[i] = max_bw_value[i] ? 
max_bw_value[i] : 1; + max_bw_unit[i] = MLX5_100_MBPS_UNIT; + } else { + max_bw_value[i] = div_u64(maxrate->tc_maxrate[i], + MLX5E_1GB); + max_bw_unit[i] = MLX5_GBPS_UNIT; + } + } + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + mlx5e_dbg(HW, priv, "%s: tc_%d <=> max_bw %d Gbps\n", + __func__, i, max_bw_value[i]); + } + + return mlx5_modify_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit); +} + +static u8 mlx5e_dcbnl_setall(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg; + struct mlx5_core_dev *mdev = priv->mdev; + struct ieee_ets ets; + struct ieee_pfc pfc; + int err = -EOPNOTSUPP; + int i; + + if (!MLX5_CAP_GEN(mdev, ets)) + goto out; + + memset(&ets, 0, sizeof(ets)); + memset(&pfc, 0, sizeof(pfc)); + + ets.ets_cap = IEEE_8021QAZ_MAX_TCS; + for (i = 0; i < CEE_DCBX_MAX_PGS; i++) { + ets.tc_tx_bw[i] = cee_cfg->pg_bw_pct[i]; + ets.tc_rx_bw[i] = cee_cfg->pg_bw_pct[i]; + ets.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS; + ets.prio_tc[i] = cee_cfg->prio_to_pg_map[i]; + mlx5e_dbg(HW, priv, + "%s: Priority group %d: tx_bw %d, rx_bw %d, prio_tc %d\n", + __func__, i, ets.tc_tx_bw[i], ets.tc_rx_bw[i], + ets.prio_tc[i]); + } + + err = mlx5e_dbcnl_validate_ets(netdev, &ets, true); + if (err) + goto out; + + err = mlx5e_dcbnl_ieee_setets_core(priv, &ets); + if (err) { + netdev_err(netdev, + "%s, Failed to set ETS: %d\n", __func__, err); + goto out; + } + + /* Set PFC */ + pfc.pfc_cap = mlx5_max_tc(mdev) + 1; + if (!cee_cfg->pfc_enable) + pfc.pfc_en = 0; + else + for (i = 0; i < CEE_DCBX_MAX_PRIO; i++) + pfc.pfc_en |= cee_cfg->pfc_setting[i] << i; + + err = mlx5e_dcbnl_ieee_setpfc(netdev, &pfc); + if (err) { + netdev_err(netdev, + "%s, Failed to set PFC: %d\n", __func__, err); + goto out; + } +out: + return err ? 
MLX5_DCB_NO_CHG : MLX5_DCB_CHG_RESET; +} + +static u8 mlx5e_dcbnl_getstate(struct net_device *netdev) +{ + return MLX5E_CEE_STATE_UP; +} + +static void mlx5e_dcbnl_getpermhwaddr(struct net_device *netdev, + u8 *perm_addr) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + if (!perm_addr) + return; + + memset(perm_addr, 0xff, MAX_ADDR_LEN); + + mlx5_query_mac_address(priv->mdev, perm_addr); +} + +static void mlx5e_dcbnl_setpgtccfgtx(struct net_device *netdev, + int priority, u8 prio_type, + u8 pgid, u8 bw_pct, u8 up_map) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg; + + if (priority >= CEE_DCBX_MAX_PRIO) { + netdev_err(netdev, + "%s, priority is out of range\n", __func__); + return; + } + + if (pgid >= CEE_DCBX_MAX_PGS) { + netdev_err(netdev, + "%s, priority group is out of range\n", __func__); + return; + } + + cee_cfg->prio_to_pg_map[priority] = pgid; +} + +static void mlx5e_dcbnl_setpgbwgcfgtx(struct net_device *netdev, + int pgid, u8 bw_pct) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg; + + if (pgid >= CEE_DCBX_MAX_PGS) { + netdev_err(netdev, + "%s, priority group is out of range\n", __func__); + return; + } + + cee_cfg->pg_bw_pct[pgid] = bw_pct; +} + +static void mlx5e_dcbnl_getpgtccfgtx(struct net_device *netdev, + int priority, u8 *prio_type, + u8 *pgid, u8 *bw_pct, u8 *up_map) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + if (!MLX5_CAP_GEN(priv->mdev, ets)) { + netdev_err(netdev, "%s, ets is not supported\n", __func__); + return; + } + + if (priority >= CEE_DCBX_MAX_PRIO) { + netdev_err(netdev, + "%s, priority is out of range\n", __func__); + return; + } + + *prio_type = 0; + *bw_pct = 0; + *up_map = 0; + + if (mlx5_query_port_prio_tc(mdev, priority, pgid)) + *pgid = 0; +} + +static void mlx5e_dcbnl_getpgbwgcfgtx(struct net_device *netdev, + int pgid, u8 *bw_pct) +{ + struct ieee_ets ets; + + if (pgid >= CEE_DCBX_MAX_PGS) { + netdev_err(netdev, + "%s, priority group is out of range\n", __func__); + return; + } + + mlx5e_dcbnl_ieee_getets(netdev, &ets); + *bw_pct = ets.tc_tx_bw[pgid]; +} + +static void mlx5e_dcbnl_setpfccfg(struct net_device *netdev, + int priority, u8 setting) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg; + + if (priority >= CEE_DCBX_MAX_PRIO) { + netdev_err(netdev, + "%s, priority is out of range\n", __func__); + return; + } + + if (setting > 1) + return; + + cee_cfg->pfc_setting[priority] = setting; +} + +static int +mlx5e_dcbnl_get_priority_pfc(struct net_device *netdev, + int priority, u8 *setting) +{ + struct ieee_pfc pfc; + int err; + + err = mlx5e_dcbnl_ieee_getpfc(netdev, &pfc); + + if (err) + *setting = 0; + else + *setting = (pfc.pfc_en >> priority) & 0x01; + + return err; +} + +static void mlx5e_dcbnl_getpfccfg(struct net_device *netdev, + int priority, u8 *setting) +{ + if (priority >= CEE_DCBX_MAX_PRIO) { + netdev_err(netdev, + "%s, priority is out of range\n", __func__); + return; + } + + if (!setting) + return; + + mlx5e_dcbnl_get_priority_pfc(netdev, priority, setting); +} + +static u8 mlx5e_dcbnl_getcap(struct net_device *netdev, + int capid, u8 *cap) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 rval = 0; + + switch (capid) { + case DCB_CAP_ATTR_PG: + *cap = true; + break; + case DCB_CAP_ATTR_PFC: + *cap = true; + break; + case DCB_CAP_ATTR_UP2TC: + 
*cap = false; + break; + case DCB_CAP_ATTR_PG_TCS: + *cap = 1 << mlx5_max_tc(mdev); + break; + case DCB_CAP_ATTR_PFC_TCS: + *cap = 1 << mlx5_max_tc(mdev); + break; + case DCB_CAP_ATTR_GSP: + *cap = false; + break; + case DCB_CAP_ATTR_BCN: + *cap = false; + break; + case DCB_CAP_ATTR_DCBX: + *cap = priv->dcbx.cap | + DCB_CAP_DCBX_VER_CEE | + DCB_CAP_DCBX_VER_IEEE; + break; + default: + *cap = 0; + rval = 1; + break; + } + + return rval; +} + +static int mlx5e_dcbnl_getnumtcs(struct net_device *netdev, + int tcs_id, u8 *num) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + switch (tcs_id) { + case DCB_NUMTCS_ATTR_PG: + case DCB_NUMTCS_ATTR_PFC: + *num = mlx5_max_tc(mdev) + 1; + break; + default: + return -EINVAL; + } + + return 0; +} + +static u8 mlx5e_dcbnl_getpfcstate(struct net_device *netdev) +{ + struct ieee_pfc pfc; + + if (mlx5e_dcbnl_ieee_getpfc(netdev, &pfc)) + return MLX5E_CEE_STATE_DOWN; + + return pfc.pfc_en ? MLX5E_CEE_STATE_UP : MLX5E_CEE_STATE_DOWN; +} + +static void mlx5e_dcbnl_setpfcstate(struct net_device *netdev, u8 state) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg; + + if ((state != MLX5E_CEE_STATE_UP) && (state != MLX5E_CEE_STATE_DOWN)) + return; + + cee_cfg->pfc_enable = state; +} + +static int mlx5e_dcbnl_getbuffer(struct net_device *dev, + struct dcbnl_buffer *dcb_buffer) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_port_buffer port_buffer; + u8 buffer[MLX5E_MAX_PRIORITY]; + int i, err; + + if (!MLX5_BUFFER_SUPPORTED(mdev)) + return -EOPNOTSUPP; + + err = mlx5e_port_query_priority2buffer(mdev, buffer); + if (err) + return err; + + for (i = 0; i < MLX5E_MAX_PRIORITY; i++) + dcb_buffer->prio2buffer[i] = buffer[i]; + + err = mlx5e_port_query_buffer(priv, &port_buffer); + if (err) + return err; + + for (i = 0; i < MLX5E_MAX_BUFFER; i++) + dcb_buffer->buffer_size[i] = port_buffer.buffer[i].size; + dcb_buffer->total_size = port_buffer.port_buffer_size; + + return 0; +} + +static int mlx5e_dcbnl_setbuffer(struct net_device *dev, + struct dcbnl_buffer *dcb_buffer) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_port_buffer port_buffer; + u8 old_prio2buffer[MLX5E_MAX_PRIORITY]; + u32 *buffer_size = NULL; + u8 *prio2buffer = NULL; + u32 changed = 0; + int i, err; + + if (!MLX5_BUFFER_SUPPORTED(mdev)) + return -EOPNOTSUPP; + + for (i = 0; i < DCBX_MAX_BUFFERS; i++) + mlx5_core_dbg(mdev, "buffer[%d]=%d\n", i, dcb_buffer->buffer_size[i]); + + for (i = 0; i < MLX5E_MAX_PRIORITY; i++) + mlx5_core_dbg(mdev, "priority %d buffer%d\n", i, dcb_buffer->prio2buffer[i]); + + err = mlx5e_port_query_priority2buffer(mdev, old_prio2buffer); + if (err) + return err; + + for (i = 0; i < MLX5E_MAX_PRIORITY; i++) { + if (dcb_buffer->prio2buffer[i] != old_prio2buffer[i]) { + changed |= MLX5E_PORT_BUFFER_PRIO2BUFFER; + prio2buffer = dcb_buffer->prio2buffer; + break; + } + } + + err = mlx5e_port_query_buffer(priv, &port_buffer); + if (err) + return err; + + for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + if (port_buffer.buffer[i].size != dcb_buffer->buffer_size[i]) { + changed |= MLX5E_PORT_BUFFER_SIZE; + buffer_size = dcb_buffer->buffer_size; + break; + } + } + + if (!changed) + return 0; + + priv->dcbx.manual_buffer = true; + err = mlx5e_port_manual_buffer_config(priv, changed, dev->mtu, NULL, + buffer_size, prio2buffer); + return err; +} + +static const struct 
dcbnl_rtnl_ops mlx5e_dcbnl_ops = { + .ieee_getets = mlx5e_dcbnl_ieee_getets, + .ieee_setets = mlx5e_dcbnl_ieee_setets, + .ieee_getmaxrate = mlx5e_dcbnl_ieee_getmaxrate, + .ieee_setmaxrate = mlx5e_dcbnl_ieee_setmaxrate, + .ieee_getpfc = mlx5e_dcbnl_ieee_getpfc, + .ieee_setpfc = mlx5e_dcbnl_ieee_setpfc, + .ieee_setapp = mlx5e_dcbnl_ieee_setapp, + .ieee_delapp = mlx5e_dcbnl_ieee_delapp, + .getdcbx = mlx5e_dcbnl_getdcbx, + .setdcbx = mlx5e_dcbnl_setdcbx, + .dcbnl_getbuffer = mlx5e_dcbnl_getbuffer, + .dcbnl_setbuffer = mlx5e_dcbnl_setbuffer, + +/* CEE interfaces */ + .setall = mlx5e_dcbnl_setall, + .getstate = mlx5e_dcbnl_getstate, + .getpermhwaddr = mlx5e_dcbnl_getpermhwaddr, + + .setpgtccfgtx = mlx5e_dcbnl_setpgtccfgtx, + .setpgbwgcfgtx = mlx5e_dcbnl_setpgbwgcfgtx, + .getpgtccfgtx = mlx5e_dcbnl_getpgtccfgtx, + .getpgbwgcfgtx = mlx5e_dcbnl_getpgbwgcfgtx, + + .setpfccfg = mlx5e_dcbnl_setpfccfg, + .getpfccfg = mlx5e_dcbnl_getpfccfg, + .getcap = mlx5e_dcbnl_getcap, + .getnumtcs = mlx5e_dcbnl_getnumtcs, + .getpfcstate = mlx5e_dcbnl_getpfcstate, + .setpfcstate = mlx5e_dcbnl_setpfcstate, +}; + +void mlx5e_dcbnl_build_netdev(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + if (MLX5_CAP_GEN(mdev, vport_group_manager) && MLX5_CAP_GEN(mdev, qos)) + netdev->dcbnl_ops = &mlx5e_dcbnl_ops; +} + +static void mlx5e_dcbnl_query_dcbx_mode(struct mlx5e_priv *priv, + enum mlx5_dcbx_oper_mode *mode) +{ + u32 out[MLX5_ST_SZ_DW(dcbx_param)]; + + *mode = MLX5E_DCBX_PARAM_VER_OPER_HOST; + + if (!mlx5_query_port_dcbx_param(priv->mdev, out)) + *mode = MLX5_GET(dcbx_param, out, version_oper); + + /* From driver's point of view, we only care if the mode + * is host (HOST) or non-host (AUTO) + */ + if (*mode != MLX5E_DCBX_PARAM_VER_OPER_HOST) + *mode = MLX5E_DCBX_PARAM_VER_OPER_AUTO; +} + +static void mlx5e_ets_init(struct mlx5e_priv *priv) +{ + struct ieee_ets ets; + int err; + int i; + + if (!MLX5_CAP_GEN(priv->mdev, ets)) + return; + + memset(&ets, 0, sizeof(ets)); + ets.ets_cap = mlx5_max_tc(priv->mdev) + 1; + for (i = 0; i < ets.ets_cap; i++) { + ets.tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC; + ets.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR; + ets.prio_tc[i] = i; + } + + if (ets.ets_cap > 1) { + /* tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1) */ + ets.prio_tc[0] = 1; + ets.prio_tc[1] = 0; + } + + err = mlx5e_dcbnl_ieee_setets_core(priv, &ets); + if (err) + netdev_err(priv->netdev, + "%s, Failed to init ETS: %d\n", __func__, err); +} + +enum { + INIT, + DELETE, +}; + +static void mlx5e_dcbnl_dscp_app(struct mlx5e_priv *priv, int action) +{ + struct dcb_app temp; + int i; + + if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager)) + return; + + if (!MLX5_DSCP_SUPPORTED(priv->mdev)) + return; + + /* No SEL_DSCP entry in non DSCP state */ + if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_DSCP) + return; + + temp.selector = IEEE_8021QAZ_APP_SEL_DSCP; + for (i = 0; i < MLX5E_MAX_DSCP; i++) { + temp.protocol = i; + temp.priority = priv->dcbx_dp.dscp2prio[i]; + if (action == INIT) + dcb_ieee_setapp(priv->netdev, &temp); + else + dcb_ieee_delapp(priv->netdev, &temp); + } + + priv->dcbx.dscp_app_cnt = (action == INIT) ? 
MLX5E_MAX_DSCP : 0; +} + +void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv) +{ + mlx5e_dcbnl_dscp_app(priv, INIT); +} + +void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) +{ + mlx5e_dcbnl_dscp_app(priv, DELETE); +} + +static void mlx5e_params_calc_trust_tx_min_inline_mode(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u8 trust_state) +{ + mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); + if (trust_state == MLX5_QPTS_TRUST_DSCP && + params->tx_min_inline_mode == MLX5_INLINE_MODE_L2) + params->tx_min_inline_mode = MLX5_INLINE_MODE_IP; +} + +static int mlx5e_update_trust_state_hw(struct mlx5e_priv *priv, void *context) +{ + u8 *trust_state = context; + int err; + + err = mlx5_set_trust_state(priv->mdev, *trust_state); + if (err) + return err; + WRITE_ONCE(priv->dcbx_dp.trust_state, *trust_state); + + return 0; +} + +static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state) +{ + struct mlx5e_params new_params; + bool reset = true; + int err; + + mutex_lock(&priv->state_lock); + + new_params = priv->channels.params; + mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &new_params, + trust_state); + + /* Skip if tx_min_inline is the same */ + if (new_params.tx_min_inline_mode == priv->channels.params.tx_min_inline_mode) + reset = false; + + err = mlx5e_safe_switch_params(priv, &new_params, + mlx5e_update_trust_state_hw, + &trust_state, reset); + + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio) +{ + int err; + + err = mlx5_set_dscp2prio(priv->mdev, dscp, prio); + if (err) + return err; + + priv->dcbx_dp.dscp2prio[dscp] = prio; + return err; +} + +static int mlx5e_trust_initialize(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u8 trust_state; + int err; + + if (!MLX5_DSCP_SUPPORTED(mdev)) { + WRITE_ONCE(priv->dcbx_dp.trust_state, MLX5_QPTS_TRUST_PCP); + return 0; + } + + err = mlx5_query_trust_state(priv->mdev, &trust_state); + if (err) + return err; + WRITE_ONCE(priv->dcbx_dp.trust_state, trust_state); + + if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_PCP && priv->dcbx.dscp_app_cnt) { + /* + * Align the driver state with the register state. + * Temporary state change is required to enable the app list reset. 
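+ * (mlx5e_dcbnl_delete_app() -> mlx5e_dcbnl_dscp_app() only walks the
+ * DSCP app list while the trust state is DSCP, hence the temporary
+ * switch.)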
+ */ + priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_DSCP; + mlx5e_dcbnl_delete_app(priv); + priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP; + } + + mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &priv->channels.params, + priv->dcbx_dp.trust_state); + + err = mlx5_query_dscp2prio(priv->mdev, priv->dcbx_dp.dscp2prio); + if (err) + return err; + + return 0; +} + +#define MLX5E_BUFFER_CELL_SHIFT 7 + +static u16 mlx5e_query_port_buffers_cell_size(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 out[MLX5_ST_SZ_DW(sbcam_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(sbcam_reg)] = {}; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return (1 << MLX5E_BUFFER_CELL_SHIFT); + + if (mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), + MLX5_REG_SBCAM, 0, 0)) + return (1 << MLX5E_BUFFER_CELL_SHIFT); + + return MLX5_GET(sbcam_reg, out, cap_cell_size); +} + +void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) +{ + struct mlx5e_dcbx *dcbx = &priv->dcbx; + + mlx5e_trust_initialize(priv); + + if (!MLX5_CAP_GEN(priv->mdev, qos)) + return; + + if (MLX5_CAP_GEN(priv->mdev, dcbx)) + mlx5e_dcbnl_query_dcbx_mode(priv, &dcbx->mode); + + priv->dcbx.cap = DCB_CAP_DCBX_VER_CEE | + DCB_CAP_DCBX_VER_IEEE; + if (priv->dcbx.mode == MLX5E_DCBX_PARAM_VER_OPER_HOST) + priv->dcbx.cap |= DCB_CAP_DCBX_HOST; + + priv->dcbx.port_buff_cell_sz = mlx5e_query_port_buffers_cell_size(priv); + priv->dcbx.manual_buffer = false; + priv->dcbx.cable_len = MLX5E_DEFAULT_CABLE_LEN; + + mlx5e_ets_init(priv); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c new file mode 100644 index 000000000..ca9cfbf57 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/dim.h> +#include "en.h" + +static void +mlx5e_complete_dim_work(struct dim *dim, struct dim_cq_moder moder, + struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq) +{ + mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts); + dim->state = DIM_START_MEASURE; +} + +void mlx5e_rx_dim_work(struct work_struct *work) +{ + struct dim *dim = container_of(work, struct dim, work); + struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim); + struct dim_cq_moder cur_moder = + net_dim_get_rx_moderation(dim->mode, dim->profile_ix); + + mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq); +} + +void mlx5e_tx_dim_work(struct work_struct *work) +{ + struct dim *dim = container_of(work, struct dim, work); + struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim); + struct dim_cq_moder cur_moder = + net_dim_get_tx_moderation(dim->mode, dim->profile_ix); + + mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c new file mode 100644 index 000000000..ceeb23f47 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -0,0 +1,2453 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/ethtool_netlink.h> + +#include "en.h" +#include "en/port.h" +#include "en/params.h" +#include "en/ptp.h" +#include "lib/clock.h" +#include "en/fs_ethtool.h" + +void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, + struct ethtool_drvinfo *drvinfo) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int count; + + strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver)); + count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d (%.16s)", fw_rev_maj(mdev), + fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id); + if (count >= sizeof(drvinfo->fw_version)) + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d", fw_rev_maj(mdev), + fw_rev_min(mdev), fw_rev_sub(mdev)); + + strscpy(drvinfo->bus_info, dev_name(mdev->device), + sizeof(drvinfo->bus_info)); +} + +static void mlx5e_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_drvinfo(priv, drvinfo); +} + +struct ptys2ethtool_config { + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised); +}; + +static +struct ptys2ethtool_config ptys2legacy_ethtool_table[MLX5E_LINK_MODES_NUMBER]; +static +struct ptys2ethtool_config ptys2ext_ethtool_table[MLX5E_EXT_LINK_MODES_NUMBER]; + +#define MLX5_BUILD_PTYS2ETHTOOL_CONFIG(reg_, table, ...) \ + ({ \ + struct ptys2ethtool_config *cfg; \ + const unsigned int modes[] = { __VA_ARGS__ }; \ + unsigned int i, bit, idx; \ + cfg = &ptys2##table##_ethtool_table[reg_]; \ + bitmap_zero(cfg->supported, \ + __ETHTOOL_LINK_MODE_MASK_NBITS); \ + bitmap_zero(cfg->advertised, \ + __ETHTOOL_LINK_MODE_MASK_NBITS); \ + for (i = 0 ; i < ARRAY_SIZE(modes) ; ++i) { \ + bit = modes[i] % 64; \ + idx = modes[i] / 64; \ + __set_bit(bit, &cfg->supported[idx]); \ + __set_bit(bit, &cfg->advertised[idx]); \ + } \ + }) + +void mlx5e_build_ptys2ethtool_map(void) +{ + memset(ptys2legacy_ethtool_table, 0, sizeof(ptys2legacy_ethtool_table)); + memset(ptys2ext_ethtool_table, 0, sizeof(ptys2ext_ethtool_table)); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_CX_SGMII, legacy, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_KX, legacy, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CX4, legacy, + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KX4, legacy, + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KR, legacy, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_20GBASE_KR2, legacy, + ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_CR4, legacy, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_KR4, legacy, + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_56GBASE_R4, legacy, + ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CR, legacy, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_SR, legacy, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_ER, legacy, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_SR4, legacy, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_LR4, legacy, + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT); + 
MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_SR2, legacy, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_CR4, legacy, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_SR4, legacy, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_KR4, legacy, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4, legacy, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T, legacy, + ETHTOOL_LINK_MODE_10000baseT_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR, legacy, + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_KR, legacy, + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_SR, legacy, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_CR2, legacy, + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_KR2, legacy, + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_SGMII_100M, ext, + ETHTOOL_LINK_MODE_100baseT_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_X_SGMII, ext, + ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, + ETHTOOL_LINK_MODE_1000baseX_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_5GBASE_R, ext, + ETHTOOL_LINK_MODE_5000baseT_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_XFI_XAUI_1, ext, + ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseR_FEC_BIT, + ETHTOOL_LINK_MODE_10000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseLR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseER_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_XLAUI_4_XLPPI_4, ext, + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GAUI_1_25GBASE_CR_KR, ext, + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2, + ext, + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR, ext, + ETHTOOL_LINK_MODE_50000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseDR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_CAUI_4_100GBASE_CR4_KR4, ext, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GAUI_2_100GBASE_CR2_KR2, ext, + ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_200GAUI_4_200GBASE_CR4_KR4, ext, + ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT, + 
ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GAUI_1_100GBASE_CR_KR, ext, + ETHTOOL_LINK_MODE_100000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR_ER_FR_Full_BIT, + ETHTOOL_LINK_MODE_100000baseDR_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_200GAUI_2_200GBASE_CR2_KR2, ext, + ETHTOOL_LINK_MODE_200000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_200000baseSR2_Full_BIT, + ETHTOOL_LINK_MODE_200000baseLR2_ER2_FR2_Full_BIT, + ETHTOOL_LINK_MODE_200000baseDR2_Full_BIT, + ETHTOOL_LINK_MODE_200000baseCR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_400GAUI_4_400GBASE_CR4_KR4, ext, + ETHTOOL_LINK_MODE_400000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_400000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT, + ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT, + ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT); +} + +static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev, + struct ptys2ethtool_config **arr, + u32 *size) +{ + bool ext = mlx5e_ptys_ext_supported(mdev); + + *arr = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table; + *size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) : + ARRAY_SIZE(ptys2legacy_ethtool_table); +} + +typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable); + +struct pflag_desc { + char name[ETH_GSTRING_LEN]; + mlx5e_pflag_handler handler; +}; + +static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS]; + +int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return mlx5e_stats_total_num(priv); + case ETH_SS_PRIV_FLAGS: + return MLX5E_NUM_PFLAGS; + case ETH_SS_TEST: + return mlx5e_self_test_num(priv); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_get_sset_count(struct net_device *dev, int sset) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_get_sset_count(priv, sset); +} + +void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data) +{ + int i; + + switch (stringset) { + case ETH_SS_PRIV_FLAGS: + for (i = 0; i < MLX5E_NUM_PFLAGS; i++) + strcpy(data + i * ETH_GSTRING_LEN, + mlx5e_priv_flags[i].name); + break; + + case ETH_SS_TEST: + mlx5e_self_test_fill_strings(priv, data); + break; + + case ETH_SS_STATS: + mlx5e_stats_fill_strings(priv, data); + break; + } +} + +static void mlx5e_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_strings(priv, stringset, data); +} + +void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, + struct ethtool_stats *stats, u64 *data) +{ + int idx = 0; + + mutex_lock(&priv->state_lock); + mlx5e_stats_update(priv); + mutex_unlock(&priv->state_lock); + + mlx5e_stats_fill(priv, data, idx); +} + +static void mlx5e_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + u64 *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_ethtool_stats(priv, stats, data); +} + +void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param) +{ + /* Limitation for regular RQ. XSK RQ may clamp the queue length in + * mlx5e_mpwqe_get_log_rq_size. 
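+ * Note that ring sizes are handled as powers of two: this getter reports
+ * 1 << log_rq_mtu_frames and 1 << log_sq_size, while set_ringparam below
+ * rounds the requested counts up with order_base_2(), so e.g. a request
+ * for 1000 RX descriptors becomes 1024 (log size 10).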
+ */ + u8 max_log_mpwrq_pkts = mlx5e_mpwrq_max_log_rq_pkts(priv->mdev, + PAGE_SHIFT, + MLX5E_MPWRQ_UMR_MODE_ALIGNED); + + param->rx_max_pending = 1 << min_t(u8, MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE, + max_log_mpwrq_pkts); + param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; + param->rx_pending = 1 << priv->channels.params.log_rq_mtu_frames; + param->tx_pending = 1 << priv->channels.params.log_sq_size; + + kernel_param->tcp_data_split = + (priv->channels.params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) ? + ETHTOOL_TCP_DATA_SPLIT_ENABLED : + ETHTOOL_TCP_DATA_SPLIT_DISABLED; +} + +static void mlx5e_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_ringparam(priv, param, kernel_param); +} + +int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, + struct ethtool_ringparam *param) +{ + struct mlx5e_params new_params; + u8 log_rq_size; + u8 log_sq_size; + int err = 0; + + if (param->rx_jumbo_pending) { + netdev_info(priv->netdev, "%s: rx_jumbo_pending not supported\n", + __func__); + return -EINVAL; + } + if (param->rx_mini_pending) { + netdev_info(priv->netdev, "%s: rx_mini_pending not supported\n", + __func__); + return -EINVAL; + } + + if (param->rx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) { + netdev_info(priv->netdev, "%s: rx_pending (%d) < min (%d)\n", + __func__, param->rx_pending, + 1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE); + return -EINVAL; + } + + if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) { + netdev_info(priv->netdev, "%s: tx_pending (%d) < min (%d)\n", + __func__, param->tx_pending, + 1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE); + return -EINVAL; + } + + log_rq_size = order_base_2(param->rx_pending); + log_sq_size = order_base_2(param->tx_pending); + + if (log_rq_size == priv->channels.params.log_rq_mtu_frames && + log_sq_size == priv->channels.params.log_sq_size) + return 0; + + mutex_lock(&priv->state_lock); + + new_params = priv->channels.params; + new_params.log_rq_mtu_frames = log_rq_size; + new_params.log_sq_size = log_sq_size; + + err = mlx5e_validate_params(priv->mdev, &new_params); + if (err) + goto unlock; + + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); + +unlock: + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_set_ringparam(priv, param); +} + +void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, + struct ethtool_channels *ch) +{ + mutex_lock(&priv->state_lock); + ch->max_combined = priv->max_nch; + ch->combined_count = priv->channels.params.num_channels; + mutex_unlock(&priv->state_lock); +} + +static void mlx5e_get_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_channels(priv, ch); +} + +int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, + struct ethtool_channels *ch) +{ + struct mlx5e_params *cur_params = &priv->channels.params; + unsigned int count = ch->combined_count; + struct mlx5e_params new_params; + bool arfs_enabled; + int rss_cnt; + bool opened; + int err = 0; + + if (!count) { + netdev_info(priv->netdev, "%s: combined_count=0 not supported\n", + __func__); + return -EINVAL; + } 
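+ /* Reached via "ethtool -L <dev> combined <N>" through the .set_channels
+ * ethtool op (mlx5e_set_channels below); a request for the current
+ * channel count is a no-op.
+ */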
+ + if (cur_params->num_channels == count) + return 0; + + mutex_lock(&priv->state_lock); + + /* Don't allow changing the number of channels if HTB offload is active, + * because the numeration of the QoS SQs will change, while per-queue + * qdiscs are attached. + */ + if (mlx5e_selq_is_htb_enabled(&priv->selq)) { + err = -EINVAL; + netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the number of channels\n", + __func__); + goto out; + } + + /* Don't allow changing the number of channels if non-default RSS contexts exist, + * the kernel doesn't protect against set_channels operations that break them. + */ + rss_cnt = mlx5e_rx_res_rss_cnt(priv->rx_res) - 1; + if (rss_cnt) { + err = -EINVAL; + netdev_err(priv->netdev, "%s: Non-default RSS contexts exist (%d), cannot change the number of channels\n", + __func__, rss_cnt); + goto out; + } + + /* Don't allow changing the number of channels if MQPRIO mode channel offload is active, + * because it defines a partition over the channels queues. + */ + if (cur_params->mqprio.mode == TC_MQPRIO_MODE_CHANNEL) { + err = -EINVAL; + netdev_err(priv->netdev, "%s: MQPRIO mode channel offload is active, cannot change the number of channels\n", + __func__); + goto out; + } + + new_params = *cur_params; + new_params.num_channels = count; + + opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + + arfs_enabled = opened && (priv->netdev->features & NETIF_F_NTUPLE); + if (arfs_enabled) + mlx5e_arfs_disable(priv->fs); + + /* Switch to new channels, set new parameters and close old ones */ + err = mlx5e_safe_switch_params(priv, &new_params, + mlx5e_num_channels_changed_ctx, NULL, true); + + if (arfs_enabled) { + int err2 = mlx5e_arfs_enable(priv->fs); + + if (err2) + netdev_err(priv->netdev, "%s: mlx5e_arfs_enable failed: %d\n", + __func__, err2); + } + +out: + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_set_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_set_channels(priv, ch); +} + +int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal) +{ + struct dim_cq_moder *rx_moder, *tx_moder; + + if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) + return -EOPNOTSUPP; + + rx_moder = &priv->channels.params.rx_cq_moderation; + coal->rx_coalesce_usecs = rx_moder->usec; + coal->rx_max_coalesced_frames = rx_moder->pkts; + coal->use_adaptive_rx_coalesce = priv->channels.params.rx_dim_enabled; + + tx_moder = &priv->channels.params.tx_cq_moderation; + coal->tx_coalesce_usecs = tx_moder->usec; + coal->tx_max_coalesced_frames = tx_moder->pkts; + coal->use_adaptive_tx_coalesce = priv->channels.params.tx_dim_enabled; + + kernel_coal->use_cqe_mode_rx = + MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_BASED_MODER); + kernel_coal->use_cqe_mode_tx = + MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_CQE_BASED_MODER); + + return 0; +} + +static int mlx5e_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal); +} + +#define MLX5E_MAX_COAL_TIME MLX5_MAX_CQ_PERIOD +#define MLX5E_MAX_COAL_FRAMES MLX5_MAX_CQ_COUNT + +static void +mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) +{ + struct mlx5_core_dev *mdev = 
priv->mdev; + int tc; + int i; + + for (i = 0; i < priv->channels.num; ++i) { + struct mlx5e_channel *c = priv->channels.c[i]; + + for (tc = 0; tc < c->num_tc; tc++) { + mlx5_core_modify_cq_moderation(mdev, + &c->sq[tc].cq.mcq, + coal->tx_coalesce_usecs, + coal->tx_max_coalesced_frames); + } + } +} + +static void +mlx5e_set_priv_channels_rx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int i; + + for (i = 0; i < priv->channels.num; ++i) { + struct mlx5e_channel *c = priv->channels.c[i]; + + mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq, + coal->rx_coalesce_usecs, + coal->rx_max_coalesced_frames); + } +} + +/* convert a boolean value of cq_mode to mlx5 period mode + * true : MLX5_CQ_PERIOD_MODE_START_FROM_CQE + * false : MLX5_CQ_PERIOD_MODE_START_FROM_EQE + */ +static int cqe_mode_to_period_mode(bool val) +{ + return val ? MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; +} + +int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct dim_cq_moder *rx_moder, *tx_moder; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_params new_params; + bool reset_rx, reset_tx; + bool reset = true; + u8 cq_period_mode; + int err = 0; + + if (!MLX5_CAP_GEN(mdev, cq_moderation)) + return -EOPNOTSUPP; + + if (coal->tx_coalesce_usecs > MLX5E_MAX_COAL_TIME || + coal->rx_coalesce_usecs > MLX5E_MAX_COAL_TIME) { + netdev_info(priv->netdev, "%s: maximum coalesce time supported is %lu usecs\n", + __func__, MLX5E_MAX_COAL_TIME); + return -ERANGE; + } + + if (coal->tx_max_coalesced_frames > MLX5E_MAX_COAL_FRAMES || + coal->rx_max_coalesced_frames > MLX5E_MAX_COAL_FRAMES) { + netdev_info(priv->netdev, "%s: maximum coalesced frames supported is %lu\n", + __func__, MLX5E_MAX_COAL_FRAMES); + return -ERANGE; + } + + if ((kernel_coal->use_cqe_mode_rx || kernel_coal->use_cqe_mode_tx) && + !MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) { + NL_SET_ERR_MSG_MOD(extack, "cqe_mode_rx/tx is not supported on this device"); + return -EOPNOTSUPP; + } + + mutex_lock(&priv->state_lock); + new_params = priv->channels.params; + + rx_moder = &new_params.rx_cq_moderation; + rx_moder->usec = coal->rx_coalesce_usecs; + rx_moder->pkts = coal->rx_max_coalesced_frames; + new_params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce; + + tx_moder = &new_params.tx_cq_moderation; + tx_moder->usec = coal->tx_coalesce_usecs; + tx_moder->pkts = coal->tx_max_coalesced_frames; + new_params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce; + + reset_rx = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled; + reset_tx = !!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled; + + cq_period_mode = cqe_mode_to_period_mode(kernel_coal->use_cqe_mode_rx); + if (cq_period_mode != rx_moder->cq_period_mode) { + mlx5e_set_rx_cq_mode_params(&new_params, cq_period_mode); + reset_rx = true; + } + + cq_period_mode = cqe_mode_to_period_mode(kernel_coal->use_cqe_mode_tx); + if (cq_period_mode != tx_moder->cq_period_mode) { + mlx5e_set_tx_cq_mode_params(&new_params, cq_period_mode); + reset_tx = true; + } + + if (reset_rx) { + u8 mode = MLX5E_GET_PFLAG(&new_params, + MLX5E_PFLAG_RX_CQE_BASED_MODER); + + mlx5e_reset_rx_moderation(&new_params, mode); + } + if (reset_tx) { + u8 mode = MLX5E_GET_PFLAG(&new_params, + MLX5E_PFLAG_TX_CQE_BASED_MODER); + + mlx5e_reset_tx_moderation(&new_params, mode); + } + + /* 
If DIM state hasn't changed, it's possible to modify interrupt + * moderation parameters on the fly, even if the channels are open. + */ + if (!reset_rx && !reset_tx && test_bit(MLX5E_STATE_OPENED, &priv->state)) { + if (!coal->use_adaptive_rx_coalesce) + mlx5e_set_priv_channels_rx_coalesce(priv, coal); + if (!coal->use_adaptive_tx_coalesce) + mlx5e_set_priv_channels_tx_coalesce(priv, coal); + reset = false; + } + + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset); + + mutex_unlock(&priv->state_lock); + return err; +} + +static int mlx5e_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_coalesce(priv, coal, kernel_coal, extack); +} + +static void ptys2ethtool_supported_link(struct mlx5_core_dev *mdev, + unsigned long *supported_modes, + u32 eth_proto_cap) +{ + unsigned long proto_cap = eth_proto_cap; + struct ptys2ethtool_config *table; + u32 max_size; + int proto; + + mlx5e_ethtool_get_speed_arr(mdev, &table, &max_size); + for_each_set_bit(proto, &proto_cap, max_size) + bitmap_or(supported_modes, supported_modes, + table[proto].supported, + __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static void ptys2ethtool_adver_link(unsigned long *advertising_modes, + u32 eth_proto_cap, bool ext) +{ + unsigned long proto_cap = eth_proto_cap; + struct ptys2ethtool_config *table; + u32 max_size; + int proto; + + table = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table; + max_size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) : + ARRAY_SIZE(ptys2legacy_ethtool_table); + + for_each_set_bit(proto, &proto_cap, max_size) + bitmap_or(advertising_modes, advertising_modes, + table[proto].advertised, + __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static const u32 pplm_fec_2_ethtool[] = { + [MLX5E_FEC_NOFEC] = ETHTOOL_FEC_OFF, + [MLX5E_FEC_FIRECODE] = ETHTOOL_FEC_BASER, + [MLX5E_FEC_RS_528_514] = ETHTOOL_FEC_RS, + [MLX5E_FEC_RS_544_514] = ETHTOOL_FEC_RS, + [MLX5E_FEC_LLRS_272_257_1] = ETHTOOL_FEC_LLRS, +}; + +static u32 pplm2ethtool_fec(u_long fec_mode, unsigned long size) +{ + int mode = 0; + + if (!fec_mode) + return ETHTOOL_FEC_AUTO; + + mode = find_first_bit(&fec_mode, size); + + if (mode < ARRAY_SIZE(pplm_fec_2_ethtool)) + return pplm_fec_2_ethtool[mode]; + + return 0; +} + +#define MLX5E_ADVERTISE_SUPPORTED_FEC(mlx5_fec, ethtool_fec) \ + do { \ + if (mlx5e_fec_in_caps(dev, 1 << (mlx5_fec))) \ + __set_bit(ethtool_fec, \ + link_ksettings->link_modes.supported);\ + } while (0) + +static const u32 pplm_fec_2_ethtool_linkmodes[] = { + [MLX5E_FEC_NOFEC] = ETHTOOL_LINK_MODE_FEC_NONE_BIT, + [MLX5E_FEC_FIRECODE] = ETHTOOL_LINK_MODE_FEC_BASER_BIT, + [MLX5E_FEC_RS_528_514] = ETHTOOL_LINK_MODE_FEC_RS_BIT, + [MLX5E_FEC_RS_544_514] = ETHTOOL_LINK_MODE_FEC_RS_BIT, + [MLX5E_FEC_LLRS_272_257_1] = ETHTOOL_LINK_MODE_FEC_LLRS_BIT, +}; + +static int get_fec_supported_advertised(struct mlx5_core_dev *dev, + struct ethtool_link_ksettings *link_ksettings) +{ + unsigned long active_fec_long; + u32 active_fec; + u32 bitn; + int err; + + err = mlx5e_get_fec_mode(dev, &active_fec, NULL); + if (err) + return (err == -EOPNOTSUPP) ? 
0 : err; + + MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_NOFEC, + ETHTOOL_LINK_MODE_FEC_NONE_BIT); + MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_FIRECODE, + ETHTOOL_LINK_MODE_FEC_BASER_BIT); + MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_RS_528_514, + ETHTOOL_LINK_MODE_FEC_RS_BIT); + MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_LLRS_272_257_1, + ETHTOOL_LINK_MODE_FEC_LLRS_BIT); + + active_fec_long = active_fec; + /* active fec is a bit set, find out which bit is set and + * advertise the corresponding ethtool bit + */ + bitn = find_first_bit(&active_fec_long, sizeof(active_fec_long) * BITS_PER_BYTE); + if (bitn < ARRAY_SIZE(pplm_fec_2_ethtool_linkmodes)) + __set_bit(pplm_fec_2_ethtool_linkmodes[bitn], + link_ksettings->link_modes.advertising); + + return 0; +} + +static void ptys2ethtool_supported_advertised_port(struct mlx5_core_dev *mdev, + struct ethtool_link_ksettings *link_ksettings, + u32 eth_proto_cap, u8 connector_type) +{ + if (!MLX5_CAP_PCAM_FEATURE(mdev, ptys_connector_type)) { + if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR) + | MLX5E_PROT_MASK(MLX5E_10GBASE_SR) + | MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) + | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) + | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) + | MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, + FIBRE); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, + FIBRE); + } + + if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_100GBASE_KR4) + | MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) + | MLX5E_PROT_MASK(MLX5E_10GBASE_KR) + | MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) + | MLX5E_PROT_MASK(MLX5E_1000BASE_KX))) { + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, + Backplane); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, + Backplane); + } + return; + } + + switch (connector_type) { + case MLX5E_PORT_TP: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, TP); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, TP); + break; + case MLX5E_PORT_AUI: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, AUI); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, AUI); + break; + case MLX5E_PORT_BNC: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, BNC); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, BNC); + break; + case MLX5E_PORT_MII: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, MII); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, MII); + break; + case MLX5E_PORT_FIBRE: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, FIBRE); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, FIBRE); + break; + case MLX5E_PORT_DA: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, Backplane); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, Backplane); + break; + case MLX5E_PORT_NONE: + case MLX5E_PORT_OTHER: + default: + break; + } +} + +static void get_speed_duplex(struct net_device *netdev, + u32 eth_proto_oper, bool force_legacy, + u16 data_rate_oper, + struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + u32 speed = SPEED_UNKNOWN; + u8 duplex = DUPLEX_UNKNOWN; + + if (!netif_carrier_ok(netdev)) + goto out; + + speed = mlx5e_port_ptys2speed(priv->mdev, eth_proto_oper, force_legacy); + if (!speed) { + if (data_rate_oper) + speed = 100 * data_rate_oper; + else + speed = SPEED_UNKNOWN; + goto out; + } + + 
duplex = DUPLEX_FULL; + +out: + link_ksettings->base.speed = speed; + link_ksettings->base.duplex = duplex; +} + +static void get_supported(struct mlx5_core_dev *mdev, u32 eth_proto_cap, + struct ethtool_link_ksettings *link_ksettings) +{ + unsigned long *supported = link_ksettings->link_modes.supported; + ptys2ethtool_supported_link(mdev, supported, eth_proto_cap); + + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause); +} + +static void get_advertising(u32 eth_proto_cap, u8 tx_pause, u8 rx_pause, + struct ethtool_link_ksettings *link_ksettings, + bool ext) +{ + unsigned long *advertising = link_ksettings->link_modes.advertising; + ptys2ethtool_adver_link(advertising, eth_proto_cap, ext); + + if (rx_pause) + ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause); + if (tx_pause ^ rx_pause) + ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Asym_Pause); +} + +static int ptys2connector_type[MLX5E_CONNECTOR_TYPE_NUMBER] = { + [MLX5E_PORT_UNKNOWN] = PORT_OTHER, + [MLX5E_PORT_NONE] = PORT_NONE, + [MLX5E_PORT_TP] = PORT_TP, + [MLX5E_PORT_AUI] = PORT_AUI, + [MLX5E_PORT_BNC] = PORT_BNC, + [MLX5E_PORT_MII] = PORT_MII, + [MLX5E_PORT_FIBRE] = PORT_FIBRE, + [MLX5E_PORT_DA] = PORT_DA, + [MLX5E_PORT_OTHER] = PORT_OTHER, + }; + +static u8 get_connector_port(struct mlx5_core_dev *mdev, u32 eth_proto, u8 connector_type) +{ + if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_connector_type)) + return ptys2connector_type[connector_type]; + + if (eth_proto & + (MLX5E_PROT_MASK(MLX5E_10GBASE_SR) | + MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) | + MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) | + MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { + return PORT_FIBRE; + } + + if (eth_proto & + (MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) | + MLX5E_PROT_MASK(MLX5E_10GBASE_CR) | + MLX5E_PROT_MASK(MLX5E_100GBASE_CR4))) { + return PORT_DA; + } + + if (eth_proto & + (MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) | + MLX5E_PROT_MASK(MLX5E_10GBASE_KR) | + MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) | + MLX5E_PROT_MASK(MLX5E_100GBASE_KR4))) { + return PORT_NONE; + } + + return PORT_OTHER; +} + +static void get_lp_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_lp, + struct ethtool_link_ksettings *link_ksettings) +{ + unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising; + bool ext = mlx5e_ptys_ext_supported(mdev); + + ptys2ethtool_adver_link(lp_advertising, eth_proto_lp, ext); +} + +int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, + struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {}; + u32 eth_proto_admin; + u8 an_disable_admin; + u16 data_rate_oper; + u32 eth_proto_oper; + u32 eth_proto_cap; + u8 connector_type; + u32 rx_pause = 0; + u32 tx_pause = 0; + u32 eth_proto_lp; + bool admin_ext; + u8 an_status; + bool ext; + int err; + + err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); + if (err) { + netdev_err(priv->netdev, "%s: query port ptys failed: %d\n", + __func__, err); + goto err_query_regs; + } + ext = !!MLX5_GET_ETH_PROTO(ptys_reg, out, true, eth_proto_capability); + eth_proto_cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_capability); + eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_admin); + /* Fields: eth_proto_admin and ext_eth_proto_admin are + * mutually exclusive. Hence try reading legacy advertising + * when extended advertising is zero. + * admin_ext indicates which proto_admin (ext vs. 
legacy) + * should be read and interpreted + */ + admin_ext = ext; + if (ext && !eth_proto_admin) { + eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, false, + eth_proto_admin); + admin_ext = false; + } + + eth_proto_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, admin_ext, + eth_proto_oper); + eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); + an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); + an_status = MLX5_GET(ptys_reg, out, an_status); + connector_type = MLX5_GET(ptys_reg, out, connector_type); + data_rate_oper = MLX5_GET(ptys_reg, out, data_rate_oper); + + mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); + + ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); + ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); + + get_supported(mdev, eth_proto_cap, link_ksettings); + get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings, + admin_ext); + get_speed_duplex(priv->netdev, eth_proto_oper, !admin_ext, + data_rate_oper, link_ksettings); + + eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; + connector_type = connector_type < MLX5E_CONNECTOR_TYPE_NUMBER ? + connector_type : MLX5E_PORT_UNKNOWN; + link_ksettings->base.port = get_connector_port(mdev, eth_proto_oper, connector_type); + ptys2ethtool_supported_advertised_port(mdev, link_ksettings, eth_proto_admin, + connector_type); + get_lp_advertising(mdev, eth_proto_lp, link_ksettings); + + if (an_status == MLX5_AN_COMPLETE) + ethtool_link_ksettings_add_link_mode(link_ksettings, + lp_advertising, Autoneg); + + link_ksettings->base.autoneg = an_disable_admin ? AUTONEG_DISABLE : + AUTONEG_ENABLE; + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, + Autoneg); + + err = get_fec_supported_advertised(mdev, link_ksettings); + if (err) { + netdev_dbg(priv->netdev, "%s: FEC caps query failed: %d\n", + __func__, err); + err = 0; /* don't fail caps query because of FEC error */ + } + + if (!an_disable_admin) + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, Autoneg); + +err_query_regs: + return err; +} + +static int mlx5e_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings); +} + +static int mlx5e_speed_validate(struct net_device *netdev, bool ext, + const unsigned long link_modes, u8 autoneg) +{ + /* Extended link-mode has no speed limitations. 
*/ + if (ext) + return 0; + + if ((link_modes & MLX5E_PROT_MASK(MLX5E_56GBASE_R4)) && + autoneg != AUTONEG_ENABLE) { + netdev_err(netdev, "%s: 56G link speed requires autoneg enabled\n", + __func__); + return -EINVAL; + } + return 0; +} + +static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes) +{ + u32 i, ptys_modes = 0; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { + if (*ptys2legacy_ethtool_table[i].advertised == 0) + continue; + if (bitmap_intersects(ptys2legacy_ethtool_table[i].advertised, + link_modes, + __ETHTOOL_LINK_MODE_MASK_NBITS)) + ptys_modes |= MLX5E_PROT_MASK(i); + } + + return ptys_modes; +} + +static u32 mlx5e_ethtool2ptys_ext_adver_link(const unsigned long *link_modes) +{ + u32 i, ptys_modes = 0; + unsigned long modes[2]; + + for (i = 0; i < MLX5E_EXT_LINK_MODES_NUMBER; ++i) { + if (ptys2ext_ethtool_table[i].advertised[0] == 0 && + ptys2ext_ethtool_table[i].advertised[1] == 0) + continue; + memset(modes, 0, sizeof(modes)); + bitmap_and(modes, ptys2ext_ethtool_table[i].advertised, + link_modes, __ETHTOOL_LINK_MODE_MASK_NBITS); + + if (modes[0] == ptys2ext_ethtool_table[i].advertised[0] && + modes[1] == ptys2ext_ethtool_table[i].advertised[1]) + ptys_modes |= MLX5E_PROT_MASK(i); + } + return ptys_modes; +} + +static bool ext_link_mode_requested(const unsigned long *adver) +{ +#define MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT ETHTOOL_LINK_MODE_50000baseKR_Full_BIT + int size = __ETHTOOL_LINK_MODE_MASK_NBITS - MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT; + __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = {0,}; + + bitmap_set(modes, MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT, size); + return bitmap_intersects(modes, adver, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static bool ext_requested(u8 autoneg, const unsigned long *adver, bool ext_supported) +{ + bool ext_link_mode = ext_link_mode_requested(adver); + + return autoneg == AUTONEG_ENABLE ? ext_link_mode : ext_supported; +} + +int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, + const struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_port_eth_proto eproto; + const unsigned long *adver; + bool an_changes = false; + u8 an_disable_admin; + bool ext_supported; + u8 an_disable_cap; + bool an_disable; + u32 link_modes; + u8 an_status; + u8 autoneg; + u32 speed; + bool ext; + int err; + + u32 (*ethtool2ptys_adver_func)(const unsigned long *adver); + + adver = link_ksettings->link_modes.advertising; + autoneg = link_ksettings->base.autoneg; + speed = link_ksettings->base.speed; + + ext_supported = mlx5e_ptys_ext_supported(mdev); + ext = ext_requested(autoneg, adver, ext_supported); + if (!ext_supported && ext) + return -EOPNOTSUPP; + + ethtool2ptys_adver_func = ext ? mlx5e_ethtool2ptys_ext_adver_link : + mlx5e_ethtool2ptys_adver_link; + err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); + if (err) { + netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n", + __func__, err); + goto out; + } + link_modes = autoneg == AUTONEG_ENABLE ? 
ethtool2ptys_adver_func(adver) : + mlx5e_port_speed2linkmodes(mdev, speed, !ext); + + err = mlx5e_speed_validate(priv->netdev, ext, link_modes, autoneg); + if (err) + goto out; + + link_modes = link_modes & eproto.cap; + if (!link_modes) { + netdev_err(priv->netdev, "%s: Not supported link mode(s) requested", + __func__); + err = -EINVAL; + goto out; + } + + mlx5_port_query_eth_autoneg(mdev, &an_status, &an_disable_cap, + &an_disable_admin); + + an_disable = autoneg == AUTONEG_DISABLE; + an_changes = ((!an_disable && an_disable_admin) || + (an_disable && !an_disable_admin)); + + if (!an_changes && link_modes == eproto.admin) + goto out; + + mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext); + mlx5_toggle_port_link(mdev); + +out: + return err; +} + +static int mlx5e_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings); +} + +u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv) +{ + return sizeof_field(struct mlx5e_rss_params_hash, toeplitz_hash_key); +} + +static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_rxfh_key_size(priv); +} + +u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv) +{ + return MLX5E_INDIR_RQT_SIZE; +} + +static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_rxfh_indir_size(priv); +} + +static int mlx5e_get_rxfh_context(struct net_device *dev, u32 *indir, + u8 *key, u8 *hfunc, u32 rss_context) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_rx_res_rss_get_rxfh(priv->rx_res, rss_context, indir, key, hfunc); + mutex_unlock(&priv->state_lock); + return err; +} + +static int mlx5e_set_rxfh_context(struct net_device *dev, const u32 *indir, + const u8 *key, const u8 hfunc, + u32 *rss_context, bool delete) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int err; + + mutex_lock(&priv->state_lock); + if (delete) { + err = mlx5e_rx_res_rss_destroy(priv->rx_res, *rss_context); + goto unlock; + } + + if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) { + unsigned int count = priv->channels.params.num_channels; + + err = mlx5e_rx_res_rss_init(priv->rx_res, rss_context, count); + if (err) + goto unlock; + } + + err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, *rss_context, indir, key, + hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc); + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) +{ + return mlx5e_get_rxfh_context(netdev, indir, key, hfunc, 0); +} + +int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, 0, indir, key, + hfunc == ETH_RSS_HASH_NO_CHANGE ? 
NULL : &hfunc); + mutex_unlock(&priv->state_lock); + return err; +} + +#define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC 100 +#define MLX5E_PFC_PREVEN_TOUT_MAX_MSEC 8000 +#define MLX5E_PFC_PREVEN_MINOR_PRECENT 85 +#define MLX5E_PFC_PREVEN_TOUT_MIN_MSEC 80 +#define MLX5E_DEVICE_STALL_MINOR_WATERMARK(critical_tout) \ + max_t(u16, MLX5E_PFC_PREVEN_TOUT_MIN_MSEC, \ + (critical_tout * MLX5E_PFC_PREVEN_MINOR_PRECENT) / 100) + +static int mlx5e_get_pfc_prevention_tout(struct net_device *netdev, + u16 *pfc_prevention_tout) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) || + !MLX5_CAP_DEBUG((priv)->mdev, stall_detect)) + return -EOPNOTSUPP; + + return mlx5_query_port_stall_watermark(mdev, pfc_prevention_tout, NULL); +} + +static int mlx5e_set_pfc_prevention_tout(struct net_device *netdev, + u16 pfc_preven) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u16 critical_tout; + u16 minor; + + if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) || + !MLX5_CAP_DEBUG((priv)->mdev, stall_detect)) + return -EOPNOTSUPP; + + critical_tout = (pfc_preven == PFC_STORM_PREVENTION_AUTO) ? + MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC : + pfc_preven; + + if (critical_tout != PFC_STORM_PREVENTION_DISABLE && + (critical_tout > MLX5E_PFC_PREVEN_TOUT_MAX_MSEC || + critical_tout < MLX5E_PFC_PREVEN_TOUT_MIN_MSEC)) { + netdev_info(netdev, "%s: pfc prevention tout not in range (%d-%d)\n", + __func__, MLX5E_PFC_PREVEN_TOUT_MIN_MSEC, + MLX5E_PFC_PREVEN_TOUT_MAX_MSEC); + return -EINVAL; + } + + minor = MLX5E_DEVICE_STALL_MINOR_WATERMARK(critical_tout); + return mlx5_set_port_stall_watermark(mdev, critical_tout, + minor); +} + +static int mlx5e_get_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, + void *data) +{ + int err; + + switch (tuna->id) { + case ETHTOOL_PFC_PREVENTION_TOUT: + err = mlx5e_get_pfc_prevention_tout(dev, data); + break; + default: + err = -EINVAL; + break; + } + + return err; +} + +static int mlx5e_set_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, + const void *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int err; + + mutex_lock(&priv->state_lock); + + switch (tuna->id) { + case ETHTOOL_PFC_PREVENTION_TOUT: + err = mlx5e_set_pfc_prevention_tout(dev, *(u16 *)data); + break; + default: + err = -EINVAL; + break; + } + + mutex_unlock(&priv->state_lock); + return err; +} + +static void mlx5e_get_pause_stats(struct net_device *netdev, + struct ethtool_pause_stats *pause_stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_pause_get(priv, pause_stats); +} + +void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + err = mlx5_query_port_pause(mdev, &pauseparam->rx_pause, + &pauseparam->tx_pause); + if (err) { + netdev_err(priv->netdev, "%s: mlx5_query_port_pause failed:0x%x\n", + __func__, err); + } +} + +static void mlx5e_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_ethtool_get_pauseparam(priv, pauseparam); +} + +int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return -EOPNOTSUPP; + + if (pauseparam->autoneg) + return -EINVAL; + + err = 
mlx5_set_port_pause(mdev, + pauseparam->rx_pause ? 1 : 0, + pauseparam->tx_pause ? 1 : 0); + if (err) { + netdev_err(priv->netdev, "%s: mlx5_set_port_pause failed:0x%x\n", + __func__, err); + } + + return err; +} + +static int mlx5e_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_pauseparam(priv, pauseparam); +} + +int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, + struct ethtool_ts_info *info) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + info->phc_index = mlx5_clock_get_ptp_index(mdev); + + if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) || + info->phc_index == -1) + return 0; + + info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->tx_types = BIT(HWTSTAMP_TX_OFF) | + BIT(HWTSTAMP_TX_ON); + + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_ALL); + + return 0; +} + +static int mlx5e_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_get_ts_info(priv, info); +} + +static __u32 mlx5e_get_wol_supported(struct mlx5_core_dev *mdev) +{ + __u32 ret = 0; + + if (MLX5_CAP_GEN(mdev, wol_g)) + ret |= WAKE_MAGIC; + + if (MLX5_CAP_GEN(mdev, wol_s)) + ret |= WAKE_MAGICSECURE; + + if (MLX5_CAP_GEN(mdev, wol_a)) + ret |= WAKE_ARP; + + if (MLX5_CAP_GEN(mdev, wol_b)) + ret |= WAKE_BCAST; + + if (MLX5_CAP_GEN(mdev, wol_m)) + ret |= WAKE_MCAST; + + if (MLX5_CAP_GEN(mdev, wol_u)) + ret |= WAKE_UCAST; + + if (MLX5_CAP_GEN(mdev, wol_p)) + ret |= WAKE_PHY; + + return ret; +} + +static __u32 mlx5e_reformat_wol_mode_mlx5_to_linux(u8 mode) +{ + __u32 ret = 0; + + if (mode & MLX5_WOL_MAGIC) + ret |= WAKE_MAGIC; + + if (mode & MLX5_WOL_SECURED_MAGIC) + ret |= WAKE_MAGICSECURE; + + if (mode & MLX5_WOL_ARP) + ret |= WAKE_ARP; + + if (mode & MLX5_WOL_BROADCAST) + ret |= WAKE_BCAST; + + if (mode & MLX5_WOL_MULTICAST) + ret |= WAKE_MCAST; + + if (mode & MLX5_WOL_UNICAST) + ret |= WAKE_UCAST; + + if (mode & MLX5_WOL_PHY_ACTIVITY) + ret |= WAKE_PHY; + + return ret; +} + +static u8 mlx5e_reformat_wol_mode_linux_to_mlx5(__u32 mode) +{ + u8 ret = 0; + + if (mode & WAKE_MAGIC) + ret |= MLX5_WOL_MAGIC; + + if (mode & WAKE_MAGICSECURE) + ret |= MLX5_WOL_SECURED_MAGIC; + + if (mode & WAKE_ARP) + ret |= MLX5_WOL_ARP; + + if (mode & WAKE_BCAST) + ret |= MLX5_WOL_BROADCAST; + + if (mode & WAKE_MCAST) + ret |= MLX5_WOL_MULTICAST; + + if (mode & WAKE_UCAST) + ret |= MLX5_WOL_UNICAST; + + if (mode & WAKE_PHY) + ret |= MLX5_WOL_PHY_ACTIVITY; + + return ret; +} + +static void mlx5e_get_wol(struct net_device *netdev, + struct ethtool_wolinfo *wol) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 mlx5_wol_mode; + int err; + + memset(wol, 0, sizeof(*wol)); + + wol->supported = mlx5e_get_wol_supported(mdev); + if (!wol->supported) + return; + + err = mlx5_query_port_wol(mdev, &mlx5_wol_mode); + if (err) + return; + + wol->wolopts = mlx5e_reformat_wol_mode_mlx5_to_linux(mlx5_wol_mode); +} + +static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + __u32 wol_supported = mlx5e_get_wol_supported(mdev); + u32 mlx5_wol_mode; + + if (!wol_supported) + return -EOPNOTSUPP; + + if (wol->wolopts & ~wol_supported) + return -EINVAL; + + mlx5_wol_mode = 
mlx5e_reformat_wol_mode_linux_to_mlx5(wol->wolopts); + + return mlx5_set_port_wol(mdev, mlx5_wol_mode); +} + +static void mlx5e_get_fec_stats(struct net_device *netdev, + struct ethtool_fec_stats *fec_stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_fec_get(priv, fec_stats); +} + +static int mlx5e_get_fecparam(struct net_device *netdev, + struct ethtool_fecparam *fecparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u16 fec_configured; + u32 fec_active; + int err; + + err = mlx5e_get_fec_mode(mdev, &fec_active, &fec_configured); + + if (err) + return err; + + fecparam->active_fec = pplm2ethtool_fec((unsigned long)fec_active, + sizeof(unsigned long) * BITS_PER_BYTE); + + if (!fecparam->active_fec) + return -EOPNOTSUPP; + + fecparam->fec = pplm2ethtool_fec((unsigned long)fec_configured, + sizeof(unsigned long) * BITS_PER_BYTE); + + return 0; +} + +static int mlx5e_set_fecparam(struct net_device *netdev, + struct ethtool_fecparam *fecparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + unsigned long fec_bitmap; + u16 fec_policy = 0; + int mode; + int err; + + bitmap_from_arr32(&fec_bitmap, &fecparam->fec, sizeof(fecparam->fec) * BITS_PER_BYTE); + if (bitmap_weight(&fec_bitmap, ETHTOOL_FEC_LLRS_BIT + 1) > 1) + return -EOPNOTSUPP; + + for (mode = 0; mode < ARRAY_SIZE(pplm_fec_2_ethtool); mode++) { + if (!(pplm_fec_2_ethtool[mode] & fecparam->fec)) + continue; + fec_policy |= (1 << mode); + break; + } + + err = mlx5e_set_fec_mode(mdev, fec_policy); + + if (err) + return err; + + mlx5_toggle_port_link(mdev); + + return 0; +} + +static u32 mlx5e_get_msglevel(struct net_device *dev) +{ + return ((struct mlx5e_priv *)netdev_priv(dev))->msglevel; +} + +static void mlx5e_set_msglevel(struct net_device *dev, u32 val) +{ + ((struct mlx5e_priv *)netdev_priv(dev))->msglevel = val; +} + +static int mlx5e_set_phys_id(struct net_device *dev, + enum ethtool_phys_id_state state) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + u16 beacon_duration; + + if (!MLX5_CAP_GEN(mdev, beacon_led)) + return -EOPNOTSUPP; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + beacon_duration = MLX5_BEACON_DURATION_INF; + break; + case ETHTOOL_ID_INACTIVE: + beacon_duration = MLX5_BEACON_DURATION_OFF; + break; + default: + return -EOPNOTSUPP; + } + + return mlx5_set_port_beacon(mdev, beacon_duration); +} + +static int mlx5e_get_module_info(struct net_device *netdev, + struct ethtool_modinfo *modinfo) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *dev = priv->mdev; + int size_read = 0; + u8 data[4] = {0}; + + size_read = mlx5_query_module_eeprom(dev, 0, 2, data); + if (size_read < 2) + return -EIO; + + /* data[0] = identifier byte */ + switch (data[0]) { + case MLX5_MODULE_ID_QSFP: + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN; + break; + case MLX5_MODULE_ID_QSFP_PLUS: + case MLX5_MODULE_ID_QSFP28: + /* data[1] = revision id */ + if (data[0] == MLX5_MODULE_ID_QSFP28 || data[1] >= 0x3) { + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN; + } + break; + case MLX5_MODULE_ID_SFP: + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + break; + default: + netdev_err(priv->netdev, "%s: cable type not 
recognized:0x%x\n", + __func__, data[0]); + return -EINVAL; + } + + return 0; +} + +static int mlx5e_get_module_eeprom(struct net_device *netdev, + struct ethtool_eeprom *ee, + u8 *data) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + int offset = ee->offset; + int size_read; + int i = 0; + + if (!ee->len) + return -EINVAL; + + memset(data, 0, ee->len); + + while (i < ee->len) { + size_read = mlx5_query_module_eeprom(mdev, offset, ee->len - i, + data + i); + + if (!size_read) + /* Done reading */ + return 0; + + if (size_read < 0) { + netdev_err(priv->netdev, "%s: mlx5_query_eeprom failed:0x%x\n", + __func__, size_read); + return size_read; + } + + i += size_read; + offset += size_read; + } + + return 0; +} + +static int mlx5e_get_module_eeprom_by_page(struct net_device *netdev, + const struct ethtool_module_eeprom *page_data, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_module_eeprom_query_params query; + struct mlx5_core_dev *mdev = priv->mdev; + u8 *data = page_data->data; + int size_read; + int i = 0; + + if (!page_data->length) + return -EINVAL; + + memset(data, 0, page_data->length); + + query.offset = page_data->offset; + query.i2c_address = page_data->i2c_address; + query.bank = page_data->bank; + query.page = page_data->page; + while (i < page_data->length) { + query.size = page_data->length - i; + size_read = mlx5_query_module_eeprom_by_page(mdev, &query, data + i); + + /* Done reading, return how many bytes was read */ + if (!size_read) + return i; + + if (size_read == -EINVAL) + return -EINVAL; + if (size_read < 0) { + netdev_err(priv->netdev, "%s: mlx5_query_module_eeprom_by_page failed:0x%x\n", + __func__, size_read); + return i; + } + + i += size_read; + query.offset += size_read; + } + + return i; +} + +int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, + struct ethtool_flash *flash) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct net_device *dev = priv->netdev; + const struct firmware *fw; + int err; + + if (flash->region != ETHTOOL_FLASH_ALL_REGIONS) + return -EOPNOTSUPP; + + err = request_firmware_direct(&fw, flash->data, &dev->dev); + if (err) + return err; + + dev_hold(dev); + rtnl_unlock(); + + err = mlx5_firmware_flash(mdev, fw, NULL); + release_firmware(fw); + + rtnl_lock(); + dev_put(dev); + return err; +} + +static int mlx5e_flash_device(struct net_device *dev, + struct ethtool_flash *flash) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_flash_device(priv, flash); +} + +static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, + bool is_rx_cq) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + u8 cq_period_mode, current_cq_period_mode; + struct mlx5e_params new_params; + + if (enable && !MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) + return -EOPNOTSUPP; + + cq_period_mode = cqe_mode_to_period_mode(enable); + + current_cq_period_mode = is_rx_cq ? 
+ priv->channels.params.rx_cq_moderation.cq_period_mode : + priv->channels.params.tx_cq_moderation.cq_period_mode; + + if (cq_period_mode == current_cq_period_mode) + return 0; + + new_params = priv->channels.params; + if (is_rx_cq) + mlx5e_set_rx_cq_mode_params(&new_params, cq_period_mode); + else + mlx5e_set_tx_cq_mode_params(&new_params, cq_period_mode); + + return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); +} + +static int set_pflag_tx_cqe_based_moder(struct net_device *netdev, bool enable) +{ + return set_pflag_cqe_based_moder(netdev, enable, false); +} + +static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) +{ + return set_pflag_cqe_based_moder(netdev, enable, true); +} + +int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val, bool rx_filter) +{ + bool curr_val = MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS); + struct mlx5e_params new_params; + int err = 0; + + if (!MLX5_CAP_GEN(priv->mdev, cqe_compression)) + return new_val ? -EOPNOTSUPP : 0; + + if (curr_val == new_val) + return 0; + + if (new_val && !mlx5e_profile_feature_cap(priv->profile, PTP_RX) && rx_filter) { + netdev_err(priv->netdev, + "Profile doesn't support enabling of CQE compression while hardware time-stamping is enabled.\n"); + return -EINVAL; + } + + if (priv->channels.params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { + netdev_warn(priv->netdev, "Can't set CQE compression with HW-GRO, disable it first.\n"); + return -EINVAL; + } + + new_params = priv->channels.params; + MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); + if (rx_filter) + new_params.ptp_rx = new_val; + + if (new_params.ptp_rx == priv->channels.params.ptp_rx) + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); + else + err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx, + &new_params.ptp_rx, true); + if (err) + return err; + + mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n", + MLX5E_GET_PFLAG(&priv->channels.params, + MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF"); + + return 0; +} + +static int set_pflag_rx_cqe_compress(struct net_device *netdev, + bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + bool rx_filter; + int err; + + if (!MLX5_CAP_GEN(mdev, cqe_compression)) + return -EOPNOTSUPP; + + rx_filter = priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE; + err = mlx5e_modify_rx_cqe_compression_locked(priv, enable, rx_filter); + if (err) + return err; + + priv->channels.params.rx_cqe_compress_def = enable; + + return 0; +} + +static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_params new_params; + + if (enable) { + /* Checking the regular RQ here; mlx5e_validate_xsk_param called + * from mlx5e_open_xsk will check for each XSK queue, and + * mlx5e_safe_switch_params will be reverted if any check fails. 
+ */ + int err = mlx5e_mpwrq_validate_regular(mdev, &priv->channels.params); + + if (err) + return err; + } else if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) { + netdev_warn(netdev, "Can't set legacy RQ with HW-GRO/LRO, disable them first\n"); + return -EINVAL; + } + + new_params = priv->channels.params; + + MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ, enable); + mlx5e_set_rq_type(mdev, &new_params); + + return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); +} + +static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_channels *channels = &priv->channels; + struct mlx5e_channel *c; + int i; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || + priv->channels.params.xdp_prog) + return 0; + + for (i = 0; i < channels->num; i++) { + c = channels->c[i]; + if (enable) + __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); + else + __clear_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); + } + + return 0; +} + +static int set_pflag_tx_mpwqe_common(struct net_device *netdev, u32 flag, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_params new_params; + + if (enable && !mlx5e_tx_mpwqe_supported(mdev)) + return -EOPNOTSUPP; + + new_params = priv->channels.params; + + MLX5E_SET_PFLAG(&new_params, flag, enable); + + return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); +} + +static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable) +{ + return set_pflag_tx_mpwqe_common(netdev, MLX5E_PFLAG_XDP_TX_MPWQE, enable); +} + +static int set_pflag_skb_tx_mpwqe(struct net_device *netdev, bool enable) +{ + return set_pflag_tx_mpwqe_common(netdev, MLX5E_PFLAG_SKB_TX_MPWQE, enable); +} + +static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_params new_params; + int err; + + if (!MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) + return -EOPNOTSUPP; + + /* Don't allow changing the PTP state if HTB offload is active, because + * the numeration of the QoS SQs will change, while per-queue qdiscs are + * attached. + */ + if (mlx5e_selq_is_htb_enabled(&priv->selq)) { + netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the PTP state\n", + __func__); + return -EINVAL; + } + + new_params = priv->channels.params; + /* Don't allow enabling TX-port-TS if MQPRIO mode channel offload is + * active, since it defines explicitly which TC accepts the packet. + * This conflicts with TX-port-TS hijacking the PTP traffic to a specific + * HW TX-queue. + */ + if (enable && new_params.mqprio.mode == TC_MQPRIO_MODE_CHANNEL) { + netdev_err(priv->netdev, + "%s: MQPRIO mode channel offload is active, cannot set the TX-port-TS\n", + __func__); + return -EINVAL; + } + MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_TX_PORT_TS, enable); + /* No need to verify SQ stop room as + * ptpsq.txqsq.stop_room <= generic_sq->stop_room, and both + * has the same log_sq_size. 
+ */ + + err = mlx5e_safe_switch_params(priv, &new_params, + mlx5e_num_channels_changed_ctx, NULL, true); + if (!err) + priv->tx_ptp_opened = true; + + return err; +} + +static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS] = { + { "rx_cqe_moder", set_pflag_rx_cqe_based_moder }, + { "tx_cqe_moder", set_pflag_tx_cqe_based_moder }, + { "rx_cqe_compress", set_pflag_rx_cqe_compress }, + { "rx_striding_rq", set_pflag_rx_striding_rq }, + { "rx_no_csum_complete", set_pflag_rx_no_csum_complete }, + { "xdp_tx_mpwqe", set_pflag_xdp_tx_mpwqe }, + { "skb_tx_mpwqe", set_pflag_skb_tx_mpwqe }, + { "tx_port_ts", set_pflag_tx_port_ts }, +}; + +static int mlx5e_handle_pflag(struct net_device *netdev, + u32 wanted_flags, + enum mlx5e_priv_flag flag) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + bool enable = !!(wanted_flags & BIT(flag)); + u32 changes = wanted_flags ^ priv->channels.params.pflags; + int err; + + if (!(changes & BIT(flag))) + return 0; + + err = mlx5e_priv_flags[flag].handler(netdev, enable); + if (err) { + netdev_err(netdev, "%s private flag '%s' failed err %d\n", + enable ? "Enable" : "Disable", mlx5e_priv_flags[flag].name, err); + return err; + } + + MLX5E_SET_PFLAG(&priv->channels.params, flag, enable); + return 0; +} + +static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + enum mlx5e_priv_flag pflag; + int err; + + mutex_lock(&priv->state_lock); + + for (pflag = 0; pflag < MLX5E_NUM_PFLAGS; pflag++) { + err = mlx5e_handle_pflag(netdev, pflags, pflag); + if (err) + break; + } + + mutex_unlock(&priv->state_lock); + + /* Need to fix some features.. */ + netdev_update_features(netdev); + + return err; +} + +static u32 mlx5e_get_priv_flags(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return priv->channels.params.pflags; +} + +int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, + u32 *rule_locs) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + /* ETHTOOL_GRXRINGS is needed by ethtool -x which is not part + * of rxnfc. We keep this logic out of mlx5e_ethtool_get_rxnfc, + * to avoid breaking "ethtool -x" when mlx5e_ethtool_get_rxnfc + * is compiled out via CONFIG_MLX5_EN_RXNFC=n. 
+ */ + if (info->cmd == ETHTOOL_GRXRINGS) { + info->data = priv->channels.params.num_channels; + return 0; + } + + return mlx5e_ethtool_get_rxnfc(priv, info, rule_locs); +} + +int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_set_rxnfc(priv, cmd); +} + +static int query_port_status_opcode(struct mlx5_core_dev *mdev, u32 *status_opcode) +{ + struct mlx5_ifc_pddr_troubleshooting_page_bits *pddr_troubleshooting_page; + u32 in[MLX5_ST_SZ_DW(pddr_reg)] = {}; + u32 out[MLX5_ST_SZ_DW(pddr_reg)]; + int err; + + MLX5_SET(pddr_reg, in, local_port, 1); + MLX5_SET(pddr_reg, in, page_select, + MLX5_PDDR_REG_PAGE_SELECT_TROUBLESHOOTING_INFO_PAGE); + + pddr_troubleshooting_page = MLX5_ADDR_OF(pddr_reg, in, page_data); + MLX5_SET(pddr_troubleshooting_page, pddr_troubleshooting_page, + group_opcode, MLX5_PDDR_REG_TRBLSH_GROUP_OPCODE_MONITOR); + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PDDR, 0, 0); + if (err) + return err; + + pddr_troubleshooting_page = MLX5_ADDR_OF(pddr_reg, out, page_data); + *status_opcode = MLX5_GET(pddr_troubleshooting_page, pddr_troubleshooting_page, + status_opcode); + return 0; +} + +struct mlx5e_ethtool_link_ext_state_opcode_mapping { + u32 status_opcode; + enum ethtool_link_ext_state link_ext_state; + u8 link_ext_substate; +}; + +static const struct mlx5e_ethtool_link_ext_state_opcode_mapping +mlx5e_link_ext_state_opcode_map[] = { + /* States relating to the autonegotiation or issues therein */ + {2, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED}, + {3, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED}, + {4, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NEXT_PAGE_EXCHANGE_FAILED}, + {36, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED_FORCE_MODE}, + {38, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_FEC_MISMATCH_DURING_OVERRIDE}, + {39, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD}, + + /* Failure during link training */ + {5, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED}, + {6, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_INHIBIT_TIMEOUT}, + {7, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_PARTNER_DID_NOT_SET_RECEIVER_READY}, + {8, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, 0}, + {14, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT}, + + /* Logical mismatch in physical coding sublayer or forward error correction sublayer */ + {9, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK}, + {10, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_AM_LOCK}, + {11, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_GET_ALIGN_STATUS}, + {12, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_FC_FEC_IS_NOT_LOCKED}, + {13, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED}, + + /* Signal integrity issues */ + {15, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, 0}, + {17, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS}, + {42, 
ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE}, + + /* No cable connected */ + {1024, ETHTOOL_LINK_EXT_STATE_NO_CABLE, 0}, + + /* Failure is related to cable, e.g., unsupported cable */ + {16, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {20, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {29, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {1025, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {1029, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {1031, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, 0}, + + /* Failure is related to EEPROM, e.g., failure during reading or parsing the data */ + {1027, ETHTOOL_LINK_EXT_STATE_EEPROM_ISSUE, 0}, + + /* Failure during calibration algorithm */ + {23, ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE, 0}, + + /* The hardware is not able to provide the power required from cable or module */ + {1032, ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED, 0}, + + /* The module is overheated */ + {1030, ETHTOOL_LINK_EXT_STATE_OVERHEAT, 0}, +}; + +static void +mlx5e_set_link_ext_state(struct mlx5e_ethtool_link_ext_state_opcode_mapping + link_ext_state_mapping, + struct ethtool_link_ext_state_info *link_ext_state_info) +{ + switch (link_ext_state_mapping.link_ext_state) { + case ETHTOOL_LINK_EXT_STATE_AUTONEG: + link_ext_state_info->autoneg = + link_ext_state_mapping.link_ext_substate; + break; + case ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE: + link_ext_state_info->link_training = + link_ext_state_mapping.link_ext_substate; + break; + case ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH: + link_ext_state_info->link_logical_mismatch = + link_ext_state_mapping.link_ext_substate; + break; + case ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY: + link_ext_state_info->bad_signal_integrity = + link_ext_state_mapping.link_ext_substate; + break; + case ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE: + link_ext_state_info->cable_issue = + link_ext_state_mapping.link_ext_substate; + break; + default: + break; + } + + link_ext_state_info->link_ext_state = link_ext_state_mapping.link_ext_state; +} + +static int +mlx5e_get_link_ext_state(struct net_device *dev, + struct ethtool_link_ext_state_info *link_ext_state_info) +{ + struct mlx5e_ethtool_link_ext_state_opcode_mapping link_ext_state_mapping; + struct mlx5e_priv *priv = netdev_priv(dev); + u32 status_opcode = 0; + int i; + + /* Exit without data if the interface state is OK, since no extended data is + * available in such case + */ + if (netif_carrier_ok(dev)) + return -ENODATA; + + if (query_port_status_opcode(priv->mdev, &status_opcode) || + !status_opcode) + return -ENODATA; + + for (i = 0; i < ARRAY_SIZE(mlx5e_link_ext_state_opcode_map); i++) { + link_ext_state_mapping = mlx5e_link_ext_state_opcode_map[i]; + if (link_ext_state_mapping.status_opcode == status_opcode) { + mlx5e_set_link_ext_state(link_ext_state_mapping, + link_ext_state_info); + return 0; + } + } + + return -ENODATA; +} + +static void mlx5e_get_eth_phy_stats(struct net_device *netdev, + struct ethtool_eth_phy_stats *phy_stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_eth_phy_get(priv, phy_stats); +} + +static void mlx5e_get_eth_mac_stats(struct net_device *netdev, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_eth_mac_get(priv, mac_stats); 
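+ /* Usage note (illustrative; the command below is an example and not part of
+ * this source): together with the eth_phy, eth_ctrl and rmon callbacks nearby,
+ * this backs the standard statistics groups that newer ethtool can query,
+ * e.g. `ethtool -S <dev> --groups eth-mac`.
+ */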
+} + +static void mlx5e_get_eth_ctrl_stats(struct net_device *netdev, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_eth_ctrl_get(priv, ctrl_stats); +} + +static void mlx5e_get_rmon_stats(struct net_device *netdev, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_rmon_get(priv, rmon_stats, ranges); +} + +const struct ethtool_ops mlx5e_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USE_ADAPTIVE | + ETHTOOL_COALESCE_USE_CQE, + .get_drvinfo = mlx5e_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_link_ext_state = mlx5e_get_link_ext_state, + .get_strings = mlx5e_get_strings, + .get_sset_count = mlx5e_get_sset_count, + .get_ethtool_stats = mlx5e_get_ethtool_stats, + .get_ringparam = mlx5e_get_ringparam, + .set_ringparam = mlx5e_set_ringparam, + .get_channels = mlx5e_get_channels, + .set_channels = mlx5e_set_channels, + .get_coalesce = mlx5e_get_coalesce, + .set_coalesce = mlx5e_set_coalesce, + .get_link_ksettings = mlx5e_get_link_ksettings, + .set_link_ksettings = mlx5e_set_link_ksettings, + .get_rxfh_key_size = mlx5e_get_rxfh_key_size, + .get_rxfh_indir_size = mlx5e_get_rxfh_indir_size, + .get_rxfh = mlx5e_get_rxfh, + .set_rxfh = mlx5e_set_rxfh, + .get_rxfh_context = mlx5e_get_rxfh_context, + .set_rxfh_context = mlx5e_set_rxfh_context, + .get_rxnfc = mlx5e_get_rxnfc, + .set_rxnfc = mlx5e_set_rxnfc, + .get_tunable = mlx5e_get_tunable, + .set_tunable = mlx5e_set_tunable, + .get_pause_stats = mlx5e_get_pause_stats, + .get_pauseparam = mlx5e_get_pauseparam, + .set_pauseparam = mlx5e_set_pauseparam, + .get_ts_info = mlx5e_get_ts_info, + .set_phys_id = mlx5e_set_phys_id, + .get_wol = mlx5e_get_wol, + .set_wol = mlx5e_set_wol, + .get_module_info = mlx5e_get_module_info, + .get_module_eeprom = mlx5e_get_module_eeprom, + .get_module_eeprom_by_page = mlx5e_get_module_eeprom_by_page, + .flash_device = mlx5e_flash_device, + .get_priv_flags = mlx5e_get_priv_flags, + .set_priv_flags = mlx5e_set_priv_flags, + .self_test = mlx5e_self_test, + .get_msglevel = mlx5e_get_msglevel, + .set_msglevel = mlx5e_set_msglevel, + .get_fec_stats = mlx5e_get_fec_stats, + .get_fecparam = mlx5e_get_fecparam, + .set_fecparam = mlx5e_set_fecparam, + .get_eth_phy_stats = mlx5e_get_eth_phy_stats, + .get_eth_mac_stats = mlx5e_get_eth_mac_stats, + .get_eth_ctrl_stats = mlx5e_get_eth_ctrl_stats, + .get_rmon_stats = mlx5e_get_rmon_stats, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c new file mode 100644 index 000000000..eba601487 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -0,0 +1,1582 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/list.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/mlx5/fs.h> +#include <linux/mlx5/mpfs.h> +#include "en_tc.h" +#include "lib/mpfs.h" +#include "en/ptp.h" +#include "en/fs_ethtool.h" + +struct mlx5e_flow_steering { + struct work_struct set_rx_mode_work; + bool state_destroy; + bool vlan_strip_disable; + struct mlx5_core_dev *mdev; + struct net_device *netdev; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_namespace *egress_ns; +#ifdef CONFIG_MLX5_EN_RXNFC + struct mlx5e_ethtool_steering *ethtool; +#endif + struct mlx5e_tc_table *tc; + struct mlx5e_promisc_table promisc; + struct mlx5e_vlan_table *vlan; + struct mlx5e_l2_table l2; + struct mlx5_ttc_table *ttc; + struct mlx5_ttc_table *inner_ttc; +#ifdef CONFIG_MLX5_EN_ARFS + struct mlx5e_arfs_tables *arfs; +#endif +#ifdef CONFIG_MLX5_EN_TLS + struct mlx5e_accel_fs_tcp *accel_tcp; +#endif + struct mlx5e_fs_udp *udp; + struct mlx5e_fs_any *any; + struct mlx5e_ptp_fs *ptp_fs; +}; + +static int mlx5e_add_l2_flow_rule(struct mlx5e_flow_steering *fs, + struct mlx5e_l2_rule *ai, int type); +static void mlx5e_del_l2_flow_rule(struct mlx5e_flow_steering *fs, + struct mlx5e_l2_rule *ai); + +enum { + MLX5E_FULLMATCH = 0, + MLX5E_ALLMULTI = 1, +}; + +enum { + MLX5E_UC = 0, + MLX5E_MC_IPV4 = 1, + MLX5E_MC_IPV6 = 2, + MLX5E_MC_OTHER = 3, +}; + +enum { + MLX5E_ACTION_NONE = 0, + MLX5E_ACTION_ADD = 1, + MLX5E_ACTION_DEL = 2, +}; + +struct mlx5e_l2_hash_node { + struct hlist_node hlist; + u8 action; + struct mlx5e_l2_rule ai; + bool mpfs; +}; + +static inline int mlx5e_hash_l2(const u8 *addr) +{ + return addr[5]; +} + +static void mlx5e_add_l2_to_hash(struct hlist_head *hash, const u8 *addr) +{ + struct mlx5e_l2_hash_node *hn; + int ix = mlx5e_hash_l2(addr); + int found = 0; + + hlist_for_each_entry(hn, &hash[ix], hlist) + if (ether_addr_equal_64bits(hn->ai.addr, addr)) { + found = 1; + break; + } + + if (found) { + hn->action = MLX5E_ACTION_NONE; + return; + } + + hn = kzalloc(sizeof(*hn), GFP_ATOMIC); + if (!hn) + return; + + ether_addr_copy(hn->ai.addr, addr); + hn->action = MLX5E_ACTION_ADD; + + hlist_add_head(&hn->hlist, &hash[ix]); +} + +static void mlx5e_del_l2_from_hash(struct mlx5e_l2_hash_node *hn) +{ + hlist_del(&hn->hlist); + kfree(hn); +} + +struct mlx5e_vlan_table { + struct mlx5e_flow_table ft; + DECLARE_BITMAP(active_cvlans, VLAN_N_VID); + DECLARE_BITMAP(active_svlans, VLAN_N_VID); + struct mlx5_flow_handle *active_cvlans_rule[VLAN_N_VID]; + struct mlx5_flow_handle *active_svlans_rule[VLAN_N_VID]; + struct mlx5_flow_handle *untagged_rule; + struct mlx5_flow_handle *any_cvlan_rule; + struct mlx5_flow_handle *any_svlan_rule; + struct mlx5_flow_handle *trap_rule; + bool cvlan_filter_disabled; +}; + +unsigned long *mlx5e_vlan_get_active_svlans(struct 
mlx5e_vlan_table *vlan) +{ + return vlan->active_svlans; +} + +struct mlx5_flow_table *mlx5e_vlan_get_flowtable(struct mlx5e_vlan_table *vlan) +{ + return vlan->ft.t; +} + +static int mlx5e_vport_context_update_vlans(struct mlx5e_flow_steering *fs) +{ + int max_list_size; + int list_size; + u16 *vlans; + int vlan; + int err; + int i; + + list_size = 0; + for_each_set_bit(vlan, fs->vlan->active_cvlans, VLAN_N_VID) + list_size++; + + max_list_size = 1 << MLX5_CAP_GEN(fs->mdev, log_max_vlan_list); + + if (list_size > max_list_size) { + fs_warn(fs, "netdev vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n", + list_size, max_list_size); + list_size = max_list_size; + } + + vlans = kvcalloc(list_size, sizeof(*vlans), GFP_KERNEL); + if (!vlans) + return -ENOMEM; + + i = 0; + for_each_set_bit(vlan, fs->vlan->active_cvlans, VLAN_N_VID) { + if (i >= list_size) + break; + vlans[i++] = vlan; + } + + err = mlx5_modify_nic_vport_vlans(fs->mdev, vlans, list_size); + if (err) + fs_err(fs, "Failed to modify vport vlans list err(%d)\n", + err); + + kvfree(vlans); + return err; +} + +enum mlx5e_vlan_rule_type { + MLX5E_VLAN_RULE_TYPE_UNTAGGED, + MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, + MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, + MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, + MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, +}; + +static int __mlx5e_add_vlan_rule(struct mlx5e_flow_steering *fs, + enum mlx5e_vlan_rule_type rule_type, + u16 vid, struct mlx5_flow_spec *spec) +{ + struct mlx5_flow_table *ft = fs->vlan->ft.t; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_handle **rule_p; + MLX5_DECLARE_FLOW_ACT(flow_act); + int err = 0; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = fs->l2.ft.t; + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + switch (rule_type) { + case MLX5E_VLAN_RULE_TYPE_UNTAGGED: + /* cvlan_tag enabled in match criteria and + * disabled in match value means both S & C tags + * don't exist (untagged of both) + */ + rule_p = &fs->vlan->untagged_rule; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag); + break; + case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID: + rule_p = &fs->vlan->any_cvlan_rule; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1); + break; + case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID: + rule_p = &fs->vlan->any_svlan_rule; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1); + break; + case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID: + rule_p = &fs->vlan->active_svlans_rule[vid]; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, + vid); + break; + default: /* MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID */ + rule_p = &fs->vlan->active_cvlans_rule[vid]; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, + vid); + break; + } + + if (WARN_ONCE(*rule_p, "VLAN rule already exists type 
%d", rule_type)) + return 0; + + *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + + if (IS_ERR(*rule_p)) { + err = PTR_ERR(*rule_p); + *rule_p = NULL; + fs_err(fs, "%s: add rule failed\n", __func__); + } + + return err; +} + +static int mlx5e_add_vlan_rule(struct mlx5e_flow_steering *fs, + enum mlx5e_vlan_rule_type rule_type, u16 vid) +{ + struct mlx5_flow_spec *spec; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID) + mlx5e_vport_context_update_vlans(fs); + + err = __mlx5e_add_vlan_rule(fs, rule_type, vid, spec); + + kvfree(spec); + + return err; +} + +static void mlx5e_fs_del_vlan_rule(struct mlx5e_flow_steering *fs, + enum mlx5e_vlan_rule_type rule_type, u16 vid) +{ + switch (rule_type) { + case MLX5E_VLAN_RULE_TYPE_UNTAGGED: + if (fs->vlan->untagged_rule) { + mlx5_del_flow_rules(fs->vlan->untagged_rule); + fs->vlan->untagged_rule = NULL; + } + break; + case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID: + if (fs->vlan->any_cvlan_rule) { + mlx5_del_flow_rules(fs->vlan->any_cvlan_rule); + fs->vlan->any_cvlan_rule = NULL; + } + break; + case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID: + if (fs->vlan->any_svlan_rule) { + mlx5_del_flow_rules(fs->vlan->any_svlan_rule); + fs->vlan->any_svlan_rule = NULL; + } + break; + case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID: + if (fs->vlan->active_svlans_rule[vid]) { + mlx5_del_flow_rules(fs->vlan->active_svlans_rule[vid]); + fs->vlan->active_svlans_rule[vid] = NULL; + } + break; + case MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID: + if (fs->vlan->active_cvlans_rule[vid]) { + mlx5_del_flow_rules(fs->vlan->active_cvlans_rule[vid]); + fs->vlan->active_cvlans_rule[vid] = NULL; + } + mlx5e_vport_context_update_vlans(fs); + break; + } +} + +static void mlx5e_fs_del_any_vid_rules(struct mlx5e_flow_steering *fs) +{ + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0); +} + +static int mlx5e_fs_add_any_vid_rules(struct mlx5e_flow_steering *fs) +{ + int err; + + err = mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); + if (err) + return err; + + return mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0); +} + +static struct mlx5_flow_handle * +mlx5e_add_trap_rule(struct mlx5_flow_table *ft, int trap_id, int tir_num) +{ + struct mlx5_flow_destination dest = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + spec->flow_context.flags |= FLOW_CONTEXT_HAS_TAG; + spec->flow_context.flow_tag = trap_id; + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = tir_num; + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + kvfree(spec); + return rule; +} + +int mlx5e_add_vlan_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num) +{ + struct mlx5_flow_table *ft = fs->vlan->ft.t; + struct mlx5_flow_handle *rule; + int err; + + rule = mlx5e_add_trap_rule(ft, trap_id, tir_num); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + fs->vlan->trap_rule = NULL; + fs_err(fs, "%s: add VLAN trap rule failed, err %d\n", + __func__, err); + return err; + } + fs->vlan->trap_rule = rule; + return 0; +} + +void mlx5e_remove_vlan_trap(struct mlx5e_flow_steering *fs) +{ + if (fs->vlan->trap_rule) { + mlx5_del_flow_rules(fs->vlan->trap_rule); + fs->vlan->trap_rule = NULL; + } +} + +int mlx5e_add_mac_trap(struct mlx5e_flow_steering *fs, int 
trap_id, int tir_num) +{ + struct mlx5_flow_table *ft = fs->l2.ft.t; + struct mlx5_flow_handle *rule; + int err; + + rule = mlx5e_add_trap_rule(ft, trap_id, tir_num); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + fs->l2.trap_rule = NULL; + fs_err(fs, "%s: add MAC trap rule failed, err %d\n", + __func__, err); + return err; + } + fs->l2.trap_rule = rule; + return 0; +} + +void mlx5e_remove_mac_trap(struct mlx5e_flow_steering *fs) +{ + if (fs->l2.trap_rule) { + mlx5_del_flow_rules(fs->l2.trap_rule); + fs->l2.trap_rule = NULL; + } +} + +void mlx5e_enable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc) +{ + if (!fs->vlan->cvlan_filter_disabled) + return; + + fs->vlan->cvlan_filter_disabled = false; + if (promisc) + return; + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); +} + +void mlx5e_disable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc) +{ + if (!fs->vlan || fs->vlan->cvlan_filter_disabled) + return; + + fs->vlan->cvlan_filter_disabled = true; + if (promisc) + return; + mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); +} + +static int mlx5e_vlan_rx_add_cvid(struct mlx5e_flow_steering *fs, u16 vid) +{ + int err; + + set_bit(vid, fs->vlan->active_cvlans); + + err = mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid); + if (err) + clear_bit(vid, fs->vlan->active_cvlans); + + return err; +} + +static int mlx5e_vlan_rx_add_svid(struct mlx5e_flow_steering *fs, + struct net_device *netdev, u16 vid) +{ + int err; + + set_bit(vid, fs->vlan->active_svlans); + + err = mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid); + if (err) { + clear_bit(vid, fs->vlan->active_svlans); + return err; + } + + /* Need to fix some features.. */ + netdev_update_features(netdev); + return err; +} + +int mlx5e_fs_vlan_rx_add_vid(struct mlx5e_flow_steering *fs, + struct net_device *netdev, + __be16 proto, u16 vid) +{ + + if (!fs->vlan) { + fs_err(fs, "Vlan doesn't exist\n"); + return -EINVAL; + } + + if (be16_to_cpu(proto) == ETH_P_8021Q) + return mlx5e_vlan_rx_add_cvid(fs, vid); + else if (be16_to_cpu(proto) == ETH_P_8021AD) + return mlx5e_vlan_rx_add_svid(fs, netdev, vid); + + return -EOPNOTSUPP; +} + +int mlx5e_fs_vlan_rx_kill_vid(struct mlx5e_flow_steering *fs, + struct net_device *netdev, + __be16 proto, u16 vid) +{ + if (!fs->vlan) { + fs_err(fs, "Vlan doesn't exist\n"); + return -EINVAL; + } + + if (be16_to_cpu(proto) == ETH_P_8021Q) { + clear_bit(vid, fs->vlan->active_cvlans); + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid); + } else if (be16_to_cpu(proto) == ETH_P_8021AD) { + clear_bit(vid, fs->vlan->active_svlans); + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid); + netdev_update_features(netdev); + } + + return 0; +} + +static void mlx5e_fs_add_vlan_rules(struct mlx5e_flow_steering *fs) +{ + int i; + + mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + + for_each_set_bit(i, fs->vlan->active_cvlans, VLAN_N_VID) { + mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i); + } + + for_each_set_bit(i, fs->vlan->active_svlans, VLAN_N_VID) + mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); + + if (fs->vlan->cvlan_filter_disabled) + mlx5e_fs_add_any_vid_rules(fs); +} + +static void mlx5e_del_vlan_rules(struct mlx5e_flow_steering *fs) +{ + int i; + + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + + for_each_set_bit(i, fs->vlan->active_cvlans, VLAN_N_VID) { + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i); + } + + 
for_each_set_bit(i, fs->vlan->active_svlans, VLAN_N_VID) + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); + + WARN_ON_ONCE(fs->state_destroy); + + mlx5e_remove_vlan_trap(fs); + + /* must be called after DESTROY bit is set and + * set_rx_mode is called and flushed + */ + if (fs->vlan->cvlan_filter_disabled) + mlx5e_fs_del_any_vid_rules(fs); +} + +#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ + for (i = 0; i < MLX5E_L2_ADDR_HASH_SIZE; i++) \ + hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) + +static void mlx5e_execute_l2_action(struct mlx5e_flow_steering *fs, + struct mlx5e_l2_hash_node *hn) +{ + u8 action = hn->action; + u8 mac_addr[ETH_ALEN]; + int l2_err = 0; + + ether_addr_copy(mac_addr, hn->ai.addr); + + switch (action) { + case MLX5E_ACTION_ADD: + mlx5e_add_l2_flow_rule(fs, &hn->ai, MLX5E_FULLMATCH); + if (!is_multicast_ether_addr(mac_addr)) { + l2_err = mlx5_mpfs_add_mac(fs->mdev, mac_addr); + hn->mpfs = !l2_err; + } + hn->action = MLX5E_ACTION_NONE; + break; + + case MLX5E_ACTION_DEL: + if (!is_multicast_ether_addr(mac_addr) && hn->mpfs) + l2_err = mlx5_mpfs_del_mac(fs->mdev, mac_addr); + mlx5e_del_l2_flow_rule(fs, &hn->ai); + mlx5e_del_l2_from_hash(hn); + break; + } + + if (l2_err) + fs_warn(fs, "MPFS, failed to %s mac %pM, err(%d)\n", + action == MLX5E_ACTION_ADD ? "add" : "del", + mac_addr, l2_err); +} + +static void mlx5e_sync_netdev_addr(struct mlx5e_flow_steering *fs, + struct net_device *netdev) +{ + struct netdev_hw_addr *ha; + + netif_addr_lock_bh(netdev); + + mlx5e_add_l2_to_hash(fs->l2.netdev_uc, netdev->dev_addr); + netdev_for_each_uc_addr(ha, netdev) + mlx5e_add_l2_to_hash(fs->l2.netdev_uc, ha->addr); + + netdev_for_each_mc_addr(ha, netdev) + mlx5e_add_l2_to_hash(fs->l2.netdev_mc, ha->addr); + + netif_addr_unlock_bh(netdev); +} + +static void mlx5e_fill_addr_array(struct mlx5e_flow_steering *fs, int list_type, + struct net_device *ndev, + u8 addr_array[][ETH_ALEN], int size) +{ + bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); + struct mlx5e_l2_hash_node *hn; + struct hlist_head *addr_list; + struct hlist_node *tmp; + int i = 0; + int hi; + + addr_list = is_uc ? fs->l2.netdev_uc : fs->l2.netdev_mc; + + if (is_uc) /* Make sure our own address is pushed first */ + ether_addr_copy(addr_array[i++], ndev->dev_addr); + else if (fs->l2.broadcast_enabled) + ether_addr_copy(addr_array[i++], ndev->broadcast); + + mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) { + if (ether_addr_equal(ndev->dev_addr, hn->ai.addr)) + continue; + if (i >= size) + break; + ether_addr_copy(addr_array[i++], hn->ai.addr); + } +} + +static void mlx5e_vport_context_update_addr_list(struct mlx5e_flow_steering *fs, + struct net_device *netdev, + int list_type) +{ + bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); + struct mlx5e_l2_hash_node *hn; + u8 (*addr_array)[ETH_ALEN] = NULL; + struct hlist_head *addr_list; + struct hlist_node *tmp; + int max_size; + int size; + int err; + int hi; + + size = is_uc ? 0 : (fs->l2.broadcast_enabled ? 1 : 0); + max_size = is_uc ? + 1 << MLX5_CAP_GEN(fs->mdev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(fs->mdev, log_max_current_mc_list); + + addr_list = is_uc ? fs->l2.netdev_uc : fs->l2.netdev_mc; + mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) + size++; + + if (size > max_size) { + fs_warn(fs, "mdev %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n", + is_uc ? 
"UC" : "MC", size, max_size); + size = max_size; + } + + if (size) { + addr_array = kcalloc(size, ETH_ALEN, GFP_KERNEL); + if (!addr_array) { + err = -ENOMEM; + goto out; + } + mlx5e_fill_addr_array(fs, list_type, netdev, addr_array, size); + } + + err = mlx5_modify_nic_vport_mac_list(fs->mdev, list_type, addr_array, size); +out: + if (err) + fs_err(fs, "Failed to modify vport %s list err(%d)\n", + is_uc ? "UC" : "MC", err); + kfree(addr_array); +} + +static void mlx5e_vport_context_update(struct mlx5e_flow_steering *fs, + struct net_device *netdev) +{ + struct mlx5e_l2_table *ea = &fs->l2; + + mlx5e_vport_context_update_addr_list(fs, netdev, MLX5_NVPRT_LIST_TYPE_UC); + mlx5e_vport_context_update_addr_list(fs, netdev, MLX5_NVPRT_LIST_TYPE_MC); + mlx5_modify_nic_vport_promisc(fs->mdev, 0, + ea->allmulti_enabled, + ea->promisc_enabled); +} + +static void mlx5e_apply_netdev_addr(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_l2_hash_node *hn; + struct hlist_node *tmp; + int i; + + mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_uc, i) + mlx5e_execute_l2_action(fs, hn); + + mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_mc, i) + mlx5e_execute_l2_action(fs, hn); +} + +static void mlx5e_handle_netdev_addr(struct mlx5e_flow_steering *fs, + struct net_device *netdev) +{ + struct mlx5e_l2_hash_node *hn; + struct hlist_node *tmp; + int i; + + mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_uc, i) + hn->action = MLX5E_ACTION_DEL; + mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_mc, i) + hn->action = MLX5E_ACTION_DEL; + + if (fs->state_destroy) + mlx5e_sync_netdev_addr(fs, netdev); + + mlx5e_apply_netdev_addr(fs); +} + +#define MLX5E_PROMISC_GROUP0_SIZE BIT(0) +#define MLX5E_PROMISC_TABLE_SIZE MLX5E_PROMISC_GROUP0_SIZE + +static int mlx5e_add_promisc_rule(struct mlx5e_flow_steering *fs) +{ + struct mlx5_flow_table *ft = fs->promisc.ft.t; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_handle **rule_p; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_spec *spec; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = mlx5_get_ttc_flow_table(fs->ttc); + + rule_p = &fs->promisc.rule; + *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + if (IS_ERR(*rule_p)) { + err = PTR_ERR(*rule_p); + *rule_p = NULL; + fs_err(fs, "%s: add promiscuous rule failed\n", __func__); + } + kvfree(spec); + return err; +} + +static int mlx5e_create_promisc_table(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_flow_table *ft = &fs->promisc.ft; + struct mlx5_flow_table_attr ft_attr = {}; + int err; + + ft_attr.max_fte = MLX5E_PROMISC_TABLE_SIZE; + ft_attr.autogroup.max_num_groups = 1; + ft_attr.level = MLX5E_PROMISC_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_auto_grouped_flow_table(fs->ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + fs_err(fs, "fail to create promisc table err=%d\n", err); + return err; + } + + err = mlx5e_add_promisc_rule(fs); + if (err) + goto err_destroy_promisc_table; + + return 0; + +err_destroy_promisc_table: + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; + + return err; +} + +static void mlx5e_del_promisc_rule(struct mlx5e_flow_steering *fs) +{ + if (WARN(!fs->promisc.rule, "Trying to remove non-existing promiscuous rule")) + return; + mlx5_del_flow_rules(fs->promisc.rule); + fs->promisc.rule = NULL; +} + +static void mlx5e_destroy_promisc_table(struct mlx5e_flow_steering *fs) +{ + if (!fs->promisc.ft.t) + 
return; + mlx5e_del_promisc_rule(fs); + mlx5_destroy_flow_table(fs->promisc.ft.t); + fs->promisc.ft.t = NULL; +} + +void mlx5e_fs_set_rx_mode_work(struct mlx5e_flow_steering *fs, + struct net_device *netdev) +{ + struct mlx5e_l2_table *ea = &fs->l2; + + bool rx_mode_enable = fs->state_destroy; + bool promisc_enabled = rx_mode_enable && (netdev->flags & IFF_PROMISC); + bool allmulti_enabled = rx_mode_enable && (netdev->flags & IFF_ALLMULTI); + bool broadcast_enabled = rx_mode_enable; + + bool enable_promisc = !ea->promisc_enabled && promisc_enabled; + bool disable_promisc = ea->promisc_enabled && !promisc_enabled; + bool enable_allmulti = !ea->allmulti_enabled && allmulti_enabled; + bool disable_allmulti = ea->allmulti_enabled && !allmulti_enabled; + bool enable_broadcast = !ea->broadcast_enabled && broadcast_enabled; + bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled; + int err; + + if (enable_promisc) { + err = mlx5e_create_promisc_table(fs); + if (err) + enable_promisc = false; + if (!fs->vlan_strip_disable && !err) + fs_warn_once(fs, + "S-tagged traffic will be dropped while C-tag vlan stripping is enabled\n"); + } + if (enable_allmulti) + mlx5e_add_l2_flow_rule(fs, &ea->allmulti, MLX5E_ALLMULTI); + if (enable_broadcast) + mlx5e_add_l2_flow_rule(fs, &ea->broadcast, MLX5E_FULLMATCH); + + mlx5e_handle_netdev_addr(fs, netdev); + + if (disable_broadcast) + mlx5e_del_l2_flow_rule(fs, &ea->broadcast); + if (disable_allmulti) + mlx5e_del_l2_flow_rule(fs, &ea->allmulti); + if (disable_promisc) + mlx5e_destroy_promisc_table(fs); + + ea->promisc_enabled = promisc_enabled; + ea->allmulti_enabled = allmulti_enabled; + ea->broadcast_enabled = broadcast_enabled; + + mlx5e_vport_context_update(fs, netdev); +} + +static void mlx5e_destroy_groups(struct mlx5e_flow_table *ft) +{ + int i; + + for (i = ft->num_groups - 1; i >= 0; i--) { + if (!IS_ERR_OR_NULL(ft->g[i])) + mlx5_destroy_flow_group(ft->g[i]); + ft->g[i] = NULL; + } + ft->num_groups = 0; +} + +void mlx5e_fs_init_l2_addr(struct mlx5e_flow_steering *fs, struct net_device *netdev) +{ + ether_addr_copy(fs->l2.broadcast.addr, netdev->broadcast); +} + +void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft) +{ + mlx5e_destroy_groups(ft); + kfree(ft->g); + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; +} + +static void mlx5e_set_inner_ttc_params(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, + struct ttc_params *ttc_params) +{ + struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; + int tt; + + memset(ttc_params, 0, sizeof(*ttc_params)); + ttc_params->ns = mlx5_get_flow_namespace(fs->mdev, + MLX5_FLOW_NAMESPACE_KERNEL); + ft_attr->level = MLX5E_INNER_TTC_FT_LEVEL; + ft_attr->prio = MLX5E_NIC_PRIO; + + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + ttc_params->dests[tt].tir_num = + tt == MLX5_TT_ANY ? 
+ mlx5e_rx_res_get_tirn_direct(rx_res, 0) : + mlx5e_rx_res_get_tirn_rss_inner(rx_res, + tt); + } +} + +void mlx5e_set_ttc_params(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, + struct ttc_params *ttc_params, bool tunnel) + +{ + struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; + int tt; + + memset(ttc_params, 0, sizeof(*ttc_params)); + ttc_params->ns = mlx5_get_flow_namespace(fs->mdev, + MLX5_FLOW_NAMESPACE_KERNEL); + ft_attr->level = MLX5E_TTC_FT_LEVEL; + ft_attr->prio = MLX5E_NIC_PRIO; + + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + ttc_params->dests[tt].tir_num = + tt == MLX5_TT_ANY ? + mlx5e_rx_res_get_tirn_direct(rx_res, 0) : + mlx5e_rx_res_get_tirn_rss(rx_res, tt); + } + + ttc_params->inner_ttc = tunnel; + if (!tunnel || !mlx5_tunnel_inner_ft_supported(fs->mdev)) + return; + + for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { + ttc_params->tunnel_dests[tt].type = + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + ttc_params->tunnel_dests[tt].ft = + mlx5_get_ttc_flow_table(fs->inner_ttc); + } +} + +static void mlx5e_del_l2_flow_rule(struct mlx5e_flow_steering *fs, + struct mlx5e_l2_rule *ai) +{ + if (!IS_ERR_OR_NULL(ai->rule)) { + mlx5_del_flow_rules(ai->rule); + ai->rule = NULL; + } +} + +static int mlx5e_add_l2_flow_rule(struct mlx5e_flow_steering *fs, + struct mlx5e_l2_rule *ai, int type) +{ + struct mlx5_flow_table *ft = fs->l2.ft.t; + struct mlx5_flow_destination dest = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_spec *spec; + int err = 0; + u8 *mc_dmac; + u8 *mv_dmac; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + mc_dmac = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.dmac_47_16); + mv_dmac = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dmac_47_16); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = mlx5_get_ttc_flow_table(fs->ttc); + + switch (type) { + case MLX5E_FULLMATCH: + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + eth_broadcast_addr(mc_dmac); + ether_addr_copy(mv_dmac, ai->addr); + break; + + case MLX5E_ALLMULTI: + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + mc_dmac[0] = 0x01; + mv_dmac[0] = 0x01; + break; + } + + ai->rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + if (IS_ERR(ai->rule)) { + fs_err(fs, "%s: add l2 rule(mac:%pM) failed\n", __func__, mv_dmac); + err = PTR_ERR(ai->rule); + ai->rule = NULL; + } + + kvfree(spec); + + return err; +} + +#define MLX5E_NUM_L2_GROUPS 3 +#define MLX5E_L2_GROUP1_SIZE BIT(15) +#define MLX5E_L2_GROUP2_SIZE BIT(0) +#define MLX5E_L2_GROUP_TRAP_SIZE BIT(0) /* must be last */ +#define MLX5E_L2_TABLE_SIZE (MLX5E_L2_GROUP1_SIZE +\ + MLX5E_L2_GROUP2_SIZE +\ + MLX5E_L2_GROUP_TRAP_SIZE) +static int mlx5e_create_l2_table_groups(struct mlx5e_l2_table *l2_table) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5e_flow_table *ft = &l2_table->ft; + int ix = 0; + u8 *mc_dmac; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_NUM_L2_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ft->g); + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + mc_dmac = MLX5_ADDR_OF(fte_match_param, mc, + outer_headers.dmac_47_16); + /* Flow Group for full match */ + eth_broadcast_addr(mc_dmac); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += 
MLX5E_L2_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + /* Flow Group for allmulti */ + eth_zero_addr(mc_dmac); + mc_dmac[0] = 0x01; + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_L2_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + /* Flow Group for l2 traps */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_L2_GROUP_TRAP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + kvfree(in); + return 0; + +err_destroy_groups: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + mlx5e_destroy_groups(ft); + kvfree(in); + kfree(ft->g); + + return err; +} + +static void mlx5e_destroy_l2_table(struct mlx5e_flow_steering *fs) +{ + mlx5e_destroy_flow_table(&fs->l2.ft); +} + +static int mlx5e_create_l2_table(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_l2_table *l2_table = &fs->l2; + struct mlx5e_flow_table *ft = &l2_table->ft; + struct mlx5_flow_table_attr ft_attr = {}; + int err; + + ft->num_groups = 0; + + ft_attr.max_fte = MLX5E_L2_TABLE_SIZE; + ft_attr.level = MLX5E_L2_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(fs->ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + err = mlx5e_create_l2_table_groups(l2_table); + if (err) + goto err_destroy_flow_table; + + return 0; + +err_destroy_flow_table: + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; + + return err; +} + +#define MLX5E_NUM_VLAN_GROUPS 5 +#define MLX5E_VLAN_GROUP0_SIZE BIT(12) +#define MLX5E_VLAN_GROUP1_SIZE BIT(12) +#define MLX5E_VLAN_GROUP2_SIZE BIT(1) +#define MLX5E_VLAN_GROUP3_SIZE BIT(0) +#define MLX5E_VLAN_GROUP_TRAP_SIZE BIT(0) /* must be last */ +#define MLX5E_VLAN_TABLE_SIZE (MLX5E_VLAN_GROUP0_SIZE +\ + MLX5E_VLAN_GROUP1_SIZE +\ + MLX5E_VLAN_GROUP2_SIZE +\ + MLX5E_VLAN_GROUP3_SIZE +\ + MLX5E_VLAN_GROUP_TRAP_SIZE) + +static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in, + int inlen) +{ + int err; + int ix = 0; + u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP0_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, 
MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP_TRAP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + return 0; + +err_destroy_groups: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + mlx5e_destroy_groups(ft); + + return err; +} + +static int mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft) +{ + u32 *in; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + err = __mlx5e_create_vlan_table_groups(ft, in, inlen); + + kvfree(in); + return err; +} + +static int mlx5e_fs_create_vlan_table(struct mlx5e_flow_steering *fs) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5e_flow_table *ft; + int err; + + ft = &fs->vlan->ft; + ft->num_groups = 0; + + ft_attr.max_fte = MLX5E_VLAN_TABLE_SIZE; + ft_attr.level = MLX5E_VLAN_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(fs->ns, &ft_attr); + if (IS_ERR(ft->t)) + return PTR_ERR(ft->t); + + ft->g = kcalloc(MLX5E_NUM_VLAN_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) { + err = -ENOMEM; + goto err_destroy_vlan_table; + } + + err = mlx5e_create_vlan_table_groups(ft); + if (err) + goto err_free_g; + + mlx5e_fs_add_vlan_rules(fs); + + return 0; + +err_free_g: + kfree(ft->g); +err_destroy_vlan_table: + mlx5_destroy_flow_table(ft->t); + + return err; +} + +static void mlx5e_destroy_vlan_table(struct mlx5e_flow_steering *fs) +{ + mlx5e_del_vlan_rules(fs); + mlx5e_destroy_flow_table(&fs->vlan->ft); +} + +static void mlx5e_destroy_inner_ttc_table(struct mlx5e_flow_steering *fs) +{ + if (!mlx5_tunnel_inner_ft_supported(fs->mdev)) + return; + mlx5_destroy_ttc_table(fs->inner_ttc); +} + +void mlx5e_destroy_ttc_table(struct mlx5e_flow_steering *fs) +{ + mlx5_destroy_ttc_table(fs->ttc); +} + +static int mlx5e_create_inner_ttc_table(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res) +{ + struct ttc_params ttc_params = {}; + + if (!mlx5_tunnel_inner_ft_supported(fs->mdev)) + return 0; + + mlx5e_set_inner_ttc_params(fs, rx_res, &ttc_params); + fs->inner_ttc = mlx5_create_inner_ttc_table(fs->mdev, + &ttc_params); + if (IS_ERR(fs->inner_ttc)) + return PTR_ERR(fs->inner_ttc); + return 0; +} + +int mlx5e_create_ttc_table(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res) +{ + struct ttc_params ttc_params = {}; + + mlx5e_set_ttc_params(fs, rx_res, &ttc_params, true); + fs->ttc = mlx5_create_ttc_table(fs->mdev, &ttc_params); + if (IS_ERR(fs->ttc)) + return PTR_ERR(fs->ttc); + return 0; +} + +int mlx5e_create_flow_steering(struct 
mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, + const struct mlx5e_profile *profile, + struct net_device *netdev) +{ + struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(fs->mdev, + MLX5_FLOW_NAMESPACE_KERNEL); + int err; + + if (!ns) + return -EOPNOTSUPP; + + mlx5e_fs_set_ns(fs, ns, false); + err = mlx5e_arfs_create_tables(fs, rx_res, + !!(netdev->hw_features & NETIF_F_NTUPLE)); + if (err) { + fs_err(fs, "Failed to create arfs tables, err=%d\n", err); + netdev->hw_features &= ~NETIF_F_NTUPLE; + } + + err = mlx5e_create_inner_ttc_table(fs, rx_res); + if (err) { + fs_err(fs, "Failed to create inner ttc table, err=%d\n", err); + goto err_destroy_arfs_tables; + } + + err = mlx5e_create_ttc_table(fs, rx_res); + if (err) { + fs_err(fs, "Failed to create ttc table, err=%d\n", err); + goto err_destroy_inner_ttc_table; + } + + err = mlx5e_create_l2_table(fs); + if (err) { + fs_err(fs, "Failed to create l2 table, err=%d\n", err); + goto err_destroy_ttc_table; + } + + err = mlx5e_fs_create_vlan_table(fs); + if (err) { + fs_err(fs, "Failed to create vlan table, err=%d\n", err); + goto err_destroy_l2_table; + } + + err = mlx5e_ptp_alloc_rx_fs(fs, profile); + if (err) + goto err_destory_vlan_table; + + mlx5e_ethtool_init_steering(fs); + + return 0; + +err_destory_vlan_table: + mlx5e_destroy_vlan_table(fs); +err_destroy_l2_table: + mlx5e_destroy_l2_table(fs); +err_destroy_ttc_table: + mlx5e_destroy_ttc_table(fs); +err_destroy_inner_ttc_table: + mlx5e_destroy_inner_ttc_table(fs); +err_destroy_arfs_tables: + mlx5e_arfs_destroy_tables(fs, !!(netdev->hw_features & NETIF_F_NTUPLE)); + + return err; +} + +void mlx5e_destroy_flow_steering(struct mlx5e_flow_steering *fs, bool ntuple, + const struct mlx5e_profile *profile) +{ + mlx5e_ptp_free_rx_fs(fs, profile); + mlx5e_destroy_vlan_table(fs); + mlx5e_destroy_l2_table(fs); + mlx5e_destroy_ttc_table(fs); + mlx5e_destroy_inner_ttc_table(fs); + mlx5e_arfs_destroy_tables(fs, ntuple); + mlx5e_ethtool_cleanup_steering(fs); +} + +static int mlx5e_fs_vlan_alloc(struct mlx5e_flow_steering *fs) +{ + fs->vlan = kvzalloc(sizeof(*fs->vlan), GFP_KERNEL); + if (!fs->vlan) + return -ENOMEM; + return 0; +} + +static void mlx5e_fs_vlan_free(struct mlx5e_flow_steering *fs) +{ + kvfree(fs->vlan); +} + +struct mlx5e_vlan_table *mlx5e_fs_get_vlan(struct mlx5e_flow_steering *fs) +{ + return fs->vlan; +} + +static int mlx5e_fs_tc_alloc(struct mlx5e_flow_steering *fs) +{ + fs->tc = mlx5e_tc_table_alloc(); + if (IS_ERR(fs->tc)) + return -ENOMEM; + return 0; +} + +static void mlx5e_fs_tc_free(struct mlx5e_flow_steering *fs) +{ + mlx5e_tc_table_free(fs->tc); +} + +struct mlx5e_tc_table *mlx5e_fs_get_tc(struct mlx5e_flow_steering *fs) +{ + return fs->tc; +} + +#ifdef CONFIG_MLX5_EN_RXNFC +static int mlx5e_fs_ethtool_alloc(struct mlx5e_flow_steering *fs) +{ + return mlx5e_ethtool_alloc(&fs->ethtool); +} + +static void mlx5e_fs_ethtool_free(struct mlx5e_flow_steering *fs) +{ + mlx5e_ethtool_free(fs->ethtool); +} + +struct mlx5e_ethtool_steering *mlx5e_fs_get_ethtool(struct mlx5e_flow_steering *fs) +{ + return fs->ethtool; +} +#else +static int mlx5e_fs_ethtool_alloc(struct mlx5e_flow_steering *fs) +{ return 0; } +static void mlx5e_fs_ethtool_free(struct mlx5e_flow_steering *fs) { } +#endif + +struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile, + struct mlx5_core_dev *mdev, + bool state_destroy) +{ + struct mlx5e_flow_steering *fs; + int err; + + fs = kvzalloc(sizeof(*fs), GFP_KERNEL); + if (!fs) + goto err; + + fs->mdev = mdev; + fs->state_destroy = 
state_destroy; + if (mlx5e_profile_feature_cap(profile, FS_VLAN)) { + err = mlx5e_fs_vlan_alloc(fs); + if (err) + goto err_free_fs; + } + + if (mlx5e_profile_feature_cap(profile, FS_TC)) { + err = mlx5e_fs_tc_alloc(fs); + if (err) + goto err_free_vlan; + } + + err = mlx5e_fs_ethtool_alloc(fs); + if (err) + goto err_free_tc; + + return fs; +err_free_tc: + mlx5e_fs_tc_free(fs); +err_free_vlan: + mlx5e_fs_vlan_free(fs); +err_free_fs: + kvfree(fs); +err: + return NULL; +} + +void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs) +{ + if (!fs) + return; + mlx5e_fs_ethtool_free(fs); + mlx5e_fs_tc_free(fs); + mlx5e_fs_vlan_free(fs); + kvfree(fs); +} + +struct mlx5e_l2_table *mlx5e_fs_get_l2(struct mlx5e_flow_steering *fs) +{ + return &fs->l2; +} + +struct mlx5_flow_namespace *mlx5e_fs_get_ns(struct mlx5e_flow_steering *fs, bool egress) +{ + return egress ? fs->egress_ns : fs->ns; +} + +void mlx5e_fs_set_ns(struct mlx5e_flow_steering *fs, struct mlx5_flow_namespace *ns, bool egress) +{ + if (!egress) + fs->ns = ns; + else + fs->egress_ns = ns; +} + +struct mlx5_ttc_table *mlx5e_fs_get_ttc(struct mlx5e_flow_steering *fs, bool inner) +{ + return inner ? fs->inner_ttc : fs->ttc; +} + +void mlx5e_fs_set_ttc(struct mlx5e_flow_steering *fs, struct mlx5_ttc_table *ttc, bool inner) +{ + if (!inner) + fs->ttc = ttc; + else + fs->inner_ttc = ttc; +} + +#ifdef CONFIG_MLX5_EN_ARFS +struct mlx5e_arfs_tables *mlx5e_fs_get_arfs(struct mlx5e_flow_steering *fs) +{ + return fs->arfs; +} + +void mlx5e_fs_set_arfs(struct mlx5e_flow_steering *fs, struct mlx5e_arfs_tables *arfs) +{ + fs->arfs = arfs; +} +#endif + +struct mlx5e_ptp_fs *mlx5e_fs_get_ptp(struct mlx5e_flow_steering *fs) +{ + return fs->ptp_fs; +} + +void mlx5e_fs_set_ptp(struct mlx5e_flow_steering *fs, struct mlx5e_ptp_fs *ptp_fs) +{ + fs->ptp_fs = ptp_fs; +} + +struct mlx5e_fs_any *mlx5e_fs_get_any(struct mlx5e_flow_steering *fs) +{ + return fs->any; +} + +void mlx5e_fs_set_any(struct mlx5e_flow_steering *fs, struct mlx5e_fs_any *any) +{ + fs->any = any; +} + +#ifdef CONFIG_MLX5_EN_TLS +struct mlx5e_accel_fs_tcp *mlx5e_fs_get_accel_tcp(struct mlx5e_flow_steering *fs) +{ + return fs->accel_tcp; +} + +void mlx5e_fs_set_accel_tcp(struct mlx5e_flow_steering *fs, struct mlx5e_accel_fs_tcp *accel_tcp) +{ + fs->accel_tcp = accel_tcp; +} +#endif + +void mlx5e_fs_set_state_destroy(struct mlx5e_flow_steering *fs, bool state_destroy) +{ + fs->state_destroy = state_destroy; +} + +void mlx5e_fs_set_vlan_strip_disable(struct mlx5e_flow_steering *fs, + bool vlan_strip_disable) +{ + fs->vlan_strip_disable = vlan_strip_disable; +} + +struct mlx5e_fs_udp *mlx5e_fs_get_udp(struct mlx5e_flow_steering *fs) +{ + return fs->udp; +} + +void mlx5e_fs_set_udp(struct mlx5e_flow_steering *fs, struct mlx5e_fs_udp *udp) +{ + fs->udp = udp; +} + +struct mlx5_core_dev *mlx5e_fs_get_mdev(struct mlx5e_flow_steering *fs) +{ + return fs->mdev; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c new file mode 100644 index 000000000..aac32e505 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -0,0 +1,1013 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/fs.h> +#include "en.h" +#include "en/params.h" +#include "en/xsk/pool.h" +#include "en/fs_ethtool.h" + +struct mlx5e_ethtool_table { + struct mlx5_flow_table *ft; + int num_rules; +}; + +#define ETHTOOL_NUM_L3_L4_FTS 7 +#define ETHTOOL_NUM_L2_FTS 4 + +struct mlx5e_ethtool_steering { + struct mlx5e_ethtool_table l3_l4_ft[ETHTOOL_NUM_L3_L4_FTS]; + struct mlx5e_ethtool_table l2_ft[ETHTOOL_NUM_L2_FTS]; + struct list_head rules; + int tot_num_rules; +}; + +static int flow_type_to_traffic_type(u32 flow_type); + +static u32 flow_type_mask(u32 flow_type) +{ + return flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS); +} + +struct mlx5e_ethtool_rule { + struct list_head list; + struct ethtool_rx_flow_spec flow_spec; + struct mlx5_flow_handle *rule; + struct mlx5e_ethtool_table *eth_ft; + struct mlx5e_rss *rss; +}; + +static void put_flow_table(struct mlx5e_ethtool_table *eth_ft) +{ + if (!--eth_ft->num_rules) { + mlx5_destroy_flow_table(eth_ft->ft); + eth_ft->ft = NULL; + } +} + +#define MLX5E_ETHTOOL_L3_L4_PRIO 0 +#define MLX5E_ETHTOOL_L2_PRIO (MLX5E_ETHTOOL_L3_L4_PRIO + ETHTOOL_NUM_L3_L4_FTS) +#define MLX5E_ETHTOOL_NUM_ENTRIES 64000 +#define MLX5E_ETHTOOL_NUM_GROUPS 10 +static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, + struct ethtool_rx_flow_spec *fs, + int num_tuples) +{ + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5e_ethtool_table *eth_ft; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + int max_tuples; + int table_size; + int prio; + + switch (flow_type_mask(fs->flow_type)) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case TCP_V6_FLOW: + case UDP_V6_FLOW: + max_tuples = ETHTOOL_NUM_L3_L4_FTS; + prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples); + eth_ft = &ethtool->l3_l4_ft[prio]; + break; + case IP_USER_FLOW: + case IPV6_USER_FLOW: + max_tuples = ETHTOOL_NUM_L3_L4_FTS; + prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples); + eth_ft = &ethtool->l3_l4_ft[prio]; + break; + case ETHER_FLOW: + max_tuples = ETHTOOL_NUM_L2_FTS; + prio = max_tuples - num_tuples; + eth_ft = &ethtool->l2_ft[prio]; + prio += MLX5E_ETHTOOL_L2_PRIO; + break; + default: + return ERR_PTR(-EINVAL); + } + + eth_ft->num_rules++; + if
(eth_ft->ft) + return eth_ft; + + ns = mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_ETHTOOL); + if (!ns) + return ERR_PTR(-EOPNOTSUPP); + + table_size = min_t(u32, BIT(MLX5_CAP_FLOWTABLE(priv->mdev, + flow_table_properties_nic_receive.log_max_ft_size)), + MLX5E_ETHTOOL_NUM_ENTRIES); + + ft_attr.prio = prio; + ft_attr.max_fte = table_size; + ft_attr.autogroup.max_num_groups = MLX5E_ETHTOOL_NUM_GROUPS; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) + return (void *)ft; + + eth_ft->ft = ft; + return eth_ft; +} + +static void mask_spec(u8 *mask, u8 *val, size_t size) +{ + unsigned int i; + + for (i = 0; i < size; i++, mask++, val++) + *((u8 *)val) = *((u8 *)mask) & *((u8 *)val); +} + +#define MLX5E_FTE_SET(header_p, fld, v) \ + MLX5_SET(fte_match_set_lyr_2_4, header_p, fld, v) + +#define MLX5E_FTE_ADDR_OF(header_p, fld) \ + MLX5_ADDR_OF(fte_match_set_lyr_2_4, header_p, fld) + +static void +set_ip4(void *headers_c, void *headers_v, __be32 ip4src_m, + __be32 ip4src_v, __be32 ip4dst_m, __be32 ip4dst_v) +{ + if (ip4src_m) { + memcpy(MLX5E_FTE_ADDR_OF(headers_v, src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ip4src_v, sizeof(ip4src_v)); + memcpy(MLX5E_FTE_ADDR_OF(headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ip4src_m, sizeof(ip4src_m)); + } + if (ip4dst_m) { + memcpy(MLX5E_FTE_ADDR_OF(headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ip4dst_v, sizeof(ip4dst_v)); + memcpy(MLX5E_FTE_ADDR_OF(headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ip4dst_m, sizeof(ip4dst_m)); + } + + MLX5E_FTE_SET(headers_c, ethertype, 0xffff); + MLX5E_FTE_SET(headers_v, ethertype, ETH_P_IP); +} + +static void +set_ip6(void *headers_c, void *headers_v, __be32 ip6src_m[4], + __be32 ip6src_v[4], __be32 ip6dst_m[4], __be32 ip6dst_v[4]) +{ + u8 ip6_sz = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6); + + if (!ipv6_addr_any((struct in6_addr *)ip6src_m)) { + memcpy(MLX5E_FTE_ADDR_OF(headers_v, src_ipv4_src_ipv6.ipv6_layout.ipv6), + ip6src_v, ip6_sz); + memcpy(MLX5E_FTE_ADDR_OF(headers_c, src_ipv4_src_ipv6.ipv6_layout.ipv6), + ip6src_m, ip6_sz); + } + if (!ipv6_addr_any((struct in6_addr *)ip6dst_m)) { + memcpy(MLX5E_FTE_ADDR_OF(headers_v, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + ip6dst_v, ip6_sz); + memcpy(MLX5E_FTE_ADDR_OF(headers_c, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + ip6dst_m, ip6_sz); + } + + MLX5E_FTE_SET(headers_c, ethertype, 0xffff); + MLX5E_FTE_SET(headers_v, ethertype, ETH_P_IPV6); +} + +static void +set_tcp(void *headers_c, void *headers_v, __be16 psrc_m, __be16 psrc_v, + __be16 pdst_m, __be16 pdst_v) +{ + if (psrc_m) { + MLX5E_FTE_SET(headers_c, tcp_sport, ntohs(psrc_m)); + MLX5E_FTE_SET(headers_v, tcp_sport, ntohs(psrc_v)); + } + if (pdst_m) { + MLX5E_FTE_SET(headers_c, tcp_dport, ntohs(pdst_m)); + MLX5E_FTE_SET(headers_v, tcp_dport, ntohs(pdst_v)); + } + + MLX5E_FTE_SET(headers_c, ip_protocol, 0xffff); + MLX5E_FTE_SET(headers_v, ip_protocol, IPPROTO_TCP); +} + +static void +set_udp(void *headers_c, void *headers_v, __be16 psrc_m, __be16 psrc_v, + __be16 pdst_m, __be16 pdst_v) +{ + if (psrc_m) { + MLX5E_FTE_SET(headers_c, udp_sport, ntohs(psrc_m)); + MLX5E_FTE_SET(headers_v, udp_sport, ntohs(psrc_v)); + } + + if (pdst_m) { + MLX5E_FTE_SET(headers_c, udp_dport, ntohs(pdst_m)); + MLX5E_FTE_SET(headers_v, udp_dport, ntohs(pdst_v)); + } + + MLX5E_FTE_SET(headers_c, ip_protocol, 0xffff); + MLX5E_FTE_SET(headers_v, ip_protocol, IPPROTO_UDP); +} + +static void +parse_tcp4(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_tcpip4_spec *l4_mask = 
&fs->m_u.tcp_ip4_spec; + struct ethtool_tcpip4_spec *l4_val = &fs->h_u.tcp_ip4_spec; + + set_ip4(headers_c, headers_v, l4_mask->ip4src, l4_val->ip4src, + l4_mask->ip4dst, l4_val->ip4dst); + + set_tcp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc, + l4_mask->pdst, l4_val->pdst); +} + +static void +parse_udp4(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_tcpip4_spec *l4_mask = &fs->m_u.udp_ip4_spec; + struct ethtool_tcpip4_spec *l4_val = &fs->h_u.udp_ip4_spec; + + set_ip4(headers_c, headers_v, l4_mask->ip4src, l4_val->ip4src, + l4_mask->ip4dst, l4_val->ip4dst); + + set_udp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc, + l4_mask->pdst, l4_val->pdst); +} + +static void +parse_ip4(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_usrip4_spec *l3_mask = &fs->m_u.usr_ip4_spec; + struct ethtool_usrip4_spec *l3_val = &fs->h_u.usr_ip4_spec; + + set_ip4(headers_c, headers_v, l3_mask->ip4src, l3_val->ip4src, + l3_mask->ip4dst, l3_val->ip4dst); + + if (l3_mask->proto) { + MLX5E_FTE_SET(headers_c, ip_protocol, l3_mask->proto); + MLX5E_FTE_SET(headers_v, ip_protocol, l3_val->proto); + } +} + +static void +parse_ip6(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_usrip6_spec *l3_mask = &fs->m_u.usr_ip6_spec; + struct ethtool_usrip6_spec *l3_val = &fs->h_u.usr_ip6_spec; + + set_ip6(headers_c, headers_v, l3_mask->ip6src, + l3_val->ip6src, l3_mask->ip6dst, l3_val->ip6dst); + + if (l3_mask->l4_proto) { + MLX5E_FTE_SET(headers_c, ip_protocol, l3_mask->l4_proto); + MLX5E_FTE_SET(headers_v, ip_protocol, l3_val->l4_proto); + } +} + +static void +parse_tcp6(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_tcpip6_spec *l4_mask = &fs->m_u.tcp_ip6_spec; + struct ethtool_tcpip6_spec *l4_val = &fs->h_u.tcp_ip6_spec; + + set_ip6(headers_c, headers_v, l4_mask->ip6src, + l4_val->ip6src, l4_mask->ip6dst, l4_val->ip6dst); + + set_tcp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc, + l4_mask->pdst, l4_val->pdst); +} + +static void +parse_udp6(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_tcpip6_spec *l4_mask = &fs->m_u.udp_ip6_spec; + struct ethtool_tcpip6_spec *l4_val = &fs->h_u.udp_ip6_spec; + + set_ip6(headers_c, headers_v, l4_mask->ip6src, + l4_val->ip6src, l4_mask->ip6dst, l4_val->ip6dst); + + set_udp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc, + l4_mask->pdst, l4_val->pdst); +} + +static void +parse_ether(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs) +{ + struct ethhdr *eth_mask = &fs->m_u.ether_spec; + struct ethhdr *eth_val = &fs->h_u.ether_spec; + + mask_spec((u8 *)eth_mask, (u8 *)eth_val, sizeof(*eth_mask)); + ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_c, smac_47_16), eth_mask->h_source); + ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_v, smac_47_16), eth_val->h_source); + ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_c, dmac_47_16), eth_mask->h_dest); + ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_v, dmac_47_16), eth_val->h_dest); + MLX5E_FTE_SET(headers_c, ethertype, ntohs(eth_mask->h_proto)); + MLX5E_FTE_SET(headers_v, ethertype, ntohs(eth_val->h_proto)); +} + +static void +set_cvlan(void *headers_c, void *headers_v, __be16 vlan_tci) +{ + MLX5E_FTE_SET(headers_c, cvlan_tag, 1); + MLX5E_FTE_SET(headers_v, cvlan_tag, 1); + MLX5E_FTE_SET(headers_c, first_vid, 0xfff); + MLX5E_FTE_SET(headers_v, first_vid, ntohs(vlan_tci)); +} + +static void +set_dmac(void *headers_c, void *headers_v, + unsigned 
char m_dest[ETH_ALEN], unsigned char v_dest[ETH_ALEN]) +{ + ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_c, dmac_47_16), m_dest); + ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_v, dmac_47_16), v_dest); +} + +static int set_flow_attrs(u32 *match_c, u32 *match_v, + struct ethtool_rx_flow_spec *fs) +{ + void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + u32 flow_type = flow_type_mask(fs->flow_type); + + switch (flow_type) { + case TCP_V4_FLOW: + parse_tcp4(outer_headers_c, outer_headers_v, fs); + break; + case UDP_V4_FLOW: + parse_udp4(outer_headers_c, outer_headers_v, fs); + break; + case IP_USER_FLOW: + parse_ip4(outer_headers_c, outer_headers_v, fs); + break; + case TCP_V6_FLOW: + parse_tcp6(outer_headers_c, outer_headers_v, fs); + break; + case UDP_V6_FLOW: + parse_udp6(outer_headers_c, outer_headers_v, fs); + break; + case IPV6_USER_FLOW: + parse_ip6(outer_headers_c, outer_headers_v, fs); + break; + case ETHER_FLOW: + parse_ether(outer_headers_c, outer_headers_v, fs); + break; + default: + return -EINVAL; + } + + if ((fs->flow_type & FLOW_EXT) && + (fs->m_ext.vlan_tci & cpu_to_be16(VLAN_VID_MASK))) + set_cvlan(outer_headers_c, outer_headers_v, fs->h_ext.vlan_tci); + + if (fs->flow_type & FLOW_MAC_EXT && + !is_zero_ether_addr(fs->m_ext.h_dest)) { + mask_spec(fs->m_ext.h_dest, fs->h_ext.h_dest, ETH_ALEN); + set_dmac(outer_headers_c, outer_headers_v, fs->m_ext.h_dest, + fs->h_ext.h_dest); + } + + return 0; +} + +static void add_rule_to_list(struct mlx5e_priv *priv, + struct mlx5e_ethtool_rule *rule) +{ + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs); + struct list_head *head = &ethtool->rules; + struct mlx5e_ethtool_rule *iter; + + list_for_each_entry(iter, &ethtool->rules, list) { + if (iter->flow_spec.location > rule->flow_spec.location) + break; + head = &iter->list; + } + ethtool->tot_num_rules++; + list_add(&rule->list, head); +} + +static bool outer_header_zero(u32 *match_criteria) +{ + int size = MLX5_FLD_SZ_BYTES(fte_match_param, outer_headers); + char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers); + + return outer_headers_c[0] == 0 && !memcmp(outer_headers_c, + outer_headers_c + 1, + size - 1); +} + +static int flow_get_tirn(struct mlx5e_priv *priv, + struct mlx5e_ethtool_rule *eth_rule, + struct ethtool_rx_flow_spec *fs, + u32 rss_context, u32 *tirn) +{ + if (fs->flow_type & FLOW_RSS) { + struct mlx5e_packet_merge_param pkt_merge_param; + struct mlx5e_rss *rss; + u32 flow_type; + int err; + int tt; + + rss = mlx5e_rx_res_rss_get(priv->rx_res, rss_context); + if (!rss) + return -ENOENT; + + flow_type = flow_type_mask(fs->flow_type); + tt = flow_type_to_traffic_type(flow_type); + if (tt < 0) + return -EINVAL; + + pkt_merge_param = priv->channels.params.packet_merge; + err = mlx5e_rss_obtain_tirn(rss, tt, &pkt_merge_param, false, tirn); + if (err) + return err; + eth_rule->rss = rss; + mlx5e_rss_refcnt_inc(eth_rule->rss); + } else { + *tirn = mlx5e_rx_res_get_tirn_direct(priv->rx_res, fs->ring_cookie); + } + + return 0; +} + +static struct mlx5_flow_handle * +add_ethtool_flow_rule(struct mlx5e_priv *priv, + struct mlx5e_ethtool_rule *eth_rule, + struct mlx5_flow_table *ft, + struct ethtool_rx_flow_spec *fs, u32 rss_context) +{ + struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND }; + struct mlx5_flow_destination *dst = NULL; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + + spec
= kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + err = set_flow_attrs(spec->match_criteria, spec->match_value, + fs); + if (err) + goto free; + + if (fs->ring_cookie == RX_CLS_FLOW_DISC) { + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + } else { + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) { + err = -ENOMEM; + goto free; + } + + err = flow_get_tirn(priv, eth_rule, fs, rss_context, &dst->tir_num); + if (err) + goto free; + + dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } + + spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)); + spec->flow_context.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 1 : 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n", + __func__, err); + goto free; + } +free: + kvfree(spec); + kfree(dst); + return err ? ERR_PTR(err) : rule; +} + +static void del_ethtool_rule(struct mlx5e_flow_steering *fs, + struct mlx5e_ethtool_rule *eth_rule) +{ + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(fs); + if (eth_rule->rule) + mlx5_del_flow_rules(eth_rule->rule); + if (eth_rule->rss) + mlx5e_rss_refcnt_dec(eth_rule->rss); + list_del(&eth_rule->list); + ethtool->tot_num_rules--; + put_flow_table(eth_rule->eth_ft); + kfree(eth_rule); +} + +static struct mlx5e_ethtool_rule *find_ethtool_rule(struct mlx5e_priv *priv, + int location) +{ + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs); + struct mlx5e_ethtool_rule *iter; + + list_for_each_entry(iter, &ethtool->rules, list) { + if (iter->flow_spec.location == location) + return iter; + } + return NULL; +} + +static struct mlx5e_ethtool_rule *get_ethtool_rule(struct mlx5e_priv *priv, + int location) +{ + struct mlx5e_ethtool_rule *eth_rule; + + eth_rule = find_ethtool_rule(priv, location); + if (eth_rule) + del_ethtool_rule(priv->fs, eth_rule); + + eth_rule = kzalloc(sizeof(*eth_rule), GFP_KERNEL); + if (!eth_rule) + return ERR_PTR(-ENOMEM); + + add_rule_to_list(priv, eth_rule); + return eth_rule; +} + +#define MAX_NUM_OF_ETHTOOL_RULES BIT(10) + +#define all_ones(field) (field == (__force typeof(field))-1) +#define all_zeros_or_all_ones(field) \ + ((field) == 0 || (field) == (__force typeof(field))-1) + +static int validate_ethter(struct ethtool_rx_flow_spec *fs) +{ + struct ethhdr *eth_mask = &fs->m_u.ether_spec; + int ntuples = 0; + + if (!is_zero_ether_addr(eth_mask->h_dest)) + ntuples++; + if (!is_zero_ether_addr(eth_mask->h_source)) + ntuples++; + if (eth_mask->h_proto) + ntuples++; + return ntuples; +} + +static int validate_tcpudp4(struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_tcpip4_spec *l4_mask = &fs->m_u.tcp_ip4_spec; + int ntuples = 0; + + if (l4_mask->tos) + return -EINVAL; + + if (l4_mask->ip4src) + ntuples++; + if (l4_mask->ip4dst) + ntuples++; + if (l4_mask->psrc) + ntuples++; + if (l4_mask->pdst) + ntuples++; + /* Flow is TCP/UDP */ + return ++ntuples; +} + +static int validate_ip4(struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_usrip4_spec *l3_mask = &fs->m_u.usr_ip4_spec; + int ntuples = 0; + + if (l3_mask->l4_4_bytes || l3_mask->tos || + fs->h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4) + return -EINVAL; + if (l3_mask->ip4src) + ntuples++; + if (l3_mask->ip4dst) + ntuples++; + if (l3_mask->proto) + ntuples++; + /* Flow is IPv4 */ + return ++ntuples; +} + +static int validate_ip6(struct ethtool_rx_flow_spec *fs) +{ + struct
ethtool_usrip6_spec *l3_mask = &fs->m_u.usr_ip6_spec; + int ntuples = 0; + + if (l3_mask->l4_4_bytes || l3_mask->tclass) + return -EINVAL; + if (!ipv6_addr_any((struct in6_addr *)l3_mask->ip6src)) + ntuples++; + + if (!ipv6_addr_any((struct in6_addr *)l3_mask->ip6dst)) + ntuples++; + if (l3_mask->l4_proto) + ntuples++; + /* Flow is IPv6 */ + return ++ntuples; +} + +static int validate_tcpudp6(struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_tcpip6_spec *l4_mask = &fs->m_u.tcp_ip6_spec; + int ntuples = 0; + + if (l4_mask->tclass) + return -EINVAL; + + if (!ipv6_addr_any((struct in6_addr *)l4_mask->ip6src)) + ntuples++; + + if (!ipv6_addr_any((struct in6_addr *)l4_mask->ip6dst)) + ntuples++; + + if (l4_mask->psrc) + ntuples++; + if (l4_mask->pdst) + ntuples++; + /* Flow is TCP/UDP */ + return ++ntuples; +} + +static int validate_vlan(struct ethtool_rx_flow_spec *fs) +{ + if (fs->m_ext.vlan_etype || + fs->m_ext.vlan_tci != cpu_to_be16(VLAN_VID_MASK)) + return -EINVAL; + + if (fs->m_ext.vlan_tci && + (be16_to_cpu(fs->h_ext.vlan_tci) >= VLAN_N_VID)) + return -EINVAL; + + return 1; +} + +static int validate_flow(struct mlx5e_priv *priv, + struct ethtool_rx_flow_spec *fs) +{ + int num_tuples = 0; + int ret = 0; + + if (fs->location >= MAX_NUM_OF_ETHTOOL_RULES) + return -ENOSPC; + + if (fs->ring_cookie != RX_CLS_FLOW_DISC) + if (fs->ring_cookie >= priv->channels.params.num_channels) + return -EINVAL; + + switch (flow_type_mask(fs->flow_type)) { + case ETHER_FLOW: + num_tuples += validate_ethter(fs); + break; + case TCP_V4_FLOW: + case UDP_V4_FLOW: + ret = validate_tcpudp4(fs); + if (ret < 0) + return ret; + num_tuples += ret; + break; + case IP_USER_FLOW: + ret = validate_ip4(fs); + if (ret < 0) + return ret; + num_tuples += ret; + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + ret = validate_tcpudp6(fs); + if (ret < 0) + return ret; + num_tuples += ret; + break; + case IPV6_USER_FLOW: + ret = validate_ip6(fs); + if (ret < 0) + return ret; + num_tuples += ret; + break; + default: + return -ENOTSUPP; + } + if ((fs->flow_type & FLOW_EXT)) { + ret = validate_vlan(fs); + if (ret < 0) + return ret; + num_tuples += ret; + } + + if (fs->flow_type & FLOW_MAC_EXT && + !is_zero_ether_addr(fs->m_ext.h_dest)) + num_tuples++; + + return num_tuples; +} + +static int +mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv, + struct ethtool_rx_flow_spec *fs, u32 rss_context) +{ + struct mlx5e_ethtool_table *eth_ft; + struct mlx5e_ethtool_rule *eth_rule; + struct mlx5_flow_handle *rule; + int num_tuples; + int err; + + num_tuples = validate_flow(priv, fs); + if (num_tuples <= 0) { + netdev_warn(priv->netdev, "%s: flow is not valid %d\n", + __func__, num_tuples); + return num_tuples; + } + + eth_ft = get_flow_table(priv, fs, num_tuples); + if (IS_ERR(eth_ft)) + return PTR_ERR(eth_ft); + + eth_rule = get_ethtool_rule(priv, fs->location); + if (IS_ERR(eth_rule)) { + put_flow_table(eth_ft); + return PTR_ERR(eth_rule); + } + + eth_rule->flow_spec = *fs; + eth_rule->eth_ft = eth_ft; + + rule = add_ethtool_flow_rule(priv, eth_rule, eth_ft->ft, fs, rss_context); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto del_ethtool_rule; + } + + eth_rule->rule = rule; + + return 0; + +del_ethtool_rule: + del_ethtool_rule(priv->fs, eth_rule); + + return err; +} + +static int +mlx5e_ethtool_flow_remove(struct mlx5e_priv *priv, int location) +{ + struct mlx5e_ethtool_rule *eth_rule; + int err = 0; + + if (location >= MAX_NUM_OF_ETHTOOL_RULES) + return -ENOSPC; + + eth_rule = find_ethtool_rule(priv, location); + if (!eth_rule) { + 
err = -ENOENT; + goto out; + } + + del_ethtool_rule(priv->fs, eth_rule); +out: + return err; +} + +static int +mlx5e_ethtool_get_flow(struct mlx5e_priv *priv, + struct ethtool_rxnfc *info, int location) +{ + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs); + struct mlx5e_ethtool_rule *eth_rule; + + if (location < 0 || location >= MAX_NUM_OF_ETHTOOL_RULES) + return -EINVAL; + + list_for_each_entry(eth_rule, &ethtool->rules, list) { + int index; + + if (eth_rule->flow_spec.location != location) + continue; + if (!info) + return 0; + info->fs = eth_rule->flow_spec; + if (!eth_rule->rss) + return 0; + index = mlx5e_rx_res_rss_index(priv->rx_res, eth_rule->rss); + if (index < 0) + return index; + info->rss_context = index; + return 0; + } + + return -ENOENT; +} + +static int +mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv, + struct ethtool_rxnfc *info, u32 *rule_locs) +{ + int location = 0; + int idx = 0; + int err = 0; + + info->data = MAX_NUM_OF_ETHTOOL_RULES; + while ((!err || err == -ENOENT) && idx < info->rule_cnt) { + err = mlx5e_ethtool_get_flow(priv, NULL, location); + if (!err) + rule_locs[idx++] = location; + location++; + } + return err; +} + +int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool) +{ + *ethtool = kvzalloc(sizeof(**ethtool), GFP_KERNEL); + if (!*ethtool) + return -ENOMEM; + return 0; +} + +void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool) +{ + kvfree(ethtool); +} + +void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(fs); + struct mlx5e_ethtool_rule *iter; + struct mlx5e_ethtool_rule *temp; + + list_for_each_entry_safe(iter, temp, &ethtool->rules, list) + del_ethtool_rule(fs, iter); +} + +void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(fs); + + INIT_LIST_HEAD(&ethtool->rules); +} + +static int flow_type_to_traffic_type(u32 flow_type) +{ + switch (flow_type) { + case TCP_V4_FLOW: + return MLX5_TT_IPV4_TCP; + case TCP_V6_FLOW: + return MLX5_TT_IPV6_TCP; + case UDP_V4_FLOW: + return MLX5_TT_IPV4_UDP; + case UDP_V6_FLOW: + return MLX5_TT_IPV6_UDP; + case AH_V4_FLOW: + return MLX5_TT_IPV4_IPSEC_AH; + case AH_V6_FLOW: + return MLX5_TT_IPV6_IPSEC_AH; + case ESP_V4_FLOW: + return MLX5_TT_IPV4_IPSEC_ESP; + case ESP_V6_FLOW: + return MLX5_TT_IPV6_IPSEC_ESP; + case IPV4_FLOW: + return MLX5_TT_IPV4; + case IPV6_FLOW: + return MLX5_TT_IPV6; + default: + return -EINVAL; + } +} + +static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv, + struct ethtool_rxnfc *nfc) +{ + u8 rx_hash_field = 0; + int err; + int tt; + + tt = flow_type_to_traffic_type(nfc->flow_type); + if (tt < 0) + return tt; + + /* RSS does not support anything other than hashing to queues + * on src IP, dest IP, TCP/UDP src port and TCP/UDP dest + * port.
+ */ + if (nfc->flow_type != TCP_V4_FLOW && + nfc->flow_type != TCP_V6_FLOW && + nfc->flow_type != UDP_V4_FLOW && + nfc->flow_type != UDP_V6_FLOW) + return -EOPNOTSUPP; + + if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) + return -EOPNOTSUPP; + + if (nfc->data & RXH_IP_SRC) + rx_hash_field |= MLX5_HASH_FIELD_SEL_SRC_IP; + if (nfc->data & RXH_IP_DST) + rx_hash_field |= MLX5_HASH_FIELD_SEL_DST_IP; + if (nfc->data & RXH_L4_B_0_1) + rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_SPORT; + if (nfc->data & RXH_L4_B_2_3) + rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_DPORT; + + mutex_lock(&priv->state_lock); + err = mlx5e_rx_res_rss_set_hash_fields(priv->rx_res, tt, rx_hash_field); + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv, + struct ethtool_rxnfc *nfc) +{ + u32 hash_field = 0; + int tt; + + tt = flow_type_to_traffic_type(nfc->flow_type); + if (tt < 0) + return tt; + + hash_field = mlx5e_rx_res_rss_get_hash_fields(priv->rx_res, tt); + nfc->data = 0; + + if (hash_field & MLX5_HASH_FIELD_SEL_SRC_IP) + nfc->data |= RXH_IP_SRC; + if (hash_field & MLX5_HASH_FIELD_SEL_DST_IP) + nfc->data |= RXH_IP_DST; + if (hash_field & MLX5_HASH_FIELD_SEL_L4_SPORT) + nfc->data |= RXH_L4_B_0_1; + if (hash_field & MLX5_HASH_FIELD_SEL_L4_DPORT) + nfc->data |= RXH_L4_B_2_3; + + return 0; +} + +int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd) +{ + int err = 0; + + switch (cmd->cmd) { + case ETHTOOL_SRXCLSRLINS: + err = mlx5e_ethtool_flow_replace(priv, &cmd->fs, cmd->rss_context); + break; + case ETHTOOL_SRXCLSRLDEL: + err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location); + break; + case ETHTOOL_SRXFH: + err = mlx5e_set_rss_hash_opt(priv, cmd); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, + struct ethtool_rxnfc *info, u32 *rule_locs) +{ + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs); + int err = 0; + + switch (info->cmd) { + case ETHTOOL_GRXCLSRLCNT: + info->rule_cnt = ethtool->tot_num_rules; + break; + case ETHTOOL_GRXCLSRULE: + err = mlx5e_ethtool_get_flow(priv, info, info->fs.location); + break; + case ETHTOOL_GRXCLSRLALL: + err = mlx5e_ethtool_get_all_flows(priv, info, rule_locs); + break; + case ETHTOOL_GRXFH: + err = mlx5e_get_rss_hash_opt(priv, info); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c new file mode 100644 index 000000000..9910a0480 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -0,0 +1,6021 @@ +/* + * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <net/tc_act/tc_gact.h> +#include <linux/mlx5/fs.h> +#include <net/vxlan.h> +#include <net/geneve.h> +#include <linux/bpf.h> +#include <linux/if_bridge.h> +#include <linux/filter.h> +#include <net/page_pool.h> +#include <net/xdp_sock_drv.h> +#include "eswitch.h" +#include "en.h" +#include "en/txrx.h" +#include "en_tc.h" +#include "en_rep.h" +#include "en_accel/ipsec.h" +#include "en_accel/macsec.h" +#include "en_accel/en_accel.h" +#include "en_accel/ktls.h" +#include "lib/vxlan.h" +#include "lib/clock.h" +#include "en/port.h" +#include "en/xdp.h" +#include "lib/eq.h" +#include "en/monitor_stats.h" +#include "en/health.h" +#include "en/params.h" +#include "en/xsk/pool.h" +#include "en/xsk/setup.h" +#include "en/xsk/rx.h" +#include "en/xsk/tx.h" +#include "en/hv_vhca_stats.h" +#include "en/devlink.h" +#include "lib/mlx5.h" +#include "en/ptp.h" +#include "en/htb.h" +#include "qos.h" +#include "en/trap.h" + +bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + u16 umr_wqebbs, max_wqebbs; + bool striding_rq_umr; + + striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && MLX5_CAP_GEN(mdev, umr_ptr_rlky) && + MLX5_CAP_ETH(mdev, reg_umr_sq); + if (!striding_rq_umr) + return false; + + umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, umr_mode); + max_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); + /* Sanity check; should never happen, because mlx5e_mpwrq_umr_wqebbs is + * calculated from mlx5e_get_max_sq_aligned_wqebbs. 
+ */ + if (WARN_ON(umr_wqebbs > max_wqebbs)) + return false; + + return true; +} + +void mlx5e_update_carrier(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u8 port_state; + bool up; + + port_state = mlx5_query_vport_state(mdev, + MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, + 0); + + up = port_state == VPORT_STATE_UP; + if (up == netif_carrier_ok(priv->netdev)) + netif_carrier_event(priv->netdev); + if (up) { + netdev_info(priv->netdev, "Link up\n"); + netif_carrier_on(priv->netdev); + } else { + netdev_info(priv->netdev, "Link down\n"); + netif_carrier_off(priv->netdev); + } +} + +static void mlx5e_update_carrier_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + update_carrier_work); + + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + if (priv->profile->update_carrier) + priv->profile->update_carrier(priv); + mutex_unlock(&priv->state_lock); +} + +static void mlx5e_update_stats_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + update_stats_work); + + mutex_lock(&priv->state_lock); + priv->profile->update_stats(priv); + mutex_unlock(&priv->state_lock); +} + +void mlx5e_queue_update_stats(struct mlx5e_priv *priv) +{ + if (!priv->profile->update_stats) + return; + + if (unlikely(test_bit(MLX5E_STATE_DESTROYING, &priv->state))) + return; + + queue_work(priv->wq, &priv->update_stats_work); +} + +static int async_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb); + struct mlx5_eqe *eqe = data; + + if (event != MLX5_EVENT_TYPE_PORT_CHANGE) + return NOTIFY_DONE; + + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + queue_work(priv->wq, &priv->update_carrier_work); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +static void mlx5e_enable_async_events(struct mlx5e_priv *priv) +{ + priv->events_nb.notifier_call = async_event; + mlx5_notifier_register(priv->mdev, &priv->events_nb); +} + +static void mlx5e_disable_async_events(struct mlx5e_priv *priv) +{ + mlx5_notifier_unregister(priv->mdev, &priv->events_nb); +} + +static int blocking_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, blocking_events_nb); + int err; + + switch (event) { + case MLX5_DRIVER_EVENT_TYPE_TRAP: + err = mlx5e_handle_trap_event(priv, data); + break; + default: + netdev_warn(priv->netdev, "Sync event: Unknown event %ld\n", event); + err = -EINVAL; + } + return err; +} + +static void mlx5e_enable_blocking_events(struct mlx5e_priv *priv) +{ + priv->blocking_events_nb.notifier_call = blocking_event; + mlx5_blocking_notifier_register(priv->mdev, &priv->blocking_events_nb); +} + +static void mlx5e_disable_blocking_events(struct mlx5e_priv *priv) +{ + mlx5_blocking_notifier_unregister(priv->mdev, &priv->blocking_events_nb); +} + +static u16 mlx5e_mpwrq_umr_octowords(u32 entries, enum mlx5e_mpwrq_umr_mode umr_mode) +{ + u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); + u32 sz; + + sz = ALIGN(entries * umr_entry_size, MLX5_UMR_MTT_ALIGNMENT); + + return sz / MLX5_OCTWORD; +} + +static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, + struct mlx5e_icosq *sq, + struct mlx5e_umr_wqe *wqe) +{ + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; + u16 octowords; + u8 
ds_cnt; + + ds_cnt = DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(rq->mdev, rq->mpwqe.page_shift, + rq->mpwqe.umr_mode), + MLX5_SEND_WQE_DS); + + cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + ds_cnt); + cseg->umr_mkey = rq->mpwqe.umr_mkey_be; + + ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE; + octowords = mlx5e_mpwrq_umr_octowords(rq->mpwqe.pages_per_wqe, rq->mpwqe.umr_mode); + ucseg->xlt_octowords = cpu_to_be16(octowords); + ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); +} + +static int mlx5e_rq_shampo_hd_alloc(struct mlx5e_rq *rq, int node) +{ + rq->mpwqe.shampo = kvzalloc_node(sizeof(*rq->mpwqe.shampo), + GFP_KERNEL, node); + if (!rq->mpwqe.shampo) + return -ENOMEM; + return 0; +} + +static void mlx5e_rq_shampo_hd_free(struct mlx5e_rq *rq) +{ + kvfree(rq->mpwqe.shampo); +} + +static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, int node) +{ + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + + shampo->bitmap = bitmap_zalloc_node(shampo->hd_per_wq, GFP_KERNEL, + node); + if (!shampo->bitmap) + return -ENOMEM; + + shampo->info = kvzalloc_node(array_size(shampo->hd_per_wq, + sizeof(*shampo->info)), + GFP_KERNEL, node); + if (!shampo->info) { + kvfree(shampo->bitmap); + return -ENOMEM; + } + return 0; +} + +static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq) +{ + kvfree(rq->mpwqe.shampo->bitmap); + kvfree(rq->mpwqe.shampo->info); +} + +static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) +{ + int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); + size_t alloc_size; + + alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info, alloc_units, + rq->mpwqe.pages_per_wqe)); + + rq->mpwqe.info = kvzalloc_node(alloc_size, GFP_KERNEL, node); + if (!rq->mpwqe.info) + return -ENOMEM; + + mlx5e_build_umr_wqe(rq, rq->icosq, &rq->mpwqe.umr_wqe); + + return 0; +} + + +static u8 mlx5e_mpwrq_access_mode(enum mlx5e_mpwrq_umr_mode umr_mode) +{ + switch (umr_mode) { + case MLX5E_MPWRQ_UMR_MODE_ALIGNED: + return MLX5_MKC_ACCESS_MODE_MTT; + case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: + return MLX5_MKC_ACCESS_MODE_KSM; + case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: + return MLX5_MKC_ACCESS_MODE_KLMS; + case MLX5E_MPWRQ_UMR_MODE_TRIPLE: + return MLX5_MKC_ACCESS_MODE_KSM; + } + WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode); + return 0; +} + +static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, + u32 npages, u8 page_shift, u32 *umr_mkey, + dma_addr_t filler_addr, + enum mlx5e_mpwrq_umr_mode umr_mode, + u32 xsk_chunk_size) +{ + struct mlx5_mtt *mtt; + struct mlx5_ksm *ksm; + struct mlx5_klm *klm; + u32 octwords; + int inlen; + void *mkc; + u32 *in; + int err; + int i; + + if ((umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED || + umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE) && + !MLX5_CAP_GEN(mdev, fixed_buffer_size)) { + mlx5_core_warn(mdev, "Unaligned AF_XDP requires fixed_buffer_size capability\n"); + return -EINVAL; + } + + octwords = mlx5e_mpwrq_umr_octowords(npages, umr_mode); + + inlen = MLX5_FLEXIBLE_INLEN(mdev, MLX5_ST_SZ_BYTES(create_mkey_in), + MLX5_OCTWORD, octwords); + if (inlen < 0) + return inlen; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, umr_en, 1); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + MLX5_SET(mkc, mkc, access_mode_1_0, mlx5e_mpwrq_access_mode(umr_mode)); + mlx5e_mkey_set_relaxed_ordering(mdev, mkc); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, pd, 
mdev->mlx5e_res.hw_objs.pdn); + MLX5_SET64(mkc, mkc, len, npages << page_shift); + MLX5_SET(mkc, mkc, translations_octword_size, octwords); + if (umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE) + MLX5_SET(mkc, mkc, log_page_size, page_shift - 2); + else if (umr_mode != MLX5E_MPWRQ_UMR_MODE_OVERSIZED) + MLX5_SET(mkc, mkc, log_page_size, page_shift); + MLX5_SET(create_mkey_in, in, translations_octword_actual_size, octwords); + + /* Initialize the mkey with all MTTs pointing to a default + * page (filler_addr). When the channels are activated, UMR + * WQEs will redirect the RX WQEs to the actual memory from + * the RQ's pool, while the gaps (wqe_overflow) remain mapped + * to the default page. + */ + switch (umr_mode) { + case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: + klm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for (i = 0; i < npages; i++) { + klm[i << 1] = (struct mlx5_klm) { + .va = cpu_to_be64(filler_addr), + .bcount = cpu_to_be32(xsk_chunk_size), + .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), + }; + klm[(i << 1) + 1] = (struct mlx5_klm) { + .va = cpu_to_be64(filler_addr), + .bcount = cpu_to_be32((1 << page_shift) - xsk_chunk_size), + .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), + }; + } + break; + case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: + ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for (i = 0; i < npages; i++) + ksm[i] = (struct mlx5_ksm) { + .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), + .va = cpu_to_be64(filler_addr), + }; + break; + case MLX5E_MPWRQ_UMR_MODE_ALIGNED: + mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for (i = 0; i < npages; i++) + mtt[i] = (struct mlx5_mtt) { + .ptag = cpu_to_be64(filler_addr), + }; + break; + case MLX5E_MPWRQ_UMR_MODE_TRIPLE: + ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for (i = 0; i < npages * 4; i++) { + ksm[i] = (struct mlx5_ksm) { + .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), + .va = cpu_to_be64(filler_addr), + }; + } + break; + } + + err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen); + + kvfree(in); + return err; +} + +static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev, + u64 nentries, + u32 *umr_mkey) +{ + int inlen; + void *mkc; + u32 *in; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, umr_en, 1); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS); + mlx5e_mkey_set_relaxed_ordering(mdev, mkc); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn); + MLX5_SET(mkc, mkc, translations_octword_size, nentries); + MLX5_SET(mkc, mkc, length64, 1); + err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen); + + kvfree(in); + return err; +} + +static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq) +{ + u32 xsk_chunk_size = rq->xsk_pool ? rq->xsk_pool->chunk_size : 0; + u32 wq_size = mlx5_wq_ll_get_size(&rq->mpwqe.wq); + u32 num_entries, max_num_entries; + u32 umr_mkey; + int err; + + max_num_entries = mlx5e_mpwrq_max_num_entries(mdev, rq->mpwqe.umr_mode); + + /* Shouldn't overflow, the result is at most MLX5E_MAX_RQ_NUM_MTTS. 
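+ * The UMR mkey needs wq_size * mtts_per_wqe translation entries in total.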
*/ + if (WARN_ON_ONCE(check_mul_overflow(wq_size, (u32)rq->mpwqe.mtts_per_wqe, + &num_entries) || + num_entries > max_num_entries)) + mlx5_core_err(mdev, "%s: multiplication overflow: %u * %u > %u\n", + __func__, wq_size, rq->mpwqe.mtts_per_wqe, + max_num_entries); + + err = mlx5e_create_umr_mkey(mdev, num_entries, rq->mpwqe.page_shift, + &umr_mkey, rq->wqe_overflow.addr, + rq->mpwqe.umr_mode, xsk_chunk_size); + rq->mpwqe.umr_mkey_be = cpu_to_be32(umr_mkey); + return err; +} + +static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev, + struct mlx5e_rq *rq) +{ + u32 max_klm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size)); + + if (max_klm_size < rq->mpwqe.shampo->hd_per_wq) { + mlx5_core_err(mdev, "max klm list size 0x%x is smaller than shampo header buffer list size 0x%x\n", + max_klm_size, rq->mpwqe.shampo->hd_per_wq); + return -EINVAL; + } + return mlx5e_create_umr_klm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq, + &rq->mpwqe.shampo->mkey); +} + +static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) +{ + struct mlx5e_wqe_frag_info next_frag = {}; + struct mlx5e_wqe_frag_info *prev = NULL; + int i; + + if (rq->xsk_pool) { + /* Assumptions used by XSK batched allocator. */ + WARN_ON(rq->wqe.info.num_frags != 1); + WARN_ON(rq->wqe.info.log_num_frags != 0); + WARN_ON(rq->wqe.info.arr[0].frag_stride != PAGE_SIZE); + } + + next_frag.au = &rq->wqe.alloc_units[0]; + + for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) { + struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; + struct mlx5e_wqe_frag_info *frag = + &rq->wqe.frags[i << rq->wqe.info.log_num_frags]; + int f; + + for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) { + if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) { + next_frag.au++; + next_frag.offset = 0; + if (prev) + prev->last_in_page = true; + } + *frag = next_frag; + + /* prepare next */ + next_frag.offset += frag_info[f].frag_stride; + prev = frag; + } + } + + if (prev) + prev->last_in_page = true; +} + +static int mlx5e_init_au_list(struct mlx5e_rq *rq, int wq_sz, int node) +{ + int len = wq_sz << rq->wqe.info.log_num_frags; + + rq->wqe.alloc_units = kvzalloc_node(array_size(len, sizeof(*rq->wqe.alloc_units)), + GFP_KERNEL, node); + if (!rq->wqe.alloc_units) + return -ENOMEM; + + mlx5e_init_frags_partition(rq); + + return 0; +} + +static void mlx5e_free_au_list(struct mlx5e_rq *rq) +{ + kvfree(rq->wqe.alloc_units); +} + +static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work) +{ + struct mlx5e_rq *rq = container_of(recover_work, struct mlx5e_rq, recover_work); + + mlx5e_reporter_rq_cqe_err(rq); +} + +static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq) +{ + rq->wqe_overflow.page = alloc_page(GFP_KERNEL); + if (!rq->wqe_overflow.page) + return -ENOMEM; + + rq->wqe_overflow.addr = dma_map_page(rq->pdev, rq->wqe_overflow.page, 0, + PAGE_SIZE, rq->buff.map_dir); + if (dma_mapping_error(rq->pdev, rq->wqe_overflow.addr)) { + __free_page(rq->wqe_overflow.page); + return -ENOMEM; + } + return 0; +} + +static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq) +{ + dma_unmap_page(rq->pdev, rq->wqe_overflow.addr, PAGE_SIZE, + rq->buff.map_dir); + __free_page(rq->wqe_overflow.page); +} + +static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq *rq) +{ + struct mlx5_core_dev *mdev = c->mdev; + int err; + + rq->wq_type = params->rq_wq_type; + rq->pdev = c->pdev; + rq->netdev = c->netdev; + rq->priv = c->priv; + rq->tstamp = c->tstamp; + rq->clock = &mdev->clock; + rq->icosq = 
&c->icosq; + rq->ix = c->ix; + rq->channel = c; + rq->mdev = mdev; + rq->hw_mtu = + MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN * !params->scatter_fcs_en; + rq->xdpsq = &c->rq_xdpsq; + rq->stats = &c->priv->channel_stats[c->ix]->rq; + rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); + err = mlx5e_rq_set_handlers(rq, params, NULL); + if (err) + return err; + + return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id); +} + +static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_rq_param *rqp, + struct mlx5e_rq *rq, + u32 *pool_size, + int node) +{ + void *wqc = MLX5_ADDR_OF(rqc, rqp->rqc, wq); + int wq_size; + int err; + + if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) + return 0; + err = mlx5e_rq_shampo_hd_alloc(rq, node); + if (err) + goto out; + rq->mpwqe.shampo->hd_per_wq = + mlx5e_shampo_hd_per_wq(mdev, params, rqp); + err = mlx5e_create_rq_hd_umr_mkey(mdev, rq); + if (err) + goto err_shampo_hd; + err = mlx5e_rq_shampo_hd_info_alloc(rq, node); + if (err) + goto err_shampo_info; + rq->hw_gro_data = kvzalloc_node(sizeof(*rq->hw_gro_data), GFP_KERNEL, node); + if (!rq->hw_gro_data) { + err = -ENOMEM; + goto err_hw_gro_data; + } + rq->mpwqe.shampo->key = + cpu_to_be32(rq->mpwqe.shampo->mkey); + rq->mpwqe.shampo->hd_per_wqe = + mlx5e_shampo_hd_per_wqe(mdev, params, rqp); + wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); + *pool_size += (rq->mpwqe.shampo->hd_per_wqe * wq_size) / + MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; + return 0; + +err_hw_gro_data: + mlx5e_rq_shampo_hd_info_free(rq); +err_shampo_info: + mlx5_core_destroy_mkey(mdev, rq->mpwqe.shampo->mkey); +err_shampo_hd: + mlx5e_rq_shampo_hd_free(rq); +out: + return err; +} + +static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq) +{ + if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) + return; + + kvfree(rq->hw_gro_data); + mlx5e_rq_shampo_hd_info_free(rq); + mlx5_core_destroy_mkey(rq->mdev, rq->mpwqe.shampo->mkey); + mlx5e_rq_shampo_hd_free(rq); +} + +static int mlx5e_alloc_rq(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_param *rqp, + int node, struct mlx5e_rq *rq) +{ + struct page_pool_params pp_params = { 0 }; + struct mlx5_core_dev *mdev = rq->mdev; + void *rqc = rqp->rqc; + void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); + u32 pool_size; + int wq_sz; + int err; + int i; + + rqp->wq.db_numa_node = node; + INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work); + + if (params->xdp_prog) + bpf_prog_inc(params->xdp_prog); + RCU_INIT_POINTER(rq->xdp_prog, params->xdp_prog); + + rq->buff.map_dir = params->xdp_prog ? 
DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; + rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk); + pool_size = 1 << params->log_rq_mtu_frames; + + rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey); + + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq, + &rq->wq_ctrl); + if (err) + goto err_rq_xdp_prog; + + err = mlx5e_alloc_mpwqe_rq_drop_page(rq); + if (err) + goto err_rq_wq_destroy; + + rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR]; + + wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); + + rq->mpwqe.page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + rq->mpwqe.umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + rq->mpwqe.pages_per_wqe = + mlx5e_mpwrq_pages_per_wqe(mdev, rq->mpwqe.page_shift, + rq->mpwqe.umr_mode); + rq->mpwqe.umr_wqebbs = + mlx5e_mpwrq_umr_wqebbs(mdev, rq->mpwqe.page_shift, + rq->mpwqe.umr_mode); + rq->mpwqe.mtts_per_wqe = + mlx5e_mpwrq_mtts_per_wqe(mdev, rq->mpwqe.page_shift, + rq->mpwqe.umr_mode); + + pool_size = rq->mpwqe.pages_per_wqe << + mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk); + + rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + rq->mpwqe.num_strides = + BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk)); + rq->mpwqe.min_wqe_bulk = mlx5e_mpwqe_get_min_wqe_bulk(wq_sz); + + rq->buff.frame0_sz = (1 << rq->mpwqe.log_stride_sz); + + err = mlx5e_create_rq_umr_mkey(mdev, rq); + if (err) + goto err_rq_drop_page; + + err = mlx5e_rq_alloc_mpwqe_info(rq, node); + if (err) + goto err_rq_mkey; + + err = mlx5_rq_shampo_alloc(mdev, params, rqp, rq, &pool_size, node); + if (err) + goto err_free_mpwqe_info; + + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq, + &rq->wq_ctrl); + if (err) + goto err_rq_xdp_prog; + + rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR]; + + wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq); + + rq->wqe.info = rqp->frags_info; + rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride; + + rq->wqe.frags = + kvzalloc_node(array_size(sizeof(*rq->wqe.frags), + (wq_sz << rq->wqe.info.log_num_frags)), + GFP_KERNEL, node); + if (!rq->wqe.frags) { + err = -ENOMEM; + goto err_rq_wq_destroy; + } + + err = mlx5e_init_au_list(rq, wq_sz, node); + if (err) + goto err_rq_frags; + } + + if (xsk) { + err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, + MEM_TYPE_XSK_BUFF_POOL, NULL); + xsk_pool_set_rxq_info(rq->xsk_pool, &rq->xdp_rxq); + } else { + /* Create a page_pool and register it with rxq */ + pp_params.order = 0; + pp_params.flags = 0; /* No-internal DMA mapping in page_pool */ + pp_params.pool_size = pool_size; + pp_params.nid = node; + pp_params.dev = rq->pdev; + pp_params.dma_dir = rq->buff.map_dir; + + /* page_pool can be used even when there is no rq->xdp_prog, + * given page_pool does not handle DMA mapping there is no + * required state to clear. And page_pool gracefully handle + * elevated refcnt. 
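+ * (In the AF_XDP case above, the xdp_rxq memory model is
+ * MEM_TYPE_XSK_BUFF_POOL and no page_pool is created.)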
+ */ + rq->page_pool = page_pool_create(&pp_params); + if (IS_ERR(rq->page_pool)) { + err = PTR_ERR(rq->page_pool); + rq->page_pool = NULL; + goto err_free_by_rq_type; + } + if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) + err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, + MEM_TYPE_PAGE_POOL, rq->page_pool); + } + if (err) + goto err_destroy_page_pool; + + for (i = 0; i < wq_sz; i++) { + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + struct mlx5e_rx_wqe_ll *wqe = + mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i); + u32 byte_count = + rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz; + u64 dma_offset = mul_u32_u32(i, rq->mpwqe.mtts_per_wqe) << + rq->mpwqe.page_shift; + u16 headroom = test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) ? + 0 : rq->buff.headroom; + + wqe->data[0].addr = cpu_to_be64(dma_offset + headroom); + wqe->data[0].byte_count = cpu_to_be32(byte_count); + wqe->data[0].lkey = rq->mpwqe.umr_mkey_be; + } else { + struct mlx5e_rx_wqe_cyc *wqe = + mlx5_wq_cyc_get_wqe(&rq->wqe.wq, i); + int f; + + for (f = 0; f < rq->wqe.info.num_frags; f++) { + u32 frag_size = rq->wqe.info.arr[f].frag_size | + MLX5_HW_START_PADDING; + + wqe->data[f].byte_count = cpu_to_be32(frag_size); + wqe->data[f].lkey = rq->mkey_be; + } + /* check if num_frags is not a pow of two */ + if (rq->wqe.info.num_frags < (1 << rq->wqe.info.log_num_frags)) { + wqe->data[f].byte_count = 0; + wqe->data[f].lkey = cpu_to_be32(MLX5_INVALID_LKEY); + wqe->data[f].addr = 0; + } + } + } + + INIT_WORK(&rq->dim.work, mlx5e_rx_dim_work); + + switch (params->rx_cq_moderation.cq_period_mode) { + case MLX5_CQ_PERIOD_MODE_START_FROM_CQE: + rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE; + break; + case MLX5_CQ_PERIOD_MODE_START_FROM_EQE: + default: + rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + } + + rq->page_cache.head = 0; + rq->page_cache.tail = 0; + + return 0; + +err_destroy_page_pool: + page_pool_destroy(rq->page_pool); +err_free_by_rq_type: + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + mlx5e_rq_free_shampo(rq); +err_free_mpwqe_info: + kvfree(rq->mpwqe.info); +err_rq_mkey: + mlx5_core_destroy_mkey(mdev, be32_to_cpu(rq->mpwqe.umr_mkey_be)); +err_rq_drop_page: + mlx5e_free_mpwqe_rq_drop_page(rq); + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + mlx5e_free_au_list(rq); +err_rq_frags: + kvfree(rq->wqe.frags); + } +err_rq_wq_destroy: + mlx5_wq_destroy(&rq->wq_ctrl); +err_rq_xdp_prog: + if (params->xdp_prog) + bpf_prog_put(params->xdp_prog); + + return err; +} + +static void mlx5e_free_rq(struct mlx5e_rq *rq) +{ + struct bpf_prog *old_prog; + int i; + + if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) { + old_prog = rcu_dereference_protected(rq->xdp_prog, + lockdep_is_held(&rq->priv->state_lock)); + if (old_prog) + bpf_prog_put(old_prog); + } + + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + kvfree(rq->mpwqe.info); + mlx5_core_destroy_mkey(rq->mdev, be32_to_cpu(rq->mpwqe.umr_mkey_be)); + mlx5e_free_mpwqe_rq_drop_page(rq); + mlx5e_rq_free_shampo(rq); + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + kvfree(rq->wqe.frags); + mlx5e_free_au_list(rq); + } + + for (i = rq->page_cache.head; i != rq->page_cache.tail; + i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) { + /* With AF_XDP, page_cache is not used, so this loop is not + * entered, and it's safe to call mlx5e_page_release_dynamic + * directly. 
+ */ + mlx5e_page_release_dynamic(rq, rq->page_cache.page_cache[i], false); + } + + xdp_rxq_info_unreg(&rq->xdp_rxq); + page_pool_destroy(rq->page_pool); + mlx5_wq_destroy(&rq->wq_ctrl); +} + +int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) +{ + struct mlx5_core_dev *mdev = rq->mdev; + u8 ts_format; + void *in; + void *rqc; + void *wq; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_rq_in) + + sizeof(u64) * rq->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + ts_format = mlx5_is_real_time_rq(mdev) ? + MLX5_TIMESTAMP_FORMAT_REAL_TIME : + MLX5_TIMESTAMP_FORMAT_FREE_RUNNING; + rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); + wq = MLX5_ADDR_OF(rqc, rqc, wq); + + memcpy(rqc, param->rqc, sizeof(param->rqc)); + + MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); + MLX5_SET(rqc, rqc, ts_format, ts_format); + MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); + + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) { + MLX5_SET(wq, wq, log_headers_buffer_entry_num, + order_base_2(rq->mpwqe.shampo->hd_per_wq)); + MLX5_SET(wq, wq, headers_mkey, rq->mpwqe.shampo->mkey); + } + + mlx5_fill_page_frag_array(&rq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); + + err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn); + + kvfree(in); + + return err; +} + +static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state) +{ + struct mlx5_core_dev *mdev = rq->mdev; + + void *in; + void *rqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + if (curr_state == MLX5_RQC_STATE_RST && next_state == MLX5_RQC_STATE_RDY) + mlx5e_rqwq_reset(rq); + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + MLX5_SET(modify_rq_in, in, rq_state, curr_state); + MLX5_SET(rqc, rqc, state, next_state); + + err = mlx5_core_modify_rq(mdev, rq->rqn, in); + + kvfree(in); + + return err; +} + +static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) +{ + struct net_device *dev = rq->netdev; + int err; + + err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST); + if (err) { + netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn); + return err; + } + err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); + if (err) { + netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn); + return err; + } + + return 0; +} + +int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state) +{ + mlx5e_free_rx_descs(rq); + + return mlx5e_rq_to_ready(rq, curr_state); +} + +static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) +{ + struct mlx5_core_dev *mdev = rq->mdev; + void *in; + void *rqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY); + MLX5_SET64(modify_rq_in, in, modify_bitmask, + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD); + MLX5_SET(rqc, rqc, vsd, vsd); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY); + + err = mlx5_core_modify_rq(mdev, rq->rqn, in); + + kvfree(in); + + return err; +} + +void mlx5e_destroy_rq(struct mlx5e_rq *rq) +{ + mlx5_core_destroy_rq(rq->mdev, rq->rqn); +} + +int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) +{ + unsigned long exp_time = jiffies + msecs_to_jiffies(wait_time); 
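+ /* Poll every 20ms until the RQ has posted at least min_wqes RX WQEs;
+ * after wait_time ms, report the timeout to the RX health reporter.
+ */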
+ + u16 min_wqes = mlx5_min_rx_wqes(rq->wq_type, mlx5e_rqwq_get_size(rq)); + + do { + if (mlx5e_rqwq_get_cur_sz(rq) >= min_wqes) + return 0; + + msleep(20); + } while (time_before(jiffies, exp_time)); + + netdev_warn(rq->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n", + rq->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes); + + mlx5e_reporter_rx_timeout(rq); + return -ETIMEDOUT; +} + +void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq) +{ + struct mlx5_wq_ll *wq; + u16 head; + int i; + + if (rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + return; + + wq = &rq->mpwqe.wq; + head = wq->head; + + /* Outstanding UMR WQEs (in progress) start at wq->head */ + for (i = 0; i < rq->mpwqe.umr_in_progress; i++) { + rq->dealloc_wqe(rq, head); + head = mlx5_wq_ll_get_wqe_next_ix(wq, head); + } + + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) { + u16 len; + + len = (rq->mpwqe.shampo->pi - rq->mpwqe.shampo->ci) & + (rq->mpwqe.shampo->hd_per_wq - 1); + mlx5e_shampo_dealloc_hd(rq, len, rq->mpwqe.shampo->ci, false); + rq->mpwqe.shampo->pi = rq->mpwqe.shampo->ci; + } + + rq->mpwqe.actual_wq_head = wq->head; + rq->mpwqe.umr_in_progress = 0; + rq->mpwqe.umr_completed = 0; +} + +void mlx5e_free_rx_descs(struct mlx5e_rq *rq) +{ + __be16 wqe_ix_be; + u16 wqe_ix; + + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + struct mlx5_wq_ll *wq = &rq->mpwqe.wq; + + mlx5e_free_rx_in_progress_descs(rq); + + while (!mlx5_wq_ll_is_empty(wq)) { + struct mlx5e_rx_wqe_ll *wqe; + + wqe_ix_be = *wq->tail_next; + wqe_ix = be16_to_cpu(wqe_ix_be); + wqe = mlx5_wq_ll_get_wqe(wq, wqe_ix); + rq->dealloc_wqe(rq, wqe_ix); + mlx5_wq_ll_pop(wq, wqe_ix_be, + &wqe->next.next_wqe_index); + } + + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) + mlx5e_shampo_dealloc_hd(rq, rq->mpwqe.shampo->hd_per_wq, + 0, true); + } else { + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + + while (!mlx5_wq_cyc_is_empty(wq)) { + wqe_ix = mlx5_wq_cyc_get_tail(wq); + rq->dealloc_wqe(rq, wqe_ix); + mlx5_wq_cyc_pop(wq); + } + } + +} + +int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, + struct mlx5e_xsk_param *xsk, int node, + struct mlx5e_rq *rq) +{ + struct mlx5_core_dev *mdev = rq->mdev; + int err; + + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) + __set_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state); + + err = mlx5e_alloc_rq(params, xsk, param, node, rq); + if (err) + return err; + + err = mlx5e_create_rq(rq, param); + if (err) + goto err_free_rq; + + err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); + if (err) + goto err_destroy_rq; + + if (MLX5_CAP_ETH(mdev, cqe_checksum_full)) + __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state); + + if (params->rx_dim_enabled) + __set_bit(MLX5E_RQ_STATE_AM, &rq->state); + + /* We disable csum_complete when XDP is enabled since + * XDP programs might manipulate packets which will render + * skb->checksum incorrect. + */ + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || params->xdp_prog) + __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state); + + /* For CQE compression on striding RQ, use stride index provided by + * HW if capability is supported. 
+ */ + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) && + MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index)) + __set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state); + + return 0; + +err_destroy_rq: + mlx5e_destroy_rq(rq); +err_free_rq: + mlx5e_free_rq(rq); + + return err; +} + +void mlx5e_activate_rq(struct mlx5e_rq *rq) +{ + set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); +} + +void mlx5e_deactivate_rq(struct mlx5e_rq *rq) +{ + clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); + synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */ +} + +void mlx5e_close_rq(struct mlx5e_rq *rq) +{ + cancel_work_sync(&rq->dim.work); + cancel_work_sync(&rq->recover_work); + mlx5e_destroy_rq(rq); + mlx5e_free_rx_descs(rq); + mlx5e_free_rq(rq); +} + +static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq) +{ + kvfree(sq->db.xdpi_fifo.xi); + kvfree(sq->db.wqe_info); +} + +static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa) +{ + struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + int dsegs_per_wq = wq_sz * MLX5_SEND_WQEBB_NUM_DS; + size_t size; + + size = array_size(sizeof(*xdpi_fifo->xi), dsegs_per_wq); + xdpi_fifo->xi = kvzalloc_node(size, GFP_KERNEL, numa); + if (!xdpi_fifo->xi) + return -ENOMEM; + + xdpi_fifo->pc = &sq->xdpi_fifo_pc; + xdpi_fifo->cc = &sq->xdpi_fifo_cc; + xdpi_fifo->mask = dsegs_per_wq - 1; + + return 0; +} + +static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa) +{ + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + size_t size; + int err; + + size = array_size(sizeof(*sq->db.wqe_info), wq_sz); + sq->db.wqe_info = kvzalloc_node(size, GFP_KERNEL, numa); + if (!sq->db.wqe_info) + return -ENOMEM; + + err = mlx5e_alloc_xdpsq_fifo(sq, numa); + if (err) { + mlx5e_free_xdpsq_db(sq); + return err; + } + + return 0; +} + +static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct xsk_buff_pool *xsk_pool, + struct mlx5e_sq_param *param, + struct mlx5e_xdpsq *sq, + bool is_redirect) +{ + void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); + struct mlx5_core_dev *mdev = c->mdev; + struct mlx5_wq_cyc *wq = &sq->wq; + int err; + + sq->pdev = c->pdev; + sq->mkey_be = c->mkey_be; + sq->channel = c; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + sq->min_inline_mode = params->tx_min_inline_mode; + sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN; + sq->xsk_pool = xsk_pool; + + sq->stats = sq->xsk_pool ? + &c->priv->channel_stats[c->ix]->xsksq : + is_redirect ? + &c->priv->channel_stats[c->ix]->xdpsq : + &c->priv->channel_stats[c->ix]->rq_xdpsq; + sq->stop_room = param->is_mpw ? 
mlx5e_stop_room_for_mpwqe(mdev) : + mlx5e_stop_room_for_max_wqe(mdev); + sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); + + param->wq.db_numa_node = cpu_to_node(c->cpu); + err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl); + if (err) + return err; + wq->db = &wq->db[MLX5_SND_DBR]; + + err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu)); + if (err) + goto err_sq_wq_destroy; + + return 0; + +err_sq_wq_destroy: + mlx5_wq_destroy(&sq->wq_ctrl); + + return err; +} + +static void mlx5e_free_xdpsq(struct mlx5e_xdpsq *sq) +{ + mlx5e_free_xdpsq_db(sq); + mlx5_wq_destroy(&sq->wq_ctrl); +} + +static void mlx5e_free_icosq_db(struct mlx5e_icosq *sq) +{ + kvfree(sq->db.wqe_info); +} + +static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa) +{ + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + size_t size; + + size = array_size(wq_sz, sizeof(*sq->db.wqe_info)); + sq->db.wqe_info = kvzalloc_node(size, GFP_KERNEL, numa); + if (!sq->db.wqe_info) + return -ENOMEM; + + return 0; +} + +static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work) +{ + struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq, + recover_work); + + mlx5e_reporter_icosq_cqe_err(sq); +} + +static void mlx5e_async_icosq_err_cqe_work(struct work_struct *recover_work) +{ + struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq, + recover_work); + + /* Not implemented yet. */ + + netdev_warn(sq->channel->netdev, "async_icosq recovery is not implemented\n"); +} + +static int mlx5e_alloc_icosq(struct mlx5e_channel *c, + struct mlx5e_sq_param *param, + struct mlx5e_icosq *sq, + work_func_t recover_work_func) +{ + void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); + struct mlx5_core_dev *mdev = c->mdev; + struct mlx5_wq_cyc *wq = &sq->wq; + int err; + + sq->channel = c; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + sq->reserved_room = param->stop_room; + + param->wq.db_numa_node = cpu_to_node(c->cpu); + err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl); + if (err) + return err; + wq->db = &wq->db[MLX5_SND_DBR]; + + err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu)); + if (err) + goto err_sq_wq_destroy; + + INIT_WORK(&sq->recover_work, recover_work_func); + + return 0; + +err_sq_wq_destroy: + mlx5_wq_destroy(&sq->wq_ctrl); + + return err; +} + +static void mlx5e_free_icosq(struct mlx5e_icosq *sq) +{ + mlx5e_free_icosq_db(sq); + mlx5_wq_destroy(&sq->wq_ctrl); +} + +void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq) +{ + kvfree(sq->db.wqe_info); + kvfree(sq->db.skb_fifo.fifo); + kvfree(sq->db.dma_fifo); +} + +int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) +{ + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; + + sq->db.dma_fifo = kvzalloc_node(array_size(df_sz, + sizeof(*sq->db.dma_fifo)), + GFP_KERNEL, numa); + sq->db.skb_fifo.fifo = kvzalloc_node(array_size(df_sz, + sizeof(*sq->db.skb_fifo.fifo)), + GFP_KERNEL, numa); + sq->db.wqe_info = kvzalloc_node(array_size(wq_sz, + sizeof(*sq->db.wqe_info)), + GFP_KERNEL, numa); + if (!sq->db.dma_fifo || !sq->db.skb_fifo.fifo || !sq->db.wqe_info) { + mlx5e_free_txqsq_db(sq); + return -ENOMEM; + } + + sq->dma_fifo_mask = df_sz - 1; + + sq->db.skb_fifo.pc = &sq->skb_fifo_pc; + sq->db.skb_fifo.cc = &sq->skb_fifo_cc; + sq->db.skb_fifo.mask = df_sz - 1; + + return 0; +} + +static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, + int txq_ix, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq, + int tc) +{ + void *sqc_wq =
MLX5_ADDR_OF(sqc, param->sqc, wq); + struct mlx5_core_dev *mdev = c->mdev; + struct mlx5_wq_cyc *wq = &sq->wq; + int err; + + sq->pdev = c->pdev; + sq->clock = &mdev->clock; + sq->mkey_be = c->mkey_be; + sq->netdev = c->netdev; + sq->mdev = c->mdev; + sq->priv = c->priv; + sq->ch_ix = c->ix; + sq->txq_ix = txq_ix; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + sq->min_inline_mode = params->tx_min_inline_mode; + sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); + INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); + if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) + set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); + if (mlx5_ipsec_device_caps(c->priv->mdev)) + set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); + if (param->is_mpw) + set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state); + sq->stop_room = param->stop_room; + sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev); + + param->wq.db_numa_node = cpu_to_node(c->cpu); + err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl); + if (err) + return err; + wq->db = &wq->db[MLX5_SND_DBR]; + + err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu)); + if (err) + goto err_sq_wq_destroy; + + INIT_WORK(&sq->dim.work, mlx5e_tx_dim_work); + sq->dim.mode = params->tx_cq_moderation.cq_period_mode; + + return 0; + +err_sq_wq_destroy: + mlx5_wq_destroy(&sq->wq_ctrl); + + return err; +} + +void mlx5e_free_txqsq(struct mlx5e_txqsq *sq) +{ + mlx5e_free_txqsq_db(sq); + mlx5_wq_destroy(&sq->wq_ctrl); +} + +static int mlx5e_create_sq(struct mlx5_core_dev *mdev, + struct mlx5e_sq_param *param, + struct mlx5e_create_sq_param *csp, + u32 *sqn) +{ + u8 ts_format; + void *in; + void *sqc; + void *wq; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_sq_in) + + sizeof(u64) * csp->wq_ctrl->buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + ts_format = mlx5_is_real_time_sq(mdev) ?
+ MLX5_TIMESTAMP_FORMAT_REAL_TIME : + MLX5_TIMESTAMP_FORMAT_FREE_RUNNING; + sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); + wq = MLX5_ADDR_OF(sqc, sqc, wq); + + memcpy(sqc, param->sqc, sizeof(param->sqc)); + MLX5_SET(sqc, sqc, tis_lst_sz, csp->tis_lst_sz); + MLX5_SET(sqc, sqc, tis_num_0, csp->tisn); + MLX5_SET(sqc, sqc, cqn, csp->cqn); + MLX5_SET(sqc, sqc, ts_cqe_to_dest_cqn, csp->ts_cqe_to_dest_cqn); + MLX5_SET(sqc, sqc, ts_format, ts_format); + + + if (MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) + MLX5_SET(sqc, sqc, min_wqe_inline_mode, csp->min_inline_mode); + + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + MLX5_SET(sqc, sqc, flush_in_error_en, 1); + + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.hw_objs.bfreg.index); + MLX5_SET(wq, wq, log_wq_pg_sz, csp->wq_ctrl->buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(wq, wq, dbr_addr, csp->wq_ctrl->db.dma); + + mlx5_fill_page_frag_array(&csp->wq_ctrl->buf, + (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); + + err = mlx5_core_create_sq(mdev, in, inlen, sqn); + + kvfree(in); + + return err; +} + +int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, + struct mlx5e_modify_sq_param *p) +{ + u64 bitmask = 0; + void *in; + void *sqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_sq_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); + + MLX5_SET(modify_sq_in, in, sq_state, p->curr_state); + MLX5_SET(sqc, sqc, state, p->next_state); + if (p->rl_update && p->next_state == MLX5_SQC_STATE_RDY) { + bitmask |= 1; + MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index); + } + if (p->qos_update && p->next_state == MLX5_SQC_STATE_RDY) { + bitmask |= 1 << 2; + MLX5_SET(sqc, sqc, qos_queue_group_id, p->qos_queue_group_id); + } + MLX5_SET64(modify_sq_in, in, modify_bitmask, bitmask); + + err = mlx5_core_modify_sq(mdev, sqn, in); + + kvfree(in); + + return err; +} + +static void mlx5e_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn) +{ + mlx5_core_destroy_sq(mdev, sqn); +} + +int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, + struct mlx5e_sq_param *param, + struct mlx5e_create_sq_param *csp, + u16 qos_queue_group_id, + u32 *sqn) +{ + struct mlx5e_modify_sq_param msp = {0}; + int err; + + err = mlx5e_create_sq(mdev, param, csp, sqn); + if (err) + return err; + + msp.curr_state = MLX5_SQC_STATE_RST; + msp.next_state = MLX5_SQC_STATE_RDY; + if (qos_queue_group_id) { + msp.qos_update = true; + msp.qos_queue_group_id = qos_queue_group_id; + } + err = mlx5e_modify_sq(mdev, *sqn, &msp); + if (err) + mlx5e_destroy_sq(mdev, *sqn); + + return err; +} + +static int mlx5e_set_sq_maxrate(struct net_device *dev, + struct mlx5e_txqsq *sq, u32 rate); + +int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix, + struct mlx5e_params *params, struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, + struct mlx5e_sq_stats *sq_stats) +{ + struct mlx5e_create_sq_param csp = {}; + u32 tx_rate; + int err; + + err = mlx5e_alloc_txqsq(c, txq_ix, params, param, sq, tc); + if (err) + return err; + + sq->stats = sq_stats; + + csp.tisn = tisn; + csp.tis_lst_sz = 1; + csp.cqn = sq->cq.mcq.cqn; + csp.wq_ctrl = &sq->wq_ctrl; + csp.min_inline_mode = sq->min_inline_mode; + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, qos_queue_group_id, &sq->sqn); + if (err) + goto err_free_txqsq; + + tx_rate = c->priv->tx_rates[sq->txq_ix]; + if (tx_rate) + mlx5e_set_sq_maxrate(c->netdev, sq, 
tx_rate); + + if (params->tx_dim_enabled) + sq->state |= BIT(MLX5E_SQ_STATE_AM); + + return 0; + +err_free_txqsq: + mlx5e_free_txqsq(sq); + + return err; +} + +void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) +{ + sq->txq = netdev_get_tx_queue(sq->netdev, sq->txq_ix); + set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + netdev_tx_reset_queue(sq->txq); + netif_tx_start_queue(sq->txq); +} + +void mlx5e_tx_disable_queue(struct netdev_queue *txq) +{ + __netif_tx_lock_bh(txq); + netif_tx_stop_queue(txq); + __netif_tx_unlock_bh(txq); +} + +void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + synchronize_net(); /* Sync with NAPI to prevent netif_tx_wake_queue. */ + + mlx5e_tx_disable_queue(sq->txq); + + /* last doorbell out, godspeed .. */ + if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) { + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + struct mlx5e_tx_wqe *nop; + + sq->db.wqe_info[pi] = (struct mlx5e_tx_wqe_info) { + .num_wqebbs = 1, + }; + + nop = mlx5e_post_nop(wq, sq->sqn, &sq->pc); + mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nop->ctrl); + } +} + +void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) +{ + struct mlx5_core_dev *mdev = sq->mdev; + struct mlx5_rate_limit rl = {0}; + + cancel_work_sync(&sq->dim.work); + cancel_work_sync(&sq->recover_work); + mlx5e_destroy_sq(mdev, sq->sqn); + if (sq->rate_limit) { + rl.rate = sq->rate_limit; + mlx5_rl_remove_rate(mdev, &rl); + } + mlx5e_free_txqsq_descs(sq); + mlx5e_free_txqsq(sq); +} + +void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) +{ + struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq, + recover_work); + + mlx5e_reporter_tx_err_cqe(sq); +} + +static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct mlx5e_icosq *sq, + work_func_t recover_work_func) +{ + struct mlx5e_create_sq_param csp = {}; + int err; + + err = mlx5e_alloc_icosq(c, param, sq, recover_work_func); + if (err) + return err; + + csp.cqn = sq->cq.mcq.cqn; + csp.wq_ctrl = &sq->wq_ctrl; + csp.min_inline_mode = params->tx_min_inline_mode; + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); + if (err) + goto err_free_icosq; + + if (param->is_tls) { + sq->ktls_resync = mlx5e_ktls_rx_resync_create_resp_list(); + if (IS_ERR(sq->ktls_resync)) { + err = PTR_ERR(sq->ktls_resync); + goto err_destroy_icosq; + } + } + return 0; + +err_destroy_icosq: + mlx5e_destroy_sq(c->mdev, sq->sqn); +err_free_icosq: + mlx5e_free_icosq(sq); + + return err; +} + +void mlx5e_activate_icosq(struct mlx5e_icosq *icosq) +{ + set_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state); +} + +void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq) +{ + clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state); + synchronize_net(); /* Sync with NAPI. 
*/ +} + +static void mlx5e_close_icosq(struct mlx5e_icosq *sq) +{ + struct mlx5e_channel *c = sq->channel; + + if (sq->ktls_resync) + mlx5e_ktls_rx_resync_destroy_resp_list(sq->ktls_resync); + mlx5e_destroy_sq(c->mdev, sq->sqn); + mlx5e_free_icosq_descs(sq); + mlx5e_free_icosq(sq); +} + +int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool, + struct mlx5e_xdpsq *sq, bool is_redirect) +{ + struct mlx5e_create_sq_param csp = {}; + int err; + + err = mlx5e_alloc_xdpsq(c, params, xsk_pool, param, sq, is_redirect); + if (err) + return err; + + csp.tis_lst_sz = 1; + csp.tisn = c->priv->tisn[c->lag_port][0]; /* tc = 0 */ + csp.cqn = sq->cq.mcq.cqn; + csp.wq_ctrl = &sq->wq_ctrl; + csp.min_inline_mode = sq->min_inline_mode; + set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + + /* Don't enable multi buffer on XDP_REDIRECT SQ, as it's not yet + * supported by upstream, and there is no defined trigger to allow + * transmitting redirected multi-buffer frames. + */ + if (param->is_xdp_mb && !is_redirect) + set_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state); + + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); + if (err) + goto err_free_xdpsq; + + mlx5e_set_xmit_fp(sq, param->is_mpw); + + if (!param->is_mpw && !test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) { + unsigned int ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1; + unsigned int inline_hdr_sz = 0; + int i; + + if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { + inline_hdr_sz = MLX5E_XDP_MIN_INLINE; + ds_cnt++; + } + + /* Pre initialize fixed WQE fields */ + for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) { + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; + struct mlx5_wqe_data_seg *dseg; + + sq->db.wqe_info[i] = (struct mlx5e_xdp_wqe_info) { + .num_wqebbs = 1, + .num_pkts = 1, + }; + + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); + + dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1); + dseg->lkey = sq->mkey_be; + } + } + + return 0; + +err_free_xdpsq: + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + mlx5e_free_xdpsq(sq); + + return err; +} + +void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) +{ + struct mlx5e_channel *c = sq->channel; + + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + synchronize_net(); /* Sync with NAPI. 
*/ + + mlx5e_destroy_sq(c->mdev, sq->sqn); + mlx5e_free_xdpsq_descs(sq); + mlx5e_free_xdpsq(sq); +} + +static int mlx5e_alloc_cq_common(struct mlx5e_priv *priv, + struct mlx5e_cq_param *param, + struct mlx5e_cq *cq) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_core_cq *mcq = &cq->mcq; + int err; + u32 i; + + err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq, + &cq->wq_ctrl); + if (err) + return err; + + mcq->cqe_sz = 64; + mcq->set_ci_db = cq->wq_ctrl.db.db; + mcq->arm_db = cq->wq_ctrl.db.db + 1; + *mcq->set_ci_db = 0; + *mcq->arm_db = 0; + mcq->vector = param->eq_ix; + mcq->comp = mlx5e_completion_event; + mcq->event = mlx5e_cq_error_event; + + for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); + + cqe->op_own = 0xf1; + } + + cq->mdev = mdev; + cq->netdev = priv->netdev; + cq->priv = priv; + + return 0; +} + +static int mlx5e_alloc_cq(struct mlx5e_priv *priv, + struct mlx5e_cq_param *param, + struct mlx5e_create_cq_param *ccp, + struct mlx5e_cq *cq) +{ + int err; + + param->wq.buf_numa_node = ccp->node; + param->wq.db_numa_node = ccp->node; + param->eq_ix = ccp->ix; + + err = mlx5e_alloc_cq_common(priv, param, cq); + + cq->napi = ccp->napi; + cq->ch_stats = ccp->ch_stats; + + return err; +} + +static void mlx5e_free_cq(struct mlx5e_cq *cq) +{ + mlx5_wq_destroy(&cq->wq_ctrl); +} + +static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) +{ + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; + struct mlx5_core_dev *mdev = cq->mdev; + struct mlx5_core_cq *mcq = &cq->mcq; + + void *in; + void *cqc; + int inlen; + int eqn; + int err; + + err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn); + if (err) + return err; + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + sizeof(u64) * cq->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + + memcpy(cqc, param->cqc, sizeof(param->cqc)); + + mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); + + MLX5_SET(cqc, cqc, cq_period_mode, param->cq_period_mode); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); + MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); + MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); + + err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out)); + + kvfree(in); + + if (err) + return err; + + mlx5e_cq_arm(cq); + + return 0; +} + +static void mlx5e_destroy_cq(struct mlx5e_cq *cq) +{ + mlx5_core_destroy_cq(cq->mdev, &cq->mcq); +} + +int mlx5e_open_cq(struct mlx5e_priv *priv, struct dim_cq_moder moder, + struct mlx5e_cq_param *param, struct mlx5e_create_cq_param *ccp, + struct mlx5e_cq *cq) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + err = mlx5e_alloc_cq(priv, param, ccp, cq); + if (err) + return err; + + err = mlx5e_create_cq(cq, param); + if (err) + goto err_free_cq; + + if (MLX5_CAP_GEN(mdev, cq_moderation)) + mlx5_core_modify_cq_moderation(mdev, &cq->mcq, moder.usec, moder.pkts); + return 0; + +err_free_cq: + mlx5e_free_cq(cq); + + return err; +} + +void mlx5e_close_cq(struct mlx5e_cq *cq) +{ + mlx5e_destroy_cq(cq); + mlx5e_free_cq(cq); +} + +static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_create_cq_param *ccp, + struct mlx5e_channel_param *cparam) +{ + int err; + int tc; + + for (tc = 0; tc < c->num_tc; tc++) { + err = mlx5e_open_cq(c->priv,
params->tx_cq_moderation, &cparam->txq_sq.cqp, + ccp, &c->sq[tc].cq); + if (err) + goto err_close_tx_cqs; + } + + return 0; + +err_close_tx_cqs: + for (tc--; tc >= 0; tc--) + mlx5e_close_cq(&c->sq[tc].cq); + + return err; +} + +static void mlx5e_close_tx_cqs(struct mlx5e_channel *c) +{ + int tc; + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_close_cq(&c->sq[tc].cq); +} + +static int mlx5e_mqprio_txq_to_tc(struct netdev_tc_txq *tc_to_txq, unsigned int txq) +{ + int tc; + + for (tc = 0; tc < TC_MAX_QUEUE; tc++) + if (txq - tc_to_txq[tc].offset < tc_to_txq[tc].count) + return tc; + + WARN(1, "Unexpected TCs configuration. No match found for txq %u", txq); + return -ENOENT; +} + +static int mlx5e_txq_get_qos_node_hw_id(struct mlx5e_params *params, int txq_ix, + u32 *hw_id) +{ + int tc; + + if (params->mqprio.mode != TC_MQPRIO_MODE_CHANNEL) { + *hw_id = 0; + return 0; + } + + tc = mlx5e_mqprio_txq_to_tc(params->mqprio.tc_to_txq, txq_ix); + if (tc < 0) + return tc; + + if (tc >= params->mqprio.num_tc) { + WARN(1, "Unexpected TCs configuration. tc %d is out of range of %u", + tc, params->mqprio.num_tc); + return -EINVAL; + } + + *hw_id = params->mqprio.channel.hw_id[tc]; + return 0; +} + +static int mlx5e_open_sqs(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam) +{ + int err, tc; + + for (tc = 0; tc < mlx5e_get_dcb_num_tc(params); tc++) { + int txq_ix = c->ix + tc * params->num_channels; + u32 qos_queue_group_id; + + err = mlx5e_txq_get_qos_node_hw_id(params, txq_ix, &qos_queue_group_id); + if (err) + goto err_close_sqs; + + err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix, + params, &cparam->txq_sq, &c->sq[tc], tc, + qos_queue_group_id, + &c->priv->channel_stats[c->ix]->sq[tc]); + if (err) + goto err_close_sqs; + } + + return 0; + +err_close_sqs: + for (tc--; tc >= 0; tc--) + mlx5e_close_txqsq(&c->sq[tc]); + + return err; +} + +static void mlx5e_close_sqs(struct mlx5e_channel *c) +{ + int tc; + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_close_txqsq(&c->sq[tc]); +} + +static int mlx5e_set_sq_maxrate(struct net_device *dev, + struct mlx5e_txqsq *sq, u32 rate) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_modify_sq_param msp = {0}; + struct mlx5_rate_limit rl = {0}; + u16 rl_index = 0; + int err; + + if (rate == sq->rate_limit) + /* nothing to do */ + return 0; + + if (sq->rate_limit) { + rl.rate = sq->rate_limit; + /* remove current rl index to free space to next ones */ + mlx5_rl_remove_rate(mdev, &rl); + } + + sq->rate_limit = 0; + + if (rate) { + rl.rate = rate; + err = mlx5_rl_add_rate(mdev, &rl_index, &rl); + if (err) { + netdev_err(dev, "Failed configuring rate %u: %d\n", + rate, err); + return err; + } + } + + msp.curr_state = MLX5_SQC_STATE_RDY; + msp.next_state = MLX5_SQC_STATE_RDY; + msp.rl_index = rl_index; + msp.rl_update = true; + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); + if (err) { + netdev_err(dev, "Failed configuring rate %u: %d\n", + rate, err); + /* remove the rate from the table */ + if (rate) + mlx5_rl_remove_rate(mdev, &rl); + return err; + } + + sq->rate_limit = rate; + return 0; +} + +static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_txqsq *sq = priv->txq2sq[index]; + int err = 0; + + if (!mlx5_rl_is_supported(mdev)) { + netdev_err(dev, "Rate limiting is not supported on this device\n"); + return -EINVAL; + } + + /* 
rate is given in Mb/sec, HW config is in Kb/sec */ + rate = rate << 10; + + /* Check whether rate in valid range, 0 is always valid */ + if (rate && !mlx5_rl_is_in_range(mdev, rate)) { + netdev_err(dev, "TX rate %u, is not in range\n", rate); + return -ERANGE; + } + + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + err = mlx5e_set_sq_maxrate(dev, sq, rate); + if (!err) + priv->tx_rates[index] = rate; + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq_param *rq_params) +{ + int err; + + err = mlx5e_init_rxq_rq(c, params, &c->rq); + if (err) + return err; + + return mlx5e_open_rq(params, rq_params, NULL, cpu_to_node(c->cpu), &c->rq); +} + +static int mlx5e_open_queues(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam) +{ + struct dim_cq_moder icocq_moder = {0, 0}; + struct mlx5e_create_cq_param ccp; + int err; + + mlx5e_build_create_cq_param(&ccp, c); + + err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->async_icosq.cqp, &ccp, + &c->async_icosq.cq); + if (err) + return err; + + err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->icosq.cqp, &ccp, + &c->icosq.cq); + if (err) + goto err_close_async_icosq_cq; + + err = mlx5e_open_tx_cqs(c, params, &ccp, cparam); + if (err) + goto err_close_icosq_cq; + + err = mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->xdp_sq.cqp, &ccp, + &c->xdpsq.cq); + if (err) + goto err_close_tx_cqs; + + err = mlx5e_open_cq(c->priv, params->rx_cq_moderation, &cparam->rq.cqp, &ccp, + &c->rq.cq); + if (err) + goto err_close_xdp_tx_cqs; + + err = c->xdp ? mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->xdp_sq.cqp, + &ccp, &c->rq_xdpsq.cq) : 0; + if (err) + goto err_close_rx_cq; + + spin_lock_init(&c->async_icosq_lock); + + err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq, + mlx5e_async_icosq_err_cqe_work); + if (err) + goto err_close_xdpsq_cq; + + mutex_init(&c->icosq_recovery_lock); + + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq, + mlx5e_icosq_err_cqe_work); + if (err) + goto err_close_async_icosq; + + err = mlx5e_open_sqs(c, params, cparam); + if (err) + goto err_close_icosq; + + err = mlx5e_open_rxq_rq(c, params, &cparam->rq); + if (err) + goto err_close_sqs; + + if (c->xdp) { + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, + &c->rq_xdpsq, false); + if (err) + goto err_close_rq; + } + + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, &c->xdpsq, true); + if (err) + goto err_close_xdp_sq; + + return 0; + +err_close_xdp_sq: + if (c->xdp) + mlx5e_close_xdpsq(&c->rq_xdpsq); + +err_close_rq: + mlx5e_close_rq(&c->rq); + +err_close_sqs: + mlx5e_close_sqs(c); + +err_close_icosq: + mlx5e_close_icosq(&c->icosq); + +err_close_async_icosq: + mlx5e_close_icosq(&c->async_icosq); + +err_close_xdpsq_cq: + if (c->xdp) + mlx5e_close_cq(&c->rq_xdpsq.cq); + +err_close_rx_cq: + mlx5e_close_cq(&c->rq.cq); + +err_close_xdp_tx_cqs: + mlx5e_close_cq(&c->xdpsq.cq); + +err_close_tx_cqs: + mlx5e_close_tx_cqs(c); + +err_close_icosq_cq: + mlx5e_close_cq(&c->icosq.cq); + +err_close_async_icosq_cq: + mlx5e_close_cq(&c->async_icosq.cq); + + return err; +} + +static void mlx5e_close_queues(struct mlx5e_channel *c) +{ + mlx5e_close_xdpsq(&c->xdpsq); + if (c->xdp) + mlx5e_close_xdpsq(&c->rq_xdpsq); + /* The same ICOSQ is used for UMRs for both RQ and XSKRQ. 
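 * Cancel its recovery work before the RQs below are freed;
 * cancel_work_sync() also waits for a recovery run that is already executing.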
*/ + cancel_work_sync(&c->icosq.recover_work); + mlx5e_close_rq(&c->rq); + mlx5e_close_sqs(c); + mlx5e_close_icosq(&c->icosq); + mutex_destroy(&c->icosq_recovery_lock); + mlx5e_close_icosq(&c->async_icosq); + if (c->xdp) + mlx5e_close_cq(&c->rq_xdpsq.cq); + mlx5e_close_cq(&c->rq.cq); + mlx5e_close_cq(&c->xdpsq.cq); + mlx5e_close_tx_cqs(c); + mlx5e_close_cq(&c->icosq.cq); + mlx5e_close_cq(&c->async_icosq.cq); +} + +static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix) +{ + u16 port_aff_bias = mlx5_core_is_pf(mdev) ? 0 : MLX5_CAP_GEN(mdev, vhca_id); + + return (ix + port_aff_bias) % mlx5e_get_num_lag_ports(mdev); +} + +static int mlx5e_channel_stats_alloc(struct mlx5e_priv *priv, int ix, int cpu) +{ + if (ix > priv->stats_nch) { + netdev_warn(priv->netdev, "Unexpected channel stats index %d > %d\n", ix, + priv->stats_nch); + return -EINVAL; + } + + if (priv->channel_stats[ix]) + return 0; + + /* Asymmetric dynamic memory allocation. + * Freed in mlx5e_priv_arrays_free, not on channel closure. + */ + mlx5e_dbg(DRV, priv, "Creating channel stats %d\n", ix); + priv->channel_stats[ix] = kvzalloc_node(sizeof(**priv->channel_stats), + GFP_KERNEL, cpu_to_node(cpu)); + if (!priv->channel_stats[ix]) + return -ENOMEM; + priv->stats_nch++; + + return 0; +} + +void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c) +{ + spin_lock_bh(&c->async_icosq_lock); + mlx5e_trigger_irq(&c->async_icosq); + spin_unlock_bh(&c->async_icosq_lock); +} + +void mlx5e_trigger_napi_sched(struct napi_struct *napi) +{ + local_bh_disable(); + napi_schedule(napi); + local_bh_enable(); +} + +static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam, + struct xsk_buff_pool *xsk_pool, + struct mlx5e_channel **cp) +{ + int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix)); + struct net_device *netdev = priv->netdev; + struct mlx5e_xsk_param xsk; + struct mlx5e_channel *c; + unsigned int irq; + int err; + + err = mlx5_vector2irqn(priv->mdev, ix, &irq); + if (err) + return err; + + err = mlx5e_channel_stats_alloc(priv, ix, cpu); + if (err) + return err; + + c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); + if (!c) + return -ENOMEM; + + c->priv = priv; + c->mdev = priv->mdev; + c->tstamp = &priv->tstamp; + c->ix = ix; + c->cpu = cpu; + c->pdev = mlx5_core_dma_dev(priv->mdev); + c->netdev = priv->netdev; + c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey); + c->num_tc = mlx5e_get_dcb_num_tc(params); + c->xdp = !!params->xdp_prog; + c->stats = &priv->channel_stats[ix]->ch; + c->aff_mask = irq_get_effective_affinity_mask(irq); + c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix); + + netif_napi_add(netdev, &c->napi, mlx5e_napi_poll); + + err = mlx5e_open_queues(c, params, cparam); + if (unlikely(err)) + goto err_napi_del; + + if (xsk_pool) { + mlx5e_build_xsk_param(xsk_pool, &xsk); + err = mlx5e_open_xsk(priv, params, &xsk, xsk_pool, c); + if (unlikely(err)) + goto err_close_queues; + } + + *cp = c; + + return 0; + +err_close_queues: + mlx5e_close_queues(c); + +err_napi_del: + netif_napi_del(&c->napi); + + kvfree(c); + + return err; +} + +static void mlx5e_activate_channel(struct mlx5e_channel *c) +{ + int tc; + + napi_enable(&c->napi); + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_activate_txqsq(&c->sq[tc]); + mlx5e_activate_icosq(&c->icosq); + mlx5e_activate_icosq(&c->async_icosq); + + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_activate_xsk(c); + else + mlx5e_activate_rq(&c->rq); + + 
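	/* Kick NAPI once via the async ICOSQ (mlx5e_trigger_napi_icosq()
	 * posts a no-op WQE whose completion schedules NAPI) so the freshly
	 * activated queues are serviced without waiting for real traffic.
	 */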
mlx5e_trigger_napi_icosq(c); +} + +static void mlx5e_deactivate_channel(struct mlx5e_channel *c) +{ + int tc; + + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_deactivate_xsk(c); + else + mlx5e_deactivate_rq(&c->rq); + + mlx5e_deactivate_icosq(&c->async_icosq); + mlx5e_deactivate_icosq(&c->icosq); + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_deactivate_txqsq(&c->sq[tc]); + mlx5e_qos_deactivate_queues(c); + + napi_disable(&c->napi); +} + +static void mlx5e_close_channel(struct mlx5e_channel *c) +{ + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_close_xsk(c); + mlx5e_close_queues(c); + mlx5e_qos_close_queues(c); + netif_napi_del(&c->napi); + + kvfree(c); +} + +int mlx5e_open_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *chs) +{ + struct mlx5e_channel_param *cparam; + int err = -ENOMEM; + int i; + + chs->num = chs->params.num_channels; + + chs->c = kcalloc(chs->num, sizeof(struct mlx5e_channel *), GFP_KERNEL); + cparam = kvzalloc(sizeof(struct mlx5e_channel_param), GFP_KERNEL); + if (!chs->c || !cparam) + goto err_free; + + err = mlx5e_build_channel_param(priv->mdev, &chs->params, priv->q_counter, cparam); + if (err) + goto err_free; + + for (i = 0; i < chs->num; i++) { + struct xsk_buff_pool *xsk_pool = NULL; + + if (chs->params.xdp_prog) + xsk_pool = mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, i); + + err = mlx5e_open_channel(priv, i, &chs->params, cparam, xsk_pool, &chs->c[i]); + if (err) + goto err_close_channels; + } + + if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS) || chs->params.ptp_rx) { + err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp); + if (err) + goto err_close_channels; + } + + if (priv->htb) { + err = mlx5e_qos_open_queues(priv, chs); + if (err) + goto err_close_ptp; + } + + mlx5e_health_channels_update(priv); + kvfree(cparam); + return 0; + +err_close_ptp: + if (chs->ptp) + mlx5e_ptp_close(chs->ptp); + +err_close_channels: + for (i--; i >= 0; i--) + mlx5e_close_channel(chs->c[i]); + +err_free: + kfree(chs->c); + kvfree(cparam); + chs->num = 0; + return err; +} + +static void mlx5e_activate_channels(struct mlx5e_channels *chs) +{ + int i; + + for (i = 0; i < chs->num; i++) + mlx5e_activate_channel(chs->c[i]); + + if (chs->ptp) + mlx5e_ptp_activate_channel(chs->ptp); +} + +static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs) +{ + int err = 0; + int i; + + for (i = 0; i < chs->num; i++) { + int timeout = err ? 0 : MLX5E_RQ_WQES_TIMEOUT; + struct mlx5e_channel *c = chs->c[i]; + + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + continue; + + err |= mlx5e_wait_for_min_rx_wqes(&c->rq, timeout); + + /* Don't wait on the XSK RQ, because the newer xdpsock sample + * doesn't provide any Fill Ring entries at the setup stage. + */ + } + + return err ? 
-ETIMEDOUT : 0; +} + +static void mlx5e_deactivate_channels(struct mlx5e_channels *chs) +{ + int i; + + if (chs->ptp) + mlx5e_ptp_deactivate_channel(chs->ptp); + + for (i = 0; i < chs->num; i++) + mlx5e_deactivate_channel(chs->c[i]); +} + +void mlx5e_close_channels(struct mlx5e_channels *chs) +{ + int i; + + ASSERT_RTNL(); + if (chs->ptp) { + mlx5e_ptp_close(chs->ptp); + chs->ptp = NULL; + } + for (i = 0; i < chs->num; i++) + mlx5e_close_channel(chs->c[i]); + + kfree(chs->c); + chs->num = 0; +} + +static int mlx5e_modify_tirs_packet_merge(struct mlx5e_priv *priv) +{ + struct mlx5e_rx_res *res = priv->rx_res; + + return mlx5e_rx_res_packet_merge_set_param(res, &priv->channels.params.packet_merge); +} + +static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_packet_merge); + +static int mlx5e_set_mtu(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, u16 mtu) +{ + u16 hw_mtu = MLX5E_SW2HW_MTU(params, mtu); + int err; + + err = mlx5_set_port_mtu(mdev, hw_mtu, 1); + if (err) + return err; + + /* Update vport context MTU */ + mlx5_modify_nic_vport_mtu(mdev, hw_mtu); + return 0; +} + +static void mlx5e_query_mtu(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, u16 *mtu) +{ + u16 hw_mtu = 0; + int err; + + err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu); + if (err || !hw_mtu) /* fallback to port oper mtu */ + mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1); + + *mtu = MLX5E_HW2SW_MTU(params, hw_mtu); +} + +int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) +{ + struct mlx5e_params *params = &priv->channels.params; + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + u16 mtu; + int err; + + err = mlx5e_set_mtu(mdev, params, params->sw_mtu); + if (err) + return err; + + mlx5e_query_mtu(mdev, params, &mtu); + if (mtu != params->sw_mtu) + netdev_warn(netdev, "%s: VPort MTU %d is different than netdev mtu %d\n", + __func__, mtu, params->sw_mtu); + + params->sw_mtu = mtu; + return 0; +} + +MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_set_dev_port_mtu); + +void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv) +{ + struct mlx5e_params *params = &priv->channels.params; + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + u16 max_mtu; + + /* MTU range: 68 - hw-specific max */ + netdev->min_mtu = ETH_MIN_MTU; + + mlx5_query_port_max_mtu(mdev, &max_mtu, 1); + netdev->max_mtu = min_t(unsigned int, MLX5E_HW2SW_MTU(params, max_mtu), + ETH_MAX_MTU); +} + +static int mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc, + struct netdev_tc_txq *tc_to_txq) +{ + int tc, err; + + netdev_reset_tc(netdev); + + if (ntc == 1) + return 0; + + err = netdev_set_num_tc(netdev, ntc); + if (err) { + netdev_WARN(netdev, "netdev_set_num_tc failed (%d), ntc = %d\n", err, ntc); + return err; + } + + for (tc = 0; tc < ntc; tc++) { + u16 count, offset; + + count = tc_to_txq[tc].count; + offset = tc_to_txq[tc].offset; + netdev_set_tc_queue(netdev, tc, count, offset); + } + + return 0; +} + +int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv) +{ + int nch, ntc, num_txqs, err; + int qos_queues = 0; + + if (priv->htb) + qos_queues = mlx5e_htb_cur_leaf_nodes(priv->htb); + + nch = priv->channels.params.num_channels; + ntc = mlx5e_get_dcb_num_tc(&priv->channels.params); + num_txqs = nch * ntc + qos_queues; + if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS)) + num_txqs += ntc; + + mlx5e_dbg(DRV, priv, "Setting num_txqs %d\n", num_txqs); + err = netif_set_real_num_tx_queues(priv->netdev, num_txqs); + if (err) 
+ netdev_warn(priv->netdev, "netif_set_real_num_tx_queues failed, %d\n", err); + + return err; +} + +static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) +{ + struct netdev_tc_txq old_tc_to_txq[TC_MAX_QUEUE], *tc_to_txq; + struct net_device *netdev = priv->netdev; + int old_num_txqs, old_ntc; + int nch, ntc; + int err; + int i; + + old_num_txqs = netdev->real_num_tx_queues; + old_ntc = netdev->num_tc ? : 1; + for (i = 0; i < ARRAY_SIZE(old_tc_to_txq); i++) + old_tc_to_txq[i] = netdev->tc_to_txq[i]; + + nch = priv->channels.params.num_channels; + ntc = priv->channels.params.mqprio.num_tc; + tc_to_txq = priv->channels.params.mqprio.tc_to_txq; + + err = mlx5e_netdev_set_tcs(netdev, nch, ntc, tc_to_txq); + if (err) + goto err_out; + err = mlx5e_update_tx_netdev_queues(priv); + if (err) + goto err_tcs; + err = netif_set_real_num_rx_queues(netdev, nch); + if (err) { + netdev_warn(netdev, "netif_set_real_num_rx_queues failed, %d\n", err); + goto err_txqs; + } + + return 0; + +err_txqs: + /* netif_set_real_num_rx_queues could fail only when nch increased. Only + * one of nch and ntc is changed in this function. That means, the call + * to netif_set_real_num_tx_queues below should not fail, because it + * decreases the number of TX queues. + */ + WARN_ON_ONCE(netif_set_real_num_tx_queues(netdev, old_num_txqs)); + +err_tcs: + WARN_ON_ONCE(mlx5e_netdev_set_tcs(netdev, old_num_txqs / old_ntc, old_ntc, + old_tc_to_txq)); +err_out: + return err; +} + +static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_update_netdev_queues); + +static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv, + struct mlx5e_params *params) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int num_comp_vectors, ix, irq; + + num_comp_vectors = mlx5_comp_vectors_count(mdev); + + for (ix = 0; ix < params->num_channels; ix++) { + cpumask_clear(priv->scratchpad.cpumask); + + for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) { + int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(mdev, irq)); + + cpumask_set_cpu(cpu, priv->scratchpad.cpumask); + } + + netif_set_xps_queue(priv->netdev, priv->scratchpad.cpumask, ix); + } +} + +static int mlx5e_num_channels_changed(struct mlx5e_priv *priv) +{ + u16 count = priv->channels.params.num_channels; + int err; + + err = mlx5e_update_netdev_queues(priv); + if (err) + return err; + + mlx5e_set_default_xps_cpumasks(priv, &priv->channels.params); + + /* This function may be called on attach, before priv->rx_res is created. */ + if (!netif_is_rxfh_configured(priv->netdev) && priv->rx_res) + mlx5e_rx_res_rss_set_indir_uniform(priv->rx_res, count); + + return 0; +} + +MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_num_channels_changed); + +static void mlx5e_build_txq_maps(struct mlx5e_priv *priv) +{ + int i, ch, tc, num_tc; + + ch = priv->channels.num; + num_tc = mlx5e_get_dcb_num_tc(&priv->channels.params); + + for (i = 0; i < ch; i++) { + for (tc = 0; tc < num_tc; tc++) { + struct mlx5e_channel *c = priv->channels.c[i]; + struct mlx5e_txqsq *sq = &c->sq[tc]; + + priv->txq2sq[sq->txq_ix] = sq; + } + } + + if (!priv->channels.ptp) + goto out; + + if (!test_bit(MLX5E_PTP_STATE_TX, priv->channels.ptp->state)) + goto out; + + for (tc = 0; tc < num_tc; tc++) { + struct mlx5e_ptp *c = priv->channels.ptp; + struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq; + + priv->txq2sq[sq->txq_ix] = sq; + } + +out: + /* Make the change to txq2sq visible before the queue is started. + * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, + * which pairs with this barrier. 
+ */ + smp_wmb(); +} + +void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) +{ + mlx5e_build_txq_maps(priv); + mlx5e_activate_channels(&priv->channels); + if (priv->htb) + mlx5e_qos_activate_queues(priv); + mlx5e_xdp_tx_enable(priv); + + /* dev_watchdog() wants all TX queues to be started when the carrier is + * OK, including the ones in range real_num_tx_queues..num_tx_queues-1. + * Make it happy to avoid TX timeout false alarms. + */ + netif_tx_start_all_queues(priv->netdev); + + if (mlx5e_is_vport_rep(priv)) + mlx5e_rep_activate_channels(priv); + + set_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state); + + mlx5e_wait_channels_min_rx_wqes(&priv->channels); + + if (priv->rx_res) + mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels); +} + +static void mlx5e_cancel_tx_timeout_work(struct mlx5e_priv *priv) +{ + WARN_ON_ONCE(test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state)); + if (current_work() != &priv->tx_timeout_work) + cancel_work_sync(&priv->tx_timeout_work); +} + +void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) +{ + if (priv->rx_res) + mlx5e_rx_res_channels_deactivate(priv->rx_res); + + clear_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state); + mlx5e_cancel_tx_timeout_work(priv); + + if (mlx5e_is_vport_rep(priv)) + mlx5e_rep_deactivate_channels(priv); + + /* The results of ndo_select_queue are unreliable, while netdev config + * is being changed (real_num_tx_queues, num_tc). Stop all queues to + * prevent ndo_start_xmit from being called, so that it can assume that + * the selected queue is always valid. + */ + netif_tx_disable(priv->netdev); + + mlx5e_xdp_tx_disable(priv); + mlx5e_deactivate_channels(&priv->channels); +} + +static int mlx5e_switch_priv_params(struct mlx5e_priv *priv, + struct mlx5e_params *new_params, + mlx5e_fp_preactivate preactivate, + void *context) +{ + struct mlx5e_params old_params; + + old_params = priv->channels.params; + priv->channels.params = *new_params; + + if (preactivate) { + int err; + + err = preactivate(priv, context); + if (err) { + priv->channels.params = old_params; + return err; + } + } + + return 0; +} + +static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *new_chs, + mlx5e_fp_preactivate preactivate, + void *context) +{ + struct net_device *netdev = priv->netdev; + struct mlx5e_channels old_chs; + int carrier_ok; + int err = 0; + + carrier_ok = netif_carrier_ok(netdev); + netif_carrier_off(netdev); + + mlx5e_deactivate_priv_channels(priv); + + old_chs = priv->channels; + priv->channels = *new_chs; + + /* New channels are ready to roll, call the preactivate hook if needed + * to modify HW settings or update kernel parameters. 
+ */ + if (preactivate) { + err = preactivate(priv, context); + if (err) { + priv->channels = old_chs; + goto out; + } + } + + mlx5e_close_channels(&old_chs); + priv->profile->update_rx(priv); + + mlx5e_selq_apply(&priv->selq); +out: + mlx5e_activate_priv_channels(priv); + + /* return carrier back if needed */ + if (carrier_ok) + netif_carrier_on(netdev); + + return err; +} + +int mlx5e_safe_switch_params(struct mlx5e_priv *priv, + struct mlx5e_params *params, + mlx5e_fp_preactivate preactivate, + void *context, bool reset) +{ + struct mlx5e_channels new_chs = {}; + int err; + + reset &= test_bit(MLX5E_STATE_OPENED, &priv->state); + if (!reset) + return mlx5e_switch_priv_params(priv, params, preactivate, context); + + new_chs.params = *params; + + mlx5e_selq_prepare_params(&priv->selq, &new_chs.params); + + err = mlx5e_open_channels(priv, &new_chs); + if (err) + goto err_cancel_selq; + + err = mlx5e_switch_priv_channels(priv, &new_chs, preactivate, context); + if (err) + goto err_close; + + return 0; + +err_close: + mlx5e_close_channels(&new_chs); + +err_cancel_selq: + mlx5e_selq_cancel(&priv->selq); + return err; +} + +int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv) +{ + return mlx5e_safe_switch_params(priv, &priv->channels.params, NULL, NULL, true); +} + +void mlx5e_timestamp_init(struct mlx5e_priv *priv) +{ + priv->tstamp.tx_type = HWTSTAMP_TX_OFF; + priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE; +} + +static void mlx5e_modify_admin_state(struct mlx5_core_dev *mdev, + enum mlx5_port_status state) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int vport_admin_state; + + mlx5_set_port_admin_status(mdev, state); + + if (mlx5_eswitch_mode(mdev) == MLX5_ESWITCH_OFFLOADS || + !MLX5_CAP_GEN(mdev, uplink_follow)) + return; + + if (state == MLX5_PORT_UP) + vport_admin_state = MLX5_VPORT_ADMIN_STATE_AUTO; + else + vport_admin_state = MLX5_VPORT_ADMIN_STATE_DOWN; + + mlx5_eswitch_set_vport_state(esw, MLX5_VPORT_UPLINK, vport_admin_state); +} + +int mlx5e_open_locked(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + mlx5e_selq_prepare_params(&priv->selq, &priv->channels.params); + + set_bit(MLX5E_STATE_OPENED, &priv->state); + + err = mlx5e_open_channels(priv, &priv->channels); + if (err) + goto err_clear_state_opened_flag; + + priv->profile->update_rx(priv); + mlx5e_selq_apply(&priv->selq); + mlx5e_activate_priv_channels(priv); + mlx5e_apply_traps(priv, true); + if (priv->profile->update_carrier) + priv->profile->update_carrier(priv); + + mlx5e_queue_update_stats(priv); + return 0; + +err_clear_state_opened_flag: + clear_bit(MLX5E_STATE_OPENED, &priv->state); + mlx5e_selq_cancel(&priv->selq); + return err; +} + +int mlx5e_open(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_open_locked(netdev); + if (!err) + mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_UP); + mutex_unlock(&priv->state_lock); + + return err; +} + +int mlx5e_close_locked(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + /* May already be CLOSED in case a previous configuration operation + * (e.g RX/TX queue size change) that involves close&open failed. 
+ */ + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + mlx5e_apply_traps(priv, false); + clear_bit(MLX5E_STATE_OPENED, &priv->state); + + netif_carrier_off(priv->netdev); + mlx5e_deactivate_priv_channels(priv); + mlx5e_close_channels(&priv->channels); + + return 0; +} + +int mlx5e_close(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + if (!netif_device_present(netdev)) + return -ENODEV; + + mutex_lock(&priv->state_lock); + mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_DOWN); + err = mlx5e_close_locked(netdev); + mutex_unlock(&priv->state_lock); + + return err; +} + +static void mlx5e_free_drop_rq(struct mlx5e_rq *rq) +{ + mlx5_wq_destroy(&rq->wq_ctrl); +} + +static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev, + struct mlx5e_rq *rq, + struct mlx5e_rq_param *param) +{ + void *rqc = param->rqc; + void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); + int err; + + param->wq.db_numa_node = param->wq.buf_numa_node; + + err = mlx5_wq_cyc_create(mdev, ¶m->wq, rqc_wq, &rq->wqe.wq, + &rq->wq_ctrl); + if (err) + return err; + + /* Mark as unused given "Drop-RQ" packets never reach XDP */ + xdp_rxq_info_unused(&rq->xdp_rxq); + + rq->mdev = mdev; + + return 0; +} + +static int mlx5e_alloc_drop_cq(struct mlx5e_priv *priv, + struct mlx5e_cq *cq, + struct mlx5e_cq_param *param) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); + param->wq.db_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); + + return mlx5e_alloc_cq_common(priv, param, cq); +} + +int mlx5e_open_drop_rq(struct mlx5e_priv *priv, + struct mlx5e_rq *drop_rq) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_cq_param cq_param = {}; + struct mlx5e_rq_param rq_param = {}; + struct mlx5e_cq *cq = &drop_rq->cq; + int err; + + mlx5e_build_drop_rq_param(mdev, priv->drop_rq_q_counter, &rq_param); + + err = mlx5e_alloc_drop_cq(priv, cq, &cq_param); + if (err) + return err; + + err = mlx5e_create_cq(cq, &cq_param); + if (err) + goto err_free_cq; + + err = mlx5e_alloc_drop_rq(mdev, drop_rq, &rq_param); + if (err) + goto err_destroy_cq; + + err = mlx5e_create_rq(drop_rq, &rq_param); + if (err) + goto err_free_rq; + + err = mlx5e_modify_rq_state(drop_rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); + if (err) + mlx5_core_warn(priv->mdev, "modify_rq_state failed, rx_if_down_packets won't be counted %d\n", err); + + return 0; + +err_free_rq: + mlx5e_free_drop_rq(drop_rq); + +err_destroy_cq: + mlx5e_destroy_cq(cq); + +err_free_cq: + mlx5e_free_cq(cq); + + return err; +} + +void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq) +{ + mlx5e_destroy_rq(drop_rq); + mlx5e_free_drop_rq(drop_rq); + mlx5e_destroy_cq(&drop_rq->cq); + mlx5e_free_cq(&drop_rq->cq); +} + +int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn) +{ + void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn); + + if (MLX5_GET(tisc, tisc, tls_en)) + MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn); + + if (mlx5_lag_is_lacp_owner(mdev)) + MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1); + + return mlx5_core_create_tis(mdev, in, tisn); +} + +void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn) +{ + mlx5_core_destroy_tis(mdev, tisn); +} + +void mlx5e_destroy_tises(struct mlx5e_priv *priv) +{ + int tc, i; + + for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++) + for (tc = 0; tc < priv->profile->max_tc; tc++) + mlx5e_destroy_tis(priv->mdev, priv->tisn[i][tc]); +} + 
+static bool mlx5e_lag_should_assign_affinity(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_GEN(mdev, lag_tx_port_affinity) && mlx5e_get_num_lag_ports(mdev) > 1; +} + +int mlx5e_create_tises(struct mlx5e_priv *priv) +{ + int tc, i; + int err; + + for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++) { + for (tc = 0; tc < priv->profile->max_tc; tc++) { + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + void *tisc; + + tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + MLX5_SET(tisc, tisc, prio, tc << 1); + + if (mlx5e_lag_should_assign_affinity(priv->mdev)) + MLX5_SET(tisc, tisc, lag_tx_port_affinity, i + 1); + + err = mlx5e_create_tis(priv->mdev, in, &priv->tisn[i][tc]); + if (err) + goto err_close_tises; + } + } + + return 0; + +err_close_tises: + for (; i >= 0; i--) { + for (tc--; tc >= 0; tc--) + mlx5e_destroy_tis(priv->mdev, priv->tisn[i][tc]); + tc = priv->profile->max_tc; + } + + return err; +} + +static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) +{ + if (priv->mqprio_rl) { + mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); + mlx5e_mqprio_rl_free(priv->mqprio_rl); + priv->mqprio_rl = NULL; + } + mlx5e_accel_cleanup_tx(priv); + mlx5e_destroy_tises(priv); +} + +static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) +{ + int err; + int i; + + for (i = 0; i < chs->num; i++) { + err = mlx5e_modify_rq_vsd(&chs->c[i]->rq, vsd); + if (err) + return err; + } + if (chs->ptp && test_bit(MLX5E_PTP_STATE_RX, chs->ptp->state)) + return mlx5e_modify_rq_vsd(&chs->ptp->rq, vsd); + + return 0; +} + +static void mlx5e_mqprio_build_default_tc_to_txq(struct netdev_tc_txq *tc_to_txq, + int ntc, int nch) +{ + int tc; + + memset(tc_to_txq, 0, sizeof(*tc_to_txq) * TC_MAX_QUEUE); + + /* Map netdev TCs to offset 0. + * We have our own UP to TXQ mapping for DCB mode of QoS + */ + for (tc = 0; tc < ntc; tc++) { + tc_to_txq[tc] = (struct netdev_tc_txq) { + .count = nch, + .offset = 0, + }; + } +} + +static void mlx5e_mqprio_build_tc_to_txq(struct netdev_tc_txq *tc_to_txq, + struct tc_mqprio_qopt *qopt) +{ + int tc; + + for (tc = 0; tc < TC_MAX_QUEUE; tc++) { + tc_to_txq[tc] = (struct netdev_tc_txq) { + .count = qopt->count[tc], + .offset = qopt->offset[tc], + }; + } +} + +static void mlx5e_params_mqprio_dcb_set(struct mlx5e_params *params, u8 num_tc) +{ + params->mqprio.mode = TC_MQPRIO_MODE_DCB; + params->mqprio.num_tc = num_tc; + mlx5e_mqprio_build_default_tc_to_txq(params->mqprio.tc_to_txq, num_tc, + params->num_channels); +} + +static void mlx5e_mqprio_rl_update_params(struct mlx5e_params *params, + struct mlx5e_mqprio_rl *rl) +{ + int tc; + + for (tc = 0; tc < TC_MAX_QUEUE; tc++) { + u32 hw_id = 0; + + if (rl) + mlx5e_mqprio_rl_get_node_hw_id(rl, tc, &hw_id); + params->mqprio.channel.hw_id[tc] = hw_id; + } +} + +static void mlx5e_params_mqprio_channel_set(struct mlx5e_params *params, + struct tc_mqprio_qopt_offload *mqprio, + struct mlx5e_mqprio_rl *rl) +{ + int tc; + + params->mqprio.mode = TC_MQPRIO_MODE_CHANNEL; + params->mqprio.num_tc = mqprio->qopt.num_tc; + + for (tc = 0; tc < TC_MAX_QUEUE; tc++) + params->mqprio.channel.max_rate[tc] = mqprio->max_rate[tc]; + + mlx5e_mqprio_rl_update_params(params, rl); + mlx5e_mqprio_build_tc_to_txq(params->mqprio.tc_to_txq, &mqprio->qopt); +} + +static void mlx5e_params_mqprio_reset(struct mlx5e_params *params) +{ + mlx5e_params_mqprio_dcb_set(params, 1); +} + +static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv, + struct tc_mqprio_qopt *mqprio) +{ + struct mlx5e_params new_params; + u8 tc = mqprio->num_tc; + int err; + + mqprio->hw = 
TC_MQPRIO_HW_OFFLOAD_TCS; + + if (tc && tc != MLX5E_MAX_NUM_TC) + return -EINVAL; + + new_params = priv->channels.params; + mlx5e_params_mqprio_dcb_set(&new_params, tc ? tc : 1); + + err = mlx5e_safe_switch_params(priv, &new_params, + mlx5e_num_channels_changed_ctx, NULL, true); + + if (!err && priv->mqprio_rl) { + mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); + mlx5e_mqprio_rl_free(priv->mqprio_rl); + priv->mqprio_rl = NULL; + } + + priv->max_opened_tc = max_t(u8, priv->max_opened_tc, + mlx5e_get_dcb_num_tc(&priv->channels.params)); + return err; +} + +static int mlx5e_mqprio_channel_validate(struct mlx5e_priv *priv, + struct tc_mqprio_qopt_offload *mqprio) +{ + struct net_device *netdev = priv->netdev; + struct mlx5e_ptp *ptp_channel; + int agg_count = 0; + int i; + + ptp_channel = priv->channels.ptp; + if (ptp_channel && test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state)) { + netdev_err(netdev, + "Cannot activate MQPRIO mode channel since it conflicts with TX port TS\n"); + return -EINVAL; + } + + if (mqprio->qopt.offset[0] != 0 || mqprio->qopt.num_tc < 1 || + mqprio->qopt.num_tc > MLX5E_MAX_NUM_MQPRIO_CH_TC) + return -EINVAL; + + for (i = 0; i < mqprio->qopt.num_tc; i++) { + if (!mqprio->qopt.count[i]) { + netdev_err(netdev, "Zero size for queue-group (%d) is not supported\n", i); + return -EINVAL; + } + if (mqprio->min_rate[i]) { + netdev_err(netdev, "Min tx rate is not supported\n"); + return -EINVAL; + } + + if (mqprio->max_rate[i]) { + int err; + + err = mlx5e_qos_bytes_rate_check(priv->mdev, mqprio->max_rate[i]); + if (err) + return err; + } + + if (mqprio->qopt.offset[i] != agg_count) { + netdev_err(netdev, "Discontinuous queues config is not supported\n"); + return -EINVAL; + } + agg_count += mqprio->qopt.count[i]; + } + + if (priv->channels.params.num_channels != agg_count) { + netdev_err(netdev, "Num of queues (%d) does not match available (%d)\n", + agg_count, priv->channels.params.num_channels); + return -EINVAL; + } + + return 0; +} + +static bool mlx5e_mqprio_rate_limit(u8 num_tc, u64 max_rate[]) +{ + int tc; + + for (tc = 0; tc < num_tc; tc++) + if (max_rate[tc]) + return true; + return false; +} + +static struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_create(struct mlx5_core_dev *mdev, + u8 num_tc, u64 max_rate[]) +{ + struct mlx5e_mqprio_rl *rl; + int err; + + if (!mlx5e_mqprio_rate_limit(num_tc, max_rate)) + return NULL; + + rl = mlx5e_mqprio_rl_alloc(); + if (!rl) + return ERR_PTR(-ENOMEM); + + err = mlx5e_mqprio_rl_init(rl, mdev, num_tc, max_rate); + if (err) { + mlx5e_mqprio_rl_free(rl); + return ERR_PTR(err); + } + + return rl; +} + +static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv, + struct tc_mqprio_qopt_offload *mqprio) +{ + mlx5e_fp_preactivate preactivate; + struct mlx5e_params new_params; + struct mlx5e_mqprio_rl *rl; + bool nch_changed; + int err; + + err = mlx5e_mqprio_channel_validate(priv, mqprio); + if (err) + return err; + + rl = mlx5e_mqprio_rl_create(priv->mdev, mqprio->qopt.num_tc, mqprio->max_rate); + if (IS_ERR(rl)) + return PTR_ERR(rl); + + new_params = priv->channels.params; + mlx5e_params_mqprio_channel_set(&new_params, mqprio, rl); + + nch_changed = mlx5e_get_dcb_num_tc(&priv->channels.params) > 1; + preactivate = nch_changed ? 
mlx5e_num_channels_changed_ctx : + mlx5e_update_netdev_queues_ctx; + err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, true); + if (err) { + if (rl) { + mlx5e_mqprio_rl_cleanup(rl); + mlx5e_mqprio_rl_free(rl); + } + return err; + } + + if (priv->mqprio_rl) { + mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); + mlx5e_mqprio_rl_free(priv->mqprio_rl); + } + priv->mqprio_rl = rl; + + return 0; +} + +static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, + struct tc_mqprio_qopt_offload *mqprio) +{ + /* MQPRIO is another toplevel qdisc that can't be attached + * simultaneously with the offloaded HTB. + */ + if (WARN_ON(mlx5e_selq_is_htb_enabled(&priv->selq))) + return -EINVAL; + + switch (mqprio->mode) { + case TC_MQPRIO_MODE_DCB: + return mlx5e_setup_tc_mqprio_dcb(priv, &mqprio->qopt); + case TC_MQPRIO_MODE_CHANNEL: + return mlx5e_setup_tc_mqprio_channel(priv, mqprio); + default: + return -EOPNOTSUPP; + } +} + +static LIST_HEAD(mlx5e_block_cb_list); + +static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + bool tc_unbind = false; + int err; + + if (type == TC_SETUP_BLOCK && + ((struct flow_block_offload *)type_data)->command == FLOW_BLOCK_UNBIND) + tc_unbind = true; + + if (!netif_device_present(dev) && !tc_unbind) + return -ENODEV; + + switch (type) { + case TC_SETUP_BLOCK: { + struct flow_block_offload *f = type_data; + + f->unlocked_driver_cb = true; + return flow_block_cb_setup_simple(type_data, + &mlx5e_block_cb_list, + mlx5e_setup_tc_block_cb, + priv, priv, true); + } + case TC_SETUP_QDISC_MQPRIO: + mutex_lock(&priv->state_lock); + err = mlx5e_setup_tc_mqprio(priv, type_data); + mutex_unlock(&priv->state_lock); + return err; + case TC_SETUP_QDISC_HTB: + mutex_lock(&priv->state_lock); + err = mlx5e_htb_setup_tc(priv, type_data); + mutex_unlock(&priv->state_lock); + return err; + default: + return -EOPNOTSUPP; + } +} + +void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s) +{ + int i; + + for (i = 0; i < priv->stats_nch; i++) { + struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i]; + struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq; + struct mlx5e_rq_stats *rq_stats = &channel_stats->rq; + int j; + + s->rx_packets += rq_stats->packets + xskrq_stats->packets; + s->rx_bytes += rq_stats->bytes + xskrq_stats->bytes; + s->multicast += rq_stats->mcast_packets + xskrq_stats->mcast_packets; + + for (j = 0; j < priv->max_opened_tc; j++) { + struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; + + s->tx_packets += sq_stats->packets; + s->tx_bytes += sq_stats->bytes; + s->tx_dropped += sq_stats->dropped; + } + } + if (priv->tx_ptp_opened) { + for (i = 0; i < priv->max_opened_tc; i++) { + struct mlx5e_sq_stats *sq_stats = &priv->ptp_stats.sq[i]; + + s->tx_packets += sq_stats->packets; + s->tx_bytes += sq_stats->bytes; + s->tx_dropped += sq_stats->dropped; + } + } + if (priv->rx_ptp_opened) { + struct mlx5e_rq_stats *rq_stats = &priv->ptp_stats.rq; + + s->rx_packets += rq_stats->packets; + s->rx_bytes += rq_stats->bytes; + s->multicast += rq_stats->mcast_packets; + } +} + +void +mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + + if (!netif_device_present(dev)) + return; + + /* In switchdev mode, monitor counters doesn't monitor + * rx/tx stats of 802_3. 
The update stats mechanism + * should keep the 802_3 layout counters updated + */ + if (!mlx5e_monitor_counter_supported(priv) || + mlx5e_is_uplink_rep(priv)) { + /* update HW stats in background for next time */ + mlx5e_queue_update_stats(priv); + } + + if (mlx5e_is_uplink_rep(priv)) { + struct mlx5e_vport_stats *vstats = &priv->stats.vport; + + stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok); + stats->rx_bytes = PPORT_802_3_GET(pstats, a_octets_received_ok); + stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok); + stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok); + + /* vport multicast also counts packets that are dropped due to steering + * or rx out of buffer + */ + stats->multicast = VPORT_COUNTER_GET(vstats, received_eth_multicast.packets); + } else { + mlx5e_fold_sw_stats64(priv, stats); + } + + stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer; + + stats->rx_length_errors = + PPORT_802_3_GET(pstats, a_in_range_length_errors) + + PPORT_802_3_GET(pstats, a_out_of_range_length_field) + + PPORT_802_3_GET(pstats, a_frame_too_long_errors) + + VNIC_ENV_GET(&priv->stats.vnic, eth_wqe_too_small); + stats->rx_crc_errors = + PPORT_802_3_GET(pstats, a_frame_check_sequence_errors); + stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors); + stats->tx_aborted_errors = PPORT_2863_GET(pstats, if_out_discards); + stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors + + stats->rx_frame_errors; + stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors; +} + +static void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv) +{ + if (mlx5e_is_uplink_rep(priv)) + return; /* no rx mode for uplink rep */ + + queue_work(priv->wq, &priv->set_rx_mode_work); +} + +static void mlx5e_set_rx_mode(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_nic_set_rx_mode(priv); +} + +static int mlx5e_set_mac(struct net_device *netdev, void *addr) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct sockaddr *saddr = addr; + + if (!is_valid_ether_addr(saddr->sa_data)) + return -EADDRNOTAVAIL; + + netif_addr_lock_bh(netdev); + eth_hw_addr_set(netdev, saddr->sa_data); + netif_addr_unlock_bh(netdev); + + mlx5e_nic_set_rx_mode(priv); + + return 0; +} + +#define MLX5E_SET_FEATURE(features, feature, enable) \ + do { \ + if (enable) \ + *features |= feature; \ + else \ + *features &= ~feature; \ + } while (0) + +typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable); + +static int set_feature_lro(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_params *cur_params; + struct mlx5e_params new_params; + bool reset = true; + int err = 0; + + mutex_lock(&priv->state_lock); + + cur_params = &priv->channels.params; + new_params = *cur_params; + + if (enable) + new_params.packet_merge.type = MLX5E_PACKET_MERGE_LRO; + else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO) + new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE; + else + goto out; + + if (!(cur_params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO && + new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)) { + if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) == + mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL)) + reset = false; + } + } + + err = mlx5e_safe_switch_params(priv, &new_params, + 
mlx5e_modify_tirs_packet_merge_ctx, NULL, reset); +out: + mutex_unlock(&priv->state_lock); + return err; +} + +static int set_feature_hw_gro(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_params new_params; + bool reset = true; + int err = 0; + + mutex_lock(&priv->state_lock); + new_params = priv->channels.params; + + if (enable) { + new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO; + new_params.packet_merge.shampo.match_criteria_type = + MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED; + new_params.packet_merge.shampo.alignment_granularity = + MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE; + } else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { + new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE; + } else { + goto out; + } + + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset); +out: + mutex_unlock(&priv->state_lock); + return err; +} + +static int set_feature_cvlan_filter(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + if (enable) + mlx5e_enable_cvlan_filter(priv->fs, + !!(priv->netdev->flags & IFF_PROMISC)); + else + mlx5e_disable_cvlan_filter(priv->fs, + !!(priv->netdev->flags & IFF_PROMISC)); + + return 0; +} + +static int set_feature_hw_tc(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err = 0; + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + int tc_flag = mlx5e_is_uplink_rep(priv) ? MLX5_TC_FLAG(ESW_OFFLOAD) : + MLX5_TC_FLAG(NIC_OFFLOAD); + if (!enable && mlx5e_tc_num_filters(priv, tc_flag)) { + netdev_err(netdev, + "Active offloaded tc filters, can't turn hw_tc_offload off\n"); + return -EINVAL; + } +#endif + + mutex_lock(&priv->state_lock); + if (!enable && mlx5e_selq_is_htb_enabled(&priv->selq)) { + netdev_err(netdev, "Active HTB offload, can't turn hw_tc_offload off\n"); + err = -EINVAL; + } + mutex_unlock(&priv->state_lock); + + return err; +} + +static int set_feature_rx_all(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_set_port_fcs(mdev, !enable); +} + +static int mlx5e_set_rx_port_ts(struct mlx5_core_dev *mdev, bool enable) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {}; + bool supported, curr_state; + int err; + + if (!MLX5_CAP_GEN(mdev, ports_check)) + return 0; + + err = mlx5_query_ports_check(mdev, in, sizeof(in)); + if (err) + return err; + + supported = MLX5_GET(pcmr_reg, in, rx_ts_over_crc_cap); + curr_state = MLX5_GET(pcmr_reg, in, rx_ts_over_crc); + + if (!supported || enable == curr_state) + return 0; + + MLX5_SET(pcmr_reg, in, local_port, 1); + MLX5_SET(pcmr_reg, in, rx_ts_over_crc, enable); + + return mlx5_set_ports_check(mdev, in, sizeof(in)); +} + +static int mlx5e_set_rx_port_ts_wrap(struct mlx5e_priv *priv, void *ctx) +{ + struct mlx5_core_dev *mdev = priv->mdev; + bool enable = *(bool *)ctx; + + return mlx5e_set_rx_port_ts(mdev, enable); +} + +static int set_feature_rx_fcs(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_channels *chs = &priv->channels; + struct mlx5e_params new_params; + int err; + bool rx_ts_over_crc = !enable; + + mutex_lock(&priv->state_lock); + + new_params = chs->params; + new_params.scatter_fcs_en = enable; + err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_set_rx_port_ts_wrap, + &rx_ts_over_crc, true); + mutex_unlock(&priv->state_lock); + return err; +} + +static int 
set_feature_rx_vlan(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err = 0; + + mutex_lock(&priv->state_lock); + + mlx5e_fs_set_vlan_strip_disable(priv->fs, !enable); + priv->channels.params.vlan_strip_disable = !enable; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + err = mlx5e_modify_channels_vsd(&priv->channels, !enable); + if (err) { + mlx5e_fs_set_vlan_strip_disable(priv->fs, enable); + priv->channels.params.vlan_strip_disable = enable; + } +unlock: + mutex_unlock(&priv->state_lock); + + return err; +} + +int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_flow_steering *fs = priv->fs; + + if (mlx5e_is_uplink_rep(priv)) + return 0; /* no vlan table for uplink rep */ + + return mlx5e_fs_vlan_rx_add_vid(fs, dev, proto, vid); +} + +int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_flow_steering *fs = priv->fs; + + if (mlx5e_is_uplink_rep(priv)) + return 0; /* no vlan table for uplink rep */ + + return mlx5e_fs_vlan_rx_kill_vid(fs, dev, proto, vid); +} + +#ifdef CONFIG_MLX5_EN_ARFS +static int set_feature_arfs(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + if (enable) + err = mlx5e_arfs_enable(priv->fs); + else + err = mlx5e_arfs_disable(priv->fs); + + return err; +} +#endif + +static int mlx5e_handle_feature(struct net_device *netdev, + netdev_features_t *features, + netdev_features_t feature, + mlx5e_feature_handler feature_handler) +{ + netdev_features_t changes = *features ^ netdev->features; + bool enable = !!(*features & feature); + int err; + + if (!(changes & feature)) + return 0; + + err = feature_handler(netdev, enable); + if (err) { + MLX5E_SET_FEATURE(features, feature, !enable); + netdev_err(netdev, "%s feature %pNF failed, err %d\n", + enable ? 
"Enable" : "Disable", &feature, err); + return err; + } + + return 0; +} + +int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) +{ + netdev_features_t oper_features = features; + int err = 0; + +#define MLX5E_HANDLE_FEATURE(feature, handler) \ + mlx5e_handle_feature(netdev, &oper_features, feature, handler) + + err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_GRO_HW, set_feature_hw_gro); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER, + set_feature_cvlan_filter); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_hw_tc); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan); +#ifdef CONFIG_MLX5_EN_ARFS + err |= MLX5E_HANDLE_FEATURE(NETIF_F_NTUPLE, set_feature_arfs); +#endif + err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TLS_RX, mlx5e_ktls_set_feature_rx); + + if (err) { + netdev->features = oper_features; + return -EINVAL; + } + + return 0; +} + +static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev, + netdev_features_t features) +{ + features &= ~NETIF_F_HW_TLS_RX; + if (netdev->features & NETIF_F_HW_TLS_RX) + netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n"); + + features &= ~NETIF_F_HW_TLS_TX; + if (netdev->features & NETIF_F_HW_TLS_TX) + netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n"); + + features &= ~NETIF_F_NTUPLE; + if (netdev->features & NETIF_F_NTUPLE) + netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n"); + + features &= ~NETIF_F_GRO_HW; + if (netdev->features & NETIF_F_GRO_HW) + netdev_warn(netdev, "Disabling HW_GRO, not supported in switchdev mode\n"); + + features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + if (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER) + netdev_warn(netdev, "Disabling HW_VLAN CTAG FILTERING, not supported in switchdev mode\n"); + + return features; +} + +static netdev_features_t mlx5e_fix_features(struct net_device *netdev, + netdev_features_t features) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_vlan_table *vlan; + struct mlx5e_params *params; + + if (!netif_device_present(netdev)) + return features; + + vlan = mlx5e_fs_get_vlan(priv->fs); + mutex_lock(&priv->state_lock); + params = &priv->channels.params; + if (!vlan || + !bitmap_empty(mlx5e_vlan_get_active_svlans(vlan), VLAN_N_VID)) { + /* HW strips the outer C-tag header, this is a problem + * for S-tag traffic. 
+ */ + features &= ~NETIF_F_HW_VLAN_CTAG_RX; + if (!params->vlan_strip_disable) + netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n"); + } + + if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) { + if (features & NETIF_F_LRO) { + netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n"); + features &= ~NETIF_F_LRO; + } + if (features & NETIF_F_GRO_HW) { + netdev_warn(netdev, "Disabling HW-GRO, not supported in legacy RQ\n"); + features &= ~NETIF_F_GRO_HW; + } + } + + if (params->xdp_prog) { + if (features & NETIF_F_LRO) { + netdev_warn(netdev, "LRO is incompatible with XDP\n"); + features &= ~NETIF_F_LRO; + } + if (features & NETIF_F_GRO_HW) { + netdev_warn(netdev, "HW GRO is incompatible with XDP\n"); + features &= ~NETIF_F_GRO_HW; + } + } + + if (priv->xsk.refcnt) { + if (features & NETIF_F_LRO) { + netdev_warn(netdev, "LRO is incompatible with AF_XDP (%u XSKs are active)\n", + priv->xsk.refcnt); + features &= ~NETIF_F_LRO; + } + if (features & NETIF_F_GRO_HW) { + netdev_warn(netdev, "HW GRO is incompatible with AF_XDP (%u XSKs are active)\n", + priv->xsk.refcnt); + features &= ~NETIF_F_GRO_HW; + } + } + + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { + features &= ~NETIF_F_RXHASH; + if (netdev->features & NETIF_F_RXHASH) + netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n"); + + if (features & NETIF_F_GRO_HW) { + netdev_warn(netdev, "Disabling HW-GRO, not supported when CQE compress is active\n"); + features &= ~NETIF_F_GRO_HW; + } + } + + if (mlx5e_is_uplink_rep(priv)) { + features = mlx5e_fix_uplink_rep_features(netdev, features); + features |= NETIF_F_NETNS_LOCAL; + } else { + features &= ~NETIF_F_NETNS_LOCAL; + } + + mutex_unlock(&priv->state_lock); + + return features; +} + +static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, + struct mlx5e_channels *chs, + struct mlx5e_params *new_params, + struct mlx5_core_dev *mdev) +{ + u16 ix; + + for (ix = 0; ix < chs->params.num_channels; ix++) { + struct xsk_buff_pool *xsk_pool = + mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, ix); + struct mlx5e_xsk_param xsk; + int max_xdp_mtu; + + if (!xsk_pool) + continue; + + mlx5e_build_xsk_param(xsk_pool, &xsk); + max_xdp_mtu = mlx5e_xdp_max_mtu(new_params, &xsk); + + /* Validate XSK params and XDP MTU in advance */ + if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev) || + new_params->sw_mtu > max_xdp_mtu) { + u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk); + int max_mtu_frame, max_mtu_page, max_mtu; + + /* Two criteria must be met: + * 1. HW MTU + all headrooms <= XSK frame size. + * 2. Size of SKBs allocated on XDP_PASS <= PAGE_SIZE. + */ + max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr); + max_mtu_page = MLX5E_HW2SW_MTU(new_params, SKB_MAX_HEAD(0)); + max_mtu = min3(max_mtu_frame, max_mtu_page, max_xdp_mtu); + + netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u or its redirection XDP program. Try MTU <= %d\n", + new_params->sw_mtu, ix, max_mtu); + return false; + } + } + + return true; +} + +static bool mlx5e_params_validate_xdp(struct net_device *netdev, + struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + bool is_linear; + + /* No XSK params: AF_XDP can't be enabled yet at the point of setting + * the XDP program. 
+ */ + is_linear = mlx5e_rx_is_linear_skb(mdev, params, NULL); + + if (!is_linear && params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) { + netdev_warn(netdev, "XDP is not allowed with striding RQ and MTU(%d) > %d\n", + params->sw_mtu, + mlx5e_xdp_max_mtu(params, NULL)); + return false; + } + if (!is_linear && !params->xdp_prog->aux->xdp_has_frags) { + netdev_warn(netdev, "MTU(%d) > %d, too big for an XDP program not aware of multi buffer\n", + params->sw_mtu, + mlx5e_xdp_max_mtu(params, NULL)); + return false; + } + + return true; +} + +int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, + mlx5e_fp_preactivate preactivate) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_params new_params; + struct mlx5e_params *params; + bool reset = true; + int err = 0; + + mutex_lock(&priv->state_lock); + + params = &priv->channels.params; + + new_params = *params; + new_params.sw_mtu = new_mtu; + err = mlx5e_validate_params(priv->mdev, &new_params); + if (err) + goto out; + + if (new_params.xdp_prog && !mlx5e_params_validate_xdp(netdev, priv->mdev, + &new_params)) { + err = -EINVAL; + goto out; + } + + if (priv->xsk.refcnt && + !mlx5e_xsk_validate_mtu(netdev, &priv->channels, + &new_params, priv->mdev)) { + err = -EINVAL; + goto out; + } + + if (params->packet_merge.type == MLX5E_PACKET_MERGE_LRO) + reset = false; + + if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + params->packet_merge.type != MLX5E_PACKET_MERGE_SHAMPO) { + bool is_linear_old = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, params, NULL); + bool is_linear_new = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, + &new_params, NULL); + u8 sz_old = mlx5e_mpwqe_get_log_rq_size(priv->mdev, params, NULL); + u8 sz_new = mlx5e_mpwqe_get_log_rq_size(priv->mdev, &new_params, NULL); + + /* Always reset in linear mode - hw_mtu is used in data path. + * Check that the mode was non-linear and didn't change. + * If XSK is active, XSK RQs are linear. + * Reset if the RQ size changed, even if it's non-linear. 
+ */ + if (!is_linear_old && !is_linear_new && !priv->xsk.refcnt && + sz_old == sz_new) + reset = false; + } + + err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, reset); + +out: + netdev->mtu = params->sw_mtu; + mutex_unlock(&priv->state_lock); + return err; +} + +static int mlx5e_change_nic_mtu(struct net_device *netdev, int new_mtu) +{ + return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx); +} + +int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx) +{ + bool set = *(bool *)ctx; + + return mlx5e_ptp_rx_manage_fs(priv, set); +} + +static int mlx5e_hwstamp_config_no_ptp_rx(struct mlx5e_priv *priv, bool rx_filter) +{ + bool rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def; + int err; + + if (!rx_filter) + /* Reset CQE compression to Admin default */ + return mlx5e_modify_rx_cqe_compression_locked(priv, rx_cqe_compress_def, false); + + if (!MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS)) + return 0; + + /* Disable CQE compression */ + netdev_warn(priv->netdev, "Disabling RX cqe compression\n"); + err = mlx5e_modify_rx_cqe_compression_locked(priv, false, true); + if (err) + netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); + + return err; +} + +static int mlx5e_hwstamp_config_ptp_rx(struct mlx5e_priv *priv, bool ptp_rx) +{ + struct mlx5e_params new_params; + + if (ptp_rx == priv->channels.params.ptp_rx) + return 0; + + new_params = priv->channels.params; + new_params.ptp_rx = ptp_rx; + return mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx, + &new_params.ptp_rx, true); +} + +int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) +{ + struct hwtstamp_config config; + bool rx_cqe_compress_def; + bool ptp_rx; + int err; + + if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) || + (mlx5_clock_get_ptp_index(priv->mdev) == -1)) + return -EOPNOTSUPP; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + /* TX HW timestamp */ + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + case HWTSTAMP_TX_ON: + break; + default: + return -ERANGE; + } + + mutex_lock(&priv->state_lock); + rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def; + + /* RX HW timestamp */ + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + ptp_rx = false; + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_NTP_ALL: + config.rx_filter = HWTSTAMP_FILTER_ALL; + /* ptp_rx is set if both HW TS is set and CQE + * compression is set + */ + ptp_rx = rx_cqe_compress_def; + break; + default: + err = -ERANGE; + goto err_unlock; + } + + if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX)) + err = mlx5e_hwstamp_config_no_ptp_rx(priv, + config.rx_filter != HWTSTAMP_FILTER_NONE); + else + err = mlx5e_hwstamp_config_ptp_rx(priv, ptp_rx); + if (err) + goto err_unlock; + + memcpy(&priv->tstamp, &config, sizeof(config)); + mutex_unlock(&priv->state_lock); + + /* might need to fix some features */ + netdev_update_features(priv->netdev); + + return 
copy_to_user(ifr->ifr_data, &config, + sizeof(config)) ? -EFAULT : 0; +err_unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr) +{ + struct hwtstamp_config *cfg = &priv->tstamp; + + if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) + return -EOPNOTSUPP; + + return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0; +} + +static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + switch (cmd) { + case SIOCSHWTSTAMP: + return mlx5e_hwstamp_set(priv, ifr); + case SIOCGHWTSTAMP: + return mlx5e_hwstamp_get(priv, ifr); + default: + return -EOPNOTSUPP; + } +} + +#ifdef CONFIG_MLX5_ESWITCH +int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac); +} + +static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + + return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1, + vlan, qos); +} + +static int mlx5e_set_vf_spoofchk(struct net_device *dev, int vf, bool setting) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_spoofchk(mdev->priv.eswitch, vf + 1, setting); +} + +static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting); +} + +int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, + int max_tx_rate) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_rate(mdev->priv.eswitch, vf + 1, + max_tx_rate, min_tx_rate); +} + +static int mlx5_vport_link2ifla(u8 esw_link) +{ + switch (esw_link) { + case MLX5_VPORT_ADMIN_STATE_DOWN: + return IFLA_VF_LINK_STATE_DISABLE; + case MLX5_VPORT_ADMIN_STATE_UP: + return IFLA_VF_LINK_STATE_ENABLE; + } + return IFLA_VF_LINK_STATE_AUTO; +} + +static int mlx5_ifla_link2vport(u8 ifla_link) +{ + switch (ifla_link) { + case IFLA_VF_LINK_STATE_DISABLE: + return MLX5_VPORT_ADMIN_STATE_DOWN; + case IFLA_VF_LINK_STATE_ENABLE: + return MLX5_VPORT_ADMIN_STATE_UP; + } + return MLX5_VPORT_ADMIN_STATE_AUTO; +} + +static int mlx5e_set_vf_link_state(struct net_device *dev, int vf, + int link_state) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + if (mlx5e_is_uplink_rep(priv)) + return -EOPNOTSUPP; + + return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1, + mlx5_ifla_link2vport(link_state)); +} + +int mlx5e_get_vf_config(struct net_device *dev, + int vf, struct ifla_vf_info *ivi) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + if (!netif_device_present(dev)) + return -EOPNOTSUPP; + + err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi); + if (err) + return err; + ivi->linkstate = mlx5_vport_link2ifla(ivi->linkstate); + return 0; +} + +int mlx5e_get_vf_stats(struct net_device *dev, + int vf, struct ifla_vf_stats *vf_stats) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct 
mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1, + vf_stats); +} + +static bool +mlx5e_has_offload_stats(const struct net_device *dev, int attr_id) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (!netif_device_present(dev)) + return false; + + if (!mlx5e_is_uplink_rep(priv)) + return false; + + return mlx5e_rep_has_offload_stats(dev, attr_id); +} + +static int +mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (!mlx5e_is_uplink_rep(priv)) + return -EOPNOTSUPP; + + return mlx5e_rep_get_offload_stats(attr_id, dev, sp); +} +#endif + +static bool mlx5e_tunnel_proto_supported_tx(struct mlx5_core_dev *mdev, u8 proto_type) +{ + switch (proto_type) { + case IPPROTO_GRE: + return MLX5_CAP_ETH(mdev, tunnel_stateless_gre); + case IPPROTO_IPIP: + case IPPROTO_IPV6: + return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) || + MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_tx)); + default: + return false; + } +} + +static bool mlx5e_gre_tunnel_inner_proto_offload_supported(struct mlx5_core_dev *mdev, + struct sk_buff *skb) +{ + switch (skb->inner_protocol) { + case htons(ETH_P_IP): + case htons(ETH_P_IPV6): + case htons(ETH_P_TEB): + return true; + case htons(ETH_P_MPLS_UC): + case htons(ETH_P_MPLS_MC): + return MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre); + } + return false; +} + +static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, + struct sk_buff *skb, + netdev_features_t features) +{ + unsigned int offset = 0; + struct udphdr *udph; + u8 proto; + u16 port; + + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_IP): + proto = ip_hdr(skb)->protocol; + break; + case htons(ETH_P_IPV6): + proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL); + break; + default: + goto out; + } + + switch (proto) { + case IPPROTO_GRE: + if (mlx5e_gre_tunnel_inner_proto_offload_supported(priv->mdev, skb)) + return features; + break; + case IPPROTO_IPIP: + case IPPROTO_IPV6: + if (mlx5e_tunnel_proto_supported_tx(priv->mdev, IPPROTO_IPIP)) + return features; + break; + case IPPROTO_UDP: + udph = udp_hdr(skb); + port = be16_to_cpu(udph->dest); + + /* Verify if UDP port is being offloaded by HW */ + if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port)) + return features; + +#if IS_ENABLED(CONFIG_GENEVE) + /* Support Geneve offload for default UDP port */ + if (port == GENEVE_UDP_PORT && mlx5_geneve_tx_allowed(priv->mdev)) + return features; +#endif + break; +#ifdef CONFIG_MLX5_EN_IPSEC + case IPPROTO_ESP: + return mlx5e_ipsec_feature_check(skb, features); +#endif + } + +out: + /* Disable CSUM and GSO if the udp dport is not offloaded by HW */ + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); +} + +netdev_features_t mlx5e_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + features = vlan_features_check(skb, features); + features = vxlan_features_check(skb, features); + + /* Validate if the tunneled packet is being offloaded by HW */ + if (skb->encapsulation && + (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK)) + return mlx5e_tunnel_features_check(priv, skb, features); + + return features; +} + +static void mlx5e_tx_timeout_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + tx_timeout_work); + struct net_device *netdev = priv->netdev; + int i; + + /* Take rtnl_lock to ensure no 
change in netdev->real_num_tx_queues + * through this flow. However, channel closing flows have to wait for + * this work to finish while holding rtnl lock too. So either get the + * lock or find that channels are being closed for other reason and + * this work is not relevant anymore. + */ + while (!rtnl_trylock()) { + if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state)) + return; + msleep(20); + } + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + for (i = 0; i < netdev->real_num_tx_queues; i++) { + struct netdev_queue *dev_queue = + netdev_get_tx_queue(netdev, i); + struct mlx5e_txqsq *sq = priv->txq2sq[i]; + + if (!netif_xmit_stopped(dev_queue)) + continue; + + if (mlx5e_reporter_tx_timeout(sq)) + /* break if tried to reopened channels */ + break; + } + +unlock: + rtnl_unlock(); +} + +static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + netdev_err(dev, "TX timeout detected\n"); + queue_work(priv->wq, &priv->tx_timeout_work); +} + +static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) +{ + struct net_device *netdev = priv->netdev; + struct mlx5e_params new_params; + + if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) { + netdev_warn(netdev, "can't set XDP while HW-GRO/LRO is on, disable them first\n"); + return -EINVAL; + } + + new_params = priv->channels.params; + new_params.xdp_prog = prog; + + if (!mlx5e_params_validate_xdp(netdev, priv->mdev, &new_params)) + return -EINVAL; + + return 0; +} + +static void mlx5e_rq_replace_xdp_prog(struct mlx5e_rq *rq, struct bpf_prog *prog) +{ + struct bpf_prog *old_prog; + + old_prog = rcu_replace_pointer(rq->xdp_prog, prog, + lockdep_is_held(&rq->priv->state_lock)); + if (old_prog) + bpf_prog_put(old_prog); +} + +static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_params new_params; + struct bpf_prog *old_prog; + int err = 0; + bool reset; + int i; + + mutex_lock(&priv->state_lock); + + if (prog) { + err = mlx5e_xdp_allowed(priv, prog); + if (err) + goto unlock; + } + + /* no need for full reset when exchanging programs */ + reset = (!priv->channels.params.xdp_prog || !prog); + + new_params = priv->channels.params; + new_params.xdp_prog = prog; + + /* XDP affects striding RQ parameters. Block XDP if striding RQ won't be + * supported with the new parameters: if PAGE_SIZE is bigger than + * MLX5_MPWQE_LOG_STRIDE_SZ_MAX, striding RQ can't be used, even though + * the MTU is small enough for the linear mode, because XDP uses strides + * of PAGE_SIZE on regular RQs. + */ + if (reset && MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ)) { + /* Checking for regular RQs here; XSK RQs were checked on XSK bind. */ + err = mlx5e_mpwrq_validate_regular(priv->mdev, &new_params); + if (err) + goto unlock; + } + + old_prog = priv->channels.params.xdp_prog; + + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset); + if (err) + goto unlock; + + if (old_prog) + bpf_prog_put(old_prog); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset) + goto unlock; + + /* exchanging programs w/o reset, we update ref counts on behalf + * of the channels RQs here. 
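+	 * bpf_prog_add() takes one reference per channel for the regular RQs,
+	 * and bpf_prog_inc() takes one more for each active XSK RQ; the
+	 * reference each RQ held on the old program is dropped inside
+	 * mlx5e_rq_replace_xdp_prog().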
+ */ + bpf_prog_add(prog, priv->channels.num); + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; + + mlx5e_rq_replace_xdp_prog(&c->rq, prog); + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) { + bpf_prog_inc(prog); + mlx5e_rq_replace_xdp_prog(&c->xskrq, prog); + } + } + +unlock: + mutex_unlock(&priv->state_lock); + + /* Need to fix some features. */ + if (!err) + netdev_update_features(netdev); + + return err; +} + +static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return mlx5e_xdp_set(dev, xdp->prog); + case XDP_SETUP_XSK_POOL: + return mlx5e_xsk_setup_pool(dev, xdp->xsk.pool, + xdp->xsk.queue_id); + default: + return -EINVAL; + } +} + +#ifdef CONFIG_MLX5_ESWITCH +static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u32 filter_mask, + int nlflags) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 mode, setting; + int err; + + err = mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting); + if (err) + return err; + mode = setting ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB; + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, + mode, + 0, 0, nlflags, filter_mask, NULL); +} + +static int mlx5e_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, + u16 flags, struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct nlattr *attr, *br_spec; + u16 mode = BRIDGE_MODE_UNDEF; + u8 setting; + int rem; + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!br_spec) + return -EINVAL; + + nla_for_each_nested(attr, br_spec, rem) { + if (nla_type(attr) != IFLA_BRIDGE_MODE) + continue; + + if (nla_len(attr) < sizeof(mode)) + return -EINVAL; + + mode = nla_get_u16(attr); + if (mode > BRIDGE_MODE_VEPA) + return -EINVAL; + + break; + } + + if (mode == BRIDGE_MODE_UNDEF) + return -EINVAL; + + setting = (mode == BRIDGE_MODE_VEPA) ? 
1 : 0; + return mlx5_eswitch_set_vepa(mdev->priv.eswitch, setting); +} +#endif + +const struct net_device_ops mlx5e_netdev_ops = { + .ndo_open = mlx5e_open, + .ndo_stop = mlx5e_close, + .ndo_start_xmit = mlx5e_xmit, + .ndo_setup_tc = mlx5e_setup_tc, + .ndo_select_queue = mlx5e_select_queue, + .ndo_get_stats64 = mlx5e_get_stats, + .ndo_set_rx_mode = mlx5e_set_rx_mode, + .ndo_set_mac_address = mlx5e_set_mac, + .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, + .ndo_set_features = mlx5e_set_features, + .ndo_fix_features = mlx5e_fix_features, + .ndo_change_mtu = mlx5e_change_nic_mtu, + .ndo_eth_ioctl = mlx5e_ioctl, + .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, + .ndo_features_check = mlx5e_features_check, + .ndo_tx_timeout = mlx5e_tx_timeout, + .ndo_bpf = mlx5e_xdp, + .ndo_xdp_xmit = mlx5e_xdp_xmit, + .ndo_xsk_wakeup = mlx5e_xsk_wakeup, +#ifdef CONFIG_MLX5_EN_ARFS + .ndo_rx_flow_steer = mlx5e_rx_flow_steer, +#endif +#ifdef CONFIG_MLX5_ESWITCH + .ndo_bridge_setlink = mlx5e_bridge_setlink, + .ndo_bridge_getlink = mlx5e_bridge_getlink, + + /* SRIOV E-Switch NDOs */ + .ndo_set_vf_mac = mlx5e_set_vf_mac, + .ndo_set_vf_vlan = mlx5e_set_vf_vlan, + .ndo_set_vf_spoofchk = mlx5e_set_vf_spoofchk, + .ndo_set_vf_trust = mlx5e_set_vf_trust, + .ndo_set_vf_rate = mlx5e_set_vf_rate, + .ndo_get_vf_config = mlx5e_get_vf_config, + .ndo_set_vf_link_state = mlx5e_set_vf_link_state, + .ndo_get_vf_stats = mlx5e_get_vf_stats, + .ndo_has_offload_stats = mlx5e_has_offload_stats, + .ndo_get_offload_stats = mlx5e_get_offload_stats, +#endif + .ndo_get_devlink_port = mlx5e_get_devlink_port, +}; + +static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) +{ + int i; + + /* The supported periods are organized in ascending order */ + for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++) + if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout) + break; + + return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); +} + +void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu) +{ + struct mlx5e_params *params = &priv->channels.params; + struct mlx5_core_dev *mdev = priv->mdev; + u8 rx_cq_period_mode; + + params->sw_mtu = mtu; + params->hard_mtu = MLX5E_ETH_HARD_MTU; + params->num_channels = min_t(unsigned int, MLX5E_MAX_NUM_CHANNELS / 2, + priv->max_nch); + mlx5e_params_mqprio_reset(params); + + /* SQ */ + params->log_sq_size = is_kdump_kernel() ? + MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : + MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev)); + + /* XDP SQ */ + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev)); + + /* set CQE compression */ + params->rx_cqe_compress_def = false; + if (MLX5_CAP_GEN(mdev, cqe_compression) && + MLX5_CAP_GEN(mdev, vport_group_manager)) + params->rx_cqe_compress_def = slow_pci_heuristic(mdev); + + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, false); + + /* RQ */ + mlx5e_build_rq_params(mdev, params); + + params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); + + /* CQ moderation params */ + rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 
+			MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
+			MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+	params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
+	params->tx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
+	mlx5e_set_rx_cq_mode_params(params, rx_cq_period_mode);
+	mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
+
+	/* TX inline */
+	mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
+
+	/* AF_XDP */
+	params->xsk = xsk;
+
+	/* Do not update netdev->features directly in here;
+	 * on mlx5e_attach_netdev() we will call mlx5e_update_features().
+	 * To update netdev->features, please modify mlx5e_fix_features().
+	 */
+}
+
+static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	u8 addr[ETH_ALEN];
+
+	mlx5_query_mac_address(priv->mdev, addr);
+	if (is_zero_ether_addr(addr) &&
+	    !MLX5_CAP_GEN(priv->mdev, vport_group_manager)) {
+		eth_hw_addr_random(netdev);
+		mlx5_core_info(priv->mdev, "Assigned random MAC address %pM\n", netdev->dev_addr);
+		return;
+	}
+
+	eth_hw_addr_set(netdev, addr);
+}
+
+static int mlx5e_vxlan_set_port(struct net_device *netdev, unsigned int table,
+				unsigned int entry, struct udp_tunnel_info *ti)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	return mlx5_vxlan_add_port(priv->mdev->vxlan, ntohs(ti->port));
+}
+
+static int mlx5e_vxlan_unset_port(struct net_device *netdev, unsigned int table,
+				  unsigned int entry, struct udp_tunnel_info *ti)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	return mlx5_vxlan_del_port(priv->mdev->vxlan, ntohs(ti->port));
+}
+
+void mlx5e_vxlan_set_netdev_info(struct mlx5e_priv *priv)
+{
+	if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
+		return;
+
+	priv->nic_info.set_port = mlx5e_vxlan_set_port;
+	priv->nic_info.unset_port = mlx5e_vxlan_unset_port;
+	priv->nic_info.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
+			       UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN;
+	priv->nic_info.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN;
+	/* Don't count the space hard-coded to the IANA port */
+	priv->nic_info.tables[0].n_entries =
+		mlx5_vxlan_max_udp_ports(priv->mdev) - 1;
+
+	priv->netdev->udp_tunnel_nic_info = &priv->nic_info;
+}
+
+static bool mlx5e_tunnel_any_tx_proto_supported(struct mlx5_core_dev *mdev)
+{
+	int tt;
+
+	for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+		if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5_get_proto_by_tunnel_type(tt)))
+			return true;
+	}
+	return (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev));
+}
+
+static void mlx5e_build_nic_netdev(struct net_device *netdev)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+	bool fcs_supported;
+	bool fcs_enabled;
+
+	SET_NETDEV_DEV(netdev, mdev->device);
+
+	netdev->netdev_ops = &mlx5e_netdev_ops;
+
+	mlx5e_dcbnl_build_netdev(netdev);
+
+	netdev->watchdog_timeo = 15 * HZ;
+
+	netdev->ethtool_ops = &mlx5e_ethtool_ops;
+
+	netdev->vlan_features |= NETIF_F_SG;
+	netdev->vlan_features |= NETIF_F_HW_CSUM;
+	netdev->vlan_features |= NETIF_F_GRO;
+	netdev->vlan_features |= NETIF_F_TSO;
+	netdev->vlan_features |= NETIF_F_TSO6;
+	netdev->vlan_features |= NETIF_F_RXCSUM;
+	netdev->vlan_features |= NETIF_F_RXHASH;
+	netdev->vlan_features |= NETIF_F_GSO_PARTIAL;
+
+	netdev->mpls_features |= NETIF_F_SG;
+	netdev->mpls_features |= NETIF_F_HW_CSUM;
+	netdev->mpls_features |= NETIF_F_TSO;
+	netdev->mpls_features |= NETIF_F_TSO6;
+
+	netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_TX;
+	netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_RX;
+
+	/* Tunneled LRO is
not supported in the driver, and the same RQs are + * shared between inner and outer TIRs, so the driver can't disable LRO + * for inner TIRs while having it enabled for outer TIRs. Due to this, + * block LRO altogether if the firmware declares tunneled LRO support. + */ + if (!!MLX5_CAP_ETH(mdev, lro_cap) && + !MLX5_CAP_ETH(mdev, tunnel_lro_vxlan) && + !MLX5_CAP_ETH(mdev, tunnel_lro_gre) && + mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT, + MLX5E_MPWRQ_UMR_MODE_ALIGNED)) + netdev->vlan_features |= NETIF_F_LRO; + + netdev->hw_features = netdev->vlan_features; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; + + if (mlx5e_tunnel_any_tx_proto_supported(mdev)) { + netdev->hw_enc_features |= NETIF_F_HW_CSUM; + netdev->hw_enc_features |= NETIF_F_TSO; + netdev->hw_enc_features |= NETIF_F_TSO6; + netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL; + } + + if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) { + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + } + + if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) { + netdev->hw_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + netdev->gso_partial_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + } + + if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) { + netdev->hw_features |= NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_IPXIP6; + netdev->hw_enc_features |= NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_IPXIP6; + netdev->gso_partial_features |= NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_IPXIP6; + } + + netdev->gso_partial_features |= NETIF_F_GSO_UDP_L4; + netdev->hw_features |= NETIF_F_GSO_UDP_L4; + netdev->features |= NETIF_F_GSO_UDP_L4; + + mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled); + + if (fcs_supported) + netdev->hw_features |= NETIF_F_RXALL; + + if (MLX5_CAP_ETH(mdev, scatter_fcs)) + netdev->hw_features |= NETIF_F_RXFCS; + + if (mlx5_qos_is_supported(mdev)) + netdev->hw_features |= NETIF_F_HW_TC; + + netdev->features = netdev->hw_features; + + /* Defaults */ + if (fcs_enabled) + netdev->features &= ~NETIF_F_RXALL; + netdev->features &= ~NETIF_F_LRO; + netdev->features &= ~NETIF_F_GRO_HW; + netdev->features &= ~NETIF_F_RXFCS; + +#define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f) + if (FT_CAP(flow_modify_en) && + FT_CAP(modify_root) && + FT_CAP(identified_miss_table_mode) && + FT_CAP(flow_table_modify)) { +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + netdev->hw_features |= NETIF_F_HW_TC; +#endif +#ifdef CONFIG_MLX5_EN_ARFS + netdev->hw_features |= NETIF_F_NTUPLE; +#endif + } + + netdev->features |= NETIF_F_HIGHDMA; + netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; + + netdev->priv_flags |= IFF_UNICAST_FLT; + + netif_set_tso_max_size(netdev, GSO_MAX_SIZE); + mlx5e_set_netdev_dev_addr(netdev); + mlx5e_macsec_build_netdev(priv); + mlx5e_ipsec_build_netdev(priv); + mlx5e_ktls_build_netdev(priv); +} + +void mlx5e_create_q_counters(struct mlx5e_priv *priv) +{ + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + 
MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); + err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out); + if (!err) + priv->q_counter = + MLX5_GET(alloc_q_counter_out, out, counter_set_id); + + err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out); + if (!err) + priv->drop_rq_q_counter = + MLX5_GET(alloc_q_counter_out, out, counter_set_id); +} + +void mlx5e_destroy_q_counters(struct mlx5e_priv *priv) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; + + MLX5_SET(dealloc_q_counter_in, in, opcode, + MLX5_CMD_OP_DEALLOC_Q_COUNTER); + if (priv->q_counter) { + MLX5_SET(dealloc_q_counter_in, in, counter_set_id, + priv->q_counter); + mlx5_cmd_exec_in(priv->mdev, dealloc_q_counter, in); + } + + if (priv->drop_rq_q_counter) { + MLX5_SET(dealloc_q_counter_in, in, counter_set_id, + priv->drop_rq_q_counter); + mlx5_cmd_exec_in(priv->mdev, dealloc_q_counter, in); + } +} + +static int mlx5e_nic_init(struct mlx5_core_dev *mdev, + struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_flow_steering *fs; + int err; + + mlx5e_build_nic_params(priv, &priv->xsk, netdev->mtu); + mlx5e_vxlan_set_netdev_info(priv); + + mlx5e_timestamp_init(priv); + + fs = mlx5e_fs_init(priv->profile, mdev, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + if (!fs) { + err = -ENOMEM; + mlx5_core_err(mdev, "FS initialization failed, %d\n", err); + return err; + } + priv->fs = fs; + + err = mlx5e_ipsec_init(priv); + if (err) + mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err); + + err = mlx5e_ktls_init(priv); + if (err) + mlx5_core_err(mdev, "TLS initialization failed, %d\n", err); + + mlx5e_health_create_reporters(priv); + return 0; +} + +static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) +{ + mlx5e_health_destroy_reporters(priv); + mlx5e_ktls_cleanup(priv); + mlx5e_ipsec_cleanup(priv); + mlx5e_fs_cleanup(priv->fs); + priv->fs = NULL; +} + +static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + enum mlx5e_rx_res_features features; + int err; + + priv->rx_res = mlx5e_rx_res_alloc(); + if (!priv->rx_res) + return -ENOMEM; + + mlx5e_create_q_counters(priv); + + err = mlx5e_open_drop_rq(priv, &priv->drop_rq); + if (err) { + mlx5_core_err(mdev, "open drop rq failed, %d\n", err); + goto err_destroy_q_counters; + } + + features = MLX5E_RX_RES_FEATURE_PTP; + if (mlx5_tunnel_inner_ft_supported(mdev)) + features |= MLX5E_RX_RES_FEATURE_INNER_FT; + err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features, + priv->max_nch, priv->drop_rq.rqn, + &priv->channels.params.packet_merge, + priv->channels.params.num_channels); + if (err) + goto err_close_drop_rq; + + err = mlx5e_create_flow_steering(priv->fs, priv->rx_res, priv->profile, + priv->netdev); + if (err) { + mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); + goto err_destroy_rx_res; + } + + err = mlx5e_tc_nic_init(priv); + if (err) + goto err_destroy_flow_steering; + + err = mlx5e_accel_init_rx(priv); + if (err) + goto err_tc_nic_cleanup; + +#ifdef CONFIG_MLX5_EN_ARFS + priv->netdev->rx_cpu_rmap = mlx5_eq_table_get_rmap(priv->mdev); +#endif + + return 0; + +err_tc_nic_cleanup: + mlx5e_tc_nic_cleanup(priv); +err_destroy_flow_steering: + mlx5e_destroy_flow_steering(priv->fs, !!(priv->netdev->hw_features & NETIF_F_NTUPLE), + priv->profile); +err_destroy_rx_res: + mlx5e_rx_res_destroy(priv->rx_res); +err_close_drop_rq: + mlx5e_close_drop_rq(&priv->drop_rq); +err_destroy_q_counters: + mlx5e_destroy_q_counters(priv); + 
mlx5e_rx_res_free(priv->rx_res); + priv->rx_res = NULL; + return err; +} + +static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) +{ + mlx5e_accel_cleanup_rx(priv); + mlx5e_tc_nic_cleanup(priv); + mlx5e_destroy_flow_steering(priv->fs, !!(priv->netdev->hw_features & NETIF_F_NTUPLE), + priv->profile); + mlx5e_rx_res_destroy(priv->rx_res); + mlx5e_close_drop_rq(&priv->drop_rq); + mlx5e_destroy_q_counters(priv); + mlx5e_rx_res_free(priv->rx_res); + priv->rx_res = NULL; +} + +static void mlx5e_set_mqprio_rl(struct mlx5e_priv *priv) +{ + struct mlx5e_params *params; + struct mlx5e_mqprio_rl *rl; + + params = &priv->channels.params; + if (params->mqprio.mode != TC_MQPRIO_MODE_CHANNEL) + return; + + rl = mlx5e_mqprio_rl_create(priv->mdev, params->mqprio.num_tc, + params->mqprio.channel.max_rate); + if (IS_ERR(rl)) + rl = NULL; + priv->mqprio_rl = rl; + mlx5e_mqprio_rl_update_params(params, rl); +} + +static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) +{ + int err; + + err = mlx5e_create_tises(priv); + if (err) { + mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err); + return err; + } + + err = mlx5e_accel_init_tx(priv); + if (err) + goto err_destroy_tises; + + mlx5e_set_mqprio_rl(priv); + mlx5e_dcbnl_initialize(priv); + return 0; + +err_destroy_tises: + mlx5e_destroy_tises(priv); + return err; +} + +static void mlx5e_nic_enable(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + mlx5e_fs_init_l2_addr(priv->fs, netdev); + + err = mlx5e_macsec_init(priv); + if (err) + mlx5_core_err(mdev, "MACsec initialization failed, %d\n", err); + + /* Marking the link as currently not needed by the Driver */ + if (!netif_running(netdev)) + mlx5e_modify_admin_state(mdev, MLX5_PORT_DOWN); + + mlx5e_set_netdev_mtu_boundaries(priv); + mlx5e_set_dev_port_mtu(priv); + + mlx5_lag_add_netdev(mdev, netdev); + + mlx5e_enable_async_events(priv); + mlx5e_enable_blocking_events(priv); + if (mlx5e_monitor_counter_supported(priv)) + mlx5e_monitor_counter_init(priv); + + mlx5e_hv_vhca_stats_create(priv); + if (netdev->reg_state != NETREG_REGISTERED) + return; + mlx5e_dcbnl_init_app(priv); + + mlx5e_nic_set_rx_mode(priv); + + rtnl_lock(); + if (netif_running(netdev)) + mlx5e_open(netdev); + udp_tunnel_nic_reset_ntf(priv->netdev); + netif_device_attach(netdev); + rtnl_unlock(); +} + +static void mlx5e_nic_disable(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + if (priv->netdev->reg_state == NETREG_REGISTERED) + mlx5e_dcbnl_delete_app(priv); + + rtnl_lock(); + if (netif_running(priv->netdev)) + mlx5e_close(priv->netdev); + netif_device_detach(priv->netdev); + rtnl_unlock(); + + mlx5e_nic_set_rx_mode(priv); + + mlx5e_hv_vhca_stats_destroy(priv); + if (mlx5e_monitor_counter_supported(priv)) + mlx5e_monitor_counter_cleanup(priv); + + mlx5e_disable_blocking_events(priv); + if (priv->en_trap) { + mlx5e_deactivate_trap(priv); + mlx5e_close_trap(priv->en_trap); + priv->en_trap = NULL; + } + mlx5e_disable_async_events(priv); + mlx5_lag_remove_netdev(mdev, priv->netdev); + mlx5_vxlan_reset_to_default(mdev->vxlan); + mlx5e_macsec_cleanup(priv); +} + +int mlx5e_update_nic_rx(struct mlx5e_priv *priv) +{ + return mlx5e_refresh_tirs(priv, false, false); +} + +static const struct mlx5e_profile mlx5e_nic_profile = { + .init = mlx5e_nic_init, + .cleanup = mlx5e_nic_cleanup, + .init_rx = mlx5e_init_nic_rx, + .cleanup_rx = mlx5e_cleanup_nic_rx, + .init_tx = mlx5e_init_nic_tx, + .cleanup_tx = mlx5e_cleanup_nic_tx, + .enable = 
mlx5e_nic_enable, + .disable = mlx5e_nic_disable, + .update_rx = mlx5e_update_nic_rx, + .update_stats = mlx5e_stats_update_ndo_stats, + .update_carrier = mlx5e_update_carrier, + .rx_handlers = &mlx5e_rx_handlers_nic, + .max_tc = MLX5E_MAX_NUM_TC, + .stats_grps = mlx5e_nic_stats_grps, + .stats_grps_num = mlx5e_nic_stats_grps_num, + .features = BIT(MLX5E_PROFILE_FEATURE_PTP_RX) | + BIT(MLX5E_PROFILE_FEATURE_PTP_TX) | + BIT(MLX5E_PROFILE_FEATURE_QOS_HTB) | + BIT(MLX5E_PROFILE_FEATURE_FS_VLAN) | + BIT(MLX5E_PROFILE_FEATURE_FS_TC), +}; + +static int mlx5e_profile_max_num_channels(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile) +{ + int nch; + + nch = mlx5e_get_max_num_channels(mdev); + + if (profile->max_nch_limit) + nch = min_t(int, nch, profile->max_nch_limit(mdev)); + return nch; +} + +static unsigned int +mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev, + const struct mlx5e_profile *profile) + +{ + unsigned int max_nch, tmp; + + /* core resources */ + max_nch = mlx5e_profile_max_num_channels(mdev, profile); + + /* netdev rx queues */ + max_nch = min_t(unsigned int, max_nch, netdev->num_rx_queues); + + /* netdev tx queues */ + tmp = netdev->num_tx_queues; + if (mlx5_qos_is_supported(mdev)) + tmp -= mlx5e_qos_max_leaf_nodes(mdev); + if (MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) + tmp -= profile->max_tc; + tmp = tmp / profile->max_tc; + max_nch = min_t(unsigned int, max_nch, tmp); + + return max_nch; +} + +int mlx5e_get_pf_num_tirs(struct mlx5_core_dev *mdev) +{ + /* Indirect TIRS: 2 sets of TTCs (inner + outer steering) + * and 1 set of direct TIRS + */ + return 2 * MLX5E_NUM_INDIR_TIRS + + mlx5e_profile_max_num_channels(mdev, &mlx5e_nic_profile); +} + +void mlx5e_set_rx_mode_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + set_rx_mode_work); + + return mlx5e_fs_set_rx_mode_work(priv->fs, priv->netdev); +} + +/* mlx5e generic netdev management API (move to en_common.c) */ +int mlx5e_priv_init(struct mlx5e_priv *priv, + const struct mlx5e_profile *profile, + struct net_device *netdev, + struct mlx5_core_dev *mdev) +{ + int nch, num_txqs, node; + int err; + + num_txqs = netdev->num_tx_queues; + nch = mlx5e_calc_max_nch(mdev, netdev, profile); + node = dev_to_node(mlx5_core_dma_dev(mdev)); + + /* priv init */ + priv->mdev = mdev; + priv->netdev = netdev; + priv->msglevel = MLX5E_MSG_LEVEL; + priv->max_nch = nch; + priv->max_opened_tc = 1; + + if (!alloc_cpumask_var(&priv->scratchpad.cpumask, GFP_KERNEL)) + return -ENOMEM; + + mutex_init(&priv->state_lock); + + err = mlx5e_selq_init(&priv->selq, &priv->state_lock); + if (err) + goto err_free_cpumask; + + INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); + INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); + INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work); + INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work); + + priv->wq = create_singlethread_workqueue("mlx5e"); + if (!priv->wq) + goto err_free_selq; + + priv->txq2sq = kcalloc_node(num_txqs, sizeof(*priv->txq2sq), GFP_KERNEL, node); + if (!priv->txq2sq) + goto err_destroy_workqueue; + + priv->tx_rates = kcalloc_node(num_txqs, sizeof(*priv->tx_rates), GFP_KERNEL, node); + if (!priv->tx_rates) + goto err_free_txq2sq; + + priv->channel_stats = + kcalloc_node(nch, sizeof(*priv->channel_stats), GFP_KERNEL, node); + if (!priv->channel_stats) + goto err_free_tx_rates; + + return 0; + +err_free_tx_rates: + kfree(priv->tx_rates); +err_free_txq2sq: + 
kfree(priv->txq2sq); +err_destroy_workqueue: + destroy_workqueue(priv->wq); +err_free_selq: + mlx5e_selq_cleanup(&priv->selq); +err_free_cpumask: + free_cpumask_var(priv->scratchpad.cpumask); + return -ENOMEM; +} + +void mlx5e_priv_cleanup(struct mlx5e_priv *priv) +{ + int i; + + /* bail if change profile failed and also rollback failed */ + if (!priv->mdev) + return; + + for (i = 0; i < priv->stats_nch; i++) + kvfree(priv->channel_stats[i]); + kfree(priv->channel_stats); + kfree(priv->tx_rates); + kfree(priv->txq2sq); + destroy_workqueue(priv->wq); + mutex_lock(&priv->state_lock); + mlx5e_selq_cleanup(&priv->selq); + mutex_unlock(&priv->state_lock); + free_cpumask_var(priv->scratchpad.cpumask); + + for (i = 0; i < priv->htb_max_qos_sqs; i++) + kfree(priv->htb_qos_sq_stats[i]); + kvfree(priv->htb_qos_sq_stats); + + memset(priv, 0, sizeof(*priv)); +} + +static unsigned int mlx5e_get_max_num_txqs(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile) +{ + unsigned int nch, ptp_txqs, qos_txqs; + + nch = mlx5e_profile_max_num_channels(mdev, profile); + + ptp_txqs = MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn) && + mlx5e_profile_feature_cap(profile, PTP_TX) ? + profile->max_tc : 0; + + qos_txqs = mlx5_qos_is_supported(mdev) && + mlx5e_profile_feature_cap(profile, QOS_HTB) ? + mlx5e_qos_max_leaf_nodes(mdev) : 0; + + return nch * profile->max_tc + ptp_txqs + qos_txqs; +} + +static unsigned int mlx5e_get_max_num_rxqs(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile) +{ + return mlx5e_profile_max_num_channels(mdev, profile); +} + +struct net_device * +mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile) +{ + struct net_device *netdev; + unsigned int txqs, rxqs; + int err; + + txqs = mlx5e_get_max_num_txqs(mdev, profile); + rxqs = mlx5e_get_max_num_rxqs(mdev, profile); + + netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), txqs, rxqs); + if (!netdev) { + mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); + return NULL; + } + + err = mlx5e_priv_init(netdev_priv(netdev), profile, netdev, mdev); + if (err) { + mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err); + goto err_free_netdev; + } + + netif_carrier_off(netdev); + netif_tx_disable(netdev); + dev_net_set(netdev, mlx5_core_net(mdev)); + + return netdev; + +err_free_netdev: + free_netdev(netdev); + + return NULL; +} + +static void mlx5e_update_features(struct net_device *netdev) +{ + if (netdev->reg_state != NETREG_REGISTERED) + return; /* features will be updated on netdev registration */ + + rtnl_lock(); + netdev_update_features(netdev); + rtnl_unlock(); +} + +static void mlx5e_reset_channels(struct net_device *netdev) +{ + netdev_reset_tc(netdev); +} + +int mlx5e_attach_netdev(struct mlx5e_priv *priv) +{ + const bool take_rtnl = priv->netdev->reg_state == NETREG_REGISTERED; + const struct mlx5e_profile *profile = priv->profile; + int max_nch; + int err; + + clear_bit(MLX5E_STATE_DESTROYING, &priv->state); + if (priv->fs) + mlx5e_fs_set_state_destroy(priv->fs, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + + /* Validate the max_wqe_size_sq capability. 
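+	 * If the reported maximum SQ WQE size, in basic blocks, is smaller
+	 * than MLX5E_MAX_TX_WQEBBS, attaching the netdev is refused with
+	 * -EIO below.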
*/ + if (WARN_ON_ONCE(mlx5e_get_max_sq_wqebbs(priv->mdev) < MLX5E_MAX_TX_WQEBBS)) { + mlx5_core_warn(priv->mdev, "MLX5E: Max SQ WQEBBs firmware capability: %u, needed %lu\n", + mlx5e_get_max_sq_wqebbs(priv->mdev), MLX5E_MAX_TX_WQEBBS); + return -EIO; + } + + /* max number of channels may have changed */ + max_nch = mlx5e_calc_max_nch(priv->mdev, priv->netdev, profile); + if (priv->channels.params.num_channels > max_nch) { + mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch); + /* Reducing the number of channels - RXFH has to be reset, and + * mlx5e_num_channels_changed below will build the RQT. + */ + priv->netdev->priv_flags &= ~IFF_RXFH_CONFIGURED; + priv->channels.params.num_channels = max_nch; + if (priv->channels.params.mqprio.mode == TC_MQPRIO_MODE_CHANNEL) { + mlx5_core_warn(priv->mdev, "MLX5E: Disabling MQPRIO channel mode\n"); + mlx5e_params_mqprio_reset(&priv->channels.params); + } + } + if (max_nch != priv->max_nch) { + mlx5_core_warn(priv->mdev, + "MLX5E: Updating max number of channels from %u to %u\n", + priv->max_nch, max_nch); + priv->max_nch = max_nch; + } + + /* 1. Set the real number of queues in the kernel the first time. + * 2. Set our default XPS cpumask. + * 3. Build the RQT. + * + * rtnl_lock is required by netif_set_real_num_*_queues in case the + * netdev has been registered by this point (if this function was called + * in the reload or resume flow). + */ + if (take_rtnl) + rtnl_lock(); + err = mlx5e_num_channels_changed(priv); + if (take_rtnl) + rtnl_unlock(); + if (err) + goto out; + + err = profile->init_tx(priv); + if (err) + goto out; + + err = profile->init_rx(priv); + if (err) + goto err_cleanup_tx; + + if (profile->enable) + profile->enable(priv); + + mlx5e_update_features(priv->netdev); + + return 0; + +err_cleanup_tx: + profile->cleanup_tx(priv); + +out: + mlx5e_reset_channels(priv->netdev); + set_bit(MLX5E_STATE_DESTROYING, &priv->state); + if (priv->fs) + mlx5e_fs_set_state_destroy(priv->fs, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + cancel_work_sync(&priv->update_stats_work); + return err; +} + +void mlx5e_detach_netdev(struct mlx5e_priv *priv) +{ + const struct mlx5e_profile *profile = priv->profile; + + set_bit(MLX5E_STATE_DESTROYING, &priv->state); + if (priv->fs) + mlx5e_fs_set_state_destroy(priv->fs, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + + if (profile->disable) + profile->disable(priv); + flush_workqueue(priv->wq); + + profile->cleanup_rx(priv); + profile->cleanup_tx(priv); + mlx5e_reset_channels(priv->netdev); + cancel_work_sync(&priv->update_stats_work); +} + +static int +mlx5e_netdev_init_profile(struct net_device *netdev, struct mlx5_core_dev *mdev, + const struct mlx5e_profile *new_profile, void *new_ppriv) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + err = mlx5e_priv_init(priv, new_profile, netdev, mdev); + if (err) { + mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err); + return err; + } + netif_carrier_off(netdev); + priv->profile = new_profile; + priv->ppriv = new_ppriv; + err = new_profile->init(priv->mdev, priv->netdev); + if (err) + goto priv_cleanup; + + return 0; + +priv_cleanup: + mlx5e_priv_cleanup(priv); + return err; +} + +static int +mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mdev, + const struct mlx5e_profile *new_profile, void *new_ppriv) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + err = mlx5e_netdev_init_profile(netdev, mdev, new_profile, new_ppriv); + if (err) + return err; + + 
err = mlx5e_attach_netdev(priv); + if (err) + goto profile_cleanup; + return err; + +profile_cleanup: + new_profile->cleanup(priv); + mlx5e_priv_cleanup(priv); + return err; +} + +int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, + const struct mlx5e_profile *new_profile, void *new_ppriv) +{ + const struct mlx5e_profile *orig_profile = priv->profile; + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + void *orig_ppriv = priv->ppriv; + int err, rollback_err; + + /* cleanup old profile */ + mlx5e_detach_netdev(priv); + priv->profile->cleanup(priv); + mlx5e_priv_cleanup(priv); + + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + mlx5e_netdev_init_profile(netdev, mdev, new_profile, new_ppriv); + set_bit(MLX5E_STATE_DESTROYING, &priv->state); + return -EIO; + } + + err = mlx5e_netdev_attach_profile(netdev, mdev, new_profile, new_ppriv); + if (err) { /* roll back to original profile */ + netdev_warn(netdev, "%s: new profile init failed, %d\n", __func__, err); + goto rollback; + } + + return 0; + +rollback: + rollback_err = mlx5e_netdev_attach_profile(netdev, mdev, orig_profile, orig_ppriv); + if (rollback_err) + netdev_err(netdev, "%s: failed to rollback to orig profile, %d\n", + __func__, rollback_err); + return err; +} + +void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv) +{ + mlx5e_netdev_change_profile(priv, &mlx5e_nic_profile, NULL); +} + +void mlx5e_destroy_netdev(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + + mlx5e_priv_cleanup(priv); + free_netdev(netdev); +} + +static int mlx5e_resume(struct auxiliary_device *adev) +{ + struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); + struct mlx5e_priv *priv = auxiliary_get_drvdata(adev); + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = edev->mdev; + int err; + + if (netif_device_present(netdev)) + return 0; + + err = mlx5e_create_mdev_resources(mdev); + if (err) + return err; + + err = mlx5e_attach_netdev(priv); + if (err) { + mlx5e_destroy_mdev_resources(mdev); + return err; + } + + return 0; +} + +static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state) +{ + struct mlx5e_priv *priv = auxiliary_get_drvdata(adev); + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + + if (!netif_device_present(netdev)) { + if (test_bit(MLX5E_STATE_DESTROYING, &priv->state)) + mlx5e_destroy_mdev_resources(mdev); + return -ENODEV; + } + + mlx5e_detach_netdev(priv); + mlx5e_destroy_mdev_resources(mdev); + return 0; +} + +static int mlx5e_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); + const struct mlx5e_profile *profile = &mlx5e_nic_profile; + struct mlx5_core_dev *mdev = edev->mdev; + struct net_device *netdev; + pm_message_t state = {}; + struct mlx5e_priv *priv; + int err; + + netdev = mlx5e_create_netdev(mdev, profile); + if (!netdev) { + mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); + return -ENOMEM; + } + + mlx5e_build_nic_netdev(netdev); + + priv = netdev_priv(netdev); + auxiliary_set_drvdata(adev, priv); + + priv->profile = profile; + priv->ppriv = NULL; + + err = mlx5e_devlink_port_register(priv); + if (err) { + mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); + goto err_destroy_netdev; + } + + err = profile->init(mdev, netdev); + if (err) { + mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err); + goto err_devlink_cleanup; + } + 
+ err = mlx5e_resume(adev); + if (err) { + mlx5_core_err(mdev, "mlx5e_resume failed, %d\n", err); + goto err_profile_cleanup; + } + + err = register_netdev(netdev); + if (err) { + mlx5_core_err(mdev, "register_netdev failed, %d\n", err); + goto err_resume; + } + + mlx5e_devlink_port_type_eth_set(priv); + + mlx5e_dcbnl_init_app(priv); + mlx5_uplink_netdev_set(mdev, netdev); + return 0; + +err_resume: + mlx5e_suspend(adev, state); +err_profile_cleanup: + profile->cleanup(priv); +err_devlink_cleanup: + mlx5e_devlink_port_unregister(priv); +err_destroy_netdev: + mlx5e_destroy_netdev(priv); + return err; +} + +static void mlx5e_remove(struct auxiliary_device *adev) +{ + struct mlx5e_priv *priv = auxiliary_get_drvdata(adev); + pm_message_t state = {}; + + mlx5e_dcbnl_delete_app(priv); + unregister_netdev(priv->netdev); + mlx5e_suspend(adev, state); + priv->profile->cleanup(priv); + mlx5e_devlink_port_unregister(priv); + mlx5e_destroy_netdev(priv); +} + +static const struct auxiliary_device_id mlx5e_id_table[] = { + { .name = MLX5_ADEV_NAME ".eth", }, + {}, +}; + +MODULE_DEVICE_TABLE(auxiliary, mlx5e_id_table); + +static struct auxiliary_driver mlx5e_driver = { + .name = "eth", + .probe = mlx5e_probe, + .remove = mlx5e_remove, + .suspend = mlx5e_suspend, + .resume = mlx5e_resume, + .id_table = mlx5e_id_table, +}; + +int mlx5e_init(void) +{ + int ret; + + mlx5e_build_ptys2ethtool_map(); + ret = auxiliary_driver_register(&mlx5e_driver); + if (ret) + return ret; + + ret = mlx5e_rep_init(); + if (ret) + auxiliary_driver_unregister(&mlx5e_driver); + return ret; +} + +void mlx5e_cleanup(void) +{ + mlx5e_rep_cleanup(); + auxiliary_driver_unregister(&mlx5e_driver); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c new file mode 100644 index 000000000..5aeca9534 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -0,0 +1,1521 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/mlx5/fs.h> +#include <net/switchdev.h> +#include <net/pkt_cls.h> +#include <net/act_api.h> +#include <net/devlink.h> +#include <net/ipv6_stubs.h> + +#include "eswitch.h" +#include "en.h" +#include "en_rep.h" +#include "en/params.h" +#include "en/txrx.h" +#include "en_tc.h" +#include "en/rep/tc.h" +#include "en/rep/neigh.h" +#include "en/rep/bridge.h" +#include "en/devlink.h" +#include "fs_core.h" +#include "lib/mlx5.h" +#include "lib/devcom.h" +#include "lib/vxlan.h" +#define CREATE_TRACE_POINTS +#include "diag/en_rep_tracepoint.h" +#include "en_accel/ipsec.h" +#include "en/tc/int_port.h" +#include "en/ptp.h" +#include "en/fs_ethtool.h" + +#define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \ + max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) +#define MLX5E_REP_PARAMS_DEF_NUM_CHANNELS 1 + +static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; + +static void mlx5e_rep_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + int count; + + strscpy(drvinfo->driver, mlx5e_rep_driver_name, + sizeof(drvinfo->driver)); + count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d (%.16s)", fw_rev_maj(mdev), + fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id); + if (count >= sizeof(drvinfo->fw_version)) + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d", fw_rev_maj(mdev), + fw_rev_min(mdev), fw_rev_sub(mdev)); +} + +static const struct counter_desc sw_rep_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) }, +}; + +struct vport_stats { + u64 vport_rx_packets; + u64 vport_tx_packets; + u64 vport_rx_bytes; + u64 vport_tx_bytes; +}; + +static const struct counter_desc vport_rep_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct vport_stats, vport_rx_packets) }, + { MLX5E_DECLARE_STAT(struct vport_stats, vport_rx_bytes) }, + { MLX5E_DECLARE_STAT(struct vport_stats, vport_tx_packets) }, + { MLX5E_DECLARE_STAT(struct vport_stats, vport_tx_bytes) }, +}; + +#define NUM_VPORT_REP_SW_COUNTERS ARRAY_SIZE(sw_rep_stats_desc) +#define NUM_VPORT_REP_HW_COUNTERS ARRAY_SIZE(vport_rep_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(sw_rep) +{ + return NUM_VPORT_REP_SW_COUNTERS; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(sw_rep) +{ + int i; + + for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + sw_rep_stats_desc[i].format); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(sw_rep) +{ + int i; + + for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, + sw_rep_stats_desc, i); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw_rep) +{ + struct mlx5e_sw_stats *s = &priv->stats.sw; + struct rtnl_link_stats64 stats64 = {}; + + memset(s, 0, sizeof(*s)); + mlx5e_fold_sw_stats64(priv, &stats64); + + s->rx_packets = stats64.rx_packets; + s->rx_bytes = stats64.rx_bytes; + s->tx_packets = stats64.tx_packets; + s->tx_bytes = stats64.tx_bytes; + s->tx_queue_dropped = stats64.tx_dropped; +} + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vport_rep) +{ + return NUM_VPORT_REP_HW_COUNTERS; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vport_rep) +{ + int i; + + for (i = 0; i < NUM_VPORT_REP_HW_COUNTERS; i++) + strcpy(data + (idx++) * 
ETH_GSTRING_LEN, vport_rep_stats_desc[i].format); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport_rep) +{ + int i; + + for (i = 0; i < NUM_VPORT_REP_HW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.vf_vport, + vport_rep_stats_desc, i); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport_rep) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct rtnl_link_stats64 *vport_stats; + struct ifla_vf_stats vf_stats; + int err; + + err = mlx5_eswitch_get_vport_stats(esw, rep->vport, &vf_stats); + if (err) { + netdev_warn(priv->netdev, "vport %d error %d reading stats\n", + rep->vport, err); + return; + } + + vport_stats = &priv->stats.vf_vport; + /* flip tx/rx as we are reporting the counters for the switch vport */ + vport_stats->rx_packets = vf_stats.tx_packets; + vport_stats->rx_bytes = vf_stats.tx_bytes; + vport_stats->tx_packets = vf_stats.rx_packets; + vport_stats->tx_bytes = vf_stats.rx_bytes; +} + +static void mlx5e_rep_get_strings(struct net_device *dev, + u32 stringset, uint8_t *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + switch (stringset) { + case ETH_SS_STATS: + mlx5e_stats_fill_strings(priv, data); + break; + } +} + +static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_ethtool_stats(priv, stats, data); +} + +static int mlx5e_rep_get_sset_count(struct net_device *dev, int sset) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + switch (sset) { + case ETH_SS_STATS: + return mlx5e_stats_total_num(priv); + default: + return -EOPNOTSUPP; + } +} + +static void +mlx5e_rep_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_ringparam(priv, param, kernel_param); +} + +static int +mlx5e_rep_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_set_ringparam(priv, param); +} + +static void mlx5e_rep_get_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_channels(priv, ch); +} + +static int mlx5e_rep_set_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_set_channels(priv, ch); +} + +static int mlx5e_rep_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal); +} + +static int mlx5e_rep_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_coalesce(priv, coal, kernel_coal, extack); +} + +static u32 mlx5e_rep_get_rxfh_key_size(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_rxfh_key_size(priv); +} + +static u32 
mlx5e_rep_get_rxfh_indir_size(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_rxfh_indir_size(priv); +} + +static const struct ethtool_ops mlx5e_rep_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USE_ADAPTIVE, + .get_drvinfo = mlx5e_rep_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = mlx5e_rep_get_strings, + .get_sset_count = mlx5e_rep_get_sset_count, + .get_ethtool_stats = mlx5e_rep_get_ethtool_stats, + .get_ringparam = mlx5e_rep_get_ringparam, + .set_ringparam = mlx5e_rep_set_ringparam, + .get_channels = mlx5e_rep_get_channels, + .set_channels = mlx5e_rep_set_channels, + .get_coalesce = mlx5e_rep_get_coalesce, + .set_coalesce = mlx5e_rep_set_coalesce, + .get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size, + .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size, +}; + +static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_sq *rep_sq, *tmp; + struct mlx5e_rep_priv *rpriv; + + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return; + + rpriv = mlx5e_rep_to_rep_priv(rep); + list_for_each_entry_safe(rep_sq, tmp, &rpriv->vport_sqs_list, list) { + mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule); + if (rep_sq->send_to_vport_rule_peer) + mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer); + list_del(&rep_sq->list); + kfree(rep_sq); + } +} + +static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + u32 *sqns_array, int sqns_num) +{ + struct mlx5_eswitch *peer_esw = NULL; + struct mlx5_flow_handle *flow_rule; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_rep_sq *rep_sq; + int err; + int i; + + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return 0; + + rpriv = mlx5e_rep_to_rep_priv(rep); + if (mlx5_devcom_is_paired(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS)) + peer_esw = mlx5_devcom_get_peer_data(esw->dev->priv.devcom, + MLX5_DEVCOM_ESW_OFFLOADS); + + for (i = 0; i < sqns_num; i++) { + rep_sq = kzalloc(sizeof(*rep_sq), GFP_KERNEL); + if (!rep_sq) { + err = -ENOMEM; + goto out_err; + } + + /* Add re-inject rule to the PF/representor sqs */ + flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, + sqns_array[i]); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + kfree(rep_sq); + goto out_err; + } + rep_sq->send_to_vport_rule = flow_rule; + rep_sq->sqn = sqns_array[i]; + + if (peer_esw) { + flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, + rep, sqns_array[i]); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule); + kfree(rep_sq); + goto out_err; + } + rep_sq->send_to_vport_rule_peer = flow_rule; + } + + list_add(&rep_sq->list, &rpriv->vport_sqs_list); + } + + if (peer_esw) + mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS); + + return 0; + +out_err: + mlx5e_sqs2vport_stop(esw, rep); + + if (peer_esw) + mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS); + + return err; +} + +static int +mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) +{ + int sqs_per_channel = mlx5e_get_dcb_num_tc(&priv->channels.params); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + bool is_uplink_rep = mlx5e_is_uplink_rep(priv); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + int n, tc, nch, num_sqs = 0; + struct mlx5e_channel *c; + int err = -ENOMEM; + 
bool ptp_sq; + u32 *sqs; + + ptp_sq = !!(priv->channels.ptp && + MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS)); + nch = priv->channels.num + ptp_sq; + /* +2 for xdpsqs, they don't exist on the ptp channel but will not be + * counted for by num_sqs. + */ + if (is_uplink_rep) + sqs_per_channel += 2; + + sqs = kvcalloc(nch * sqs_per_channel, sizeof(*sqs), GFP_KERNEL); + if (!sqs) + goto out; + + for (n = 0; n < priv->channels.num; n++) { + c = priv->channels.c[n]; + for (tc = 0; tc < c->num_tc; tc++) + sqs[num_sqs++] = c->sq[tc].sqn; + + if (is_uplink_rep) { + if (c->xdp) + sqs[num_sqs++] = c->rq_xdpsq.sqn; + + sqs[num_sqs++] = c->xdpsq.sqn; + } + } + if (ptp_sq) { + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + + for (tc = 0; tc < ptp_ch->num_tc; tc++) + sqs[num_sqs++] = ptp_ch->ptpsq[tc].txqsq.sqn; + } + + err = mlx5e_sqs2vport_start(esw, rep, sqs, num_sqs); + kvfree(sqs); + +out: + if (err) + netdev_warn(priv->netdev, "Failed to add SQs FWD rules %d\n", err); + return err; +} + +static void +mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + + mlx5e_sqs2vport_stop(esw, rep); +} + +static int +mlx5e_rep_add_meta_tunnel_rule(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_group *g; + + g = esw->fdb_table.offloads.send_to_vport_meta_grp; + if (!g) + return 0; + + flow_rule = mlx5_eswitch_add_send_to_vport_meta_rule(esw, rep->vport); + if (IS_ERR(flow_rule)) + return PTR_ERR(flow_rule); + + rpriv->send_to_vport_meta_rule = flow_rule; + + return 0; +} + +static void +mlx5e_rep_del_meta_tunnel_rule(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + if (rpriv->send_to_vport_meta_rule) + mlx5_eswitch_del_send_to_vport_meta_rule(rpriv->send_to_vport_meta_rule); +} + +void mlx5e_rep_activate_channels(struct mlx5e_priv *priv) +{ + mlx5e_add_sqs_fwd_rules(priv); + mlx5e_rep_add_meta_tunnel_rule(priv); +} + +void mlx5e_rep_deactivate_channels(struct mlx5e_priv *priv) +{ + mlx5e_rep_del_meta_tunnel_rule(priv); + mlx5e_remove_sqs_fwd_rules(priv); +} + +static int mlx5e_rep_open(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_open_locked(dev); + if (err) + goto unlock; + + if (!mlx5_modify_vport_admin_state(priv->mdev, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + rep->vport, 1, + MLX5_VPORT_ADMIN_STATE_UP)) + netif_carrier_on(dev); + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static int mlx5e_rep_close(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + int ret; + + mutex_lock(&priv->state_lock); + mlx5_modify_vport_admin_state(priv->mdev, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + rep->vport, 1, + MLX5_VPORT_ADMIN_STATE_DOWN); + ret = mlx5e_close_locked(dev); + mutex_unlock(&priv->state_lock); + return ret; +} + +bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep; + + if (!MLX5_ESWITCH_MANAGER(priv->mdev)) + return false; + + if (!rpriv) /* non vport rep 
mlx5e instances don't use this field */ + return false; + + rep = rpriv->rep; + return (rep->vport == MLX5_VPORT_UPLINK); +} + +bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id) +{ + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + return true; + } + + return false; +} + +static int +mlx5e_get_sw_stats64(const struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_fold_sw_stats64(priv, stats); + return 0; +} + +int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) +{ + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + return mlx5e_get_sw_stats64(dev, sp); + } + + return -EINVAL; +} + +static void +mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + /* update HW stats in background for next time */ + mlx5e_queue_update_stats(priv); + memcpy(stats, &priv->stats.vf_vport, sizeof(*stats)); +} + +static int mlx5e_rep_change_mtu(struct net_device *netdev, int new_mtu) +{ + return mlx5e_change_mtu(netdev, new_mtu, NULL); +} + +static struct devlink_port *mlx5e_rep_get_devlink_port(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_core_dev *dev = priv->mdev; + + return mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); +} + +static int mlx5e_rep_change_carrier(struct net_device *dev, bool new_carrier) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + int err; + + if (new_carrier) { + err = mlx5_modify_vport_admin_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + rep->vport, 1, MLX5_VPORT_ADMIN_STATE_UP); + if (err) + return err; + netif_carrier_on(dev); + } else { + err = mlx5_modify_vport_admin_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + rep->vport, 1, MLX5_VPORT_ADMIN_STATE_DOWN); + if (err) + return err; + netif_carrier_off(dev); + } + return 0; +} + +static const struct net_device_ops mlx5e_netdev_ops_rep = { + .ndo_open = mlx5e_rep_open, + .ndo_stop = mlx5e_rep_close, + .ndo_start_xmit = mlx5e_xmit, + .ndo_setup_tc = mlx5e_rep_setup_tc, + .ndo_get_devlink_port = mlx5e_rep_get_devlink_port, + .ndo_get_stats64 = mlx5e_rep_get_stats, + .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, + .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, + .ndo_change_mtu = mlx5e_rep_change_mtu, + .ndo_change_carrier = mlx5e_rep_change_carrier, +}; + +bool mlx5e_eswitch_uplink_rep(const struct net_device *netdev) +{ + return netdev->netdev_ops == &mlx5e_netdev_ops && + mlx5e_is_uplink_rep(netdev_priv(netdev)); +} + +bool mlx5e_eswitch_vf_rep(const struct net_device *netdev) +{ + return netdev->netdev_ops == &mlx5e_netdev_ops_rep; +} + +/* One indirect TIR set for outer. Inner not supported in reps. 
*/ +#define REP_NUM_INDIR_TIRS MLX5E_NUM_INDIR_TIRS + +static int mlx5e_rep_max_nch_limit(struct mlx5_core_dev *mdev) +{ + int max_tir_num = 1 << MLX5_CAP_GEN(mdev, log_max_tir); + int num_vports = mlx5_eswitch_get_total_vports(mdev); + + return (max_tir_num - mlx5e_get_pf_num_tirs(mdev) + - (num_vports * REP_NUM_INDIR_TIRS)) / num_vports; +} + +static void mlx5e_build_rep_params(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_params *params; + + u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? + MLX5_CQ_PERIOD_MODE_START_FROM_CQE : + MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + + params = &priv->channels.params; + + params->num_channels = MLX5E_REP_PARAMS_DEF_NUM_CHANNELS; + params->hard_mtu = MLX5E_ETH_HARD_MTU; + params->sw_mtu = netdev->mtu; + + /* SQ */ + if (rep->vport == MLX5_VPORT_UPLINK) + params->log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + else + params->log_sq_size = MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE; + + /* RQ */ + mlx5e_build_rq_params(mdev, params); + + /* CQ moderation params */ + params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + mlx5e_set_rx_cq_mode_params(params, cq_period_mode); + + params->mqprio.num_tc = 1; + if (rep->vport != MLX5_VPORT_UPLINK) + params->vlan_strip_disable = true; + + mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); +} + +static void mlx5e_build_rep_netdev(struct net_device *netdev, + struct mlx5_core_dev *mdev) +{ + SET_NETDEV_DEV(netdev, mdev->device); + netdev->netdev_ops = &mlx5e_netdev_ops_rep; + eth_hw_addr_random(netdev); + netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; + + netdev->watchdog_timeo = 15 * HZ; + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + netdev->hw_features |= NETIF_F_HW_TC; +#endif + netdev->hw_features |= NETIF_F_SG; + netdev->hw_features |= NETIF_F_IP_CSUM; + netdev->hw_features |= NETIF_F_IPV6_CSUM; + netdev->hw_features |= NETIF_F_GRO; + netdev->hw_features |= NETIF_F_TSO; + netdev->hw_features |= NETIF_F_TSO6; + netdev->hw_features |= NETIF_F_RXCSUM; + + netdev->features |= netdev->hw_features; + netdev->features |= NETIF_F_NETNS_LOCAL; +} + +static int mlx5e_init_rep(struct mlx5_core_dev *mdev, + struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + priv->fs = mlx5e_fs_init(priv->profile, mdev, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + if (!priv->fs) { + netdev_err(priv->netdev, "FS allocation failed\n"); + return -ENOMEM; + } + + mlx5e_build_rep_params(netdev); + mlx5e_timestamp_init(priv); + + return 0; +} + +static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev, + struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + priv->fs = mlx5e_fs_init(priv->profile, mdev, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + if (!priv->fs) { + netdev_err(priv->netdev, "FS allocation failed\n"); + return -ENOMEM; + } + + err = mlx5e_ipsec_init(priv); + if (err) + mlx5_core_err(mdev, "Uplink rep IPsec initialization failed, %d\n", err); + + mlx5e_vxlan_set_netdev_info(priv); + mlx5e_build_rep_params(netdev); + mlx5e_timestamp_init(priv); + return 0; +} + +static void mlx5e_cleanup_rep(struct mlx5e_priv *priv) +{ + mlx5e_fs_cleanup(priv->fs); + mlx5e_ipsec_cleanup(priv); + priv->fs = NULL; +} + +static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; 
+ struct ttc_params ttc_params = {}; + int err; + + mlx5e_fs_set_ns(priv->fs, + mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_KERNEL), false); + + /* The inner_ttc in the ttc params is intentionally not set */ + mlx5e_set_ttc_params(priv->fs, priv->rx_res, &ttc_params, false); + + if (rep->vport != MLX5_VPORT_UPLINK) + /* To give uplink rep TTC a lower level for chaining from root ft */ + ttc_params.ft_attr.level = MLX5E_TTC_FT_LEVEL + 1; + + mlx5e_fs_set_ttc(priv->fs, mlx5_create_ttc_table(priv->mdev, &ttc_params), false); + if (IS_ERR(mlx5e_fs_get_ttc(priv->fs, false))) { + err = PTR_ERR(mlx5e_fs_get_ttc(priv->fs, false)); + netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n", + err); + return err; + } + return 0; +} + +static int mlx5e_create_rep_root_ft(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + int err = 0; + + if (rep->vport != MLX5_VPORT_UPLINK) { + /* non uplink reps will skip any bypass tables and go directly to + * their own ttc + */ + rpriv->root_ft = mlx5_get_ttc_flow_table(mlx5e_fs_get_ttc(priv->fs, false)); + return 0; + } + + /* uplink root ft will be used to auto chain, to ethtool or ttc tables */ + ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_OFFLOADS); + if (!ns) { + netdev_err(priv->netdev, "Failed to get reps offloads namespace\n"); + return -EOPNOTSUPP; + } + + ft_attr.max_fte = 0; /* Empty table, miss rule will always point to next table */ + ft_attr.prio = 1; + ft_attr.level = 1; + + rpriv->root_ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(rpriv->root_ft)) { + err = PTR_ERR(rpriv->root_ft); + rpriv->root_ft = NULL; + } + + return err; +} + +static void mlx5e_destroy_rep_root_ft(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + + if (rep->vport != MLX5_VPORT_UPLINK) + return; + mlx5_destroy_flow_table(rpriv->root_ft); +} + +static int mlx5e_create_rep_vport_rx_rule(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_destination dest; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = rpriv->root_ft; + + flow_rule = mlx5_eswitch_create_vport_rx_rule(esw, rep->vport, &dest); + if (IS_ERR(flow_rule)) + return PTR_ERR(flow_rule); + rpriv->vport_rx_rule = flow_rule; + return 0; +} + +static void rep_vport_rx_rule_destroy(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + if (!rpriv->vport_rx_rule) + return; + + mlx5_del_flow_rules(rpriv->vport_rx_rule); + rpriv->vport_rx_rule = NULL; +} + +int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup) +{ + rep_vport_rx_rule_destroy(priv); + + return cleanup ? 
0 : mlx5e_create_rep_vport_rx_rule(priv); +} + +static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + priv->rx_res = mlx5e_rx_res_alloc(); + if (!priv->rx_res) { + err = -ENOMEM; + goto err_free_fs; + } + + mlx5e_fs_init_l2_addr(priv->fs, priv->netdev); + + err = mlx5e_open_drop_rq(priv, &priv->drop_rq); + if (err) { + mlx5_core_err(mdev, "open drop rq failed, %d\n", err); + goto err_rx_res_free; + } + + err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0, + priv->max_nch, priv->drop_rq.rqn, + &priv->channels.params.packet_merge, + priv->channels.params.num_channels); + if (err) + goto err_close_drop_rq; + + err = mlx5e_create_rep_ttc_table(priv); + if (err) + goto err_destroy_rx_res; + + err = mlx5e_create_rep_root_ft(priv); + if (err) + goto err_destroy_ttc_table; + + err = mlx5e_create_rep_vport_rx_rule(priv); + if (err) + goto err_destroy_root_ft; + + mlx5e_ethtool_init_steering(priv->fs); + + return 0; + +err_destroy_root_ft: + mlx5e_destroy_rep_root_ft(priv); +err_destroy_ttc_table: + mlx5_destroy_ttc_table(mlx5e_fs_get_ttc(priv->fs, false)); +err_destroy_rx_res: + mlx5e_rx_res_destroy(priv->rx_res); +err_close_drop_rq: + mlx5e_close_drop_rq(&priv->drop_rq); +err_rx_res_free: + mlx5e_rx_res_free(priv->rx_res); + priv->rx_res = NULL; +err_free_fs: + mlx5e_fs_cleanup(priv->fs); + priv->fs = NULL; + return err; +} + +static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) +{ + mlx5e_ethtool_cleanup_steering(priv->fs); + rep_vport_rx_rule_destroy(priv); + mlx5e_destroy_rep_root_ft(priv); + mlx5_destroy_ttc_table(mlx5e_fs_get_ttc(priv->fs, false)); + mlx5e_rx_res_destroy(priv->rx_res); + mlx5e_close_drop_rq(&priv->drop_rq); + mlx5e_rx_res_free(priv->rx_res); + priv->rx_res = NULL; +} + +static int mlx5e_init_ul_rep_rx(struct mlx5e_priv *priv) +{ + int err; + + mlx5e_create_q_counters(priv); + err = mlx5e_init_rep_rx(priv); + if (err) + goto out; + + mlx5e_tc_int_port_init_rep_rx(priv); + +out: + return err; +} + +static void mlx5e_cleanup_ul_rep_rx(struct mlx5e_priv *priv) +{ + mlx5e_tc_int_port_cleanup_rep_rx(priv); + mlx5e_cleanup_rep_rx(priv); + mlx5e_destroy_q_counters(priv); +} + +static int mlx5e_init_uplink_rep_tx(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5_rep_uplink_priv *uplink_priv; + struct net_device *netdev; + struct mlx5e_priv *priv; + int err; + + netdev = rpriv->netdev; + priv = netdev_priv(netdev); + uplink_priv = &rpriv->uplink_priv; + + err = mlx5e_rep_tc_init(rpriv); + if (err) + return err; + + mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev); + + mlx5e_rep_bond_init(rpriv); + err = mlx5e_rep_tc_netdevice_event_register(rpriv); + if (err) { + mlx5_core_err(priv->mdev, "Failed to register netdev notifier, err: %d\n", + err); + goto err_event_reg; + } + + return 0; + +err_event_reg: + mlx5e_rep_bond_cleanup(rpriv); + mlx5e_rep_tc_cleanup(rpriv); + return err; +} + +static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv) +{ + mlx5e_rep_tc_netdevice_event_unregister(rpriv); + mlx5e_rep_bond_cleanup(rpriv); + mlx5e_rep_tc_cleanup(rpriv); +} + +static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + int err; + + err = mlx5e_create_tises(priv); + if (err) { + mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err); + return err; + } + + err = mlx5e_rep_neigh_init(rpriv); + if (err) + goto err_neigh_init; + + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { + err = mlx5e_init_uplink_rep_tx(rpriv); + if (err) + goto err_init_tx; + 
} + + err = mlx5e_tc_ht_init(&rpriv->tc_ht); + if (err) + goto err_ht_init; + + return 0; + +err_ht_init: + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) + mlx5e_cleanup_uplink_rep_tx(rpriv); +err_init_tx: + mlx5e_rep_neigh_cleanup(rpriv); +err_neigh_init: + mlx5e_destroy_tises(priv); + return err; +} + +static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + mlx5e_tc_ht_cleanup(&rpriv->tc_ht); + + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) + mlx5e_cleanup_uplink_rep_tx(rpriv); + + mlx5e_rep_neigh_cleanup(rpriv); + mlx5e_destroy_tises(priv); +} + +static void mlx5e_rep_enable(struct mlx5e_priv *priv) +{ + mlx5e_set_netdev_mtu_boundaries(priv); +} + +static void mlx5e_rep_disable(struct mlx5e_priv *priv) +{ +} + +static int mlx5e_update_rep_rx(struct mlx5e_priv *priv) +{ + return 0; +} + +static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb); + + if (event == MLX5_EVENT_TYPE_PORT_CHANGE) { + struct mlx5_eqe *eqe = data; + + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + queue_work(priv->wq, &priv->update_carrier_work); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; + } + + if (event == MLX5_DEV_EVENT_PORT_AFFINITY) + return mlx5e_rep_tc_event_port_affinity(priv); + + return NOTIFY_DONE; +} + +static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + u16 max_mtu; + + netdev->min_mtu = ETH_MIN_MTU; + mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1); + netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu); + mlx5e_set_dev_port_mtu(priv); + + mlx5e_rep_tc_enable(priv); + + if (MLX5_CAP_GEN(mdev, uplink_follow)) + mlx5_modify_vport_admin_state(mdev, MLX5_VPORT_STATE_OP_MOD_UPLINK, + 0, 0, MLX5_VPORT_ADMIN_STATE_AUTO); + mlx5_lag_add_netdev(mdev, netdev); + priv->events_nb.notifier_call = uplink_rep_async_event; + mlx5_notifier_register(mdev, &priv->events_nb); + mlx5e_dcbnl_initialize(priv); + mlx5e_dcbnl_init_app(priv); + mlx5e_rep_bridge_init(priv); + + netdev->wanted_features |= NETIF_F_HW_TC; + + rtnl_lock(); + if (netif_running(netdev)) + mlx5e_open(netdev); + udp_tunnel_nic_reset_ntf(priv->netdev); + netif_device_attach(netdev); + rtnl_unlock(); +} + +static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + rtnl_lock(); + if (netif_running(priv->netdev)) + mlx5e_close(priv->netdev); + netif_device_detach(priv->netdev); + rtnl_unlock(); + + mlx5e_rep_bridge_cleanup(priv); + mlx5e_dcbnl_delete_app(priv); + mlx5_notifier_unregister(mdev, &priv->events_nb); + mlx5e_rep_tc_disable(priv); + mlx5_lag_remove_netdev(mdev, priv->netdev); + mlx5_vxlan_reset_to_default(mdev->vxlan); +} + +static MLX5E_DEFINE_STATS_GRP(sw_rep, 0); +static MLX5E_DEFINE_STATS_GRP(vport_rep, MLX5E_NDO_UPDATE_STATS); + +/* The stats groups order is opposite to the update_stats() order calls */ +static mlx5e_stats_grp_t mlx5e_rep_stats_grps[] = { + &MLX5E_STATS_GRP(sw_rep), + &MLX5E_STATS_GRP(vport_rep), +}; + +static unsigned int mlx5e_rep_stats_grps_num(struct mlx5e_priv *priv) +{ + return ARRAY_SIZE(mlx5e_rep_stats_grps); +} + +/* The stats groups order is opposite to the update_stats() order calls */ +static mlx5e_stats_grp_t mlx5e_ul_rep_stats_grps[] = { + &MLX5E_STATS_GRP(sw), + &MLX5E_STATS_GRP(qcnt), + 
&MLX5E_STATS_GRP(vnic_env), + &MLX5E_STATS_GRP(vport), + &MLX5E_STATS_GRP(802_3), + &MLX5E_STATS_GRP(2863), + &MLX5E_STATS_GRP(2819), + &MLX5E_STATS_GRP(phy), + &MLX5E_STATS_GRP(eth_ext), + &MLX5E_STATS_GRP(pcie), + &MLX5E_STATS_GRP(per_prio), + &MLX5E_STATS_GRP(pme), + &MLX5E_STATS_GRP(channels), + &MLX5E_STATS_GRP(per_port_buff_congest), +#ifdef CONFIG_MLX5_EN_IPSEC + &MLX5E_STATS_GRP(ipsec_sw), +#endif + &MLX5E_STATS_GRP(ptp), +}; + +static unsigned int mlx5e_ul_rep_stats_grps_num(struct mlx5e_priv *priv) +{ + return ARRAY_SIZE(mlx5e_ul_rep_stats_grps); +} + +static const struct mlx5e_profile mlx5e_rep_profile = { + .init = mlx5e_init_rep, + .cleanup = mlx5e_cleanup_rep, + .init_rx = mlx5e_init_rep_rx, + .cleanup_rx = mlx5e_cleanup_rep_rx, + .init_tx = mlx5e_init_rep_tx, + .cleanup_tx = mlx5e_cleanup_rep_tx, + .enable = mlx5e_rep_enable, + .disable = mlx5e_rep_disable, + .update_rx = mlx5e_update_rep_rx, + .update_stats = mlx5e_stats_update_ndo_stats, + .rx_handlers = &mlx5e_rx_handlers_rep, + .max_tc = 1, + .stats_grps = mlx5e_rep_stats_grps, + .stats_grps_num = mlx5e_rep_stats_grps_num, + .max_nch_limit = mlx5e_rep_max_nch_limit, +}; + +static const struct mlx5e_profile mlx5e_uplink_rep_profile = { + .init = mlx5e_init_ul_rep, + .cleanup = mlx5e_cleanup_rep, + .init_rx = mlx5e_init_ul_rep_rx, + .cleanup_rx = mlx5e_cleanup_ul_rep_rx, + .init_tx = mlx5e_init_rep_tx, + .cleanup_tx = mlx5e_cleanup_rep_tx, + .enable = mlx5e_uplink_rep_enable, + .disable = mlx5e_uplink_rep_disable, + .update_rx = mlx5e_update_rep_rx, + .update_stats = mlx5e_stats_update_ndo_stats, + .update_carrier = mlx5e_update_carrier, + .rx_handlers = &mlx5e_rx_handlers_rep, + .max_tc = MLX5E_MAX_NUM_TC, + .stats_grps = mlx5e_ul_rep_stats_grps, + .stats_grps_num = mlx5e_ul_rep_stats_grps_num, +}; + +/* e-Switch vport representors */ +static int +mlx5e_vport_uplink_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_priv *priv = netdev_priv(mlx5_uplink_netdev_get(dev)); + struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); + struct devlink_port *dl_port; + int err; + + rpriv->netdev = priv->netdev; + + err = mlx5e_netdev_change_profile(priv, &mlx5e_uplink_rep_profile, + rpriv); + if (err) + return err; + + dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); + if (dl_port) + devlink_port_type_eth_set(dl_port, rpriv->netdev); + + return 0; +} + +static void +mlx5e_vport_uplink_rep_unload(struct mlx5e_rep_priv *rpriv) +{ + struct net_device *netdev = rpriv->netdev; + struct devlink_port *dl_port; + struct mlx5_core_dev *dev; + struct mlx5e_priv *priv; + + priv = netdev_priv(netdev); + dev = priv->mdev; + + dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); + if (dl_port) + devlink_port_type_clear(dl_port); + mlx5e_netdev_attach_nic_profile(priv); +} + +static int +mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); + const struct mlx5e_profile *profile; + struct devlink_port *dl_port; + struct net_device *netdev; + struct mlx5e_priv *priv; + int err; + + profile = &mlx5e_rep_profile; + netdev = mlx5e_create_netdev(dev, profile); + if (!netdev) { + mlx5_core_warn(dev, + "Failed to create representor netdev for vport %d\n", + rep->vport); + return -EINVAL; + } + + mlx5e_build_rep_netdev(netdev, dev); + rpriv->netdev = netdev; + + priv = netdev_priv(netdev); + priv->profile = profile; + priv->ppriv = rpriv; + err = profile->init(dev, 
netdev); + if (err) { + netdev_warn(netdev, "rep profile init failed, %d\n", err); + goto err_destroy_netdev; + } + + err = mlx5e_attach_netdev(netdev_priv(netdev)); + if (err) { + netdev_warn(netdev, + "Failed to attach representor netdev for vport %d\n", + rep->vport); + goto err_cleanup_profile; + } + + err = register_netdev(netdev); + if (err) { + netdev_warn(netdev, + "Failed to register representor netdev for vport %d\n", + rep->vport); + goto err_detach_netdev; + } + + dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); + if (dl_port) + devlink_port_type_eth_set(dl_port, netdev); + return 0; + +err_detach_netdev: + mlx5e_detach_netdev(netdev_priv(netdev)); + +err_cleanup_profile: + priv->profile->cleanup(priv); + +err_destroy_netdev: + mlx5e_destroy_netdev(netdev_priv(netdev)); + return err; +} + +static int +mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv; + int err; + + rpriv = kvzalloc(sizeof(*rpriv), GFP_KERNEL); + if (!rpriv) + return -ENOMEM; + + /* rpriv->rep to be looked up when profile->init() is called */ + rpriv->rep = rep; + rep->rep_data[REP_ETH].priv = rpriv; + INIT_LIST_HEAD(&rpriv->vport_sqs_list); + + if (rep->vport == MLX5_VPORT_UPLINK) + err = mlx5e_vport_uplink_rep_load(dev, rep); + else + err = mlx5e_vport_vf_rep_load(dev, rep); + + if (err) + kvfree(rpriv); + + return err; +} + +static void +mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *dev = priv->mdev; + struct devlink_port *dl_port; + void *ppriv = priv->ppriv; + + if (rep->vport == MLX5_VPORT_UPLINK) { + mlx5e_vport_uplink_rep_unload(rpriv); + goto free_ppriv; + } + + dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); + if (dl_port) + devlink_port_type_clear(dl_port); + unregister_netdev(netdev); + mlx5e_detach_netdev(priv); + priv->profile->cleanup(priv); + mlx5e_destroy_netdev(priv); +free_ppriv: + kvfree(ppriv); /* mlx5e_rep_priv */ +} + +static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv; + + rpriv = mlx5e_rep_to_rep_priv(rep); + + return rpriv->netdev; +} + +static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv; + struct mlx5e_rep_sq *rep_sq; + + rpriv = mlx5e_rep_to_rep_priv(rep); + list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) { + if (!rep_sq->send_to_vport_rule_peer) + continue; + mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer); + rep_sq->send_to_vport_rule_peer = NULL; + } +} + +static int mlx5e_vport_rep_event_pair(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + struct mlx5_eswitch *peer_esw) +{ + struct mlx5_flow_handle *flow_rule; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_rep_sq *rep_sq; + + rpriv = mlx5e_rep_to_rep_priv(rep); + list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) { + if (rep_sq->send_to_vport_rule_peer) + continue; + flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep, rep_sq->sqn); + if (IS_ERR(flow_rule)) + goto err_out; + rep_sq->send_to_vport_rule_peer = flow_rule; + } + + return 0; +err_out: + mlx5e_vport_rep_event_unpair(rep); + return PTR_ERR(flow_rule); +} + +static int mlx5e_vport_rep_event(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + enum mlx5_switchdev_event event, + void 
*data) +{ + int err = 0; + + if (event == MLX5_SWITCHDEV_EVENT_PAIR) + err = mlx5e_vport_rep_event_pair(esw, rep, data); + else if (event == MLX5_SWITCHDEV_EVENT_UNPAIR) + mlx5e_vport_rep_event_unpair(rep); + + return err; +} + +static const struct mlx5_eswitch_rep_ops rep_ops = { + .load = mlx5e_vport_rep_load, + .unload = mlx5e_vport_rep_unload, + .get_proto_dev = mlx5e_vport_rep_get_proto_dev, + .event = mlx5e_vport_rep_event, +}; + +static int mlx5e_rep_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); + struct mlx5_core_dev *mdev = edev->mdev; + struct mlx5_eswitch *esw; + + esw = mdev->priv.eswitch; + mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_ETH); + return 0; +} + +static void mlx5e_rep_remove(struct auxiliary_device *adev) +{ + struct mlx5_adev *vdev = container_of(adev, struct mlx5_adev, adev); + struct mlx5_core_dev *mdev = vdev->mdev; + struct mlx5_eswitch *esw; + + esw = mdev->priv.eswitch; + mlx5_eswitch_unregister_vport_reps(esw, REP_ETH); +} + +static const struct auxiliary_device_id mlx5e_rep_id_table[] = { + { .name = MLX5_ADEV_NAME ".eth-rep", }, + {}, +}; + +MODULE_DEVICE_TABLE(auxiliary, mlx5e_rep_id_table); + +static struct auxiliary_driver mlx5e_rep_driver = { + .name = "eth-rep", + .probe = mlx5e_rep_probe, + .remove = mlx5e_rep_remove, + .id_table = mlx5e_rep_id_table, +}; + +int mlx5e_rep_init(void) +{ + return auxiliary_driver_register(&mlx5e_rep_driver); +} + +void mlx5e_rep_cleanup(void) +{ + auxiliary_driver_unregister(&mlx5e_rep_driver); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h new file mode 100644 index 000000000..b4e691760 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __MLX5E_REP_H__ +#define __MLX5E_REP_H__ + +#include <net/ip_tunnels.h> +#include <linux/rhashtable.h> +#include <linux/mutex.h> +#include "eswitch.h" +#include "en.h" +#include "lib/port_tun.h" + +#ifdef CONFIG_MLX5_ESWITCH +extern const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep; + +struct mlx5e_neigh_update_table { + struct rhashtable neigh_ht; + /* Save the neigh hash entries in a list in addition to the hash table + * (neigh_ht). In order to iterate easily over the neigh entries. + * Used for stats query. + */ + struct list_head neigh_list; + /* protect lookup/remove operations */ + struct mutex encap_lock; + struct notifier_block netevent_nb; + struct delayed_work neigh_stats_work; + unsigned long min_interval; /* jiffies */ +}; + +struct mlx5_tc_ct_priv; +struct mlx5_tc_int_port_priv; +struct mlx5e_rep_bond; +struct mlx5e_tc_tun_encap; +struct mlx5e_post_act; +struct mlx5e_flow_meters; + +struct mlx5_rep_uplink_priv { + /* indirect block callbacks are invoked on bind/unbind events + * on registered higher level devices (e.g. tunnel devices) + * + * tc_indr_block_cb_priv_list is used to lookup indirect callback + * private data + * + */ + struct list_head tc_indr_block_priv_list; + + struct mlx5_tun_entropy tun_entropy; + + /* protects unready_flows */ + struct mutex unready_flows_lock; + struct list_head unready_flows; + struct work_struct reoffload_flows_work; + + /* maps tun_info to a unique id*/ + struct mapping_ctx *tunnel_mapping; + /* maps tun_enc_opts to a unique id*/ + struct mapping_ctx *tunnel_enc_opts_mapping; + + struct mlx5e_post_act *post_act; + struct mlx5_tc_ct_priv *ct_priv; + struct mlx5e_tc_psample *tc_psample; + + /* support eswitch vports bonding */ + struct mlx5e_rep_bond *bond; + + /* tc tunneling encapsulation private data */ + struct mlx5e_tc_tun_encap *encap; + + /* OVS internal port support */ + struct mlx5e_tc_int_port_priv *int_port_priv; + + struct mlx5e_flow_meters *flow_meters; +}; + +struct mlx5e_rep_priv { + struct mlx5_eswitch_rep *rep; + struct mlx5e_neigh_update_table neigh_update; + struct net_device *netdev; + struct mlx5_flow_table *root_ft; + struct mlx5_flow_handle *vport_rx_rule; + struct list_head vport_sqs_list; + struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */ + struct rtnl_link_stats64 prev_vf_vport_stats; + struct mlx5_flow_handle *send_to_vport_meta_rule; + struct rhashtable tc_ht; +}; + +static inline +struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep) +{ + return rep->rep_data[REP_ETH].priv; +} + +struct mlx5e_neigh { + union { + __be32 v4; + struct in6_addr v6; + } dst_ip; + int family; +}; + +struct mlx5e_neigh_hash_entry { + struct rhash_head rhash_node; + struct mlx5e_neigh m_neigh; + struct mlx5e_priv *priv; + struct net_device *neigh_dev; + + /* Save the neigh hash entry in a list on the representor in + * addition to the hash table. In order to iterate easily over the + * neighbour entries. Used for stats query. + */ + struct list_head neigh_list; + + /* protects encap list */ + spinlock_t encap_list_lock; + /* encap list sharing the same neigh */ + struct list_head encap_list; + + /* neigh hash entry can be deleted only when the refcount is zero. + * refcount is needed to avoid neigh hash entry removal by TC, while + * it's used by the neigh notification call. + */ + refcount_t refcnt; + + /* Save the last reported time offloaded traffic pass over one of the + * neigh hash entry flows. 
Use it to periodically update the neigh + * 'used' value and avoid neigh deleting by the kernel. + */ + unsigned long reported_lastuse; + + struct rcu_head rcu; +}; + +enum { + /* set when the encap entry is successfully offloaded into HW */ + MLX5_ENCAP_ENTRY_VALID = BIT(0), + MLX5_REFORMAT_DECAP = BIT(1), + MLX5_ENCAP_ENTRY_NO_ROUTE = BIT(2), +}; + +struct mlx5e_decap_key { + struct ethhdr key; +}; + +struct mlx5e_decap_entry { + struct mlx5e_decap_key key; + struct list_head flows; + struct hlist_node hlist; + refcount_t refcnt; + struct completion res_ready; + int compl_result; + struct mlx5_pkt_reformat *pkt_reformat; + struct rcu_head rcu; +}; + +struct mlx5e_mpls_info { + u32 label; + u8 tc; + u8 bos; + u8 ttl; +}; + +struct mlx5e_encap_entry { + /* attached neigh hash entry */ + struct mlx5e_neigh_hash_entry *nhe; + /* neigh hash entry list of encaps sharing the same neigh */ + struct list_head encap_list; + /* a node of the eswitch encap hash table which keeping all the encap + * entries + */ + struct hlist_node encap_hlist; + struct list_head flows; + struct list_head route_list; + struct mlx5_pkt_reformat *pkt_reformat; + const struct ip_tunnel_info *tun_info; + struct mlx5e_mpls_info mpls_info; + unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ + + struct net_device *out_dev; + int route_dev_ifindex; + struct mlx5e_tc_tunnel *tunnel; + int reformat_type; + u8 flags; + char *encap_header; + int encap_size; + refcount_t refcnt; + struct completion res_ready; + int compl_result; + struct rcu_head rcu; +}; + +struct mlx5e_rep_sq { + struct mlx5_flow_handle *send_to_vport_rule; + struct mlx5_flow_handle *send_to_vport_rule_peer; + u32 sqn; + struct list_head list; +}; + +int mlx5e_rep_init(void); +void mlx5e_rep_cleanup(void); +int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv); +void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv); +int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev, + struct net_device *lag_dev); +void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw, + const struct net_device *netdev, + const struct net_device *lag_dev); +int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup); + +bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id); +int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp); + +bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); +void mlx5e_rep_activate_channels(struct mlx5e_priv *priv); +void mlx5e_rep_deactivate_channels(struct mlx5e_priv *priv); + +void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); + +bool mlx5e_eswitch_vf_rep(const struct net_device *netdev); +bool mlx5e_eswitch_uplink_rep(const struct net_device *netdev); +static inline bool mlx5e_eswitch_rep(const struct net_device *netdev) +{ + return mlx5e_eswitch_vf_rep(netdev) || + mlx5e_eswitch_uplink_rep(netdev); +} + +#else /* CONFIG_MLX5_ESWITCH */ +static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; } +static inline void mlx5e_rep_activate_channels(struct mlx5e_priv *priv) {} +static inline void mlx5e_rep_deactivate_channels(struct mlx5e_priv *priv) {} +static inline int mlx5e_rep_init(void) { return 0; }; +static inline void mlx5e_rep_cleanup(void) {}; +static inline bool mlx5e_rep_has_offload_stats(const struct net_device *dev, + int attr_id) { return false; } +static inline int mlx5e_rep_get_offload_stats(int attr_id, + const struct net_device *dev, + void *sp) { return -EOPNOTSUPP; } +#endif + +static inline bool 
mlx5e_is_vport_rep(struct mlx5e_priv *priv) +{ + return (MLX5_ESWITCH_MANAGER(priv->mdev) && priv->ppriv); +} +#endif /* __MLX5E_REP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c new file mode 100644 index 000000000..56d1bd22c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -0,0 +1,2494 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/bitmap.h> +#include <linux/filter.h> +#include <net/ip6_checksum.h> +#include <net/page_pool.h> +#include <net/inet_ecn.h> +#include <net/gro.h> +#include <net/udp.h> +#include <net/tcp.h> +#include <net/xdp_sock_drv.h> +#include "en.h" +#include "en/txrx.h" +#include "en_tc.h" +#include "eswitch.h" +#include "en_rep.h" +#include "en/rep/tc.h" +#include "ipoib/ipoib.h" +#include "en_accel/ipsec.h" +#include "en_accel/macsec.h" +#include "en_accel/ipsec_rxtx.h" +#include "en_accel/ktls_txrx.h" +#include "en/xdp.h" +#include "en/xsk/rx.h" +#include "en/health.h" +#include "en/params.h" +#include "devlink.h" +#include "en/devlink.h" + +static struct sk_buff * +mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, u32 head_offset, u32 page_idx); +static struct sk_buff * +mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, u32 head_offset, u32 page_idx); +static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); +static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); +static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); + +const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic = { + .handle_rx_cqe = mlx5e_handle_rx_cqe, + .handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, + .handle_rx_cqe_mpwqe_shampo = mlx5e_handle_rx_cqe_mpwrq_shampo, +}; + +static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) +{ + return config->rx_filter == HWTSTAMP_FILTER_ALL; +} + +static inline void mlx5e_read_cqe_slot(struct mlx5_cqwq *wq, + u32 cqcc, void *data) +{ + u32 ci = mlx5_cqwq_ctr2ix(wq, cqcc); + + memcpy(data, mlx5_cqwq_get_wqe(wq, ci), sizeof(struct mlx5_cqe64)); +} + +static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq, + struct mlx5_cqwq *wq, + u32 cqcc) +{ + struct mlx5e_cq_decomp *cqd = &rq->cqd; + struct mlx5_cqe64 *title = &cqd->title; + + mlx5e_read_cqe_slot(wq, cqcc, title); + cqd->left = be32_to_cpu(title->byte_cnt); + cqd->wqe_counter = be16_to_cpu(title->wqe_counter); + rq->stats->cqe_compress_blks++; +} + +static inline void mlx5e_read_mini_arr_slot(struct mlx5_cqwq *wq, + struct mlx5e_cq_decomp *cqd, + u32 cqcc) +{ + mlx5e_read_cqe_slot(wq, cqcc, cqd->mini_arr); + cqd->mini_arr_idx = 0; +} + +static inline void mlx5e_cqes_update_owner(struct mlx5_cqwq *wq, int n) +{ + u32 cqcc = wq->cc; + u8 op_own = mlx5_cqwq_get_ctr_wrap_cnt(wq, cqcc) & 1; + u32 ci = mlx5_cqwq_ctr2ix(wq, cqcc); + u32 wq_sz = mlx5_cqwq_get_size(wq); + u32 ci_top = min_t(u32, wq_sz, ci + n); + + for (; ci < ci_top; ci++, n--) { + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); + + cqe->op_own = op_own; + } + + if (unlikely(ci == wq_sz)) { + op_own = !op_own; + for (ci = 0; ci < n; ci++) { + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); + + cqe->op_own = op_own; + } + } +} + +static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq, + struct mlx5_cqwq *wq, + u32 cqcc) +{ + struct mlx5e_cq_decomp *cqd = &rq->cqd; + struct mlx5_mini_cqe8 *mini_cqe = &cqd->mini_arr[cqd->mini_arr_idx]; + struct mlx5_cqe64 *title = &cqd->title; + + title->byte_cnt = mini_cqe->byte_cnt; + title->check_sum = mini_cqe->checksum; + title->op_own &= 0xf0; + title->op_own |= 0x01 & (cqcc >> wq->fbc.log_sz); + + /* state bit set implies linked-list striding RQ wq type and + * HW stride index capability supported + */ + if 
(test_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state)) { + title->wqe_counter = mini_cqe->stridx; + return; + } + + /* HW stride index capability not supported */ + title->wqe_counter = cpu_to_be16(cqd->wqe_counter); + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + cqd->wqe_counter += mpwrq_get_cqe_consumed_strides(title); + else + cqd->wqe_counter = + mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, cqd->wqe_counter + 1); +} + +static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq, + struct mlx5_cqwq *wq, + u32 cqcc) +{ + struct mlx5e_cq_decomp *cqd = &rq->cqd; + + mlx5e_decompress_cqe(rq, wq, cqcc); + cqd->title.rss_hash_type = 0; + cqd->title.rss_hash_result = 0; +} + +static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq, + struct mlx5_cqwq *wq, + int update_owner_only, + int budget_rem) +{ + struct mlx5e_cq_decomp *cqd = &rq->cqd; + u32 cqcc = wq->cc + update_owner_only; + u32 cqe_count; + u32 i; + + cqe_count = min_t(u32, cqd->left, budget_rem); + + for (i = update_owner_only; i < cqe_count; + i++, cqd->mini_arr_idx++, cqcc++) { + if (cqd->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE) + mlx5e_read_mini_arr_slot(wq, cqd, cqcc); + + mlx5e_decompress_cqe_no_hash(rq, wq, cqcc); + INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq, + mlx5e_handle_rx_cqe_mpwrq_shampo, mlx5e_handle_rx_cqe, + rq, &cqd->title); + } + mlx5e_cqes_update_owner(wq, cqcc - wq->cc); + wq->cc = cqcc; + cqd->left -= cqe_count; + rq->stats->cqe_compress_pkts += cqe_count; + + return cqe_count; +} + +static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, + struct mlx5_cqwq *wq, + int budget_rem) +{ + struct mlx5e_cq_decomp *cqd = &rq->cqd; + u32 cc = wq->cc; + + mlx5e_read_title_slot(rq, wq, cc); + mlx5e_read_mini_arr_slot(wq, cqd, cc + 1); + mlx5e_decompress_cqe(rq, wq, cc); + INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq, + mlx5e_handle_rx_cqe_mpwrq_shampo, mlx5e_handle_rx_cqe, + rq, &cqd->title); + cqd->mini_arr_idx++; + + return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1; +} + +static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, struct page *page) +{ + struct mlx5e_page_cache *cache = &rq->page_cache; + u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1); + struct mlx5e_rq_stats *stats = rq->stats; + + if (tail_next == cache->head) { + stats->cache_full++; + return false; + } + + if (!dev_page_is_reusable(page)) { + stats->cache_waive++; + return false; + } + + cache->page_cache[cache->tail] = page; + cache->tail = tail_next; + return true; +} + +static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au) +{ + struct mlx5e_page_cache *cache = &rq->page_cache; + struct mlx5e_rq_stats *stats = rq->stats; + dma_addr_t addr; + + if (unlikely(cache->head == cache->tail)) { + stats->cache_empty++; + return false; + } + + if (page_ref_count(cache->page_cache[cache->head]) != 1) { + stats->cache_busy++; + return false; + } + + au->page = cache->page_cache[cache->head]; + cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1); + stats->cache_reuse++; + + addr = page_pool_get_dma_addr(au->page); + /* Non-XSK always uses PAGE_SIZE. 
*/ + dma_sync_single_for_device(rq->pdev, addr, PAGE_SIZE, rq->buff.map_dir); + return true; +} + +static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au) +{ + dma_addr_t addr; + + if (mlx5e_rx_cache_get(rq, au)) + return 0; + + au->page = page_pool_dev_alloc_pages(rq->page_pool); + if (unlikely(!au->page)) + return -ENOMEM; + + /* Non-XSK always uses PAGE_SIZE. */ + addr = dma_map_page(rq->pdev, au->page, 0, PAGE_SIZE, rq->buff.map_dir); + if (unlikely(dma_mapping_error(rq->pdev, addr))) { + page_pool_recycle_direct(rq->page_pool, au->page); + au->page = NULL; + return -ENOMEM; + } + page_pool_set_dma_addr(au->page, addr); + + return 0; +} + +void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page) +{ + dma_addr_t dma_addr = page_pool_get_dma_addr(page); + + dma_unmap_page_attrs(rq->pdev, dma_addr, PAGE_SIZE, rq->buff.map_dir, + DMA_ATTR_SKIP_CPU_SYNC); + page_pool_set_dma_addr(page, 0); +} + +void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle) +{ + if (likely(recycle)) { + if (mlx5e_rx_cache_put(rq, page)) + return; + + mlx5e_page_dma_unmap(rq, page); + page_pool_recycle_direct(rq->page_pool, page); + } else { + mlx5e_page_dma_unmap(rq, page); + page_pool_release_page(rq->page_pool, page); + put_page(page); + } +} + +static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, + struct mlx5e_wqe_frag_info *frag) +{ + int err = 0; + + if (!frag->offset) + /* On first frag (offset == 0), replenish page (alloc_unit actually). + * Other frags that point to the same alloc_unit (with a different + * offset) should just use the new one without replenishing again + * by themselves. + */ + err = mlx5e_page_alloc_pool(rq, frag->au); + + return err; +} + +static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq, + struct mlx5e_wqe_frag_info *frag, + bool recycle) +{ + if (frag->last_in_page) + mlx5e_page_release_dynamic(rq, frag->au->page, recycle); +} + +static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix) +{ + return &rq->wqe.frags[ix << rq->wqe.info.log_num_frags]; +} + +static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe, + u16 ix) +{ + struct mlx5e_wqe_frag_info *frag = get_frag(rq, ix); + int err; + int i; + + for (i = 0; i < rq->wqe.info.num_frags; i++, frag++) { + dma_addr_t addr; + u16 headroom; + + err = mlx5e_get_rx_frag(rq, frag); + if (unlikely(err)) + goto free_frags; + + headroom = i == 0 ? rq->buff.headroom : 0; + addr = page_pool_get_dma_addr(frag->au->page); + wqe->data[i].addr = cpu_to_be64(addr + frag->offset + headroom); + } + + return 0; + +free_frags: + while (--i >= 0) + mlx5e_put_rx_frag(rq, --frag, true); + + return err; +} + +static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq, + struct mlx5e_wqe_frag_info *wi, + bool recycle) +{ + int i; + + if (rq->xsk_pool) { + /* The `recycle` parameter is ignored, and the page is always + * put into the Reuse Ring, because there is no way to return + * the page to the userspace when the interface goes down. 
+ */ + xsk_buff_free(wi->au->xsk); + return; + } + + for (i = 0; i < rq->wqe.info.num_frags; i++, wi++) + mlx5e_put_rx_frag(rq, wi, recycle); +} + +static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) +{ + struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix); + + mlx5e_free_rx_wqe(rq, wi, false); +} + +static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + int i; + + for (i = 0; i < wqe_bulk; i++) { + int j = mlx5_wq_cyc_ctr2ix(wq, ix + i); + struct mlx5e_rx_wqe_cyc *wqe; + + wqe = mlx5_wq_cyc_get_wqe(wq, j); + + if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, j))) + break; + } + + return i; +} + +static inline void +mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb, + union mlx5e_alloc_unit *au, u32 frag_offset, u32 len, + unsigned int truesize) +{ + dma_addr_t addr = page_pool_get_dma_addr(au->page); + + dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, + rq->buff.map_dir); + page_ref_inc(au->page); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + au->page, frag_offset, len, truesize); +} + +static inline void +mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb, + struct page *page, dma_addr_t addr, + int offset_from, int dma_offset, u32 headlen) +{ + const void *from = page_address(page) + offset_from; + /* Aligning len to sizeof(long) optimizes memcpy performance */ + unsigned int len = ALIGN(headlen, sizeof(long)); + + dma_sync_single_for_cpu(rq->pdev, addr + dma_offset, len, + rq->buff.map_dir); + skb_copy_to_linear_data(skb, from, len); +} + +static void +mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle) +{ + union mlx5e_alloc_unit *alloc_units = wi->alloc_units; + bool no_xdp_xmit; + int i; + + /* A common case for AF_XDP. */ + if (bitmap_full(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe)) + return; + + no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe); + + if (rq->xsk_pool) { + /* The `recycle` parameter is ignored, and the page is always + * put into the Reuse Ring, because there is no way to return + * the page to the userspace when the interface goes down. + */ + for (i = 0; i < rq->mpwqe.pages_per_wqe; i++) + if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) + xsk_buff_free(alloc_units[i].xsk); + } else { + for (i = 0; i < rq->mpwqe.pages_per_wqe; i++) + if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) + mlx5e_page_release_dynamic(rq, alloc_units[i].page, recycle); + } +} + +static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n) +{ + struct mlx5_wq_ll *wq = &rq->mpwqe.wq; + + do { + u16 next_wqe_index = mlx5_wq_ll_get_wqe_next_ix(wq, wq->head); + + mlx5_wq_ll_push(wq, next_wqe_index); + } while (--n); + + /* ensure wqes are visible to device before updating doorbell record */ + dma_wmb(); + + mlx5_wq_ll_update_db_record(wq); +} + +/* This function returns the size of the continuous free space inside a bitmap + * that starts from first and no longer than len including circular ones. 
+ */ +static int bitmap_find_window(unsigned long *bitmap, int len, + int bitmap_size, int first) +{ + int next_one, count; + + next_one = find_next_bit(bitmap, bitmap_size, first); + if (next_one == bitmap_size) { + if (bitmap_size - first >= len) + return len; + next_one = find_next_bit(bitmap, bitmap_size, 0); + count = next_one + bitmap_size - first; + } else { + count = next_one - first; + } + + return min(len, count); +} + +static void build_klm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe, + __be32 key, u16 offset, u16 klm_len, u16 wqe_bbs) +{ + memset(umr_wqe, 0, offsetof(struct mlx5e_umr_wqe, inline_klms)); + umr_wqe->ctrl.opmod_idx_opcode = + cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | + MLX5_OPCODE_UMR); + umr_wqe->ctrl.umr_mkey = key; + umr_wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) + | MLX5E_KLM_UMR_DS_CNT(klm_len)); + umr_wqe->uctrl.flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE; + umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset); + umr_wqe->uctrl.xlt_octowords = cpu_to_be16(klm_len); + umr_wqe->uctrl.mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); +} + +static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, + struct mlx5e_icosq *sq, + u16 klm_entries, u16 index) +{ + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + u16 entries, pi, header_offset, err, wqe_bbs, new_entries; + u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey; + struct page *page = shampo->last_page; + u64 addr = shampo->last_addr; + struct mlx5e_dma_info *dma_info; + struct mlx5e_umr_wqe *umr_wqe; + int headroom, i; + + headroom = rq->buff.headroom; + new_entries = klm_entries - (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1)); + entries = ALIGN(klm_entries, MLX5_UMR_KLM_ALIGNMENT); + wqe_bbs = MLX5E_KLM_UMR_WQEBBS(entries); + pi = mlx5e_icosq_get_next_pi(sq, wqe_bbs); + umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); + build_klm_umr(sq, umr_wqe, shampo->key, index, entries, wqe_bbs); + + for (i = 0; i < entries; i++, index++) { + dma_info = &shampo->info[index]; + if (i >= klm_entries || (index < shampo->pi && shampo->pi - index < + MLX5_UMR_KLM_ALIGNMENT)) + goto update_klm; + header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) << + MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE; + if (!(header_offset & (PAGE_SIZE - 1))) { + union mlx5e_alloc_unit au; + + err = mlx5e_page_alloc_pool(rq, &au); + if (unlikely(err)) + goto err_unmap; + page = dma_info->page = au.page; + addr = dma_info->addr = page_pool_get_dma_addr(au.page); + } else { + dma_info->addr = addr + header_offset; + dma_info->page = page; + } + +update_klm: + umr_wqe->inline_klms[i].bcount = + cpu_to_be32(MLX5E_RX_MAX_HEAD); + umr_wqe->inline_klms[i].key = cpu_to_be32(lkey); + umr_wqe->inline_klms[i].va = + cpu_to_be64(dma_info->addr + headroom); + } + + sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { + .wqe_type = MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR, + .num_wqebbs = wqe_bbs, + .shampo.len = new_entries, + }; + + shampo->pi = (shampo->pi + new_entries) & (shampo->hd_per_wq - 1); + shampo->last_page = page; + shampo->last_addr = addr; + sq->pc += wqe_bbs; + sq->doorbell_cseg = &umr_wqe->ctrl; + + return 0; + +err_unmap: + while (--i >= 0) { + dma_info = &shampo->info[--index]; + if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) { + dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE); + mlx5e_page_release_dynamic(rq, dma_info->page, true); + } + } + rq->stats->buff_alloc_err++; + return err; +} + +static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) +{ + struct mlx5e_shampo_hd *shampo = 
rq->mpwqe.shampo; + u16 klm_entries, num_wqe, index, entries_before; + struct mlx5e_icosq *sq = rq->icosq; + int i, err, max_klm_entries, len; + + max_klm_entries = MLX5E_MAX_KLM_PER_WQE(rq->mdev); + klm_entries = bitmap_find_window(shampo->bitmap, + shampo->hd_per_wqe, + shampo->hd_per_wq, shampo->pi); + if (!klm_entries) + return 0; + + klm_entries += (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1)); + index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KLM_ALIGNMENT); + entries_before = shampo->hd_per_wq - index; + + if (unlikely(entries_before < klm_entries)) + num_wqe = DIV_ROUND_UP(entries_before, max_klm_entries) + + DIV_ROUND_UP(klm_entries - entries_before, max_klm_entries); + else + num_wqe = DIV_ROUND_UP(klm_entries, max_klm_entries); + + for (i = 0; i < num_wqe; i++) { + len = (klm_entries > max_klm_entries) ? max_klm_entries : + klm_entries; + if (unlikely(index + len > shampo->hd_per_wq)) + len = shampo->hd_per_wq - index; + err = mlx5e_build_shampo_hd_umr(rq, sq, len, index); + if (unlikely(err)) + return err; + index = (index + len) & (rq->mpwqe.shampo->hd_per_wq - 1); + klm_entries -= len; + } + + return 0; +} + +static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) +{ + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix); + union mlx5e_alloc_unit *au = &wi->alloc_units[0]; + struct mlx5e_icosq *sq = rq->icosq; + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_umr_wqe *umr_wqe; + u32 offset; /* 17-bit value with MTT. */ + u16 pi; + int err; + int i; + + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) { + err = mlx5e_alloc_rx_hd_mpwqe(rq); + if (unlikely(err)) + goto err; + } + + pi = mlx5e_icosq_get_next_pi(sq, rq->mpwqe.umr_wqebbs); + umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); + memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe)); + + for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, au++) { + dma_addr_t addr; + + err = mlx5e_page_alloc_pool(rq, au); + if (unlikely(err)) + goto err_unmap; + addr = page_pool_get_dma_addr(au->page); + umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { + .ptag = cpu_to_be64(addr | MLX5_EN_WR), + }; + } + + bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe); + wi->consumed_strides = 0; + + umr_wqe->ctrl.opmod_idx_opcode = + cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | + MLX5_OPCODE_UMR); + + offset = (ix * rq->mpwqe.mtts_per_wqe) * sizeof(struct mlx5_mtt) / MLX5_OCTWORD; + umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset); + + sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { + .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX, + .num_wqebbs = rq->mpwqe.umr_wqebbs, + .umr.rq = rq, + }; + + sq->pc += rq->mpwqe.umr_wqebbs; + + sq->doorbell_cseg = &umr_wqe->ctrl; + + return 0; + +err_unmap: + while (--i >= 0) { + au--; + mlx5e_page_release_dynamic(rq, au->page, true); + } + +err: + rq->stats->buff_alloc_err++; + + return err; +} + +/* This function is responsible to dealloc SHAMPO header buffer. + * close == true specifies that we are in the middle of closing RQ operation so + * we go over all the entries and if they are not in use we free them, + * otherwise we only go over a specific range inside the header buffer that are + * not in use. 
+ */ +void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close) +{ + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + int hd_per_wq = shampo->hd_per_wq; + struct page *deleted_page = NULL; + struct mlx5e_dma_info *hd_info; + int i, index = start; + + for (i = 0; i < len; i++, index++) { + if (index == hd_per_wq) + index = 0; + + if (close && !test_bit(index, shampo->bitmap)) + continue; + + hd_info = &shampo->info[index]; + hd_info->addr = ALIGN_DOWN(hd_info->addr, PAGE_SIZE); + if (hd_info->page != deleted_page) { + deleted_page = hd_info->page; + mlx5e_page_release_dynamic(rq, hd_info->page, false); + } + } + + if (start + len > hd_per_wq) { + len -= hd_per_wq - start; + bitmap_clear(shampo->bitmap, start, hd_per_wq - start); + start = 0; + } + + bitmap_clear(shampo->bitmap, start, len); +} + +static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) +{ + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix); + /* Don't recycle, this function is called on rq/netdev close */ + mlx5e_free_rx_mpwqe(rq, wi, false); +} + +INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + int wqe_bulk, count; + bool busy = false; + u16 head; + + if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) + return false; + + if (mlx5_wq_cyc_missing(wq) < rq->wqe.info.wqe_bulk) + return false; + + if (rq->page_pool) + page_pool_nid_changed(rq->page_pool, numa_mem_id()); + + wqe_bulk = mlx5_wq_cyc_missing(wq); + head = mlx5_wq_cyc_get_head(wq); + + /* Don't allow any newly allocated WQEs to share the same page with old + * WQEs that aren't completed yet. Stop earlier. + */ + wqe_bulk -= (head + wqe_bulk) & rq->wqe.info.wqe_index_mask; + + if (!rq->xsk_pool) + count = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk); + else if (likely(!rq->xsk_pool->dma_need_sync)) + count = mlx5e_xsk_alloc_rx_wqes_batched(rq, head, wqe_bulk); + else + /* If dma_need_sync is true, it's more efficient to call + * xsk_buff_alloc in a loop, rather than xsk_buff_alloc_batch, + * because the latter does the same check and returns only one + * frame. 
+ */ + count = mlx5e_xsk_alloc_rx_wqes(rq, head, wqe_bulk); + + mlx5_wq_cyc_push_n(wq, count); + if (unlikely(count != wqe_bulk)) { + rq->stats->buff_alloc_err++; + busy = true; + } + + /* ensure wqes are visible to device before updating doorbell record */ + dma_wmb(); + + mlx5_wq_cyc_update_db_record(wq); + + return busy; +} + +void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq) +{ + u16 sqcc; + + sqcc = sq->cc; + + while (sqcc != sq->pc) { + struct mlx5e_icosq_wqe_info *wi; + u16 ci; + + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); + wi = &sq->db.wqe_info[ci]; + sqcc += wi->num_wqebbs; +#ifdef CONFIG_MLX5_EN_TLS + switch (wi->wqe_type) { + case MLX5E_ICOSQ_WQE_SET_PSV_TLS: + mlx5e_ktls_handle_ctx_completion(wi); + break; + case MLX5E_ICOSQ_WQE_GET_PSV_TLS: + mlx5e_ktls_handle_get_psv_completion(wi, sq); + break; + } +#endif + } + sq->cc = sqcc; +} + +static void mlx5e_handle_shampo_hd_umr(struct mlx5e_shampo_umr umr, + struct mlx5e_icosq *sq) +{ + struct mlx5e_channel *c = container_of(sq, struct mlx5e_channel, icosq); + struct mlx5e_shampo_hd *shampo; + /* assume 1:1 relationship between RQ and icosq */ + struct mlx5e_rq *rq = &c->rq; + int end, from, len = umr.len; + + shampo = rq->mpwqe.shampo; + end = shampo->hd_per_wq; + from = shampo->ci; + if (from + len > shampo->hd_per_wq) { + len -= end - from; + bitmap_set(shampo->bitmap, from, end - from); + from = 0; + } + + bitmap_set(shampo->bitmap, from, len); + shampo->ci = (shampo->ci + umr.len) & (shampo->hd_per_wq - 1); +} + +int mlx5e_poll_ico_cq(struct mlx5e_cq *cq) +{ + struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); + struct mlx5_cqe64 *cqe; + u16 sqcc; + int i; + + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + return 0; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (likely(!cqe)) + return 0; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + i = 0; + do { + u16 wqe_counter; + bool last_wqe; + + mlx5_cqwq_pop(&cq->wq); + + wqe_counter = be16_to_cpu(cqe->wqe_counter); + + do { + struct mlx5e_icosq_wqe_info *wi; + u16 ci; + + last_wqe = (sqcc == wqe_counter); + + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); + wi = &sq->db.wqe_info[ci]; + sqcc += wi->num_wqebbs; + + if (last_wqe && unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { + netdev_WARN_ONCE(cq->netdev, + "Bad OP in ICOSQ CQE: 0x%x\n", + get_cqe_opcode(cqe)); + mlx5e_dump_error_cqe(&sq->cq, sq->sqn, + (struct mlx5_err_cqe *)cqe); + mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs); + if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) + queue_work(cq->priv->wq, &sq->recover_work); + break; + } + + switch (wi->wqe_type) { + case MLX5E_ICOSQ_WQE_UMR_RX: + wi->umr.rq->mpwqe.umr_completed++; + break; + case MLX5E_ICOSQ_WQE_NOP: + break; + case MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR: + mlx5e_handle_shampo_hd_umr(wi->shampo, sq); + break; +#ifdef CONFIG_MLX5_EN_TLS + case MLX5E_ICOSQ_WQE_UMR_TLS: + break; + case MLX5E_ICOSQ_WQE_SET_PSV_TLS: + mlx5e_ktls_handle_ctx_completion(wi); + break; + case MLX5E_ICOSQ_WQE_GET_PSV_TLS: + mlx5e_ktls_handle_get_psv_completion(wi, sq); + break; +#endif + default: + netdev_WARN_ONCE(cq->netdev, + "Bad WQE type in ICOSQ WQE info: 0x%x\n", + wi->wqe_type); + } + } while (!last_wqe); + } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); + + sq->cc = sqcc; + + mlx5_cqwq_update_db_record(&cq->wq); + + return i; +} + +INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) +{ + struct mlx5_wq_ll *wq = 
&rq->mpwqe.wq; + u8 umr_completed = rq->mpwqe.umr_completed; + struct mlx5e_icosq *sq = rq->icosq; + int alloc_err = 0; + u8 missing, i; + u16 head; + + if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) + return false; + + if (umr_completed) { + mlx5e_post_rx_mpwqe(rq, umr_completed); + rq->mpwqe.umr_in_progress -= umr_completed; + rq->mpwqe.umr_completed = 0; + } + + missing = mlx5_wq_ll_missing(wq) - rq->mpwqe.umr_in_progress; + + if (unlikely(rq->mpwqe.umr_in_progress > rq->mpwqe.umr_last_bulk)) + rq->stats->congst_umr++; + + if (likely(missing < rq->mpwqe.min_wqe_bulk)) + return false; + + if (rq->page_pool) + page_pool_nid_changed(rq->page_pool, numa_mem_id()); + + head = rq->mpwqe.actual_wq_head; + i = missing; + do { + alloc_err = rq->xsk_pool ? mlx5e_xsk_alloc_rx_mpwqe(rq, head) : + mlx5e_alloc_rx_mpwqe(rq, head); + + if (unlikely(alloc_err)) + break; + head = mlx5_wq_ll_get_wqe_next_ix(wq, head); + } while (--i); + + rq->mpwqe.umr_last_bulk = missing - i; + if (sq->doorbell_cseg) { + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, sq->doorbell_cseg); + sq->doorbell_cseg = NULL; + } + + rq->mpwqe.umr_in_progress += rq->mpwqe.umr_last_bulk; + rq->mpwqe.actual_wq_head = head; + + /* If XSK Fill Ring doesn't have enough frames, report the error, so + * that one of the actions can be performed: + * 1. If need_wakeup is used, signal that the application has to kick + * the driver when it refills the Fill Ring. + * 2. Otherwise, busy poll by rescheduling the NAPI poll. + */ + if (unlikely(alloc_err == -ENOMEM && rq->xsk_pool)) + return true; + + return false; +} + +static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp) +{ + u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe); + u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) || + (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA); + + tcp->check = 0; + tcp->psh = get_cqe_lro_tcppsh(cqe); + + if (tcp_ack) { + tcp->ack = 1; + tcp->ack_seq = cqe->lro.ack_seq_num; + tcp->window = cqe->lro.tcp_win; + } +} + +static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, + u32 cqe_bcnt) +{ + struct ethhdr *eth = (struct ethhdr *)(skb->data); + struct tcphdr *tcp; + int network_depth = 0; + __wsum check; + __be16 proto; + u16 tot_len; + void *ip_p; + + proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth); + + tot_len = cqe_bcnt - network_depth; + ip_p = skb->data + network_depth; + + if (proto == htons(ETH_P_IP)) { + struct iphdr *ipv4 = ip_p; + + tcp = ip_p + sizeof(struct iphdr); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + + ipv4->ttl = cqe->lro.min_ttl; + ipv4->tot_len = cpu_to_be16(tot_len); + ipv4->check = 0; + ipv4->check = ip_fast_csum((unsigned char *)ipv4, + ipv4->ihl); + + mlx5e_lro_update_tcp_hdr(cqe, tcp); + check = csum_partial(tcp, tcp->doff * 4, + csum_unfold((__force __sum16)cqe->check_sum)); + /* Almost done, don't forget the pseudo header */ + tcp->check = csum_tcpudp_magic(ipv4->saddr, ipv4->daddr, + tot_len - sizeof(struct iphdr), + IPPROTO_TCP, check); + } else { + u16 payload_len = tot_len - sizeof(struct ipv6hdr); + struct ipv6hdr *ipv6 = ip_p; + + tcp = ip_p + sizeof(struct ipv6hdr); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + + ipv6->hop_limit = cqe->lro.min_ttl; + ipv6->payload_len = cpu_to_be16(payload_len); + + mlx5e_lro_update_tcp_hdr(cqe, tcp); + check = csum_partial(tcp, tcp->doff * 4, + csum_unfold((__force __sum16)cqe->check_sum)); + /* Almost done, don't forget the pseudo header */ + tcp->check = csum_ipv6_magic(&ipv6->saddr, &ipv6->daddr, 
payload_len, + IPPROTO_TCP, check); + } +} + +static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index) +{ + struct mlx5e_dma_info *last_head = &rq->mpwqe.shampo->info[header_index]; + u16 head_offset = (last_head->addr & (PAGE_SIZE - 1)) + rq->buff.headroom; + + return page_address(last_head->page) + head_offset; +} + +static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4) +{ + int udp_off = rq->hw_gro_data->fk.control.thoff; + struct sk_buff *skb = rq->hw_gro_data->skb; + struct udphdr *uh; + + uh = (struct udphdr *)(skb->data + udp_off); + uh->len = htons(skb->len - udp_off); + + if (uh->check) + uh->check = ~udp_v4_check(skb->len - udp_off, ipv4->saddr, + ipv4->daddr, 0); + + skb->csum_start = (unsigned char *)uh - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4; +} + +static void mlx5e_shampo_update_ipv6_udp_hdr(struct mlx5e_rq *rq, struct ipv6hdr *ipv6) +{ + int udp_off = rq->hw_gro_data->fk.control.thoff; + struct sk_buff *skb = rq->hw_gro_data->skb; + struct udphdr *uh; + + uh = (struct udphdr *)(skb->data + udp_off); + uh->len = htons(skb->len - udp_off); + + if (uh->check) + uh->check = ~udp_v6_check(skb->len - udp_off, &ipv6->saddr, + &ipv6->daddr, 0); + + skb->csum_start = (unsigned char *)uh - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4; +} + +static void mlx5e_shampo_update_fin_psh_flags(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, + struct tcphdr *skb_tcp_hd) +{ + u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe); + struct tcphdr *last_tcp_hd; + void *last_hd_addr; + + last_hd_addr = mlx5e_shampo_get_packet_hd(rq, header_index); + last_tcp_hd = last_hd_addr + ETH_HLEN + rq->hw_gro_data->fk.control.thoff; + tcp_flag_word(skb_tcp_hd) |= tcp_flag_word(last_tcp_hd) & (TCP_FLAG_FIN | TCP_FLAG_PSH); +} + +static void mlx5e_shampo_update_ipv4_tcp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4, + struct mlx5_cqe64 *cqe, bool match) +{ + int tcp_off = rq->hw_gro_data->fk.control.thoff; + struct sk_buff *skb = rq->hw_gro_data->skb; + struct tcphdr *tcp; + + tcp = (struct tcphdr *)(skb->data + tcp_off); + if (match) + mlx5e_shampo_update_fin_psh_flags(rq, cqe, tcp); + + tcp->check = ~tcp_v4_check(skb->len - tcp_off, ipv4->saddr, + ipv4->daddr, 0); + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; + if (ntohs(ipv4->id) == rq->hw_gro_data->second_ip_id) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID; + + skb->csum_start = (unsigned char *)tcp - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + + if (tcp->cwr) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; +} + +static void mlx5e_shampo_update_ipv6_tcp_hdr(struct mlx5e_rq *rq, struct ipv6hdr *ipv6, + struct mlx5_cqe64 *cqe, bool match) +{ + int tcp_off = rq->hw_gro_data->fk.control.thoff; + struct sk_buff *skb = rq->hw_gro_data->skb; + struct tcphdr *tcp; + + tcp = (struct tcphdr *)(skb->data + tcp_off); + if (match) + mlx5e_shampo_update_fin_psh_flags(rq, cqe, tcp); + + tcp->check = ~tcp_v6_check(skb->len - tcp_off, &ipv6->saddr, + &ipv6->daddr, 0); + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6; + skb->csum_start = (unsigned char *)tcp - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + + if (tcp->cwr) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; +} + +static void mlx5e_shampo_update_hdr(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match) +{ + bool is_ipv4 = (rq->hw_gro_data->fk.basic.n_proto == 
htons(ETH_P_IP)); + struct sk_buff *skb = rq->hw_gro_data->skb; + + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + skb->ip_summed = CHECKSUM_PARTIAL; + + if (is_ipv4) { + int nhoff = rq->hw_gro_data->fk.control.thoff - sizeof(struct iphdr); + struct iphdr *ipv4 = (struct iphdr *)(skb->data + nhoff); + __be16 newlen = htons(skb->len - nhoff); + + csum_replace2(&ipv4->check, ipv4->tot_len, newlen); + ipv4->tot_len = newlen; + + if (ipv4->protocol == IPPROTO_TCP) + mlx5e_shampo_update_ipv4_tcp_hdr(rq, ipv4, cqe, match); + else + mlx5e_shampo_update_ipv4_udp_hdr(rq, ipv4); + } else { + int nhoff = rq->hw_gro_data->fk.control.thoff - sizeof(struct ipv6hdr); + struct ipv6hdr *ipv6 = (struct ipv6hdr *)(skb->data + nhoff); + + ipv6->payload_len = htons(skb->len - nhoff - sizeof(*ipv6)); + + if (ipv6->nexthdr == IPPROTO_TCP) + mlx5e_shampo_update_ipv6_tcp_hdr(rq, ipv6, cqe, match); + else + mlx5e_shampo_update_ipv6_udp_hdr(rq, ipv6); + } +} + +static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe, + struct sk_buff *skb) +{ + u8 cht = cqe->rss_hash_type; + int ht = (cht & CQE_RSS_HTYPE_L4) ? PKT_HASH_TYPE_L4 : + (cht & CQE_RSS_HTYPE_IP) ? PKT_HASH_TYPE_L3 : + PKT_HASH_TYPE_NONE; + skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht); +} + +static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth, + __be16 *proto) +{ + *proto = ((struct ethhdr *)skb->data)->h_proto; + *proto = __vlan_get_protocol(skb, *proto, network_depth); + + if (*proto == htons(ETH_P_IP)) + return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr)); + + if (*proto == htons(ETH_P_IPV6)) + return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr)); + + return false; +} + +static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb) +{ + int network_depth = 0; + __be16 proto; + void *ip; + int rc; + + if (unlikely(!is_last_ethertype_ip(skb, &network_depth, &proto))) + return; + + ip = skb->data + network_depth; + rc = ((proto == htons(ETH_P_IP)) ? IP_ECN_set_ce((struct iphdr *)ip) : + IP6_ECN_set_ce(skb, (struct ipv6hdr *)ip)); + + rq->stats->ecn_mark += !!rc; +} + +static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto) +{ + void *ip_p = skb->data + network_depth; + + return (proto == htons(ETH_P_IP)) ? 
((struct iphdr *)ip_p)->protocol : + ((struct ipv6hdr *)ip_p)->nexthdr; +} + +#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN) + +#define MAX_PADDING 8 + +static void +tail_padding_csum_slow(struct sk_buff *skb, int offset, int len, + struct mlx5e_rq_stats *stats) +{ + stats->csum_complete_tail_slow++; + skb->csum = csum_block_add(skb->csum, + skb_checksum(skb, offset, len, 0), + offset); +} + +static void +tail_padding_csum(struct sk_buff *skb, int offset, + struct mlx5e_rq_stats *stats) +{ + u8 tail_padding[MAX_PADDING]; + int len = skb->len - offset; + void *tail; + + if (unlikely(len > MAX_PADDING)) { + tail_padding_csum_slow(skb, offset, len, stats); + return; + } + + tail = skb_header_pointer(skb, offset, len, tail_padding); + if (unlikely(!tail)) { + tail_padding_csum_slow(skb, offset, len, stats); + return; + } + + stats->csum_complete_tail++; + skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset); +} + +static void +mlx5e_skb_csum_fixup(struct sk_buff *skb, int network_depth, __be16 proto, + struct mlx5e_rq_stats *stats) +{ + struct ipv6hdr *ip6; + struct iphdr *ip4; + int pkt_len; + + /* Fixup vlan headers, if any */ + if (network_depth > ETH_HLEN) + /* CQE csum is calculated from the IP header and does + * not cover VLAN headers (if present). This will add + * the checksum manually. + */ + skb->csum = csum_partial(skb->data + ETH_HLEN, + network_depth - ETH_HLEN, + skb->csum); + + /* Fixup tail padding, if any */ + switch (proto) { + case htons(ETH_P_IP): + ip4 = (struct iphdr *)(skb->data + network_depth); + pkt_len = network_depth + ntohs(ip4->tot_len); + break; + case htons(ETH_P_IPV6): + ip6 = (struct ipv6hdr *)(skb->data + network_depth); + pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len); + break; + default: + return; + } + + if (likely(pkt_len >= skb->len)) + return; + + tail_padding_csum(skb, pkt_len, stats); +} + +static inline void mlx5e_handle_csum(struct net_device *netdev, + struct mlx5_cqe64 *cqe, + struct mlx5e_rq *rq, + struct sk_buff *skb, + bool lro) +{ + struct mlx5e_rq_stats *stats = rq->stats; + int network_depth = 0; + __be16 proto; + + if (unlikely(!(netdev->features & NETIF_F_RXCSUM))) + goto csum_none; + + if (lro) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + stats->csum_unnecessary++; + return; + } + + /* True when explicitly set via priv flag, or XDP prog is loaded */ + if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state) || + get_cqe_tls_offload(cqe)) + goto csum_unnecessary; + + /* CQE csum doesn't cover padding octets in short ethernet + * frames. And the pad field is appended prior to calculating + * and appending the FCS field. + * + * Detecting these padded frames requires to verify and parse + * IP headers, so we simply force all those small frames to be + * CHECKSUM_UNNECESSARY even if they are not padded. 
+ */ + if (short_frame(skb->len)) + goto csum_unnecessary; + + if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) { + if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP)) + goto csum_unnecessary; + + stats->csum_complete++; + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + + if (test_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state)) + return; /* CQE csum covers all received bytes */ + + /* csum might need some fixups ...*/ + mlx5e_skb_csum_fixup(skb, network_depth, proto, stats); + return; + } + +csum_unnecessary: + if (likely((cqe->hds_ip_ext & CQE_L3_OK) && + (cqe->hds_ip_ext & CQE_L4_OK))) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + if (cqe_is_tunneled(cqe)) { + skb->csum_level = 1; + skb->encapsulation = 1; + stats->csum_unnecessary_inner++; + return; + } + stats->csum_unnecessary++; + return; + } +csum_none: + skb->ip_summed = CHECKSUM_NONE; + stats->csum_none++; +} + +#define MLX5E_CE_BIT_MASK 0x80 + +static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, + struct mlx5e_rq *rq, + struct sk_buff *skb) +{ + u8 lro_num_seg = be32_to_cpu(cqe->srqn) >> 24; + struct mlx5e_rq_stats *stats = rq->stats; + struct net_device *netdev = rq->netdev; + + skb->mac_len = ETH_HLEN; + + if (unlikely(get_cqe_tls_offload(cqe))) + mlx5e_ktls_handle_rx_skb(rq, skb, cqe, &cqe_bcnt); + + if (unlikely(mlx5_ipsec_is_rx_flow(cqe))) + mlx5e_ipsec_offload_handle_rx_skb(netdev, skb, cqe); + + if (unlikely(mlx5e_macsec_is_rx_flow(cqe))) + mlx5e_macsec_offload_handle_rx_skb(netdev, skb, cqe); + + if (lro_num_seg > 1) { + mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt); + skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg); + /* Subtract one since we already counted this as one + * "regular" packet in mlx5e_complete_rx_cqe() + */ + stats->packets += lro_num_seg - 1; + stats->lro_packets++; + stats->lro_bytes += cqe_bcnt; + } + + if (unlikely(mlx5e_rx_hw_stamp(rq->tstamp))) + skb_hwtstamps(skb)->hwtstamp = mlx5e_cqe_ts_to_ns(rq->ptp_cyc2time, + rq->clock, get_cqe_ts(cqe)); + skb_record_rx_queue(skb, rq->ix); + + if (likely(netdev->features & NETIF_F_RXHASH)) + mlx5e_skb_set_hash(cqe, skb); + + if (cqe_has_vlan(cqe)) { + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + be16_to_cpu(cqe->vlan_info)); + stats->removed_vlan_packets++; + } + + skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK; + + mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg); + /* checking CE bit in cqe - MSB in ml_path field */ + if (unlikely(cqe->ml_path & MLX5E_CE_BIT_MASK)) + mlx5e_enable_ecn(rq, skb); + + skb->protocol = eth_type_trans(skb, netdev); + + if (unlikely(mlx5e_skb_is_multicast(skb))) + stats->mcast_packets++; +} + +static void mlx5e_shampo_complete_rx_cqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, + struct sk_buff *skb) +{ + struct mlx5e_rq_stats *stats = rq->stats; + + stats->packets++; + stats->gro_packets++; + stats->bytes += cqe_bcnt; + stats->gro_bytes += cqe_bcnt; + if (NAPI_GRO_CB(skb)->count != 1) + return; + mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb); + skb_reset_network_header(skb); + if (!skb_flow_dissect_flow_keys(skb, &rq->hw_gro_data->fk, 0)) { + napi_gro_receive(rq->cq.napi, skb); + rq->hw_gro_data->skb = NULL; + } +} + +static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, + struct sk_buff *skb) +{ + struct mlx5e_rq_stats *stats = rq->stats; + + stats->packets++; + stats->bytes += cqe_bcnt; + mlx5e_build_rx_skb(cqe, cqe_bcnt, 
rq, skb); +} + +static inline +struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va, + u32 frag_size, u16 headroom, + u32 cqe_bcnt, u32 metasize) +{ + struct sk_buff *skb = build_skb(va, frag_size); + + if (unlikely(!skb)) { + rq->stats->buff_alloc_err++; + return NULL; + } + + skb_reserve(skb, headroom); + skb_put(skb, cqe_bcnt); + + if (metasize) + skb_metadata_set(skb, metasize); + + return skb; +} + +static void mlx5e_fill_xdp_buff(struct mlx5e_rq *rq, void *va, u16 headroom, + u32 len, struct xdp_buff *xdp) +{ + xdp_init_buff(xdp, rq->buff.frame0_sz, &rq->xdp_rxq); + xdp_prepare_buff(xdp, va, headroom, len, true); +} + +static struct sk_buff * +mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, + u32 cqe_bcnt) +{ + union mlx5e_alloc_unit *au = wi->au; + u16 rx_headroom = rq->buff.headroom; + struct bpf_prog *prog; + struct sk_buff *skb; + u32 metasize = 0; + void *va, *data; + dma_addr_t addr; + u32 frag_size; + + va = page_address(au->page) + wi->offset; + data = va + rx_headroom; + frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); + + addr = page_pool_get_dma_addr(au->page); + dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, + frag_size, rq->buff.map_dir); + net_prefetch(data); + + prog = rcu_dereference(rq->xdp_prog); + if (prog) { + struct xdp_buff xdp; + + net_prefetchw(va); /* xdp_frame data area */ + mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp); + if (mlx5e_xdp_handle(rq, au->page, prog, &xdp)) + return NULL; /* page/packet was consumed by XDP */ + + rx_headroom = xdp.data - xdp.data_hard_start; + metasize = xdp.data - xdp.data_meta; + cqe_bcnt = xdp.data_end - xdp.data; + } + frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); + skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize); + if (unlikely(!skb)) + return NULL; + + /* queue up for recycling/reuse */ + page_ref_inc(au->page); + + return skb; +} + +static struct sk_buff * +mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, + u32 cqe_bcnt) +{ + struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; + struct mlx5e_wqe_frag_info *head_wi = wi; + union mlx5e_alloc_unit *au = wi->au; + u16 rx_headroom = rq->buff.headroom; + struct skb_shared_info *sinfo; + u32 frag_consumed_bytes; + struct bpf_prog *prog; + struct xdp_buff xdp; + struct sk_buff *skb; + dma_addr_t addr; + u32 truesize; + void *va; + + va = page_address(au->page) + wi->offset; + frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); + + addr = page_pool_get_dma_addr(au->page); + dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, + rq->buff.frame0_sz, rq->buff.map_dir); + net_prefetchw(va); /* xdp_frame data area */ + net_prefetch(va + rx_headroom); + + mlx5e_fill_xdp_buff(rq, va, rx_headroom, frag_consumed_bytes, &xdp); + sinfo = xdp_get_shared_info_from_buff(&xdp); + truesize = 0; + + cqe_bcnt -= frag_consumed_bytes; + frag_info++; + wi++; + + while (cqe_bcnt) { + skb_frag_t *frag; + + au = wi->au; + + frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); + + addr = page_pool_get_dma_addr(au->page); + dma_sync_single_for_cpu(rq->pdev, addr + wi->offset, + frag_consumed_bytes, rq->buff.map_dir); + + if (!xdp_buff_has_frags(&xdp)) { + /* Init on the first fragment to avoid cold cache access + * when possible. 
+ */ + sinfo->nr_frags = 0; + sinfo->xdp_frags_size = 0; + xdp_buff_set_frags_flag(&xdp); + } + + frag = &sinfo->frags[sinfo->nr_frags++]; + __skb_frag_set_page(frag, au->page); + skb_frag_off_set(frag, wi->offset); + skb_frag_size_set(frag, frag_consumed_bytes); + + if (page_is_pfmemalloc(au->page)) + xdp_buff_set_frag_pfmemalloc(&xdp); + + sinfo->xdp_frags_size += frag_consumed_bytes; + truesize += frag_info->frag_stride; + + cqe_bcnt -= frag_consumed_bytes; + frag_info++; + wi++; + } + + au = head_wi->au; + + prog = rcu_dereference(rq->xdp_prog); + if (prog && mlx5e_xdp_handle(rq, au->page, prog, &xdp)) { + if (test_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { + int i; + + for (i = wi - head_wi; i < rq->wqe.info.num_frags; i++) + mlx5e_put_rx_frag(rq, &head_wi[i], true); + } + return NULL; /* page/packet was consumed by XDP */ + } + + skb = mlx5e_build_linear_skb(rq, xdp.data_hard_start, rq->buff.frame0_sz, + xdp.data - xdp.data_hard_start, + xdp.data_end - xdp.data, + xdp.data - xdp.data_meta); + if (unlikely(!skb)) + return NULL; + + page_ref_inc(au->page); + + if (unlikely(xdp_buff_has_frags(&xdp))) { + int i; + + /* sinfo->nr_frags is reset by build_skb, calculate again. */ + xdp_update_skb_shared_info(skb, wi - head_wi - 1, + sinfo->xdp_frags_size, truesize, + xdp_buff_is_frag_pfmemalloc(&xdp)); + + for (i = 0; i < sinfo->nr_frags; i++) { + skb_frag_t *frag = &sinfo->frags[i]; + + page_ref_inc(skb_frag_page(frag)); + } + } + + return skb; +} + +static void trigger_report(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe; + struct mlx5e_priv *priv = rq->priv; + + if (cqe_syndrome_needs_recover(err_cqe->syndrome) && + !test_and_set_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state)) { + mlx5e_dump_error_cqe(&rq->cq, rq->rqn, err_cqe); + queue_work(priv->wq, &rq->recover_work); + } +} + +static void mlx5e_handle_rx_err_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + trigger_report(rq, cqe); + rq->stats->wqe_err++; +} + +static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + struct mlx5e_wqe_frag_info *wi; + struct sk_buff *skb; + u32 cqe_bcnt; + u16 ci; + + ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter)); + wi = get_frag(rq, ci); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + mlx5e_handle_rx_err_cqe(rq, cqe); + goto free_wqe; + } + + skb = INDIRECT_CALL_3(rq->wqe.skb_from_cqe, + mlx5e_skb_from_cqe_linear, + mlx5e_skb_from_cqe_nonlinear, + mlx5e_xsk_skb_from_cqe_linear, + rq, wi, cqe_bcnt); + if (!skb) { + /* probably for XDP */ + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { + /* do not return page to cache, + * it will be returned on XDP_TX completion. 
+ */ + goto wq_cyc_pop; + } + goto free_wqe; + } + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + + if (mlx5e_cqe_regb_chain(cqe)) + if (!mlx5e_tc_update_skb(cqe, skb)) { + dev_kfree_skb_any(skb); + goto free_wqe; + } + + napi_gro_receive(rq->cq.napi, skb); + +free_wqe: + mlx5e_free_rx_wqe(rq, wi, true); +wq_cyc_pop: + mlx5_wq_cyc_pop(wq); +} + +#ifdef CONFIG_MLX5_ESWITCH +static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct net_device *netdev = rq->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + struct mlx5e_wqe_frag_info *wi; + struct sk_buff *skb; + u32 cqe_bcnt; + u16 ci; + + ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter)); + wi = get_frag(rq, ci); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + mlx5e_handle_rx_err_cqe(rq, cqe); + goto free_wqe; + } + + skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, + mlx5e_skb_from_cqe_linear, + mlx5e_skb_from_cqe_nonlinear, + rq, wi, cqe_bcnt); + if (!skb) { + /* probably for XDP */ + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { + /* do not return page to cache, + * it will be returned on XDP_TX completion. + */ + goto wq_cyc_pop; + } + goto free_wqe; + } + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + + if (rep->vlan && skb_vlan_tag_present(skb)) + skb_vlan_pop(skb); + + mlx5e_rep_tc_receive(cqe, rq, skb); + +free_wqe: + mlx5e_free_rx_wqe(rq, wi, true); +wq_cyc_pop: + mlx5_wq_cyc_pop(wq); +} + +static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); + u16 wqe_id = be16_to_cpu(cqe->wqe_id); + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, wqe_id); + u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); + u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; + u32 head_offset = wqe_offset & ((1 << rq->mpwqe.page_shift) - 1); + u32 page_idx = wqe_offset >> rq->mpwqe.page_shift; + struct mlx5e_rx_wqe_ll *wqe; + struct mlx5_wq_ll *wq; + struct sk_buff *skb; + u16 cqe_bcnt; + + wi->consumed_strides += cstrides; + + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + mlx5e_handle_rx_err_cqe(rq, cqe); + goto mpwrq_cqe_out; + } + + if (unlikely(mpwrq_is_filler_cqe(cqe))) { + struct mlx5e_rq_stats *stats = rq->stats; + + stats->mpwqe_filler_cqes++; + stats->mpwqe_filler_strides += cstrides; + goto mpwrq_cqe_out; + } + + cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); + + skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq, + mlx5e_skb_from_cqe_mpwrq_linear, + mlx5e_skb_from_cqe_mpwrq_nonlinear, + rq, wi, cqe_bcnt, head_offset, page_idx); + if (!skb) + goto mpwrq_cqe_out; + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + + mlx5e_rep_tc_receive(cqe, rq, skb); + +mpwrq_cqe_out: + if (likely(wi->consumed_strides < rq->mpwqe.num_strides)) + return; + + wq = &rq->mpwqe.wq; + wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); + mlx5e_free_rx_mpwqe(rq, wi, true); + mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); +} + +const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep = { + .handle_rx_cqe = mlx5e_handle_rx_cqe_rep, + .handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq_rep, +}; +#endif + +static void +mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, + union mlx5e_alloc_unit *au, u32 data_bcnt, u32 data_offset) +{ + net_prefetchw(skb->data); + + while (data_bcnt) { + /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. 
*/ + u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - data_offset, data_bcnt); + unsigned int truesize; + + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) + truesize = pg_consumed_bytes; + else + truesize = ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz)); + + mlx5e_add_skb_frag(rq, skb, au, data_offset, + pg_consumed_bytes, truesize); + + data_bcnt -= pg_consumed_bytes; + data_offset = 0; + au++; + } +} + +static struct sk_buff * +mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, u32 head_offset, u32 page_idx) +{ + union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx]; + u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt); + u32 frag_offset = head_offset + headlen; + u32 byte_cnt = cqe_bcnt - headlen; + union mlx5e_alloc_unit *head_au = au; + struct sk_buff *skb; + dma_addr_t addr; + + skb = napi_alloc_skb(rq->cq.napi, + ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long))); + if (unlikely(!skb)) { + rq->stats->buff_alloc_err++; + return NULL; + } + + net_prefetchw(skb->data); + + /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */ + if (unlikely(frag_offset >= PAGE_SIZE)) { + au++; + frag_offset -= PAGE_SIZE; + } + + mlx5e_fill_skb_data(skb, rq, au, byte_cnt, frag_offset); + /* copy header */ + addr = page_pool_get_dma_addr(head_au->page); + mlx5e_copy_skb_header(rq, skb, head_au->page, addr, + head_offset, head_offset, headlen); + /* skb linear part was allocated with headlen and aligned to long */ + skb->tail += headlen; + skb->len += headlen; + + return skb; +} + +static struct sk_buff * +mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, u32 head_offset, u32 page_idx) +{ + union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx]; + u16 rx_headroom = rq->buff.headroom; + struct bpf_prog *prog; + struct sk_buff *skb; + u32 metasize = 0; + void *va, *data; + dma_addr_t addr; + u32 frag_size; + + /* Check packet size. 
Note LRO doesn't use linear SKB */ + if (unlikely(cqe_bcnt > rq->hw_mtu)) { + rq->stats->oversize_pkts_sw_drop++; + return NULL; + } + + va = page_address(au->page) + head_offset; + data = va + rx_headroom; + frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); + + addr = page_pool_get_dma_addr(au->page); + dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset, + frag_size, rq->buff.map_dir); + net_prefetch(data); + + prog = rcu_dereference(rq->xdp_prog); + if (prog) { + struct xdp_buff xdp; + + net_prefetchw(va); /* xdp_frame data area */ + mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp); + if (mlx5e_xdp_handle(rq, au->page, prog, &xdp)) { + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) + __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ + return NULL; /* page/packet was consumed by XDP */ + } + + rx_headroom = xdp.data - xdp.data_hard_start; + metasize = xdp.data - xdp.data_meta; + cqe_bcnt = xdp.data_end - xdp.data; + } + frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); + skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize); + if (unlikely(!skb)) + return NULL; + + /* queue up for recycling/reuse */ + page_ref_inc(au->page); + + return skb; +} + +static struct sk_buff * +mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + struct mlx5_cqe64 *cqe, u16 header_index) +{ + struct mlx5e_dma_info *head = &rq->mpwqe.shampo->info[header_index]; + u16 head_offset = head->addr & (PAGE_SIZE - 1); + u16 head_size = cqe->shampo.header_size; + u16 rx_headroom = rq->buff.headroom; + struct sk_buff *skb = NULL; + void *hdr, *data; + u32 frag_size; + + hdr = page_address(head->page) + head_offset; + data = hdr + rx_headroom; + frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size); + + if (likely(frag_size <= BIT(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE))) { + /* build SKB around header */ + dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, rq->buff.map_dir); + prefetchw(hdr); + prefetch(data); + skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size, 0); + + if (unlikely(!skb)) + return NULL; + + /* queue up for recycling/reuse */ + page_ref_inc(head->page); + + } else { + /* allocate SKB and copy header for large header */ + rq->stats->gro_large_hds++; + skb = napi_alloc_skb(rq->cq.napi, + ALIGN(head_size, sizeof(long))); + if (unlikely(!skb)) { + rq->stats->buff_alloc_err++; + return NULL; + } + + prefetchw(skb->data); + mlx5e_copy_skb_header(rq, skb, head->page, head->addr, + head_offset + rx_headroom, + rx_headroom, head_size); + /* skb linear part was allocated with headlen and aligned to long */ + skb->tail += head_size; + skb->len += head_size; + } + return skb; +} + +static void +mlx5e_shampo_align_fragment(struct sk_buff *skb, u8 log_stride_sz) +{ + skb_frag_t *last_frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags - 1]; + unsigned int frag_size = skb_frag_size(last_frag); + unsigned int frag_truesize; + + frag_truesize = ALIGN(frag_size, BIT(log_stride_sz)); + skb->truesize += frag_truesize - frag_size; +} + +static void +mlx5e_shampo_flush_skb(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match) +{ + struct sk_buff *skb = rq->hw_gro_data->skb; + struct mlx5e_rq_stats *stats = rq->stats; + + stats->gro_skbs++; + if (likely(skb_shinfo(skb)->nr_frags)) + mlx5e_shampo_align_fragment(skb, rq->mpwqe.log_stride_sz); + if (NAPI_GRO_CB(skb)->count > 1) + mlx5e_shampo_update_hdr(rq, cqe, match); + napi_gro_receive(rq->cq.napi, skb); + rq->hw_gro_data->skb = NULL; +} + 
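[Editorial note — not part of the diff] The SHAMPO header-UMR path earlier in this file sizes each KLM bulk with bitmap_find_window(): it returns the length of the contiguous run of free header slots starting at a given index, capped at the requested length, and lets the run wrap around the circular header buffer. As a hedged, standalone sketch only (plain userspace C; the bool-array bitmap and the find_next_set() helper are stand-ins invented here for illustration, not kernel APIs), the wrap-around arithmetic can be exercised in isolation like this:

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-in for find_next_bit(): returns the index of the first set
     * entry at or after 'start', or 'size' if none is set.
     */
    static int find_next_set(const bool *bitmap, int size, int start)
    {
            int i;

            for (i = start; i < size; i++)
                    if (bitmap[i])
                            return i;
            return size;
    }

    /* Same contract as bitmap_find_window() above: length of the free run
     * starting at 'first', capped at 'len', allowed to wrap past the end
     * of the circular buffer.
     */
    static int find_window(const bool *bitmap, int len, int bitmap_size, int first)
    {
            int next_one, count;

            next_one = find_next_set(bitmap, bitmap_size, first);
            if (next_one == bitmap_size) {
                    /* No busy slot ahead of us; wrap only if needed. */
                    if (bitmap_size - first >= len)
                            return len;
                    next_one = find_next_set(bitmap, bitmap_size, 0);
                    count = next_one + bitmap_size - first;
            } else {
                    count = next_one - first;
            }

            return count < len ? count : len;
    }

    int main(void)
    {
            /* 8-slot ring with slots 2 and 3 busy; starting at slot 5 the
             * free run is 5, 6, 7, 0, 1 -- five slots before hitting bit 2.
             */
            bool busy[8] = { false, false, true, true, false, false, false, false };

            printf("window = %d\n", find_window(busy, 16, 8, 5)); /* prints 5 */
            return 0;
    }

The worked case mirrors how mlx5e_alloc_rx_hd_mpwqe() obtains klm_entries before splitting the bulk across one or more UMR WQEs.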
+static bool +mlx5e_hw_gro_skb_has_enough_space(struct sk_buff *skb, u16 data_bcnt) +{ + int nr_frags = skb_shinfo(skb)->nr_frags; + + return PAGE_SIZE * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE; +} + +static void +mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index) +{ + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + u64 addr = shampo->info[header_index].addr; + + if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) { + shampo->info[header_index].addr = ALIGN_DOWN(addr, PAGE_SIZE); + mlx5e_page_release_dynamic(rq, shampo->info[header_index].page, true); + } + bitmap_clear(shampo->bitmap, header_index, 1); +} + +static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + u16 data_bcnt = mpwrq_get_cqe_byte_cnt(cqe) - cqe->shampo.header_size; + u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe); + u32 wqe_offset = be32_to_cpu(cqe->shampo.data_offset); + u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); + u32 data_offset = wqe_offset & (PAGE_SIZE - 1); + u32 cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); + u16 wqe_id = be16_to_cpu(cqe->wqe_id); + u32 page_idx = wqe_offset >> PAGE_SHIFT; + u16 head_size = cqe->shampo.header_size; + struct sk_buff **skb = &rq->hw_gro_data->skb; + bool flush = cqe->shampo.flush; + bool match = cqe->shampo.match; + struct mlx5e_rq_stats *stats = rq->stats; + struct mlx5e_rx_wqe_ll *wqe; + union mlx5e_alloc_unit *au; + struct mlx5e_mpw_info *wi; + struct mlx5_wq_ll *wq; + + wi = mlx5e_get_mpw_info(rq, wqe_id); + wi->consumed_strides += cstrides; + + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + mlx5e_handle_rx_err_cqe(rq, cqe); + goto mpwrq_cqe_out; + } + + if (unlikely(mpwrq_is_filler_cqe(cqe))) { + stats->mpwqe_filler_cqes++; + stats->mpwqe_filler_strides += cstrides; + goto mpwrq_cqe_out; + } + + stats->gro_match_packets += match; + + if (*skb && (!match || !(mlx5e_hw_gro_skb_has_enough_space(*skb, data_bcnt)))) { + match = false; + mlx5e_shampo_flush_skb(rq, cqe, match); + } + + if (!*skb) { + if (likely(head_size)) + *skb = mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index); + else + *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe_bcnt, data_offset, + page_idx); + if (unlikely(!*skb)) + goto free_hd_entry; + + NAPI_GRO_CB(*skb)->count = 1; + skb_shinfo(*skb)->gso_size = cqe_bcnt - head_size; + } else { + NAPI_GRO_CB(*skb)->count++; + if (NAPI_GRO_CB(*skb)->count == 2 && + rq->hw_gro_data->fk.basic.n_proto == htons(ETH_P_IP)) { + void *hd_addr = mlx5e_shampo_get_packet_hd(rq, header_index); + int nhoff = ETH_HLEN + rq->hw_gro_data->fk.control.thoff - + sizeof(struct iphdr); + struct iphdr *iph = (struct iphdr *)(hd_addr + nhoff); + + rq->hw_gro_data->second_ip_id = ntohs(iph->id); + } + } + + if (likely(head_size)) { + au = &wi->alloc_units[page_idx]; + mlx5e_fill_skb_data(*skb, rq, au, data_bcnt, data_offset); + } + + mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb); + if (flush) + mlx5e_shampo_flush_skb(rq, cqe, match); +free_hd_entry: + mlx5e_free_rx_shampo_hd_entry(rq, header_index); +mpwrq_cqe_out: + if (likely(wi->consumed_strides < rq->mpwqe.num_strides)) + return; + + wq = &rq->mpwqe.wq; + wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); + mlx5e_free_rx_mpwqe(rq, wi, true); + mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); +} + +static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); + u16 wqe_id = be16_to_cpu(cqe->wqe_id); + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, 
wqe_id); + u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); + u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; + u32 head_offset = wqe_offset & ((1 << rq->mpwqe.page_shift) - 1); + u32 page_idx = wqe_offset >> rq->mpwqe.page_shift; + struct mlx5e_rx_wqe_ll *wqe; + struct mlx5_wq_ll *wq; + struct sk_buff *skb; + u16 cqe_bcnt; + + wi->consumed_strides += cstrides; + + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + mlx5e_handle_rx_err_cqe(rq, cqe); + goto mpwrq_cqe_out; + } + + if (unlikely(mpwrq_is_filler_cqe(cqe))) { + struct mlx5e_rq_stats *stats = rq->stats; + + stats->mpwqe_filler_cqes++; + stats->mpwqe_filler_strides += cstrides; + goto mpwrq_cqe_out; + } + + cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); + + skb = INDIRECT_CALL_3(rq->mpwqe.skb_from_cqe_mpwrq, + mlx5e_skb_from_cqe_mpwrq_linear, + mlx5e_skb_from_cqe_mpwrq_nonlinear, + mlx5e_xsk_skb_from_cqe_mpwrq_linear, + rq, wi, cqe_bcnt, head_offset, page_idx); + if (!skb) + goto mpwrq_cqe_out; + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + + if (mlx5e_cqe_regb_chain(cqe)) + if (!mlx5e_tc_update_skb(cqe, skb)) { + dev_kfree_skb_any(skb); + goto mpwrq_cqe_out; + } + + napi_gro_receive(rq->cq.napi, skb); + +mpwrq_cqe_out: + if (likely(wi->consumed_strides < rq->mpwqe.num_strides)) + return; + + wq = &rq->mpwqe.wq; + wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); + mlx5e_free_rx_mpwqe(rq, wi, true); + mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); +} + +int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) +{ + struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); + struct mlx5_cqwq *cqwq = &cq->wq; + struct mlx5_cqe64 *cqe; + int work_done = 0; + + if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) + return 0; + + if (rq->cqd.left) { + work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget); + if (work_done >= budget) + goto out; + } + + cqe = mlx5_cqwq_get_cqe(cqwq); + if (!cqe) { + if (unlikely(work_done)) + goto out; + return 0; + } + + do { + if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) { + work_done += + mlx5e_decompress_cqes_start(rq, cqwq, + budget - work_done); + continue; + } + + mlx5_cqwq_pop(cqwq); + + INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq, + mlx5e_handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq_shampo, + rq, cqe); + } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq))); + +out: + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) && rq->hw_gro_data->skb) + mlx5e_shampo_flush_skb(rq, NULL, false); + + if (rcu_access_pointer(rq->xdp_prog)) + mlx5e_xdp_rx_poll_complete(rq); + + mlx5_cqwq_update_db_record(cqwq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + return work_done; +} + +#ifdef CONFIG_MLX5_CORE_IPOIB + +#define MLX5_IB_GRH_SGID_OFFSET 8 +#define MLX5_IB_GRH_DGID_OFFSET 24 +#define MLX5_GID_SIZE 16 + +static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, + struct sk_buff *skb) +{ + struct hwtstamp_config *tstamp; + struct mlx5e_rq_stats *stats; + struct net_device *netdev; + struct mlx5e_priv *priv; + char *pseudo_header; + u32 flags_rqpn; + u32 qpn; + u8 *dgid; + u8 g; + + qpn = be32_to_cpu(cqe->sop_drop_qpn) & 0xffffff; + netdev = mlx5i_pkey_get_netdev(rq->netdev, qpn); + + /* No mapping present, cannot process SKB. 
This might happen if a child + * interface is going down while having unprocessed CQEs on parent RQ + */ + if (unlikely(!netdev)) { + /* TODO: add drop counters support */ + skb->dev = NULL; + pr_warn_once("Unable to map QPN %u to dev - dropping skb\n", qpn); + return; + } + + priv = mlx5i_epriv(netdev); + tstamp = &priv->tstamp; + stats = &priv->channel_stats[rq->ix]->rq; + + flags_rqpn = be32_to_cpu(cqe->flags_rqpn); + g = (flags_rqpn >> 28) & 3; + dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET; + if ((!g) || dgid[0] != 0xff) + skb->pkt_type = PACKET_HOST; + else if (memcmp(dgid, netdev->broadcast + 4, MLX5_GID_SIZE) == 0) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_MULTICAST; + + /* Drop packets that this interface sent, ie multicast packets + * that the HCA has replicated. + */ + if (g && (qpn == (flags_rqpn & 0xffffff)) && + (memcmp(netdev->dev_addr + 4, skb->data + MLX5_IB_GRH_SGID_OFFSET, + MLX5_GID_SIZE) == 0)) { + skb->dev = NULL; + return; + } + + skb_pull(skb, MLX5_IB_GRH_BYTES); + + skb->protocol = *((__be16 *)(skb->data)); + + if (netdev->features & NETIF_F_RXCSUM) { + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + stats->csum_complete++; + } else { + skb->ip_summed = CHECKSUM_NONE; + stats->csum_none++; + } + + if (unlikely(mlx5e_rx_hw_stamp(tstamp))) + skb_hwtstamps(skb)->hwtstamp = mlx5e_cqe_ts_to_ns(rq->ptp_cyc2time, + rq->clock, get_cqe_ts(cqe)); + skb_record_rx_queue(skb, rq->ix); + + if (likely(netdev->features & NETIF_F_RXHASH)) + mlx5e_skb_set_hash(cqe, skb); + + /* 20 bytes of ipoib header and 4 for encap existing */ + pseudo_header = skb_push(skb, MLX5_IPOIB_PSEUDO_LEN); + memset(pseudo_header, 0, MLX5_IPOIB_PSEUDO_LEN); + skb_reset_mac_header(skb); + skb_pull(skb, MLX5_IPOIB_HARD_LEN); + + skb->dev = netdev; + + stats->packets++; + stats->bytes += cqe_bcnt; +} + +static void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + struct mlx5e_wqe_frag_info *wi; + struct sk_buff *skb; + u32 cqe_bcnt; + u16 ci; + + ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter)); + wi = get_frag(rq, ci); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + rq->stats->wqe_err++; + goto wq_free_wqe; + } + + skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, + mlx5e_skb_from_cqe_linear, + mlx5e_skb_from_cqe_nonlinear, + rq, wi, cqe_bcnt); + if (!skb) + goto wq_free_wqe; + + mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + if (unlikely(!skb->dev)) { + dev_kfree_skb_any(skb); + goto wq_free_wqe; + } + napi_gro_receive(rq->cq.napi, skb); + +wq_free_wqe: + mlx5e_free_rx_wqe(rq, wi, true); + mlx5_wq_cyc_pop(wq); +} + +const struct mlx5e_rx_handlers mlx5i_rx_handlers = { + .handle_rx_cqe = mlx5i_handle_rx_cqe, + .handle_rx_cqe_mpwqe = NULL, /* Not supported */ +}; +#endif /* CONFIG_MLX5_CORE_IPOIB */ + +int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool xsk) +{ + struct net_device *netdev = rq->netdev; + struct mlx5_core_dev *mdev = rq->mdev; + struct mlx5e_priv *priv = rq->priv; + + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + rq->mpwqe.skb_from_cqe_mpwrq = xsk ? + mlx5e_xsk_skb_from_cqe_mpwrq_linear : + mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ? 
+ mlx5e_skb_from_cqe_mpwrq_linear : + mlx5e_skb_from_cqe_mpwrq_nonlinear; + rq->post_wqes = mlx5e_post_rx_mpwqes; + rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; + + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { + rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe_shampo; + if (!rq->handle_rx_cqe) { + netdev_err(netdev, "RX handler of SHAMPO MPWQE RQ is not set\n"); + return -EINVAL; + } + } else { + rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe; + if (!rq->handle_rx_cqe) { + netdev_err(netdev, "RX handler of MPWQE RQ is not set\n"); + return -EINVAL; + } + } + + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + rq->wqe.skb_from_cqe = xsk ? + mlx5e_xsk_skb_from_cqe_linear : + mlx5e_rx_is_linear_skb(mdev, params, NULL) ? + mlx5e_skb_from_cqe_linear : + mlx5e_skb_from_cqe_nonlinear; + rq->post_wqes = mlx5e_post_rx_wqes; + rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; + rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe; + if (!rq->handle_rx_cqe) { + netdev_err(netdev, "RX handler of RQ is not set\n"); + return -EINVAL; + } + } + + return 0; +} + +static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5e_priv *priv = netdev_priv(rq->netdev); + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + struct mlx5e_wqe_frag_info *wi; + struct devlink_port *dl_port; + struct sk_buff *skb; + u32 cqe_bcnt; + u16 trap_id; + u16 ci; + + trap_id = get_cqe_flow_tag(cqe); + ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter)); + wi = get_frag(rq, ci); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + rq->stats->wqe_err++; + goto free_wqe; + } + + skb = mlx5e_skb_from_cqe_nonlinear(rq, wi, cqe_bcnt); + if (!skb) + goto free_wqe; + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + skb_push(skb, ETH_HLEN); + + dl_port = mlx5e_devlink_get_dl_port(priv); + mlx5_devlink_trap_report(rq->mdev, trap_id, skb, dl_port); + dev_kfree_skb_any(skb); + +free_wqe: + mlx5e_free_rx_wqe(rq, wi, false); + mlx5_wq_cyc_pop(wq); +} + +void mlx5e_rq_set_trap_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params) +{ + rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(rq->mdev, params, NULL) ? + mlx5e_skb_from_cqe_linear : + mlx5e_skb_from_cqe_nonlinear; + rq->post_wqes = mlx5e_post_rx_wqes; + rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; + rq->handle_rx_cqe = mlx5e_trap_handle_rx_cqe; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c new file mode 100644 index 000000000..08a75654f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -0,0 +1,371 @@ +/* + * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/ip.h> +#include <linux/udp.h> +#include <net/udp.h> +#include "en.h" +#include "en/port.h" +#include "eswitch.h" + +static int mlx5e_test_health_info(struct mlx5e_priv *priv) +{ + struct mlx5_core_health *health = &priv->mdev->priv.health; + + return health->fatal_error ? 1 : 0; +} + +static int mlx5e_test_link_state(struct mlx5e_priv *priv) +{ + u8 port_state; + + if (!netif_carrier_ok(priv->netdev)) + return 1; + + port_state = mlx5_query_vport_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0); + return port_state == VPORT_STATE_UP ? 0 : 1; +} + +static int mlx5e_test_link_speed(struct mlx5e_priv *priv) +{ + u32 speed; + + if (!netif_carrier_ok(priv->netdev)) + return 1; + + return mlx5e_port_linkspeed(priv->mdev, &speed); +} + +struct mlx5ehdr { + __be32 version; + __be64 magic; +}; + +#ifdef CONFIG_INET +/* loopback test */ +#define MLX5E_TEST_PKT_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) +\ + sizeof(struct udphdr) + sizeof(struct mlx5ehdr)) +#define MLX5E_TEST_MAGIC 0x5AEED15C001ULL + +static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv) +{ + struct sk_buff *skb = NULL; + struct mlx5ehdr *mlxh; + struct ethhdr *ethh; + struct udphdr *udph; + struct iphdr *iph; + int iplen; + + skb = netdev_alloc_skb(priv->netdev, MLX5E_TEST_PKT_SIZE); + if (!skb) { + netdev_err(priv->netdev, "\tFailed to alloc loopback skb\n"); + return NULL; + } + + net_prefetchw(skb->data); + skb_reserve(skb, NET_IP_ALIGN); + + /* Reserve for ethernet and IP header */ + ethh = skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + + skb_set_network_header(skb, skb->len); + iph = skb_put(skb, sizeof(struct iphdr)); + + skb_set_transport_header(skb, skb->len); + udph = skb_put(skb, sizeof(struct udphdr)); + + /* Fill ETH header */ + ether_addr_copy(ethh->h_dest, priv->netdev->dev_addr); + eth_zero_addr(ethh->h_source); + ethh->h_proto = htons(ETH_P_IP); + + /* Fill UDP header */ + udph->source = htons(9); + udph->dest = htons(9); /* Discard Protocol */ + udph->len = htons(sizeof(struct mlx5ehdr) + sizeof(struct udphdr)); + udph->check = 0; + + /* Fill IP header */ + iph->ihl = 5; + iph->ttl = 32; + iph->version = 4; + iph->protocol = IPPROTO_UDP; + iplen = sizeof(struct iphdr) + sizeof(struct udphdr) + + sizeof(struct mlx5ehdr); + iph->tot_len = htons(iplen); + iph->frag_off = 0; + iph->saddr = 0; + iph->daddr = 0; + iph->tos = 0; + iph->id = 0; + ip_send_check(iph); + + /* Fill test header and data */ + mlxh = skb_put(skb, sizeof(*mlxh)); + mlxh->version = 0; + mlxh->magic = cpu_to_be64(MLX5E_TEST_MAGIC); + + skb->csum = 0; + skb->ip_summed = CHECKSUM_PARTIAL; + udp4_hwcsum(skb, iph->saddr, iph->daddr); + + skb->protocol = htons(ETH_P_IP); + skb->pkt_type = PACKET_HOST; + skb->dev = priv->netdev; + + return skb; +} + +struct mlx5e_lbt_priv { + struct 
packet_type pt; + struct completion comp; + bool loopback_ok; + bool local_lb; +}; + +static int +mlx5e_test_loopback_validate(struct sk_buff *skb, + struct net_device *ndev, + struct packet_type *pt, + struct net_device *orig_ndev) +{ + struct mlx5e_lbt_priv *lbtp = pt->af_packet_priv; + struct mlx5ehdr *mlxh; + struct ethhdr *ethh; + struct udphdr *udph; + struct iphdr *iph; + + /* We are only going to peek, no need to clone the SKB */ + if (MLX5E_TEST_PKT_SIZE - ETH_HLEN > skb_headlen(skb)) + goto out; + + ethh = (struct ethhdr *)skb_mac_header(skb); + if (!ether_addr_equal(ethh->h_dest, orig_ndev->dev_addr)) + goto out; + + iph = ip_hdr(skb); + if (iph->protocol != IPPROTO_UDP) + goto out; + + /* Don't assume skb_transport_header() was set */ + udph = (struct udphdr *)((u8 *)iph + 4 * iph->ihl); + if (udph->dest != htons(9)) + goto out; + + mlxh = (struct mlx5ehdr *)((char *)udph + sizeof(*udph)); + if (mlxh->magic != cpu_to_be64(MLX5E_TEST_MAGIC)) + goto out; /* so close ! */ + + /* bingo */ + lbtp->loopback_ok = true; + complete(&lbtp->comp); +out: + kfree_skb(skb); + return 0; +} + +static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv, + struct mlx5e_lbt_priv *lbtp) +{ + int err = 0; + + /* Temporarily enable local_lb */ + err = mlx5_nic_vport_query_local_lb(priv->mdev, &lbtp->local_lb); + if (err) + return err; + + if (!lbtp->local_lb) { + err = mlx5_nic_vport_update_local_lb(priv->mdev, true); + if (err) + return err; + } + + err = mlx5e_refresh_tirs(priv, true, false); + if (err) + goto out; + + lbtp->loopback_ok = false; + init_completion(&lbtp->comp); + + lbtp->pt.type = htons(ETH_P_IP); + lbtp->pt.func = mlx5e_test_loopback_validate; + lbtp->pt.dev = priv->netdev; + lbtp->pt.af_packet_priv = lbtp; + dev_add_pack(&lbtp->pt); + + return 0; + +out: + if (!lbtp->local_lb) + mlx5_nic_vport_update_local_lb(priv->mdev, false); + + return err; +} + +static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv, + struct mlx5e_lbt_priv *lbtp) +{ + if (!lbtp->local_lb) + mlx5_nic_vport_update_local_lb(priv->mdev, false); + + dev_remove_pack(&lbtp->pt); + mlx5e_refresh_tirs(priv, false, false); +} + +static int mlx5e_cond_loopback(struct mlx5e_priv *priv) +{ + if (is_mdev_switchdev_mode(priv->mdev)) + return -EOPNOTSUPP; + + return 0; +} + +#define MLX5E_LB_VERIFY_TIMEOUT (msecs_to_jiffies(200)) +static int mlx5e_test_loopback(struct mlx5e_priv *priv) +{ + struct mlx5e_lbt_priv *lbtp; + struct sk_buff *skb = NULL; + int err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + netdev_err(priv->netdev, + "\tCan't perform loopback test while device is down\n"); + return -ENODEV; + } + + lbtp = kzalloc(sizeof(*lbtp), GFP_KERNEL); + if (!lbtp) + return -ENOMEM; + lbtp->loopback_ok = false; + + err = mlx5e_test_loopback_setup(priv, lbtp); + if (err) + goto out; + + skb = mlx5e_test_get_udp_skb(priv); + if (!skb) { + err = -ENOMEM; + goto cleanup; + } + + skb_set_queue_mapping(skb, 0); + err = dev_queue_xmit(skb); + if (err) { + netdev_err(priv->netdev, + "\tFailed to xmit loopback packet err(%d)\n", + err); + goto cleanup; + } + + wait_for_completion_timeout(&lbtp->comp, MLX5E_LB_VERIFY_TIMEOUT); + err = !lbtp->loopback_ok; + +cleanup: + mlx5e_test_loopback_cleanup(priv, lbtp); +out: + kfree(lbtp); + return err; +} +#endif + +typedef int (*mlx5e_st_func)(struct mlx5e_priv *); + +struct mlx5e_st { + char name[ETH_GSTRING_LEN]; + mlx5e_st_func st_func; + mlx5e_st_func cond_func; +}; + +static struct mlx5e_st mlx5e_sts[] = { + { "Link Test", mlx5e_test_link_state }, + { "Speed 
Test", mlx5e_test_link_speed }, + { "Health Test", mlx5e_test_health_info }, +#ifdef CONFIG_INET + { "Loopback Test", mlx5e_test_loopback, mlx5e_cond_loopback }, +#endif +}; + +#define MLX5E_ST_NUM ARRAY_SIZE(mlx5e_sts) + +void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest, + u64 *buf) +{ + struct mlx5e_priv *priv = netdev_priv(ndev); + int i, count = 0; + + mutex_lock(&priv->state_lock); + netdev_info(ndev, "Self test begin..\n"); + + for (i = 0; i < MLX5E_ST_NUM; i++) { + struct mlx5e_st st = mlx5e_sts[i]; + + if (st.cond_func && st.cond_func(priv)) + continue; + netdev_info(ndev, "\t[%d] %s start..\n", i, st.name); + buf[count] = st.st_func(priv); + netdev_info(ndev, "\t[%d] %s end: result(%lld)\n", i, st.name, buf[count]); + count++; + } + + mutex_unlock(&priv->state_lock); + + for (i = 0; i < count; i++) { + if (buf[i]) { + etest->flags |= ETH_TEST_FL_FAILED; + break; + } + } + netdev_info(ndev, "Self test out: status flags(0x%x)\n", + etest->flags); +} + +int mlx5e_self_test_fill_strings(struct mlx5e_priv *priv, u8 *data) +{ + int i, count = 0; + + for (i = 0; i < MLX5E_ST_NUM; i++) { + struct mlx5e_st st = mlx5e_sts[i]; + + if (st.cond_func && st.cond_func(priv)) + continue; + if (data) + strcpy(data + count * ETH_GSTRING_LEN, st.name); + count++; + } + return count; +} + +int mlx5e_self_test_num(struct mlx5e_priv *priv) +{ + return mlx5e_self_test_fill_strings(priv, NULL); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c new file mode 100644 index 000000000..f7f54550a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -0,0 +1,2482 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "lib/mlx5.h" +#include "en.h" +#include "en_accel/ktls.h" +#include "en_accel/en_accel.h" +#include "en/ptp.h" +#include "en/port.h" + +#ifdef CONFIG_PAGE_POOL_STATS +#include <net/page_pool.h> +#endif + +static unsigned int stats_grps_num(struct mlx5e_priv *priv) +{ + return !priv->profile->stats_grps_num ? 
0 : + priv->profile->stats_grps_num(priv); +} + +unsigned int mlx5e_stats_total_num(struct mlx5e_priv *priv) +{ + mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; + const unsigned int num_stats_grps = stats_grps_num(priv); + unsigned int total = 0; + int i; + + for (i = 0; i < num_stats_grps; i++) + total += stats_grps[i]->get_num_stats(priv); + + return total; +} + +void mlx5e_stats_update_ndo_stats(struct mlx5e_priv *priv) +{ + mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; + const unsigned int num_stats_grps = stats_grps_num(priv); + int i; + + for (i = num_stats_grps - 1; i >= 0; i--) + if (stats_grps[i]->update_stats && + stats_grps[i]->update_stats_mask & MLX5E_NDO_UPDATE_STATS) + stats_grps[i]->update_stats(priv); +} + +void mlx5e_stats_update(struct mlx5e_priv *priv) +{ + mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; + const unsigned int num_stats_grps = stats_grps_num(priv); + int i; + + for (i = num_stats_grps - 1; i >= 0; i--) + if (stats_grps[i]->update_stats) + stats_grps[i]->update_stats(priv); +} + +void mlx5e_stats_fill(struct mlx5e_priv *priv, u64 *data, int idx) +{ + mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; + const unsigned int num_stats_grps = stats_grps_num(priv); + int i; + + for (i = 0; i < num_stats_grps; i++) + idx = stats_grps[i]->fill_stats(priv, data, idx); +} + +void mlx5e_stats_fill_strings(struct mlx5e_priv *priv, u8 *data) +{ + mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; + const unsigned int num_stats_grps = stats_grps_num(priv); + int i, idx = 0; + + for (i = 0; i < num_stats_grps; i++) + idx = stats_grps[i]->fill_strings(priv, data, idx); +} + +/* Concrete NIC Stats */ + +static const struct counter_desc sw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_added_vlan_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_nop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_mpwqe_blks) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_mpwqe_pkts) }, + +#ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_resync_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_skip_no_sync_data) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_no_sync_data) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_bypass_req) }, +#endif + + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_skbs) }, + { MLX5E_DECLARE_STAT(struct 
mlx5e_sw_stats, rx_gro_match_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_large_hds) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_ecn_mark) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail_slow) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_xmit) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_mpwqe) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_inlnw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_nops) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_cqe) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_recover) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_xmit) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_mpwqe) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_inlnw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_nops) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_cqes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler_cqes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler_strides) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) }, +#ifdef CONFIG_PAGE_POOL_STATS + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_fast) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_slow) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, 
rx_pp_alloc_slow_high_order) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_empty) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_refill) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_waive) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_cached) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_cache_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_ring) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_ring_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_released_ref) }, +#endif +#ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_pkt) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_start) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_end) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_skip) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_res_ok) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_res_retry) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_res_skip) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_err) }, +#endif + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_events) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_poll) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_arm) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_aff_change) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_force_irq) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_eq_rearm) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_complete) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_unnecessary) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_unnecessary_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_ecn_mark) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_removed_vlan_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_xdp_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_xdp_redirect) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_wqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_mpwqe_filler_cqes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_mpwqe_filler_strides) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_buff_alloc_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_blks) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_congst_umr) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_arfs_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_xmit) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_mpwqe) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_inlnw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_cqes) }, +}; + +#define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc) + +static 
MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(sw) +{ + return NUM_SW_COUNTERS; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(sw) +{ + int i; + + for (i = 0; i < NUM_SW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].format); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(sw) +{ + int i; + + for (i = 0; i < NUM_SW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, sw_stats_desc, i); + return idx; +} + +static void mlx5e_stats_grp_sw_update_stats_xdp_red(struct mlx5e_sw_stats *s, + struct mlx5e_xdpsq_stats *xdpsq_red_stats) +{ + s->tx_xdp_xmit += xdpsq_red_stats->xmit; + s->tx_xdp_mpwqe += xdpsq_red_stats->mpwqe; + s->tx_xdp_inlnw += xdpsq_red_stats->inlnw; + s->tx_xdp_nops += xdpsq_red_stats->nops; + s->tx_xdp_full += xdpsq_red_stats->full; + s->tx_xdp_err += xdpsq_red_stats->err; + s->tx_xdp_cqes += xdpsq_red_stats->cqes; +} + +static void mlx5e_stats_grp_sw_update_stats_xdpsq(struct mlx5e_sw_stats *s, + struct mlx5e_xdpsq_stats *xdpsq_stats) +{ + s->rx_xdp_tx_xmit += xdpsq_stats->xmit; + s->rx_xdp_tx_mpwqe += xdpsq_stats->mpwqe; + s->rx_xdp_tx_inlnw += xdpsq_stats->inlnw; + s->rx_xdp_tx_nops += xdpsq_stats->nops; + s->rx_xdp_tx_full += xdpsq_stats->full; + s->rx_xdp_tx_err += xdpsq_stats->err; + s->rx_xdp_tx_cqe += xdpsq_stats->cqes; +} + +static void mlx5e_stats_grp_sw_update_stats_xsksq(struct mlx5e_sw_stats *s, + struct mlx5e_xdpsq_stats *xsksq_stats) +{ + s->tx_xsk_xmit += xsksq_stats->xmit; + s->tx_xsk_mpwqe += xsksq_stats->mpwqe; + s->tx_xsk_inlnw += xsksq_stats->inlnw; + s->tx_xsk_full += xsksq_stats->full; + s->tx_xsk_err += xsksq_stats->err; + s->tx_xsk_cqes += xsksq_stats->cqes; +} + +static void mlx5e_stats_grp_sw_update_stats_xskrq(struct mlx5e_sw_stats *s, + struct mlx5e_rq_stats *xskrq_stats) +{ + s->rx_xsk_packets += xskrq_stats->packets; + s->rx_xsk_bytes += xskrq_stats->bytes; + s->rx_xsk_csum_complete += xskrq_stats->csum_complete; + s->rx_xsk_csum_unnecessary += xskrq_stats->csum_unnecessary; + s->rx_xsk_csum_unnecessary_inner += xskrq_stats->csum_unnecessary_inner; + s->rx_xsk_csum_none += xskrq_stats->csum_none; + s->rx_xsk_ecn_mark += xskrq_stats->ecn_mark; + s->rx_xsk_removed_vlan_packets += xskrq_stats->removed_vlan_packets; + s->rx_xsk_xdp_drop += xskrq_stats->xdp_drop; + s->rx_xsk_xdp_redirect += xskrq_stats->xdp_redirect; + s->rx_xsk_wqe_err += xskrq_stats->wqe_err; + s->rx_xsk_mpwqe_filler_cqes += xskrq_stats->mpwqe_filler_cqes; + s->rx_xsk_mpwqe_filler_strides += xskrq_stats->mpwqe_filler_strides; + s->rx_xsk_oversize_pkts_sw_drop += xskrq_stats->oversize_pkts_sw_drop; + s->rx_xsk_buff_alloc_err += xskrq_stats->buff_alloc_err; + s->rx_xsk_cqe_compress_blks += xskrq_stats->cqe_compress_blks; + s->rx_xsk_cqe_compress_pkts += xskrq_stats->cqe_compress_pkts; + s->rx_xsk_congst_umr += xskrq_stats->congst_umr; + s->rx_xsk_arfs_err += xskrq_stats->arfs_err; +} + +static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s, + struct mlx5e_rq_stats *rq_stats) +{ + s->rx_packets += rq_stats->packets; + s->rx_bytes += rq_stats->bytes; + s->rx_lro_packets += rq_stats->lro_packets; + s->rx_lro_bytes += rq_stats->lro_bytes; + s->rx_gro_packets += rq_stats->gro_packets; + s->rx_gro_bytes += rq_stats->gro_bytes; + s->rx_gro_skbs += rq_stats->gro_skbs; + s->rx_gro_match_packets += rq_stats->gro_match_packets; + s->rx_gro_large_hds += rq_stats->gro_large_hds; + s->rx_ecn_mark += rq_stats->ecn_mark; + s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets; + s->rx_csum_none += rq_stats->csum_none; 
+ s->rx_csum_complete += rq_stats->csum_complete; + s->rx_csum_complete_tail += rq_stats->csum_complete_tail; + s->rx_csum_complete_tail_slow += rq_stats->csum_complete_tail_slow; + s->rx_csum_unnecessary += rq_stats->csum_unnecessary; + s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; + s->rx_xdp_drop += rq_stats->xdp_drop; + s->rx_xdp_redirect += rq_stats->xdp_redirect; + s->rx_wqe_err += rq_stats->wqe_err; + s->rx_mpwqe_filler_cqes += rq_stats->mpwqe_filler_cqes; + s->rx_mpwqe_filler_strides += rq_stats->mpwqe_filler_strides; + s->rx_oversize_pkts_sw_drop += rq_stats->oversize_pkts_sw_drop; + s->rx_buff_alloc_err += rq_stats->buff_alloc_err; + s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks; + s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts; + s->rx_cache_reuse += rq_stats->cache_reuse; + s->rx_cache_full += rq_stats->cache_full; + s->rx_cache_empty += rq_stats->cache_empty; + s->rx_cache_busy += rq_stats->cache_busy; + s->rx_cache_waive += rq_stats->cache_waive; + s->rx_congst_umr += rq_stats->congst_umr; + s->rx_arfs_err += rq_stats->arfs_err; + s->rx_recover += rq_stats->recover; +#ifdef CONFIG_PAGE_POOL_STATS + s->rx_pp_alloc_fast += rq_stats->pp_alloc_fast; + s->rx_pp_alloc_slow += rq_stats->pp_alloc_slow; + s->rx_pp_alloc_empty += rq_stats->pp_alloc_empty; + s->rx_pp_alloc_refill += rq_stats->pp_alloc_refill; + s->rx_pp_alloc_waive += rq_stats->pp_alloc_waive; + s->rx_pp_alloc_slow_high_order += rq_stats->pp_alloc_slow_high_order; + s->rx_pp_recycle_cached += rq_stats->pp_recycle_cached; + s->rx_pp_recycle_cache_full += rq_stats->pp_recycle_cache_full; + s->rx_pp_recycle_ring += rq_stats->pp_recycle_ring; + s->rx_pp_recycle_ring_full += rq_stats->pp_recycle_ring_full; + s->rx_pp_recycle_released_ref += rq_stats->pp_recycle_released_ref; +#endif +#ifdef CONFIG_MLX5_EN_TLS + s->rx_tls_decrypted_packets += rq_stats->tls_decrypted_packets; + s->rx_tls_decrypted_bytes += rq_stats->tls_decrypted_bytes; + s->rx_tls_resync_req_pkt += rq_stats->tls_resync_req_pkt; + s->rx_tls_resync_req_start += rq_stats->tls_resync_req_start; + s->rx_tls_resync_req_end += rq_stats->tls_resync_req_end; + s->rx_tls_resync_req_skip += rq_stats->tls_resync_req_skip; + s->rx_tls_resync_res_ok += rq_stats->tls_resync_res_ok; + s->rx_tls_resync_res_retry += rq_stats->tls_resync_res_retry; + s->rx_tls_resync_res_skip += rq_stats->tls_resync_res_skip; + s->rx_tls_err += rq_stats->tls_err; +#endif +} + +static void mlx5e_stats_grp_sw_update_stats_ch_stats(struct mlx5e_sw_stats *s, + struct mlx5e_ch_stats *ch_stats) +{ + s->ch_events += ch_stats->events; + s->ch_poll += ch_stats->poll; + s->ch_arm += ch_stats->arm; + s->ch_aff_change += ch_stats->aff_change; + s->ch_force_irq += ch_stats->force_irq; + s->ch_eq_rearm += ch_stats->eq_rearm; +} + +static void mlx5e_stats_grp_sw_update_stats_sq(struct mlx5e_sw_stats *s, + struct mlx5e_sq_stats *sq_stats) +{ + s->tx_packets += sq_stats->packets; + s->tx_bytes += sq_stats->bytes; + s->tx_tso_packets += sq_stats->tso_packets; + s->tx_tso_bytes += sq_stats->tso_bytes; + s->tx_tso_inner_packets += sq_stats->tso_inner_packets; + s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes; + s->tx_added_vlan_packets += sq_stats->added_vlan_packets; + s->tx_nop += sq_stats->nop; + s->tx_mpwqe_blks += sq_stats->mpwqe_blks; + s->tx_mpwqe_pkts += sq_stats->mpwqe_pkts; + s->tx_queue_stopped += sq_stats->stopped; + s->tx_queue_wake += sq_stats->wake; + s->tx_queue_dropped += sq_stats->dropped; + s->tx_cqe_err += sq_stats->cqe_err; + s->tx_recover += 
sq_stats->recover; + s->tx_xmit_more += sq_stats->xmit_more; + s->tx_csum_partial_inner += sq_stats->csum_partial_inner; + s->tx_csum_none += sq_stats->csum_none; + s->tx_csum_partial += sq_stats->csum_partial; +#ifdef CONFIG_MLX5_EN_TLS + s->tx_tls_encrypted_packets += sq_stats->tls_encrypted_packets; + s->tx_tls_encrypted_bytes += sq_stats->tls_encrypted_bytes; + s->tx_tls_ooo += sq_stats->tls_ooo; + s->tx_tls_dump_bytes += sq_stats->tls_dump_bytes; + s->tx_tls_dump_packets += sq_stats->tls_dump_packets; + s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes; + s->tx_tls_skip_no_sync_data += sq_stats->tls_skip_no_sync_data; + s->tx_tls_drop_no_sync_data += sq_stats->tls_drop_no_sync_data; + s->tx_tls_drop_bypass_req += sq_stats->tls_drop_bypass_req; +#endif + s->tx_cqes += sq_stats->cqes; +} + +static void mlx5e_stats_grp_sw_update_stats_ptp(struct mlx5e_priv *priv, + struct mlx5e_sw_stats *s) +{ + int i; + + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) + return; + + mlx5e_stats_grp_sw_update_stats_ch_stats(s, &priv->ptp_stats.ch); + + if (priv->tx_ptp_opened) { + for (i = 0; i < priv->max_opened_tc; i++) { + mlx5e_stats_grp_sw_update_stats_sq(s, &priv->ptp_stats.sq[i]); + + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ + barrier(); + } + } + if (priv->rx_ptp_opened) { + mlx5e_stats_grp_sw_update_stats_rq_stats(s, &priv->ptp_stats.rq); + + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ + barrier(); + } +} + +static void mlx5e_stats_grp_sw_update_stats_qos(struct mlx5e_priv *priv, + struct mlx5e_sw_stats *s) +{ + struct mlx5e_sq_stats **stats; + u16 max_qos_sqs; + int i; + + /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ + max_qos_sqs = smp_load_acquire(&priv->htb_max_qos_sqs); + stats = READ_ONCE(priv->htb_qos_sq_stats); + + for (i = 0; i < max_qos_sqs; i++) { + mlx5e_stats_grp_sw_update_stats_sq(s, READ_ONCE(stats[i])); + + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ + barrier(); + } +} + +#ifdef CONFIG_PAGE_POOL_STATS +static void mlx5e_stats_update_stats_rq_page_pool(struct mlx5e_channel *c) +{ + struct mlx5e_rq_stats *rq_stats = c->rq.stats; + struct page_pool *pool = c->rq.page_pool; + struct page_pool_stats stats = { 0 }; + + if (!page_pool_get_stats(pool, &stats)) + return; + + rq_stats->pp_alloc_fast = stats.alloc_stats.fast; + rq_stats->pp_alloc_slow = stats.alloc_stats.slow; + rq_stats->pp_alloc_slow_high_order = stats.alloc_stats.slow_high_order; + rq_stats->pp_alloc_empty = stats.alloc_stats.empty; + rq_stats->pp_alloc_waive = stats.alloc_stats.waive; + rq_stats->pp_alloc_refill = stats.alloc_stats.refill; + + rq_stats->pp_recycle_cached = stats.recycle_stats.cached; + rq_stats->pp_recycle_cache_full = stats.recycle_stats.cache_full; + rq_stats->pp_recycle_ring = stats.recycle_stats.ring; + rq_stats->pp_recycle_ring_full = stats.recycle_stats.ring_full; + rq_stats->pp_recycle_released_ref = stats.recycle_stats.released_refcnt; +} +#else +static void mlx5e_stats_update_stats_rq_page_pool(struct mlx5e_channel *c) +{ +} +#endif + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) +{ + struct mlx5e_sw_stats *s = &priv->stats.sw; + int i; + + memset(s, 0, sizeof(*s)); + + for (i = 0; i < priv->channels.num; i++) /* for active channels only */ + mlx5e_stats_update_stats_rq_page_pool(priv->channels.c[i]); + + for (i = 0; i < priv->stats_nch; i++) { + struct mlx5e_channel_stats *channel_stats = + priv->channel_stats[i]; + + int j; + + mlx5e_stats_grp_sw_update_stats_rq_stats(s, &channel_stats->rq); + 
mlx5e_stats_grp_sw_update_stats_xdpsq(s, &channel_stats->rq_xdpsq); + mlx5e_stats_grp_sw_update_stats_ch_stats(s, &channel_stats->ch); + /* xdp redirect */ + mlx5e_stats_grp_sw_update_stats_xdp_red(s, &channel_stats->xdpsq); + /* AF_XDP zero-copy */ + mlx5e_stats_grp_sw_update_stats_xskrq(s, &channel_stats->xskrq); + mlx5e_stats_grp_sw_update_stats_xsksq(s, &channel_stats->xsksq); + + for (j = 0; j < priv->max_opened_tc; j++) { + mlx5e_stats_grp_sw_update_stats_sq(s, &channel_stats->sq[j]); + + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ + barrier(); + } + } + mlx5e_stats_grp_sw_update_stats_ptp(priv, s); + mlx5e_stats_grp_sw_update_stats_qos(priv, s); +} + +static const struct counter_desc q_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_out_of_buffer) }, +}; + +static const struct counter_desc drop_rq_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_if_down_packets) }, +}; + +#define NUM_Q_COUNTERS ARRAY_SIZE(q_stats_desc) +#define NUM_DROP_RQ_COUNTERS ARRAY_SIZE(drop_rq_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qcnt) +{ + int num_stats = 0; + + if (priv->q_counter) + num_stats += NUM_Q_COUNTERS; + + if (priv->drop_rq_q_counter) + num_stats += NUM_DROP_RQ_COUNTERS; + + return num_stats; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(qcnt) +{ + int i; + + for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + q_stats_desc[i].format); + + for (i = 0; i < NUM_DROP_RQ_COUNTERS && priv->drop_rq_q_counter; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + drop_rq_stats_desc[i].format); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qcnt) +{ + int i; + + for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++) + data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, + q_stats_desc, i); + for (i = 0; i < NUM_DROP_RQ_COUNTERS && priv->drop_rq_q_counter; i++) + data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, + drop_rq_stats_desc, i); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qcnt) +{ + struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt; + u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {}; + int ret; + + MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); + + if (priv->q_counter) { + MLX5_SET(query_q_counter_in, in, counter_set_id, + priv->q_counter); + ret = mlx5_cmd_exec_inout(priv->mdev, query_q_counter, in, out); + if (!ret) + qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out, + out, out_of_buffer); + } + + if (priv->drop_rq_q_counter) { + MLX5_SET(query_q_counter_in, in, counter_set_id, + priv->drop_rq_q_counter); + ret = mlx5_cmd_exec_inout(priv->mdev, query_q_counter, in, out); + if (!ret) + qcnt->rx_if_down_packets = MLX5_GET(query_q_counter_out, + out, out_of_buffer); + } +} + +#define VNIC_ENV_OFF(c) MLX5_BYTE_OFF(query_vnic_env_out, c) +static const struct counter_desc vnic_env_stats_steer_desc[] = { + { "rx_steer_missed_packets", + VNIC_ENV_OFF(vport_env.nic_receive_steering_discard) }, +}; + +static const struct counter_desc vnic_env_stats_dev_oob_desc[] = { + { "dev_internal_queue_oob", + VNIC_ENV_OFF(vport_env.internal_rq_out_of_buffer) }, +}; + +static const struct counter_desc vnic_env_stats_drop_desc[] = { + { "rx_oversize_pkts_buffer", + VNIC_ENV_OFF(vport_env.eth_wqe_too_small) }, +}; + +#define NUM_VNIC_ENV_STEER_COUNTERS(dev) \ + (MLX5_CAP_GEN(dev, nic_receive_steering_discard) ? 
\ + ARRAY_SIZE(vnic_env_stats_steer_desc) : 0) +#define NUM_VNIC_ENV_DEV_OOB_COUNTERS(dev) \ + (MLX5_CAP_GEN(dev, vnic_env_int_rq_oob) ? \ + ARRAY_SIZE(vnic_env_stats_dev_oob_desc) : 0) +#define NUM_VNIC_ENV_DROP_COUNTERS(dev) \ + (MLX5_CAP_GEN(dev, eth_wqe_too_small) ? \ + ARRAY_SIZE(vnic_env_stats_drop_desc) : 0) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vnic_env) +{ + return NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev) + + NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev) + + NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev); +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vnic_env) +{ + int i; + + for (i = 0; i < NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vnic_env_stats_steer_desc[i].format); + + for (i = 0; i < NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vnic_env_stats_dev_oob_desc[i].format); + + for (i = 0; i < NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vnic_env_stats_drop_desc[i].format); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vnic_env) +{ + int i; + + for (i = 0; i < NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev); i++) + data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vnic.query_vnic_env_out, + vnic_env_stats_steer_desc, i); + + for (i = 0; i < NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); i++) + data[idx++] = MLX5E_READ_CTR32_BE(priv->stats.vnic.query_vnic_env_out, + vnic_env_stats_dev_oob_desc, i); + + for (i = 0; i < NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev); i++) + data[idx++] = MLX5E_READ_CTR32_BE(priv->stats.vnic.query_vnic_env_out, + vnic_env_stats_drop_desc, i); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vnic_env) +{ + u32 *out = (u32 *)priv->stats.vnic.query_vnic_env_out; + u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; + struct mlx5_core_dev *mdev = priv->mdev; + + if (!mlx5e_stats_grp_vnic_env_num_stats(priv)) + return; + + MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV); + mlx5_cmd_exec_inout(mdev, query_vnic_env, in, out); +} + +#define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c) +static const struct counter_desc vport_stats_desc[] = { + { "rx_vport_unicast_packets", + VPORT_COUNTER_OFF(received_eth_unicast.packets) }, + { "rx_vport_unicast_bytes", + VPORT_COUNTER_OFF(received_eth_unicast.octets) }, + { "tx_vport_unicast_packets", + VPORT_COUNTER_OFF(transmitted_eth_unicast.packets) }, + { "tx_vport_unicast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_unicast.octets) }, + { "rx_vport_multicast_packets", + VPORT_COUNTER_OFF(received_eth_multicast.packets) }, + { "rx_vport_multicast_bytes", + VPORT_COUNTER_OFF(received_eth_multicast.octets) }, + { "tx_vport_multicast_packets", + VPORT_COUNTER_OFF(transmitted_eth_multicast.packets) }, + { "tx_vport_multicast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_multicast.octets) }, + { "rx_vport_broadcast_packets", + VPORT_COUNTER_OFF(received_eth_broadcast.packets) }, + { "rx_vport_broadcast_bytes", + VPORT_COUNTER_OFF(received_eth_broadcast.octets) }, + { "tx_vport_broadcast_packets", + VPORT_COUNTER_OFF(transmitted_eth_broadcast.packets) }, + { "tx_vport_broadcast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_broadcast.octets) }, + { "rx_vport_rdma_unicast_packets", + VPORT_COUNTER_OFF(received_ib_unicast.packets) }, + { "rx_vport_rdma_unicast_bytes", + VPORT_COUNTER_OFF(received_ib_unicast.octets) }, + { "tx_vport_rdma_unicast_packets", + VPORT_COUNTER_OFF(transmitted_ib_unicast.packets) }, + { "tx_vport_rdma_unicast_bytes", + 
VPORT_COUNTER_OFF(transmitted_ib_unicast.octets) }, + { "rx_vport_rdma_multicast_packets", + VPORT_COUNTER_OFF(received_ib_multicast.packets) }, + { "rx_vport_rdma_multicast_bytes", + VPORT_COUNTER_OFF(received_ib_multicast.octets) }, + { "tx_vport_rdma_multicast_packets", + VPORT_COUNTER_OFF(transmitted_ib_multicast.packets) }, + { "tx_vport_rdma_multicast_bytes", + VPORT_COUNTER_OFF(transmitted_ib_multicast.octets) }, +}; + +#define NUM_VPORT_COUNTERS ARRAY_SIZE(vport_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vport) +{ + return NUM_VPORT_COUNTERS; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vport) +{ + int i; + + for (i = 0; i < NUM_VPORT_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, vport_stats_desc[i].format); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport) +{ + int i; + + for (i = 0; i < NUM_VPORT_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out, + vport_stats_desc, i); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport) +{ + u32 *out = (u32 *)priv->stats.vport.query_vport_out; + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {}; + struct mlx5_core_dev *mdev = priv->mdev; + + MLX5_SET(query_vport_counter_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_COUNTER); + mlx5_cmd_exec_inout(mdev, query_vport_counter, in, out); +} + +#define PPORT_802_3_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pport_802_3_stats_desc[] = { + { "tx_packets_phy", PPORT_802_3_OFF(a_frames_transmitted_ok) }, + { "rx_packets_phy", PPORT_802_3_OFF(a_frames_received_ok) }, + { "rx_crc_errors_phy", PPORT_802_3_OFF(a_frame_check_sequence_errors) }, + { "tx_bytes_phy", PPORT_802_3_OFF(a_octets_transmitted_ok) }, + { "rx_bytes_phy", PPORT_802_3_OFF(a_octets_received_ok) }, + { "tx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) }, + { "tx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) }, + { "rx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_received_ok) }, + { "rx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_received_ok) }, + { "rx_in_range_len_errors_phy", PPORT_802_3_OFF(a_in_range_length_errors) }, + { "rx_out_of_range_len_phy", PPORT_802_3_OFF(a_out_of_range_length_field) }, + { "rx_oversize_pkts_phy", PPORT_802_3_OFF(a_frame_too_long_errors) }, + { "rx_symbol_err_phy", PPORT_802_3_OFF(a_symbol_error_during_carrier) }, + { "tx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_transmitted) }, + { "rx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_received) }, + { "rx_unsupported_op_phy", PPORT_802_3_OFF(a_unsupported_opcodes_received) }, + { "rx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) }, + { "tx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) }, +}; + +#define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(802_3) +{ + return NUM_PPORT_802_3_COUNTERS; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(802_3) +{ + int i; + + for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_802_3_stats_desc[i].format); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(802_3) +{ + int i; + + for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.IEEE_802_3_counters, + pport_802_3_stats_desc, i); + return idx; +} + +#define MLX5_BASIC_PPCNT_SUPPORTED(mdev) \ + (MLX5_CAP_GEN(mdev, 
pcam_reg) ? MLX5_CAP_PCAM_REG(mdev, ppcnt) : 1) + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(802_3) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev)) + return; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->IEEE_802_3_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + +#define MLX5E_READ_CTR64_BE_F(ptr, set, c) \ + be64_to_cpu(*(__be64 *)((char *)ptr + \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.set.c##_high))) + +static int mlx5e_stats_get_ieee(struct mlx5_core_dev *mdev, + u32 *ppcnt_ieee_802_3) +{ + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev)) + return -EOPNOTSUPP; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); + return mlx5_core_access_reg(mdev, in, sz, ppcnt_ieee_802_3, + sz, MLX5_REG_PPCNT, 0, 0); +} + +void mlx5e_stats_pause_get(struct mlx5e_priv *priv, + struct ethtool_pause_stats *pause_stats) +{ + u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + + if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3)) + return; + + pause_stats->tx_pause_frames = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, + a_pause_mac_ctrl_frames_transmitted); + pause_stats->rx_pause_frames = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, + a_pause_mac_ctrl_frames_received); +} + +void mlx5e_stats_eth_phy_get(struct mlx5e_priv *priv, + struct ethtool_eth_phy_stats *phy_stats) +{ + u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + + if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3)) + return; + + phy_stats->SymbolErrorDuringCarrier = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, + a_symbol_error_during_carrier); +} + +void mlx5e_stats_eth_mac_get(struct mlx5e_priv *priv, + struct ethtool_eth_mac_stats *mac_stats) +{ + u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + + if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3)) + return; + +#define RD(name) \ + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, \ + eth_802_3_cntrs_grp_data_layout, \ + name) + + mac_stats->FramesTransmittedOK = RD(a_frames_transmitted_ok); + mac_stats->FramesReceivedOK = RD(a_frames_received_ok); + mac_stats->FrameCheckSequenceErrors = RD(a_frame_check_sequence_errors); + mac_stats->OctetsTransmittedOK = RD(a_octets_transmitted_ok); + mac_stats->OctetsReceivedOK = RD(a_octets_received_ok); + mac_stats->MulticastFramesXmittedOK = RD(a_multicast_frames_xmitted_ok); + mac_stats->BroadcastFramesXmittedOK = RD(a_broadcast_frames_xmitted_ok); + mac_stats->MulticastFramesReceivedOK = RD(a_multicast_frames_received_ok); + mac_stats->BroadcastFramesReceivedOK = RD(a_broadcast_frames_received_ok); + mac_stats->InRangeLengthErrors = RD(a_in_range_length_errors); + mac_stats->OutOfRangeLengthField = RD(a_out_of_range_length_field); + mac_stats->FrameTooLongErrors = RD(a_frame_too_long_errors); +#undef RD +} + +void mlx5e_stats_eth_ctrl_get(struct mlx5e_priv *priv, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + + if 
(mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3)) + return; + + ctrl_stats->MACControlFramesTransmitted = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, + a_mac_control_frames_transmitted); + ctrl_stats->MACControlFramesReceived = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, + a_mac_control_frames_received); + ctrl_stats->UnsupportedOpcodesReceived = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, + a_unsupported_opcodes_received); +} + +#define PPORT_2863_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_2863_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pport_2863_stats_desc[] = { + { "rx_discards_phy", PPORT_2863_OFF(if_in_discards) }, + { "tx_discards_phy", PPORT_2863_OFF(if_out_discards) }, + { "tx_errors_phy", PPORT_2863_OFF(if_out_errors) }, +}; + +#define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(2863) +{ + return NUM_PPORT_2863_COUNTERS; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(2863) +{ + int i; + + for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2863_stats_desc[i].format); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(2863) +{ + int i; + + for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2863_counters, + pport_2863_stats_desc, i); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(2863) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->RFC_2863_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + +#define PPORT_2819_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_2819_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pport_2819_stats_desc[] = { + { "rx_undersize_pkts_phy", PPORT_2819_OFF(ether_stats_undersize_pkts) }, + { "rx_fragments_phy", PPORT_2819_OFF(ether_stats_fragments) }, + { "rx_jabbers_phy", PPORT_2819_OFF(ether_stats_jabbers) }, + { "rx_64_bytes_phy", PPORT_2819_OFF(ether_stats_pkts64octets) }, + { "rx_65_to_127_bytes_phy", PPORT_2819_OFF(ether_stats_pkts65to127octets) }, + { "rx_128_to_255_bytes_phy", PPORT_2819_OFF(ether_stats_pkts128to255octets) }, + { "rx_256_to_511_bytes_phy", PPORT_2819_OFF(ether_stats_pkts256to511octets) }, + { "rx_512_to_1023_bytes_phy", PPORT_2819_OFF(ether_stats_pkts512to1023octets) }, + { "rx_1024_to_1518_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1024to1518octets) }, + { "rx_1519_to_2047_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1519to2047octets) }, + { "rx_2048_to_4095_bytes_phy", PPORT_2819_OFF(ether_stats_pkts2048to4095octets) }, + { "rx_4096_to_8191_bytes_phy", PPORT_2819_OFF(ether_stats_pkts4096to8191octets) }, + { "rx_8192_to_10239_bytes_phy", PPORT_2819_OFF(ether_stats_pkts8192to10239octets) }, +}; + +#define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(2819) +{ + return NUM_PPORT_2819_COUNTERS; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(2819) +{ + int i; + + for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2819_stats_desc[i].format); + return idx; +} + +static 
MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(2819) +{ + int i; + + for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters, + pport_2819_stats_desc, i); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(2819) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev)) + return; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->RFC_2819_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + +static const struct ethtool_rmon_hist_range mlx5e_rmon_ranges[] = { + { 0, 64 }, + { 65, 127 }, + { 128, 255 }, + { 256, 511 }, + { 512, 1023 }, + { 1024, 1518 }, + { 1519, 2047 }, + { 2048, 4095 }, + { 4096, 8191 }, + { 8192, 10239 }, + {} +}; + +void mlx5e_stats_rmon_get(struct mlx5e_priv *priv, + struct ethtool_rmon_stats *rmon, + const struct ethtool_rmon_hist_range **ranges) +{ + u32 ppcnt_RFC_2819_counters[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); + if (mlx5_core_access_reg(mdev, in, sz, ppcnt_RFC_2819_counters, + sz, MLX5_REG_PPCNT, 0, 0)) + return; + +#define RD(name) \ + MLX5E_READ_CTR64_BE_F(ppcnt_RFC_2819_counters, \ + eth_2819_cntrs_grp_data_layout, \ + name) + + rmon->undersize_pkts = RD(ether_stats_undersize_pkts); + rmon->fragments = RD(ether_stats_fragments); + rmon->jabbers = RD(ether_stats_jabbers); + + rmon->hist[0] = RD(ether_stats_pkts64octets); + rmon->hist[1] = RD(ether_stats_pkts65to127octets); + rmon->hist[2] = RD(ether_stats_pkts128to255octets); + rmon->hist[3] = RD(ether_stats_pkts256to511octets); + rmon->hist[4] = RD(ether_stats_pkts512to1023octets); + rmon->hist[5] = RD(ether_stats_pkts1024to1518octets); + rmon->hist[6] = RD(ether_stats_pkts1519to2047octets); + rmon->hist[7] = RD(ether_stats_pkts2048to4095octets); + rmon->hist[8] = RD(ether_stats_pkts4096to8191octets); + rmon->hist[9] = RD(ether_stats_pkts8192to10239octets); +#undef RD + + *ranges = mlx5e_rmon_ranges; +} + +#define PPORT_PHY_STATISTICAL_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.phys_layer_statistical_cntrs.c##_high) +static const struct counter_desc pport_phy_statistical_stats_desc[] = { + { "rx_pcs_symbol_err_phy", PPORT_PHY_STATISTICAL_OFF(phy_symbol_errors) }, + { "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) }, +}; + +static const struct counter_desc +pport_phy_statistical_err_lanes_stats_desc[] = { + { "rx_err_lane_0_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane0) }, + { "rx_err_lane_1_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane1) }, + { "rx_err_lane_2_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane2) }, + { "rx_err_lane_3_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane3) }, +}; + +#define NUM_PPORT_PHY_STATISTICAL_COUNTERS \ + ARRAY_SIZE(pport_phy_statistical_stats_desc) +#define NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS \ + ARRAY_SIZE(pport_phy_statistical_err_lanes_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(phy) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int num_stats; + + /* "1" for link_down_events special counter */ + num_stats = 1; + + 
num_stats += MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group) ? + NUM_PPORT_PHY_STATISTICAL_COUNTERS : 0; + + num_stats += MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters) ? + NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS : 0; + + return num_stats; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(phy) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int i; + + strcpy(data + (idx++) * ETH_GSTRING_LEN, "link_down_events_phy"); + + if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) + return idx; + + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_phy_statistical_stats_desc[i].format); + + if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters)) + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_phy_statistical_err_lanes_stats_desc[i].format); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(phy) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int i; + + /* link_down_events_phy has special handling since it is not stored in __be64 format */ + data[idx++] = MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters, + counter_set.phys_layer_cntrs.link_down_events); + + if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) + return idx; + + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, + pport_phy_statistical_stats_desc, i); + + if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters)) + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, + pport_phy_statistical_err_lanes_stats_desc, + i); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->phy_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + + if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) + return; + + out = pstats->phy_statistical_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + +static int fec_num_lanes(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(pmlp_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {}; + int err; + + MLX5_SET(pmlp_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_PMLP, 0, 0); + if (err) + return 0; + + return MLX5_GET(pmlp_reg, out, width); +} + +static int fec_active_mode(struct mlx5_core_dev *mdev) +{ + unsigned long fec_active_long; + u32 fec_active; + + if (mlx5e_get_fec_mode(mdev, &fec_active, NULL)) + return MLX5E_FEC_NOFEC; + + fec_active_long = fec_active; + return find_first_bit(&fec_active_long, sizeof(unsigned long) * BITS_PER_BYTE); +} + +#define MLX5E_STATS_SET_FEC_BLOCK(idx) ({ \ + fec_stats->corrected_blocks.lanes[(idx)] = \ + MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \ + fc_fec_corrected_blocks_lane##idx); \ + fec_stats->uncorrectable_blocks.lanes[(idx)] = \ + MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \ + fc_fec_uncorrectable_blocks_lane##idx); \ +}) + +static void fec_set_fc_stats(struct ethtool_fec_stats 
*fec_stats, + u32 *ppcnt, u8 lanes) +{ + if (lanes > 3) { /* 4 lanes */ + MLX5E_STATS_SET_FEC_BLOCK(3); + MLX5E_STATS_SET_FEC_BLOCK(2); + } + if (lanes > 1) /* 2 lanes */ + MLX5E_STATS_SET_FEC_BLOCK(1); + if (lanes > 0) /* 1 lane */ + MLX5E_STATS_SET_FEC_BLOCK(0); +} + +static void fec_set_rs_stats(struct ethtool_fec_stats *fec_stats, u32 *ppcnt) +{ + fec_stats->corrected_blocks.total = + MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, + rs_fec_corrected_blocks); + fec_stats->uncorrectable_blocks.total = + MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, + rs_fec_uncorrectable_blocks); +} + +static void fec_set_block_stats(struct mlx5e_priv *priv, + struct ethtool_fec_stats *fec_stats) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 out[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + int mode = fec_active_mode(mdev); + + if (mode == MLX5E_FEC_NOFEC) + return; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); + if (mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0)) + return; + + switch (mode) { + case MLX5E_FEC_RS_528_514: + case MLX5E_FEC_RS_544_514: + case MLX5E_FEC_LLRS_272_257_1: + fec_set_rs_stats(fec_stats, out); + return; + case MLX5E_FEC_FIRECODE: + fec_set_fc_stats(fec_stats, out, fec_num_lanes(mdev)); + } +} + +static void fec_set_corrected_bits_total(struct mlx5e_priv *priv, + struct ethtool_fec_stats *fec_stats) +{ + u32 ppcnt_phy_statistical[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); + if (mlx5_core_access_reg(mdev, in, sz, ppcnt_phy_statistical, + sz, MLX5_REG_PPCNT, 0, 0)) + return; + + fec_stats->corrected_bits.total = + MLX5E_READ_CTR64_BE_F(ppcnt_phy_statistical, + phys_layer_statistical_cntrs, + phy_corrected_bits); +} + +void mlx5e_stats_fec_get(struct mlx5e_priv *priv, + struct ethtool_fec_stats *fec_stats) +{ + if (!MLX5_CAP_PCAM_FEATURE(priv->mdev, ppcnt_statistical_group)) + return; + + fec_set_corrected_bits_total(priv, fec_stats); + fec_set_block_stats(priv, fec_stats); +} + +#define PPORT_ETH_EXT_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_extended_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pport_eth_ext_stats_desc[] = { + { "rx_buffer_passed_thres_phy", PPORT_ETH_EXT_OFF(rx_buffer_almost_full) }, +}; + +#define NUM_PPORT_ETH_EXT_COUNTERS ARRAY_SIZE(pport_eth_ext_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(eth_ext) +{ + if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) + return NUM_PPORT_ETH_EXT_COUNTERS; + + return 0; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(eth_ext) +{ + int i; + + if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) + for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_eth_ext_stats_desc[i].format); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(eth_ext) +{ + int i; + + if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) + for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.eth_ext_counters, + pport_eth_ext_stats_desc, i); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(eth_ext) +{ + struct mlx5e_pport_stats *pstats = 
&priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + if (!MLX5_CAP_PCAM_FEATURE(mdev, rx_buffer_fullness_counters)) + return; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->eth_ext_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + +#define PCIE_PERF_OFF(c) \ + MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c) +static const struct counter_desc pcie_perf_stats_desc[] = { + { "rx_pci_signal_integrity", PCIE_PERF_OFF(rx_errors) }, + { "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) }, +}; + +#define PCIE_PERF_OFF64(c) \ + MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pcie_perf_stats_desc64[] = { + { "outbound_pci_buffer_overflow", PCIE_PERF_OFF64(tx_overflow_buffer_pkt) }, +}; + +static const struct counter_desc pcie_perf_stall_stats_desc[] = { + { "outbound_pci_stalled_rd", PCIE_PERF_OFF(outbound_stalled_reads) }, + { "outbound_pci_stalled_wr", PCIE_PERF_OFF(outbound_stalled_writes) }, + { "outbound_pci_stalled_rd_events", PCIE_PERF_OFF(outbound_stalled_reads_events) }, + { "outbound_pci_stalled_wr_events", PCIE_PERF_OFF(outbound_stalled_writes_events) }, +}; + +#define NUM_PCIE_PERF_COUNTERS ARRAY_SIZE(pcie_perf_stats_desc) +#define NUM_PCIE_PERF_COUNTERS64 ARRAY_SIZE(pcie_perf_stats_desc64) +#define NUM_PCIE_PERF_STALL_COUNTERS ARRAY_SIZE(pcie_perf_stall_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(pcie) +{ + int num_stats = 0; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) + num_stats += NUM_PCIE_PERF_COUNTERS; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt)) + num_stats += NUM_PCIE_PERF_COUNTERS64; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled)) + num_stats += NUM_PCIE_PERF_STALL_COUNTERS; + + return num_stats; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(pcie) +{ + int i; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) + for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stats_desc[i].format); + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt)) + for (i = 0; i < NUM_PCIE_PERF_COUNTERS64; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stats_desc64[i].format); + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled)) + for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stall_stats_desc[i].format); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pcie) +{ + int i; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) + for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stats_desc, i); + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt)) + for (i = 0; i < NUM_PCIE_PERF_COUNTERS64; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stats_desc64, i); + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled)) + for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stall_stats_desc, i); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pcie) +{ + struct 
mlx5e_pcie_stats *pcie_stats = &priv->stats.pcie; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(mpcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(mpcnt_reg); + void *out; + + if (!MLX5_CAP_MCAM_FEATURE(mdev, pcie_performance_group)) + return; + + out = pcie_stats->pcie_perf_counters; + MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); +} + +#define PPORT_PER_TC_PRIO_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_per_tc_prio_grp_data_layout.c##_high) + +static const struct counter_desc pport_per_tc_prio_stats_desc[] = { + { "rx_prio%d_buf_discard", PPORT_PER_TC_PRIO_OFF(no_buffer_discard_uc) }, +}; + +#define NUM_PPORT_PER_TC_PRIO_COUNTERS ARRAY_SIZE(pport_per_tc_prio_stats_desc) + +#define PPORT_PER_TC_CONGEST_PRIO_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_per_tc_congest_prio_grp_data_layout.c##_high) + +static const struct counter_desc pport_per_tc_congest_prio_stats_desc[] = { + { "rx_prio%d_cong_discard", PPORT_PER_TC_CONGEST_PRIO_OFF(wred_discard) }, + { "rx_prio%d_marked", PPORT_PER_TC_CONGEST_PRIO_OFF(ecn_marked_tc) }, +}; + +#define NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS \ + ARRAY_SIZE(pport_per_tc_congest_prio_stats_desc) + +static int mlx5e_grp_per_tc_prio_get_num_stats(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return 0; + + return NUM_PPORT_PER_TC_PRIO_COUNTERS * NUM_PPORT_PRIO; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(per_port_buff_congest) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int i, prio; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return idx; + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_TC_PRIO_COUNTERS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_tc_prio_stats_desc[i].format, prio); + for (i = 0; i < NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_tc_congest_prio_stats_desc[i].format, prio); + } + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(per_port_buff_congest) +{ + struct mlx5e_pport_stats *pport = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + int i, prio; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return idx; + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_TC_PRIO_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&pport->per_tc_prio_counters[prio], + pport_per_tc_prio_stats_desc, i); + for (i = 0; i < NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS ; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&pport->per_tc_congest_prio_counters[prio], + pport_per_tc_congest_prio_stats_desc, i); + } + + return idx; +} + +static void mlx5e_grp_per_tc_prio_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + int prio; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return; + + MLX5_SET(ppcnt_reg, in, pnat, 2); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + out = pstats->per_tc_prio_counters[prio]; + MLX5_SET(ppcnt_reg, in, prio_tc, prio); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + } +} + +static int mlx5e_grp_per_tc_congest_prio_get_num_stats(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + if 
(!MLX5_CAP_GEN(mdev, sbcam_reg)) + return 0; + + return NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS * NUM_PPORT_PRIO; +} + +static void mlx5e_grp_per_tc_congest_prio_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + int prio; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return; + + MLX5_SET(ppcnt_reg, in, pnat, 2); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_TRAFFIC_CLASS_CONGESTION_GROUP); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + out = pstats->per_tc_congest_prio_counters[prio]; + MLX5_SET(ppcnt_reg, in, prio_tc, prio); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + } +} + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(per_port_buff_congest) +{ + return mlx5e_grp_per_tc_prio_get_num_stats(priv) + + mlx5e_grp_per_tc_congest_prio_get_num_stats(priv); +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(per_port_buff_congest) +{ + mlx5e_grp_per_tc_prio_update_stats(priv); + mlx5e_grp_per_tc_congest_prio_update_stats(priv); +} + +#define PPORT_PER_PRIO_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_per_prio_grp_data_layout.c##_high) +static const struct counter_desc pport_per_prio_traffic_stats_desc[] = { + { "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) }, + { "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) }, + { "rx_prio%d_discards", PPORT_PER_PRIO_OFF(rx_discards) }, + { "tx_prio%d_bytes", PPORT_PER_PRIO_OFF(tx_octets) }, + { "tx_prio%d_packets", PPORT_PER_PRIO_OFF(tx_frames) }, +}; + +#define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS ARRAY_SIZE(pport_per_prio_traffic_stats_desc) + +static int mlx5e_grp_per_prio_traffic_get_num_stats(void) +{ + return NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * NUM_PPORT_PRIO; +} + +static int mlx5e_grp_per_prio_traffic_fill_strings(struct mlx5e_priv *priv, + u8 *data, + int idx) +{ + int i, prio; + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_traffic_stats_desc[i].format, prio); + } + + return idx; +} + +static int mlx5e_grp_per_prio_traffic_fill_stats(struct mlx5e_priv *priv, + u64 *data, + int idx) +{ + int i, prio; + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], + pport_per_prio_traffic_stats_desc, i); + } + + return idx; +} + +static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { + /* %s is "global" or "prio{i}" */ + { "rx_%s_pause", PPORT_PER_PRIO_OFF(rx_pause) }, + { "rx_%s_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) }, + { "tx_%s_pause", PPORT_PER_PRIO_OFF(tx_pause) }, + { "tx_%s_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) }, + { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, +}; + +static const struct counter_desc pport_pfc_stall_stats_desc[] = { + { "tx_pause_storm_warning_events", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) }, + { "tx_pause_storm_error_events", PPORT_PER_PRIO_OFF(device_stall_critical_watermark_cnt) }, +}; + +#define NUM_PPORT_PER_PRIO_PFC_COUNTERS ARRAY_SIZE(pport_per_prio_pfc_stats_desc) +#define NUM_PPORT_PFC_STALL_COUNTERS(priv) (ARRAY_SIZE(pport_pfc_stall_stats_desc) * \ + MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) * \ + MLX5_CAP_DEBUG((priv)->mdev, stall_detect)) + +static 
unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u8 pfc_en_tx; + u8 pfc_en_rx; + int err; + + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + + err = mlx5_query_port_pfc(mdev, &pfc_en_tx, &pfc_en_rx); + + return err ? 0 : pfc_en_tx | pfc_en_rx; +} + +static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 rx_pause; + u32 tx_pause; + int err; + + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return false; + + err = mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); + + return err ? false : rx_pause | tx_pause; +} + +static int mlx5e_grp_per_prio_pfc_get_num_stats(struct mlx5e_priv *priv) +{ + return (mlx5e_query_global_pause_combined(priv) + + hweight8(mlx5e_query_pfc_combined(priv))) * + NUM_PPORT_PER_PRIO_PFC_COUNTERS + + NUM_PPORT_PFC_STALL_COUNTERS(priv); +} + +static int mlx5e_grp_per_prio_pfc_fill_strings(struct mlx5e_priv *priv, + u8 *data, + int idx) +{ + unsigned long pfc_combined; + int i, prio; + + pfc_combined = mlx5e_query_pfc_combined(priv); + for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + char pfc_string[ETH_GSTRING_LEN]; + + snprintf(pfc_string, sizeof(pfc_string), "prio%d", prio); + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_pfc_stats_desc[i].format, pfc_string); + } + } + + if (mlx5e_query_global_pause_combined(priv)) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_pfc_stats_desc[i].format, "global"); + } + } + + for (i = 0; i < NUM_PPORT_PFC_STALL_COUNTERS(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_pfc_stall_stats_desc[i].format); + + return idx; +} + +static int mlx5e_grp_per_prio_pfc_fill_stats(struct mlx5e_priv *priv, + u64 *data, + int idx) +{ + unsigned long pfc_combined; + int i, prio; + + pfc_combined = mlx5e_query_pfc_combined(priv); + for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], + pport_per_prio_pfc_stats_desc, i); + } + } + + if (mlx5e_query_global_pause_combined(priv)) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0], + pport_per_prio_pfc_stats_desc, i); + } + } + + for (i = 0; i < NUM_PPORT_PFC_STALL_COUNTERS(priv); i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0], + pport_pfc_stall_stats_desc, i); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(per_prio) +{ + return mlx5e_grp_per_prio_traffic_get_num_stats() + + mlx5e_grp_per_prio_pfc_get_num_stats(priv); +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(per_prio) +{ + idx = mlx5e_grp_per_prio_traffic_fill_strings(priv, data, idx); + idx = mlx5e_grp_per_prio_pfc_fill_strings(priv, data, idx); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(per_prio) +{ + idx = mlx5e_grp_per_prio_traffic_fill_stats(priv, data, idx); + idx = mlx5e_grp_per_prio_pfc_fill_stats(priv, data, idx); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(per_prio) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + int prio; + void *out; + + if 
(!MLX5_BASIC_PPCNT_SUPPORTED(mdev)) + return; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + out = pstats->per_prio_counters[prio]; + MLX5_SET(ppcnt_reg, in, prio_tc, prio); + mlx5_core_access_reg(mdev, in, sz, out, sz, + MLX5_REG_PPCNT, 0, 0); + } +} + +static const struct counter_desc mlx5e_pme_status_desc[] = { + { "module_unplug", sizeof(u64) * MLX5_MODULE_STATUS_UNPLUGGED }, +}; + +static const struct counter_desc mlx5e_pme_error_desc[] = { + { "module_bus_stuck", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BUS_STUCK }, + { "module_high_temp", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE }, + { "module_bad_shorted", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BAD_CABLE }, +}; + +#define NUM_PME_STATUS_STATS ARRAY_SIZE(mlx5e_pme_status_desc) +#define NUM_PME_ERR_STATS ARRAY_SIZE(mlx5e_pme_error_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(pme) +{ + return NUM_PME_STATUS_STATS + NUM_PME_ERR_STATS; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(pme) +{ + int i; + + for (i = 0; i < NUM_PME_STATUS_STATS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_status_desc[i].format); + + for (i = 0; i < NUM_PME_ERR_STATS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_error_desc[i].format); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pme) +{ + struct mlx5_pme_stats pme_stats; + int i; + + mlx5_get_pme_stats(priv->mdev, &pme_stats); + + for (i = 0; i < NUM_PME_STATUS_STATS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.status_counters, + mlx5e_pme_status_desc, i); + + for (i = 0; i < NUM_PME_ERR_STATS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.error_counters, + mlx5e_pme_error_desc, i); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pme) { return; } + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(tls) +{ + return mlx5e_ktls_get_count(priv); +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(tls) +{ + return idx + mlx5e_ktls_get_strings(priv, data + idx * ETH_GSTRING_LEN); +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(tls) +{ + return idx + mlx5e_ktls_get_stats(priv, data + idx); +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(tls) { return; } + +static const struct counter_desc rq_stats_desc[] = { + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_redirect) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_skbs) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_match_packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_large_hds) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, ecn_mark) }, + { 
MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, removed_vlan_packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) }, +#ifdef CONFIG_PAGE_POOL_STATS + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_fast) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_slow) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_slow_high_order) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_empty) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_refill) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_waive) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_cached) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_cache_full) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_ring) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_ring_full) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_released_ref) }, +#endif +#ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_pkt) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_start) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_end) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_skip) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_res_ok) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_res_retry) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_res_skip) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_err) }, +#endif +}; + +static const struct counter_desc sq_stats_desc[] = { + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) }, +#ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_TX_STAT(struct 
mlx5e_sq_stats, tls_encrypted_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ooo) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_resync_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_skip_no_sync_data) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) }, +#endif + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, recover) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, cqes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, cqe_err) }, +}; + +static const struct counter_desc rq_xdpsq_stats_desc[] = { + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) }, + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) }, + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, nops) }, + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) }, + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) }, + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, +}; + +static const struct counter_desc xdpsq_stats_desc[] = { + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) }, + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) }, + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, nops) }, + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) }, + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) }, + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, +}; + +static const struct counter_desc xskrq_stats_desc[] = { + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, ecn_mark) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, removed_vlan_packets) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, xdp_redirect) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, congst_umr) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, arfs_err) }, +}; + 
+static const struct counter_desc xsksq_stats_desc[] = { + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, full) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, err) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, +}; + +static const struct counter_desc ch_stats_desc[] = { + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, events) }, + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, poll) }, + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, arm) }, + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, aff_change) }, + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, force_irq) }, + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, eq_rearm) }, +}; + +static const struct counter_desc ptp_sq_stats_desc[] = { + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, packets) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, bytes) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, csum_partial) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, csum_none) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, stopped) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, dropped) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, recover) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, cqes) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, wake) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, cqe_err) }, +}; + +static const struct counter_desc ptp_ch_stats_desc[] = { + { MLX5E_DECLARE_PTP_CH_STAT(struct mlx5e_ch_stats, events) }, + { MLX5E_DECLARE_PTP_CH_STAT(struct mlx5e_ch_stats, poll) }, + { MLX5E_DECLARE_PTP_CH_STAT(struct mlx5e_ch_stats, arm) }, + { MLX5E_DECLARE_PTP_CH_STAT(struct mlx5e_ch_stats, eq_rearm) }, +}; + +static const struct counter_desc ptp_cq_stats_desc[] = { + { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, cqe) }, + { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, err_cqe) }, + { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort) }, + { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) }, + { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, resync_cqe) }, + { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, resync_event) }, + { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, ooo_cqe_drop) }, +}; + +static const struct counter_desc ptp_rq_stats_desc[] = { + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete_tail) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, xdp_redirect) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct 
mlx5e_rq_stats, lro_packets) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, lro_bytes) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, ecn_mark) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, removed_vlan_packets) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_reuse) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_full) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_empty) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_busy) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_waive) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, congst_umr) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, arfs_err) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, recover) }, +}; + +static const struct counter_desc qos_sq_stats_desc[] = { + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) }, +#ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ooo) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_resync_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_skip_no_sync_data) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) }, +#endif + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_none) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, stopped) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, dropped) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, recover) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, cqes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, wake) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, cqe_err) }, +}; + +#define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) +#define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) +#define NUM_XDPSQ_STATS 
ARRAY_SIZE(xdpsq_stats_desc) +#define NUM_RQ_XDPSQ_STATS ARRAY_SIZE(rq_xdpsq_stats_desc) +#define NUM_XSKRQ_STATS ARRAY_SIZE(xskrq_stats_desc) +#define NUM_XSKSQ_STATS ARRAY_SIZE(xsksq_stats_desc) +#define NUM_CH_STATS ARRAY_SIZE(ch_stats_desc) +#define NUM_PTP_SQ_STATS ARRAY_SIZE(ptp_sq_stats_desc) +#define NUM_PTP_CH_STATS ARRAY_SIZE(ptp_ch_stats_desc) +#define NUM_PTP_CQ_STATS ARRAY_SIZE(ptp_cq_stats_desc) +#define NUM_PTP_RQ_STATS ARRAY_SIZE(ptp_rq_stats_desc) +#define NUM_QOS_SQ_STATS ARRAY_SIZE(qos_sq_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qos) +{ + /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ + return NUM_QOS_SQ_STATS * smp_load_acquire(&priv->htb_max_qos_sqs); +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(qos) +{ + /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ + u16 max_qos_sqs = smp_load_acquire(&priv->htb_max_qos_sqs); + int i, qid; + + for (qid = 0; qid < max_qos_sqs; qid++) + for (i = 0; i < NUM_QOS_SQ_STATS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + qos_sq_stats_desc[i].format, qid); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qos) +{ + struct mlx5e_sq_stats **stats; + u16 max_qos_sqs; + int i, qid; + + /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ + max_qos_sqs = smp_load_acquire(&priv->htb_max_qos_sqs); + stats = READ_ONCE(priv->htb_qos_sq_stats); + + for (qid = 0; qid < max_qos_sqs; qid++) { + struct mlx5e_sq_stats *s = READ_ONCE(stats[qid]); + + for (i = 0; i < NUM_QOS_SQ_STATS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(s, qos_sq_stats_desc, i); + } + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qos) { return; } + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ptp) +{ + int num = NUM_PTP_CH_STATS; + + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) + return 0; + + if (priv->tx_ptp_opened) + num += (NUM_PTP_SQ_STATS + NUM_PTP_CQ_STATS) * priv->max_opened_tc; + if (priv->rx_ptp_opened) + num += NUM_PTP_RQ_STATS; + + return num; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ptp) +{ + int i, tc; + + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) + return idx; + + for (i = 0; i < NUM_PTP_CH_STATS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + "%s", ptp_ch_stats_desc[i].format); + + if (priv->tx_ptp_opened) { + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_SQ_STATS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + ptp_sq_stats_desc[i].format, tc); + + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_CQ_STATS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + ptp_cq_stats_desc[i].format, tc); + } + if (priv->rx_ptp_opened) { + for (i = 0; i < NUM_PTP_RQ_STATS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + ptp_rq_stats_desc[i].format, MLX5E_PTP_CHANNEL_IX); + } + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ptp) +{ + int i, tc; + + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) + return idx; + + for (i = 0; i < NUM_PTP_CH_STATS; i++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.ch, + ptp_ch_stats_desc, i); + + if (priv->tx_ptp_opened) { + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_SQ_STATS; i++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.sq[tc], + ptp_sq_stats_desc, i); + + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_CQ_STATS; i++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.cq[tc], + ptp_cq_stats_desc, i); + } + if (priv->rx_ptp_opened) { + for (i = 0; i < NUM_PTP_RQ_STATS; i++) + data[idx++] = 
+ MLX5E_READ_CTR64_CPU(&priv->ptp_stats.rq, + ptp_rq_stats_desc, i); + } + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ptp) { return; } + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(channels) +{ + int max_nch = priv->stats_nch; + + return (NUM_RQ_STATS * max_nch) + + (NUM_CH_STATS * max_nch) + + (NUM_SQ_STATS * max_nch * priv->max_opened_tc) + + (NUM_RQ_XDPSQ_STATS * max_nch) + + (NUM_XDPSQ_STATS * max_nch) + + (NUM_XSKRQ_STATS * max_nch * priv->xsk.ever_used) + + (NUM_XSKSQ_STATS * max_nch * priv->xsk.ever_used); +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(channels) +{ + bool is_xsk = priv->xsk.ever_used; + int max_nch = priv->stats_nch; + int i, j, tc; + + for (i = 0; i < max_nch; i++) + for (j = 0; j < NUM_CH_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + ch_stats_desc[j].format, i); + + for (i = 0; i < max_nch; i++) { + for (j = 0; j < NUM_RQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + rq_stats_desc[j].format, i); + for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + xskrq_stats_desc[j].format, i); + for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + rq_xdpsq_stats_desc[j].format, i); + } + + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < max_nch; i++) + for (j = 0; j < NUM_SQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + sq_stats_desc[j].format, + i + tc * max_nch); + + for (i = 0; i < max_nch; i++) { + for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + xsksq_stats_desc[j].format, i); + for (j = 0; j < NUM_XDPSQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + xdpsq_stats_desc[j].format, i); + } + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(channels) +{ + bool is_xsk = priv->xsk.ever_used; + int max_nch = priv->stats_nch; + int i, j, tc; + + for (i = 0; i < max_nch; i++) + for (j = 0; j < NUM_CH_STATS; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->ch, + ch_stats_desc, j); + + for (i = 0; i < max_nch; i++) { + for (j = 0; j < NUM_RQ_STATS; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->rq, + rq_stats_desc, j); + for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xskrq, + xskrq_stats_desc, j); + for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->rq_xdpsq, + rq_xdpsq_stats_desc, j); + } + + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < max_nch; i++) + for (j = 0; j < NUM_SQ_STATS; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->sq[tc], + sq_stats_desc, j); + + for (i = 0; i < max_nch; i++) { + for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xsksq, + xsksq_stats_desc, j); + for (j = 0; j < NUM_XDPSQ_STATS; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xdpsq, + xdpsq_stats_desc, j); + } + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(channels) { return; } + +MLX5E_DEFINE_STATS_GRP(sw, 0); +MLX5E_DEFINE_STATS_GRP(qcnt, MLX5E_NDO_UPDATE_STATS); +MLX5E_DEFINE_STATS_GRP(vnic_env, 0); +MLX5E_DEFINE_STATS_GRP(vport, MLX5E_NDO_UPDATE_STATS); +MLX5E_DEFINE_STATS_GRP(802_3, MLX5E_NDO_UPDATE_STATS); +MLX5E_DEFINE_STATS_GRP(2863, 0); +MLX5E_DEFINE_STATS_GRP(2819, 0); +MLX5E_DEFINE_STATS_GRP(phy, 0); +MLX5E_DEFINE_STATS_GRP(pcie, 0); +MLX5E_DEFINE_STATS_GRP(per_prio, 0); 
+MLX5E_DEFINE_STATS_GRP(pme, 0); +MLX5E_DEFINE_STATS_GRP(channels, 0); +MLX5E_DEFINE_STATS_GRP(per_port_buff_congest, 0); +MLX5E_DEFINE_STATS_GRP(eth_ext, 0); +static MLX5E_DEFINE_STATS_GRP(tls, 0); +MLX5E_DEFINE_STATS_GRP(ptp, 0); +static MLX5E_DEFINE_STATS_GRP(qos, 0); + +/* The stats groups are listed in the reverse order of their update_stats() calls */ +mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = { + &MLX5E_STATS_GRP(sw), + &MLX5E_STATS_GRP(qcnt), + &MLX5E_STATS_GRP(vnic_env), + &MLX5E_STATS_GRP(vport), + &MLX5E_STATS_GRP(802_3), + &MLX5E_STATS_GRP(2863), + &MLX5E_STATS_GRP(2819), + &MLX5E_STATS_GRP(phy), + &MLX5E_STATS_GRP(eth_ext), + &MLX5E_STATS_GRP(pcie), + &MLX5E_STATS_GRP(per_prio), + &MLX5E_STATS_GRP(pme), +#ifdef CONFIG_MLX5_EN_IPSEC + &MLX5E_STATS_GRP(ipsec_sw), +#endif + &MLX5E_STATS_GRP(tls), + &MLX5E_STATS_GRP(channels), + &MLX5E_STATS_GRP(per_port_buff_congest), + &MLX5E_STATS_GRP(ptp), + &MLX5E_STATS_GRP(qos), +#ifdef CONFIG_MLX5_EN_MACSEC + &MLX5E_STATS_GRP(macsec_hw), +#endif +}; + +unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv) +{ + return ARRAY_SIZE(mlx5e_nic_stats_grps); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h new file mode 100644 index 000000000..52a67efaf --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -0,0 +1,496 @@ +/* + * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __MLX5_EN_STATS_H__ +#define __MLX5_EN_STATS_H__ + +#define MLX5E_READ_CTR64_CPU(ptr, dsc, i) \ + (*(u64 *)((char *)ptr + dsc[i].offset)) +#define MLX5E_READ_CTR64_BE(ptr, dsc, i) \ + be64_to_cpu(*(__be64 *)((char *)ptr + dsc[i].offset)) +#define MLX5E_READ_CTR32_CPU(ptr, dsc, i) \ + (*(u32 *)((char *)ptr + dsc[i].offset)) +#define MLX5E_READ_CTR32_BE(ptr, dsc, i) \ + be32_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset)) + +#define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld) +#define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_XDPSQ_STAT(type, fld) "tx%d_xdp_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_RQ_XDPSQ_STAT(type, fld) "rx%d_xdp_tx_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_XSKRQ_STAT(type, fld) "rx%d_xsk_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_XSKSQ_STAT(type, fld) "tx%d_xsk_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_CH_STAT(type, fld) "ch%d_"#fld, offsetof(type, fld) + +#define MLX5E_DECLARE_PTP_TX_STAT(type, fld) "ptp_tx%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_PTP_CH_STAT(type, fld) "ptp_ch_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_PTP_CQ_STAT(type, fld) "ptp_cq%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_PTP_RQ_STAT(type, fld) "ptp_rq%d_"#fld, offsetof(type, fld) + +#define MLX5E_DECLARE_QOS_TX_STAT(type, fld) "qos_tx%d_"#fld, offsetof(type, fld) + +struct counter_desc { + char format[ETH_GSTRING_LEN]; + size_t offset; /* Byte offset */ +}; + +enum { + MLX5E_NDO_UPDATE_STATS = BIT(0x1), +}; + +struct mlx5e_priv; +struct mlx5e_stats_grp { + u16 update_stats_mask; + int (*get_num_stats)(struct mlx5e_priv *priv); + int (*fill_strings)(struct mlx5e_priv *priv, u8 *data, int idx); + int (*fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx); + void (*update_stats)(struct mlx5e_priv *priv); +}; + +typedef const struct mlx5e_stats_grp *const mlx5e_stats_grp_t; + +#define MLX5E_STATS_GRP_OP(grp, name) mlx5e_stats_grp_ ## grp ## _ ## name + +#define MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(grp) \ + int MLX5E_STATS_GRP_OP(grp, num_stats)(struct mlx5e_priv *priv) + +#define MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(grp) \ + void MLX5E_STATS_GRP_OP(grp, update_stats)(struct mlx5e_priv *priv) + +#define MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(grp) \ + int MLX5E_STATS_GRP_OP(grp, fill_strings)(struct mlx5e_priv *priv, u8 *data, int idx) + +#define MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(grp) \ + int MLX5E_STATS_GRP_OP(grp, fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx) + +#define MLX5E_STATS_GRP(grp) mlx5e_stats_grp_ ## grp + +#define MLX5E_DECLARE_STATS_GRP(grp) \ + const struct mlx5e_stats_grp MLX5E_STATS_GRP(grp) + +#define MLX5E_DEFINE_STATS_GRP(grp, mask) \ +MLX5E_DECLARE_STATS_GRP(grp) = { \ + .get_num_stats = MLX5E_STATS_GRP_OP(grp, num_stats), \ + .fill_stats = MLX5E_STATS_GRP_OP(grp, fill_stats), \ + .fill_strings = MLX5E_STATS_GRP_OP(grp, fill_strings), \ + .update_stats = MLX5E_STATS_GRP_OP(grp, update_stats), \ + .update_stats_mask = mask, \ +} + +unsigned int mlx5e_stats_total_num(struct mlx5e_priv *priv); +void mlx5e_stats_update(struct mlx5e_priv *priv); +void mlx5e_stats_fill(struct mlx5e_priv *priv, u64 *data, int idx); +void mlx5e_stats_fill_strings(struct mlx5e_priv *priv, u8 *data); +void mlx5e_stats_update_ndo_stats(struct mlx5e_priv *priv); + +void mlx5e_stats_pause_get(struct mlx5e_priv *priv, + struct ethtool_pause_stats *pause_stats); +void 
mlx5e_stats_fec_get(struct mlx5e_priv *priv, + struct ethtool_fec_stats *fec_stats); + +void mlx5e_stats_eth_phy_get(struct mlx5e_priv *priv, + struct ethtool_eth_phy_stats *phy_stats); +void mlx5e_stats_eth_mac_get(struct mlx5e_priv *priv, + struct ethtool_eth_mac_stats *mac_stats); +void mlx5e_stats_eth_ctrl_get(struct mlx5e_priv *priv, + struct ethtool_eth_ctrl_stats *ctrl_stats); +void mlx5e_stats_rmon_get(struct mlx5e_priv *priv, + struct ethtool_rmon_stats *rmon, + const struct ethtool_rmon_hist_range **ranges); + +/* Concrete NIC Stats */ + +struct mlx5e_sw_stats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + u64 tx_tso_packets; + u64 tx_tso_bytes; + u64 tx_tso_inner_packets; + u64 tx_tso_inner_bytes; + u64 tx_added_vlan_packets; + u64 tx_nop; + u64 tx_mpwqe_blks; + u64 tx_mpwqe_pkts; + u64 rx_lro_packets; + u64 rx_lro_bytes; + u64 rx_gro_packets; + u64 rx_gro_bytes; + u64 rx_gro_skbs; + u64 rx_gro_match_packets; + u64 rx_gro_large_hds; + u64 rx_mcast_packets; + u64 rx_ecn_mark; + u64 rx_removed_vlan_packets; + u64 rx_csum_unnecessary; + u64 rx_csum_none; + u64 rx_csum_complete; + u64 rx_csum_complete_tail; + u64 rx_csum_complete_tail_slow; + u64 rx_csum_unnecessary_inner; + u64 rx_xdp_drop; + u64 rx_xdp_redirect; + u64 rx_xdp_tx_xmit; + u64 rx_xdp_tx_mpwqe; + u64 rx_xdp_tx_inlnw; + u64 rx_xdp_tx_nops; + u64 rx_xdp_tx_full; + u64 rx_xdp_tx_err; + u64 rx_xdp_tx_cqe; + u64 tx_csum_none; + u64 tx_csum_partial; + u64 tx_csum_partial_inner; + u64 tx_queue_stopped; + u64 tx_queue_dropped; + u64 tx_xmit_more; + u64 tx_recover; + u64 tx_cqes; + u64 tx_queue_wake; + u64 tx_cqe_err; + u64 tx_xdp_xmit; + u64 tx_xdp_mpwqe; + u64 tx_xdp_inlnw; + u64 tx_xdp_nops; + u64 tx_xdp_full; + u64 tx_xdp_err; + u64 tx_xdp_cqes; + u64 rx_wqe_err; + u64 rx_mpwqe_filler_cqes; + u64 rx_mpwqe_filler_strides; + u64 rx_oversize_pkts_sw_drop; + u64 rx_buff_alloc_err; + u64 rx_cqe_compress_blks; + u64 rx_cqe_compress_pkts; + u64 rx_cache_reuse; + u64 rx_cache_full; + u64 rx_cache_empty; + u64 rx_cache_busy; + u64 rx_cache_waive; + u64 rx_congst_umr; + u64 rx_arfs_err; + u64 rx_recover; + u64 ch_events; + u64 ch_poll; + u64 ch_arm; + u64 ch_aff_change; + u64 ch_force_irq; + u64 ch_eq_rearm; +#ifdef CONFIG_PAGE_POOL_STATS + u64 rx_pp_alloc_fast; + u64 rx_pp_alloc_slow; + u64 rx_pp_alloc_slow_high_order; + u64 rx_pp_alloc_empty; + u64 rx_pp_alloc_refill; + u64 rx_pp_alloc_waive; + u64 rx_pp_recycle_cached; + u64 rx_pp_recycle_cache_full; + u64 rx_pp_recycle_ring; + u64 rx_pp_recycle_ring_full; + u64 rx_pp_recycle_released_ref; +#endif +#ifdef CONFIG_MLX5_EN_TLS + u64 tx_tls_encrypted_packets; + u64 tx_tls_encrypted_bytes; + u64 tx_tls_ooo; + u64 tx_tls_dump_packets; + u64 tx_tls_dump_bytes; + u64 tx_tls_resync_bytes; + u64 tx_tls_skip_no_sync_data; + u64 tx_tls_drop_no_sync_data; + u64 tx_tls_drop_bypass_req; + + u64 rx_tls_decrypted_packets; + u64 rx_tls_decrypted_bytes; + u64 rx_tls_resync_req_pkt; + u64 rx_tls_resync_req_start; + u64 rx_tls_resync_req_end; + u64 rx_tls_resync_req_skip; + u64 rx_tls_resync_res_ok; + u64 rx_tls_resync_res_retry; + u64 rx_tls_resync_res_skip; + u64 rx_tls_err; +#endif + + u64 rx_xsk_packets; + u64 rx_xsk_bytes; + u64 rx_xsk_csum_complete; + u64 rx_xsk_csum_unnecessary; + u64 rx_xsk_csum_unnecessary_inner; + u64 rx_xsk_csum_none; + u64 rx_xsk_ecn_mark; + u64 rx_xsk_removed_vlan_packets; + u64 rx_xsk_xdp_drop; + u64 rx_xsk_xdp_redirect; + u64 rx_xsk_wqe_err; + u64 rx_xsk_mpwqe_filler_cqes; + u64 rx_xsk_mpwqe_filler_strides; + u64 rx_xsk_oversize_pkts_sw_drop; + 
u64 rx_xsk_buff_alloc_err; + u64 rx_xsk_cqe_compress_blks; + u64 rx_xsk_cqe_compress_pkts; + u64 rx_xsk_congst_umr; + u64 rx_xsk_arfs_err; + u64 tx_xsk_xmit; + u64 tx_xsk_mpwqe; + u64 tx_xsk_inlnw; + u64 tx_xsk_full; + u64 tx_xsk_err; + u64 tx_xsk_cqes; +}; + +struct mlx5e_qcounter_stats { + u32 rx_out_of_buffer; + u32 rx_if_down_packets; +}; + +#define VNIC_ENV_GET(vnic_env_stats, c) \ + MLX5_GET(query_vnic_env_out, (vnic_env_stats)->query_vnic_env_out, \ + vport_env.c) + +struct mlx5e_vnic_env_stats { + __be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)]; +}; + +#define VPORT_COUNTER_GET(vstats, c) MLX5_GET64(query_vport_counter_out, \ + vstats->query_vport_out, c) + +struct mlx5e_vport_stats { + __be64 query_vport_out[MLX5_ST_SZ_QW(query_vport_counter_out)]; +}; + +#define PPORT_802_3_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, pstats->IEEE_802_3_counters, \ + counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) +#define PPORT_2863_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, pstats->RFC_2863_counters, \ + counter_set.eth_2863_cntrs_grp_data_layout.c##_high) +#define PPORT_2819_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, pstats->RFC_2819_counters, \ + counter_set.eth_2819_cntrs_grp_data_layout.c##_high) +#define PPORT_PHY_STATISTICAL_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, (pstats)->phy_statistical_counters, \ + counter_set.phys_layer_statistical_cntrs.c##_high) +#define PPORT_PER_PRIO_GET(pstats, prio, c) \ + MLX5_GET64(ppcnt_reg, pstats->per_prio_counters[prio], \ + counter_set.eth_per_prio_grp_data_layout.c##_high) +#define NUM_PPORT_PRIO 8 +#define PPORT_ETH_EXT_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, (pstats)->eth_ext_counters, \ + counter_set.eth_extended_cntrs_grp_data_layout.c##_high) + +struct mlx5e_pport_stats { + __be64 IEEE_802_3_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 RFC_2863_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 RFC_2819_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 per_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 phy_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 phy_statistical_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 per_tc_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 per_tc_congest_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; +}; + +#define PCIE_PERF_GET(pcie_stats, c) \ + MLX5_GET(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \ + counter_set.pcie_perf_cntrs_grp_data_layout.c) + +#define PCIE_PERF_GET64(pcie_stats, c) \ + MLX5_GET64(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \ + counter_set.pcie_perf_cntrs_grp_data_layout.c##_high) + +struct mlx5e_pcie_stats { + __be64 pcie_perf_counters[MLX5_ST_SZ_QW(mpcnt_reg)]; +}; + +struct mlx5e_rq_stats { + u64 packets; + u64 bytes; + u64 csum_complete; + u64 csum_complete_tail; + u64 csum_complete_tail_slow; + u64 csum_unnecessary; + u64 csum_unnecessary_inner; + u64 csum_none; + u64 lro_packets; + u64 lro_bytes; + u64 gro_packets; + u64 gro_bytes; + u64 gro_skbs; + u64 gro_match_packets; + u64 gro_large_hds; + u64 mcast_packets; + u64 ecn_mark; + u64 removed_vlan_packets; + u64 xdp_drop; + u64 xdp_redirect; + u64 wqe_err; + u64 mpwqe_filler_cqes; + u64 mpwqe_filler_strides; + u64 oversize_pkts_sw_drop; + u64 buff_alloc_err; + u64 cqe_compress_blks; + u64 cqe_compress_pkts; + u64 cache_reuse; + u64 cache_full; + u64 cache_empty; + u64 cache_busy; + u64 cache_waive; + u64 congst_umr; + u64 arfs_err; + u64 recover; +#ifdef CONFIG_PAGE_POOL_STATS + u64 pp_alloc_fast; + u64 pp_alloc_slow; + u64 
pp_alloc_slow_high_order; + u64 pp_alloc_empty; + u64 pp_alloc_refill; + u64 pp_alloc_waive; + u64 pp_recycle_cached; + u64 pp_recycle_cache_full; + u64 pp_recycle_ring; + u64 pp_recycle_ring_full; + u64 pp_recycle_released_ref; +#endif +#ifdef CONFIG_MLX5_EN_TLS + u64 tls_decrypted_packets; + u64 tls_decrypted_bytes; + u64 tls_resync_req_pkt; + u64 tls_resync_req_start; + u64 tls_resync_req_end; + u64 tls_resync_req_skip; + u64 tls_resync_res_ok; + u64 tls_resync_res_retry; + u64 tls_resync_res_skip; + u64 tls_err; +#endif +}; + +struct mlx5e_sq_stats { + /* commonly accessed in data path */ + u64 packets; + u64 bytes; + u64 xmit_more; + u64 tso_packets; + u64 tso_bytes; + u64 tso_inner_packets; + u64 tso_inner_bytes; + u64 csum_partial; + u64 csum_partial_inner; + u64 added_vlan_packets; + u64 nop; + u64 mpwqe_blks; + u64 mpwqe_pkts; +#ifdef CONFIG_MLX5_EN_TLS + u64 tls_encrypted_packets; + u64 tls_encrypted_bytes; + u64 tls_ooo; + u64 tls_dump_packets; + u64 tls_dump_bytes; + u64 tls_resync_bytes; + u64 tls_skip_no_sync_data; + u64 tls_drop_no_sync_data; + u64 tls_drop_bypass_req; +#endif + /* less likely accessed in data path */ + u64 csum_none; + u64 stopped; + u64 dropped; + u64 recover; + /* dirtied @completion */ + u64 cqes ____cacheline_aligned_in_smp; + u64 wake; + u64 cqe_err; +}; + +struct mlx5e_xdpsq_stats { + u64 xmit; + u64 mpwqe; + u64 inlnw; + u64 nops; + u64 full; + u64 err; + /* dirtied @completion */ + u64 cqes ____cacheline_aligned_in_smp; +}; + +struct mlx5e_ch_stats { + u64 events; + u64 poll; + u64 arm; + u64 aff_change; + u64 force_irq; + u64 eq_rearm; +}; + +struct mlx5e_ptp_cq_stats { + u64 cqe; + u64 err_cqe; + u64 abort; + u64 abort_abs_diff_ns; + u64 resync_cqe; + u64 resync_event; + u64 ooo_cqe_drop; +}; + +struct mlx5e_stats { + struct mlx5e_sw_stats sw; + struct mlx5e_qcounter_stats qcnt; + struct mlx5e_vnic_env_stats vnic; + struct mlx5e_vport_stats vport; + struct mlx5e_pport_stats pport; + struct rtnl_link_stats64 vf_vport; + struct mlx5e_pcie_stats pcie; +}; + +extern mlx5e_stats_grp_t mlx5e_nic_stats_grps[]; +unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv); + +extern MLX5E_DECLARE_STATS_GRP(sw); +extern MLX5E_DECLARE_STATS_GRP(qcnt); +extern MLX5E_DECLARE_STATS_GRP(vnic_env); +extern MLX5E_DECLARE_STATS_GRP(vport); +extern MLX5E_DECLARE_STATS_GRP(802_3); +extern MLX5E_DECLARE_STATS_GRP(2863); +extern MLX5E_DECLARE_STATS_GRP(2819); +extern MLX5E_DECLARE_STATS_GRP(phy); +extern MLX5E_DECLARE_STATS_GRP(eth_ext); +extern MLX5E_DECLARE_STATS_GRP(pcie); +extern MLX5E_DECLARE_STATS_GRP(per_prio); +extern MLX5E_DECLARE_STATS_GRP(pme); +extern MLX5E_DECLARE_STATS_GRP(channels); +extern MLX5E_DECLARE_STATS_GRP(per_port_buff_congest); +extern MLX5E_DECLARE_STATS_GRP(ipsec_sw); +extern MLX5E_DECLARE_STATS_GRP(ptp); +extern MLX5E_DECLARE_STATS_GRP(macsec_hw); + +#endif /* __MLX5_EN_STATS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c new file mode 100644 index 000000000..43239555f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -0,0 +1,5307 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <net/flow_dissector.h> +#include <net/flow_offload.h> +#include <net/sch_generic.h> +#include <net/pkt_cls.h> +#include <linux/mlx5/fs.h> +#include <linux/mlx5/device.h> +#include <linux/rhashtable.h> +#include <linux/refcount.h> +#include <linux/completion.h> +#include <net/arp.h> +#include <net/ipv6_stubs.h> +#include <net/bareudp.h> +#include <net/bonding.h> +#include "en.h" +#include "en/tc/post_act.h" +#include "en_rep.h" +#include "en/rep/tc.h" +#include "en/rep/neigh.h" +#include "en_tc.h" +#include "eswitch.h" +#include "fs_core.h" +#include "en/port.h" +#include "en/tc_tun.h" +#include "en/mapping.h" +#include "en/tc_ct.h" +#include "en/mod_hdr.h" +#include "en/tc_tun_encap.h" +#include "en/tc/sample.h" +#include "en/tc/act/act.h" +#include "en/tc/post_meter.h" +#include "lib/devcom.h" +#include "lib/geneve.h" +#include "lib/fs_chains.h" +#include "diag/en_tc_tracepoint.h" +#include <asm/div64.h> +#include "lag/lag.h" +#include "lag/mp.h" + +#define MLX5E_TC_TABLE_NUM_GROUPS 4 +#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18) + +struct mlx5e_tc_table { + /* Protects the dynamic assignment of the t parameter + * which is the nic tc root table. 
+ */ + struct mutex t_lock; + struct mlx5e_priv *priv; + struct mlx5_flow_table *t; + struct mlx5_flow_table *miss_t; + struct mlx5_fs_chains *chains; + struct mlx5e_post_act *post_act; + + struct rhashtable ht; + + struct mod_hdr_tbl mod_hdr; + struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */ + DECLARE_HASHTABLE(hairpin_tbl, 8); + + struct notifier_block netdevice_nb; + struct netdev_net_notifier netdevice_nn; + + struct mlx5_tc_ct_priv *ct; + struct mapping_ctx *mapping; +}; + +struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { + [CHAIN_TO_REG] = { + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, + .moffset = 0, + .mlen = 16, + }, + [VPORT_TO_REG] = { + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, + .moffset = 16, + .mlen = 16, + }, + [TUNNEL_TO_REG] = { + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1, + .moffset = 8, + .mlen = ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS, + .soffset = MLX5_BYTE_OFF(fte_match_param, + misc_parameters_2.metadata_reg_c_1), + }, + [ZONE_TO_REG] = zone_to_reg_ct, + [ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct, + [CTSTATE_TO_REG] = ctstate_to_reg_ct, + [MARK_TO_REG] = mark_to_reg_ct, + [LABELS_TO_REG] = labels_to_reg_ct, + [FTEID_TO_REG] = fteid_to_reg_ct, + /* For NIC rules we store the restore metadata directly + * into reg_b that is passed to SW since we don't + * jump between steering domains. + */ + [NIC_CHAIN_TO_REG] = { + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B, + .moffset = 0, + .mlen = 16, + }, + [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct, + [PACKET_COLOR_TO_REG] = packet_color_to_reg, +}; + +struct mlx5e_tc_table *mlx5e_tc_table_alloc(void) +{ + struct mlx5e_tc_table *tc; + + tc = kvzalloc(sizeof(*tc), GFP_KERNEL); + return tc ? tc : ERR_PTR(-ENOMEM); +} + +void mlx5e_tc_table_free(struct mlx5e_tc_table *tc) +{ + kvfree(tc); +} + +struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc) +{ + return tc->chains; +} + +/* To avoid a false lock dependency warning, set the tc_ht lock class + * to be different from the lock class of the ht used inside it: when deleting + * the last flow from a group and then deleting the group, we end up in + * del_sw_flow_group(), which calls rhashtable_destroy() on fg->ftes_hash; that + * takes an ht->mutex, but it is a different ht->mutex than the one here. 
+ */ +static struct lock_class_key tc_ht_lock_key; +static struct lock_class_key tc_ht_wq_key; + +static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow); +static void free_flow_post_acts(struct mlx5e_tc_flow *flow); + +void +mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, + enum mlx5e_tc_attr_to_reg type, + u32 val, + u32 mask) +{ + void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval; + int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset; + int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; + int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen; + u32 max_mask = GENMASK(match_len - 1, 0); + __be32 curr_mask_be, curr_val_be; + u32 curr_mask, curr_val; + + fmask = headers_c + soffset; + fval = headers_v + soffset; + + memcpy(&curr_mask_be, fmask, 4); + memcpy(&curr_val_be, fval, 4); + + curr_mask = be32_to_cpu(curr_mask_be); + curr_val = be32_to_cpu(curr_val_be); + + //move to correct offset + WARN_ON(mask > max_mask); + mask <<= moffset; + val <<= moffset; + max_mask <<= moffset; + + //zero val and mask + curr_mask &= ~max_mask; + curr_val &= ~max_mask; + + //add current to mask + curr_mask |= mask; + curr_val |= val; + + //back to be32 and write + curr_mask_be = cpu_to_be32(curr_mask); + curr_val_be = cpu_to_be32(curr_val); + + memcpy(fmask, &curr_mask_be, 4); + memcpy(fval, &curr_val_be, 4); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; +} + +void +mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec, + enum mlx5e_tc_attr_to_reg type, + u32 *val, + u32 *mask) +{ + void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval; + int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset; + int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; + int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen; + u32 max_mask = GENMASK(match_len - 1, 0); + __be32 curr_mask_be, curr_val_be; + u32 curr_mask, curr_val; + + fmask = headers_c + soffset; + fval = headers_v + soffset; + + memcpy(&curr_mask_be, fmask, 4); + memcpy(&curr_val_be, fval, 4); + + curr_mask = be32_to_cpu(curr_mask_be); + curr_val = be32_to_cpu(curr_val_be); + + *mask = (curr_mask >> moffset) & max_mask; + *val = (curr_val >> moffset) & max_mask; +} + +int +mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5_flow_namespace_type ns, + enum mlx5e_tc_attr_to_reg type, + u32 data) +{ + int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; + int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield; + int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen; + char *modact; + int err; + + modact = mlx5e_mod_hdr_alloc(mdev, ns, mod_hdr_acts); + if (IS_ERR(modact)) + return PTR_ERR(modact); + + /* Firmware has 5bit length field and 0 means 32bits */ + if (mlen == 32) + mlen = 0; + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, field, mfield); + MLX5_SET(set_action_in, modact, offset, moffset); + MLX5_SET(set_action_in, modact, length, mlen); + MLX5_SET(set_action_in, modact, data, data); + err = mod_hdr_acts->num_actions; + mod_hdr_acts->num_actions++; + + return err; +} + +struct mlx5e_tc_int_port_priv * +mlx5e_get_int_port_priv(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + + if (is_mdev_switchdev_mode(priv->mdev)) { + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, 
REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + return uplink_priv->int_port_priv; + } + + return NULL; +} + +struct mlx5e_flow_meters * +mlx5e_get_flow_meters(struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5e_priv *priv; + + if (is_mdev_switchdev_mode(dev)) { + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + priv = netdev_priv(uplink_rpriv->netdev); + if (!uplink_priv->flow_meters) + uplink_priv->flow_meters = + mlx5e_flow_meters_init(priv, + MLX5_FLOW_NAMESPACE_FDB, + uplink_priv->post_act); + if (!IS_ERR(uplink_priv->flow_meters)) + return uplink_priv->flow_meters; + } + + return NULL; +} + +static struct mlx5_tc_ct_priv * +get_ct_priv(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + + if (is_mdev_switchdev_mode(priv->mdev)) { + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + return uplink_priv->ct_priv; + } + + return tc->ct; +} + +static struct mlx5e_tc_psample * +get_sample_priv(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + + if (is_mdev_switchdev_mode(priv->mdev)) { + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + return uplink_priv->tc_psample; + } + + return NULL; +} + +static struct mlx5e_post_act * +get_post_action(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + + if (is_mdev_switchdev_mode(priv->mdev)) { + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + return uplink_priv->post_act; + } + + return tc->post_act; +} + +struct mlx5_flow_handle * +mlx5_tc_rule_insert(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (is_mdev_switchdev_mode(priv->mdev)) + return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + + return mlx5e_add_offloaded_nic_rule(priv, spec, attr); +} + +void +mlx5_tc_rule_delete(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (is_mdev_switchdev_mode(priv->mdev)) { + mlx5_eswitch_del_offloaded_rule(esw, rule, attr); + return; + } + + mlx5e_del_offloaded_nic_rule(priv, rule, attr); +} + +static bool +is_flow_meter_action(struct mlx5_flow_attr *attr) +{ + return ((attr->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) && + (attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER)); +} + +static int +mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_post_act *post_act = get_post_action(priv); + struct mlx5e_post_meter_priv *post_meter; + enum mlx5_flow_namespace_type ns_type; + struct mlx5e_flow_meter_handle *meter; + + meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params); + if (IS_ERR(meter)) { + mlx5_core_err(priv->mdev, "Failed to get flow meter\n"); + return PTR_ERR(meter); + 
} + + ns_type = mlx5e_tc_meter_get_namespace(meter->flow_meters); + post_meter = mlx5e_post_meter_init(priv, ns_type, post_act, meter->green_counter, + meter->red_counter); + if (IS_ERR(post_meter)) { + mlx5_core_err(priv->mdev, "Failed to init post meter\n"); + goto err_meter_init; + } + + attr->meter_attr.meter = meter; + attr->meter_attr.post_meter = post_meter; + attr->dest_ft = mlx5e_post_meter_get_ft(post_meter); + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + return 0; + +err_meter_init: + mlx5e_tc_meter_put(meter); + return PTR_ERR(post_meter); +} + +static void +mlx5e_tc_del_flow_meter(struct mlx5_flow_attr *attr) +{ + mlx5e_post_meter_cleanup(attr->meter_attr.post_meter); + mlx5e_tc_meter_put(attr->meter_attr.meter); +} + +struct mlx5_flow_handle * +mlx5e_tc_rule_offload(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + int err; + + if (attr->flags & MLX5_ATTR_FLAG_CT) { + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = + &attr->parse_attr->mod_hdr_acts; + + return mlx5_tc_ct_flow_offload(get_ct_priv(priv), + spec, attr, + mod_hdr_acts); + } + + if (!is_mdev_switchdev_mode(priv->mdev)) + return mlx5e_add_offloaded_nic_rule(priv, spec, attr); + + if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) + return mlx5e_tc_sample_offload(get_sample_priv(priv), spec, attr); + + if (is_flow_meter_action(attr)) { + err = mlx5e_tc_add_flow_meter(priv, attr); + if (err) + return ERR_PTR(err); + } + + return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); +} + +void +mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (attr->flags & MLX5_ATTR_FLAG_CT) { + mlx5_tc_ct_delete_flow(get_ct_priv(priv), attr); + return; + } + + if (!is_mdev_switchdev_mode(priv->mdev)) { + mlx5e_del_offloaded_nic_rule(priv, rule, attr); + return; + } + + if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) { + mlx5e_tc_sample_unoffload(get_sample_priv(priv), rule, attr); + return; + } + + mlx5_eswitch_del_offloaded_rule(esw, rule, attr); + + if (attr->meter_attr.meter) + mlx5e_tc_del_flow_meter(attr); +} + +int +mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5_flow_namespace_type ns, + enum mlx5e_tc_attr_to_reg type, + u32 data) +{ + int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data); + + return ret < 0 ? 
ret : 0; +} + +void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5e_tc_attr_to_reg type, + int act_id, u32 data) +{ + int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; + int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield; + int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen; + char *modact; + + modact = mlx5e_mod_hdr_get_item(mod_hdr_acts, act_id); + + /* Firmware has 5bit length field and 0 means 32bits */ + if (mlen == 32) + mlen = 0; + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, field, mfield); + MLX5_SET(set_action_in, modact, offset, moffset); + MLX5_SET(set_action_in, modact, length, mlen); + MLX5_SET(set_action_in, modact, data, data); +} + +struct mlx5e_hairpin { + struct mlx5_hairpin *pair; + + struct mlx5_core_dev *func_mdev; + struct mlx5e_priv *func_priv; + u32 tdn; + struct mlx5e_tir direct_tir; + + int num_channels; + struct mlx5e_rqt indir_rqt; + struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5_ttc_table *ttc; +}; + +struct mlx5e_hairpin_entry { + /* a node of a hash table which keeps all the hairpin entries */ + struct hlist_node hairpin_hlist; + + /* protects flows list */ + spinlock_t flows_lock; + /* flows sharing the same hairpin */ + struct list_head flows; + /* hpe's that were not fully initialized when dead peer update event + * function traversed them. + */ + struct list_head dead_peer_wait_list; + + u16 peer_vhca_id; + u8 prio; + struct mlx5e_hairpin *hp; + refcount_t refcnt; + struct completion res_ready; +}; + +static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); + +struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow) +{ + if (!flow || !refcount_inc_not_zero(&flow->refcnt)) + return ERR_PTR(-EINVAL); + return flow; +} + +void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) +{ + if (refcount_dec_and_test(&flow->refcnt)) { + mlx5e_tc_del_flow(priv, flow); + kfree_rcu(flow, rcu_head); + } +} + +bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow) +{ + return flow_flag_test(flow, ESWITCH); +} + +bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow) +{ + return flow_flag_test(flow, FT); +} + +bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow) +{ + return flow_flag_test(flow, OFFLOADED); +} + +int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow) +{ + return mlx5e_is_eswitch_flow(flow) ? + MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL; +} + +static struct mod_hdr_tbl * +get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + return mlx5e_get_flow_namespace(flow) == MLX5_FLOW_NAMESPACE_FDB ? 
+ &esw->offloads.mod_hdr : + &tc->mod_hdr; +} + +static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5e_tc_flow_parse_attr *parse_attr) +{ + struct mlx5_modify_hdr *modify_hdr; + struct mlx5e_mod_hdr_handle *mh; + + mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow), + mlx5e_get_flow_namespace(flow), + &parse_attr->mod_hdr_acts); + if (IS_ERR(mh)) + return PTR_ERR(mh); + + modify_hdr = mlx5e_mod_hdr_get(mh); + flow->attr->modify_hdr = modify_hdr; + flow->mh = mh; + + return 0; +} + +static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + /* flow wasn't fully initialized */ + if (!flow->mh) + return; + + mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow), + flow->mh); + flow->mh = NULL; +} + +static +struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex) +{ + struct mlx5_core_dev *mdev; + struct net_device *netdev; + struct mlx5e_priv *priv; + + netdev = dev_get_by_index(net, ifindex); + if (!netdev) + return ERR_PTR(-ENODEV); + + priv = netdev_priv(netdev); + mdev = priv->mdev; + dev_put(netdev); + + /* Mirred tc action holds a refcount on the ifindex net_device (see + * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev + * after dev_put(netdev), while we're in the context of adding a tc flow. + * + * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then + * stored in a hairpin object, which exists until all flows, that refer to it, get + * removed. + * + * On the other hand, after a hairpin object has been created, the peer net_device may + * be removed/unbound while there are still some hairpin flows that are using it. This + * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to + * NETDEV_UNREGISTER event of the peer net_device. 
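+ *
+ * In short: callers treat the returned mdev as borrowed for the duration of
+ * the tc flow add path; any longer-lived reference goes through the hairpin
+ * object, whose teardown is driven by the NETDEV_UNREGISTER handling
+ * described above.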
+ */ + return mdev; +} + +static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp) +{ + struct mlx5e_tir_builder *builder; + int err; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn); + if (err) + goto out; + + mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]); + err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false); + if (err) + goto create_tir_err; + +out: + mlx5e_tir_builder_free(builder); + return err; + +create_tir_err: + mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn); + + goto out; +} + +static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp) +{ + mlx5e_tir_destroy(&hp->direct_tir); + mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn); +} + +static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp) +{ + struct mlx5e_priv *priv = hp->func_priv; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_rss_params_indir *indir; + int err; + + indir = kvmalloc(sizeof(*indir), GFP_KERNEL); + if (!indir) + return -ENOMEM; + + mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels); + err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels, + mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc, + indir); + + kvfree(indir); + return err; +} + +static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp) +{ + struct mlx5e_priv *priv = hp->func_priv; + struct mlx5e_rss_params_hash rss_hash; + enum mlx5_traffic_types tt, max_tt; + struct mlx5e_tir_builder *builder; + int err = 0; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res); + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + struct mlx5e_rss_params_traffic_type rss_tt; + + rss_tt = mlx5e_rss_get_default_tt_config(tt); + + mlx5e_tir_builder_build_rqt(builder, hp->tdn, + mlx5e_rqt_get_rqtn(&hp->indir_rqt), + false); + mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false); + + err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false); + if (err) { + mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err); + goto err_destroy_tirs; + } + + mlx5e_tir_builder_clear(builder); + } + +out: + mlx5e_tir_builder_free(builder); + return err; + +err_destroy_tirs: + max_tt = tt; + for (tt = 0; tt < max_tt; tt++) + mlx5e_tir_destroy(&hp->indir_tir[tt]); + + goto out; +} + +static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp) +{ + int tt; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + mlx5e_tir_destroy(&hp->indir_tir[tt]); +} + +static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp, + struct ttc_params *ttc_params) +{ + struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; + int tt; + + memset(ttc_params, 0, sizeof(*ttc_params)); + + ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev, + MLX5_FLOW_NAMESPACE_KERNEL); + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + ttc_params->dests[tt].tir_num = + tt == MLX5_TT_ANY ? 
+ mlx5e_tir_get_tirn(&hp->direct_tir) : + mlx5e_tir_get_tirn(&hp->indir_tir[tt]); + } + + ft_attr->level = MLX5E_TC_TTC_FT_LEVEL; + ft_attr->prio = MLX5E_TC_PRIO; +} + +static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp) +{ + struct mlx5e_priv *priv = hp->func_priv; + struct ttc_params ttc_params; + struct mlx5_ttc_table *ttc; + int err; + + err = mlx5e_hairpin_create_indirect_rqt(hp); + if (err) + return err; + + err = mlx5e_hairpin_create_indirect_tirs(hp); + if (err) + goto err_create_indirect_tirs; + + mlx5e_hairpin_set_ttc_params(hp, &ttc_params); + hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params); + if (IS_ERR(hp->ttc)) { + err = PTR_ERR(hp->ttc); + goto err_create_ttc_table; + } + + ttc = mlx5e_fs_get_ttc(priv->fs, false); + netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n", + hp->num_channels, + mlx5_get_ttc_flow_table(ttc)->id); + + return 0; + +err_create_ttc_table: + mlx5e_hairpin_destroy_indirect_tirs(hp); +err_create_indirect_tirs: + mlx5e_rqt_destroy(&hp->indir_rqt); + + return err; +} + +static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp) +{ + mlx5_destroy_ttc_table(hp->ttc); + mlx5e_hairpin_destroy_indirect_tirs(hp); + mlx5e_rqt_destroy(&hp->indir_rqt); +} + +static struct mlx5e_hairpin * +mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params, + int peer_ifindex) +{ + struct mlx5_core_dev *func_mdev, *peer_mdev; + struct mlx5e_hairpin *hp; + struct mlx5_hairpin *pair; + int err; + + hp = kzalloc(sizeof(*hp), GFP_KERNEL); + if (!hp) + return ERR_PTR(-ENOMEM); + + func_mdev = priv->mdev; + peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); + if (IS_ERR(peer_mdev)) { + err = PTR_ERR(peer_mdev); + goto create_pair_err; + } + + pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params); + if (IS_ERR(pair)) { + err = PTR_ERR(pair); + goto create_pair_err; + } + hp->pair = pair; + hp->func_mdev = func_mdev; + hp->func_priv = priv; + hp->num_channels = params->num_channels; + + err = mlx5e_hairpin_create_transport(hp); + if (err) + goto create_transport_err; + + if (hp->num_channels > 1) { + err = mlx5e_hairpin_rss_init(hp); + if (err) + goto rss_init_err; + } + + return hp; + +rss_init_err: + mlx5e_hairpin_destroy_transport(hp); +create_transport_err: + mlx5_core_hairpin_destroy(hp->pair); +create_pair_err: + kfree(hp); + return ERR_PTR(err); +} + +static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp) +{ + if (hp->num_channels > 1) + mlx5e_hairpin_rss_cleanup(hp); + mlx5e_hairpin_destroy_transport(hp); + mlx5_core_hairpin_destroy(hp->pair); + kvfree(hp); +} + +static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio) +{ + return (peer_vhca_id << 16 | prio); +} + +static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv, + u16 peer_vhca_id, u8 prio) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5e_hairpin_entry *hpe; + u32 hash_key = hash_hairpin_info(peer_vhca_id, prio); + + hash_for_each_possible(tc->hairpin_tbl, hpe, + hairpin_hlist, hash_key) { + if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) { + refcount_inc(&hpe->refcnt); + return hpe; + } + } + + return NULL; +} + +static void mlx5e_hairpin_put(struct mlx5e_priv *priv, + struct mlx5e_hairpin_entry *hpe) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + /* no more hairpin flows for us, release the hairpin pair */ + if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &tc->hairpin_tbl_lock)) + return; + hash_del(&hpe->hairpin_hlist); + 
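+	/* The entry was unhashed while still holding hairpin_tbl_lock, so once
+	 * the lock is dropped below no new reference can be taken and it is
+	 * safe to destroy the hairpin pair.
+	 */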
mutex_unlock(&tc->hairpin_tbl_lock); + + if (!IS_ERR_OR_NULL(hpe->hp)) { + netdev_dbg(priv->netdev, "del hairpin: peer %s\n", + dev_name(hpe->hp->pair->peer_mdev->device)); + + mlx5e_hairpin_destroy(hpe->hp); + } + + WARN_ON(!list_empty(&hpe->flows)); + kfree(hpe); +} + +#define UNKNOWN_MATCH_PRIO 8 + +static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, u8 *match_prio, + struct netlink_ext_ack *extack) +{ + void *headers_c, *headers_v; + u8 prio_val, prio_mask = 0; + bool vlan_present; + +#ifdef CONFIG_MLX5_CORE_EN_DCB + if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) { + NL_SET_ERR_MSG_MOD(extack, + "only PCP trust state supported for hairpin"); + return -EOPNOTSUPP; + } +#endif + headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + + vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag); + if (vlan_present) { + prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio); + prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio); + } + + if (!vlan_present || !prio_mask) { + prio_val = UNKNOWN_MATCH_PRIO; + } else if (prio_mask != 0x7) { + NL_SET_ERR_MSG_MOD(extack, + "masked priority match not supported for hairpin"); + return -EOPNOTSUPP; + } + + *match_prio = prio_val; + return 0; +} + +static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct netlink_ext_ack *extack) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + int peer_ifindex = parse_attr->mirred_ifindex[0]; + struct mlx5_hairpin_params params; + struct mlx5_core_dev *peer_mdev; + struct mlx5e_hairpin_entry *hpe; + struct mlx5e_hairpin *hp; + u64 link_speed64; + u32 link_speed; + u8 match_prio; + u16 peer_id; + int err; + + peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); + if (IS_ERR(peer_mdev)) { + NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device"); + return PTR_ERR(peer_mdev); + } + + if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) { + NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported"); + return -EOPNOTSUPP; + } + + peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id); + err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio, + extack); + if (err) + return err; + + mutex_lock(&tc->hairpin_tbl_lock); + hpe = mlx5e_hairpin_get(priv, peer_id, match_prio); + if (hpe) { + mutex_unlock(&tc->hairpin_tbl_lock); + wait_for_completion(&hpe->res_ready); + + if (IS_ERR(hpe->hp)) { + err = -EREMOTEIO; + goto out_err; + } + goto attach_flow; + } + + hpe = kzalloc(sizeof(*hpe), GFP_KERNEL); + if (!hpe) { + mutex_unlock(&tc->hairpin_tbl_lock); + return -ENOMEM; + } + + spin_lock_init(&hpe->flows_lock); + INIT_LIST_HEAD(&hpe->flows); + INIT_LIST_HEAD(&hpe->dead_peer_wait_list); + hpe->peer_vhca_id = peer_id; + hpe->prio = match_prio; + refcount_set(&hpe->refcnt, 1); + init_completion(&hpe->res_ready); + + hash_add(tc->hairpin_tbl, &hpe->hairpin_hlist, + hash_hairpin_info(peer_id, match_prio)); + mutex_unlock(&tc->hairpin_tbl_lock); + + params.log_data_size = 16; + params.log_data_size = min_t(u8, params.log_data_size, + MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz)); + params.log_data_size = max_t(u8, params.log_data_size, + MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz)); + + params.log_num_packets = params.log_data_size - + MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev); + 
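+	/* Clamp the derived packet count to the device's
+	 * log_max_hairpin_num_packets limit below.
+	 */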
params.log_num_packets = min_t(u8, params.log_num_packets, + MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets)); + + params.q_counter = priv->q_counter; + /* set hairpin pair per each 50Gbs share of the link */ + mlx5e_port_max_linkspeed(priv->mdev, &link_speed); + link_speed = max_t(u32, link_speed, 50000); + link_speed64 = link_speed; + do_div(link_speed64, 50000); + params.num_channels = link_speed64; + + hp = mlx5e_hairpin_create(priv, ¶ms, peer_ifindex); + hpe->hp = hp; + complete_all(&hpe->res_ready); + if (IS_ERR(hp)) { + err = PTR_ERR(hp); + goto out_err; + } + + netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n", + mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0], + dev_name(hp->pair->peer_mdev->device), + hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets); + +attach_flow: + if (hpe->hp->num_channels > 1) { + flow_flag_set(flow, HAIRPIN_RSS); + flow->attr->nic_attr->hairpin_ft = + mlx5_get_ttc_flow_table(hpe->hp->ttc); + } else { + flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir); + } + + flow->hpe = hpe; + spin_lock(&hpe->flows_lock); + list_add(&flow->hairpin, &hpe->flows); + spin_unlock(&hpe->flows_lock); + + return 0; + +out_err: + mlx5e_hairpin_put(priv, hpe); + return err; +} + +static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + /* flow wasn't fully initialized */ + if (!flow->hpe) + return; + + spin_lock(&flow->hpe->flows_lock); + list_del(&flow->hairpin); + spin_unlock(&flow->hpe->flows_lock); + + mlx5e_hairpin_put(priv, flow->hpe); + flow->hpe = NULL; +} + +struct mlx5_flow_handle * +mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr) +{ + struct mlx5_flow_context *flow_context = &spec->flow_context; + struct mlx5e_vlan_table *vlan = mlx5e_fs_get_vlan(priv->fs); + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr; + struct mlx5_flow_destination dest[2] = {}; + struct mlx5_fs_chains *nic_chains; + struct mlx5_flow_act flow_act = { + .action = attr->action, + .flags = FLOW_ACT_NO_APPEND, + }; + struct mlx5_flow_handle *rule; + struct mlx5_flow_table *ft; + int dest_ix = 0; + + nic_chains = mlx5e_nic_chains(tc); + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; + flow_context->flow_tag = nic_attr->flow_tag; + + if (attr->dest_ft) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[dest_ix].ft = attr->dest_ft; + dest_ix++; + } else if (nic_attr->hairpin_ft) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[dest_ix].ft = nic_attr->hairpin_ft; + dest_ix++; + } else if (nic_attr->hairpin_tirn) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest[dest_ix].tir_num = nic_attr->hairpin_tirn; + dest_ix++; + } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + if (attr->dest_chain) { + dest[dest_ix].ft = mlx5_chains_get_table(nic_chains, + attr->dest_chain, 1, + MLX5E_TC_FT_LEVEL); + if (IS_ERR(dest[dest_ix].ft)) + return ERR_CAST(dest[dest_ix].ft); + } else { + dest[dest_ix].ft = mlx5e_vlan_get_flowtable(vlan); + } + dest_ix++; + } + + if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && + MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + dest[dest_ix].type = 
MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[dest_ix].counter_id = mlx5_fc_id(attr->counter); + dest_ix++; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + flow_act.modify_hdr = attr->modify_hdr; + + mutex_lock(&tc->t_lock); + if (IS_ERR_OR_NULL(tc->t)) { + /* Create the root table here if doesn't exist yet */ + tc->t = + mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL); + + if (IS_ERR(tc->t)) { + mutex_unlock(&tc->t_lock); + netdev_err(priv->netdev, + "Failed to create tc offload table\n"); + rule = ERR_CAST(tc->t); + goto err_ft_get; + } + } + mutex_unlock(&tc->t_lock); + + if (attr->chain || attr->prio) + ft = mlx5_chains_get_table(nic_chains, + attr->chain, attr->prio, + MLX5E_TC_FT_LEVEL); + else + ft = attr->ft; + + if (IS_ERR(ft)) { + rule = ERR_CAST(ft); + goto err_ft_get; + } + + if (attr->outer_match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + + rule = mlx5_add_flow_rules(ft, spec, + &flow_act, dest, dest_ix); + if (IS_ERR(rule)) + goto err_rule; + + return rule; + +err_rule: + if (attr->chain || attr->prio) + mlx5_chains_put_table(nic_chains, + attr->chain, attr->prio, + MLX5E_TC_FT_LEVEL); +err_ft_get: + if (attr->dest_chain) + mlx5_chains_put_table(nic_chains, + attr->dest_chain, 1, + MLX5E_TC_FT_LEVEL); + + return ERR_CAST(rule); +} + +static int +alloc_flow_attr_counter(struct mlx5_core_dev *counter_dev, + struct mlx5_flow_attr *attr) + +{ + struct mlx5_fc *counter; + + counter = mlx5_fc_create(counter_dev, true); + if (IS_ERR(counter)) + return PTR_ERR(counter); + + attr->counter = counter; + return 0; +} + +static int +mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; + struct mlx5_core_dev *dev = priv->mdev; + int err; + + parse_attr = attr->parse_attr; + + if (flow_flag_test(flow, HAIRPIN)) { + err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); + if (err) + return err; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + err = alloc_flow_attr_counter(dev, attr); + if (err) + return err; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); + if (err) + return err; + } + + if (attr->flags & MLX5_ATTR_FLAG_CT) + flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), &parse_attr->spec, + attr, &parse_attr->mod_hdr_acts); + else + flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec, + attr); + + return PTR_ERR_OR_ZERO(flow->rule[0]); +} + +void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_fs_chains *nic_chains; + + nic_chains = mlx5e_nic_chains(tc); + mlx5_del_flow_rules(rule); + + if (attr->chain || attr->prio) + mlx5_chains_put_table(nic_chains, attr->chain, attr->prio, + MLX5E_TC_FT_LEVEL); + + if (attr->dest_chain) + mlx5_chains_put_table(nic_chains, attr->dest_chain, 1, + MLX5E_TC_FT_LEVEL); +} + +static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_flow_attr *attr = flow->attr; + + flow_flag_clear(flow, OFFLOADED); + + if (attr->flags & MLX5_ATTR_FLAG_CT) + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr); + else if (!IS_ERR_OR_NULL(flow->rule[0])) + mlx5e_del_offloaded_nic_rule(priv, 
flow->rule[0], attr); + + /* Remove root table if no rules are left to avoid + * extra steering hops. + */ + mutex_lock(&tc->t_lock); + if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && + !IS_ERR_OR_NULL(tc->t)) { + mlx5_chains_put_table(mlx5e_nic_chains(tc), 0, 1, MLX5E_TC_FT_LEVEL); + tc->t = NULL; + } + mutex_unlock(&tc->t_lock); + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); + mlx5e_detach_mod_hdr(priv, flow); + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) + mlx5_fc_destroy(priv->mdev, attr->counter); + + if (flow_flag_test(flow, HAIRPIN)) + mlx5e_hairpin_flow_del(priv, flow); + + free_flow_post_acts(flow); + + kvfree(attr->parse_attr); + kfree(flow->attr); +} + +struct mlx5_flow_handle * +mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr) +{ + struct mlx5_flow_handle *rule; + + if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) + return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + + rule = mlx5e_tc_rule_offload(flow->priv, spec, attr); + + if (IS_ERR(rule)) + return rule; + + if (attr->esw_attr->split_count) { + flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr); + if (IS_ERR(flow->rule[1])) + goto err_rule1; + } + + return rule; + +err_rule1: + mlx5e_tc_rule_unoffload(flow->priv, rule, attr); + return flow->rule[1]; +} + +void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr) +{ + flow_flag_clear(flow, OFFLOADED); + + if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) + return mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); + + if (attr->esw_attr->split_count) + mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); + + mlx5e_tc_rule_unoffload(flow->priv, flow->rule[0], attr); +} + +struct mlx5_flow_handle * +mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec) +{ + struct mlx5e_tc_mod_hdr_acts mod_acts = {}; + struct mlx5e_mod_hdr_handle *mh = NULL; + struct mlx5_flow_attr *slow_attr; + struct mlx5_flow_handle *rule; + bool fwd_and_modify_cap; + u32 chain_mapping = 0; + int err; + + slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); + if (!slow_attr) + return ERR_PTR(-ENOMEM); + + memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ); + slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + slow_attr->esw_attr->split_count = 0; + slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH; + + fwd_and_modify_cap = MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table); + if (!fwd_and_modify_cap) + goto skip_restore; + + err = mlx5_chains_get_chain_mapping(esw_chains(esw), flow->attr->chain, &chain_mapping); + if (err) + goto err_get_chain; + + err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB, + CHAIN_TO_REG, chain_mapping); + if (err) + goto err_reg_set; + + mh = mlx5e_mod_hdr_attach(esw->dev, get_mod_hdr_table(flow->priv, flow), + MLX5_FLOW_NAMESPACE_FDB, &mod_acts); + if (IS_ERR(mh)) { + err = PTR_ERR(mh); + goto err_attach; + } + + slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + slow_attr->modify_hdr = mlx5e_mod_hdr_get(mh); + +skip_restore: + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto err_offload; + } + + flow->slow_mh = mh; + flow->chain_mapping = chain_mapping; + flow_flag_set(flow, SLOW); + + mlx5e_mod_hdr_dealloc(&mod_acts); + kfree(slow_attr); 
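+	/* slow_attr was only a template for installing the rule; the state that
+	 * must outlive it (the modify header handle and the chain mapping) is
+	 * kept in flow->slow_mh and flow->chain_mapping until
+	 * mlx5e_tc_unoffload_from_slow_path().
+	 */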
+ + return rule; + +err_offload: + if (fwd_and_modify_cap) + mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), mh); +err_attach: +err_reg_set: + if (fwd_and_modify_cap) + mlx5_chains_put_chain_mapping(esw_chains(esw), chain_mapping); +err_get_chain: + mlx5e_mod_hdr_dealloc(&mod_acts); + kfree(slow_attr); + return ERR_PTR(err); +} + +void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_flow_attr *slow_attr; + + slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); + if (!slow_attr) { + mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n"); + return; + } + + memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ); + slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + slow_attr->esw_attr->split_count = 0; + slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH; + if (flow->slow_mh) { + slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + slow_attr->modify_hdr = mlx5e_mod_hdr_get(flow->slow_mh); + } + mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); + if (flow->slow_mh) { + mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), flow->slow_mh); + mlx5_chains_put_chain_mapping(esw_chains(esw), flow->chain_mapping); + flow->chain_mapping = 0; + flow->slow_mh = NULL; + } + flow_flag_clear(flow, SLOW); + kfree(slow_attr); +} + +/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this + * function. + */ +static void unready_flow_add(struct mlx5e_tc_flow *flow, + struct list_head *unready_flows) +{ + flow_flag_set(flow, NOT_READY); + list_add_tail(&flow->unready, unready_flows); +} + +/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this + * function. + */ +static void unready_flow_del(struct mlx5e_tc_flow *flow) +{ + list_del(&flow->unready); + flow_flag_clear(flow, NOT_READY); +} + +static void add_unready_flow(struct mlx5e_tc_flow *flow) +{ + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *rpriv; + struct mlx5_eswitch *esw; + + esw = flow->priv->mdev->priv.eswitch; + rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &rpriv->uplink_priv; + + mutex_lock(&uplink_priv->unready_flows_lock); + unready_flow_add(flow, &uplink_priv->unready_flows); + mutex_unlock(&uplink_priv->unready_flows_lock); +} + +static void remove_unready_flow(struct mlx5e_tc_flow *flow) +{ + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *rpriv; + struct mlx5_eswitch *esw; + + esw = flow->priv->mdev->priv.eswitch; + rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &rpriv->uplink_priv; + + mutex_lock(&uplink_priv->unready_flows_lock); + if (flow_flag_test(flow, NOT_READY)) + unready_flow_del(flow); + mutex_unlock(&uplink_priv->unready_flows_lock); +} + +bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev) +{ + struct mlx5_core_dev *out_mdev, *route_mdev; + struct mlx5e_priv *out_priv, *route_priv; + + out_priv = netdev_priv(out_dev); + out_mdev = out_priv->mdev; + route_priv = netdev_priv(route_dev); + route_mdev = route_priv->mdev; + + if (out_mdev->coredev_type != MLX5_COREDEV_PF) + return false; + + if (route_mdev->coredev_type != MLX5_COREDEV_VF && + route_mdev->coredev_type != MLX5_COREDEV_SF) + return false; + + return mlx5e_same_hw_devs(out_priv, route_priv); +} + +int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport) +{ + struct mlx5e_priv *out_priv, *route_priv; + struct mlx5_core_dev *route_mdev; + 
struct mlx5_eswitch *esw; + u16 vhca_id; + + out_priv = netdev_priv(out_dev); + esw = out_priv->mdev->priv.eswitch; + route_priv = netdev_priv(route_dev); + route_mdev = route_priv->mdev; + + vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id); + if (mlx5_lag_is_active(out_priv->mdev)) { + struct mlx5_devcom *devcom; + int err; + + /* In lag case we may get devices from different eswitch instances. + * If we failed to get vport num, it means, mostly, that we on the wrong + * eswitch. + */ + err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); + if (err != -ENOENT) + return err; + + rcu_read_lock(); + devcom = out_priv->mdev->priv.devcom; + esw = mlx5_devcom_get_peer_data_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + err = esw ? mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport) : -ENODEV; + rcu_read_unlock(); + + return err; + } + + return mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); +} + +int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; + struct mlx5_modify_hdr *mod_hdr; + + mod_hdr = mlx5_modify_header_alloc(priv->mdev, + mlx5e_get_flow_namespace(flow), + mod_hdr_acts->num_actions, + mod_hdr_acts->actions); + if (IS_ERR(mod_hdr)) + return PTR_ERR(mod_hdr); + + WARN_ON(attr->modify_hdr); + attr->modify_hdr = mod_hdr; + + return 0; +} + +static int +set_encap_dests(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct netlink_ext_ack *extack, + bool *vf_tun) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_esw_flow_attr *esw_attr; + struct net_device *encap_dev = NULL; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *out_priv; + int out_index; + int err = 0; + + if (!mlx5e_is_eswitch_flow(flow)) + return 0; + + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; + *vf_tun = false; + + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { + struct net_device *out_dev; + int mirred_ifindex; + + if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) + continue; + + mirred_ifindex = parse_attr->mirred_ifindex[out_index]; + out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex); + if (!out_dev) { + NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found"); + err = -ENODEV; + goto out; + } + err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index, + extack, &encap_dev); + dev_put(out_dev); + if (err) + goto out; + + if (esw_attr->dests[out_index].flags & + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE && + !esw_attr->dest_int_port) + *vf_tun = true; + + out_priv = netdev_priv(encap_dev); + rpriv = out_priv->ppriv; + esw_attr->dests[out_index].rep = rpriv->rep; + esw_attr->dests[out_index].mdev = out_priv->mdev; + } + + if (*vf_tun && esw_attr->out_count > 1) { + NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported"); + err = -EOPNOTSUPP; + goto out; + } + +out: + return err; +} + +static void +clean_encap_dests(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + bool *vf_tun) +{ + struct mlx5_esw_flow_attr *esw_attr; + int out_index; + + if (!mlx5e_is_eswitch_flow(flow)) + return; + + esw_attr = attr->esw_attr; + *vf_tun = false; + + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { + if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) + continue; + + if (esw_attr->dests[out_index].flags & + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE && + 
!esw_attr->dest_int_port) + *vf_tun = true; + + mlx5e_detach_encap(priv, flow, attr, out_index); + kfree(attr->parse_attr->tun_info[out_index]); + } +} + +static int +mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; + struct mlx5_esw_flow_attr *esw_attr; + u32 max_prio, max_chain; + bool vf_tun; + int err = 0; + + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; + + /* We check chain range only for tc flows. + * For ft flows, we checked attr->chain was originally 0 and set it to + * FDB_FT_CHAIN which is outside tc range. + * See mlx5e_rep_setup_ft_cb(). + */ + max_chain = mlx5_chains_get_chain_range(esw_chains(esw)); + if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) { + NL_SET_ERR_MSG_MOD(extack, + "Requested chain is out of supported range"); + err = -EOPNOTSUPP; + goto err_out; + } + + max_prio = mlx5_chains_get_prio_range(esw_chains(esw)); + if (attr->prio > max_prio) { + NL_SET_ERR_MSG_MOD(extack, + "Requested priority is out of supported range"); + err = -EOPNOTSUPP; + goto err_out; + } + + if (flow_flag_test(flow, TUN_RX)) { + err = mlx5e_attach_decap_route(priv, flow); + if (err) + goto err_out; + + if (!attr->chain && esw_attr->int_port && + attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + /* If decap route device is internal port, change the + * source vport value in reg_c0 back to uplink just in + * case the rule performs goto chain > 0. If we have a miss + * on chain > 0 we want the metadata regs to hold the + * chain id so SW will resume handling of this packet + * from the proper chain. + */ + u32 metadata = mlx5_eswitch_get_vport_metadata_for_set(esw, + esw_attr->in_rep->vport); + + err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts, + MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG, + metadata); + if (err) + goto err_out; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + } + } + + if (flow_flag_test(flow, L3_TO_L2_DECAP)) { + err = mlx5e_attach_decap(priv, flow, extack); + if (err) + goto err_out; + } + + if (netif_is_ovs_master(parse_attr->filter_dev)) { + struct mlx5e_tc_int_port *int_port; + + if (attr->chain) { + NL_SET_ERR_MSG_MOD(extack, + "Internal port rule is only supported on chain 0"); + err = -EOPNOTSUPP; + goto err_out; + } + + if (attr->dest_chain) { + NL_SET_ERR_MSG_MOD(extack, + "Internal port rule offload doesn't support goto action"); + err = -EOPNOTSUPP; + goto err_out; + } + + int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv), + parse_attr->filter_dev->ifindex, + flow_flag_test(flow, EGRESS) ? 
+ MLX5E_TC_INT_PORT_EGRESS : + MLX5E_TC_INT_PORT_INGRESS); + if (IS_ERR(int_port)) { + err = PTR_ERR(int_port); + goto err_out; + } + + esw_attr->int_port = int_port; + } + + err = set_encap_dests(priv, flow, attr, extack, &vf_tun); + if (err) + goto err_out; + + err = mlx5_eswitch_add_vlan_action(esw, attr); + if (err) + goto err_out; + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + if (vf_tun) { + err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr); + if (err) + goto err_out; + } else { + err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); + if (err) + goto err_out; + } + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + err = alloc_flow_attr_counter(esw_attr->counter_dev, attr); + if (err) + goto err_out; + } + + /* we get here if one of the following takes place: + * (1) there's no error + * (2) there's an encap action and we don't have valid neigh + */ + if (flow_flag_test(flow, SLOW)) + flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec); + else + flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); + + if (IS_ERR(flow->rule[0])) { + err = PTR_ERR(flow->rule[0]); + goto err_out; + } + flow_flag_set(flow, OFFLOADED); + + return 0; + +err_out: + flow_flag_set(flow, FAILED); + return err; +} + +static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) +{ + struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec; + void *headers_v = MLX5_ADDR_OF(fte_match_param, + spec->match_value, + misc_parameters_3); + u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3, + headers_v, + geneve_tlv_option_0_data); + + return !!geneve_tlv_opt_0_data; +} + +static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_flow_attr *attr = flow->attr; + struct mlx5_esw_flow_attr *esw_attr; + bool vf_tun; + + esw_attr = attr->esw_attr; + mlx5e_put_flow_tunnel_id(flow); + + remove_unready_flow(flow); + + if (mlx5e_is_offloaded_flow(flow)) { + if (flow_flag_test(flow, SLOW)) + mlx5e_tc_unoffload_from_slow_path(esw, flow); + else + mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); + } + complete_all(&flow->del_hw_done); + + if (mlx5_flow_has_geneve_opt(flow)) + mlx5_geneve_tlv_option_del(priv->mdev->geneve); + + mlx5_eswitch_del_vlan_action(esw, attr); + + if (flow->decap_route) + mlx5e_detach_decap_route(priv, flow); + + clean_encap_dests(priv, flow, attr, &vf_tun); + + mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr); + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); + if (vf_tun && attr->modify_hdr) + mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr); + else + mlx5e_detach_mod_hdr(priv, flow); + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) + mlx5_fc_destroy(esw_attr->counter_dev, attr->counter); + + if (esw_attr->int_port) + mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port); + + if (esw_attr->dest_int_port) + mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port); + + if (flow_flag_test(flow, L3_TO_L2_DECAP)) + mlx5e_detach_decap(priv, flow); + + free_flow_post_acts(flow); + + if (flow->attr->lag.count) + mlx5_lag_del_mpesw_rule(esw->dev); + + kvfree(attr->esw_attr->rx_tun_attr); + kvfree(attr->parse_attr); + kfree(flow->attr); +} + +struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) +{ + struct mlx5_flow_attr *attr; + + attr = list_first_entry(&flow->attrs, struct 
mlx5_flow_attr, list); + return attr->counter; +} + +/* Iterate over tmp_list of flows attached to flow_list head. */ +void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list) +{ + struct mlx5e_tc_flow *flow, *tmp; + + list_for_each_entry_safe(flow, tmp, flow_list, tmp_list) + mlx5e_flow_put(priv, flow); +} + +static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; + + if (!flow_flag_test(flow, ESWITCH) || + !flow_flag_test(flow, DUP)) + return; + + mutex_lock(&esw->offloads.peer_mutex); + list_del(&flow->peer); + mutex_unlock(&esw->offloads.peer_mutex); + + flow_flag_clear(flow, DUP); + + if (refcount_dec_and_test(&flow->peer_flow->refcnt)) { + mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow); + kfree(flow->peer_flow); + } + + flow->peer_flow = NULL; +} + +static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) +{ + struct mlx5_core_dev *dev = flow->priv->mdev; + struct mlx5_devcom *devcom = dev->priv.devcom; + struct mlx5_eswitch *peer_esw; + + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) + return; + + __mlx5e_tc_del_fdb_peer_flow(flow); + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); +} + +static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + if (mlx5e_is_eswitch_flow(flow)) { + mlx5e_tc_del_fdb_peer_flow(flow); + mlx5e_tc_del_fdb_flow(priv, flow); + } else { + mlx5e_tc_del_nic_flow(priv, flow); + } +} + +static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_action *flow_action = &rule->action; + const struct flow_action_entry *act; + int i; + + if (chain) + return false; + + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_GOTO: + return true; + case FLOW_ACTION_SAMPLE: + return true; + default: + continue; + } + } + + return false; +} + +static int +enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv, + struct flow_dissector_key_enc_opts *opts, + struct netlink_ext_ack *extack, + bool *dont_care) +{ + struct geneve_opt *opt; + int off = 0; + + *dont_care = true; + + while (opts->len > off) { + opt = (struct geneve_opt *)&opts->data[off]; + + if (!(*dont_care) || opt->opt_class || opt->type || + memchr_inv(opt->opt_data, 0, opt->length * 4)) { + *dont_care = false; + + if (opt->opt_class != htons(U16_MAX) || + opt->type != U8_MAX) { + NL_SET_ERR_MSG_MOD(extack, + "Partial match of tunnel options in chain > 0 isn't supported"); + netdev_warn(priv->netdev, + "Partial match of tunnel options in chain > 0 isn't supported"); + return -EOPNOTSUPP; + } + } + + off += sizeof(struct geneve_opt) + opt->length * 4; + } + + return 0; +} + +#define COPY_DISSECTOR(rule, diss_key, dst)\ +({ \ + struct flow_rule *__rule = (rule);\ + typeof(dst) __dst = dst;\ +\ + memcpy(__dst,\ + skb_flow_dissector_target(__rule->match.dissector,\ + diss_key,\ + __rule->match.key),\ + sizeof(*__dst));\ +}) + +static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct flow_cls_offload *f, + struct net_device *filter_dev) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; + struct flow_match_enc_opts enc_opts_match; + struct tunnel_match_enc_opts tun_enc_opts; + struct mlx5_rep_uplink_priv *uplink_priv; + struct 
mlx5_flow_attr *attr = flow->attr; + struct mlx5e_rep_priv *uplink_rpriv; + struct tunnel_match_key tunnel_key; + bool enc_opts_is_dont_care = true; + u32 tun_id, enc_opts_id = 0; + struct mlx5_eswitch *esw; + u32 value, mask; + int err; + + esw = priv->mdev->priv.eswitch; + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + memset(&tunnel_key, 0, sizeof(tunnel_key)); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL, + &tunnel_key.enc_control); + if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + &tunnel_key.enc_ipv4); + else + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, + &tunnel_key.enc_ipv6); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS, + &tunnel_key.enc_tp); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID, + &tunnel_key.enc_key_id); + tunnel_key.filter_ifindex = filter_dev->ifindex; + + err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id); + if (err) + return err; + + flow_rule_match_enc_opts(rule, &enc_opts_match); + err = enc_opts_is_dont_care_or_full_match(priv, + enc_opts_match.mask, + extack, + &enc_opts_is_dont_care); + if (err) + goto err_enc_opts; + + if (!enc_opts_is_dont_care) { + memset(&tun_enc_opts, 0, sizeof(tun_enc_opts)); + memcpy(&tun_enc_opts.key, enc_opts_match.key, + sizeof(*enc_opts_match.key)); + memcpy(&tun_enc_opts.mask, enc_opts_match.mask, + sizeof(*enc_opts_match.mask)); + + err = mapping_add(uplink_priv->tunnel_enc_opts_mapping, + &tun_enc_opts, &enc_opts_id); + if (err) + goto err_enc_opts; + } + + value = tun_id << ENC_OPTS_BITS | enc_opts_id; + mask = enc_opts_id ? TUNNEL_ID_MASK : + (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK); + + if (attr->chain) { + mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec, + TUNNEL_TO_REG, value, mask); + } else { + mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; + err = mlx5e_tc_match_to_reg_set(priv->mdev, + mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB, + TUNNEL_TO_REG, value); + if (err) + goto err_set; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + } + + flow->attr->tunnel_id = value; + return 0; + +err_set: + if (enc_opts_id) + mapping_remove(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id); +err_enc_opts: + mapping_remove(uplink_priv->tunnel_mapping, tun_id); + return err; +} + +static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow) +{ + u32 enc_opts_id = flow->attr->tunnel_id & ENC_OPTS_BITS_MASK; + u32 tun_id = flow->attr->tunnel_id >> ENC_OPTS_BITS; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5_eswitch *esw; + + esw = flow->priv->mdev->priv.eswitch; + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + if (tun_id) + mapping_remove(uplink_priv->tunnel_mapping, tun_id); + if (enc_opts_id) + mapping_remove(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id); +} + +void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev, + struct flow_match_basic *match, bool outer, + void *headers_c, void *headers_v) +{ + bool ip_version_cap; + + ip_version_cap = outer ? 
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_ip_version) : + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_ip_version); + + if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) && + (match->key->n_proto == htons(ETH_P_IP) || + match->key->n_proto == htons(ETH_P_IPV6))) { + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version, + match->key->n_proto == htons(ETH_P_IP) ? 4 : 6); + } else { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, + ntohs(match->mask->n_proto)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ntohs(match->key->n_proto)); + } +} + +u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer) +{ + void *headers_v; + u16 ethertype; + u8 ip_version; + + if (outer) + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + else + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers); + + ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version); + /* Return ip_version converted from ethertype anyway */ + if (!ip_version) { + ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); + if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP) + ip_version = 4; + else if (ethertype == ETH_P_IPV6) + ip_version = 6; + } + return ip_version; +} + +/* Tunnel device follows RFC 6040, see include/net/inet_ecn.h. + * And changes inner ip_ecn depending on inner and outer ip_ecn as follows: + * +---------+----------------------------------------+ + * |Arriving | Arriving Outer Header | + * | Inner +---------+---------+---------+----------+ + * | Header | Not-ECT | ECT(0) | ECT(1) | CE | + * +---------+---------+---------+---------+----------+ + * | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop> | + * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE* | + * | ECT(1) | ECT(1) | ECT(1) | ECT(1)* | CE* | + * | CE | CE | CE | CE | CE | + * +---------+---------+---------+---------+----------+ + * + * Tc matches on inner after decapsulation on tunnel device, but hw offload matches + * the inner ip_ecn value before hardware decap action. + * + * Cells marked are changed from original inner packet ip_ecn value during decap, and + * so matching those values on inner ip_ecn before decap will fail. + * + * The following helper allows offload when inner ip_ecn won't be changed by outer ip_ecn, + * except for the outer ip_ecn = CE, where in all cases inner ip_ecn will be changed to CE, + * and such we can drop the inner ip_ecn=CE match. 
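+ *
+ * Matching together with outer ip_ecn = ECT(1) is rejected by the helper
+ * below, since decap may rewrite inner ECT(0) to ECT(1) and a pre-decap
+ * inner match would then be unreliable.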
+ */ + +static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv, + struct flow_cls_offload *f, + bool *match_inner_ecn) +{ + u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0; + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_ip match; + + *match_inner_ecn = true; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + flow_rule_match_enc_ip(rule, &match); + outer_ecn_key = match.key->tos & INET_ECN_MASK; + outer_ecn_mask = match.mask->tos & INET_ECN_MASK; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { + flow_rule_match_ip(rule, &match); + inner_ecn_key = match.key->tos & INET_ECN_MASK; + inner_ecn_mask = match.mask->tos & INET_ECN_MASK; + } + + if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) { + NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported"); + netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported"); + return -EOPNOTSUPP; + } + + if (!outer_ecn_mask) { + if (!inner_ecn_mask) + return 0; + + NL_SET_ERR_MSG_MOD(extack, + "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported"); + netdev_warn(priv->netdev, + "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported"); + return -EOPNOTSUPP; + } + + if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) { + NL_SET_ERR_MSG_MOD(extack, + "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported"); + netdev_warn(priv->netdev, + "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported"); + return -EOPNOTSUPP; + } + + if (!inner_ecn_mask) + return 0; + + /* Both inner and outer have full mask on ecn */ + + if (outer_ecn_key == INET_ECN_ECT_1) { + /* inner ecn might change by DECAP action */ + + NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported"); + netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported"); + return -EOPNOTSUPP; + } + + if (outer_ecn_key != INET_ECN_CE) + return 0; + + if (inner_ecn_key != INET_ECN_CE) { + /* Can't happen in software, as packet ecn will be changed to CE after decap */ + NL_SET_ERR_MSG_MOD(extack, + "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported"); + netdev_warn(priv->netdev, + "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported"); + return -EOPNOTSUPP; + } + + /* outer ecn = CE, inner ecn = CE, as decap will change inner ecn to CE in anycase, + * drop match on inner ecn + */ + *match_inner_ecn = false; + + return 0; +} + +static int parse_tunnel_attr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct net_device *filter_dev, + u8 *match_level, + bool *match_inner) +{ + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct netlink_ext_ack *extack = f->common.extack; + bool needs_mapping, sets_mapping; + int err; + + if (!mlx5e_is_eswitch_flow(flow)) { + NL_SET_ERR_MSG_MOD(extack, "Match on tunnel is not supported"); + return -EOPNOTSUPP; + } + + needs_mapping = !!flow->attr->chain; + sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f); + *match_inner = !needs_mapping; + + if ((needs_mapping || sets_mapping) && + !mlx5_eswitch_reg_c1_loopback_enabled(esw)) { + NL_SET_ERR_MSG_MOD(extack, + "Chains on tunnel devices isn't supported without register loopback support"); + 
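/* Chained rules on a tunnel device carry the tunnel mapping id in reg_c1
+ * across the internal loopback, so this is only supported when the device
+ * preserves reg_c1 (see mlx5_eswitch_reg_c1_loopback_enabled() above).
+ */
+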
netdev_warn(priv->netdev, + "Chains on tunnel devices isn't supported without register loopback support"); + return -EOPNOTSUPP; + } + + if (!flow->attr->chain) { + err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, + match_level); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to parse tunnel attributes"); + netdev_warn(priv->netdev, + "Failed to parse tunnel attributes"); + return err; + } + + /* With mpls over udp we decapsulate using packet reformat + * object + */ + if (!netif_is_bareudp(filter_dev)) + flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + err = mlx5e_tc_set_attr_rx_tun(flow, spec); + if (err) + return err; + } else if (tunnel) { + struct mlx5_flow_spec *tmp_spec; + + tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL); + if (!tmp_spec) { + NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for tunnel tmp spec"); + netdev_warn(priv->netdev, "Failed to allocate memory for tunnel tmp spec"); + return -ENOMEM; + } + memcpy(tmp_spec, spec, sizeof(*tmp_spec)); + + err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level); + if (err) { + kvfree(tmp_spec); + NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes"); + netdev_warn(priv->netdev, "Failed to parse tunnel attributes"); + return err; + } + err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec); + kvfree(tmp_spec); + if (err) + return err; + } + + if (!needs_mapping && !sets_mapping) + return 0; + + return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev); +} + +static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + inner_headers); +} + +static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_value, + inner_headers); +} + +static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); +} + +static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); +} + +void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec) +{ + return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? + get_match_inner_headers_value(spec) : + get_match_outer_headers_value(spec); +} + +void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec) +{ + return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? 
+ get_match_inner_headers_criteria(spec) : + get_match_outer_headers_criteria(spec); +} + +static int mlx5e_flower_parse_meta(struct net_device *filter_dev, + struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct net_device *ingress_dev; + struct flow_match_meta match; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) + return 0; + + flow_rule_match_meta(rule, &match); + if (!match.mask->ingress_ifindex) + return 0; + + if (match.mask->ingress_ifindex != 0xFFFFFFFF) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask"); + return -EOPNOTSUPP; + } + + ingress_dev = __dev_get_by_index(dev_net(filter_dev), + match.key->ingress_ifindex); + if (!ingress_dev) { + NL_SET_ERR_MSG_MOD(extack, + "Can't find the ingress port to match on"); + return -ENOENT; + } + + if (ingress_dev != filter_dev) { + NL_SET_ERR_MSG_MOD(extack, + "Can't match on the ingress filter port"); + return -EOPNOTSUPP; + } + + return 0; +} + +static bool skip_key_basic(struct net_device *filter_dev, + struct flow_cls_offload *f) +{ + /* When doing mpls over udp decap, the user needs to provide + * MPLS_UC as the protocol in order to be able to match on mpls + * label fields. However, the actual ethertype is IP so we want to + * avoid matching on this, otherwise we'll fail the match. + */ + if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0) + return true; + + return false; +} + +static int __parse_cls_flower(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct net_device *filter_dev, + u8 *inner_match_level, u8 *outer_match_level) +{ + struct netlink_ext_ack *extack = f->common.extack; + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_3); + void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_3); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_dissector *dissector = rule->match.dissector; + enum fs_flow_table_type fs_type; + bool match_inner_ecn = true; + u16 addr_type = 0; + u8 ip_proto = 0; + u8 *match_level; + int err; + + fs_type = mlx5e_is_eswitch_flow(flow) ? 
FS_FT_FDB : FS_FT_NIC_RX; + match_level = outer_match_level; + + if (dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_META) | + BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_VLAN) | + BIT(FLOW_DISSECTOR_KEY_CVLAN) | + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_PORTS) | + BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | + BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | + BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_TCP) | + BIT(FLOW_DISSECTOR_KEY_IP) | + BIT(FLOW_DISSECTOR_KEY_CT) | + BIT(FLOW_DISSECTOR_KEY_ENC_IP) | + BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | + BIT(FLOW_DISSECTOR_KEY_ICMP) | + BIT(FLOW_DISSECTOR_KEY_MPLS))) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); + netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n", + dissector->used_keys); + return -EOPNOTSUPP; + } + + if (mlx5e_get_tc_tun(filter_dev)) { + bool match_inner = false; + + err = parse_tunnel_attr(priv, flow, spec, f, filter_dev, + outer_match_level, &match_inner); + if (err) + return err; + + if (match_inner) { + /* header pointers should point to the inner headers + * if the packet was decapsulated already. + * outer headers are set by parse_tunnel_attr. + */ + match_level = inner_match_level; + headers_c = get_match_inner_headers_criteria(spec); + headers_v = get_match_inner_headers_value(spec); + } + + err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn); + if (err) + return err; + } + + err = mlx5e_flower_parse_meta(filter_dev, f); + if (err) + return err; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) && + !skip_key_basic(filter_dev, f)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + mlx5e_tc_set_ethertype(priv->mdev, &match, + match_level == outer_match_level, + headers_c, headers_v); + + if (match.mask->n_proto) + *match_level = MLX5_MATCH_L2; + } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) || + is_vlan_dev(filter_dev)) { + struct flow_dissector_key_vlan filter_dev_mask; + struct flow_dissector_key_vlan filter_dev_key; + struct flow_match_vlan match; + + if (is_vlan_dev(filter_dev)) { + match.key = &filter_dev_key; + match.key->vlan_id = vlan_dev_vlan_id(filter_dev); + match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev); + match.key->vlan_priority = 0; + match.mask = &filter_dev_mask; + memset(match.mask, 0xff, sizeof(*match.mask)); + match.mask->vlan_priority = 0; + } else { + flow_rule_match_vlan(rule, &match); + } + if (match.mask->vlan_id || + match.mask->vlan_priority || + match.mask->vlan_tpid) { + if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + svlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + svlan_tag, 1); + } else { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + cvlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + cvlan_tag, 1); + } + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, + match.mask->vlan_id); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, + match.key->vlan_id); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, + match.mask->vlan_priority); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, + match.key->vlan_priority); + + *match_level = MLX5_MATCH_L2; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) && + match.mask->vlan_eth_type && + MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, + 
ft_field_support.outer_second_vid, + fs_type)) { + MLX5_SET(fte_match_set_misc, misc_c, + outer_second_cvlan_tag, 1); + spec->match_criteria_enable |= + MLX5_MATCH_MISC_PARAMETERS; + } + } + } else if (*match_level != MLX5_MATCH_NONE) { + /* cvlan_tag enabled in match criteria and + * disabled in match value means both S & C tags + * don't exist (untagged of both) + */ + MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); + *match_level = MLX5_MATCH_L2; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { + struct flow_match_vlan match; + + flow_rule_match_cvlan(rule, &match); + if (match.mask->vlan_id || + match.mask->vlan_priority || + match.mask->vlan_tpid) { + if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid, + fs_type)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on CVLAN is not supported"); + return -EOPNOTSUPP; + } + + if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { + MLX5_SET(fte_match_set_misc, misc_c, + outer_second_svlan_tag, 1); + MLX5_SET(fte_match_set_misc, misc_v, + outer_second_svlan_tag, 1); + } else { + MLX5_SET(fte_match_set_misc, misc_c, + outer_second_cvlan_tag, 1); + MLX5_SET(fte_match_set_misc, misc_v, + outer_second_cvlan_tag, 1); + } + + MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid, + match.mask->vlan_id); + MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid, + match.key->vlan_id); + MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio, + match.mask->vlan_priority); + MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio, + match.key->vlan_priority); + + *match_level = MLX5_MATCH_L2; + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + + flow_rule_match_eth_addrs(rule, &match); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dmac_47_16), + match.mask->dst); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dmac_47_16), + match.key->dst); + + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + smac_47_16), + match.mask->src); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + smac_47_16), + match.key->src); + + if (!is_zero_ether_addr(match.mask->src) || + !is_zero_ether_addr(match.mask->dst)) + *match_level = MLX5_MATCH_L2; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type; + + /* the HW doesn't support frag first/later */ + if (match.mask->flags & FLOW_DIS_FIRST_FRAG) { + NL_SET_ERR_MSG_MOD(extack, "Match on frag first/later is not supported"); + return -EOPNOTSUPP; + } + + if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, + match.key->flags & FLOW_DIS_IS_FRAGMENT); + + /* the HW doesn't need L3 inline to match on frag=no */ + if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT)) + *match_level = MLX5_MATCH_L2; + /* *** L2 attributes parsing up to here *** */ + else + *match_level = MLX5_MATCH_L3; + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + ip_proto = match.key->ip_proto; + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, + match.mask->ip_proto); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, + match.key->ip_proto); + + if (match.mask->ip_proto) + *match_level = 
MLX5_MATCH_L3; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_ipv4_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &match.mask->src, sizeof(match.mask->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &match.key->src, sizeof(match.key->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &match.mask->dst, sizeof(match.mask->dst)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &match.key->dst, sizeof(match.key->dst)); + + if (match.mask->src || match.mask->dst) + *match_level = MLX5_MATCH_L3; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_ipv6_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.mask->src, sizeof(match.mask->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.key->src, sizeof(match.key->src)); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.mask->dst, sizeof(match.mask->dst)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.key->dst, sizeof(match.key->dst)); + + if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY || + ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY) + *match_level = MLX5_MATCH_L3; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { + struct flow_match_ip match; + + flow_rule_match_ip(rule, &match); + if (match_inner_ecn) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, + match.mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, + match.key->tos & 0x3); + } + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, + match.mask->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, + match.key->tos >> 2); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, + match.mask->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, + match.key->ttl); + + if (match.mask->ttl && + !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_ipv4_ttl)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on TTL is not supported"); + return -EOPNOTSUPP; + } + + if (match.mask->tos || match.mask->ttl) + *match_level = MLX5_MATCH_L3; + } + + /* *** L3 attributes parsing up to here *** */ + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_ports(rule, &match); + switch (ip_proto) { + case IPPROTO_TCP: + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + tcp_sport, ntohs(match.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + tcp_sport, ntohs(match.key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + tcp_dport, ntohs(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + tcp_dport, ntohs(match.key->dst)); + break; + + case IPPROTO_UDP: + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_sport, ntohs(match.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_sport, ntohs(match.key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_dport, ntohs(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_dport, ntohs(match.key->dst)); + break; + default: + NL_SET_ERR_MSG_MOD(extack, + "Only UDP and TCP 
transports are supported for L4 matching"); + netdev_err(priv->netdev, + "Only UDP and TCP transport are supported\n"); + return -EINVAL; + } + + if (match.mask->src || match.mask->dst) + *match_level = MLX5_MATCH_L4; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { + struct flow_match_tcp match; + + flow_rule_match_tcp(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, + ntohs(match.mask->flags)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, + ntohs(match.key->flags)); + + if (match.mask->flags) + *match_level = MLX5_MATCH_L4; + } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) { + struct flow_match_icmp match; + + flow_rule_match_icmp(rule, &match); + switch (ip_proto) { + case IPPROTO_ICMP: + if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & + MLX5_FLEX_PROTO_ICMP)) { + NL_SET_ERR_MSG_MOD(extack, + "Match on Flex protocols for ICMP is not supported"); + return -EOPNOTSUPP; + } + MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type, + match.mask->type); + MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type, + match.key->type); + MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code, + match.mask->code); + MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code, + match.key->code); + break; + case IPPROTO_ICMPV6: + if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & + MLX5_FLEX_PROTO_ICMPV6)) { + NL_SET_ERR_MSG_MOD(extack, + "Match on Flex protocols for ICMPV6 is not supported"); + return -EOPNOTSUPP; + } + MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type, + match.mask->type); + MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type, + match.key->type); + MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code, + match.mask->code); + MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code, + match.key->code); + break; + default: + NL_SET_ERR_MSG_MOD(extack, + "Code and type matching only with ICMP and ICMPv6"); + netdev_err(priv->netdev, + "Code and type matching only with ICMP and ICMPv6\n"); + return -EINVAL; + } + if (match.mask->code || match.mask->type) { + *match_level = MLX5_MATCH_L4; + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3; + } + } + /* Currently supported only for MPLS over UDP */ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) && + !netif_is_bareudp(filter_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on MPLS is supported only for MPLS over UDP"); + netdev_err(priv->netdev, + "Matching on MPLS is supported only for MPLS over UDP\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int parse_cls_flower(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct net_device *filter_dev) +{ + u8 inner_match_level, outer_match_level, non_tunnel_match_level; + struct netlink_ext_ack *extack = f->common.extack; + struct mlx5_core_dev *dev = priv->mdev; + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep; + bool is_eswitch_flow; + int err; + + inner_match_level = MLX5_MATCH_NONE; + outer_match_level = MLX5_MATCH_NONE; + + err = __parse_cls_flower(priv, flow, spec, f, filter_dev, + &inner_match_level, &outer_match_level); + non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ? 
+ outer_match_level : inner_match_level; + + is_eswitch_flow = mlx5e_is_eswitch_flow(flow); + if (!err && is_eswitch_flow) { + rep = rpriv->rep; + if (rep->vport != MLX5_VPORT_UPLINK && + (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && + esw->offloads.inline_mode < non_tunnel_match_level)) { + NL_SET_ERR_MSG_MOD(extack, + "Flow is not offloaded due to min inline setting"); + netdev_warn(priv->netdev, + "Flow is not offloaded due to min inline setting, required %d actual %d\n", + non_tunnel_match_level, esw->offloads.inline_mode); + return -EOPNOTSUPP; + } + } + + flow->attr->inner_match_level = inner_match_level; + flow->attr->outer_match_level = outer_match_level; + + + return err; +} + +struct mlx5_fields { + u8 field; + u8 field_bsize; + u32 field_mask; + u32 offset; + u32 match_offset; +}; + +#define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \ + {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \ + offsetof(struct pedit_headers, field) + (off), \ + MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)} + +/* masked values are the same and there are no rewrites that do not have a + * match. + */ +#define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \ + type matchmaskx = *(type *)(matchmaskp); \ + type matchvalx = *(type *)(matchvalp); \ + type maskx = *(type *)(maskp); \ + type valx = *(type *)(valp); \ + \ + (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \ + matchmaskx)); \ +}) + +static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp, + void *matchmaskp, u8 bsize) +{ + bool same = false; + + switch (bsize) { + case 8: + same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp); + break; + case 16: + same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp); + break; + case 32: + same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp); + break; + } + + return same; +} + +static struct mlx5_fields fields[] = { + OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16), + OFFLOAD(DMAC_15_0, 16, U16_MAX, eth.h_dest[4], 0, dmac_15_0), + OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16), + OFFLOAD(SMAC_15_0, 16, U16_MAX, eth.h_source[4], 0, smac_15_0), + OFFLOAD(ETHERTYPE, 16, U16_MAX, eth.h_proto, 0, ethertype), + OFFLOAD(FIRST_VID, 16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid), + + OFFLOAD(IP_DSCP, 8, 0xfc, ip4.tos, 0, ip_dscp), + OFFLOAD(IP_TTL, 8, U8_MAX, ip4.ttl, 0, ttl_hoplimit), + OFFLOAD(SIPV4, 32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4), + OFFLOAD(DIPV4, 32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + + OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0, + src_ipv4_src_ipv6.ipv6_layout.ipv6[0]), + OFFLOAD(SIPV6_95_64, 32, U32_MAX, ip6.saddr.s6_addr32[1], 0, + src_ipv4_src_ipv6.ipv6_layout.ipv6[4]), + OFFLOAD(SIPV6_63_32, 32, U32_MAX, ip6.saddr.s6_addr32[2], 0, + src_ipv4_src_ipv6.ipv6_layout.ipv6[8]), + OFFLOAD(SIPV6_31_0, 32, U32_MAX, ip6.saddr.s6_addr32[3], 0, + src_ipv4_src_ipv6.ipv6_layout.ipv6[12]), + OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]), + OFFLOAD(DIPV6_95_64, 32, U32_MAX, ip6.daddr.s6_addr32[1], 0, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]), + OFFLOAD(DIPV6_63_32, 32, U32_MAX, ip6.daddr.s6_addr32[2], 0, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]), + OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]), + OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit), + OFFLOAD(IP_DSCP, 16, 0x0fc0, 
ip6, 0, ip_dscp), + + OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport), + OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport), + /* in linux iphdr tcp_flags is 8 bits long */ + OFFLOAD(TCP_FLAGS, 8, U8_MAX, tcp.ack_seq, 5, tcp_flags), + + OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport), + OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport), +}; + +static u32 mask_field_get(void *mask, struct mlx5_fields *f) +{ + switch (f->field_bsize) { + case 32: + return be32_to_cpu(*(__be32 *)mask) & f->field_mask; + case 16: + return be16_to_cpu(*(__be16 *)mask) & (u16)f->field_mask; + default: + return *(u8 *)mask & (u8)f->field_mask; + } +} + +static void mask_field_clear(void *mask, struct mlx5_fields *f) +{ + switch (f->field_bsize) { + case 32: + *(__be32 *)mask &= ~cpu_to_be32(f->field_mask); + break; + case 16: + *(__be16 *)mask &= ~cpu_to_be16((u16)f->field_mask); + break; + default: + *(u8 *)mask &= ~(u8)f->field_mask; + break; + } +} + +static int offload_pedit_fields(struct mlx5e_priv *priv, + int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr, + u32 *action_flags, + struct netlink_ext_ack *extack) +{ + struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; + struct pedit_headers_action *hdrs = parse_attr->hdrs; + void *headers_c, *headers_v, *action, *vals_p; + struct mlx5e_tc_mod_hdr_acts *mod_acts; + void *s_masks_p, *a_masks_p; + int i, first, last, next_z; + struct mlx5_fields *f; + unsigned long mask; + u32 s_mask, a_mask; + u8 cmd; + + mod_acts = &parse_attr->mod_hdr_acts; + headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec); + headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec); + + set_masks = &hdrs[0].masks; + add_masks = &hdrs[1].masks; + set_vals = &hdrs[0].vals; + add_vals = &hdrs[1].vals; + + for (i = 0; i < ARRAY_SIZE(fields); i++) { + bool skip; + + f = &fields[i]; + s_masks_p = (void *)set_masks + f->offset; + a_masks_p = (void *)add_masks + f->offset; + + s_mask = mask_field_get(s_masks_p, f); + a_mask = mask_field_get(a_masks_p, f); + + if (!s_mask && !a_mask) /* nothing to offload here */ + continue; + + if (s_mask && a_mask) { + NL_SET_ERR_MSG_MOD(extack, + "can't set and add to the same HW field"); + netdev_warn(priv->netdev, + "mlx5: can't set and add to the same HW field (%x)\n", + f->field); + return -EOPNOTSUPP; + } + + skip = false; + if (s_mask) { + void *match_mask = headers_c + f->match_offset; + void *match_val = headers_v + f->match_offset; + + cmd = MLX5_ACTION_TYPE_SET; + mask = s_mask; + vals_p = (void *)set_vals + f->offset; + /* don't rewrite if we have a match on the same value */ + if (cmp_val_mask(vals_p, s_masks_p, match_val, + match_mask, f->field_bsize)) + skip = true; + /* clear to denote we consumed this field */ + mask_field_clear(s_masks_p, f); + } else { + cmd = MLX5_ACTION_TYPE_ADD; + mask = a_mask; + vals_p = (void *)add_vals + f->offset; + /* add 0 is no change */ + if (!mask_field_get(vals_p, f)) + skip = true; + /* clear to denote we consumed this field */ + mask_field_clear(a_masks_p, f); + } + if (skip) + continue; + + first = find_first_bit(&mask, f->field_bsize); + next_z = find_next_zero_bit(&mask, f->field_bsize, first); + last = find_last_bit(&mask, f->field_bsize); + if (first < next_z && next_z < last) { + NL_SET_ERR_MSG_MOD(extack, + "rewrite of few sub-fields isn't supported"); + netdev_warn(priv->netdev, + "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n", + mask); + return -EOPNOTSUPP; + } + + action = 
mlx5e_mod_hdr_alloc(priv->mdev, namespace, mod_acts); + if (IS_ERR(action)) { + NL_SET_ERR_MSG_MOD(extack, + "too many pedit actions, can't offload"); + mlx5_core_warn(priv->mdev, + "mlx5: parsed %d pedit actions, can't do more\n", + mod_acts->num_actions); + return PTR_ERR(action); + } + + MLX5_SET(set_action_in, action, action_type, cmd); + MLX5_SET(set_action_in, action, field, f->field); + + if (cmd == MLX5_ACTION_TYPE_SET) { + unsigned long field_mask = f->field_mask; + int start; + + /* if field is bit sized it can start not from first bit */ + start = find_first_bit(&field_mask, f->field_bsize); + + MLX5_SET(set_action_in, action, offset, first - start); + /* length is num of bits to be written, zero means length of 32 */ + MLX5_SET(set_action_in, action, length, (last - first + 1)); + } + + if (f->field_bsize == 32) + MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first); + else if (f->field_bsize == 16) + MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first); + else if (f->field_bsize == 8) + MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first); + + ++mod_acts->num_actions; + } + + return 0; +} + +static const struct pedit_headers zero_masks = {}; + +static int verify_offload_pedit_fields(struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct netlink_ext_ack *extack) +{ + struct pedit_headers *cmd_masks; + u8 cmd; + + for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) { + cmd_masks = &parse_attr->hdrs[cmd].masks; + if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) { + NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field"); + netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd); + print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS, + 16, 1, cmd_masks, sizeof(zero_masks), true); + return -EOPNOTSUPP; + } + } + + return 0; +} + +static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr, + u32 *action_flags, + struct netlink_ext_ack *extack) +{ + int err; + + err = offload_pedit_fields(priv, namespace, parse_attr, action_flags, extack); + if (err) + goto out_dealloc_parsed_actions; + + err = verify_offload_pedit_fields(priv, parse_attr, extack); + if (err) + goto out_dealloc_parsed_actions; + + return 0; + +out_dealloc_parsed_actions: + mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); + return err; +} + +struct ip_ttl_word { + __u8 ttl; + __u8 protocol; + __sum16 check; +}; + +struct ipv6_hoplimit_word { + __be16 payload_len; + __u8 nexthdr; + __u8 hop_limit; +}; + +static bool +is_action_keys_supported(const struct flow_action_entry *act, bool ct_flow, + bool *modify_ip_header, bool *modify_tuple, + struct netlink_ext_ack *extack) +{ + u32 mask, offset; + u8 htype; + + htype = act->mangle.htype; + offset = act->mangle.offset; + mask = ~act->mangle.mask; + /* For IPv4 & IPv6 header check 4 byte word, + * to determine that modified fields + * are NOT ttl & hop_limit only. 
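+ * e.g. an IPv4 mangle at offsetof(struct iphdr, ttl) whose mask covers only the
+ * ttl byte (protocol and check untouched) is treated as a TTL-only rewrite and
+ * does not set modify_ip_header.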
+ */ + if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) { + struct ip_ttl_word *ttl_word = + (struct ip_ttl_word *)&mask; + + if (offset != offsetof(struct iphdr, ttl) || + ttl_word->protocol || + ttl_word->check) { + *modify_ip_header = true; + } + + if (offset >= offsetof(struct iphdr, saddr)) + *modify_tuple = true; + + if (ct_flow && *modify_tuple) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload re-write of ipv4 address with action ct"); + return false; + } + } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) { + struct ipv6_hoplimit_word *hoplimit_word = + (struct ipv6_hoplimit_word *)&mask; + + if (offset != offsetof(struct ipv6hdr, payload_len) || + hoplimit_word->payload_len || + hoplimit_word->nexthdr) { + *modify_ip_header = true; + } + + if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr)) + *modify_tuple = true; + + if (ct_flow && *modify_tuple) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload re-write of ipv6 address with action ct"); + return false; + } + } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP || + htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP) { + *modify_tuple = true; + if (ct_flow) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload re-write of transport header ports with action ct"); + return false; + } + } + + return true; +} + +static bool modify_tuple_supported(bool modify_tuple, bool ct_clear, + bool ct_flow, struct netlink_ext_ack *extack, + struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec) +{ + if (!modify_tuple || ct_clear) + return true; + + if (ct_flow) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload tuple modification with non-clear ct()"); + netdev_info(priv->netdev, + "can't offload tuple modification with non-clear ct()"); + return false; + } + + /* Add ct_state=-trk match so it will be offloaded for non ct flows + * (or after clear action), as otherwise, since the tuple is changed, + * we can't restore ct state + */ + if (mlx5_tc_ct_add_no_trk_match(spec)) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload tuple modification with ct matches and no ct(clear) action"); + netdev_info(priv->netdev, + "can't offload tuple modification with ct matches and no ct(clear) action"); + return false; + } + + return true; +} + +static bool modify_header_match_supported(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_action *flow_action, + u32 actions, bool ct_flow, + bool ct_clear, + struct netlink_ext_ack *extack) +{ + const struct flow_action_entry *act; + bool modify_ip_header, modify_tuple; + void *headers_c; + void *headers_v; + u16 ethertype; + u8 ip_proto; + int i; + + headers_c = mlx5e_get_match_headers_criteria(actions, spec); + headers_v = mlx5e_get_match_headers_value(actions, spec); + ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); + + /* for non-IP we only re-write MACs, so we're okay */ + if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 && + ethertype != ETH_P_IP && ethertype != ETH_P_IPV6) + goto out_ok; + + modify_ip_header = false; + modify_tuple = false; + flow_action_for_each(i, act, flow_action) { + if (act->id != FLOW_ACTION_MANGLE && + act->id != FLOW_ACTION_ADD) + continue; + + if (!is_action_keys_supported(act, ct_flow, + &modify_ip_header, + &modify_tuple, extack)) + return false; + } + + if (!modify_tuple_supported(modify_tuple, ct_clear, ct_flow, extack, + priv, spec)) + return false; + + ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); + if (modify_ip_header && ip_proto != IPPROTO_TCP && + ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) { + NL_SET_ERR_MSG_MOD(extack, 
+ "can't offload re-write of non TCP/UDP"); + netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n", + ip_proto); + return false; + } + +out_ok: + return true; +} + +static bool +actions_match_supported_fdb(struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr; + bool ct_flow, ct_clear; + + ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; + ct_flow = flow_flag_test(flow, CT) && !ct_clear; + + if (esw_attr->split_count && ct_flow && + !MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve)) { + /* All registers used by ct are cleared when using + * split rules. + */ + NL_SET_ERR_MSG_MOD(extack, "Can't offload mirroring with action ct"); + return false; + } + + if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { + NL_SET_ERR_MSG_MOD(extack, + "current firmware doesn't support split rule for port mirroring"); + netdev_warn_once(priv->netdev, + "current firmware doesn't support split rule for port mirroring\n"); + return false; + } + + return true; +} + +static bool +actions_match_supported(struct mlx5e_priv *priv, + struct flow_action *flow_action, + u32 actions, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + bool ct_flow, ct_clear; + + ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; + ct_flow = flow_flag_test(flow, CT) && !ct_clear; + + if (!(actions & + (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { + NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action"); + return false; + } + + if (!(~actions & + (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { + NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action"); + return false; + } + + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + actions & MLX5_FLOW_CONTEXT_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported"); + return false; + } + + if (!(~actions & + (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { + NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action"); + return false; + } + + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + actions & MLX5_FLOW_CONTEXT_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported"); + return false; + } + + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + !modify_header_match_supported(priv, &parse_attr->spec, flow_action, + actions, ct_flow, ct_clear, extack)) + return false; + + if (mlx5e_is_eswitch_flow(flow) && + !actions_match_supported_fdb(priv, parse_attr, flow, extack)) + return false; + + return true; +} + +static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) +{ + return priv->mdev == peer_priv->mdev; +} + +bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) +{ + struct mlx5_core_dev *fmdev, *pmdev; + u64 fsystem_guid, psystem_guid; + + fmdev = priv->mdev; + pmdev = peer_priv->mdev; + + fsystem_guid = mlx5_query_nic_system_image_guid(fmdev); + psystem_guid = mlx5_query_nic_system_image_guid(pmdev); + + return (fsystem_guid == psystem_guid); +} + +static int +actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct netlink_ext_ack *extack) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct 
pedit_headers_action *hdrs = parse_attr->hdrs; + enum mlx5_flow_namespace_type ns_type; + int err; + + if (!hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits && + !hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) + return 0; + + ns_type = mlx5e_get_flow_namespace(flow); + + err = alloc_tc_pedit_action(priv, ns_type, parse_attr, &attr->action, extack); + if (err) + return err; + + if (parse_attr->mod_hdr_acts.num_actions > 0) + return 0; + + /* In case all pedit actions are skipped, remove the MOD_HDR flag. */ + attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); + + if (ns_type != MLX5_FLOW_NAMESPACE_FDB) + return 0; + + if (!((attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || + (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))) + attr->esw_attr->split_count = 0; + + return 0; +} + +static struct mlx5_flow_attr* +mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr, + enum mlx5_flow_namespace_type ns_type) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr; + u32 attr_sz = ns_to_attr_sz(ns_type); + struct mlx5_flow_attr *attr2; + + attr2 = mlx5_alloc_flow_attr(ns_type); + parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); + if (!attr2 || !parse_attr) { + kvfree(parse_attr); + kfree(attr2); + return NULL; + } + + memcpy(attr2, attr, attr_sz); + INIT_LIST_HEAD(&attr2->list); + parse_attr->filter_dev = attr->parse_attr->filter_dev; + attr2->action = 0; + attr2->flags = 0; + attr2->parse_attr = parse_attr; + attr2->dest_chain = 0; + attr2->dest_ft = NULL; + + if (ns_type == MLX5_FLOW_NAMESPACE_FDB) { + attr2->esw_attr->out_count = 0; + attr2->esw_attr->split_count = 0; + } + + return attr2; +} + +static struct mlx5_core_dev * +get_flow_counter_dev(struct mlx5e_tc_flow *flow) +{ + return mlx5e_is_eswitch_flow(flow) ? 
flow->attr->esw_attr->counter_dev : flow->priv->mdev; +} + +struct mlx5_flow_attr * +mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow) +{ + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_flow_attr *attr; + int i; + + list_for_each_entry(attr, &flow->attrs, list) { + esw_attr = attr->esw_attr; + for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { + if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) + return attr; + } + } + + return NULL; +} + +void +mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow) +{ + struct mlx5e_post_act *post_act = get_post_action(flow->priv); + struct mlx5_flow_attr *attr; + + list_for_each_entry(attr, &flow->attrs, list) { + if (list_is_last(&attr->list, &flow->attrs)) + break; + + mlx5e_tc_post_act_unoffload(post_act, attr->post_act_handle); + } +} + +static void +free_flow_post_acts(struct mlx5e_tc_flow *flow) +{ + struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow); + struct mlx5e_post_act *post_act = get_post_action(flow->priv); + struct mlx5_flow_attr *attr, *tmp; + bool vf_tun; + + list_for_each_entry_safe(attr, tmp, &flow->attrs, list) { + if (list_is_last(&attr->list, &flow->attrs)) + break; + + if (attr->post_act_handle) + mlx5e_tc_post_act_del(post_act, attr->post_act_handle); + + clean_encap_dests(flow->priv, flow, attr, &vf_tun); + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) + mlx5_fc_destroy(counter_dev, attr->counter); + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); + if (attr->modify_hdr) + mlx5_modify_header_dealloc(flow->priv->mdev, attr->modify_hdr); + } + + list_del(&attr->list); + kvfree(attr->parse_attr); + kfree(attr); + } +} + +int +mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow) +{ + struct mlx5e_post_act *post_act = get_post_action(flow->priv); + struct mlx5_flow_attr *attr; + int err = 0; + + list_for_each_entry(attr, &flow->attrs, list) { + if (list_is_last(&attr->list, &flow->attrs)) + break; + + err = mlx5e_tc_post_act_offload(post_act, attr->post_act_handle); + if (err) + break; + } + + return err; +} + +/* TC filter rule HW translation: + * + * +---------------------+ + * + ft prio (tc chain) + + * + original match + + * +---------------------+ + * | + * | if multi table action + * | + * v + * +---------------------+ + * + post act ft |<----. + * + match fte id | | split on multi table action + * + do actions |-----' + * +---------------------+ + * | + * | + * v + * Do rest of the actions after last multi table action. + */ +static int +alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) +{ + struct mlx5e_post_act *post_act = get_post_action(flow->priv); + struct mlx5_flow_attr *attr, *next_attr = NULL; + struct mlx5e_post_act_handle *handle; + bool vf_tun; + int err; + + /* This is going in reverse order as needed. + * The first entry is the last attribute. + */ + list_for_each_entry(attr, &flow->attrs, list) { + if (!next_attr) { + /* Set counter action on last post act rule. */ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + } else { + err = mlx5e_tc_act_set_next_post_act(flow, attr, next_attr); + if (err) + goto out_free; + } + + /* Don't add post_act rule for first attr (last in the list). + * It's being handled by the caller. 
+ */ + if (list_is_last(&attr->list, &flow->attrs)) + break; + + err = set_encap_dests(flow->priv, flow, attr, extack, &vf_tun); + if (err) + goto out_free; + + err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack); + if (err) + goto out_free; + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + err = mlx5e_tc_add_flow_mod_hdr(flow->priv, flow, attr); + if (err) + goto out_free; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + err = alloc_flow_attr_counter(get_flow_counter_dev(flow), attr); + if (err) + goto out_free; + } + + handle = mlx5e_tc_post_act_add(post_act, attr); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto out_free; + } + + attr->post_act_handle = handle; + next_attr = attr; + } + + if (flow_flag_test(flow, SLOW)) + goto out; + + err = mlx5e_tc_offload_flow_post_acts(flow); + if (err) + goto out_free; + +out: + return 0; + +out_free: + free_flow_post_acts(flow); + return err; +} + +static int +parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, + struct flow_action *flow_action) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow_action flow_action_reorder; + struct mlx5e_tc_flow *flow = parse_state->flow; + struct mlx5_flow_attr *attr = flow->attr; + enum mlx5_flow_namespace_type ns_type; + struct mlx5e_priv *priv = flow->priv; + struct flow_action_entry *act, **_act; + struct mlx5e_tc_act *tc_act; + int err, i; + + flow_action_reorder.num_entries = flow_action->num_entries; + flow_action_reorder.entries = kcalloc(flow_action->num_entries, + sizeof(flow_action), GFP_KERNEL); + if (!flow_action_reorder.entries) + return -ENOMEM; + + mlx5e_tc_act_reorder_flow_actions(flow_action, &flow_action_reorder); + + ns_type = mlx5e_get_flow_namespace(flow); + list_add(&attr->list, &flow->attrs); + + flow_action_for_each(i, _act, &flow_action_reorder) { + act = *_act; + tc_act = mlx5e_tc_act_get(act->id, ns_type); + if (!tc_act) { + NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action"); + err = -EOPNOTSUPP; + goto out_free; + } + + if (!tc_act->can_offload(parse_state, act, i, attr)) { + err = -EOPNOTSUPP; + goto out_free; + } + + err = tc_act->parse_action(parse_state, act, priv, attr); + if (err) + goto out_free; + + parse_state->actions |= attr->action; + + /* Split attr for multi table act if not the last act. 
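+ * A multi table action (e.g. ct()) continues in a separate table, so the
+ * actions that follow it are parsed into a cloned attr and offloaded through
+ * the post-action table (see alloc_flow_post_acts()).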
*/ + if (tc_act->is_multi_table_act && + tc_act->is_multi_table_act(priv, act, attr) && + i < flow_action_reorder.num_entries - 1) { + err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type); + if (err) + goto out_free; + + attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, ns_type); + if (!attr) { + err = -ENOMEM; + goto out_free; + } + + list_add(&attr->list, &flow->attrs); + } + } + + kfree(flow_action_reorder.entries); + + err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type); + if (err) + goto out_free_post_acts; + + err = alloc_flow_post_acts(flow, extack); + if (err) + goto out_free_post_acts; + + return 0; + +out_free: + kfree(flow_action_reorder.entries); +out_free_post_acts: + free_flow_post_acts(flow); + + return err; +} + +static int +flow_action_supported(struct flow_action *flow_action, + struct netlink_ext_ack *extack) +{ + if (!flow_action_has_entries(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries"); + return -EINVAL; + } + + if (!flow_action_hw_stats_check(flow_action, extack, + FLOW_ACTION_HW_STATS_DELAYED_BIT)) { + NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int +parse_tc_nic_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + struct mlx5e_tc_act_parse_state *parse_state; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; + int err; + + err = flow_action_supported(flow_action, extack); + if (err) + return err; + + attr->nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + parse_attr = attr->parse_attr; + parse_state = &parse_attr->parse_state; + mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack); + parse_state->ct_priv = get_ct_priv(priv); + + err = parse_tc_actions(parse_state, flow_action); + if (err) + return err; + + err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack); + if (err) + return err; + + if (!actions_match_supported(priv, flow_action, parse_state->actions, + parse_attr, flow, extack)) + return -EOPNOTSUPP; + + return 0; +} + +static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv, + struct net_device *peer_netdev) +{ + struct mlx5e_priv *peer_priv; + + peer_priv = netdev_priv(peer_netdev); + + return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) && + mlx5e_eswitch_vf_rep(priv->netdev) && + mlx5e_eswitch_vf_rep(peer_netdev) && + mlx5e_same_hw_devs(priv, peer_priv)); +} + +static bool same_hw_reps(struct mlx5e_priv *priv, + struct net_device *peer_netdev) +{ + struct mlx5e_priv *peer_priv; + + peer_priv = netdev_priv(peer_netdev); + + return mlx5e_eswitch_rep(priv->netdev) && + mlx5e_eswitch_rep(peer_netdev) && + mlx5e_same_hw_devs(priv, peer_priv); +} + +static bool is_lag_dev(struct mlx5e_priv *priv, + struct net_device *peer_netdev) +{ + return ((mlx5_lag_is_sriov(priv->mdev) || + mlx5_lag_is_multipath(priv->mdev)) && + same_hw_reps(priv, peer_netdev)); +} + +static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev) +{ + if (same_hw_reps(priv, out_dev) && + MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) && + MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up)) + return true; + + return false; +} + +bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, + struct net_device *out_dev) +{ + if (is_merged_eswitch_vfs(priv, out_dev)) + return true; + + if (is_multiport_eligible(priv, out_dev)) + return true; + + 
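/* Reps on the same hardware whose uplinks are bonded (SR-IOV LAG) or in
+ * multipath mode are also valid forwarding destinations.
+ */
+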
if (is_lag_dev(priv, out_dev)) + return true; + + return mlx5e_eswitch_rep(out_dev) && + same_port_devs(priv, netdev_priv(out_dev)); +} + +int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + int ifindex, + enum mlx5e_tc_int_port_type type, + u32 *action, + int out_index) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5e_tc_int_port_priv *int_port_priv; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_tc_int_port *dest_int_port; + int err; + + parse_attr = attr->parse_attr; + int_port_priv = mlx5e_get_int_port_priv(priv); + + dest_int_port = mlx5e_tc_int_port_get(int_port_priv, ifindex, type); + if (IS_ERR(dest_int_port)) + return PTR_ERR(dest_int_port); + + err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts, + MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG, + mlx5e_tc_int_port_get_metadata(dest_int_port)); + if (err) { + mlx5e_tc_int_port_put(int_port_priv, dest_int_port); + return err; + } + + *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + esw_attr->dest_int_port = dest_int_port; + esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE; + esw_attr->split_count = out_index; + + /* Forward to root fdb for matching against the new source vport */ + attr->dest_chain = 0; + + return 0; +} + +static int +parse_tc_fdb_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + struct mlx5e_tc_act_parse_state *parse_state; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; + struct mlx5_esw_flow_attr *esw_attr; + struct net_device *filter_dev; + int err; + + err = flow_action_supported(flow_action, extack); + if (err) + return err; + + esw_attr = attr->esw_attr; + parse_attr = attr->parse_attr; + filter_dev = parse_attr->filter_dev; + parse_state = &parse_attr->parse_state; + mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack); + parse_state->ct_priv = get_ct_priv(priv); + + err = parse_tc_actions(parse_state, flow_action); + if (err) + return err; + + /* Forward to/from internal port can only have 1 dest */ + if ((netif_is_ovs_master(filter_dev) || esw_attr->dest_int_port) && + esw_attr->out_count > 1) { + NL_SET_ERR_MSG_MOD(extack, + "Rules with internal port can have only one destination"); + return -EOPNOTSUPP; + } + + /* Forward from tunnel/internal port to internal port is not supported */ + if ((mlx5e_get_tc_tun(filter_dev) || netif_is_ovs_master(filter_dev)) && + esw_attr->dest_int_port) { + NL_SET_ERR_MSG_MOD(extack, + "Forwarding from tunnel/internal port to internal port is not supported"); + return -EOPNOTSUPP; + } + + err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack); + if (err) + return err; + + if (!actions_match_supported(priv, flow_action, parse_state->actions, + parse_attr, flow, extack)) + return -EOPNOTSUPP; + + return 0; +} + +static void get_flags(int flags, unsigned long *flow_flags) +{ + unsigned long __flow_flags = 0; + + if (flags & MLX5_TC_FLAG(INGRESS)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS); + if (flags & MLX5_TC_FLAG(EGRESS)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS); + + if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); + if (flags & MLX5_TC_FLAG(NIC_OFFLOAD)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); + if (flags & MLX5_TC_FLAG(FT_OFFLOAD)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT); + + *flow_flags = __flow_flags; +} + +static const struct 
rhashtable_params tc_ht_params = { + .head_offset = offsetof(struct mlx5e_tc_flow, node), + .key_offset = offsetof(struct mlx5e_tc_flow, cookie), + .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie), + .automatic_shrinking = true, +}; + +static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, + unsigned long flags) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5e_rep_priv *rpriv; + + if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) { + rpriv = priv->ppriv; + return &rpriv->tc_ht; + } else /* NIC offload */ + return &tc->ht; +} + +static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) +{ + struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr; + struct mlx5_flow_attr *attr = flow->attr; + bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK && + flow_flag_test(flow, INGRESS); + bool act_is_encap = !!(attr->action & + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); + bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom, + MLX5_DEVCOM_ESW_OFFLOADS); + + if (!esw_paired) + return false; + + if ((mlx5_lag_is_sriov(esw_attr->in_mdev) || + mlx5_lag_is_multipath(esw_attr->in_mdev)) && + (is_rep_ingress || act_is_encap)) + return true; + + return false; +} + +struct mlx5_flow_attr * +mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type) +{ + u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB) ? + sizeof(struct mlx5_esw_flow_attr) : + sizeof(struct mlx5_nic_flow_attr); + struct mlx5_flow_attr *attr; + + attr = kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL); + if (!attr) + return attr; + + INIT_LIST_HEAD(&attr->list); + return attr; +} + +static int +mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, + struct flow_cls_offload *f, unsigned long flow_flags, + struct mlx5e_tc_flow_parse_attr **__parse_attr, + struct mlx5e_tc_flow **__flow) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr; + struct mlx5e_tc_flow *flow; + int err = -ENOMEM; + int out_index; + + flow = kzalloc(sizeof(*flow), GFP_KERNEL); + parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); + if (!parse_attr || !flow) + goto err_free; + + flow->flags = flow_flags; + flow->cookie = f->cookie; + flow->priv = priv; + + attr = mlx5_alloc_flow_attr(mlx5e_get_flow_namespace(flow)); + if (!attr) + goto err_free; + + flow->attr = attr; + + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) + INIT_LIST_HEAD(&flow->encaps[out_index].list); + INIT_LIST_HEAD(&flow->hairpin); + INIT_LIST_HEAD(&flow->l3_to_l2_reformat); + INIT_LIST_HEAD(&flow->attrs); + refcount_set(&flow->refcnt, 1); + init_completion(&flow->init_done); + init_completion(&flow->del_hw_done); + + *__flow = flow; + *__parse_attr = parse_attr; + + return 0; + +err_free: + kfree(flow); + kvfree(parse_attr); + return err; +} + +static void +mlx5e_flow_attr_init(struct mlx5_flow_attr *attr, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct flow_cls_offload *f) +{ + attr->parse_attr = parse_attr; + attr->chain = f->common.chain_index; + attr->prio = f->common.prio; +} + +static void +mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr, + struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct flow_cls_offload *f, + struct mlx5_eswitch_rep *in_rep, + struct mlx5_core_dev *in_mdev) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + + mlx5e_flow_attr_init(attr, parse_attr, f); + + esw_attr->in_rep = in_rep; + esw_attr->in_mdev = in_mdev; + + if (MLX5_CAP_ESW(esw->dev, 
counter_eswitch_affinity) == + MLX5_COUNTER_SOURCE_ESWITCH) + esw_attr->counter_dev = in_mdev; + else + esw_attr->counter_dev = priv->mdev; +} + +static struct mlx5e_tc_flow * +__mlx5e_add_fdb_flow(struct mlx5e_priv *priv, + struct flow_cls_offload *f, + unsigned long flow_flags, + struct net_device *filter_dev, + struct mlx5_eswitch_rep *in_rep, + struct mlx5_core_dev *in_mdev) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct netlink_ext_ack *extack = f->common.extack; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_tc_flow *flow; + int attr_size, err; + + flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); + attr_size = sizeof(struct mlx5_esw_flow_attr); + err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, + &parse_attr, &flow); + if (err) + goto out; + + parse_attr->filter_dev = filter_dev; + mlx5e_flow_esw_attr_init(flow->attr, + priv, parse_attr, + f, in_rep, in_mdev); + + err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, + f, filter_dev); + if (err) + goto err_free; + + /* actions validation depends on parsing the ct matches first */ + err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f, + &flow->attr->ct_attr, extack); + if (err) + goto err_free; + + err = parse_tc_fdb_actions(priv, &rule->action, flow, extack); + if (err) + goto err_free; + + if (flow->attr->lag.count) { + err = mlx5_lag_add_mpesw_rule(esw->dev); + if (err) + goto err_free; + } + + err = mlx5e_tc_add_fdb_flow(priv, flow, extack); + complete_all(&flow->init_done); + if (err) { + if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev))) + goto err_lag; + + add_unready_flow(flow); + } + + return flow; + +err_lag: + if (flow->attr->lag.count) + mlx5_lag_del_mpesw_rule(esw->dev); +err_free: + mlx5e_flow_put(priv, flow); +out: + return ERR_PTR(err); +} + +static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, + struct mlx5e_tc_flow *flow, + unsigned long flow_flags) +{ + struct mlx5e_priv *priv = flow->priv, *peer_priv; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw; + struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; + struct mlx5_devcom *devcom = priv->mdev->priv.devcom; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_rep_priv *peer_urpriv; + struct mlx5e_tc_flow *peer_flow; + struct mlx5_core_dev *in_mdev; + int err = 0; + + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) + return -ENODEV; + + peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH); + peer_priv = netdev_priv(peer_urpriv->netdev); + + /* in_mdev is assigned of which the packet originated from. + * So packets redirected to uplink use the same mdev of the + * original flow and packets redirected from uplink use the + * peer mdev. 
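+ * Concretely, per the assignment below: a flow whose in_rep is the
+ * uplink vport gets the peer mdev for its duplicate rule, while a flow
+ * ingressing on a VF representor keeps the original priv->mdev.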
+ */ + if (attr->in_rep->vport == MLX5_VPORT_UPLINK) + in_mdev = peer_priv->mdev; + else + in_mdev = priv->mdev; + + parse_attr = flow->attr->parse_attr; + peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags, + parse_attr->filter_dev, + attr->in_rep, in_mdev); + if (IS_ERR(peer_flow)) { + err = PTR_ERR(peer_flow); + goto out; + } + + flow->peer_flow = peer_flow; + flow_flag_set(flow, DUP); + mutex_lock(&esw->offloads.peer_mutex); + list_add_tail(&flow->peer, &esw->offloads.peer_flows); + mutex_unlock(&esw->offloads.peer_mutex); + +out: + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + return err; +} + +static int +mlx5e_add_fdb_flow(struct mlx5e_priv *priv, + struct flow_cls_offload *f, + unsigned long flow_flags, + struct net_device *filter_dev, + struct mlx5e_tc_flow **__flow) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *in_rep = rpriv->rep; + struct mlx5_core_dev *in_mdev = priv->mdev; + struct mlx5e_tc_flow *flow; + int err; + + flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep, + in_mdev); + if (IS_ERR(flow)) + return PTR_ERR(flow); + + if (is_peer_flow_needed(flow)) { + err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags); + if (err) { + mlx5e_tc_del_fdb_flow(priv, flow); + goto out; + } + } + + *__flow = flow; + + return 0; + +out: + return err; +} + +static int +mlx5e_add_nic_flow(struct mlx5e_priv *priv, + struct flow_cls_offload *f, + unsigned long flow_flags, + struct net_device *filter_dev, + struct mlx5e_tc_flow **__flow) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_tc_flow *flow; + int attr_size, err; + + if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) { + if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common)) + return -EOPNOTSUPP; + } else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) { + return -EOPNOTSUPP; + } + + flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); + attr_size = sizeof(struct mlx5_nic_flow_attr); + err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, + &parse_attr, &flow); + if (err) + goto out; + + parse_attr->filter_dev = filter_dev; + mlx5e_flow_attr_init(flow->attr, parse_attr, f); + + err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, + f, filter_dev); + if (err) + goto err_free; + + err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f, + &flow->attr->ct_attr, extack); + if (err) + goto err_free; + + err = parse_tc_nic_actions(priv, &rule->action, flow, extack); + if (err) + goto err_free; + + err = mlx5e_tc_add_nic_flow(priv, flow, extack); + if (err) + goto err_free; + + flow_flag_set(flow, OFFLOADED); + *__flow = flow; + + return 0; + +err_free: + flow_flag_set(flow, FAILED); + mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); + mlx5e_flow_put(priv, flow); +out: + return err; +} + +static int +mlx5e_tc_add_flow(struct mlx5e_priv *priv, + struct flow_cls_offload *f, + unsigned long flags, + struct net_device *filter_dev, + struct mlx5e_tc_flow **flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + unsigned long flow_flags; + int err; + + get_flags(flags, &flow_flags); + + if (!tc_can_offload_extack(priv->netdev, f->common.extack)) + return -EOPNOTSUPP; + + if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS) + err = mlx5e_add_fdb_flow(priv, f, flow_flags, + filter_dev, flow); + else + err = mlx5e_add_nic_flow(priv, f, flow_flags, + filter_dev, flow); + + return err; +} + +static bool 
is_flow_rule_duplicate_allowed(struct net_device *dev, + struct mlx5e_rep_priv *rpriv) +{ + /* Offloaded flow rule is allowed to duplicate on non-uplink representor + * sharing tc block with other slaves of a lag device. Rpriv can be NULL if this + * function is called from NIC mode. + */ + return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK; +} + +int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, + struct flow_cls_offload *f, unsigned long flags) +{ + struct netlink_ext_ack *extack = f->common.extack; + struct rhashtable *tc_ht = get_tc_ht(priv, flags); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_tc_flow *flow; + int err = 0; + + if (!mlx5_esw_hold(priv->mdev)) + return -EBUSY; + + mlx5_esw_get(priv->mdev); + + rcu_read_lock(); + flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params); + if (flow) { + /* Same flow rule offloaded to non-uplink representor sharing tc block, + * just return 0. + */ + if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev) + goto rcu_unlock; + + NL_SET_ERR_MSG_MOD(extack, + "flow cookie already exists, ignoring"); + netdev_warn_once(priv->netdev, + "flow cookie %lx already exists, ignoring\n", + f->cookie); + err = -EEXIST; + goto rcu_unlock; + } +rcu_unlock: + rcu_read_unlock(); + if (flow) + goto out; + + trace_mlx5e_configure_flower(f); + err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow); + if (err) + goto out; + + /* Flow rule offloaded to non-uplink representor sharing tc block, + * set the flow's owner dev. + */ + if (is_flow_rule_duplicate_allowed(dev, rpriv)) + flow->orig_dev = dev; + + err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params); + if (err) + goto err_free; + + mlx5_esw_release(priv->mdev); + return 0; + +err_free: + mlx5e_flow_put(priv, flow); +out: + mlx5_esw_put(priv->mdev); + mlx5_esw_release(priv->mdev); + return err; +} + +static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags) +{ + bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS)); + bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS)); + + return flow_flag_test(flow, INGRESS) == dir_ingress && + flow_flag_test(flow, EGRESS) == dir_egress; +} + +int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, + struct flow_cls_offload *f, unsigned long flags) +{ + struct rhashtable *tc_ht = get_tc_ht(priv, flags); + struct mlx5e_tc_flow *flow; + int err; + + rcu_read_lock(); + flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params); + if (!flow || !same_flow_direction(flow, flags)) { + err = -EINVAL; + goto errout; + } + + /* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag + * set. 
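+ * flow_flag_test_and_set() follows test_and_set_bit() semantics and
+ * returns the previous value of the flag, so only the first deleter
+ * proceeds; a concurrent delete sees the flag already set and returns
+ * -EINVAL.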
+ */ + if (flow_flag_test_and_set(flow, DELETED)) { + err = -EINVAL; + goto errout; + } + rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params); + rcu_read_unlock(); + + trace_mlx5e_delete_flower(f); + mlx5e_flow_put(priv, flow); + + mlx5_esw_put(priv->mdev); + return 0; + +errout: + rcu_read_unlock(); + return err; +} + +int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, + struct flow_cls_offload *f, unsigned long flags) +{ + struct mlx5_devcom *devcom = priv->mdev->priv.devcom; + struct rhashtable *tc_ht = get_tc_ht(priv, flags); + struct mlx5_eswitch *peer_esw; + struct mlx5e_tc_flow *flow; + struct mlx5_fc *counter; + u64 lastuse = 0; + u64 packets = 0; + u64 bytes = 0; + int err = 0; + + rcu_read_lock(); + flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie, + tc_ht_params)); + rcu_read_unlock(); + if (IS_ERR(flow)) + return PTR_ERR(flow); + + if (!same_flow_direction(flow, flags)) { + err = -EINVAL; + goto errout; + } + + if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) { + counter = mlx5e_tc_get_counter(flow); + if (!counter) + goto errout; + + mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); + } + + /* Under multipath it's possible for one rule to be currently + * un-offloaded while the other rule is offloaded. + */ + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) + goto out; + + if (flow_flag_test(flow, DUP) && + flow_flag_test(flow->peer_flow, OFFLOADED)) { + u64 bytes2; + u64 packets2; + u64 lastuse2; + + counter = mlx5e_tc_get_counter(flow->peer_flow); + if (!counter) + goto no_peer_counter; + mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2); + + bytes += bytes2; + packets += packets2; + lastuse = max_t(u64, lastuse, lastuse2); + } + +no_peer_counter: + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); +out: + flow_stats_update(&f->stats, bytes, packets, 0, lastuse, + FLOW_ACTION_HW_STATS_DELAYED); + trace_mlx5e_stats_flower(f); +errout: + mlx5e_flow_put(priv, flow); + return err; +} + +static int apply_police_params(struct mlx5e_priv *priv, u64 rate, + struct netlink_ext_ack *extack) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch *esw; + u32 rate_mbps = 0; + u16 vport_num; + int err; + + vport_num = rpriv->rep->vport; + if (vport_num >= MLX5_VPORT_ECPF) { + NL_SET_ERR_MSG_MOD(extack, + "Ingress rate limit is supported only for Eswitch ports connected to VFs"); + return -EOPNOTSUPP; + } + + esw = priv->mdev->priv.eswitch; + /* rate is given in bytes/sec. + * First convert to bits/sec and then round to the nearest mbit/secs. + * mbit means million bits. + * Moreover, if rate is non zero we choose to configure to a minimum of + * 1 mbit/sec. 
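+ * As an illustrative example: a rate of 1,250,000 bytes/sec is
+ * 10,000,000 bits/sec; adding 500,000 and dividing by 1,000,000
+ * rounds it to 10 Mbit/sec.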
+ */ + if (rate) { + rate = (rate * BITS_PER_BYTE) + 500000; + do_div(rate, 1000000); + rate_mbps = max_t(u32, rate, 1); + } + + err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps); + if (err) + NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware"); + + return err; +} + +int mlx5e_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, + struct netlink_ext_ack *extack) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + const struct flow_action_entry *act; + int err; + int i; + + if (!flow_action_has_entries(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "matchall called with no action"); + return -EINVAL; + } + + if (!flow_offload_has_one_action(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action"); + return -EOPNOTSUPP; + } + + if (!flow_action_basic_hw_stats_check(flow_action, extack)) { + NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported"); + return -EOPNOTSUPP; + } + + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_POLICE: + if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not continue"); + return -EOPNOTSUPP; + } + + err = mlx5e_policer_validate(flow_action, act, extack); + if (err) + return err; + + err = apply_police_params(priv, act->police.rate_bytes_ps, extack); + if (err) + return err; + + rpriv->prev_vf_vport_stats = priv->stats.vf_vport; + break; + default: + NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall"); + return -EOPNOTSUPP; + } + } + + return 0; +} + +int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + struct netlink_ext_ack *extack = ma->common.extack; + + if (ma->common.prio != 1) { + NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported"); + return -EINVAL; + } + + return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack); +} + +int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + struct netlink_ext_ack *extack = ma->common.extack; + + return apply_police_params(priv, 0, extack); +} + +void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct rtnl_link_stats64 cur_stats; + u64 dbytes; + u64 dpkts; + + cur_stats = priv->stats.vf_vport; + dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets; + dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes; + rpriv->prev_vf_vport_stats = cur_stats; + flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies, + FLOW_ACTION_HW_STATS_DELAYED); +} + +static void 
mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, + struct mlx5e_priv *peer_priv) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_core_dev *peer_mdev = peer_priv->mdev; + struct mlx5e_hairpin_entry *hpe, *tmp; + LIST_HEAD(init_wait_list); + u16 peer_vhca_id; + int bkt; + + if (!mlx5e_same_hw_devs(priv, peer_priv)) + return; + + peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id); + + mutex_lock(&tc->hairpin_tbl_lock); + hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist) + if (refcount_inc_not_zero(&hpe->refcnt)) + list_add(&hpe->dead_peer_wait_list, &init_wait_list); + mutex_unlock(&tc->hairpin_tbl_lock); + + list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) { + wait_for_completion(&hpe->res_ready); + if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id) + mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair); + + mlx5e_hairpin_put(priv, hpe); + } +} + +static int mlx5e_tc_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct mlx5e_priv *peer_priv; + struct mlx5e_tc_table *tc; + struct mlx5e_priv *priv; + + if (ndev->netdev_ops != &mlx5e_netdev_ops || + event != NETDEV_UNREGISTER || + ndev->reg_state == NETREG_REGISTERED) + return NOTIFY_DONE; + + tc = container_of(this, struct mlx5e_tc_table, netdevice_nb); + priv = tc->priv; + peer_priv = netdev_priv(ndev); + if (priv == peer_priv || + !(priv->netdev->features & NETIF_F_HW_TC)) + return NOTIFY_DONE; + + mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv); + + return NOTIFY_DONE; +} + +static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev) +{ + int tc_grp_size, tc_tbl_size; + u32 max_flow_counter; + + max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, max_flow_counter_15_0); + + tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE); + + tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS, + BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size))); + + return tc_tbl_size; +} + +static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_flow_table **ft = &tc->miss_t; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + int err = 0; + + ft_attr.max_fte = 1; + ft_attr.autogroup.max_num_groups = 1; + ft_attr.level = MLX5E_TC_MISS_LEVEL; + ft_attr.prio = 0; + ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); + + *ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(*ft)) { + err = PTR_ERR(*ft); + netdev_err(priv->netdev, "failed to create tc nic miss table err=%d\n", err); + } + + return err; +} + +static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + + mlx5_destroy_flow_table(tc->miss_t); +} + +int mlx5e_tc_nic_init(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_core_dev *dev = priv->mdev; + struct mapping_ctx *chains_mapping; + struct mlx5_chains_attr attr = {}; + u64 mapping_id; + int err; + + mlx5e_mod_hdr_tbl_init(&tc->mod_hdr); + mutex_init(&tc->t_lock); + mutex_init(&tc->hairpin_tbl_lock); + hash_init(tc->hairpin_tbl); + tc->priv = priv; + + err = rhashtable_init(&tc->ht, &tc_ht_params); + if (err) + return err; + + lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key); + lockdep_init_map(&tc->ht.run_work.lockdep_map, "tc_ht_wq_key", 
&tc_ht_wq_key, 0); + + mapping_id = mlx5_query_nic_system_image_guid(dev); + + chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN, + sizeof(struct mlx5_mapped_obj), + MLX5E_TC_TABLE_CHAIN_TAG_MASK, true); + + if (IS_ERR(chains_mapping)) { + err = PTR_ERR(chains_mapping); + goto err_mapping; + } + tc->mapping = chains_mapping; + + err = mlx5e_tc_nic_create_miss_table(priv); + if (err) + goto err_chains; + + if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) + attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED | + MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; + attr.ns = MLX5_FLOW_NAMESPACE_KERNEL; + attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev); + attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS; + attr.default_ft = tc->miss_t; + attr.mapping = chains_mapping; + + tc->chains = mlx5_chains_create(dev, &attr); + if (IS_ERR(tc->chains)) { + err = PTR_ERR(tc->chains); + goto err_miss; + } + + tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL); + tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr, + MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act); + + tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; + err = register_netdevice_notifier_dev_net(priv->netdev, + &tc->netdevice_nb, + &tc->netdevice_nn); + if (err) { + tc->netdevice_nb.notifier_call = NULL; + mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n"); + goto err_reg; + } + + return 0; + +err_reg: + mlx5_tc_ct_clean(tc->ct); + mlx5e_tc_post_act_destroy(tc->post_act); + mlx5_chains_destroy(tc->chains); +err_miss: + mlx5e_tc_nic_destroy_miss_table(priv); +err_chains: + mapping_destroy(chains_mapping); +err_mapping: + rhashtable_destroy(&tc->ht); + return err; +} + +static void _mlx5e_tc_del_flow(void *ptr, void *arg) +{ + struct mlx5e_tc_flow *flow = ptr; + struct mlx5e_priv *priv = flow->priv; + + mlx5e_tc_del_flow(priv, flow); + kfree(flow); +} + +void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + + if (tc->netdevice_nb.notifier_call) + unregister_netdevice_notifier_dev_net(priv->netdev, + &tc->netdevice_nb, + &tc->netdevice_nn); + + mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr); + mutex_destroy(&tc->hairpin_tbl_lock); + + rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL); + + if (!IS_ERR_OR_NULL(tc->t)) { + mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL); + tc->t = NULL; + } + mutex_destroy(&tc->t_lock); + + mlx5_tc_ct_clean(tc->ct); + mlx5e_tc_post_act_destroy(tc->post_act); + mapping_destroy(tc->mapping); + mlx5_chains_destroy(tc->chains); + mlx5e_tc_nic_destroy_miss_table(priv); +} + +int mlx5e_tc_ht_init(struct rhashtable *tc_ht) +{ + int err; + + err = rhashtable_init(tc_ht, &tc_ht_params); + if (err) + return err; + + lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key); + lockdep_init_map(&tc_ht->run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0); + + return 0; +} + +void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht) +{ + rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); +} + +int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv) +{ + const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts); + struct mlx5e_rep_priv *rpriv; + struct mapping_ctx *mapping; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + u64 mapping_id; + int err = 0; + + rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); + priv = netdev_priv(rpriv->netdev); + esw = priv->mdev->priv.eswitch; + + uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw), + 
MLX5_FLOW_NAMESPACE_FDB); + uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev), + esw_chains(esw), + &esw->offloads.mod_hdr, + MLX5_FLOW_NAMESPACE_FDB, + uplink_priv->post_act); + + uplink_priv->int_port_priv = mlx5e_tc_int_port_init(netdev_priv(priv->netdev)); + + uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act); + + mapping_id = mlx5_query_nic_system_image_guid(esw->dev); + + mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL, + sizeof(struct tunnel_match_key), + TUNNEL_INFO_BITS_MASK, true); + + if (IS_ERR(mapping)) { + err = PTR_ERR(mapping); + goto err_tun_mapping; + } + uplink_priv->tunnel_mapping = mapping; + + /* Two last values are reserved for stack devices slow path table mark + * and bridge ingress push mark. + */ + mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS, + sz_enc_opts, ENC_OPTS_BITS_MASK - 2, true); + if (IS_ERR(mapping)) { + err = PTR_ERR(mapping); + goto err_enc_opts_mapping; + } + uplink_priv->tunnel_enc_opts_mapping = mapping; + + uplink_priv->encap = mlx5e_tc_tun_init(priv); + if (IS_ERR(uplink_priv->encap)) { + err = PTR_ERR(uplink_priv->encap); + goto err_register_fib_notifier; + } + + mlx5_esw_offloads_devcom_init(esw); + + return 0; + +err_register_fib_notifier: + mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); +err_enc_opts_mapping: + mapping_destroy(uplink_priv->tunnel_mapping); +err_tun_mapping: + mlx5e_tc_sample_cleanup(uplink_priv->tc_psample); + mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv); + mlx5_tc_ct_clean(uplink_priv->ct_priv); + netdev_warn(priv->netdev, + "Failed to initialize tc (eswitch), err: %d", err); + mlx5e_tc_post_act_destroy(uplink_priv->post_act); + return err; +} + +void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv) +{ + struct mlx5e_rep_priv *rpriv; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + + rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); + priv = netdev_priv(rpriv->netdev); + esw = priv->mdev->priv.eswitch; + + mlx5_esw_offloads_devcom_cleanup(esw); + + mlx5e_tc_tun_cleanup(uplink_priv->encap); + + mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); + mapping_destroy(uplink_priv->tunnel_mapping); + + mlx5e_tc_sample_cleanup(uplink_priv->tc_psample); + mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv); + mlx5_tc_ct_clean(uplink_priv->ct_priv); + mlx5e_flow_meters_cleanup(uplink_priv->flow_meters); + mlx5e_tc_post_act_destroy(uplink_priv->post_act); +} + +int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) +{ + struct rhashtable *tc_ht = get_tc_ht(priv, flags); + + return atomic_read(&tc_ht->nelems); +} + +void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw) +{ + struct mlx5e_tc_flow *flow, *tmp; + + list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer) + __mlx5e_tc_del_fdb_peer_flow(flow); +} + +void mlx5e_tc_reoffload_flows_work(struct work_struct *work) +{ + struct mlx5_rep_uplink_priv *rpriv = + container_of(work, struct mlx5_rep_uplink_priv, + reoffload_flows_work); + struct mlx5e_tc_flow *flow, *tmp; + + mutex_lock(&rpriv->unready_flows_lock); + list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) { + if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL)) + unready_flow_del(flow); + } + mutex_unlock(&rpriv->unready_flows_lock); +} + +static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, + struct flow_cls_offload *cls_flower, + unsigned long flags) +{ + switch (cls_flower->command) { + case FLOW_CLS_REPLACE: + return 
mlx5e_configure_flower(priv->netdev, priv, cls_flower, + flags); + case FLOW_CLS_DESTROY: + return mlx5e_delete_flower(priv->netdev, priv, cls_flower, + flags); + case FLOW_CLS_STATS: + return mlx5e_stats_flower(priv->netdev, priv, cls_flower, + flags); + default: + return -EOPNOTSUPP; + } +} + +int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + unsigned long flags = MLX5_TC_FLAG(INGRESS); + struct mlx5e_priv *priv = cb_priv; + + if (!priv->netdev || !netif_device_present(priv->netdev)) + return -EOPNOTSUPP; + + if (mlx5e_is_uplink_rep(priv)) + flags |= MLX5_TC_FLAG(ESW_OFFLOAD); + else + flags |= MLX5_TC_FLAG(NIC_OFFLOAD); + + switch (type) { + case TC_SETUP_CLSFLOWER: + return mlx5e_setup_tc_cls_flower(priv, type_data, flags); + default: + return -EOPNOTSUPP; + } +} + +bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, + struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + u32 chain = 0, chain_tag, reg_b, zone_restore_id; + struct mlx5e_priv *priv = netdev_priv(skb->dev); + struct mlx5_mapped_obj mapped_obj; + struct tc_skb_ext *tc_skb_ext; + struct mlx5e_tc_table *tc; + int err; + + reg_b = be32_to_cpu(cqe->ft_metadata); + tc = mlx5e_fs_get_tc(priv->fs); + chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK; + + err = mapping_find(tc->mapping, chain_tag, &mapped_obj); + if (err) { + netdev_dbg(priv->netdev, + "Couldn't find chain for chain tag: %d, err: %d\n", + chain_tag, err); + return false; + } + + if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) { + chain = mapped_obj.chain; + tc_skb_ext = tc_skb_ext_alloc(skb); + if (WARN_ON(!tc_skb_ext)) + return false; + + tc_skb_ext->chain = chain; + + zone_restore_id = (reg_b >> MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) & + ESW_ZONE_ID_MASK; + + if (!mlx5e_tc_ct_restore_flow(tc->ct, skb, + zone_restore_id)) + return false; + } else { + netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type); + return false; + } +#endif /* CONFIG_NET_TC_SKB_EXT */ + + return true; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h new file mode 100644 index 000000000..edd5f0944 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -0,0 +1,389 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_EN_TC_H__ +#define __MLX5_EN_TC_H__ + +#include <net/pkt_cls.h> +#include "en.h" +#include "eswitch.h" +#include "en/tc_ct.h" +#include "en/tc_tun.h" +#include "en/tc/int_port.h" +#include "en/tc/meter.h" +#include "en_rep.h" + +#define MLX5E_TC_FLOW_ID_MASK 0x0000ffff + +#ifdef CONFIG_MLX5_ESWITCH + +#define NIC_FLOW_ATTR_SZ (sizeof(struct mlx5_flow_attr) +\ + sizeof(struct mlx5_nic_flow_attr)) +#define ESW_FLOW_ATTR_SZ (sizeof(struct mlx5_flow_attr) +\ + sizeof(struct mlx5_esw_flow_attr)) +#define ns_to_attr_sz(ns) (((ns) == MLX5_FLOW_NAMESPACE_FDB) ?\ + ESW_FLOW_ATTR_SZ :\ + NIC_FLOW_ATTR_SZ) + +struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc); +int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags); + +struct mlx5e_tc_update_priv { + struct net_device *fwd_dev; +}; + +struct mlx5_nic_flow_attr { + u32 flow_tag; + u32 hairpin_tirn; + struct mlx5_flow_table *hairpin_ft; +}; + +struct mlx5_flow_attr { + u32 action; + struct mlx5_fc *counter; + struct mlx5_modify_hdr *modify_hdr; + struct mlx5_ct_attr ct_attr; + struct mlx5e_sample_attr sample_attr; + struct mlx5e_meter_attr meter_attr; + struct mlx5e_tc_flow_parse_attr *parse_attr; + u32 chain; + u16 prio; + u32 dest_chain; + struct mlx5_flow_table *ft; + struct mlx5_flow_table *dest_ft; + u8 inner_match_level; + u8 outer_match_level; + u8 tun_ip_version; + int tunnel_id; /* mapped tunnel id */ + u32 flags; + u32 exe_aso_type; + struct list_head list; + struct mlx5e_post_act_handle *post_act_handle; + struct { + /* Indicate whether the parsed flow should be counted for lag mode decision + * making + */ + bool count; + } lag; + /* keep this union last */ + union { + struct mlx5_esw_flow_attr esw_attr[0]; + struct mlx5_nic_flow_attr nic_attr[0]; + }; +}; + +enum { + MLX5_ATTR_FLAG_VLAN_HANDLED = BIT(0), + MLX5_ATTR_FLAG_SLOW_PATH = BIT(1), + MLX5_ATTR_FLAG_NO_IN_PORT = BIT(2), + MLX5_ATTR_FLAG_SRC_REWRITE = BIT(3), + MLX5_ATTR_FLAG_SAMPLE = BIT(4), + MLX5_ATTR_FLAG_ACCEPT = BIT(5), + MLX5_ATTR_FLAG_CT = BIT(6), +}; + +/* Returns true if any of the flags that require skipping further TC/NF processing are set. */ +static inline bool +mlx5e_tc_attr_flags_skip(u32 attr_flags) +{ + return attr_flags & (MLX5_ATTR_FLAG_SLOW_PATH | MLX5_ATTR_FLAG_ACCEPT); +} + +struct mlx5_rx_tun_attr { + u16 decap_vport; + union { + __be32 v4; + struct in6_addr v6; + } src_ip; /* Valid if decap_vport is not zero */ + union { + __be32 v4; + struct in6_addr v6; + } dst_ip; /* Valid if decap_vport is not zero */ +}; + +#define MLX5E_TC_TABLE_CHAIN_TAG_BITS 16 +#define MLX5E_TC_TABLE_CHAIN_TAG_MASK GENMASK(MLX5E_TC_TABLE_CHAIN_TAG_BITS - 1, 0) + +#define MLX5E_TC_MAX_INT_PORT_NUM (8) + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + +struct tunnel_match_key { + struct flow_dissector_key_control enc_control; + struct flow_dissector_key_keyid enc_key_id; + struct flow_dissector_key_ports enc_tp; + struct flow_dissector_key_ip enc_ip; + union { + struct flow_dissector_key_ipv4_addrs enc_ipv4; + struct flow_dissector_key_ipv6_addrs enc_ipv6; + }; + + int filter_ifindex; +}; + +struct tunnel_match_enc_opts { + struct flow_dissector_key_enc_opts key; + struct flow_dissector_key_enc_opts mask; +}; + +/* Tunnel_id mapping is TUNNEL_INFO_BITS + ENC_OPTS_BITS. 
+ * Upper TUNNEL_INFO_BITS for general tunnel info. + * Lower ENC_OPTS_BITS bits for enc_opts. + */ +#define TUNNEL_INFO_BITS 12 +#define TUNNEL_INFO_BITS_MASK GENMASK(TUNNEL_INFO_BITS - 1, 0) +#define ENC_OPTS_BITS 11 +#define ENC_OPTS_BITS_MASK GENMASK(ENC_OPTS_BITS - 1, 0) +#define TUNNEL_ID_BITS (TUNNEL_INFO_BITS + ENC_OPTS_BITS) +#define TUNNEL_ID_MASK GENMASK(TUNNEL_ID_BITS - 1, 0) + +enum { + MLX5E_TC_FLAG_INGRESS_BIT, + MLX5E_TC_FLAG_EGRESS_BIT, + MLX5E_TC_FLAG_NIC_OFFLOAD_BIT, + MLX5E_TC_FLAG_ESW_OFFLOAD_BIT, + MLX5E_TC_FLAG_FT_OFFLOAD_BIT, + MLX5E_TC_FLAG_LAST_EXPORTED_BIT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT, +}; + +#define MLX5_TC_FLAG(flag) BIT(MLX5E_TC_FLAG_##flag##_BIT) + +int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv); +void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv); + +int mlx5e_tc_ht_init(struct rhashtable *tc_ht); +void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht); + +int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, + struct flow_cls_offload *f, unsigned long flags); +int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, + struct flow_cls_offload *f, unsigned long flags); + +int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, + struct flow_cls_offload *f, unsigned long flags); + +int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *f); +int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *f); +void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma); + +struct mlx5e_encap_entry; +void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct list_head *flow_list); +void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct list_head *flow_list); +bool mlx5e_encap_take(struct mlx5e_encap_entry *e); +void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e); + +void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list); +void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list); + +struct mlx5e_neigh_hash_entry; +struct mlx5e_encap_entry * +mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e); +void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); + +void mlx5e_tc_reoffload_flows_work(struct work_struct *work); + +enum mlx5e_tc_attr_to_reg { + CHAIN_TO_REG, + VPORT_TO_REG, + TUNNEL_TO_REG, + CTSTATE_TO_REG, + ZONE_TO_REG, + ZONE_RESTORE_TO_REG, + MARK_TO_REG, + LABELS_TO_REG, + FTEID_TO_REG, + NIC_CHAIN_TO_REG, + NIC_ZONE_RESTORE_TO_REG, + PACKET_COLOR_TO_REG, +}; + +struct mlx5e_tc_attr_to_reg_mapping { + int mfield; /* rewrite field */ + int moffset; /* bit offset of mfield */ + int mlen; /* bits to rewrite/match */ + + int soffset; /* byte offset of spec for match */ +}; + +extern struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[]; + +#define MLX5_REG_MAPPING_MOFFSET(reg_id) (mlx5e_tc_attr_to_reg_mappings[reg_id].moffset) +#define MLX5_REG_MAPPING_MBITS(reg_id) (mlx5e_tc_attr_to_reg_mappings[reg_id].mlen) +#define MLX5_REG_MAPPING_MASK(reg_id) (GENMASK(mlx5e_tc_attr_to_reg_mappings[reg_id].mlen - 1, 0)) + +bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, + struct net_device *out_dev); + +int mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5_flow_namespace_type ns, + enum 
mlx5e_tc_attr_to_reg type, + u32 data); + +void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5e_tc_attr_to_reg type, + int act_id, u32 data); + +void mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, + enum mlx5e_tc_attr_to_reg type, + u32 data, + u32 mask); + +void mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec, + enum mlx5e_tc_attr_to_reg type, + u32 *data, + u32 *mask); + +int mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5_flow_namespace_type ns, + enum mlx5e_tc_attr_to_reg type, + u32 data); + +int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr); + +void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev, + struct flow_match_basic *match, bool outer, + void *headers_c, void *headers_v); + +int mlx5e_tc_nic_init(struct mlx5e_priv *priv); +void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv); + +int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv); + +struct mlx5_flow_handle * +mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); +void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + +struct mlx5_flow_handle * +mlx5_tc_rule_insert(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); +void +mlx5_tc_rule_delete(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + +bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev); +int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, + u16 *vport); + +int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + int ifindex, + enum mlx5e_tc_int_port_type type, + u32 *action, + int out_index); +#else /* CONFIG_MLX5_CLS_ACT */ +static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} +static inline int mlx5e_tc_ht_init(struct rhashtable *tc_ht) { return 0; } +static inline void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht) {} +static inline int +mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) +{ return -EOPNOTSUPP; } + +#endif /* CONFIG_MLX5_CLS_ACT */ + +struct mlx5_flow_attr *mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type); + +struct mlx5_flow_handle * +mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); +void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + +#else /* CONFIG_MLX5_ESWITCH */ +static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} +static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv, + unsigned long flags) +{ + return 0; +} + +static inline int +mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) +{ return -EOPNOTSUPP; } +#endif + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) +struct mlx5e_tc_table *mlx5e_tc_table_alloc(void); +void mlx5e_tc_table_free(struct mlx5e_tc_table *tc); +static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe) +{ +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + u32 
chain, reg_b; + + reg_b = be32_to_cpu(cqe->ft_metadata); + + if (reg_b >> (MLX5E_TC_TABLE_CHAIN_TAG_BITS + ESW_ZONE_ID_BITS)) + return false; + + chain = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK; + if (chain) + return true; +#endif + + return false; +} + +bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb); +#else /* CONFIG_MLX5_CLS_ACT */ +static inline struct mlx5e_tc_table *mlx5e_tc_table_alloc(void) { return NULL; } +static inline void mlx5e_tc_table_free(struct mlx5e_tc_table *tc) {} +static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe) +{ return false; } +static inline bool +mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb) +{ return true; } +#endif + +#endif /* __MLX5_EN_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c new file mode 100644 index 000000000..a6d7e2cfc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -0,0 +1,1058 @@ +/* + * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/tcp.h> +#include <linux/if_vlan.h> +#include <net/geneve.h> +#include <net/dsfield.h> +#include "en.h" +#include "en/txrx.h" +#include "ipoib/ipoib.h" +#include "en_accel/en_accel.h" +#include "en_accel/ipsec_rxtx.h" +#include "en_accel/macsec.h" +#include "en/ptp.h" +#include <net/ipv6.h> + +static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma) +{ + int i; + + for (i = 0; i < num_dma; i++) { + struct mlx5e_sq_dma *last_pushed_dma = + mlx5e_dma_get(sq, --sq->dma_fifo_pc); + + mlx5e_tx_dma_unmap(sq->pdev, last_pushed_dma); + } +} + +static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb) +{ +#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN) + + return max(skb_network_offset(skb), MLX5E_MIN_INLINE); +} + +static inline int mlx5e_skb_l3_header_offset(struct sk_buff *skb) +{ + if (skb_transport_header_was_set(skb)) + return skb_transport_offset(skb); + else + return mlx5e_skb_l2_header_offset(skb); +} + +static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode, + struct sk_buff *skb) +{ + u16 hlen; + + switch (mode) { + case MLX5_INLINE_MODE_NONE: + return 0; + case MLX5_INLINE_MODE_TCP_UDP: + hlen = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb)); + if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb)) + hlen += VLAN_HLEN; + break; + case MLX5_INLINE_MODE_IP: + hlen = mlx5e_skb_l3_header_offset(skb); + break; + case MLX5_INLINE_MODE_L2: + default: + hlen = mlx5e_skb_l2_header_offset(skb); + } + return min_t(u16, hlen, skb_headlen(skb)); +} + +#define MLX5_UNSAFE_MEMCPY_DISCLAIMER \ + "This copy has been bounds-checked earlier in " \ + "mlx5i_sq_calc_wqe_attr() and intentionally " \ + "crosses a flex array boundary. Since it is " \ + "performance sensitive, splitting the copy is " \ + "undesirable." 
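+
+/* mlx5e_insert_vlan() below rebuilds the L2 header inline with an 802.1Q tag
+ * inserted: it copies the two MAC addresses (2 * ETH_ALEN bytes), writes the
+ * VLAN protocol and TCI taken from the skb, and then copies the remaining
+ * ihs - 2 * ETH_ALEN header bytes after the tag.
+ */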
+ +static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) +{ + struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start; + int cpy1_sz = 2 * ETH_ALEN; + int cpy2_sz = ihs - cpy1_sz; + + memcpy(&vhdr->addrs, skb->data, cpy1_sz); + vhdr->h_vlan_proto = skb->vlan_proto; + vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb)); + unsafe_memcpy(&vhdr->h_vlan_encapsulated_proto, + skb->data + cpy1_sz, + cpy2_sz, + MLX5_UNSAFE_MEMCPY_DISCLAIMER); +} + +static inline void +mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5e_accel_tx_state *accel, + struct mlx5_wqe_eth_seg *eseg) +{ + if (unlikely(mlx5e_ipsec_txwqe_build_eseg_csum(sq, skb, eseg))) + return; + + if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; + if (skb->encapsulation) { + eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM | + MLX5_ETH_WQE_L4_INNER_CSUM; + sq->stats->csum_partial_inner++; + } else { + eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; + sq->stats->csum_partial++; + } +#ifdef CONFIG_MLX5_EN_TLS + } else if (unlikely(accel && accel->tls.tls_tisn)) { + eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; + sq->stats->csum_partial++; +#endif + } else + sq->stats->csum_none++; +} + +/* Returns the number of header bytes that we plan + * to inline later in the transmit descriptor + */ +static inline u16 +mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb, int *hopbyhop) +{ + struct mlx5e_sq_stats *stats = sq->stats; + u16 ihs; + + *hopbyhop = 0; + if (skb->encapsulation) { + ihs = skb_inner_tcp_all_headers(skb); + stats->tso_inner_packets++; + stats->tso_inner_bytes += skb->len - ihs; + } else { + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { + ihs = skb_transport_offset(skb) + sizeof(struct udphdr); + } else { + ihs = skb_tcp_all_headers(skb); + if (ipv6_has_hopopt_jumbo(skb)) { + *hopbyhop = sizeof(struct hop_jumbo_hdr); + ihs -= sizeof(struct hop_jumbo_hdr); + } + } + stats->tso_packets++; + stats->tso_bytes += skb->len - ihs - *hopbyhop; + } + + return ihs; +} + +static inline int +mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb, + unsigned char *skb_data, u16 headlen, + struct mlx5_wqe_data_seg *dseg) +{ + dma_addr_t dma_addr = 0; + u8 num_dma = 0; + int i; + + if (headlen) { + dma_addr = dma_map_single(sq->pdev, skb_data, headlen, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) + goto dma_unmap_wqe_err; + + dseg->addr = cpu_to_be64(dma_addr); + dseg->lkey = sq->mkey_be; + dseg->byte_count = cpu_to_be32(headlen); + + mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE); + num_dma++; + dseg++; + } + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + int fsz = skb_frag_size(frag); + + dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) + goto dma_unmap_wqe_err; + + dseg->addr = cpu_to_be64(dma_addr); + dseg->lkey = sq->mkey_be; + dseg->byte_count = cpu_to_be32(fsz); + + mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); + num_dma++; + dseg++; + } + + return num_dma; + +dma_unmap_wqe_err: + mlx5e_dma_unmap_wqe_err(sq, num_dma); + return -ENOMEM; +} + +struct mlx5e_tx_attr { + u32 num_bytes; + u16 headlen; + u16 ihs; + __be16 mss; + u16 insz; + u8 opcode; + u8 hopbyhop; +}; + +struct mlx5e_tx_wqe_attr { + u16 ds_cnt; + u16 ds_cnt_inl; + u16 ds_cnt_ids; + u8 num_wqebbs; +}; + +static u8 +mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, 
struct sk_buff *skb, + struct mlx5e_accel_tx_state *accel) +{ + u8 mode; + +#ifdef CONFIG_MLX5_EN_TLS + if (accel && accel->tls.tls_tisn) + return MLX5_INLINE_MODE_TCP_UDP; +#endif + + mode = sq->min_inline_mode; + + if (skb_vlan_tag_present(skb) && + test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state)) + mode = max_t(u8, MLX5_INLINE_MODE_L2, mode); + + return mode; +} + +static void mlx5e_sq_xmit_prepare(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5e_accel_tx_state *accel, + struct mlx5e_tx_attr *attr) +{ + struct mlx5e_sq_stats *stats = sq->stats; + + if (skb_is_gso(skb)) { + int hopbyhop; + u16 ihs = mlx5e_tx_get_gso_ihs(sq, skb, &hopbyhop); + + *attr = (struct mlx5e_tx_attr) { + .opcode = MLX5_OPCODE_LSO, + .mss = cpu_to_be16(skb_shinfo(skb)->gso_size), + .ihs = ihs, + .num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs, + .headlen = skb_headlen(skb) - ihs - hopbyhop, + .hopbyhop = hopbyhop, + }; + + stats->packets += skb_shinfo(skb)->gso_segs; + } else { + u8 mode = mlx5e_tx_wqe_inline_mode(sq, skb, accel); + u16 ihs = mlx5e_calc_min_inline(mode, skb); + + *attr = (struct mlx5e_tx_attr) { + .opcode = MLX5_OPCODE_SEND, + .mss = cpu_to_be16(0), + .ihs = ihs, + .num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN), + .headlen = skb_headlen(skb) - ihs, + }; + + stats->packets++; + } + + attr->insz = mlx5e_accel_tx_ids_len(sq, accel); + stats->bytes += attr->num_bytes; +} + +static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_attr *attr, + struct mlx5e_tx_wqe_attr *wqe_attr) +{ + u16 ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT; + u16 ds_cnt_inl = 0; + u16 ds_cnt_ids = 0; + + /* Sync the calculation with MLX5E_MAX_TX_WQEBBS. */ + + if (attr->insz) + ds_cnt_ids = DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + attr->insz, + MLX5_SEND_WQE_DS); + + ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags + ds_cnt_ids; + if (attr->ihs) { + u16 inl = attr->ihs - INL_HDR_START_SZ; + + if (skb_vlan_tag_present(skb)) + inl += VLAN_HLEN; + + ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS); + if (WARN_ON_ONCE(ds_cnt_inl > MLX5E_MAX_TX_INLINE_DS)) + netdev_warn(skb->dev, "ds_cnt_inl = %u > max %u\n", ds_cnt_inl, + (u16)MLX5E_MAX_TX_INLINE_DS); + ds_cnt += ds_cnt_inl; + } + + *wqe_attr = (struct mlx5e_tx_wqe_attr) { + .ds_cnt = ds_cnt, + .ds_cnt_inl = ds_cnt_inl, + .ds_cnt_ids = ds_cnt_ids, + .num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS), + }; +} + +static void mlx5e_tx_skb_update_hwts_flags(struct sk_buff *skb) +{ + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; +} + +static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq) +{ + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room))) { + netif_tx_stop_queue(sq->txq); + sq->stats->stopped++; + } +} + +static void mlx5e_tx_flush(struct mlx5e_txqsq *sq) +{ + struct mlx5e_tx_wqe_info *wi; + struct mlx5e_tx_wqe *wqe; + u16 pi; + + /* Must not be called when a MPWQE session is active but empty. 
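+ * mlx5e_tx_mpwqe_ensure_complete() below closes any session that is
+ * still open before the NOP WQE is posted.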
*/ + mlx5e_tx_mpwqe_ensure_complete(sq); + + pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + wi = &sq->db.wqe_info[pi]; + + *wi = (struct mlx5e_tx_wqe_info) { + .num_wqebbs = 1, + }; + + wqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl); +} + +static inline void +mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, + const struct mlx5e_tx_attr *attr, + const struct mlx5e_tx_wqe_attr *wqe_attr, u8 num_dma, + struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg, + bool xmit_more) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + bool send_doorbell; + + *wi = (struct mlx5e_tx_wqe_info) { + .skb = skb, + .num_bytes = attr->num_bytes, + .num_dma = num_dma, + .num_wqebbs = wqe_attr->num_wqebbs, + .num_fifo_pkts = 0, + }; + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | wqe_attr->ds_cnt); + + mlx5e_tx_skb_update_hwts_flags(skb); + + sq->pc += wi->num_wqebbs; + + mlx5e_tx_check_stop(sq); + + if (unlikely(sq->ptpsq)) { + mlx5e_skb_cb_hwtstamp_init(skb); + mlx5e_skb_fifo_push(&sq->ptpsq->skb_fifo, skb); + if (!netif_tx_queue_stopped(sq->txq) && + !mlx5e_skb_fifo_has_room(&sq->ptpsq->skb_fifo)) { + netif_tx_stop_queue(sq->txq); + sq->stats->stopped++; + } + skb_get(skb); + } + + send_doorbell = __netdev_tx_sent_queue(sq->txq, attr->num_bytes, xmit_more); + if (send_doorbell) + mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg); +} + +static void +mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, + const struct mlx5e_tx_attr *attr, const struct mlx5e_tx_wqe_attr *wqe_attr, + struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more) +{ + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5_wqe_eth_seg *eseg; + struct mlx5_wqe_data_seg *dseg; + struct mlx5e_tx_wqe_info *wi; + u16 ihs = attr->ihs; + struct ipv6hdr *h6; + struct mlx5e_sq_stats *stats = sq->stats; + int num_dma; + + stats->xmit_more += xmit_more; + + /* fill wqe */ + wi = &sq->db.wqe_info[pi]; + cseg = &wqe->ctrl; + eseg = &wqe->eth; + dseg = wqe->data; + + eseg->mss = attr->mss; + + if (ihs) { + u8 *start = eseg->inline_hdr.start; + + if (unlikely(attr->hopbyhop)) { + /* remove the HBH header. + * Layout: [Ethernet header][IPv6 header][HBH][TCP header] + */ + if (skb_vlan_tag_present(skb)) { + mlx5e_insert_vlan(start, skb, ETH_HLEN + sizeof(*h6)); + ihs += VLAN_HLEN; + h6 = (struct ipv6hdr *)(start + sizeof(struct vlan_ethhdr)); + } else { + unsafe_memcpy(start, skb->data, + ETH_HLEN + sizeof(*h6), + MLX5_UNSAFE_MEMCPY_DISCLAIMER); + h6 = (struct ipv6hdr *)(start + ETH_HLEN); + } + h6->nexthdr = IPPROTO_TCP; + /* Copy the TCP header after the IPv6 one */ + memcpy(h6 + 1, + skb->data + ETH_HLEN + sizeof(*h6) + + sizeof(struct hop_jumbo_hdr), + tcp_hdrlen(skb)); + /* Leave ipv6 payload_len set to 0, as LSO v2 specs request. 
*/ + } else if (skb_vlan_tag_present(skb)) { + mlx5e_insert_vlan(start, skb, ihs); + ihs += VLAN_HLEN; + stats->added_vlan_packets++; + } else { + unsafe_memcpy(eseg->inline_hdr.start, skb->data, + attr->ihs, + MLX5_UNSAFE_MEMCPY_DISCLAIMER); + } + eseg->inline_hdr.sz |= cpu_to_be16(ihs); + dseg += wqe_attr->ds_cnt_inl; + } else if (skb_vlan_tag_present(skb)) { + eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN); + if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD)) + eseg->insert.type |= cpu_to_be16(MLX5_ETH_WQE_SVLAN); + eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb)); + stats->added_vlan_packets++; + } + + dseg += wqe_attr->ds_cnt_ids; + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs + attr->hopbyhop, + attr->headlen, dseg); + if (unlikely(num_dma < 0)) + goto err_drop; + + mlx5e_txwqe_complete(sq, skb, attr, wqe_attr, num_dma, wi, cseg, xmit_more); + + return; + +err_drop: + stats->dropped++; + dev_kfree_skb_any(skb); + mlx5e_tx_flush(sq); +} + +static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr) +{ + return !skb_is_nonlinear(skb) && !skb_vlan_tag_present(skb) && !attr->ihs && + !attr->insz && !mlx5e_macsec_skb_is_offload(skb); +} + +static bool mlx5e_tx_mpwqe_same_eseg(struct mlx5e_txqsq *sq, struct mlx5_wqe_eth_seg *eseg) +{ + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + + /* Assumes the session is already running and has at least one packet. */ + return !memcmp(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN); +} + +static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq, + struct mlx5_wqe_eth_seg *eseg) +{ + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + struct mlx5e_tx_wqe *wqe; + u16 pi; + + pi = mlx5e_txqsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs); + wqe = MLX5E_TX_FETCH_WQE(sq, pi); + net_prefetchw(wqe->data); + + *session = (struct mlx5e_tx_mpwqe) { + .wqe = wqe, + .bytes_count = 0, + .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT, + .pkt_count = 0, + .inline_on = 0, + }; + + memcpy(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN); + + sq->stats->mpwqe_blks++; +} + +static bool mlx5e_tx_mpwqe_session_is_active(struct mlx5e_txqsq *sq) +{ + return sq->mpwqe.wqe; +} + +static void mlx5e_tx_mpwqe_add_dseg(struct mlx5e_txqsq *sq, struct mlx5e_xmit_data *txd) +{ + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + struct mlx5_wqe_data_seg *dseg; + + dseg = (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count; + + session->pkt_count++; + session->bytes_count += txd->len; + + dseg->addr = cpu_to_be64(txd->dma_addr); + dseg->byte_count = cpu_to_be32(txd->len); + dseg->lkey = sq->mkey_be; + session->ds_count++; + + sq->stats->mpwqe_pkts++; +} + +static struct mlx5_wqe_ctrl_seg *mlx5e_tx_mpwqe_session_complete(struct mlx5e_txqsq *sq) +{ + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + u8 ds_count = session->ds_count; + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5e_tx_wqe_info *wi; + u16 pi; + + cseg = &session->wqe->ctrl; + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count); + + pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + wi = &sq->db.wqe_info[pi]; + *wi = (struct mlx5e_tx_wqe_info) { + .skb = NULL, + .num_bytes = session->bytes_count, + .num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS), + .num_dma = session->pkt_count, + .num_fifo_pkts = session->pkt_count, + }; + + sq->pc += wi->num_wqebbs; + + session->wqe = NULL; + + mlx5e_tx_check_stop(sq); + + return cseg; +} + +static void +mlx5e_sq_xmit_mpwqe(struct 
mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg, bool xmit_more) +{ + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5e_xmit_data txd; + + txd.data = skb->data; + txd.len = skb->len; + + txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr))) + goto err_unmap; + + if (!mlx5e_tx_mpwqe_session_is_active(sq)) { + mlx5e_tx_mpwqe_session_start(sq, eseg); + } else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) { + mlx5e_tx_mpwqe_session_complete(sq); + mlx5e_tx_mpwqe_session_start(sq, eseg); + } + + sq->stats->xmit_more += xmit_more; + + mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE); + mlx5e_skb_fifo_push(&sq->db.skb_fifo, skb); + mlx5e_tx_mpwqe_add_dseg(sq, &txd); + mlx5e_tx_skb_update_hwts_flags(skb); + + if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe, sq->max_sq_mpw_wqebbs))) { + /* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */ + cseg = mlx5e_tx_mpwqe_session_complete(sq); + + if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more)) + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); + } else if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more)) { + /* Might stop the queue, but we were asked to ring the doorbell anyway. */ + cseg = mlx5e_tx_mpwqe_session_complete(sq); + + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); + } + + return; + +err_unmap: + mlx5e_dma_unmap_wqe_err(sq, 1); + sq->stats->dropped++; + dev_kfree_skb_any(skb); + mlx5e_tx_flush(sq); +} + +void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq) +{ + /* Unlikely in non-MPWQE workloads; not important in MPWQE workloads. */ + if (unlikely(mlx5e_tx_mpwqe_session_is_active(sq))) + mlx5e_tx_mpwqe_session_complete(sq); +} + +static void mlx5e_cqe_ts_id_eseg(struct mlx5e_ptpsq *ptpsq, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg) +{ + if (ptpsq->ts_cqe_ctr_mask && unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + eseg->flow_table_metadata = cpu_to_be32(ptpsq->skb_fifo_pc & + ptpsq->ts_cqe_ctr_mask); +} + +static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq, + struct sk_buff *skb, struct mlx5e_accel_tx_state *accel, + struct mlx5_wqe_eth_seg *eseg, u16 ihs) +{ + mlx5e_accel_tx_eseg(priv, skb, eseg, ihs); + mlx5e_txwqe_build_eseg_csum(sq, skb, accel, eseg); + if (unlikely(sq->ptpsq)) + mlx5e_cqe_ts_id_eseg(sq->ptpsq, skb, eseg); +} + +netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_accel_tx_state accel = {}; + struct mlx5e_tx_wqe_attr wqe_attr; + struct mlx5e_tx_attr attr; + struct mlx5e_tx_wqe *wqe; + struct mlx5e_txqsq *sq; + u16 pi; + + /* All changes to txq2sq are performed in sync with mlx5e_xmit, when the + * queue being changed is disabled, and smp_wmb guarantees that the + * changes are visible before mlx5e_xmit tries to read from txq2sq. It + * guarantees that the value of txq2sq[qid] doesn't change while + * mlx5e_xmit is running on queue number qid. smb_wmb is paired with + * HARD_TX_LOCK around ndo_start_xmit, which serves as an ACQUIRE. + */ + sq = priv->txq2sq[skb_get_queue_mapping(skb)]; + if (unlikely(!sq)) { + /* Two cases when sq can be NULL: + * 1. The HTB node is registered, and mlx5e_select_queue + * selected its queue ID, but the SQ itself is not yet created. + * 2. HTB SQ creation failed. Similar to the previous case, but + * the SQ won't be created. 
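+ * In both cases the skb is freed below and NETDEV_TX_OK is returned, so the stack does not retry the packet.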
+ */ + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + /* May send SKBs and WQEs. */ + if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel))) + return NETDEV_TX_OK; + + mlx5e_sq_xmit_prepare(sq, skb, &accel, &attr); + + if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state)) { + if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) { + struct mlx5_wqe_eth_seg eseg = {}; + + mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg, attr.ihs); + mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more()); + return NETDEV_TX_OK; + } + + mlx5e_tx_mpwqe_ensure_complete(sq); + } + + mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr); + pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs); + wqe = MLX5E_TX_FETCH_WQE(sq, pi); + + /* May update the WQE, but may not post other WQEs. */ + mlx5e_accel_tx_finish(sq, wqe, &accel, + (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl)); + mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth, attr.ihs); + mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more()); + + return NETDEV_TX_OK; +} + +void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more) +{ + struct mlx5e_tx_wqe_attr wqe_attr; + struct mlx5e_tx_attr attr; + struct mlx5e_tx_wqe *wqe; + u16 pi; + + mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr); + mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr); + pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs); + wqe = MLX5E_TX_FETCH_WQE(sq, pi); + mlx5e_txwqe_build_eseg_csum(sq, skb, NULL, &wqe->eth); + mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, xmit_more); +} + +static void mlx5e_tx_wi_dma_unmap(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, + u32 *dma_fifo_cc) +{ + int i; + + for (i = 0; i < wi->num_dma; i++) { + struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++); + + mlx5e_tx_dma_unmap(sq->pdev, dma); + } +} + +static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_cqe64 *cqe, int napi_budget) +{ + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + struct skb_shared_hwtstamps hwts = {}; + u64 ts = get_cqe_ts(cqe); + + hwts.hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, ts); + if (sq->ptpsq) + mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_CQE_HWTSTAMP, + hwts.hwtstamp, sq->ptpsq->cq_stats); + else + skb_tstamp_tx(skb, &hwts); + } + + napi_consume_skb(skb, napi_budget); +} + +static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, + struct mlx5_cqe64 *cqe, int napi_budget) +{ + int i; + + for (i = 0; i < wi->num_fifo_pkts; i++) { + struct sk_buff *skb = mlx5e_skb_fifo_pop(&sq->db.skb_fifo); + + mlx5e_consume_skb(sq, skb, cqe, napi_budget); + } +} + +void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq) +{ + if (netif_tx_queue_stopped(sq->txq) && + mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) && + mlx5e_ptpsq_fifo_has_room(sq) && + !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { + netif_tx_wake_queue(sq->txq); + sq->stats->wake++; + } +} + +bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) +{ + struct mlx5e_sq_stats *stats; + struct mlx5e_txqsq *sq; + struct mlx5_cqe64 *cqe; + u32 dma_fifo_cc; + u32 nbytes; + u16 npkts; + u16 sqcc; + int i; + + sq = container_of(cq, struct mlx5e_txqsq, cq); + + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + return false; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) + return false; + + stats = sq->stats; + + npkts = 0; + nbytes = 0; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * 
otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + /* avoid dirtying sq cache line every cqe */ + dma_fifo_cc = sq->dma_fifo_cc; + + i = 0; + do { + struct mlx5e_tx_wqe_info *wi; + u16 wqe_counter; + bool last_wqe; + u16 ci; + + mlx5_cqwq_pop(&cq->wq); + + wqe_counter = be16_to_cpu(cqe->wqe_counter); + + do { + last_wqe = (sqcc == wqe_counter); + + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); + wi = &sq->db.wqe_info[ci]; + + sqcc += wi->num_wqebbs; + + if (likely(wi->skb)) { + mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc); + mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget); + + npkts++; + nbytes += wi->num_bytes; + continue; + } + + if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, + &dma_fifo_cc))) + continue; + + if (wi->num_fifo_pkts) { + mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc); + mlx5e_tx_wi_consume_fifo_skbs(sq, wi, cqe, napi_budget); + + npkts += wi->num_fifo_pkts; + nbytes += wi->num_bytes; + } + } while (!last_wqe); + + if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) { + if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, + &sq->state)) { + mlx5e_dump_error_cqe(&sq->cq, sq->sqn, + (struct mlx5_err_cqe *)cqe); + mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs); + queue_work(cq->priv->wq, &sq->recover_work); + } + stats->cqe_err++; + } + + } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); + + stats->cqes += i; + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->dma_fifo_cc = dma_fifo_cc; + sq->cc = sqcc; + + netdev_tx_completed_queue(sq->txq, npkts, nbytes); + + mlx5e_txqsq_wake(sq); + + return (i == MLX5E_TX_CQ_POLL_BUDGET); +} + +static void mlx5e_tx_wi_kfree_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi) +{ + int i; + + for (i = 0; i < wi->num_fifo_pkts; i++) + dev_kfree_skb_any(mlx5e_skb_fifo_pop(&sq->db.skb_fifo)); +} + +void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) +{ + struct mlx5e_tx_wqe_info *wi; + u32 dma_fifo_cc, nbytes = 0; + u16 ci, sqcc, npkts = 0; + + sqcc = sq->cc; + dma_fifo_cc = sq->dma_fifo_cc; + + while (sqcc != sq->pc) { + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); + wi = &sq->db.wqe_info[ci]; + + sqcc += wi->num_wqebbs; + + if (likely(wi->skb)) { + mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc); + dev_kfree_skb_any(wi->skb); + + npkts++; + nbytes += wi->num_bytes; + continue; + } + + if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc))) + continue; + + if (wi->num_fifo_pkts) { + mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc); + mlx5e_tx_wi_kfree_fifo_skbs(sq, wi); + + npkts += wi->num_fifo_pkts; + nbytes += wi->num_bytes; + } + } + + sq->dma_fifo_cc = dma_fifo_cc; + sq->cc = sqcc; + + netdev_tx_completed_queue(sq->txq, npkts, nbytes); +} + +#ifdef CONFIG_MLX5_CORE_IPOIB +static inline void +mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey, + struct mlx5_wqe_datagram_seg *dseg) +{ + memcpy(&dseg->av, av, sizeof(struct mlx5_av)); + dseg->av.dqp_dct = cpu_to_be32(dqpn | MLX5_EXTENDED_UD_AV); + dseg->av.key.qkey.qkey = cpu_to_be32(dqkey); +} + +static void mlx5i_sq_calc_wqe_attr(struct sk_buff *skb, + const struct mlx5e_tx_attr *attr, + struct mlx5e_tx_wqe_attr *wqe_attr) +{ + u16 ds_cnt = sizeof(struct mlx5i_tx_wqe) / MLX5_SEND_WQE_DS; + u16 ds_cnt_inl = 0; + + ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags; + + if (attr->ihs) { + u16 inl = attr->ihs - INL_HDR_START_SZ; + + ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS); + ds_cnt += ds_cnt_inl; + } + + *wqe_attr = (struct 
mlx5e_tx_wqe_attr) { + .ds_cnt = ds_cnt, + .ds_cnt_inl = ds_cnt_inl, + .num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS), + }; +} + +void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more) +{ + struct mlx5e_tx_wqe_attr wqe_attr; + struct mlx5e_tx_attr attr; + struct mlx5i_tx_wqe *wqe; + + struct mlx5_wqe_datagram_seg *datagram; + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5_wqe_eth_seg *eseg; + struct mlx5_wqe_data_seg *dseg; + struct mlx5e_tx_wqe_info *wi; + + struct mlx5e_sq_stats *stats = sq->stats; + int num_dma; + u16 pi; + + mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr); + mlx5i_sq_calc_wqe_attr(skb, &attr, &wqe_attr); + + pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs); + wqe = MLX5I_SQ_FETCH_WQE(sq, pi); + + stats->xmit_more += xmit_more; + + /* fill wqe */ + wi = &sq->db.wqe_info[pi]; + cseg = &wqe->ctrl; + datagram = &wqe->datagram; + eseg = &wqe->eth; + dseg = wqe->data; + + mlx5i_txwqe_build_datagram(av, dqpn, dqkey, datagram); + + mlx5e_txwqe_build_eseg_csum(sq, skb, NULL, eseg); + + eseg->mss = attr.mss; + + if (attr.ihs) { + if (unlikely(attr.hopbyhop)) { + struct ipv6hdr *h6; + + /* remove the HBH header. + * Layout: [Ethernet header][IPv6 header][HBH][TCP header] + */ + unsafe_memcpy(eseg->inline_hdr.start, skb->data, + ETH_HLEN + sizeof(*h6), + MLX5_UNSAFE_MEMCPY_DISCLAIMER); + h6 = (struct ipv6hdr *)((char *)eseg->inline_hdr.start + ETH_HLEN); + h6->nexthdr = IPPROTO_TCP; + /* Copy the TCP header after the IPv6 one */ + unsafe_memcpy(h6 + 1, + skb->data + ETH_HLEN + sizeof(*h6) + + sizeof(struct hop_jumbo_hdr), + tcp_hdrlen(skb), + MLX5_UNSAFE_MEMCPY_DISCLAIMER); + /* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */ + } else { + unsafe_memcpy(eseg->inline_hdr.start, skb->data, + attr.ihs, + MLX5_UNSAFE_MEMCPY_DISCLAIMER); + } + eseg->inline_hdr.sz = cpu_to_be16(attr.ihs); + dseg += wqe_attr.ds_cnt_inl; + } + + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr.ihs + attr.hopbyhop, + attr.headlen, dseg); + if (unlikely(num_dma < 0)) + goto err_drop; + + mlx5e_txwqe_complete(sq, skb, &attr, &wqe_attr, num_dma, wi, cseg, xmit_more); + + return; + +err_drop: + stats->dropped++; + dev_kfree_skb_any(skb); + mlx5e_tx_flush(sq); +} +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c new file mode 100644 index 000000000..44547b22a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/irq.h> +#include <net/xdp_sock_drv.h> +#include "en.h" +#include "en/txrx.h" +#include "en/xdp.h" +#include "en/xsk/rx.h" +#include "en/xsk/tx.h" +#include "en_accel/ktls_txrx.h" + +static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) +{ + int current_cpu = smp_processor_id(); + + return cpumask_test_cpu(current_cpu, c->aff_mask); +} + +static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq) +{ + struct mlx5e_sq_stats *stats = sq->stats; + struct dim_sample dim_sample = {}; + + if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state))) + return; + + dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); + net_dim(&sq->dim, dim_sample); +} + +static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq) +{ + struct mlx5e_rq_stats *stats = rq->stats; + struct dim_sample dim_sample = {}; + + if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state))) + return; + + dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); + net_dim(&rq->dim, dim_sample); +} + +void mlx5e_trigger_irq(struct mlx5e_icosq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_tx_wqe *nopwqe; + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + + sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { + .wqe_type = MLX5E_ICOSQ_WQE_NOP, + .num_wqebbs = 1, + }; + + nopwqe = mlx5e_post_nop(wq, sq->sqn, &sq->pc); + mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl); +} + +static bool mlx5e_napi_xsk_post(struct mlx5e_xdpsq *xsksq, struct mlx5e_rq *xskrq) +{ + bool need_wakeup = xsk_uses_need_wakeup(xskrq->xsk_pool); + bool busy_xsk = false, xsk_rx_alloc_err; + + /* If SQ is empty, there are no TX completions to trigger NAPI, so set + * need_wakeup. Do it before queuing packets for TX to avoid race + * condition with userspace. + */ + if (need_wakeup && xsksq->pc == xsksq->cc) + xsk_set_tx_need_wakeup(xsksq->xsk_pool); + busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET); + /* If we queued some packets for TX, no need for wakeup anymore. */ + if (need_wakeup && xsksq->pc != xsksq->cc) + xsk_clear_tx_need_wakeup(xsksq->xsk_pool); + + /* If WQ is empty, RX won't trigger NAPI, so set need_wakeup. Do it + * before refilling to avoid race condition with userspace. + */ + if (need_wakeup && !mlx5e_rqwq_get_cur_sz(xskrq)) + xsk_set_rx_need_wakeup(xskrq->xsk_pool); + xsk_rx_alloc_err = INDIRECT_CALL_2(xskrq->post_wqes, + mlx5e_post_rx_mpwqes, + mlx5e_post_rx_wqes, + xskrq); + /* Ask for wakeup if WQ is not full after refill. 
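+ * Without need_wakeup support, a refill that could not fill the WQ keeps NAPI busy instead.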
*/ + if (!need_wakeup) + busy_xsk |= xsk_rx_alloc_err; + else if (xsk_rx_alloc_err) + xsk_set_rx_need_wakeup(xskrq->xsk_pool); + else + xsk_clear_rx_need_wakeup(xskrq->xsk_pool); + + return busy_xsk; +} + +int mlx5e_napi_poll(struct napi_struct *napi, int budget) +{ + struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, + napi); + struct mlx5e_ch_stats *ch_stats = c->stats; + struct mlx5e_xdpsq *xsksq = &c->xsksq; + struct mlx5e_txqsq __rcu **qos_sqs; + struct mlx5e_rq *xskrq = &c->xskrq; + struct mlx5e_rq *rq = &c->rq; + bool aff_change = false; + bool busy_xsk = false; + bool busy = false; + int work_done = 0; + u16 qos_sqs_size; + bool xsk_open; + int i; + + rcu_read_lock(); + + qos_sqs = rcu_dereference(c->qos_sqs); + + xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + + ch_stats->poll++; + + for (i = 0; i < c->num_tc; i++) + busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); + + if (unlikely(qos_sqs)) { + smp_rmb(); /* Pairs with mlx5e_qos_alloc_queues. */ + qos_sqs_size = READ_ONCE(c->qos_sqs_size); + + for (i = 0; i < qos_sqs_size; i++) { + struct mlx5e_txqsq *sq = rcu_dereference(qos_sqs[i]); + + if (sq) + busy |= mlx5e_poll_tx_cq(&sq->cq, budget); + } + } + + /* budget=0 means we may be in IRQ context, do as little as possible */ + if (unlikely(!budget)) + goto out; + + busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq); + + if (c->xdp) + busy |= mlx5e_poll_xdpsq_cq(&c->rq_xdpsq.cq); + + if (xsk_open) + work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget); + + if (likely(budget - work_done)) + work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done); + + busy |= work_done == budget; + + mlx5e_poll_ico_cq(&c->icosq.cq); + if (mlx5e_poll_ico_cq(&c->async_icosq.cq)) + /* Don't clear the flag if nothing was polled to prevent + * queueing more WQEs and overflowing the async ICOSQ. 
+ */ + clear_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state); + + /* Keep after async ICOSQ CQ poll */ + if (unlikely(mlx5e_ktls_rx_pending_resync_list(c, budget))) + busy |= mlx5e_ktls_rx_handle_resync_list(c, budget); + + busy |= INDIRECT_CALL_2(rq->post_wqes, + mlx5e_post_rx_mpwqes, + mlx5e_post_rx_wqes, + rq); + if (xsk_open) { + busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq); + busy_xsk |= mlx5e_napi_xsk_post(xsksq, xskrq); + } + + busy |= busy_xsk; + + if (busy) { + if (likely(mlx5e_channel_no_affinity_change(c))) { + work_done = budget; + goto out; + } + ch_stats->aff_change++; + aff_change = true; + if (budget && work_done == budget) + work_done--; + } + + if (unlikely(!napi_complete_done(napi, work_done))) + goto out; + + ch_stats->arm++; + + for (i = 0; i < c->num_tc; i++) { + mlx5e_handle_tx_dim(&c->sq[i]); + mlx5e_cq_arm(&c->sq[i].cq); + } + if (unlikely(qos_sqs)) { + for (i = 0; i < qos_sqs_size; i++) { + struct mlx5e_txqsq *sq = rcu_dereference(qos_sqs[i]); + + if (sq) { + mlx5e_handle_tx_dim(sq); + mlx5e_cq_arm(&sq->cq); + } + } + } + + mlx5e_handle_rx_dim(rq); + + mlx5e_cq_arm(&rq->cq); + mlx5e_cq_arm(&c->icosq.cq); + mlx5e_cq_arm(&c->async_icosq.cq); + mlx5e_cq_arm(&c->xdpsq.cq); + + if (xsk_open) { + mlx5e_handle_rx_dim(xskrq); + mlx5e_cq_arm(&xsksq->cq); + mlx5e_cq_arm(&xskrq->cq); + } + + if (unlikely(aff_change && busy_xsk)) { + mlx5e_trigger_irq(&c->icosq); + ch_stats->force_irq++; + } + +out: + rcu_read_unlock(); + + return work_done; +} + +void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe) +{ + struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); + + napi_schedule(cq->napi); + cq->event_ctr++; + cq->ch_stats->events++; +} + +void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event) +{ + struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); + struct net_device *netdev = cq->netdev; + + netdev_err(netdev, "%s: cqn=0x%.6x event=0x%.2x\n", + __func__, mcq->cqn, event); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c new file mode 100644 index 000000000..e112b5685 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -0,0 +1,1150 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2013-2021, Mellanox Technologies inc. All rights reserved. + */ + +#include <linux/interrupt.h> +#include <linux/notifier.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/eq.h> +#ifdef CONFIG_RFS_ACCEL +#include <linux/cpu_rmap.h> +#endif +#include "mlx5_core.h" +#include "lib/eq.h" +#include "fpga/core.h" +#include "eswitch.h" +#include "lib/clock.h" +#include "diag/fw_tracer.h" +#include "mlx5_irq.h" +#include "devlink.h" + +enum { + MLX5_EQE_OWNER_INIT_VAL = 0x1, +}; + +enum { + MLX5_EQ_STATE_ARMED = 0x9, + MLX5_EQ_STATE_FIRED = 0xa, + MLX5_EQ_STATE_ALWAYS_ARMED = 0xb, +}; + +enum { + MLX5_EQ_DOORBEL_OFFSET = 0x40, +}; + +/* budget must be smaller than MLX5_NUM_SPARE_EQE to guarantee that we update + * the ci before we polled all the entries in the EQ. MLX5_NUM_SPARE_EQE is + * used to set the EQ size, budget must be smaller than the EQ size. 
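+ * The static_assert below enforces this relationship at build time.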
+ */ +enum { + MLX5_EQ_POLLING_BUDGET = 128, +}; + +static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE); + +struct mlx5_eq_table { + struct list_head comp_eqs_list; + struct mlx5_eq_async pages_eq; + struct mlx5_eq_async cmd_eq; + struct mlx5_eq_async async_eq; + + struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX]; + + /* Since CQ DB is stored in async_eq */ + struct mlx5_nb cq_err_nb; + + struct mutex lock; /* sync async eqs creations */ + int num_comp_eqs; + struct mlx5_irq_table *irq_table; + struct mlx5_irq **comp_irqs; + struct mlx5_irq *ctrl_irq; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif +}; + +#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ + (1ull << MLX5_EVENT_TYPE_COMM_EST) | \ + (1ull << MLX5_EVENT_TYPE_SQ_DRAINED) | \ + (1ull << MLX5_EVENT_TYPE_CQ_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_WQ_CATAS_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_PATH_MIG_FAILED) | \ + (1ull << MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_WQ_ACCESS_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_PORT_CHANGE) | \ + (1ull << MLX5_EVENT_TYPE_SRQ_CATAS_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \ + (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)) + +static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_eq_in)] = {}; + + MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ); + MLX5_SET(destroy_eq_in, in, eq_number, eqn); + return mlx5_cmd_exec_in(dev, destroy_eq, in); +} + +/* caller must eventually call mlx5_cq_put on the returned cq */ +static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) +{ + struct mlx5_cq_table *table = &eq->cq_table; + struct mlx5_core_cq *cq = NULL; + + rcu_read_lock(); + cq = radix_tree_lookup(&table->tree, cqn); + if (likely(cq)) + mlx5_cq_hold(cq); + rcu_read_unlock(); + + return cq; +} + +static int mlx5_eq_comp_int(struct notifier_block *nb, + __always_unused unsigned long action, + __always_unused void *data) +{ + struct mlx5_eq_comp *eq_comp = + container_of(nb, struct mlx5_eq_comp, irq_nb); + struct mlx5_eq *eq = &eq_comp->core; + struct mlx5_eqe *eqe; + int num_eqes = 0; + u32 cqn = -1; + + eqe = next_eqe_sw(eq); + if (!eqe) + goto out; + + do { + struct mlx5_core_cq *cq; + + /* Make sure we read EQ entry contents after we've + * checked the ownership bit. + */ + dma_rmb(); + /* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */ + cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff; + + cq = mlx5_eq_cq_get(eq, cqn); + if (likely(cq)) { + ++cq->arm_sn; + cq->comp(cq, eqe); + mlx5_cq_put(cq); + } else { + dev_dbg_ratelimited(eq->dev->device, + "Completion event for bogus CQ 0x%x\n", cqn); + } + + ++eq->cons_index; + + } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); + +out: + eq_update_ci(eq, 1); + + if (cqn != -1) + tasklet_schedule(&eq_comp->tasklet_ctx.task); + + return 0; +} + +/* Some architectures don't latch interrupts when they are disabled, so using + * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to + * avoid losing them. It is not recommended to use it, unless this is the last + * resort. 
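+ * The return value is the number of EQEs consumed while the interrupt was disabled.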
+ */ +u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) +{ + u32 count_eqe; + + disable_irq(eq->core.irqn); + count_eqe = eq->core.cons_index; + mlx5_eq_comp_int(&eq->irq_nb, 0, NULL); + count_eqe = eq->core.cons_index - count_eqe; + enable_irq(eq->core.irqn); + + return count_eqe; +} + +static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, bool recovery, + unsigned long *flags) + __acquires(&eq->lock) +{ + if (!recovery) + spin_lock(&eq->lock); + else + spin_lock_irqsave(&eq->lock, *flags); +} + +static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, bool recovery, + unsigned long *flags) + __releases(&eq->lock) +{ + if (!recovery) + spin_unlock(&eq->lock); + else + spin_unlock_irqrestore(&eq->lock, *flags); +} + +enum async_eq_nb_action { + ASYNC_EQ_IRQ_HANDLER = 0, + ASYNC_EQ_RECOVER = 1, +}; + +static int mlx5_eq_async_int(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct mlx5_eq_async *eq_async = + container_of(nb, struct mlx5_eq_async, irq_nb); + struct mlx5_eq *eq = &eq_async->core; + struct mlx5_eq_table *eqt; + struct mlx5_core_dev *dev; + struct mlx5_eqe *eqe; + unsigned long flags; + int num_eqes = 0; + bool recovery; + + dev = eq->dev; + eqt = dev->priv.eq_table; + + recovery = action == ASYNC_EQ_RECOVER; + mlx5_eq_async_int_lock(eq_async, recovery, &flags); + + eqe = next_eqe_sw(eq); + if (!eqe) + goto out; + + do { + /* + * Make sure we read EQ entry contents after we've + * checked the ownership bit. + */ + dma_rmb(); + + atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe); + atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe); + + ++eq->cons_index; + + } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); + +out: + eq_update_ci(eq, 1); + mlx5_eq_async_int_unlock(eq_async, recovery, &flags); + + return unlikely(recovery) ? 
num_eqes : 0; +} + +void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_async *eq = &dev->priv.eq_table->cmd_eq; + int eqes; + + eqes = mlx5_eq_async_int(&eq->irq_nb, ASYNC_EQ_RECOVER, NULL); + if (eqes) + mlx5_core_warn(dev, "Recovered %d EQEs on cmd_eq\n", eqes); +} + +static void init_eq_buf(struct mlx5_eq *eq) +{ + struct mlx5_eqe *eqe; + int i; + + for (i = 0; i < eq_get_size(eq); i++) { + eqe = get_eqe(eq, i); + eqe->owner = MLX5_EQE_OWNER_INIT_VAL; + } +} + +static int +create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct mlx5_eq_param *param) +{ + u8 log_eq_size = order_base_2(param->nent + MLX5_NUM_SPARE_EQE); + struct mlx5_cq_table *cq_table = &eq->cq_table; + u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; + u8 log_eq_stride = ilog2(MLX5_EQE_SIZE); + struct mlx5_priv *priv = &dev->priv; + __be64 *pas; + u16 vecidx; + void *eqc; + int inlen; + u32 *in; + int err; + int i; + + /* Init CQ table */ + memset(cq_table, 0, sizeof(*cq_table)); + spin_lock_init(&cq_table->lock); + INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); + + eq->cons_index = 0; + + err = mlx5_frag_buf_alloc_node(dev, wq_get_byte_sz(log_eq_size, log_eq_stride), + &eq->frag_buf, dev->priv.numa_node); + if (err) + return err; + + mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc); + init_eq_buf(eq); + + eq->irq = param->irq; + vecidx = mlx5_irq_get_index(eq->irq); + + inlen = MLX5_ST_SZ_BYTES(create_eq_in) + + MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_buf; + } + + pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas); + mlx5_fill_page_frag_array(&eq->frag_buf, pas); + + MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ); + if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx)) + MLX5_SET(create_eq_in, in, uid, MLX5_SHARED_RESOURCE_UID); + + for (i = 0; i < 4; i++) + MLX5_ARRAY_SET64(create_eq_in, in, event_bitmask, i, + param->mask[i]); + + eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry); + MLX5_SET(eqc, eqc, log_eq_size, eq->fbc.log_sz); + MLX5_SET(eqc, eqc, uar_page, priv->uar->index); + MLX5_SET(eqc, eqc, intr, vecidx); + MLX5_SET(eqc, eqc, log_page_size, + eq->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (err) + goto err_in; + + eq->vecidx = vecidx; + eq->eqn = MLX5_GET(create_eq_out, out, eq_number); + eq->irqn = pci_irq_vector(dev->pdev, vecidx); + eq->dev = dev; + eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; + + err = mlx5_debug_eq_add(dev, eq); + if (err) + goto err_eq; + + kvfree(in); + return 0; + +err_eq: + mlx5_cmd_destroy_eq(dev, eq->eqn); + +err_in: + kvfree(in); + +err_buf: + mlx5_frag_buf_free(dev, &eq->frag_buf); + return err; +} + +/** + * mlx5_eq_enable - Enable EQ for receiving EQEs + * @dev : Device which owns the eq + * @eq : EQ to enable + * @nb : Notifier call block + * + * Must be called after EQ is created in device. + * + * @return: 0 if no error + */ +int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb) +{ + int err; + + err = mlx5_irq_attach_nb(eq->irq, nb); + if (!err) + eq_update_ci(eq, 1); + + return err; +} +EXPORT_SYMBOL(mlx5_eq_enable); + +/** + * mlx5_eq_disable - Disable EQ for receiving EQEs + * @dev : Device which owns the eq + * @eq : EQ to disable + * @nb : Notifier call block + * + * Must be called before EQ is destroyed. 
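+ * Detaches @nb from the EQ's IRQ, so no further EQEs are delivered to it.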
+ */ +void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb) +{ + mlx5_irq_detach_nb(eq->irq, nb); +} +EXPORT_SYMBOL(mlx5_eq_disable); + +static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + int err; + + mlx5_debug_eq_remove(dev, eq); + + err = mlx5_cmd_destroy_eq(dev, eq->eqn); + if (err) + mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", + eq->eqn); + + mlx5_frag_buf_free(dev, &eq->frag_buf); + return err; +} + +int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) +{ + struct mlx5_cq_table *table = &eq->cq_table; + int err; + + spin_lock(&table->lock); + err = radix_tree_insert(&table->tree, cq->cqn, cq); + spin_unlock(&table->lock); + + return err; +} + +void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) +{ + struct mlx5_cq_table *table = &eq->cq_table; + struct mlx5_core_cq *tmp; + + spin_lock(&table->lock); + tmp = radix_tree_delete(&table->tree, cq->cqn); + spin_unlock(&table->lock); + + if (!tmp) { + mlx5_core_dbg(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", + eq->eqn, cq->cqn); + return; + } + + if (tmp != cq) + mlx5_core_dbg(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", + eq->eqn, cq->cqn); +} + +int mlx5_eq_table_init(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *eq_table; + int i; + + eq_table = kvzalloc_node(sizeof(*eq_table), GFP_KERNEL, + dev->priv.numa_node); + if (!eq_table) + return -ENOMEM; + + dev->priv.eq_table = eq_table; + + mlx5_eq_debugfs_init(dev); + + mutex_init(&eq_table->lock); + for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++) + ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]); + + eq_table->irq_table = mlx5_irq_table_get(dev); + return 0; +} + +void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) +{ + mlx5_eq_debugfs_cleanup(dev); + kvfree(dev->priv.eq_table); +} + +/* Async EQs */ + +static int create_async_eq(struct mlx5_core_dev *dev, + struct mlx5_eq *eq, struct mlx5_eq_param *param) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + int err; + + mutex_lock(&eq_table->lock); + err = create_map_eq(dev, eq, param); + mutex_unlock(&eq_table->lock); + return err; +} + +static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + int err; + + mutex_lock(&eq_table->lock); + err = destroy_unmap_eq(dev, eq); + mutex_unlock(&eq_table->lock); + return err; +} + +static int cq_err_event_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_eq_table *eqt; + struct mlx5_core_cq *cq; + struct mlx5_eqe *eqe; + struct mlx5_eq *eq; + u32 cqn; + + /* type == MLX5_EVENT_TYPE_CQ_ERROR */ + + eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb); + eq = &eqt->async_eq.core; + eqe = data; + + cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; + mlx5_core_warn(eq->dev, "CQ error on CQN 0x%x, syndrome 0x%x\n", + cqn, eqe->data.cq_err.syndrome); + + cq = mlx5_eq_cq_get(eq, cqn); + if (unlikely(!cq)) { + mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn); + return NOTIFY_OK; + } + + if (cq->event) + cq->event(cq, type); + + mlx5_cq_put(cq); + + return NOTIFY_OK; +} + +static void gather_user_async_events(struct mlx5_core_dev *dev, u64 mask[4]) +{ + __be64 *user_unaffiliated_events; + __be64 *user_affiliated_events; + int i; + + user_affiliated_events = + MLX5_CAP_DEV_EVENT(dev, user_affiliated_events); + user_unaffiliated_events = + MLX5_CAP_DEV_EVENT(dev, user_unaffiliated_events); + + for (i = 0; i < 4; i++) + mask[i] |= 
be64_to_cpu(user_affiliated_events[i] | + user_unaffiliated_events[i]); +} + +static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4]) +{ + u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; + + if (MLX5_VPORT_MANAGER(dev)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE); + + if (MLX5_CAP_GEN(dev, general_notification_event)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_GENERAL_EVENT); + + if (MLX5_CAP_GEN(dev, port_module_event)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_PORT_MODULE_EVENT); + else + mlx5_core_dbg(dev, "port_module_event is not set\n"); + + if (MLX5_PPS_CAP(dev)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT); + + if (MLX5_CAP_GEN(dev, fpga)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR) | + (1ull << MLX5_EVENT_TYPE_FPGA_QP_ERROR); + if (MLX5_CAP_GEN_MAX(dev, dct)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED); + + if (MLX5_CAP_GEN(dev, temp_warn_event)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT); + + if (MLX5_CAP_MCAM_REG(dev, tracer_registers)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER); + + if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER); + + if (mlx5_eswitch_is_funcs_handler(dev)) + async_event_mask |= + (1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED); + + if (MLX5_CAP_GEN_MAX(dev, vhca_state)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_VHCA_STATE_CHANGE); + + if (MLX5_CAP_MACSEC(dev, log_max_macsec_offload)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_OBJECT_CHANGE); + + mask[0] = async_event_mask; + + if (MLX5_CAP_GEN(dev, event_cap)) + gather_user_async_events(dev, mask); +} + +static int +setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq, + struct mlx5_eq_param *param, const char *name) +{ + int err; + + eq->irq_nb.notifier_call = mlx5_eq_async_int; + spin_lock_init(&eq->lock); + + err = create_async_eq(dev, &eq->core, param); + if (err) { + mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err); + return err; + } + err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable %s EQ %d\n", name, err); + destroy_async_eq(dev, &eq->core); + } + return err; +} + +static void cleanup_async_eq(struct mlx5_core_dev *dev, + struct mlx5_eq_async *eq, const char *name) +{ + int err; + + mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); + err = destroy_async_eq(dev, &eq->core); + if (err) + mlx5_core_err(dev, "failed to destroy %s eq, err(%d)\n", + name, err); +} + +static u16 async_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, + &val); + if (!err) + return val.vu32; + mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err); + return MLX5_NUM_ASYNC_EQE; +} +static int create_async_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_param param = {}; + int err; + + /* All the async_eqs are using single IRQ, request one IRQ and share its + * index among all the async_eqs of this device. 
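+ * The control IRQ is released again in destroy_async_eqs() and on the error path below.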
+ */ + table->ctrl_irq = mlx5_ctrl_irq_request(dev); + if (IS_ERR(table->ctrl_irq)) + return PTR_ERR(table->ctrl_irq); + + MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR); + mlx5_eq_notifier_register(dev, &table->cq_err_nb); + + param = (struct mlx5_eq_param) { + .irq = table->ctrl_irq, + .nent = MLX5_NUM_CMD_EQE, + .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD, + }; + mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_CREATE_EQ); + err = setup_async_eq(dev, &table->cmd_eq, ¶m, "cmd"); + if (err) + goto err1; + + mlx5_cmd_use_events(dev); + mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); + + param = (struct mlx5_eq_param) { + .irq = table->ctrl_irq, + .nent = async_eq_depth_devlink_param_get(dev), + }; + + gather_async_events_mask(dev, param.mask); + err = setup_async_eq(dev, &table->async_eq, ¶m, "async"); + if (err) + goto err2; + + param = (struct mlx5_eq_param) { + .irq = table->ctrl_irq, + .nent = /* TODO: sriov max_vf + */ 1, + .mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST, + }; + + err = setup_async_eq(dev, &table->pages_eq, ¶m, "pages"); + if (err) + goto err3; + + return 0; + +err3: + cleanup_async_eq(dev, &table->async_eq, "async"); +err2: + mlx5_cmd_use_polling(dev); + cleanup_async_eq(dev, &table->cmd_eq, "cmd"); +err1: + mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); + mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); + mlx5_ctrl_irq_release(table->ctrl_irq); + return err; +} + +static void destroy_async_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + + cleanup_async_eq(dev, &table->pages_eq, "pages"); + cleanup_async_eq(dev, &table->async_eq, "async"); + mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_DESTROY_EQ); + mlx5_cmd_use_polling(dev); + cleanup_async_eq(dev, &table->cmd_eq, "cmd"); + mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); + mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); + mlx5_ctrl_irq_release(table->ctrl_irq); +} + +struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev) +{ + return &dev->priv.eq_table->async_eq.core; +} + +void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev) +{ + synchronize_irq(dev->priv.eq_table->async_eq.core.irqn); +} + +void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev) +{ + synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn); +} + +/* Generic EQ API for mlx5_core consumers + * Needed For RDMA ODP EQ for now + */ +struct mlx5_eq * +mlx5_eq_create_generic(struct mlx5_core_dev *dev, + struct mlx5_eq_param *param) +{ + struct mlx5_eq *eq = kvzalloc_node(sizeof(*eq), GFP_KERNEL, + dev->priv.numa_node); + int err; + + if (!eq) + return ERR_PTR(-ENOMEM); + + param->irq = dev->priv.eq_table->ctrl_irq; + err = create_async_eq(dev, eq, param); + if (err) { + kvfree(eq); + eq = ERR_PTR(err); + } + + return eq; +} +EXPORT_SYMBOL(mlx5_eq_create_generic); + +int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + int err; + + if (IS_ERR(eq)) + return -EINVAL; + + err = destroy_async_eq(dev, eq); + if (err) + goto out; + + kvfree(eq); +out: + return err; +} +EXPORT_SYMBOL(mlx5_eq_destroy_generic); + +struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc) +{ + u32 ci = eq->cons_index + cc; + u32 nent = eq_get_size(eq); + struct mlx5_eqe *eqe; + + eqe = get_eqe(eq, ci & (nent - 1)); + eqe = ((eqe->owner & 1) ^ !!(ci & nent)) ? NULL : eqe; + /* Make sure we read EQ entry contents after we've + * checked the ownership bit. 
+ */ + if (eqe) + dma_rmb(); + + return eqe; +} +EXPORT_SYMBOL(mlx5_eq_get_eqe); + +void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm) +{ + __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2); + u32 val; + + eq->cons_index += cc; + val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); + + __raw_writel((__force u32)cpu_to_be32(val), addr); + /* We still want ordering, just not swabbing, so add a barrier */ + wmb(); +} +EXPORT_SYMBOL(mlx5_eq_update_ci); + +static void comp_irqs_release(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + + if (mlx5_core_is_sf(dev)) + mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs); + else + mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs); + kfree(table->comp_irqs); +} + +static int comp_irqs_request(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + int ncomp_eqs = table->num_comp_eqs; + u16 *cpus; + int ret; + int i; + + ncomp_eqs = table->num_comp_eqs; + table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL); + if (!table->comp_irqs) + return -ENOMEM; + if (mlx5_core_is_sf(dev)) { + ret = mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs); + if (ret < 0) + goto free_irqs; + return ret; + } + + cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL); + if (!cpus) { + ret = -ENOMEM; + goto free_irqs; + } + for (i = 0; i < ncomp_eqs; i++) + cpus[i] = cpumask_local_spread(i, dev->priv.numa_node); + ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs); + kfree(cpus); + if (ret < 0) + goto free_irqs; + return ret; + +free_irqs: + kfree(table->comp_irqs); + return ret; +} + +static void destroy_comp_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq, *n; + + list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { + list_del(&eq->list); + mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); + if (destroy_unmap_eq(dev, &eq->core)) + mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", + eq->core.eqn); + tasklet_disable(&eq->tasklet_ctx.task); + kfree(eq); + } + comp_irqs_release(dev); +} + +static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, + &val); + if (!err) + return val.vu32; + mlx5_core_dbg(dev, "Failed to get param. using default. 
err = %d\n", err); + return MLX5_COMP_EQ_SIZE; +} + +static int create_comp_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq; + int ncomp_eqs; + int nent; + int err; + int i; + + ncomp_eqs = comp_irqs_request(dev); + if (ncomp_eqs < 0) + return ncomp_eqs; + INIT_LIST_HEAD(&table->comp_eqs_list); + nent = comp_eq_depth_devlink_param_get(dev); + + for (i = 0; i < ncomp_eqs; i++) { + struct mlx5_eq_param param = {}; + + eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, dev->priv.numa_node); + if (!eq) { + err = -ENOMEM; + goto clean; + } + + INIT_LIST_HEAD(&eq->tasklet_ctx.list); + INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); + spin_lock_init(&eq->tasklet_ctx.lock); + tasklet_setup(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb); + + eq->irq_nb.notifier_call = mlx5_eq_comp_int; + param = (struct mlx5_eq_param) { + .irq = table->comp_irqs[i], + .nent = nent, + }; + + err = create_map_eq(dev, &eq->core, ¶m); + if (err) + goto clean_eq; + err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); + if (err) { + destroy_unmap_eq(dev, &eq->core); + goto clean_eq; + } + + mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); + /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */ + list_add_tail(&eq->list, &table->comp_eqs_list); + } + + table->num_comp_eqs = ncomp_eqs; + return 0; + +clean_eq: + kfree(eq); +clean: + destroy_comp_eqs(dev); + return err; +} + +static int vector2eqnirqn(struct mlx5_core_dev *dev, int vector, int *eqn, + unsigned int *irqn) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq, *n; + int err = -ENOENT; + int i = 0; + + list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { + if (i++ == vector) { + if (irqn) + *irqn = eq->core.irqn; + if (eqn) + *eqn = eq->core.eqn; + err = 0; + break; + } + } + + return err; +} + +int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn) +{ + return vector2eqnirqn(dev, vector, eqn, NULL); +} +EXPORT_SYMBOL(mlx5_vector2eqn); + +int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn) +{ + return vector2eqnirqn(dev, vector, NULL, irqn); +} + +unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) +{ + return dev->priv.eq_table->num_comp_eqs; +} +EXPORT_SYMBOL(mlx5_comp_vectors_count); + +struct cpumask * +mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq, *n; + int i = 0; + + list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { + if (i++ == vector) + break; + } + + return mlx5_irq_get_affinity_mask(eq->core.irq); +} +EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask); + +#ifdef CONFIG_RFS_ACCEL +struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) +{ + return dev->priv.eq_table->rmap; +} +#endif + +struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq; + + list_for_each_entry(eq, &table->comp_eqs_list, list) { + if (eq->core.eqn == eqn) + return eq; + } + + return ERR_PTR(-ENOENT); +} + +static void clear_rmap(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_RFS_ACCEL + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + + free_irq_cpu_rmap(eq_table->rmap); +#endif +} + +static int set_rmap(struct mlx5_core_dev *mdev) +{ + int err = 0; +#ifdef CONFIG_RFS_ACCEL + struct mlx5_eq_table *eq_table = mdev->priv.eq_table; + int vecidx; + + eq_table->rmap = 
alloc_irq_cpu_rmap(eq_table->num_comp_eqs); + if (!eq_table->rmap) { + err = -ENOMEM; + mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err); + goto err_out; + } + + for (vecidx = 0; vecidx < eq_table->num_comp_eqs; vecidx++) { + err = irq_cpu_rmap_add(eq_table->rmap, + pci_irq_vector(mdev->pdev, vecidx)); + if (err) { + mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d", + err); + goto err_irq_cpu_rmap_add; + } + } + return 0; + +err_irq_cpu_rmap_add: + clear_rmap(mdev); +err_out: +#endif + return err; +} + +/* This function should only be called after mlx5_cmd_force_teardown_hca */ +void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + + mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ + if (!mlx5_core_is_sf(dev)) + clear_rmap(dev); + mlx5_irq_table_free_irqs(dev); + mutex_unlock(&table->lock); +} + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING +#define MLX5_MAX_ASYNC_EQS 4 +#else +#define MLX5_MAX_ASYNC_EQS 3 +#endif + +int mlx5_eq_table_create(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? + MLX5_CAP_GEN(dev, max_num_eqs) : + 1 << MLX5_CAP_GEN(dev, log_max_eq); + int max_eqs_sf; + int err; + + eq_table->num_comp_eqs = + min_t(int, + mlx5_irq_table_get_num_comp(eq_table->irq_table), + num_eqs - MLX5_MAX_ASYNC_EQS); + if (mlx5_core_is_sf(dev)) { + max_eqs_sf = min_t(int, MLX5_COMP_EQS_PER_SF, + mlx5_irq_table_get_sfs_vec(eq_table->irq_table)); + eq_table->num_comp_eqs = min_t(int, eq_table->num_comp_eqs, + max_eqs_sf); + } + + err = create_async_eqs(dev); + if (err) { + mlx5_core_err(dev, "Failed to create async EQs\n"); + goto err_async_eqs; + } + + if (!mlx5_core_is_sf(dev)) { + /* rmap is a mapping between irq number and queue number. + * each irq can be assign only to a single rmap. + * since SFs share IRQs, rmap mapping cannot function correctly + * for irqs that are shared for different core/netdev RX rings. + * Hence we don't allow netdev rmap for SFs + */ + err = set_rmap(dev); + if (err) + goto err_rmap; + } + + err = create_comp_eqs(dev); + if (err) { + mlx5_core_err(dev, "Failed to create completion EQs\n"); + goto err_comp_eqs; + } + + return 0; +err_comp_eqs: + if (!mlx5_core_is_sf(dev)) + clear_rmap(dev); +err_rmap: + destroy_async_eqs(dev); +err_async_eqs: + return err; +} + +void mlx5_eq_table_destroy(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_sf(dev)) + clear_rmap(dev); + destroy_comp_eqs(dev); + destroy_async_eqs(dev); +} + +int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb) +{ + struct mlx5_eq_table *eqt = dev->priv.eq_table; + + return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb); +} +EXPORT_SYMBOL(mlx5_eq_notifier_register); + +int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb) +{ + struct mlx5_eq_table *eqt = dev->priv.eq_table; + + return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb); +} +EXPORT_SYMBOL(mlx5_eq_notifier_unregister); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile new file mode 100644 index 000000000..c78512eed --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +subdir-ccflags-y += -I$(src)/.. 
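A note on the EQE ownership scheme used by eq.c above (next_eqe_sw()/mlx5_eq_get_eqe()): hardware flips the EQE owner bit on every pass around the ring, so software can recognize freshly written entries without sharing a producer index. The standalone sketch below is not driver code; it is a minimal userspace model, with made-up names (NENT, struct fake_eqe, peek()), of the (owner & 1) ^ !!(ci & nent) test, showing that only entries written during the current pass are returned and why MLX5_EQE_OWNER_INIT_VAL = 1 makes untouched entries invalid on the first pass.

#include <stdio.h>

#define NENT 8	/* ring size; a power of two, like 1 << log_eq_size in the driver */

struct fake_eqe {
	unsigned char owner;
};

/* Same test as mlx5_eq_get_eqe(): an entry is valid when its owner bit matches
 * the pass parity carried in bit log2(NENT) of the consumer index.
 */
static struct fake_eqe *peek(struct fake_eqe *ring, unsigned int ci)
{
	struct fake_eqe *eqe = &ring[ci & (NENT - 1)];

	return ((eqe->owner & 1) ^ !!(ci & NENT)) ? NULL : eqe;
}

int main(void)
{
	struct fake_eqe ring[NENT];
	unsigned int ci = 0, hw_pi;
	int i;

	/* init_eq_buf() analogue: owner = 1 marks every entry as not yet written */
	for (i = 0; i < NENT; i++)
		ring[i].owner = 1;

	/* "hardware" produces 10 entries, writing owner = pass parity */
	for (hw_pi = 0; hw_pi < 10; hw_pi++)
		ring[hw_pi & (NENT - 1)].owner = (hw_pi / NENT) & 1;

	/* "software" consumes: exactly the 10 produced entries are seen as valid */
	while (peek(ring, ci))
		ci++;

	printf("consumed %u EQEs\n", ci);	/* prints: consumed 10 EQEs */
	return 0;
}

Compiled and run as-is, the sketch prints "consumed 10 EQEs"; stale entries from an earlier pass fail the parity check and stop the poll.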
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c new file mode 100644 index 000000000..6b4c9ffad --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#include "mlx5_core.h" +#include "eswitch.h" +#include "helper.h" +#include "lgcy.h" + +static void esw_acl_egress_lgcy_rules_destroy(struct mlx5_vport *vport) +{ + esw_acl_egress_vlan_destroy(vport); + if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_rule)) { + mlx5_del_flow_rules(vport->egress.legacy.drop_rule); + vport->egress.legacy.drop_rule = NULL; + } +} + +static int esw_acl_egress_lgcy_groups_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_group *drop_grp; + u32 *flow_group_in; + int err = 0; + + err = esw_acl_egress_vlan_grp_create(esw, vport); + if (err) + return err; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) { + err = -ENOMEM; + goto alloc_err; + } + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + drop_grp = mlx5_create_flow_group(vport->egress.acl, flow_group_in); + if (IS_ERR(drop_grp)) { + err = PTR_ERR(drop_grp); + esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n", + vport->vport, err); + goto drop_grp_err; + } + + vport->egress.legacy.drop_grp = drop_grp; + kvfree(flow_group_in); + return 0; + +drop_grp_err: + kvfree(flow_group_in); +alloc_err: + esw_acl_egress_vlan_grp_destroy(vport); + return err; +} + +static void esw_acl_egress_lgcy_groups_destroy(struct mlx5_vport *vport) +{ + if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_grp)) { + mlx5_destroy_flow_group(vport->egress.legacy.drop_grp); + vport->egress.legacy.drop_grp = NULL; + } + esw_acl_egress_vlan_grp_destroy(vport); +} + +int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + bool vst_mode_steering = esw_vst_mode_is_steering(esw); + struct mlx5_flow_destination drop_ctr_dst = {}; + struct mlx5_flow_destination *dst = NULL; + struct mlx5_fc *drop_counter = NULL; + struct mlx5_flow_act flow_act = {}; + /* The egress acl table contains 2 rules: + * 1)Allow traffic with vlan_tag=vst_vlan_id + * 2)Drop all other traffic. 
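+ * table_size below reserves one flow table entry for each of these rules.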
+ */ + int table_size = 2; + int dest_num = 0; + int actions_flag; + int err = 0; + + if (vport->egress.legacy.drop_counter) { + drop_counter = vport->egress.legacy.drop_counter; + } else if (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) { + drop_counter = mlx5_fc_create(esw->dev, false); + if (IS_ERR(drop_counter)) { + esw_warn(esw->dev, + "vport[%d] configure egress drop rule counter err(%ld)\n", + vport->vport, PTR_ERR(drop_counter)); + drop_counter = NULL; + } + vport->egress.legacy.drop_counter = drop_counter; + } + + esw_acl_egress_lgcy_rules_destroy(vport); + + if (!vport->info.vlan && !vport->info.qos) { + esw_acl_egress_lgcy_cleanup(esw, vport); + return 0; + } + + if (!vport->egress.acl) { + vport->egress.acl = esw_acl_table_create(esw, vport, + MLX5_FLOW_NAMESPACE_ESW_EGRESS, + table_size); + if (IS_ERR(vport->egress.acl)) { + err = PTR_ERR(vport->egress.acl); + vport->egress.acl = NULL; + goto out; + } + + err = esw_acl_egress_lgcy_groups_create(esw, vport); + if (err) + goto out; + } + + esw_debug(esw->dev, + "vport[%d] configure egress rules, vlan(%d) qos(%d)\n", + vport->vport, vport->info.vlan, vport->info.qos); + + /* Allowed vlan rule */ + actions_flag = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + if (vst_mode_steering) + actions_flag |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + err = esw_egress_acl_vlan_create(esw, vport, NULL, vport->info.vlan, + actions_flag); + if (err) + goto out; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + + /* Attach egress drop flow counter */ + if (drop_counter) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + drop_ctr_dst.counter_id = mlx5_fc_id(drop_counter); + dst = &drop_ctr_dst; + dest_num++; + } + vport->egress.legacy.drop_rule = + mlx5_add_flow_rules(vport->egress.acl, NULL, + &flow_act, dst, dest_num); + if (IS_ERR(vport->egress.legacy.drop_rule)) { + err = PTR_ERR(vport->egress.legacy.drop_rule); + esw_warn(esw->dev, + "vport[%d] configure egress drop rule failed, err(%d)\n", + vport->vport, err); + vport->egress.legacy.drop_rule = NULL; + goto out; + } + + return err; + +out: + esw_acl_egress_lgcy_cleanup(esw, vport); + return err; +} + +void esw_acl_egress_lgcy_cleanup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (IS_ERR_OR_NULL(vport->egress.acl)) + goto clean_drop_counter; + + esw_debug(esw->dev, "Destroy vport[%d] E-Switch egress ACL\n", vport->vport); + + esw_acl_egress_lgcy_rules_destroy(vport); + esw_acl_egress_lgcy_groups_destroy(vport); + esw_acl_egress_table_destroy(vport); + +clean_drop_counter: + if (vport->egress.legacy.drop_counter) { + mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter); + vport->egress.legacy.drop_counter = NULL; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c new file mode 100644 index 000000000..2e504c746 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. 
*/ + +#include "mlx5_core.h" +#include "eswitch.h" +#include "helper.h" +#include "ofld.h" + +static void esw_acl_egress_ofld_fwd2vport_destroy(struct mlx5_vport *vport) +{ + if (!vport->egress.offloads.fwd_rule) + return; + + mlx5_del_flow_rules(vport->egress.offloads.fwd_rule); + vport->egress.offloads.fwd_rule = NULL; +} + +static void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport) +{ + if (!vport->egress.offloads.bounce_rule) + return; + + mlx5_del_flow_rules(vport->egress.offloads.bounce_rule); + vport->egress.offloads.bounce_rule = NULL; +} + +static int esw_acl_egress_ofld_fwd2vport_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_flow_destination *fwd_dest) +{ + struct mlx5_flow_act flow_act = {}; + int err = 0; + + esw_debug(esw->dev, "vport(%d) configure egress acl rule fwd2vport(%d)\n", + vport->vport, fwd_dest->vport.num); + + /* Delete the old egress forward-to-vport rule if any */ + esw_acl_egress_ofld_fwd2vport_destroy(vport); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + vport->egress.offloads.fwd_rule = + mlx5_add_flow_rules(vport->egress.acl, NULL, + &flow_act, fwd_dest, 1); + if (IS_ERR(vport->egress.offloads.fwd_rule)) { + err = PTR_ERR(vport->egress.offloads.fwd_rule); + esw_warn(esw->dev, + "vport(%d) failed to add fwd2vport acl rule err(%d)\n", + vport->vport, err); + vport->egress.offloads.fwd_rule = NULL; + } + + return err; +} + +static int esw_acl_egress_ofld_rules_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_flow_destination *fwd_dest) +{ + int err = 0; + int action; + + if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) { + /* For prio tag mode, there is only 1 FTEs: + * 1) prio tag packets - pop the prio tag VLAN, allow + * Unmatched traffic is allowed by default + */ + esw_debug(esw->dev, + "vport[%d] configure prio tag egress rules\n", vport->vport); + + action = MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + action |= fwd_dest ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : + MLX5_FLOW_CONTEXT_ACTION_ALLOW; + + /* prio tag vlan rule - pop it so vport receives untagged packets */ + err = esw_egress_acl_vlan_create(esw, vport, fwd_dest, 0, action); + if (err) + goto prio_err; + } + + if (fwd_dest) { + err = esw_acl_egress_ofld_fwd2vport_create(esw, vport, fwd_dest); + if (err) + goto fwd_err; + } + + return 0; + +fwd_err: + esw_acl_egress_vlan_destroy(vport); +prio_err: + return err; +} + +static void esw_acl_egress_ofld_rules_destroy(struct mlx5_vport *vport) +{ + esw_acl_egress_vlan_destroy(vport); + esw_acl_egress_ofld_fwd2vport_destroy(vport); + esw_acl_egress_ofld_bounce_rule_destroy(vport); +} + +static int esw_acl_egress_ofld_groups_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fwd_grp; + u32 *flow_group_in; + u32 flow_index = 0; + int ret = 0; + + if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) { + ret = esw_acl_egress_vlan_grp_create(esw, vport); + if (ret) + return ret; + + flow_index++; + } + + if (!mlx5_esw_acl_egress_fwd2vport_supported(esw)) + goto out; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) { + ret = -ENOMEM; + goto fwd_grp_err; + } + + /* This group holds 1 FTE to forward all packets to other vport + * when bond vports is supported. 
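+ * It takes the single flow index that follows the optional prio tag VLAN group.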
+ */ + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index); + fwd_grp = mlx5_create_flow_group(vport->egress.acl, flow_group_in); + if (IS_ERR(fwd_grp)) { + ret = PTR_ERR(fwd_grp); + esw_warn(esw->dev, + "Failed to create vport[%d] egress fwd2vport flow group, err(%d)\n", + vport->vport, ret); + kvfree(flow_group_in); + goto fwd_grp_err; + } + vport->egress.offloads.fwd_grp = fwd_grp; + kvfree(flow_group_in); + return 0; + +fwd_grp_err: + esw_acl_egress_vlan_grp_destroy(vport); +out: + return ret; +} + +static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport) +{ + if (!IS_ERR_OR_NULL(vport->egress.offloads.fwd_grp)) { + mlx5_destroy_flow_group(vport->egress.offloads.fwd_grp); + vport->egress.offloads.fwd_grp = NULL; + } + + if (!IS_ERR_OR_NULL(vport->egress.offloads.bounce_grp)) { + mlx5_destroy_flow_group(vport->egress.offloads.bounce_grp); + vport->egress.offloads.bounce_grp = NULL; + } + + esw_acl_egress_vlan_grp_destroy(vport); +} + +static bool esw_acl_egress_needed(struct mlx5_eswitch *esw, u16 vport_num) +{ + return mlx5_eswitch_is_vf_vport(esw, vport_num) || mlx5_esw_is_sf_vport(esw, vport_num); +} + +int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + int table_size = 0; + int err; + + if (!mlx5_esw_acl_egress_fwd2vport_supported(esw) && + !MLX5_CAP_GEN(esw->dev, prio_tag_required)) + return 0; + + if (!esw_acl_egress_needed(esw, vport->vport)) + return 0; + + esw_acl_egress_ofld_rules_destroy(vport); + + if (mlx5_esw_acl_egress_fwd2vport_supported(esw)) + table_size++; + if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) + table_size++; + vport->egress.acl = esw_acl_table_create(esw, vport, + MLX5_FLOW_NAMESPACE_ESW_EGRESS, table_size); + if (IS_ERR(vport->egress.acl)) { + err = PTR_ERR(vport->egress.acl); + vport->egress.acl = NULL; + return err; + } + + err = esw_acl_egress_ofld_groups_create(esw, vport); + if (err) + goto group_err; + + esw_debug(esw->dev, "vport[%d] configure egress rules\n", vport->vport); + + err = esw_acl_egress_ofld_rules_create(esw, vport, NULL); + if (err) + goto rules_err; + + return 0; + +rules_err: + esw_acl_egress_ofld_groups_destroy(vport); +group_err: + esw_acl_egress_table_destroy(vport); + return err; +} + +void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport) +{ + esw_acl_egress_ofld_rules_destroy(vport); + esw_acl_egress_ofld_groups_destroy(vport); + esw_acl_egress_table_destroy(vport); +} + +int mlx5_esw_acl_egress_vport_bond(struct mlx5_eswitch *esw, u16 active_vport_num, + u16 passive_vport_num) +{ + struct mlx5_vport *passive_vport = mlx5_eswitch_get_vport(esw, passive_vport_num); + struct mlx5_vport *active_vport = mlx5_eswitch_get_vport(esw, active_vport_num); + struct mlx5_flow_destination fwd_dest = {}; + + if (IS_ERR(active_vport)) + return PTR_ERR(active_vport); + if (IS_ERR(passive_vport)) + return PTR_ERR(passive_vport); + + /* Cleanup and recreate rules WITHOUT fwd2vport of active vport */ + esw_acl_egress_ofld_rules_destroy(active_vport); + esw_acl_egress_ofld_rules_create(esw, active_vport, NULL); + + /* Cleanup and recreate all rules + fwd2vport rule of passive vport to forward */ + esw_acl_egress_ofld_rules_destroy(passive_vport); + fwd_dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + fwd_dest.vport.num = active_vport_num; + fwd_dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); + fwd_dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; + + return 
esw_acl_egress_ofld_rules_create(esw, passive_vport, &fwd_dest); +} + +int mlx5_esw_acl_egress_vport_unbond(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + + if (IS_ERR(vport)) + return PTR_ERR(vport); + + esw_acl_egress_ofld_rules_destroy(vport); + return esw_acl_egress_ofld_rules_create(esw, vport, NULL); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c new file mode 100644 index 000000000..45b839116 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#include "mlx5_core.h" +#include "eswitch.h" +#include "helper.h" + +struct mlx5_flow_table * +esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns, int size) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *acl; + int acl_supported; + u16 vport_num; + int err; + + acl_supported = (ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS) ? + MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support) : + MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support); + + if (!acl_supported) + return ERR_PTR(-EOPNOTSUPP); + + vport_num = vport->vport; + esw_debug(dev, "Create vport[%d] %s ACL table\n", vport_num, + ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS ? "ingress" : "egress"); + + root_ns = mlx5_get_flow_vport_acl_namespace(dev, ns, vport->index); + if (!root_ns) { + esw_warn(dev, "Failed to get E-Switch root namespace for vport (%d)\n", + vport_num); + return ERR_PTR(-EOPNOTSUPP); + } + + ft_attr.max_fte = size; + ft_attr.flags = MLX5_FLOW_TABLE_OTHER_VPORT; + acl = mlx5_create_vport_flow_table(root_ns, &ft_attr, vport_num); + if (IS_ERR(acl)) { + err = PTR_ERR(acl); + esw_warn(dev, "vport[%d] create %s ACL table, err(%d)\n", vport_num, + ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS ? 
"ingress" : "egress", err); + } + return acl; +} + +int esw_egress_acl_vlan_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_flow_destination *fwd_dest, + u16 vlan_id, u32 flow_action) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec; + int err = 0; + + if (vport->egress.allowed_vlan) + return -EEXIST; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vlan_id); + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + flow_act.action = flow_action; + vport->egress.allowed_vlan = + mlx5_add_flow_rules(vport->egress.acl, spec, + &flow_act, fwd_dest, 0); + if (IS_ERR(vport->egress.allowed_vlan)) { + err = PTR_ERR(vport->egress.allowed_vlan); + esw_warn(esw->dev, + "vport[%d] configure egress vlan rule failed, err(%d)\n", + vport->vport, err); + vport->egress.allowed_vlan = NULL; + } + + kvfree(spec); + return err; +} + +void esw_acl_egress_vlan_destroy(struct mlx5_vport *vport) +{ + if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan)) { + mlx5_del_flow_rules(vport->egress.allowed_vlan); + vport->egress.allowed_vlan = NULL; + } +} + +int esw_acl_egress_vlan_grp_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *vlan_grp; + void *match_criteria; + u32 *flow_group_in; + int ret = 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.first_vid); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + + vlan_grp = mlx5_create_flow_group(vport->egress.acl, flow_group_in); + if (IS_ERR(vlan_grp)) { + ret = PTR_ERR(vlan_grp); + esw_warn(esw->dev, + "Failed to create E-Switch vport[%d] egress pop vlans flow group, err(%d)\n", + vport->vport, ret); + goto out; + } + vport->egress.vlan_grp = vlan_grp; + +out: + kvfree(flow_group_in); + return ret; +} + +void esw_acl_egress_vlan_grp_destroy(struct mlx5_vport *vport) +{ + if (!IS_ERR_OR_NULL(vport->egress.vlan_grp)) { + mlx5_destroy_flow_group(vport->egress.vlan_grp); + vport->egress.vlan_grp = NULL; + } +} + +void esw_acl_egress_table_destroy(struct mlx5_vport *vport) +{ + if (IS_ERR_OR_NULL(vport->egress.acl)) + return; + + mlx5_destroy_flow_table(vport->egress.acl); + vport->egress.acl = NULL; +} + +void esw_acl_ingress_table_destroy(struct mlx5_vport *vport) +{ + if (!vport->ingress.acl) + return; + + mlx5_destroy_flow_table(vport->ingress.acl); + vport->ingress.acl = NULL; +} + +void esw_acl_ingress_allow_rule_destroy(struct mlx5_vport *vport) +{ + if (!vport->ingress.allow_rule) + return; + + mlx5_del_flow_rules(vport->ingress.allow_rule); + vport->ingress.allow_rule = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h new file mode 
100644 index 000000000..a47063fab --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#ifndef __MLX5_ESWITCH_ACL_HELPER_H__ +#define __MLX5_ESWITCH_ACL_HELPER_H__ + +#include "eswitch.h" + +/* General acl helper functions */ +struct mlx5_flow_table * +esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns, int size); + +/* Egress acl helper functions */ +void esw_acl_egress_table_destroy(struct mlx5_vport *vport); +int esw_egress_acl_vlan_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + struct mlx5_flow_destination *fwd_dest, + u16 vlan_id, u32 flow_action); +void esw_acl_egress_vlan_destroy(struct mlx5_vport *vport); +int esw_acl_egress_vlan_grp_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_acl_egress_vlan_grp_destroy(struct mlx5_vport *vport); + +/* Ingress acl helper functions */ +void esw_acl_ingress_table_destroy(struct mlx5_vport *vport); +void esw_acl_ingress_allow_rule_destroy(struct mlx5_vport *vport); + +#endif /* __MLX5_ESWITCH_ACL_HELPER_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c new file mode 100644 index 000000000..093ed86a0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c @@ -0,0 +1,305 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#include "mlx5_core.h" +#include "eswitch.h" +#include "helper.h" +#include "lgcy.h" + +static void esw_acl_ingress_lgcy_rules_destroy(struct mlx5_vport *vport) +{ + if (vport->ingress.legacy.drop_rule) { + mlx5_del_flow_rules(vport->ingress.legacy.drop_rule); + vport->ingress.legacy.drop_rule = NULL; + } + esw_acl_ingress_allow_rule_destroy(vport); +} + +static int esw_acl_ingress_lgcy_groups_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_group *g; + void *match_criteria; + u32 *flow_group_in; + int err; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "vport[%d] ingress create untagged spoofchk flow group, err(%d)\n", + vport->vport, err); + goto spoof_err; + } + vport->ingress.legacy.allow_untagged_spoofchk_grp = g; + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); + MLX5_SET(create_flow_group_in, flow_group_in, 
end_flow_index, 1); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "vport[%d] ingress create untagged flow group, err(%d)\n", + vport->vport, err); + goto untagged_err; + } + vport->ingress.legacy.allow_untagged_only_grp = g; + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "vport[%d] ingress create spoofchk flow group, err(%d)\n", + vport->vport, err); + goto allow_spoof_err; + } + vport->ingress.legacy.allow_spoofchk_only_grp = g; + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 3); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "vport[%d] ingress create drop flow group, err(%d)\n", + vport->vport, err); + goto drop_err; + } + vport->ingress.legacy.drop_grp = g; + kvfree(flow_group_in); + return 0; + +drop_err: + if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_spoofchk_only_grp)) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp); + vport->ingress.legacy.allow_spoofchk_only_grp = NULL; + } +allow_spoof_err: + if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_only_grp)) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp); + vport->ingress.legacy.allow_untagged_only_grp = NULL; + } +untagged_err: + if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_spoofchk_grp)) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp); + vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL; + } +spoof_err: + kvfree(flow_group_in); + return err; +} + +static void esw_acl_ingress_lgcy_groups_destroy(struct mlx5_vport *vport) +{ + if (vport->ingress.legacy.allow_spoofchk_only_grp) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp); + vport->ingress.legacy.allow_spoofchk_only_grp = NULL; + } + if (vport->ingress.legacy.allow_untagged_only_grp) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp); + vport->ingress.legacy.allow_untagged_only_grp = NULL; + } + if (vport->ingress.legacy.allow_untagged_spoofchk_grp) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp); + vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL; + } + if (vport->ingress.legacy.drop_grp) { + mlx5_destroy_flow_group(vport->ingress.legacy.drop_grp); + vport->ingress.legacy.drop_grp = NULL; + } +} + +int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + bool vst_mode_steering = esw_vst_mode_is_steering(esw); + struct mlx5_flow_destination drop_ctr_dst = {}; + struct mlx5_flow_destination *dst = NULL; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec = NULL; + struct mlx5_fc *counter = NULL; + bool vst_check_cvlan = false; + bool vst_push_cvlan = false; + /* The ingress acl table contains 4 groups + * (2 active rules at the same time - + * 1 allow rule from one of 
the first 3 groups. + * 1 drop rule from the last group): + * 1)Allow untagged traffic with smac=original mac. + * 2)Allow untagged traffic. + * 3)Allow traffic with smac=original mac. + * 4)Drop all other traffic. + */ + int table_size = 4; + int dest_num = 0; + int err = 0; + u8 *smac_v; + + esw_acl_ingress_lgcy_rules_destroy(vport); + + if (vport->ingress.legacy.drop_counter) { + counter = vport->ingress.legacy.drop_counter; + } else if (MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) { + counter = mlx5_fc_create(esw->dev, false); + if (IS_ERR(counter)) { + esw_warn(esw->dev, + "vport[%d] configure ingress drop rule counter failed\n", + vport->vport); + counter = NULL; + } + vport->ingress.legacy.drop_counter = counter; + } + + if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) { + esw_acl_ingress_lgcy_cleanup(esw, vport); + return 0; + } + + if (!vport->ingress.acl) { + vport->ingress.acl = esw_acl_table_create(esw, vport, + MLX5_FLOW_NAMESPACE_ESW_INGRESS, + table_size); + if (IS_ERR(vport->ingress.acl)) { + err = PTR_ERR(vport->ingress.acl); + vport->ingress.acl = NULL; + return err; + } + + err = esw_acl_ingress_lgcy_groups_create(esw, vport); + if (err) + goto out; + } + + esw_debug(esw->dev, + "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n", + vport->vport, vport->info.vlan, vport->info.qos); + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto out; + } + + if ((vport->info.vlan || vport->info.qos)) { + if (vst_mode_steering) + vst_push_cvlan = true; + else if (!MLX5_CAP_ESW(esw->dev, vport_cvlan_insert_always)) + vst_check_cvlan = true; + } + + if (vst_check_cvlan || vport->info.spoofchk) + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + /* Create ingress allow rule */ + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + if (vst_push_cvlan) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + flow_act.vlan[0].prio = vport->info.qos; + flow_act.vlan[0].vid = vport->info.vlan; + flow_act.vlan[0].ethtype = ETH_P_8021Q; + } + + if (vst_check_cvlan) + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag); + + if (vport->info.spoofchk) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.smac_15_0); + smac_v = MLX5_ADDR_OF(fte_match_param, + spec->match_value, + outer_headers.smac_47_16); + ether_addr_copy(smac_v, vport->info.mac); + } + + vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, + &flow_act, NULL, 0); + if (IS_ERR(vport->ingress.allow_rule)) { + err = PTR_ERR(vport->ingress.allow_rule); + esw_warn(esw->dev, + "vport[%d] configure ingress allow rule, err(%d)\n", + vport->vport, err); + vport->ingress.allow_rule = NULL; + goto out; + } + + if (!vst_check_cvlan && !vport->info.spoofchk) + goto out; + + memset(&flow_act, 0, sizeof(flow_act)); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + /* Attach drop flow counter */ + if (counter) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + drop_ctr_dst.counter_id = mlx5_fc_id(counter); + dst = &drop_ctr_dst; + dest_num++; + } + vport->ingress.legacy.drop_rule = + mlx5_add_flow_rules(vport->ingress.acl, NULL, + &flow_act, dst, dest_num); + if (IS_ERR(vport->ingress.legacy.drop_rule)) { + err = PTR_ERR(vport->ingress.legacy.drop_rule); + esw_warn(esw->dev, + "vport[%d] configure ingress drop rule, err(%d)\n", + vport->vport, 
err); + vport->ingress.legacy.drop_rule = NULL; + goto out; + } + kvfree(spec); + return 0; + +out: + if (err) + esw_acl_ingress_lgcy_cleanup(esw, vport); + kvfree(spec); + return err; +} + +void esw_acl_ingress_lgcy_cleanup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (IS_ERR_OR_NULL(vport->ingress.acl)) + goto clean_drop_counter; + + esw_debug(esw->dev, "Destroy vport[%d] E-Switch ingress ACL\n", vport->vport); + + esw_acl_ingress_lgcy_rules_destroy(vport); + esw_acl_ingress_lgcy_groups_destroy(vport); + esw_acl_ingress_table_destroy(vport); + +clean_drop_counter: + if (vport->ingress.legacy.drop_counter) { + mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter); + vport->ingress.legacy.drop_counter = NULL; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c new file mode 100644 index 000000000..db578a7e7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c @@ -0,0 +1,408 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#include "mlx5_core.h" +#include "eswitch.h" +#include "helper.h" +#include "ofld.h" + +static bool +esw_acl_ingress_prio_tag_enabled(struct mlx5_eswitch *esw, + const struct mlx5_vport *vport) +{ + return (MLX5_CAP_GEN(esw->dev, prio_tag_required) && + mlx5_eswitch_is_vf_vport(esw, vport->vport)); +} + +static int esw_acl_ingress_prio_tag_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec; + int err = 0; + + /* For prio tag mode, there is only 1 FTEs: + * 1) Untagged packets - push prio tag VLAN and modify metadata if + * required, allow + * Unmatched traffic is allowed by default + */ + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + /* Untagged packets - push prio tag VLAN, allow */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 0); + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | + MLX5_FLOW_CONTEXT_ACTION_ALLOW; + flow_act.vlan[0].ethtype = ETH_P_8021Q; + flow_act.vlan[0].vid = 0; + flow_act.vlan[0].prio = 0; + + if (vport->ingress.offloads.modify_metadata_rule) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.modify_hdr = vport->ingress.offloads.modify_metadata; + } + + vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, + &flow_act, NULL, 0); + if (IS_ERR(vport->ingress.allow_rule)) { + err = PTR_ERR(vport->ingress.allow_rule); + esw_warn(esw->dev, + "vport[%d] configure ingress untagged allow rule, err(%d)\n", + vport->vport, err); + vport->ingress.allow_rule = NULL; + } + + kvfree(spec); + return err; +} + +static int esw_acl_ingress_mod_metadata_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5_flow_act flow_act = {}; + int err = 0; + u32 key; + + key = mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport); + key >>= ESW_SOURCE_PORT_METADATA_OFFSET; + + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_0); + MLX5_SET(set_action_in, action, data, key); + MLX5_SET(set_action_in, action, offset, + 
ESW_SOURCE_PORT_METADATA_OFFSET); + MLX5_SET(set_action_in, action, length, + ESW_SOURCE_PORT_METADATA_BITS); + + vport->ingress.offloads.modify_metadata = + mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, + 1, action); + if (IS_ERR(vport->ingress.offloads.modify_metadata)) { + err = PTR_ERR(vport->ingress.offloads.modify_metadata); + esw_warn(esw->dev, + "failed to alloc modify header for vport %d ingress acl (%d)\n", + vport->vport, err); + return err; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW; + flow_act.modify_hdr = vport->ingress.offloads.modify_metadata; + flow_act.fg = vport->ingress.offloads.metadata_allmatch_grp; + vport->ingress.offloads.modify_metadata_rule = + mlx5_add_flow_rules(vport->ingress.acl, + NULL, &flow_act, NULL, 0); + if (IS_ERR(vport->ingress.offloads.modify_metadata_rule)) { + err = PTR_ERR(vport->ingress.offloads.modify_metadata_rule); + esw_warn(esw->dev, + "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n", + vport->vport, err); + mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata); + vport->ingress.offloads.modify_metadata_rule = NULL; + } + return err; +} + +static void esw_acl_ingress_mod_metadata_destroy(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (!vport->ingress.offloads.modify_metadata_rule) + return; + + mlx5_del_flow_rules(vport->ingress.offloads.modify_metadata_rule); + mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata); + vport->ingress.offloads.modify_metadata_rule = NULL; +} + +static int esw_acl_ingress_src_port_drop_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *flow_rule; + int err = 0; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + flow_act.fg = vport->ingress.offloads.drop_grp; + flow_rule = mlx5_add_flow_rules(vport->ingress.acl, NULL, &flow_act, NULL, 0); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto out; + } + + vport->ingress.offloads.drop_rule = flow_rule; +out: + return err; +} + +static void esw_acl_ingress_src_port_drop_destroy(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (!vport->ingress.offloads.drop_rule) + return; + + mlx5_del_flow_rules(vport->ingress.offloads.drop_rule); + vport->ingress.offloads.drop_rule = NULL; +} + +static int esw_acl_ingress_ofld_rules_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int err; + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + err = esw_acl_ingress_mod_metadata_create(esw, vport); + if (err) { + esw_warn(esw->dev, + "vport(%d) create ingress modify metadata, err(%d)\n", + vport->vport, err); + return err; + } + } + + if (esw_acl_ingress_prio_tag_enabled(esw, vport)) { + err = esw_acl_ingress_prio_tag_create(esw, vport); + if (err) { + esw_warn(esw->dev, + "vport(%d) create ingress prio tag rule, err(%d)\n", + vport->vport, err); + goto prio_tag_err; + } + } + + return 0; + +prio_tag_err: + esw_acl_ingress_mod_metadata_destroy(esw, vport); + return err; +} + +static void esw_acl_ingress_ofld_rules_destroy(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + esw_acl_ingress_allow_rule_destroy(vport); + esw_acl_ingress_mod_metadata_destroy(esw, vport); + esw_acl_ingress_src_port_drop_destroy(esw, vport); +} + +static int esw_acl_ingress_ofld_groups_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct 
mlx5_flow_group *g; + void *match_criteria; + u32 *flow_group_in; + u32 flow_index = 0; + int ret = 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + if (vport->vport == MLX5_VPORT_UPLINK) { + /* This group can hold an FTE to drop all traffic. + * Need in case LAG is enabled. + */ + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + ret = PTR_ERR(g); + esw_warn(esw->dev, "vport[%d] ingress create drop flow group, err(%d)\n", + vport->vport, ret); + goto drop_err; + } + vport->ingress.offloads.drop_grp = g; + flow_index++; + } + + if (esw_acl_ingress_prio_tag_enabled(esw, vport)) { + /* This group is to hold FTE to match untagged packets when prio_tag + * is enabled. + */ + memset(flow_group_in, 0, inlen); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, match_criteria); + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + ret = PTR_ERR(g); + esw_warn(esw->dev, "vport[%d] ingress create untagged flow group, err(%d)\n", + vport->vport, ret); + goto prio_tag_err; + } + vport->ingress.offloads.metadata_prio_tag_grp = g; + flow_index++; + } + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + /* This group holds an FTE with no match to add metadata for + * tagged packets if prio-tag is enabled, or for all untagged + * traffic in case prio-tag is disabled. 
+ */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + ret = PTR_ERR(g); + esw_warn(esw->dev, "vport[%d] ingress create drop flow group, err(%d)\n", + vport->vport, ret); + goto metadata_err; + } + vport->ingress.offloads.metadata_allmatch_grp = g; + } + + kvfree(flow_group_in); + return 0; + +metadata_err: + if (!IS_ERR_OR_NULL(vport->ingress.offloads.metadata_prio_tag_grp)) { + mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp); + vport->ingress.offloads.metadata_prio_tag_grp = NULL; + } +prio_tag_err: + if (!IS_ERR_OR_NULL(vport->ingress.offloads.drop_grp)) { + mlx5_destroy_flow_group(vport->ingress.offloads.drop_grp); + vport->ingress.offloads.drop_grp = NULL; + } +drop_err: + kvfree(flow_group_in); + return ret; +} + +static void esw_acl_ingress_ofld_groups_destroy(struct mlx5_vport *vport) +{ + if (vport->ingress.offloads.metadata_allmatch_grp) { + mlx5_destroy_flow_group(vport->ingress.offloads.metadata_allmatch_grp); + vport->ingress.offloads.metadata_allmatch_grp = NULL; + } + + if (vport->ingress.offloads.metadata_prio_tag_grp) { + mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp); + vport->ingress.offloads.metadata_prio_tag_grp = NULL; + } + + if (vport->ingress.offloads.drop_grp) { + mlx5_destroy_flow_group(vport->ingress.offloads.drop_grp); + vport->ingress.offloads.drop_grp = NULL; + } +} + +int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int num_ftes = 0; + int err; + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw) && + !esw_acl_ingress_prio_tag_enabled(esw, vport)) + return 0; + + esw_acl_ingress_allow_rule_destroy(vport); + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + num_ftes++; + if (vport->vport == MLX5_VPORT_UPLINK) + num_ftes++; + if (esw_acl_ingress_prio_tag_enabled(esw, vport)) + num_ftes++; + + vport->ingress.acl = esw_acl_table_create(esw, vport, + MLX5_FLOW_NAMESPACE_ESW_INGRESS, + num_ftes); + if (IS_ERR(vport->ingress.acl)) { + err = PTR_ERR(vport->ingress.acl); + vport->ingress.acl = NULL; + return err; + } + + err = esw_acl_ingress_ofld_groups_create(esw, vport); + if (err) + goto group_err; + + esw_debug(esw->dev, + "vport[%d] configure ingress rules\n", vport->vport); + + err = esw_acl_ingress_ofld_rules_create(esw, vport); + if (err) + goto rules_err; + + return 0; + +rules_err: + esw_acl_ingress_ofld_groups_destroy(vport); +group_err: + esw_acl_ingress_table_destroy(vport); + return err; +} + +void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + esw_acl_ingress_ofld_rules_destroy(esw, vport); + esw_acl_ingress_ofld_groups_destroy(vport); + esw_acl_ingress_table_destroy(vport); +} + +/* Caller must hold rtnl_lock */ +int mlx5_esw_acl_ingress_vport_bond_update(struct mlx5_eswitch *esw, u16 vport_num, + u32 metadata) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + int err; + + if (WARN_ON_ONCE(IS_ERR(vport))) { + esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num); + return PTR_ERR(vport); + } + + esw_acl_ingress_ofld_rules_destroy(esw, vport); + + vport->metadata = metadata ? 
metadata : vport->default_metadata; + + /* Recreate ingress acl rules with vport->metadata */ + err = esw_acl_ingress_ofld_rules_create(esw, vport); + if (err) + goto out; + + return 0; + +out: + vport->metadata = vport->default_metadata; + return err; +} + +int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + + if (IS_ERR(vport)) { + esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num); + return PTR_ERR(vport); + } + + return esw_acl_ingress_src_port_drop_create(esw, vport); +} + +void mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + + if (WARN_ON_ONCE(IS_ERR(vport))) { + esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num); + return; + } + + esw_acl_ingress_src_port_drop_destroy(esw, vport); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h new file mode 100644 index 000000000..44c152da3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#ifndef __MLX5_ESWITCH_ACL_LGCY_H__ +#define __MLX5_ESWITCH_ACL_LGCY_H__ + +#include "eswitch.h" + +/* Eswitch acl egress external APIs */ +int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_acl_egress_lgcy_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + +/* Eswitch acl ingress external APIs */ +int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_acl_ingress_lgcy_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + +#endif /* __MLX5_ESWITCH_ACL_LGCY_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h new file mode 100644 index 000000000..11d3d3978 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. 
*/ + +#ifndef __MLX5_ESWITCH_ACL_OFLD_H__ +#define __MLX5_ESWITCH_ACL_OFLD_H__ + +#include "eswitch.h" + +#ifdef CONFIG_MLX5_ESWITCH +/* Eswitch acl egress external APIs */ +int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport); +int mlx5_esw_acl_egress_vport_bond(struct mlx5_eswitch *esw, u16 active_vport_num, + u16 passive_vport_num); +int mlx5_esw_acl_egress_vport_unbond(struct mlx5_eswitch *esw, u16 vport_num); + +static inline bool mlx5_esw_acl_egress_fwd2vport_supported(struct mlx5_eswitch *esw) +{ + return esw && esw->mode == MLX5_ESWITCH_OFFLOADS && + mlx5_eswitch_vport_match_metadata_enabled(esw) && + MLX5_CAP_ESW_FLOWTABLE(esw->dev, egress_acl_forward_to_vport); +} + +/* Eswitch acl ingress external APIs */ +int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +int mlx5_esw_acl_ingress_vport_bond_update(struct mlx5_eswitch *esw, u16 vport_num, + u32 metadata); +void mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, u16 vport_num); +int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, u16 vport_num); + +#else /* CONFIG_MLX5_ESWITCH */ +static void +mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, + u16 vport_num) +{} + +static int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, + u16 vport_num) +{ + return 0; +} +#endif /* CONFIG_MLX5_ESWITCH */ +#endif /* __MLX5_ESWITCH_ACL_OFLD_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c new file mode 100644 index 000000000..d0b2676c3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -0,0 +1,1853 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021 Mellanox Technologies. 
*/ + +#include <linux/build_bug.h> +#include <linux/list.h> +#include <linux/notifier.h> +#include <net/netevent.h> +#include <net/switchdev.h> +#include "lib/devcom.h" +#include "bridge.h" +#include "eswitch.h" +#include "bridge_priv.h" +#define CREATE_TRACE_POINTS +#include "diag/bridge_tracepoint.h" + +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE 12000 +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE 16000 +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM 0 +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO + 1) +static_assert(MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE == 64000); + +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE 16000 +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE (32000 - 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM 0 +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO \ + MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO + 1) +static_assert(MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE == 64000); + +#define MLX5_ESW_BRIDGE_SKIP_TABLE_SIZE 0 + +enum { + MLX5_ESW_BRIDGE_LEVEL_INGRESS_TABLE, + MLX5_ESW_BRIDGE_LEVEL_EGRESS_TABLE, + MLX5_ESW_BRIDGE_LEVEL_SKIP_TABLE, +}; + +static const struct rhashtable_params fdb_ht_params = { + .key_offset = offsetof(struct mlx5_esw_bridge_fdb_entry, key), + .key_len = sizeof(struct mlx5_esw_bridge_fdb_key), + .head_offset = offsetof(struct mlx5_esw_bridge_fdb_entry, ht_node), + 
.automatic_shrinking = true, +}; + +enum { + MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG = BIT(0), +}; + +struct mlx5_esw_bridge { + int ifindex; + int refcnt; + struct list_head list; + struct mlx5_esw_bridge_offloads *br_offloads; + + struct list_head fdb_list; + struct rhashtable fdb_ht; + + struct mlx5_flow_table *egress_ft; + struct mlx5_flow_group *egress_vlan_fg; + struct mlx5_flow_group *egress_qinq_fg; + struct mlx5_flow_group *egress_mac_fg; + struct mlx5_flow_group *egress_miss_fg; + struct mlx5_pkt_reformat *egress_miss_pkt_reformat; + struct mlx5_flow_handle *egress_miss_handle; + unsigned long ageing_time; + u32 flags; + u16 vlan_proto; +}; + +static void +mlx5_esw_bridge_fdb_offload_notify(struct net_device *dev, const unsigned char *addr, u16 vid, + unsigned long val) +{ + struct switchdev_notifier_fdb_info send_info = {}; + + send_info.addr = addr; + send_info.vid = vid; + send_info.offloaded = true; + call_switchdev_notifiers(val, dev, &send_info.info, NULL); +} + +static void +mlx5_esw_bridge_fdb_del_notify(struct mlx5_esw_bridge_fdb_entry *entry) +{ + if (!(entry->flags & (MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER | MLX5_ESW_BRIDGE_FLAG_PEER))) + mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr, + entry->key.vid, + SWITCHDEV_FDB_DEL_TO_BRIDGE); +} + +static bool mlx5_esw_bridge_pkt_reformat_vlan_pop_supported(struct mlx5_eswitch *esw) +{ + return BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat_remove)) && + MLX5_CAP_GEN_2(esw->dev, max_reformat_remove_size) >= sizeof(struct vlan_hdr) && + MLX5_CAP_GEN_2(esw->dev, max_reformat_remove_offset) >= + offsetof(struct vlan_ethhdr, h_vlan_proto); +} + +static struct mlx5_pkt_reformat * +mlx5_esw_bridge_pkt_reformat_vlan_pop_create(struct mlx5_eswitch *esw) +{ + struct mlx5_pkt_reformat_params reformat_params = {}; + + reformat_params.type = MLX5_REFORMAT_TYPE_REMOVE_HDR; + reformat_params.param_0 = MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START; + reformat_params.param_1 = offsetof(struct vlan_ethhdr, h_vlan_proto); + reformat_params.size = sizeof(struct vlan_hdr); + return mlx5_packet_reformat_alloc(esw->dev, &reformat_params, MLX5_FLOW_NAMESPACE_FDB); +} + +static struct mlx5_flow_table * +mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *fdb; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!ns) { + esw_warn(dev, "Failed to get FDB namespace\n"); + return ERR_PTR(-ENOENT); + } + + ft_attr.flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + ft_attr.max_fte = max_fte; + ft_attr.level = level; + ft_attr.prio = FDB_BR_OFFLOAD; + fdb = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(fdb)) + esw_warn(dev, "Failed to create bridge FDB Table (err=%ld)\n", PTR_ERR(fdb)); + + return fdb; +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_vlan_proto_fg_create(unsigned int from, unsigned int to, u16 vlan_proto, + struct mlx5_eswitch *esw, + struct mlx5_flow_table *ingress_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_47_16); + 
MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_15_0); + if (vlan_proto == ETH_P_8021Q) + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag); + else if (vlan_proto == ETH_P_8021AD) + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.first_vid); + + MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + + MLX5_SET(create_flow_group_in, in, start_flow_index, from); + MLX5_SET(create_flow_group_in, in, end_flow_index, to); + + fg = mlx5_create_flow_group(ingress_ft, in); + kvfree(in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create VLAN(proto=%x) flow group for bridge ingress table (err=%ld)\n", + vlan_proto, PTR_ERR(fg)); + + return fg; +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_vlan_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ingress_ft) +{ + unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM; + unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO; + + return mlx5_esw_bridge_ingress_vlan_proto_fg_create(from, to, ETH_P_8021Q, esw, ingress_ft); +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_qinq_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ingress_ft) +{ + unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM; + unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO; + + return mlx5_esw_bridge_ingress_vlan_proto_fg_create(from, to, ETH_P_8021AD, esw, + ingress_ft); +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_vlan_proto_filter_fg_create(unsigned int from, unsigned int to, + u16 vlan_proto, struct mlx5_eswitch *esw, + struct mlx5_flow_table *ingress_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_15_0); + if (vlan_proto == ETH_P_8021Q) + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag); + else if (vlan_proto == ETH_P_8021AD) + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.svlan_tag); + MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + + MLX5_SET(create_flow_group_in, in, start_flow_index, from); + MLX5_SET(create_flow_group_in, in, end_flow_index, to); + + fg = mlx5_create_flow_group(ingress_ft, in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create bridge ingress table VLAN filter flow group (err=%ld)\n", + PTR_ERR(fg)); + kvfree(in); + return fg; +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_vlan_filter_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ingress_ft) +{ + unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM; + unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO; + + return mlx5_esw_bridge_ingress_vlan_proto_filter_fg_create(from, to, ETH_P_8021Q, esw, + ingress_ft); +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_qinq_filter_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ingress_ft) +{ + unsigned int from = 
MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM; + unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO; + + return mlx5_esw_bridge_ingress_vlan_proto_filter_fg_create(from, to, ETH_P_8021AD, esw, + ingress_ft); +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *ingress_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_15_0); + + MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + + MLX5_SET(create_flow_group_in, in, start_flow_index, + MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM); + MLX5_SET(create_flow_group_in, in, end_flow_index, + MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO); + + fg = mlx5_create_flow_group(ingress_ft, in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create MAC flow group for bridge ingress table (err=%ld)\n", + PTR_ERR(fg)); + + kvfree(in); + return fg; +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_egress_vlan_proto_fg_create(unsigned int from, unsigned int to, u16 vlan_proto, + struct mlx5_eswitch *esw, + struct mlx5_flow_table *egress_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_15_0); + if (vlan_proto == ETH_P_8021Q) + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag); + else if (vlan_proto == ETH_P_8021AD) + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.first_vid); + + MLX5_SET(create_flow_group_in, in, start_flow_index, from); + MLX5_SET(create_flow_group_in, in, end_flow_index, to); + + fg = mlx5_create_flow_group(egress_ft, in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create VLAN flow group for bridge egress table (err=%ld)\n", + PTR_ERR(fg)); + kvfree(in); + return fg; +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_egress_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft) +{ + unsigned int from = MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM; + unsigned int to = MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO; + + return mlx5_esw_bridge_egress_vlan_proto_fg_create(from, to, ETH_P_8021Q, esw, egress_ft); +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_egress_qinq_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *egress_ft) +{ + unsigned int from = MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM; + unsigned int to = MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO; + + return mlx5_esw_bridge_egress_vlan_proto_fg_create(from, to, ETH_P_8021AD, esw, egress_ft); +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_egress_mac_fg_create(struct mlx5_eswitch *esw, 
struct mlx5_flow_table *egress_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_15_0); + + MLX5_SET(create_flow_group_in, in, start_flow_index, + MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM); + MLX5_SET(create_flow_group_in, in, end_flow_index, + MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO); + + fg = mlx5_create_flow_group(egress_ft, in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create bridge egress table MAC flow group (err=%ld)\n", + PTR_ERR(fg)); + kvfree(in); + return fg; +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_egress_miss_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); + + MLX5_SET(create_flow_group_in, in, start_flow_index, + MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM); + MLX5_SET(create_flow_group_in, in, end_flow_index, + MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO); + + fg = mlx5_create_flow_group(egress_ft, in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create bridge egress table miss flow group (err=%ld)\n", + PTR_ERR(fg)); + kvfree(in); + return fg; +} + +static int +mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_flow_group *mac_fg, *qinq_filter_fg, *qinq_fg, *vlan_filter_fg, *vlan_fg; + struct mlx5_flow_table *ingress_ft, *skip_ft; + struct mlx5_eswitch *esw = br_offloads->esw; + int err; + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) + return -EOPNOTSUPP; + + ingress_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE, + MLX5_ESW_BRIDGE_LEVEL_INGRESS_TABLE, + esw); + if (IS_ERR(ingress_ft)) + return PTR_ERR(ingress_ft); + + skip_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_SKIP_TABLE_SIZE, + MLX5_ESW_BRIDGE_LEVEL_SKIP_TABLE, + esw); + if (IS_ERR(skip_ft)) { + err = PTR_ERR(skip_ft); + goto err_skip_tbl; + } + + vlan_fg = mlx5_esw_bridge_ingress_vlan_fg_create(esw, ingress_ft); + if (IS_ERR(vlan_fg)) { + err = PTR_ERR(vlan_fg); + goto err_vlan_fg; + } + + vlan_filter_fg = mlx5_esw_bridge_ingress_vlan_filter_fg_create(esw, ingress_ft); + if (IS_ERR(vlan_filter_fg)) { + err = PTR_ERR(vlan_filter_fg); + goto err_vlan_filter_fg; + } + + qinq_fg = mlx5_esw_bridge_ingress_qinq_fg_create(esw, ingress_ft); + if (IS_ERR(qinq_fg)) { + err = PTR_ERR(qinq_fg); + goto err_qinq_fg; + } + + qinq_filter_fg = mlx5_esw_bridge_ingress_qinq_filter_fg_create(esw, ingress_ft); + if (IS_ERR(qinq_filter_fg)) { + err = PTR_ERR(qinq_filter_fg); + goto err_qinq_filter_fg; + } + + mac_fg = mlx5_esw_bridge_ingress_mac_fg_create(esw, ingress_ft); + if (IS_ERR(mac_fg)) { + err = PTR_ERR(mac_fg); + goto err_mac_fg; + } + + br_offloads->ingress_ft = ingress_ft; + br_offloads->skip_ft = skip_ft; + br_offloads->ingress_vlan_fg 
= vlan_fg; + br_offloads->ingress_vlan_filter_fg = vlan_filter_fg; + br_offloads->ingress_qinq_fg = qinq_fg; + br_offloads->ingress_qinq_filter_fg = qinq_filter_fg; + br_offloads->ingress_mac_fg = mac_fg; + return 0; + +err_mac_fg: + mlx5_destroy_flow_group(qinq_filter_fg); +err_qinq_filter_fg: + mlx5_destroy_flow_group(qinq_fg); +err_qinq_fg: + mlx5_destroy_flow_group(vlan_filter_fg); +err_vlan_filter_fg: + mlx5_destroy_flow_group(vlan_fg); +err_vlan_fg: + mlx5_destroy_flow_table(skip_ft); +err_skip_tbl: + mlx5_destroy_flow_table(ingress_ft); + return err; +} + +static void +mlx5_esw_bridge_ingress_table_cleanup(struct mlx5_esw_bridge_offloads *br_offloads) +{ + mlx5_destroy_flow_group(br_offloads->ingress_mac_fg); + br_offloads->ingress_mac_fg = NULL; + mlx5_destroy_flow_group(br_offloads->ingress_qinq_filter_fg); + br_offloads->ingress_qinq_filter_fg = NULL; + mlx5_destroy_flow_group(br_offloads->ingress_qinq_fg); + br_offloads->ingress_qinq_fg = NULL; + mlx5_destroy_flow_group(br_offloads->ingress_vlan_filter_fg); + br_offloads->ingress_vlan_filter_fg = NULL; + mlx5_destroy_flow_group(br_offloads->ingress_vlan_fg); + br_offloads->ingress_vlan_fg = NULL; + mlx5_destroy_flow_table(br_offloads->skip_ft); + br_offloads->skip_ft = NULL; + mlx5_destroy_flow_table(br_offloads->ingress_ft); + br_offloads->ingress_ft = NULL; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_egress_miss_flow_create(struct mlx5_flow_table *egress_ft, + struct mlx5_flow_table *skip_ft, + struct mlx5_pkt_reformat *pkt_reformat); + +static int +mlx5_esw_bridge_egress_table_init(struct mlx5_esw_bridge_offloads *br_offloads, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_flow_group *miss_fg = NULL, *mac_fg, *vlan_fg, *qinq_fg; + struct mlx5_pkt_reformat *miss_pkt_reformat = NULL; + struct mlx5_flow_handle *miss_handle = NULL; + struct mlx5_eswitch *esw = br_offloads->esw; + struct mlx5_flow_table *egress_ft; + int err; + + egress_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE, + MLX5_ESW_BRIDGE_LEVEL_EGRESS_TABLE, + esw); + if (IS_ERR(egress_ft)) + return PTR_ERR(egress_ft); + + vlan_fg = mlx5_esw_bridge_egress_vlan_fg_create(esw, egress_ft); + if (IS_ERR(vlan_fg)) { + err = PTR_ERR(vlan_fg); + goto err_vlan_fg; + } + + qinq_fg = mlx5_esw_bridge_egress_qinq_fg_create(esw, egress_ft); + if (IS_ERR(qinq_fg)) { + err = PTR_ERR(qinq_fg); + goto err_qinq_fg; + } + + mac_fg = mlx5_esw_bridge_egress_mac_fg_create(esw, egress_ft); + if (IS_ERR(mac_fg)) { + err = PTR_ERR(mac_fg); + goto err_mac_fg; + } + + if (mlx5_esw_bridge_pkt_reformat_vlan_pop_supported(esw)) { + miss_fg = mlx5_esw_bridge_egress_miss_fg_create(esw, egress_ft); + if (IS_ERR(miss_fg)) { + esw_warn(esw->dev, "Failed to create miss flow group (err=%ld)\n", + PTR_ERR(miss_fg)); + miss_fg = NULL; + goto skip_miss_flow; + } + + miss_pkt_reformat = mlx5_esw_bridge_pkt_reformat_vlan_pop_create(esw); + if (IS_ERR(miss_pkt_reformat)) { + esw_warn(esw->dev, + "Failed to alloc packet reformat REMOVE_HEADER (err=%ld)\n", + PTR_ERR(miss_pkt_reformat)); + miss_pkt_reformat = NULL; + mlx5_destroy_flow_group(miss_fg); + miss_fg = NULL; + goto skip_miss_flow; + } + + miss_handle = mlx5_esw_bridge_egress_miss_flow_create(egress_ft, + br_offloads->skip_ft, + miss_pkt_reformat); + if (IS_ERR(miss_handle)) { + esw_warn(esw->dev, "Failed to create miss flow (err=%ld)\n", + PTR_ERR(miss_handle)); + miss_handle = NULL; + mlx5_packet_reformat_dealloc(esw->dev, miss_pkt_reformat); + miss_pkt_reformat = NULL; + mlx5_destroy_flow_group(miss_fg); + miss_fg 
= NULL; + goto skip_miss_flow; + } + } +skip_miss_flow: + + bridge->egress_ft = egress_ft; + bridge->egress_vlan_fg = vlan_fg; + bridge->egress_qinq_fg = qinq_fg; + bridge->egress_mac_fg = mac_fg; + bridge->egress_miss_fg = miss_fg; + bridge->egress_miss_pkt_reformat = miss_pkt_reformat; + bridge->egress_miss_handle = miss_handle; + return 0; + +err_mac_fg: + mlx5_destroy_flow_group(qinq_fg); +err_qinq_fg: + mlx5_destroy_flow_group(vlan_fg); +err_vlan_fg: + mlx5_destroy_flow_table(egress_ft); + return err; +} + +static void +mlx5_esw_bridge_egress_table_cleanup(struct mlx5_esw_bridge *bridge) +{ + if (bridge->egress_miss_handle) + mlx5_del_flow_rules(bridge->egress_miss_handle); + if (bridge->egress_miss_pkt_reformat) + mlx5_packet_reformat_dealloc(bridge->br_offloads->esw->dev, + bridge->egress_miss_pkt_reformat); + if (bridge->egress_miss_fg) + mlx5_destroy_flow_group(bridge->egress_miss_fg); + mlx5_destroy_flow_group(bridge->egress_mac_fg); + mlx5_destroy_flow_group(bridge->egress_qinq_fg); + mlx5_destroy_flow_group(bridge->egress_vlan_fg); + mlx5_destroy_flow_table(bridge->egress_ft); +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_ingress_flow_with_esw_create(u16 vport_num, const unsigned char *addr, + struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, + struct mlx5_esw_bridge *bridge, + struct mlx5_eswitch *esw) +{ + struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads; + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT, + .flags = FLOW_ACT_NO_APPEND, + }; + struct mlx5_flow_destination dests[2] = {}; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + u8 *smac_v, *smac_c; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2; + + smac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value, + outer_headers.smac_47_16); + ether_addr_copy(smac_v, addr); + smac_c = MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria, + outer_headers.smac_47_16); + eth_broadcast_addr(smac_c); + + MLX5_SET(fte_match_param, rule_spec->match_criteria, + misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num)); + + if (vlan && vlan->pkt_reformat_push) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.pkt_reformat = vlan->pkt_reformat_push; + flow_act.modify_hdr = vlan->pkt_mod_hdr_push_mark; + } else if (vlan) { + if (bridge->vlan_proto == ETH_P_8021Q) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.cvlan_tag); + } else if (bridge->vlan_proto == ETH_P_8021AD) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.svlan_tag); + } + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid, + vlan->vid); + } + + dests[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dests[0].ft = bridge->egress_ft; + dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dests[1].counter_id = counter_id; + + 
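+ /* The ingress rule forwards matched source-MAC traffic to the per-bridge egress table and attaches the flow counter; mlx5_esw_bridge_update() later reads the counter's lastuse timestamp to refresh or age out the FDB entry. */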
handle = mlx5_add_flow_rules(br_offloads->ingress_ft, rule_spec, &flow_act, dests, + ARRAY_SIZE(dests)); + + kvfree(rule_spec); + return handle; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, + struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, + struct mlx5_esw_bridge *bridge) +{ + return mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id, + bridge, bridge->br_offloads->esw); +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_ingress_flow_peer_create(u16 vport_num, const unsigned char *addr, + struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_devcom *devcom = bridge->br_offloads->esw->dev->priv.devcom; + struct mlx5_flow_handle *handle; + struct mlx5_eswitch *peer_esw; + + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) + return ERR_PTR(-ENODEV); + + handle = mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id, + bridge, peer_esw); + + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + return handle; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_ingress_filter_flow_create(u16 vport_num, const unsigned char *addr, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads; + struct mlx5_flow_destination dest = { + .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE, + .ft = br_offloads->skip_ft, + }; + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flags = FLOW_ACT_NO_APPEND, + }; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + u8 *smac_v, *smac_c; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2; + + smac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value, + outer_headers.smac_47_16); + ether_addr_copy(smac_v, addr); + smac_c = MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria, + outer_headers.smac_47_16); + eth_broadcast_addr(smac_c); + + MLX5_SET(fte_match_param, rule_spec->match_criteria, + misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(br_offloads->esw, vport_num)); + + if (bridge->vlan_proto == ETH_P_8021Q) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.cvlan_tag); + } else if (bridge->vlan_proto == ETH_P_8021AD) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.svlan_tag); + } + + handle = mlx5_add_flow_rules(br_offloads->ingress_ft, rule_spec, &flow_act, &dest, 1); + + kvfree(rule_spec); + return handle; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_egress_flow_create(u16 vport_num, u16 esw_owner_vhca_id, const unsigned char *addr, + struct mlx5_esw_bridge_vlan *vlan, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_flow_destination dest = { + .type = MLX5_FLOW_DESTINATION_TYPE_VPORT, + .vport.num = vport_num, + }; + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flags = FLOW_ACT_NO_APPEND, + }; + struct mlx5_flow_spec 
*rule_spec; + struct mlx5_flow_handle *handle; + u8 *dmac_v, *dmac_c; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + if (MLX5_CAP_ESW_FLOWTABLE(bridge->br_offloads->esw->dev, flow_source) && + vport_num == MLX5_VPORT_UPLINK) + rule_spec->flow_context.flow_source = + MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; + rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + dmac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value, + outer_headers.dmac_47_16); + ether_addr_copy(dmac_v, addr); + dmac_c = MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria, + outer_headers.dmac_47_16); + eth_broadcast_addr(dmac_c); + + if (vlan) { + if (vlan->pkt_reformat_pop) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act.pkt_reformat = vlan->pkt_reformat_pop; + } + + if (bridge->vlan_proto == ETH_P_8021Q) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.cvlan_tag); + } else if (bridge->vlan_proto == ETH_P_8021AD) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.svlan_tag); + } + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid, + vlan->vid); + } + + if (MLX5_CAP_ESW(bridge->br_offloads->esw->dev, merged_eswitch)) { + dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; + dest.vport.vhca_id = esw_owner_vhca_id; + } + handle = mlx5_add_flow_rules(bridge->egress_ft, rule_spec, &flow_act, &dest, 1); + + kvfree(rule_spec); + return handle; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_egress_miss_flow_create(struct mlx5_flow_table *egress_ft, + struct mlx5_flow_table *skip_ft, + struct mlx5_pkt_reformat *pkt_reformat) +{ + struct mlx5_flow_destination dest = { + .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE, + .ft = skip_ft, + }; + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT, + .flags = FLOW_ACT_NO_APPEND, + .pkt_reformat = pkt_reformat, + }; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + rule_spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + + MLX5_SET(fte_match_param, rule_spec->match_criteria, + misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); + MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_1, + ESW_TUN_BRIDGE_INGRESS_PUSH_VLAN_MARK); + + handle = mlx5_add_flow_rules(egress_ft, rule_spec, &flow_act, &dest, 1); + + kvfree(rule_spec); + return handle; +} + +static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge *bridge; + int err; + + bridge = kvzalloc(sizeof(*bridge), GFP_KERNEL); + if (!bridge) + return ERR_PTR(-ENOMEM); + + bridge->br_offloads = br_offloads; + err = mlx5_esw_bridge_egress_table_init(br_offloads, bridge); + if (err) + goto err_egress_tbl; + + err = rhashtable_init(&bridge->fdb_ht, &fdb_ht_params); + if (err) + goto err_fdb_ht; + + INIT_LIST_HEAD(&bridge->fdb_list); + bridge->ifindex = ifindex; + bridge->refcnt = 1; + bridge->ageing_time = clock_t_to_jiffies(BR_DEFAULT_AGEING_TIME); + 
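+ /* New bridges default to 802.1Q; mlx5_esw_bridge_vlan_proto_set() can later switch the offload to 802.1ad and recreate the per-VLAN push/pop actions. */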
bridge->vlan_proto = ETH_P_8021Q; + list_add(&bridge->list, &br_offloads->bridges); + + return bridge; + +err_fdb_ht: + mlx5_esw_bridge_egress_table_cleanup(bridge); +err_egress_tbl: + kvfree(bridge); + return ERR_PTR(err); +} + +static void mlx5_esw_bridge_get(struct mlx5_esw_bridge *bridge) +{ + bridge->refcnt++; +} + +static void mlx5_esw_bridge_put(struct mlx5_esw_bridge_offloads *br_offloads, + struct mlx5_esw_bridge *bridge) +{ + if (--bridge->refcnt) + return; + + mlx5_esw_bridge_egress_table_cleanup(bridge); + list_del(&bridge->list); + rhashtable_destroy(&bridge->fdb_ht); + kvfree(bridge); + + if (list_empty(&br_offloads->bridges)) + mlx5_esw_bridge_ingress_table_cleanup(br_offloads); +} + +static struct mlx5_esw_bridge * +mlx5_esw_bridge_lookup(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge *bridge; + + ASSERT_RTNL(); + + list_for_each_entry(bridge, &br_offloads->bridges, list) { + if (bridge->ifindex == ifindex) { + mlx5_esw_bridge_get(bridge); + return bridge; + } + } + + if (!br_offloads->ingress_ft) { + int err = mlx5_esw_bridge_ingress_table_init(br_offloads); + + if (err) + return ERR_PTR(err); + } + + bridge = mlx5_esw_bridge_create(ifindex, br_offloads); + if (IS_ERR(bridge) && list_empty(&br_offloads->bridges)) + mlx5_esw_bridge_ingress_table_cleanup(br_offloads); + return bridge; +} + +static unsigned long mlx5_esw_bridge_port_key_from_data(u16 vport_num, u16 esw_owner_vhca_id) +{ + return vport_num | (unsigned long)esw_owner_vhca_id << sizeof(vport_num) * BITS_PER_BYTE; +} + +static unsigned long mlx5_esw_bridge_port_key(struct mlx5_esw_bridge_port *port) +{ + return mlx5_esw_bridge_port_key_from_data(port->vport_num, port->esw_owner_vhca_id); +} + +static int mlx5_esw_bridge_port_insert(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + return xa_insert(&br_offloads->ports, mlx5_esw_bridge_port_key(port), port, GFP_KERNEL); +} + +static struct mlx5_esw_bridge_port * +mlx5_esw_bridge_port_lookup(u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + return xa_load(&br_offloads->ports, mlx5_esw_bridge_port_key_from_data(vport_num, + esw_owner_vhca_id)); +} + +static void mlx5_esw_bridge_port_erase(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + xa_erase(&br_offloads->ports, mlx5_esw_bridge_port_key(port)); +} + +static void mlx5_esw_bridge_fdb_entry_refresh(struct mlx5_esw_bridge_fdb_entry *entry) +{ + trace_mlx5_esw_bridge_fdb_entry_refresh(entry); + + mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr, + entry->key.vid, + SWITCHDEV_FDB_ADD_TO_BRIDGE); +} + +static void +mlx5_esw_bridge_fdb_entry_cleanup(struct mlx5_esw_bridge_fdb_entry *entry, + struct mlx5_esw_bridge *bridge) +{ + trace_mlx5_esw_bridge_fdb_entry_cleanup(entry); + + rhashtable_remove_fast(&bridge->fdb_ht, &entry->ht_node, fdb_ht_params); + mlx5_del_flow_rules(entry->egress_handle); + if (entry->filter_handle) + mlx5_del_flow_rules(entry->filter_handle); + mlx5_del_flow_rules(entry->ingress_handle); + mlx5_fc_destroy(bridge->br_offloads->esw->dev, entry->ingress_counter); + list_del(&entry->vlan_list); + list_del(&entry->list); + kvfree(entry); +} + +static void +mlx5_esw_bridge_fdb_entry_notify_and_cleanup(struct mlx5_esw_bridge_fdb_entry *entry, + struct mlx5_esw_bridge *bridge) +{ + mlx5_esw_bridge_fdb_del_notify(entry); + mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge); +} + +static void mlx5_esw_bridge_fdb_flush(struct mlx5_esw_bridge 
*bridge) +{ + struct mlx5_esw_bridge_fdb_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list) + mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge); +} + +static struct mlx5_esw_bridge_vlan * +mlx5_esw_bridge_vlan_lookup(u16 vid, struct mlx5_esw_bridge_port *port) +{ + return xa_load(&port->vlans, vid); +} + +static int +mlx5_esw_bridge_vlan_push_create(u16 vlan_proto, struct mlx5_esw_bridge_vlan *vlan, + struct mlx5_eswitch *esw) +{ + struct { + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + } vlan_hdr = { htons(vlan_proto), htons(vlan->vid) }; + struct mlx5_pkt_reformat_params reformat_params = {}; + struct mlx5_pkt_reformat *pkt_reformat; + + if (!BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat_insert)) || + MLX5_CAP_GEN_2(esw->dev, max_reformat_insert_size) < sizeof(vlan_hdr) || + MLX5_CAP_GEN_2(esw->dev, max_reformat_insert_offset) < + offsetof(struct vlan_ethhdr, h_vlan_proto)) { + esw_warn(esw->dev, "Packet reformat INSERT_HEADER is not supported\n"); + return -EOPNOTSUPP; + } + + reformat_params.type = MLX5_REFORMAT_TYPE_INSERT_HDR; + reformat_params.param_0 = MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START; + reformat_params.param_1 = offsetof(struct vlan_ethhdr, h_vlan_proto); + reformat_params.size = sizeof(vlan_hdr); + reformat_params.data = &vlan_hdr; + pkt_reformat = mlx5_packet_reformat_alloc(esw->dev, + &reformat_params, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(pkt_reformat)) { + esw_warn(esw->dev, "Failed to alloc packet reformat INSERT_HEADER (err=%ld)\n", + PTR_ERR(pkt_reformat)); + return PTR_ERR(pkt_reformat); + } + + vlan->pkt_reformat_push = pkt_reformat; + return 0; +} + +static void +mlx5_esw_bridge_vlan_push_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw) +{ + mlx5_packet_reformat_dealloc(esw->dev, vlan->pkt_reformat_push); + vlan->pkt_reformat_push = NULL; +} + +static int +mlx5_esw_bridge_vlan_pop_create(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw) +{ + struct mlx5_pkt_reformat *pkt_reformat; + + if (!mlx5_esw_bridge_pkt_reformat_vlan_pop_supported(esw)) { + esw_warn(esw->dev, "Packet reformat REMOVE_HEADER is not supported\n"); + return -EOPNOTSUPP; + } + + pkt_reformat = mlx5_esw_bridge_pkt_reformat_vlan_pop_create(esw); + if (IS_ERR(pkt_reformat)) { + esw_warn(esw->dev, "Failed to alloc packet reformat REMOVE_HEADER (err=%ld)\n", + PTR_ERR(pkt_reformat)); + return PTR_ERR(pkt_reformat); + } + + vlan->pkt_reformat_pop = pkt_reformat; + return 0; +} + +static void +mlx5_esw_bridge_vlan_pop_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw) +{ + mlx5_packet_reformat_dealloc(esw->dev, vlan->pkt_reformat_pop); + vlan->pkt_reformat_pop = NULL; +} + +static int +mlx5_esw_bridge_vlan_push_mark_create(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw) +{ + u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5_modify_hdr *pkt_mod_hdr; + + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_1); + MLX5_SET(set_action_in, action, offset, 8); + MLX5_SET(set_action_in, action, length, ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS); + MLX5_SET(set_action_in, action, data, ESW_TUN_BRIDGE_INGRESS_PUSH_VLAN); + + pkt_mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, 1, action); + if (IS_ERR(pkt_mod_hdr)) + return PTR_ERR(pkt_mod_hdr); + + vlan->pkt_mod_hdr_push_mark = pkt_mod_hdr; + return 0; +} + +static void +mlx5_esw_bridge_vlan_push_mark_cleanup(struct 
mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw) +{ + mlx5_modify_header_dealloc(esw->dev, vlan->pkt_mod_hdr_push_mark); + vlan->pkt_mod_hdr_push_mark = NULL; +} + +static int +mlx5_esw_bridge_vlan_push_pop_create(u16 vlan_proto, u16 flags, struct mlx5_esw_bridge_vlan *vlan, + struct mlx5_eswitch *esw) +{ + int err; + + if (flags & BRIDGE_VLAN_INFO_PVID) { + err = mlx5_esw_bridge_vlan_push_create(vlan_proto, vlan, esw); + if (err) + return err; + + err = mlx5_esw_bridge_vlan_push_mark_create(vlan, esw); + if (err) + goto err_vlan_push_mark; + } + + if (flags & BRIDGE_VLAN_INFO_UNTAGGED) { + err = mlx5_esw_bridge_vlan_pop_create(vlan, esw); + if (err) + goto err_vlan_pop; + } + + return 0; + +err_vlan_pop: + if (vlan->pkt_mod_hdr_push_mark) + mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw); +err_vlan_push_mark: + if (vlan->pkt_reformat_push) + mlx5_esw_bridge_vlan_push_cleanup(vlan, esw); + return err; +} + +static struct mlx5_esw_bridge_vlan * +mlx5_esw_bridge_vlan_create(u16 vlan_proto, u16 vid, u16 flags, struct mlx5_esw_bridge_port *port, + struct mlx5_eswitch *esw) +{ + struct mlx5_esw_bridge_vlan *vlan; + int err; + + vlan = kvzalloc(sizeof(*vlan), GFP_KERNEL); + if (!vlan) + return ERR_PTR(-ENOMEM); + + vlan->vid = vid; + vlan->flags = flags; + INIT_LIST_HEAD(&vlan->fdb_list); + + err = mlx5_esw_bridge_vlan_push_pop_create(vlan_proto, flags, vlan, esw); + if (err) + goto err_vlan_push_pop; + + err = xa_insert(&port->vlans, vid, vlan, GFP_KERNEL); + if (err) + goto err_xa_insert; + + trace_mlx5_esw_bridge_vlan_create(vlan); + return vlan; + +err_xa_insert: + if (vlan->pkt_reformat_pop) + mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw); + if (vlan->pkt_mod_hdr_push_mark) + mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw); + if (vlan->pkt_reformat_push) + mlx5_esw_bridge_vlan_push_cleanup(vlan, esw); +err_vlan_push_pop: + kvfree(vlan); + return ERR_PTR(err); +} + +static void mlx5_esw_bridge_vlan_erase(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_vlan *vlan) +{ + xa_erase(&port->vlans, vlan->vid); +} + +static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_vlan *vlan, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_eswitch *esw = bridge->br_offloads->esw; + struct mlx5_esw_bridge_fdb_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &vlan->fdb_list, vlan_list) + mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge); + + if (vlan->pkt_reformat_pop) + mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw); + if (vlan->pkt_mod_hdr_push_mark) + mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw); + if (vlan->pkt_reformat_push) + mlx5_esw_bridge_vlan_push_cleanup(vlan, esw); +} + +static void mlx5_esw_bridge_vlan_cleanup(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_vlan *vlan, + struct mlx5_esw_bridge *bridge) +{ + trace_mlx5_esw_bridge_vlan_cleanup(vlan); + mlx5_esw_bridge_vlan_flush(vlan, bridge); + mlx5_esw_bridge_vlan_erase(port, vlan); + kvfree(vlan); +} + +static void mlx5_esw_bridge_port_vlans_flush(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_esw_bridge_vlan *vlan; + unsigned long index; + + xa_for_each(&port->vlans, index, vlan) + mlx5_esw_bridge_vlan_cleanup(port, vlan, bridge); +} + +static int mlx5_esw_bridge_port_vlans_recreate(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads; + struct mlx5_esw_bridge_vlan *vlan; + unsigned long i; + int err; + + xa_for_each(&port->vlans, i, vlan) { + 
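+ /* Flush the offloaded FDB entries of this VLAN, then rebuild its push/pop reformat actions using the bridge's current VLAN protocol. */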
mlx5_esw_bridge_vlan_flush(vlan, bridge); + err = mlx5_esw_bridge_vlan_push_pop_create(bridge->vlan_proto, vlan->flags, vlan, + br_offloads->esw); + if (err) { + esw_warn(br_offloads->esw->dev, + "Failed to create VLAN=%u(proto=%x) push/pop actions (vport=%u,err=%d)\n", + vlan->vid, bridge->vlan_proto, port->vport_num, + err); + return err; + } + } + + return 0; +} + +static int +mlx5_esw_bridge_vlans_recreate(struct mlx5_esw_bridge *bridge) +{ + struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads; + struct mlx5_esw_bridge_port *port; + unsigned long i; + int err; + + xa_for_each(&br_offloads->ports, i, port) { + if (port->bridge != bridge) + continue; + + err = mlx5_esw_bridge_port_vlans_recreate(port, bridge); + if (err) + return err; + } + + return 0; +} + +static struct mlx5_esw_bridge_vlan * +mlx5_esw_bridge_port_vlan_lookup(u16 vid, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge *bridge, struct mlx5_eswitch *esw) +{ + struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge_vlan *vlan; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, bridge->br_offloads); + if (!port) { + /* FDB is added asynchronously on wq while port might have been deleted + * concurrently. Report on 'info' logging level and skip the FDB offload. + */ + esw_info(esw->dev, "Failed to lookup bridge port (vport=%u)\n", vport_num); + return ERR_PTR(-EINVAL); + } + + vlan = mlx5_esw_bridge_vlan_lookup(vid, port); + if (!vlan) { + /* FDB is added asynchronously on wq while vlan might have been deleted + * concurrently. Report on 'info' logging level and skip the FDB offload. + */ + esw_info(esw->dev, "Failed to lookup bridge port vlan metadata (vport=%u)\n", + vport_num); + return ERR_PTR(-EINVAL); + } + + return vlan; +} + +static struct mlx5_esw_bridge_fdb_entry * +mlx5_esw_bridge_fdb_lookup(struct mlx5_esw_bridge *bridge, + const unsigned char *addr, u16 vid) +{ + struct mlx5_esw_bridge_fdb_key key = {}; + + ether_addr_copy(key.addr, addr); + key.vid = vid; + return rhashtable_lookup_fast(&bridge->fdb_ht, &key, fdb_ht_params); +} + +static struct mlx5_esw_bridge_fdb_entry * +mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + const unsigned char *addr, u16 vid, bool added_by_user, bool peer, + struct mlx5_eswitch *esw, struct mlx5_esw_bridge *bridge) +{ + struct mlx5_esw_bridge_vlan *vlan = NULL; + struct mlx5_esw_bridge_fdb_entry *entry; + struct mlx5_flow_handle *handle; + struct mlx5_fc *counter; + int err; + + if (bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG && vid) { + vlan = mlx5_esw_bridge_port_vlan_lookup(vid, vport_num, esw_owner_vhca_id, bridge, + esw); + if (IS_ERR(vlan)) + return ERR_CAST(vlan); + } + + entry = mlx5_esw_bridge_fdb_lookup(bridge, addr, vid); + if (entry) + mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge); + + entry = kvzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return ERR_PTR(-ENOMEM); + + ether_addr_copy(entry->key.addr, addr); + entry->key.vid = vid; + entry->dev = dev; + entry->vport_num = vport_num; + entry->esw_owner_vhca_id = esw_owner_vhca_id; + entry->lastuse = jiffies; + if (added_by_user) + entry->flags |= MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER; + if (peer) + entry->flags |= MLX5_ESW_BRIDGE_FLAG_PEER; + + counter = mlx5_fc_create(esw->dev, true); + if (IS_ERR(counter)) { + err = PTR_ERR(counter); + goto err_ingress_fc_create; + } + entry->ingress_counter = counter; + + handle = peer ? 
+ mlx5_esw_bridge_ingress_flow_peer_create(vport_num, addr, vlan, + mlx5_fc_id(counter), bridge) : + mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vlan, + mlx5_fc_id(counter), bridge); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + esw_warn(esw->dev, "Failed to create ingress flow(vport=%u,err=%d)\n", + vport_num, err); + goto err_ingress_flow_create; + } + entry->ingress_handle = handle; + + if (bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG) { + handle = mlx5_esw_bridge_ingress_filter_flow_create(vport_num, addr, bridge); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + esw_warn(esw->dev, "Failed to create ingress filter(vport=%u,err=%d)\n", + vport_num, err); + goto err_ingress_filter_flow_create; + } + entry->filter_handle = handle; + } + + handle = mlx5_esw_bridge_egress_flow_create(vport_num, esw_owner_vhca_id, addr, vlan, + bridge); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + esw_warn(esw->dev, "Failed to create egress flow(vport=%u,err=%d)\n", + vport_num, err); + goto err_egress_flow_create; + } + entry->egress_handle = handle; + + err = rhashtable_insert_fast(&bridge->fdb_ht, &entry->ht_node, fdb_ht_params); + if (err) { + esw_warn(esw->dev, "Failed to insert FDB flow(vport=%u,err=%d)\n", vport_num, err); + goto err_ht_init; + } + + if (vlan) + list_add(&entry->vlan_list, &vlan->fdb_list); + else + INIT_LIST_HEAD(&entry->vlan_list); + list_add(&entry->list, &bridge->fdb_list); + + trace_mlx5_esw_bridge_fdb_entry_init(entry); + return entry; + +err_ht_init: + mlx5_del_flow_rules(entry->egress_handle); +err_egress_flow_create: + if (entry->filter_handle) + mlx5_del_flow_rules(entry->filter_handle); +err_ingress_filter_flow_create: + mlx5_del_flow_rules(entry->ingress_handle); +err_ingress_flow_create: + mlx5_fc_destroy(esw->dev, entry->ingress_counter); +err_ingress_fc_create: + kvfree(entry); + return ERR_PTR(err); +} + +int mlx5_esw_bridge_ageing_time_set(u16 vport_num, u16 esw_owner_vhca_id, unsigned long ageing_time, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge_port *port; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) + return -EINVAL; + + port->bridge->ageing_time = clock_t_to_jiffies(ageing_time); + return 0; +} + +int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge *bridge; + bool filtering; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) + return -EINVAL; + + bridge = port->bridge; + filtering = bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG; + if (filtering == enable) + return 0; + + mlx5_esw_bridge_fdb_flush(bridge); + if (enable) + bridge->flags |= MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG; + else + bridge->flags &= ~MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG; + + return 0; +} + +int mlx5_esw_bridge_vlan_proto_set(u16 vport_num, u16 esw_owner_vhca_id, u16 proto, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge *bridge; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, + br_offloads); + if (!port) + return -EINVAL; + + bridge = port->bridge; + if (bridge->vlan_proto == proto) + return 0; + if (proto != ETH_P_8021Q && proto != ETH_P_8021AD) { + esw_warn(br_offloads->esw->dev, "Can't set unsupported VLAN protocol %x", proto); + return -EOPNOTSUPP; + } + + mlx5_esw_bridge_fdb_flush(bridge); + 
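+ /* All offloaded FDB entries were flushed above; switch the protocol and recreate the per-port VLAN push/pop actions to match it. */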
bridge->vlan_proto = proto; + mlx5_esw_bridge_vlans_recreate(bridge); + + return 0; +} + +static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16 flags, + struct mlx5_esw_bridge_offloads *br_offloads, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_eswitch *esw = br_offloads->esw; + struct mlx5_esw_bridge_port *port; + int err; + + port = kvzalloc(sizeof(*port), GFP_KERNEL); + if (!port) + return -ENOMEM; + + port->vport_num = vport_num; + port->esw_owner_vhca_id = esw_owner_vhca_id; + port->bridge = bridge; + port->flags |= flags; + xa_init(&port->vlans); + err = mlx5_esw_bridge_port_insert(port, br_offloads); + if (err) { + esw_warn(esw->dev, + "Failed to insert port metadata (vport=%u,esw_owner_vhca_id=%u,err=%d)\n", + port->vport_num, port->esw_owner_vhca_id, err); + goto err_port_insert; + } + trace_mlx5_esw_bridge_vport_init(port); + + return 0; + +err_port_insert: + kvfree(port); + return err; +} + +static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_offloads, + struct mlx5_esw_bridge_port *port) +{ + u16 vport_num = port->vport_num, esw_owner_vhca_id = port->esw_owner_vhca_id; + struct mlx5_esw_bridge *bridge = port->bridge; + struct mlx5_esw_bridge_fdb_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list) + if (entry->vport_num == vport_num && entry->esw_owner_vhca_id == esw_owner_vhca_id) + mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge); + + trace_mlx5_esw_bridge_vport_cleanup(port); + mlx5_esw_bridge_port_vlans_flush(port, bridge); + mlx5_esw_bridge_port_erase(port, br_offloads); + kvfree(port); + mlx5_esw_bridge_put(br_offloads, bridge); + return 0; +} + +static int mlx5_esw_bridge_vport_link_with_flags(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + u16 flags, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_bridge *bridge; + int err; + + bridge = mlx5_esw_bridge_lookup(ifindex, br_offloads); + if (IS_ERR(bridge)) { + NL_SET_ERR_MSG_MOD(extack, "Error checking for existing bridge with same ifindex"); + return PTR_ERR(bridge); + } + + err = mlx5_esw_bridge_vport_init(vport_num, esw_owner_vhca_id, flags, br_offloads, bridge); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error initializing port"); + goto err_vport; + } + return 0; + +err_vport: + mlx5_esw_bridge_put(br_offloads, bridge); + return err; +} + +int mlx5_esw_bridge_vport_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) +{ + return mlx5_esw_bridge_vport_link_with_flags(ifindex, vport_num, esw_owner_vhca_id, 0, + br_offloads, extack); +} + +int mlx5_esw_bridge_vport_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_bridge_port *port; + int err; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) { + NL_SET_ERR_MSG_MOD(extack, "Port is not attached to any bridge"); + return -EINVAL; + } + if (port->bridge->ifindex != ifindex) { + NL_SET_ERR_MSG_MOD(extack, "Port is attached to another bridge"); + return -EINVAL; + } + + err = mlx5_esw_bridge_vport_cleanup(br_offloads, port); + if (err) + NL_SET_ERR_MSG_MOD(extack, "Port cleanup failed"); + return err; +} + +int mlx5_esw_bridge_vport_peer_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) +{ + if 
(!MLX5_CAP_ESW(br_offloads->esw->dev, merged_eswitch)) + return 0; + + return mlx5_esw_bridge_vport_link_with_flags(ifindex, vport_num, esw_owner_vhca_id, + MLX5_ESW_BRIDGE_PORT_FLAG_PEER, + br_offloads, extack); +} + +int mlx5_esw_bridge_vport_peer_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) +{ + return mlx5_esw_bridge_vport_unlink(ifindex, vport_num, esw_owner_vhca_id, br_offloads, + extack); +} + +int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, u16 flags, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge_vlan *vlan; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) + return -EINVAL; + + vlan = mlx5_esw_bridge_vlan_lookup(vid, port); + if (vlan) { + if (vlan->flags == flags) + return 0; + mlx5_esw_bridge_vlan_cleanup(port, vlan, port->bridge); + } + + vlan = mlx5_esw_bridge_vlan_create(port->bridge->vlan_proto, vid, flags, port, + br_offloads->esw); + if (IS_ERR(vlan)) { + NL_SET_ERR_MSG_MOD(extack, "Failed to create VLAN entry"); + return PTR_ERR(vlan); + } + return 0; +} + +void mlx5_esw_bridge_port_vlan_del(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge_vlan *vlan; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) + return; + + vlan = mlx5_esw_bridge_vlan_lookup(vid, port); + if (!vlan) + return; + mlx5_esw_bridge_vlan_cleanup(port, vlan, port->bridge); +} + +void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info) +{ + struct mlx5_esw_bridge_fdb_entry *entry; + struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge *bridge; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) + return; + + bridge = port->bridge; + entry = mlx5_esw_bridge_fdb_lookup(bridge, fdb_info->addr, fdb_info->vid); + if (!entry) { + esw_debug(br_offloads->esw->dev, + "FDB entry with specified key not found (MAC=%pM,vid=%u,vport=%u)\n", + fdb_info->addr, fdb_info->vid, vport_num); + return; + } + + entry->lastuse = jiffies; +} + +void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info) +{ + struct mlx5_esw_bridge_fdb_entry *entry; + struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge *bridge; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) + return; + + bridge = port->bridge; + entry = mlx5_esw_bridge_fdb_entry_init(dev, vport_num, esw_owner_vhca_id, fdb_info->addr, + fdb_info->vid, fdb_info->added_by_user, + port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER, + br_offloads->esw, bridge); + if (IS_ERR(entry)) + return; + + if (entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER) + mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid, + SWITCHDEV_FDB_OFFLOADED); + else if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_PEER)) + /* Take over dynamic entries to prevent kernel bridge from aging them out. 
*/ + mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid, + SWITCHDEV_FDB_ADD_TO_BRIDGE); +} + +void mlx5_esw_bridge_fdb_remove(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info) +{ + struct mlx5_eswitch *esw = br_offloads->esw; + struct mlx5_esw_bridge_fdb_entry *entry; + struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge *bridge; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) + return; + + bridge = port->bridge; + entry = mlx5_esw_bridge_fdb_lookup(bridge, fdb_info->addr, fdb_info->vid); + if (!entry) { + esw_warn(esw->dev, + "FDB entry with specified key not found (MAC=%pM,vid=%u,vport=%u)\n", + fdb_info->addr, fdb_info->vid, vport_num); + return; + } + + mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge); +} + +void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge_fdb_entry *entry, *tmp; + struct mlx5_esw_bridge *bridge; + + list_for_each_entry(bridge, &br_offloads->bridges, list) { + list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list) { + unsigned long lastuse = + (unsigned long)mlx5_fc_query_lastuse(entry->ingress_counter); + + if (entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER) + continue; + + if (time_after(lastuse, entry->lastuse)) + mlx5_esw_bridge_fdb_entry_refresh(entry); + else if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_PEER) && + time_is_before_jiffies(entry->lastuse + bridge->ageing_time)) + mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge); + } + } +} + +static void mlx5_esw_bridge_flush(struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge_port *port; + unsigned long i; + + xa_for_each(&br_offloads->ports, i, port) + mlx5_esw_bridge_vport_cleanup(br_offloads, port); + + WARN_ONCE(!list_empty(&br_offloads->bridges), + "Cleaning up bridge offloads while still having bridges attached\n"); +} + +struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_bridge_offloads *br_offloads; + + ASSERT_RTNL(); + + br_offloads = kvzalloc(sizeof(*br_offloads), GFP_KERNEL); + if (!br_offloads) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&br_offloads->bridges); + xa_init(&br_offloads->ports); + br_offloads->esw = esw; + esw->br_offloads = br_offloads; + + return br_offloads; +} + +void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_bridge_offloads *br_offloads = esw->br_offloads; + + ASSERT_RTNL(); + + if (!br_offloads) + return; + + mlx5_esw_bridge_flush(br_offloads); + WARN_ON(!xa_empty(&br_offloads->ports)); + + esw->br_offloads = NULL; + kvfree(br_offloads); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h new file mode 100644 index 000000000..10851a515 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. 
*/ + +#ifndef __MLX5_ESW_BRIDGE_H__ +#define __MLX5_ESW_BRIDGE_H__ + +#include <linux/notifier.h> +#include <linux/list.h> +#include <linux/workqueue.h> +#include <linux/xarray.h> +#include "eswitch.h" + +struct mlx5_flow_table; +struct mlx5_flow_group; + +struct mlx5_esw_bridge_offloads { + struct mlx5_eswitch *esw; + struct list_head bridges; + struct xarray ports; + + struct notifier_block netdev_nb; + struct notifier_block nb_blk; + struct notifier_block nb; + struct workqueue_struct *wq; + struct delayed_work update_work; + + struct mlx5_flow_table *ingress_ft; + struct mlx5_flow_group *ingress_vlan_fg; + struct mlx5_flow_group *ingress_vlan_filter_fg; + struct mlx5_flow_group *ingress_qinq_fg; + struct mlx5_flow_group *ingress_qinq_filter_fg; + struct mlx5_flow_group *ingress_mac_fg; + + struct mlx5_flow_table *skip_ft; +}; + +struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw); +void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw); +int mlx5_esw_bridge_vport_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack); +int mlx5_esw_bridge_vport_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack); +int mlx5_esw_bridge_vport_peer_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack); +int mlx5_esw_bridge_vport_peer_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack); +void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info); +void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info); +void mlx5_esw_bridge_fdb_remove(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info); +void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads); +int mlx5_esw_bridge_ageing_time_set(u16 vport_num, u16 esw_owner_vhca_id, unsigned long ageing_time, + struct mlx5_esw_bridge_offloads *br_offloads); +int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable, + struct mlx5_esw_bridge_offloads *br_offloads); +int mlx5_esw_bridge_vlan_proto_set(u16 vport_num, u16 esw_owner_vhca_id, u16 proto, + struct mlx5_esw_bridge_offloads *br_offloads); +int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, u16 flags, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack); +void mlx5_esw_bridge_port_vlan_del(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, + struct mlx5_esw_bridge_offloads *br_offloads); + +#endif /* __MLX5_ESW_BRIDGE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h new file mode 100644 index 000000000..878311fe9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. 
*/ + +#ifndef _MLX5_ESW_BRIDGE_PRIVATE_ +#define _MLX5_ESW_BRIDGE_PRIVATE_ + +#include <linux/netdevice.h> +#include <linux/if_bridge.h> +#include <linux/if_vlan.h> +#include <linux/if_ether.h> +#include <linux/rhashtable.h> +#include <linux/xarray.h> +#include "fs_core.h" + +struct mlx5_esw_bridge_fdb_key { + unsigned char addr[ETH_ALEN]; + u16 vid; +}; + +enum { + MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER = BIT(0), + MLX5_ESW_BRIDGE_FLAG_PEER = BIT(1), +}; + +enum { + MLX5_ESW_BRIDGE_PORT_FLAG_PEER = BIT(0), +}; + +struct mlx5_esw_bridge_fdb_entry { + struct mlx5_esw_bridge_fdb_key key; + struct rhash_head ht_node; + struct net_device *dev; + struct list_head list; + struct list_head vlan_list; + u16 vport_num; + u16 esw_owner_vhca_id; + u16 flags; + + struct mlx5_flow_handle *ingress_handle; + struct mlx5_fc *ingress_counter; + unsigned long lastuse; + struct mlx5_flow_handle *egress_handle; + struct mlx5_flow_handle *filter_handle; +}; + +struct mlx5_esw_bridge_vlan { + u16 vid; + u16 flags; + struct list_head fdb_list; + struct mlx5_pkt_reformat *pkt_reformat_push; + struct mlx5_pkt_reformat *pkt_reformat_pop; + struct mlx5_modify_hdr *pkt_mod_hdr_push_mark; +}; + +struct mlx5_esw_bridge_port { + u16 vport_num; + u16 esw_owner_vhca_id; + u16 flags; + struct mlx5_esw_bridge *bridge; + struct xarray vlans; +}; + +#endif /* _MLX5_ESW_BRIDGE_PRIVATE_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c new file mode 100644 index 000000000..2db13c71e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include <linux/debugfs.h> +#include "eswitch.h" + +enum vnic_diag_counter { + MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE, + MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW, + MLX5_VNIC_DIAG_COMP_EQ_OVERRUN, + MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN, + MLX5_VNIC_DIAG_CQ_OVERRUN, + MLX5_VNIC_DIAG_INVALID_COMMAND, + MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND, +}; + +static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_counter counter, + u32 *val) +{ + u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; + struct mlx5_core_dev *dev = vport->dev; + u16 vport_num = vport->vport; + void *vnic_diag_out; + int err; + + MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV); + MLX5_SET(query_vnic_env_in, in, vport_number, vport_num); + if (!mlx5_esw_is_manager_vport(dev->priv.eswitch, vport_num)) + MLX5_SET(query_vnic_env_in, in, other_vport, 1); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + vnic_diag_out = MLX5_ADDR_OF(query_vnic_env_out, out, vport_env); + switch (counter) { + case MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE: + *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, total_error_queues); + break; + case MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW: + *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, + send_queue_priority_update_flow); + break; + case MLX5_VNIC_DIAG_COMP_EQ_OVERRUN: + *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, comp_eq_overrun); + break; + case MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN: + *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, async_eq_overrun); + break; + case MLX5_VNIC_DIAG_CQ_OVERRUN: + *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, cq_overrun); + break; + 
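+ /* invalid_command and quota_exceeded_command come from the same vnic_diagnostic_statistics layout; their debugfs files are only created when the matching capability bit is set. */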
case MLX5_VNIC_DIAG_INVALID_COMMAND: + *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, invalid_command); + break; + case MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND: + *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, quota_exceeded_command); + break; + } + + return 0; +} + +static int __show_vnic_diag(struct seq_file *file, struct mlx5_vport *vport, + enum vnic_diag_counter type) +{ + u32 val = 0; + int ret; + + ret = mlx5_esw_query_vnic_diag(vport, type, &val); + if (ret) + return ret; + + seq_printf(file, "%d\n", val); + return 0; +} + +static int total_q_under_processor_handle_show(struct seq_file *file, void *priv) +{ + return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE); +} + +static int send_queue_priority_update_flow_show(struct seq_file *file, void *priv) +{ + return __show_vnic_diag(file, file->private, + MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW); +} + +static int comp_eq_overrun_show(struct seq_file *file, void *priv) +{ + return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_COMP_EQ_OVERRUN); +} + +static int async_eq_overrun_show(struct seq_file *file, void *priv) +{ + return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN); +} + +static int cq_overrun_show(struct seq_file *file, void *priv) +{ + return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_CQ_OVERRUN); +} + +static int invalid_command_show(struct seq_file *file, void *priv) +{ + return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_INVALID_COMMAND); +} + +static int quota_exceeded_command_show(struct seq_file *file, void *priv) +{ + return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND); +} + +DEFINE_SHOW_ATTRIBUTE(total_q_under_processor_handle); +DEFINE_SHOW_ATTRIBUTE(send_queue_priority_update_flow); +DEFINE_SHOW_ATTRIBUTE(comp_eq_overrun); +DEFINE_SHOW_ATTRIBUTE(async_eq_overrun); +DEFINE_SHOW_ATTRIBUTE(cq_overrun); +DEFINE_SHOW_ATTRIBUTE(invalid_command); +DEFINE_SHOW_ATTRIBUTE(quota_exceeded_command); + +void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + + debugfs_remove_recursive(vport->dbgfs); + vport->dbgfs = NULL; +} + +/* vnic diag dir name is "pf", "ecpf" or "{vf/sf}_xxxx" */ +#define VNIC_DIAG_DIR_NAME_MAX_LEN 8 + +void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + struct dentry *vnic_diag; + char dir_name[VNIC_DIAG_DIR_NAME_MAX_LEN]; + int err; + + if (!MLX5_CAP_GEN(esw->dev, vport_group_manager)) + return; + + if (vport_num == MLX5_VPORT_PF) { + strcpy(dir_name, "pf"); + } else if (vport_num == MLX5_VPORT_ECPF) { + strcpy(dir_name, "ecpf"); + } else { + err = snprintf(dir_name, VNIC_DIAG_DIR_NAME_MAX_LEN, "%s_%d", is_sf ? "sf" : "vf", + is_sf ? 
sf_num : vport_num - MLX5_VPORT_FIRST_VF); + if (WARN_ON(err < 0)) + return; + } + + vport->dbgfs = debugfs_create_dir(dir_name, esw->dbgfs); + vnic_diag = debugfs_create_dir("vnic_diag", vport->dbgfs); + + if (MLX5_CAP_GEN(esw->dev, vnic_env_queue_counters)) { + debugfs_create_file("total_q_under_processor_handle", 0444, vnic_diag, vport, + &total_q_under_processor_handle_fops); + debugfs_create_file("send_queue_priority_update_flow", 0444, vnic_diag, vport, + &send_queue_priority_update_flow_fops); + } + + if (MLX5_CAP_GEN(esw->dev, eq_overrun_count)) { + debugfs_create_file("comp_eq_overrun", 0444, vnic_diag, vport, + &comp_eq_overrun_fops); + debugfs_create_file("async_eq_overrun", 0444, vnic_diag, vport, + &async_eq_overrun_fops); + } + + if (MLX5_CAP_GEN(esw->dev, vnic_env_cq_overrun)) + debugfs_create_file("cq_overrun", 0444, vnic_diag, vport, &cq_overrun_fops); + + if (MLX5_CAP_GEN(esw->dev, invalid_command_count)) + debugfs_create_file("invalid_command", 0444, vnic_diag, vport, + &invalid_command_fops); + + if (MLX5_CAP_GEN(esw->dev, quota_exceeded_count)) + debugfs_create_file("quota_exceeded_command", 0444, vnic_diag, vport, + &quota_exceeded_command_fops); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c new file mode 100644 index 000000000..9bc7be95d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd. */ + +#include <linux/mlx5/driver.h> +#include "eswitch.h" + +static void +mlx5_esw_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_id *ppid) +{ + u64 parent_id; + + parent_id = mlx5_query_nic_system_image_guid(dev); + ppid->id_len = sizeof(parent_id); + memcpy(ppid->id, &parent_id, sizeof(parent_id)); +} + +static bool mlx5_esw_devlink_port_supported(struct mlx5_eswitch *esw, u16 vport_num) +{ + return vport_num == MLX5_VPORT_UPLINK || + (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF) || + mlx5_eswitch_is_vf_vport(esw, vport_num); +} + +static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_core_dev *dev = esw->dev; + struct devlink_port_attrs attrs = {}; + struct netdev_phys_item_id ppid = {}; + struct devlink_port *dl_port; + u32 controller_num = 0; + bool external; + u16 pfnum; + + dl_port = kzalloc(sizeof(*dl_port), GFP_KERNEL); + if (!dl_port) + return NULL; + + mlx5_esw_get_port_parent_id(dev, &ppid); + pfnum = mlx5_get_dev_index(dev); + external = mlx5_core_is_ecpf_esw_manager(dev); + if (external) + controller_num = dev->priv.eswitch->offloads.host_number + 1; + + if (vport_num == MLX5_VPORT_UPLINK) { + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + attrs.phys.port_number = pfnum; + memcpy(attrs.switch_id.id, ppid.id, ppid.id_len); + attrs.switch_id.id_len = ppid.id_len; + devlink_port_attrs_set(dl_port, &attrs); + } else if (vport_num == MLX5_VPORT_PF) { + memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len); + dl_port->attrs.switch_id.id_len = ppid.id_len; + devlink_port_attrs_pci_pf_set(dl_port, controller_num, pfnum, external); + } else if (mlx5_eswitch_is_vf_vport(esw, vport_num)) { + memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len); + dl_port->attrs.switch_id.id_len = ppid.id_len; + devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum, + vport_num - 1, external); + } + return dl_port; +} + +static void 
mlx5_esw_dl_port_free(struct devlink_port *dl_port) +{ + kfree(dl_port); +} + +int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_core_dev *dev = esw->dev; + struct devlink_port *dl_port; + unsigned int dl_port_index; + struct mlx5_vport *vport; + struct devlink *devlink; + int err; + + if (!mlx5_esw_devlink_port_supported(esw, vport_num)) + return 0; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + dl_port = mlx5_esw_dl_port_alloc(esw, vport_num); + if (!dl_port) + return -ENOMEM; + + devlink = priv_to_devlink(dev); + dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num); + err = devl_port_register(devlink, dl_port, dl_port_index); + if (err) + goto reg_err; + + err = devl_rate_leaf_create(dl_port, vport); + if (err) + goto rate_err; + + vport->dl_port = dl_port; + return 0; + +rate_err: + devl_port_unregister(dl_port); +reg_err: + mlx5_esw_dl_port_free(dl_port); + return err; +} + +void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport; + + if (!mlx5_esw_devlink_port_supported(esw, vport_num)) + return; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return; + + if (vport->dl_port->devlink_rate) { + mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL); + devl_rate_leaf_destroy(vport->dl_port); + } + + devl_port_unregister(vport->dl_port); + mlx5_esw_dl_port_free(vport->dl_port); + vport->dl_port = NULL; +} + +struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + return IS_ERR(vport) ? ERR_CAST(vport) : vport->dl_port; +} + +int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port, + u16 vport_num, u32 controller, u32 sfnum) +{ + struct mlx5_core_dev *dev = esw->dev; + struct netdev_phys_item_id ppid = {}; + unsigned int dl_port_index; + struct mlx5_vport *vport; + struct devlink *devlink; + u16 pfnum; + int err; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + pfnum = mlx5_get_dev_index(dev); + mlx5_esw_get_port_parent_id(dev, &ppid); + memcpy(dl_port->attrs.switch_id.id, &ppid.id[0], ppid.id_len); + dl_port->attrs.switch_id.id_len = ppid.id_len; + devlink_port_attrs_pci_sf_set(dl_port, controller, pfnum, sfnum, !!controller); + devlink = priv_to_devlink(dev); + dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num); + err = devl_port_register(devlink, dl_port, dl_port_index); + if (err) + return err; + + err = devl_rate_leaf_create(dl_port, vport); + if (err) + goto rate_err; + + vport->dl_port = dl_port; + return 0; + +rate_err: + devl_port_unregister(dl_port); + return err; +} + +void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return; + + if (vport->dl_port->devlink_rate) { + mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL); + devl_rate_leaf_destroy(vport->dl_port); + } + + devl_port_unregister(vport->dl_port); + vport->dl_port = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h new file mode 100644 index 000000000..51ac24e6e --- /dev/null +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_ESW_BRIDGE_TRACEPOINT_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_ESW_BRIDGE_TRACEPOINT_ + +#include <linux/tracepoint.h> +#include "../bridge_priv.h" + +DECLARE_EVENT_CLASS(mlx5_esw_bridge_fdb_template, + TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb), + TP_ARGS(fdb), + TP_STRUCT__entry( + __array(char, dev_name, IFNAMSIZ) + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + __field(u16, flags) + __field(unsigned int, used) + ), + TP_fast_assign( + strscpy(__entry->dev_name, + netdev_name(fdb->dev), + IFNAMSIZ); + memcpy(__entry->addr, fdb->key.addr, ETH_ALEN); + __entry->vid = fdb->key.vid; + __entry->flags = fdb->flags; + __entry->used = jiffies_to_msecs(jiffies - fdb->lastuse) + ), + TP_printk("net_device=%s addr=%pM vid=%hu flags=%hx used=%u", + __entry->dev_name, + __entry->addr, + __entry->vid, + __entry->flags, + __entry->used / 1000) + ); + +DEFINE_EVENT(mlx5_esw_bridge_fdb_template, + mlx5_esw_bridge_fdb_entry_init, + TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb), + TP_ARGS(fdb) + ); +DEFINE_EVENT(mlx5_esw_bridge_fdb_template, + mlx5_esw_bridge_fdb_entry_refresh, + TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb), + TP_ARGS(fdb) + ); +DEFINE_EVENT(mlx5_esw_bridge_fdb_template, + mlx5_esw_bridge_fdb_entry_cleanup, + TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb), + TP_ARGS(fdb) + ); + +DECLARE_EVENT_CLASS(mlx5_esw_bridge_vlan_template, + TP_PROTO(const struct mlx5_esw_bridge_vlan *vlan), + TP_ARGS(vlan), + TP_STRUCT__entry( + __field(u16, vid) + __field(u16, flags) + ), + TP_fast_assign( + __entry->vid = vlan->vid; + __entry->flags = vlan->flags; + ), + TP_printk("vid=%hu flags=%hx", + __entry->vid, + __entry->flags) + ); + +DEFINE_EVENT(mlx5_esw_bridge_vlan_template, + mlx5_esw_bridge_vlan_create, + TP_PROTO(const struct mlx5_esw_bridge_vlan *vlan), + TP_ARGS(vlan) + ); +DEFINE_EVENT(mlx5_esw_bridge_vlan_template, + mlx5_esw_bridge_vlan_cleanup, + TP_PROTO(const struct mlx5_esw_bridge_vlan *vlan), + TP_ARGS(vlan) + ); + +DECLARE_EVENT_CLASS(mlx5_esw_bridge_port_template, + TP_PROTO(const struct mlx5_esw_bridge_port *port), + TP_ARGS(port), + TP_STRUCT__entry( + __field(u16, vport_num) + __field(u16, esw_owner_vhca_id) + __field(u16, flags) + ), + TP_fast_assign( + __entry->vport_num = port->vport_num; + __entry->esw_owner_vhca_id = port->esw_owner_vhca_id; + __entry->flags = port->flags; + ), + TP_printk("vport_num=%hu esw_owner_vhca_id=%hu flags=%hx", + __entry->vport_num, + __entry->esw_owner_vhca_id, + __entry->flags) + ); + +DEFINE_EVENT(mlx5_esw_bridge_port_template, + mlx5_esw_bridge_vport_init, + TP_PROTO(const struct mlx5_esw_bridge_port *port), + TP_ARGS(port) + ); +DEFINE_EVENT(mlx5_esw_bridge_port_template, + mlx5_esw_bridge_vport_cleanup, + TP_PROTO(const struct mlx5_esw_bridge_port *port), + TP_ARGS(port) + ); + +#endif + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH esw/diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE bridge_tracepoint +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h new file mode 100644 index 000000000..458baf0c6 --- /dev/null +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_ESW_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_ESW_TP_ + +#include <linux/tracepoint.h> +#include "eswitch.h" + +TRACE_EVENT(mlx5_esw_vport_qos_destroy, + TP_PROTO(const struct mlx5_vport *vport), + TP_ARGS(vport), + TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device)) + __field(unsigned short, vport_id) + __field(unsigned int, tsar_ix) + ), + TP_fast_assign(__assign_str(devname, dev_name(vport->dev->device)); + __entry->vport_id = vport->vport; + __entry->tsar_ix = vport->qos.esw_tsar_ix; + ), + TP_printk("(%s) vport=%hu tsar_ix=%u\n", + __get_str(devname), __entry->vport_id, __entry->tsar_ix + ) +); + +DECLARE_EVENT_CLASS(mlx5_esw_vport_qos_template, + TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate), + TP_ARGS(vport, bw_share, max_rate), + TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device)) + __field(unsigned short, vport_id) + __field(unsigned int, tsar_ix) + __field(unsigned int, bw_share) + __field(unsigned int, max_rate) + __field(void *, group) + ), + TP_fast_assign(__assign_str(devname, dev_name(vport->dev->device)); + __entry->vport_id = vport->vport; + __entry->tsar_ix = vport->qos.esw_tsar_ix; + __entry->bw_share = bw_share; + __entry->max_rate = max_rate; + __entry->group = vport->qos.group; + ), + TP_printk("(%s) vport=%hu tsar_ix=%u bw_share=%u, max_rate=%u group=%p\n", + __get_str(devname), __entry->vport_id, __entry->tsar_ix, + __entry->bw_share, __entry->max_rate, __entry->group + ) +); + +DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_create, + TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate), + TP_ARGS(vport, bw_share, max_rate) + ); + +DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_config, + TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate), + TP_ARGS(vport, bw_share, max_rate) + ); + +DECLARE_EVENT_CLASS(mlx5_esw_group_qos_template, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_esw_rate_group *group, + unsigned int tsar_ix), + TP_ARGS(dev, group, tsar_ix), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(const void *, group) + __field(unsigned int, tsar_ix) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->group = group; + __entry->tsar_ix = tsar_ix; + ), + TP_printk("(%s) group=%p tsar_ix=%u\n", + __get_str(devname), __entry->group, __entry->tsar_ix + ) +); + +DEFINE_EVENT(mlx5_esw_group_qos_template, mlx5_esw_group_qos_create, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_esw_rate_group *group, + unsigned int tsar_ix), + TP_ARGS(dev, group, tsar_ix) + ); + +DEFINE_EVENT(mlx5_esw_group_qos_template, mlx5_esw_group_qos_destroy, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_esw_rate_group *group, + unsigned int tsar_ix), + TP_ARGS(dev, group, tsar_ix) + ); + +TRACE_EVENT(mlx5_esw_group_qos_config, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_esw_rate_group *group, + unsigned int tsar_ix, u32 bw_share, u32 max_rate), + TP_ARGS(dev, group, tsar_ix, bw_share, max_rate), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(const void *, group) + __field(unsigned int, tsar_ix) + __field(unsigned int, bw_share) + 
__field(unsigned int, max_rate) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->group = group; + __entry->tsar_ix = tsar_ix; + __entry->bw_share = bw_share; + __entry->max_rate = max_rate; + ), + TP_printk("(%s) group=%p tsar_ix=%u bw_share=%u max_rate=%u\n", + __get_str(devname), __entry->group, __entry->tsar_ix, + __entry->bw_share, __entry->max_rate + ) +); +#endif /* _MLX5_ESW_TP_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH esw/diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE qos_tracepoint +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c new file mode 100644 index 000000000..8a94870c5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c @@ -0,0 +1,390 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021 Mellanox Technologies. */ + +#include <linux/etherdevice.h> +#include <linux/idr.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/fs.h> +#include "mlx5_core.h" +#include "eswitch.h" +#include "en.h" +#include "en_tc.h" +#include "fs_core.h" +#include "esw/indir_table.h" +#include "lib/fs_chains.h" +#include "en/mod_hdr.h" + +#define MLX5_ESW_INDIR_TABLE_SIZE 2 +#define MLX5_ESW_INDIR_TABLE_RECIRC_IDX (MLX5_ESW_INDIR_TABLE_SIZE - 2) +#define MLX5_ESW_INDIR_TABLE_FWD_IDX (MLX5_ESW_INDIR_TABLE_SIZE - 1) + +struct mlx5_esw_indir_table_rule { + struct mlx5_flow_handle *handle; + struct mlx5_modify_hdr *mh; + refcount_t refcnt; +}; + +struct mlx5_esw_indir_table_entry { + struct hlist_node hlist; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *recirc_grp; + struct mlx5_flow_group *fwd_grp; + struct mlx5_flow_handle *fwd_rule; + struct mlx5_esw_indir_table_rule *recirc_rule; + int fwd_ref; + + u16 vport; +}; + +struct mlx5_esw_indir_table { + struct mutex lock; /* protects table */ + DECLARE_HASHTABLE(table, 8); +}; + +struct mlx5_esw_indir_table * +mlx5_esw_indir_table_init(void) +{ + struct mlx5_esw_indir_table *indir = kvzalloc(sizeof(*indir), GFP_KERNEL); + + if (!indir) + return ERR_PTR(-ENOMEM); + + mutex_init(&indir->lock); + hash_init(indir->table); + return indir; +} + +void +mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir) +{ + mutex_destroy(&indir->lock); + kvfree(indir); +} + +bool +mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport_num, + struct mlx5_core_dev *dest_mdev) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + bool vf_sf_vport; + + vf_sf_vport = mlx5_eswitch_is_vf_vport(esw, vport_num) || + mlx5_esw_is_sf_vport(esw, vport_num); + + /* Use indirect table for all IP traffic from UL to VF with vport + * destination when source rewrite flag is set. + */ + return esw_attr->in_rep->vport == MLX5_VPORT_UPLINK && + vf_sf_vport && + esw->dev == dest_mdev && + attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE; +} + +u16 +mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + + return esw_attr->rx_tun_attr ? 
esw_attr->rx_tun_attr->decap_vport : 0; +} + +static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_esw_indir_table_entry *e) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + struct mlx5e_tc_mod_hdr_acts mod_acts = {}; + struct mlx5_flow_destination dest = {}; + struct mlx5_esw_indir_table_rule *rule; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *handle; + int err = 0; + u32 data; + + if (e->recirc_rule) { + refcount_inc(&e->recirc_rule->refcnt); + return 0; + } + + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) + return -ENOMEM; + + /* Modify flow source to recirculate packet */ + data = mlx5_eswitch_get_vport_metadata_for_set(esw, esw_attr->rx_tun_attr->decap_vport); + err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB, + VPORT_TO_REG, data); + if (err) + goto err_mod_hdr_regc0; + + err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB, + TUNNEL_TO_REG, ESW_TUN_SLOW_TABLE_GOTO_VPORT); + if (err) + goto err_mod_hdr_regc1; + + flow_act.modify_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, + mod_acts.num_actions, mod_acts.actions); + if (IS_ERR(flow_act.modify_hdr)) { + err = PTR_ERR(flow_act.modify_hdr); + goto err_mod_hdr_alloc; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND; + flow_act.fg = e->recirc_grp; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = mlx5_chains_get_table(chains, 0, 1, 0); + if (IS_ERR(dest.ft)) { + err = PTR_ERR(dest.ft); + goto err_table; + } + handle = mlx5_add_flow_rules(e->ft, NULL, &flow_act, &dest, 1); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto err_handle; + } + + mlx5e_mod_hdr_dealloc(&mod_acts); + rule->handle = handle; + rule->mh = flow_act.modify_hdr; + refcount_set(&rule->refcnt, 1); + e->recirc_rule = rule; + return 0; + +err_handle: + mlx5_chains_put_table(chains, 0, 1, 0); +err_table: + mlx5_modify_header_dealloc(esw->dev, flow_act.modify_hdr); +err_mod_hdr_alloc: +err_mod_hdr_regc1: + mlx5e_mod_hdr_dealloc(&mod_acts); +err_mod_hdr_regc0: + kfree(rule); + return err; +} + +static void mlx5_esw_indir_table_rule_put(struct mlx5_eswitch *esw, + struct mlx5_esw_indir_table_entry *e) +{ + struct mlx5_esw_indir_table_rule *rule = e->recirc_rule; + struct mlx5_fs_chains *chains = esw_chains(esw); + + if (!rule) + return; + + if (!refcount_dec_and_test(&rule->refcnt)) + return; + + mlx5_del_flow_rules(rule->handle); + mlx5_chains_put_table(chains, 0, 1, 0); + mlx5_modify_header_dealloc(esw->dev, rule->mh); + kfree(rule); + e->recirc_rule = NULL; +} + +static int mlx5_create_indir_recirc_group(struct mlx5_esw_indir_table_entry *e) +{ + int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + u32 *in; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_RECIRC_IDX); + e->recirc_grp = mlx5_create_flow_group(e->ft, in); + if (IS_ERR(e->recirc_grp)) + err = PTR_ERR(e->recirc_grp); + + kvfree(in); + return err; +} + +static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw, + struct mlx5_esw_indir_table_entry *e) +{ + int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = 
{}; + struct mlx5_flow_spec *spec; + u32 *in; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + kvfree(in); + return -ENOMEM; + } + + /* Hold one entry */ + MLX5_SET(create_flow_group_in, in, start_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX); + MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX); + e->fwd_grp = mlx5_create_flow_group(e->ft, in); + if (IS_ERR(e->fwd_grp)) { + err = PTR_ERR(e->fwd_grp); + goto err_out; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_act.fg = e->fwd_grp; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = e->vport; + dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); + dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; + e->fwd_rule = mlx5_add_flow_rules(e->ft, spec, &flow_act, &dest, 1); + if (IS_ERR(e->fwd_rule)) { + mlx5_destroy_flow_group(e->fwd_grp); + err = PTR_ERR(e->fwd_rule); + } + +err_out: + kvfree(spec); + kvfree(in); + return err; +} + +static struct mlx5_esw_indir_table_entry * +mlx5_esw_indir_table_entry_create(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr, + u16 vport, bool decap) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *root_ns; + struct mlx5_esw_indir_table_entry *e; + struct mlx5_flow_table *ft; + int err = 0; + + root_ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) + return ERR_PTR(-ENOENT); + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) + return ERR_PTR(-ENOMEM); + + ft_attr.prio = FDB_TC_OFFLOAD; + ft_attr.max_fte = MLX5_ESW_INDIR_TABLE_SIZE; + ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.level = 1; + + ft = mlx5_create_flow_table(root_ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto tbl_err; + } + e->ft = ft; + e->vport = vport; + e->fwd_ref = !decap; + + err = mlx5_create_indir_recirc_group(e); + if (err) + goto recirc_grp_err; + + if (decap) { + err = mlx5_esw_indir_table_rule_get(esw, attr, e); + if (err) + goto recirc_rule_err; + } + + err = mlx5_create_indir_fwd_group(esw, e); + if (err) + goto fwd_grp_err; + + hash_add(esw->fdb_table.offloads.indir->table, &e->hlist, + vport << 16); + + return e; + +fwd_grp_err: + if (decap) + mlx5_esw_indir_table_rule_put(esw, e); +recirc_rule_err: + mlx5_destroy_flow_group(e->recirc_grp); +recirc_grp_err: + mlx5_destroy_flow_table(e->ft); +tbl_err: + kfree(e); + return ERR_PTR(err); +} + +static struct mlx5_esw_indir_table_entry * +mlx5_esw_indir_table_entry_lookup(struct mlx5_eswitch *esw, u16 vport) +{ + struct mlx5_esw_indir_table_entry *e; + u32 key = vport << 16; + + hash_for_each_possible(esw->fdb_table.offloads.indir->table, e, hlist, key) + if (e->vport == vport) + return e; + + return NULL; +} + +struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport, bool decap) +{ + struct mlx5_esw_indir_table_entry *e; + int err; + + mutex_lock(&esw->fdb_table.offloads.indir->lock); + e = mlx5_esw_indir_table_entry_lookup(esw, vport); + if (e) { + if (!decap) { + e->fwd_ref++; + } else { + err = mlx5_esw_indir_table_rule_get(esw, attr, e); + if (err) + goto out_err; + } + } else { + e = mlx5_esw_indir_table_entry_create(esw, attr, vport, decap); + if (IS_ERR(e)) { + err = PTR_ERR(e); + esw_warn(esw->dev, "Failed to create indirection table, err %d.\n", err); + goto out_err; + } + } + mutex_unlock(&esw->fdb_table.offloads.indir->lock); + return e->ft; + +out_err: + 
mutex_unlock(&esw->fdb_table.offloads.indir->lock); + return ERR_PTR(err); +} + +void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw, + u16 vport, bool decap) +{ + struct mlx5_esw_indir_table_entry *e; + + mutex_lock(&esw->fdb_table.offloads.indir->lock); + e = mlx5_esw_indir_table_entry_lookup(esw, vport); + if (!e) + goto out; + + if (!decap) + e->fwd_ref--; + else + mlx5_esw_indir_table_rule_put(esw, e); + + if (e->fwd_ref || e->recirc_rule) + goto out; + + hash_del(&e->hlist); + mlx5_destroy_flow_group(e->recirc_grp); + mlx5_del_flow_rules(e->fwd_rule); + mlx5_destroy_flow_group(e->fwd_grp); + mlx5_destroy_flow_table(e->ft); + kfree(e); +out: + mutex_unlock(&esw->fdb_table.offloads.indir->lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h new file mode 100644 index 000000000..036f5b3a3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#ifndef __MLX5_ESW_FT_H__ +#define __MLX5_ESW_FT_H__ + +#ifdef CONFIG_MLX5_CLS_ACT + +struct mlx5_esw_indir_table * +mlx5_esw_indir_table_init(void); +void +mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir); + +struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport, bool decap); +void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw, + u16 vport, bool decap); + +bool +mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport_num, + struct mlx5_core_dev *dest_mdev); + +u16 +mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr); + +#else +/* indir API stubs */ +static inline struct mlx5_esw_indir_table * +mlx5_esw_indir_table_init(void) +{ + return NULL; +} + +static inline void +mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir) +{ +} + +static inline struct mlx5_flow_table * +mlx5_esw_indir_table_get(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport, bool decap) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void +mlx5_esw_indir_table_put(struct mlx5_eswitch *esw, + u16 vport, bool decap) +{ +} + +static inline bool +mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport_num, + struct mlx5_core_dev *dest_mdev) +{ + return false; +} + +static inline u16 +mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr) +{ + return 0; +} +#endif + +#endif /* __MLX5_ESW_FT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c new file mode 100644 index 000000000..fabe49a35 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -0,0 +1,529 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021 Mellanox Technologies Ltd */ + +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/fs.h> +#include "esw/acl/lgcy.h" +#include "esw/legacy.h" +#include "mlx5_core.h" +#include "eswitch.h" +#include "fs_core.h" +#include "fs_ft_pool.h" +#include "esw/qos.h" + +enum { + LEGACY_VEPA_PRIO = 0, + LEGACY_FDB_PRIO, +}; + +static int esw_create_legacy_vepa_table(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct 
mlx5_flow_table *fdb; + int err; + + root_ns = mlx5_get_fdb_sub_ns(dev, 0); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + return -EOPNOTSUPP; + } + + /* num FTE 2, num FG 2 */ + ft_attr.prio = LEGACY_VEPA_PRIO; + ft_attr.max_fte = 2; + ft_attr.autogroup.max_num_groups = 2; + fdb = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create VEPA FDB err %d\n", err); + return err; + } + esw->fdb_table.legacy.vepa_fdb = fdb; + + return 0; +} + +static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw) +{ + esw_debug(esw->dev, "Destroy FDB Table\n"); + if (!esw->fdb_table.legacy.fdb) + return; + + if (esw->fdb_table.legacy.promisc_grp) + mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp); + if (esw->fdb_table.legacy.allmulti_grp) + mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); + if (esw->fdb_table.legacy.addr_grp) + mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); + mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb); + + esw->fdb_table.legacy.fdb = NULL; + esw->fdb_table.legacy.addr_grp = NULL; + esw->fdb_table.legacy.allmulti_grp = NULL; + esw->fdb_table.legacy.promisc_grp = NULL; + atomic64_set(&esw->user_count, 0); +} + +static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb; + struct mlx5_flow_group *g; + void *match_criteria; + int table_size; + u32 *flow_group_in; + u8 *dmac; + int err = 0; + + esw_debug(dev, "Create FDB log_max_size(%d)\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + + root_ns = mlx5_get_fdb_sub_ns(dev, 0); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + return -EOPNOTSUPP; + } + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + ft_attr.max_fte = POOL_NEXT_SIZE; + ft_attr.prio = LEGACY_FDB_PRIO; + fdb = mlx5_create_flow_table(root_ns, &ft_attr); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create FDB Table err %d\n", err); + goto out; + } + esw->fdb_table.legacy.fdb = fdb; + table_size = fdb->max_fte; + + /* Addresses group : Full match unicast/multicast addresses */ + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers.dmac_47_16); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + /* Preserve 2 entries for allmulti and promisc rules*/ + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 3); + eth_broadcast_addr(dmac); + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.legacy.addr_grp = g; + + /* Allmulti group : One rule that forwards any mcast traffic */ + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 2); + eth_zero_addr(dmac); + dmac[0] = 0x01; + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = 
PTR_ERR(g); + esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.legacy.allmulti_grp = g; + + /* Promiscuous group : + * One rule that forward all unmatched traffic from previous groups + */ + eth_zero_addr(dmac); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1); + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.legacy.promisc_grp = g; + +out: + if (err) + esw_destroy_legacy_fdb_table(esw); + + kvfree(flow_group_in); + return err; +} + +static void esw_destroy_legacy_vepa_table(struct mlx5_eswitch *esw) +{ + esw_debug(esw->dev, "Destroy VEPA Table\n"); + if (!esw->fdb_table.legacy.vepa_fdb) + return; + + mlx5_destroy_flow_table(esw->fdb_table.legacy.vepa_fdb); + esw->fdb_table.legacy.vepa_fdb = NULL; +} + +static int esw_create_legacy_table(struct mlx5_eswitch *esw) +{ + int err; + + memset(&esw->fdb_table.legacy, 0, sizeof(struct legacy_fdb)); + atomic64_set(&esw->user_count, 0); + + err = esw_create_legacy_vepa_table(esw); + if (err) + return err; + + err = esw_create_legacy_fdb_table(esw); + if (err) + esw_destroy_legacy_vepa_table(esw); + + return err; +} + +static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw) +{ + if (esw->fdb_table.legacy.vepa_uplink_rule) + mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_uplink_rule); + + if (esw->fdb_table.legacy.vepa_star_rule) + mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_star_rule); + + esw->fdb_table.legacy.vepa_uplink_rule = NULL; + esw->fdb_table.legacy.vepa_star_rule = NULL; +} + +static void esw_destroy_legacy_table(struct mlx5_eswitch *esw) +{ + esw_cleanup_vepa_rules(esw); + esw_destroy_legacy_fdb_table(esw); + esw_destroy_legacy_vepa_table(esw); +} + +#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \ + MLX5_VPORT_MC_ADDR_CHANGE | \ + MLX5_VPORT_PROMISC_CHANGE) + +int esw_legacy_enable(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + int ret; + + ret = esw_create_legacy_table(esw); + if (ret) + return ret; + + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + + ret = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS); + if (ret) + esw_destroy_legacy_table(esw); + return ret; +} + +void esw_legacy_disable(struct mlx5_eswitch *esw) +{ + struct esw_mc_addr *mc_promisc; + + mlx5_eswitch_disable_pf_vf_vports(esw); + + mc_promisc = &esw->mc_promisc; + if (mc_promisc->uplink_rule) + mlx5_del_flow_rules(mc_promisc->uplink_rule); + + esw_destroy_legacy_table(esw); +} + +static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw, + u8 setting) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_spec *spec; + int err = 0; + void *misc; + + if (!setting) { + esw_cleanup_vepa_rules(esw); + return 0; + } + + if (esw->fdb_table.legacy.vepa_uplink_rule) + return 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + /* Uplink rule forward uplink traffic to FDB */ + misc = 
MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = esw->fdb_table.legacy.fdb; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec, + &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto out; + } else { + esw->fdb_table.legacy.vepa_uplink_rule = flow_rule; + } + + /* Star rule to forward all traffic to uplink vport */ + memset(&dest, 0, sizeof(dest)); + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = MLX5_VPORT_UPLINK; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, NULL, + &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto out; + } else { + esw->fdb_table.legacy.vepa_star_rule = flow_rule; + } + +out: + kvfree(spec); + if (err) + esw_cleanup_vepa_rules(esw); + return err; +} + +int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting) +{ + int err = 0; + + if (!esw) + return -EOPNOTSUPP; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + + mutex_lock(&esw->state_lock); + if (esw->mode != MLX5_ESWITCH_LEGACY) { + err = -EOPNOTSUPP; + goto out; + } + + err = _mlx5_eswitch_set_vepa_locked(esw, setting); + +out: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting) +{ + if (!esw) + return -EOPNOTSUPP; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + + if (esw->mode != MLX5_ESWITCH_LEGACY) + return -EOPNOTSUPP; + + *setting = esw->fdb_table.legacy.vepa_uplink_rule ? 
1 : 0; + return 0; +} + +int esw_legacy_vport_acl_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + int ret; + + /* Only non manager vports need ACL in legacy mode */ + if (mlx5_esw_is_manager_vport(esw, vport->vport)) + return 0; + + ret = esw_acl_ingress_lgcy_setup(esw, vport); + if (ret) + goto ingress_err; + + ret = esw_acl_egress_lgcy_setup(esw, vport); + if (ret) + goto egress_err; + + return 0; + +egress_err: + esw_acl_ingress_lgcy_cleanup(esw, vport); +ingress_err: + return ret; +} + +void esw_legacy_vport_acl_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + if (mlx5_esw_is_manager_vport(esw, vport->vport)) + return; + + esw_acl_egress_lgcy_cleanup(esw, vport); + esw_acl_ingress_lgcy_cleanup(esw, vport); +} + +int mlx5_esw_query_vport_drop_stats(struct mlx5_core_dev *dev, + struct mlx5_vport *vport, + struct mlx5_vport_drop_stats *stats) +{ + u64 rx_discard_vport_down, tx_discard_vport_down; + struct mlx5_eswitch *esw = dev->priv.eswitch; + u64 bytes = 0; + int err = 0; + + if (esw->mode != MLX5_ESWITCH_LEGACY) + return 0; + + mutex_lock(&esw->state_lock); + if (!vport->enabled) + goto unlock; + + if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_counter)) + mlx5_fc_query(dev, vport->egress.legacy.drop_counter, + &stats->rx_dropped, &bytes); + + if (vport->ingress.legacy.drop_counter) + mlx5_fc_query(dev, vport->ingress.legacy.drop_counter, + &stats->tx_dropped, &bytes); + + if (!MLX5_CAP_GEN(dev, receive_discard_vport_down) && + !MLX5_CAP_GEN(dev, transmit_discard_vport_down)) + goto unlock; + + err = mlx5_query_vport_down_stats(dev, vport->vport, 1, + &rx_discard_vport_down, + &tx_discard_vport_down); + if (err) + goto unlock; + + if (MLX5_CAP_GEN(dev, receive_discard_vport_down)) + stats->rx_dropped += rx_discard_vport_down; + if (MLX5_CAP_GEN(dev, transmit_discard_vport_down)) + stats->tx_dropped += tx_discard_vport_down; + +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + u16 vport, u16 vlan, u8 qos) +{ + u8 set_flags = 0; + int err = 0; + + if (!mlx5_esw_allowed(esw)) + return vlan ? 
-EPERM : 0; + + if (vlan || qos) + set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT; + + mutex_lock(&esw->state_lock); + if (esw->mode != MLX5_ESWITCH_LEGACY) { + if (!vlan) + goto unlock; /* compatibility with libvirt */ + + err = -EOPNOTSUPP; + goto unlock; + } + + err = __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags); + +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, + u16 vport, bool spoofchk) +{ + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + bool pschk; + int err = 0; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + if (IS_ERR(evport)) + return PTR_ERR(evport); + + mutex_lock(&esw->state_lock); + if (esw->mode != MLX5_ESWITCH_LEGACY) { + err = -EOPNOTSUPP; + goto unlock; + } + pschk = evport->info.spoofchk; + evport->info.spoofchk = spoofchk; + if (pschk && !is_valid_ether_addr(evport->info.mac)) + mlx5_core_warn(esw->dev, + "Spoofchk in set while MAC is invalid, vport(%d)\n", + evport->vport); + if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) + err = esw_acl_ingress_lgcy_setup(esw, evport); + if (err) + evport->info.spoofchk = pschk; + +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, + u16 vport, bool setting) +{ + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + int err = 0; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + if (IS_ERR(evport)) + return PTR_ERR(evport); + + mutex_lock(&esw->state_lock); + if (esw->mode != MLX5_ESWITCH_LEGACY) { + err = -EOPNOTSUPP; + goto unlock; + } + evport->info.trusted = setting; + if (evport->enabled) + esw_vport_change_handle_locked(evport); + +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, + u32 max_rate, u32 min_rate) +{ + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + int err; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + if (IS_ERR(evport)) + return PTR_ERR(evport); + + mutex_lock(&esw->state_lock); + err = mlx5_esw_qos_set_vport_rate(esw, evport, max_rate, min_rate); + mutex_unlock(&esw->state_lock); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h new file mode 100644 index 000000000..e0820bb72 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies Ltd */ + +#ifndef __MLX5_ESW_LEGACY_H__ +#define __MLX5_ESW_LEGACY_H__ + +#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \ + MLX5_VPORT_MC_ADDR_CHANGE | \ + MLX5_VPORT_PROMISC_CHANGE) + +struct mlx5_eswitch; + +int esw_legacy_enable(struct mlx5_eswitch *esw); +void esw_legacy_disable(struct mlx5_eswitch *esw); + +int esw_legacy_vport_acl_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_legacy_vport_acl_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + +int mlx5_esw_query_vport_drop_stats(struct mlx5_core_dev *dev, + struct mlx5_vport *vport, + struct mlx5_vport_drop_stats *stats); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c new file mode 100644 index 000000000..75015d370 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -0,0 +1,943 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 
2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "eswitch.h" +#include "esw/qos.h" +#include "en/port.h" +#define CREATE_TRACE_POINTS +#include "diag/qos_tracepoint.h" + +/* Minimum supported BW share value by the HW is 1 Mbit/sec */ +#define MLX5_MIN_BW_SHARE 1 + +#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ + min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit) + +struct mlx5_esw_rate_group { + u32 tsar_ix; + u32 max_rate; + u32 min_rate; + u32 bw_share; + struct list_head list; +}; + +static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx, + u32 tsar_ix, u32 max_rate, u32 bw_share) +{ + u32 bitmask = 0; + + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) + return -EOPNOTSUPP; + + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; + + return mlx5_modify_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + sched_ctx, + tsar_ix, + bitmask); +} + +static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, + u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_core_dev *dev = esw->dev; + int err; + + err = esw_qos_tsar_config(dev, sched_ctx, + group->tsar_ix, + max_rate, bw_share); + if (err) + NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed"); + + trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate); + + return err; +} + +static int esw_qos_vport_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + u32 max_rate, u32 bw_share, + struct netlink_ext_ack *extack) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_core_dev *dev = esw->dev; + int err; + + if (!vport->qos.enabled) + return -EIO; + + err = esw_qos_tsar_config(dev, sched_ctx, vport->qos.esw_tsar_ix, + max_rate, bw_share); + if (err) { + esw_warn(esw->dev, + "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n", + vport->vport, err); + NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed"); + return err; + } + + trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate); + + return 0; +} + +static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + bool group_level) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + struct mlx5_vport *evport; + u32 max_guarantee = 0; + unsigned long i; + + if (group_level) { + struct mlx5_esw_rate_group *group; + + list_for_each_entry(group, &esw->qos.groups, list) { + if (group->min_rate < max_guarantee) + continue; + max_guarantee = group->min_rate; + } + } else { + mlx5_esw_for_each_vport(esw, i, evport) { + if (!evport->enabled || !evport->qos.enabled || + evport->qos.group != group || evport->qos.min_rate < max_guarantee) + continue; + max_guarantee = evport->qos.min_rate; + } + } + + if (max_guarantee) + return max_t(u32, max_guarantee / fw_max_bw_share, 1); + + /* If vports min rate divider is 0 but their group has bw_share configured, then + * need to set bw_share for vports to minimal value. 
+ */ + if (!group_level && !max_guarantee && group && group->bw_share) + return 1; + return 0; +} + +static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max) +{ + if (divider) + return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max); + + return 0; +} + +static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false); + struct mlx5_vport *evport; + unsigned long i; + u32 bw_share; + int err; + + mlx5_esw_for_each_vport(esw, i, evport) { + if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group) + continue; + bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share); + + if (bw_share == evport->qos.bw_share) + continue; + + err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack); + if (err) + return err; + + evport->qos.bw_share = bw_share; + } + + return 0; +} + +static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider, + struct netlink_ext_ack *extack) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + struct mlx5_esw_rate_group *group; + u32 bw_share; + int err; + + list_for_each_entry(group, &esw->qos.groups, list) { + bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share); + + if (bw_share == group->bw_share) + continue; + + err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack); + if (err) + return err; + + group->bw_share = bw_share; + + /* All the group's vports need to be set with default bw_share + * to enable them with QOS + */ + err = esw_qos_normalize_vports_min_rate(esw, group, extack); + + if (err) + return err; + } + + return 0; +} + +static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport, + u32 min_rate, struct netlink_ext_ack *extack) +{ + u32 fw_max_bw_share, previous_min_rate; + bool min_rate_supported; + int err; + + lockdep_assert_held(&esw->state_lock); + fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && + fw_max_bw_share >= MLX5_MIN_BW_SHARE; + if (min_rate && !min_rate_supported) + return -EOPNOTSUPP; + if (min_rate == evport->qos.min_rate) + return 0; + + previous_min_rate = evport->qos.min_rate; + evport->qos.min_rate = min_rate; + err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack); + if (err) + evport->qos.min_rate = previous_min_rate; + + return err; +} + +static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport, + u32 max_rate, struct netlink_ext_ack *extack) +{ + u32 act_max_rate = max_rate; + bool max_rate_supported; + int err; + + lockdep_assert_held(&esw->state_lock); + max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit); + + if (max_rate && !max_rate_supported) + return -EOPNOTSUPP; + if (max_rate == evport->qos.max_rate) + return 0; + + /* If parent group has rate limit need to set to group + * value when new max rate is 0. 
+ */ + if (evport->qos.group && !max_rate) + act_max_rate = evport->qos.group->max_rate; + + err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack); + + if (!err) + evport->qos.max_rate = max_rate; + + return err; +} + +static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, + u32 min_rate, struct netlink_ext_ack *extack) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + struct mlx5_core_dev *dev = esw->dev; + u32 previous_min_rate, divider; + int err; + + if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE)) + return -EOPNOTSUPP; + + if (min_rate == group->min_rate) + return 0; + + previous_min_rate = group->min_rate; + group->min_rate = min_rate; + divider = esw_qos_calculate_min_rate_divider(esw, group, true); + err = esw_qos_normalize_groups_min_rate(esw, divider, extack); + if (err) { + group->min_rate = previous_min_rate; + NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed"); + + /* Attempt restoring previous configuration */ + divider = esw_qos_calculate_min_rate_divider(esw, group, true); + if (esw_qos_normalize_groups_min_rate(esw, divider, extack)) + NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed"); + } + + return err; +} + +static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + u32 max_rate, struct netlink_ext_ack *extack) +{ + struct mlx5_vport *vport; + unsigned long i; + int err; + + if (group->max_rate == max_rate) + return 0; + + err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack); + if (err) + return err; + + group->max_rate = max_rate; + + /* Any unlimited vports in the group should be set + * with the value of the group. + */ + mlx5_esw_for_each_vport(esw, i, vport) { + if (!vport->enabled || !vport->qos.enabled || + vport->qos.group != group || vport->qos.max_rate) + continue; + + err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack); + if (err) + NL_SET_ERR_MSG_MOD(extack, + "E-Switch vport implicit rate limit setting failed"); + } + + return err; +} + +static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + u32 max_rate, u32 bw_share) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_esw_rate_group *group = vport->qos.group; + struct mlx5_core_dev *dev = esw->dev; + u32 parent_tsar_ix; + void *vport_elem; + int err; + + parent_tsar_ix = group ? 
group->tsar_ix : esw->qos.root_tsar_ix; + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); + vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); + MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + + err = mlx5_create_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + sched_ctx, + &vport->qos.esw_tsar_ix); + if (err) { + esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n", + vport->vport, err); + return err; + } + + return 0; +} + +static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_esw_rate_group *curr_group, + struct mlx5_esw_rate_group *new_group, + struct netlink_ext_ack *extack) +{ + u32 max_rate; + int err; + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + vport->qos.esw_tsar_ix); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed"); + return err; + } + + vport->qos.group = new_group; + max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate; + + /* If vport is unlimited, we set the group's value. + * Therefore, if the group is limited it will apply to + * the vport as well and if not, vport will remain unlimited. + */ + err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed."); + goto err_sched; + } + + return 0; + +err_sched: + vport->qos.group = curr_group; + max_rate = vport->qos.max_rate ? 
vport->qos.max_rate : curr_group->max_rate; + if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share)) + esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n", + vport->vport); + + return err; +} + +static int esw_qos_vport_update_group(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_rate_group *new_group, *curr_group; + int err; + + if (!vport->enabled) + return -EINVAL; + + curr_group = vport->qos.group; + new_group = group ?: esw->qos.group0; + if (curr_group == new_group) + return 0; + + err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack); + if (err) + return err; + + /* Recalculate bw share weights of old and new groups */ + if (vport->qos.bw_share || new_group->bw_share) { + esw_qos_normalize_vports_min_rate(esw, curr_group, extack); + esw_qos_normalize_vports_min_rate(esw, new_group, extack); + } + + return 0; +} + +static struct mlx5_esw_rate_group * +__esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) +{ + u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_esw_rate_group *group; + u32 divider; + int err; + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return ERR_PTR(-ENOMEM); + + MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, + esw->qos.root_tsar_ix); + err = mlx5_create_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + tsar_ctx, + &group->tsar_ix); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed"); + goto err_sched_elem; + } + + list_add_tail(&group->list, &esw->qos.groups); + + divider = esw_qos_calculate_min_rate_divider(esw, group, true); + if (divider) { + err = esw_qos_normalize_groups_min_rate(esw, divider, extack); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed"); + goto err_min_rate; + } + } + trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix); + + return group; + +err_min_rate: + list_del(&group->list); + if (mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + group->tsar_ix)) + NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed"); +err_sched_elem: + kfree(group); + return ERR_PTR(err); +} + +static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack); +static void esw_qos_put(struct mlx5_eswitch *esw); + +static struct mlx5_esw_rate_group * +esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) +{ + struct mlx5_esw_rate_group *group; + int err; + + if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth)) + return ERR_PTR(-EOPNOTSUPP); + + err = esw_qos_get(esw, extack); + if (err) + return ERR_PTR(err); + + group = __esw_qos_create_rate_group(esw, extack); + if (IS_ERR(group)) + esw_qos_put(esw); + + return group; +} + +static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + u32 divider; + int err; + + list_del(&group->list); + + divider = esw_qos_calculate_min_rate_divider(esw, NULL, true); + err = esw_qos_normalize_groups_min_rate(esw, divider, extack); + if (err) + NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed"); + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + group->tsar_ix); + if (err) + NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed"); + + 
trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix); + + kfree(group); + + return err; +} + +static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + int err; + + err = __esw_qos_destroy_rate_group(esw, group, extack); + esw_qos_put(esw); + + return err; +} + +static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type) +{ + switch (type) { + case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_TASR; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_VPORT; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_VPORT_TC; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC; + } + return false; +} + +static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) +{ + u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_core_dev *dev = esw->dev; + __be32 *attr; + int err; + + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) + return -EOPNOTSUPP; + + if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR)) + return -EOPNOTSUPP; + + MLX5_SET(scheduling_context, tsar_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); + + attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); + *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16); + + err = mlx5_create_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + tsar_ctx, + &esw->qos.root_tsar_ix); + if (err) { + esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err); + return err; + } + + INIT_LIST_HEAD(&esw->qos.groups); + if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) { + esw->qos.group0 = __esw_qos_create_rate_group(esw, extack); + if (IS_ERR(esw->qos.group0)) { + esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n", + PTR_ERR(esw->qos.group0)); + err = PTR_ERR(esw->qos.group0); + goto err_group0; + } + } + refcount_set(&esw->qos.refcnt, 1); + + return 0; + +err_group0: + if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, + esw->qos.root_tsar_ix)) + esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n"); + + return err; +} + +static void esw_qos_destroy(struct mlx5_eswitch *esw) +{ + int err; + + if (esw->qos.group0) + __esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL); + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + esw->qos.root_tsar_ix); + if (err) + esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err); +} + +static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) +{ + int err = 0; + + lockdep_assert_held(&esw->state_lock); + + if (!refcount_inc_not_zero(&esw->qos.refcnt)) { + /* esw_qos_create() set refcount to 1 only on success. + * No need to decrement on failure. 
+ */ + err = esw_qos_create(esw, extack); + } + + return err; +} + +static void esw_qos_put(struct mlx5_eswitch *esw) +{ + lockdep_assert_held(&esw->state_lock); + if (refcount_dec_and_test(&esw->qos.refcnt)) + esw_qos_destroy(esw); +} + +static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) +{ + int err; + + lockdep_assert_held(&esw->state_lock); + if (vport->qos.enabled) + return 0; + + err = esw_qos_get(esw, extack); + if (err) + return err; + + vport->qos.group = esw->qos.group0; + + err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share); + if (err) + goto err_out; + + vport->qos.enabled = true; + trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate); + + return 0; + +err_out: + esw_qos_put(esw); + + return err; +} + +void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + int err; + + lockdep_assert_held(&esw->state_lock); + if (!vport->qos.enabled) + return; + WARN(vport->qos.group && vport->qos.group != esw->qos.group0, + "Disabling QoS on port before detaching it from group"); + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + vport->qos.esw_tsar_ix); + if (err) + esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n", + vport->vport, err); + + memset(&vport->qos, 0, sizeof(vport->qos)); + trace_mlx5_esw_vport_qos_destroy(vport); + + esw_qos_put(esw); +} + +int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + u32 max_rate, u32 min_rate) +{ + int err; + + lockdep_assert_held(&esw->state_lock); + err = esw_qos_vport_enable(esw, vport, 0, 0, NULL); + if (err) + return err; + + err = esw_qos_set_vport_min_rate(esw, vport, min_rate, NULL); + if (!err) + err = esw_qos_set_vport_max_rate(esw, vport, max_rate, NULL); + + return err; +} + +int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps) +{ + u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_vport *vport; + u32 bitmask; + int err; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + mutex_lock(&esw->state_lock); + if (!vport->qos.enabled) { + /* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. */ + err = esw_qos_vport_enable(esw, vport, rate_mbps, vport->qos.bw_share, NULL); + } else { + MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); + + bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + err = mlx5_modify_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + ctx, + vport->qos.esw_tsar_ix, + bitmask); + } + mutex_unlock(&esw->state_lock); + + return err; +} + +#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */ + +/* Converts bytes per second value passed in a pointer into megabits per + * second, rewriting last. If converted rate exceed link speed or is not a + * fraction of Mbps - returns error. 
+ */ +static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name, + u64 *rate, struct netlink_ext_ack *extack) +{ + u32 link_speed_max, reminder; + u64 value; + int err; + + err = mlx5e_port_max_linkspeed(mdev, &link_speed_max); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed"); + return err; + } + + value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &reminder); + if (reminder) { + pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n", + name, *rate); + NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps"); + return -EINVAL; + } + + if (value > link_speed_max) { + pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n", + name, value, link_speed_max); + NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed"); + return -EINVAL; + } + + *rate = value; + return 0; +} + +/* Eswitch devlink rate API */ + +int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct mlx5_vport *vport = priv; + struct mlx5_eswitch *esw; + int err; + + esw = vport->dev->priv.eswitch; + if (!mlx5_esw_allowed(esw)) + return -EPERM; + + err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack); + if (err) + return err; + + mutex_lock(&esw->state_lock); + err = esw_qos_vport_enable(esw, vport, 0, 0, extack); + if (err) + goto unlock; + + err = esw_qos_set_vport_min_rate(esw, vport, tx_share, extack); +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct mlx5_vport *vport = priv; + struct mlx5_eswitch *esw; + int err; + + esw = vport->dev->priv.eswitch; + if (!mlx5_esw_allowed(esw)) + return -EPERM; + + err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack); + if (err) + return err; + + mutex_lock(&esw->state_lock); + err = esw_qos_vport_enable(esw, vport, 0, 0, extack); + if (err) + goto unlock; + + err = esw_qos_set_vport_max_rate(esw, vport, tx_max, extack); +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5_esw_rate_group *group = priv; + int err; + + err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack); + if (err) + return err; + + mutex_lock(&esw->state_lock); + err = esw_qos_set_group_min_rate(esw, group, tx_share, extack); + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5_esw_rate_group *group = priv; + int err; + + err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack); + if (err) + return err; + + mutex_lock(&esw->state_lock); + err = esw_qos_set_group_max_rate(esw, group, tx_max, extack); + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_rate_group *group; + struct mlx5_eswitch *esw; + int err = 0; + + esw = 
mlx5_devlink_eswitch_get(rate_node->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + mutex_lock(&esw->state_lock); + if (esw->mode != MLX5_ESWITCH_OFFLOADS) { + NL_SET_ERR_MSG_MOD(extack, + "Rate node creation supported only in switchdev mode"); + err = -EOPNOTSUPP; + goto unlock; + } + + group = esw_qos_create_rate_group(esw, extack); + if (IS_ERR(group)) { + err = PTR_ERR(group); + goto unlock; + } + + *priv = group; +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_rate_group *group = priv; + struct mlx5_eswitch *esw; + int err; + + esw = mlx5_devlink_eswitch_get(rate_node->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + mutex_lock(&esw->state_lock); + err = esw_qos_destroy_rate_group(esw, group, extack); + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + int err = 0; + + mutex_lock(&esw->state_lock); + if (!vport->qos.enabled && !group) + goto unlock; + + err = esw_qos_vport_enable(esw, vport, 0, 0, extack); + if (!err) + err = esw_qos_vport_update_group(esw, vport, group, extack); +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_rate_group *group; + struct mlx5_vport *vport = priv; + + if (!parent) + return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, + vport, NULL, extack); + + group = parent_priv; + return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h new file mode 100644 index 000000000..0141e9d52 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#ifndef __MLX5_ESW_QOS_H__ +#define __MLX5_ESW_QOS_H__ + +#ifdef CONFIG_MLX5_ESWITCH + +int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport, + u32 max_rate, u32 min_rate); +void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + +int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_share, struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_max, struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, + u64 tx_share, struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, + u64 tx_max, struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, + struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, + struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack); +#endif + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c new file mode 100644 index 000000000..749c3957a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021 Mellanox Technologies. + +#include "eswitch.h" + +/* This struct is used as a key to the hash table and we need it to be packed + * so hash result is consistent + */ +struct mlx5_vport_key { + u32 chain; + u16 prio; + u16 vport; + u16 vhca_id; + struct esw_vport_tbl_namespace *vport_ns; +} __packed; + +struct mlx5_vport_table { + struct hlist_node hlist; + struct mlx5_flow_table *fdb; + u32 num_rules; + struct mlx5_vport_key key; +}; + +static void +esw_vport_tbl_init(struct mlx5_eswitch *esw, struct esw_vport_tbl_namespace *ns) +{ + if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) + ns->flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); +} + +static struct mlx5_flow_table * +esw_vport_tbl_create(struct mlx5_eswitch *esw, struct mlx5_flow_namespace *ns, + const struct esw_vport_tbl_namespace *vport_ns) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_table *fdb; + + if (vport_ns->max_num_groups) + ft_attr.autogroup.max_num_groups = vport_ns->max_num_groups; + else + ft_attr.autogroup.max_num_groups = esw->params.large_group_num; + ft_attr.max_fte = vport_ns->max_fte; + ft_attr.prio = FDB_PER_VPORT; + ft_attr.flags = vport_ns->flags; + fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(fdb)) { + esw_warn(esw->dev, "Failed to create per vport FDB Table err %ld\n", + PTR_ERR(fdb)); + } + + return fdb; +} + +static u32 flow_attr_to_vport_key(struct mlx5_eswitch *esw, + struct mlx5_vport_tbl_attr *attr, + struct mlx5_vport_key *key) +{ + key->vport = attr->vport; + key->chain = attr->chain; + key->prio = attr->prio; + key->vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); + key->vport_ns = attr->vport_ns; + return jhash(key, sizeof(*key), 0); +} + +/* caller must hold vports.lock */ +static struct mlx5_vport_table * +esw_vport_tbl_lookup(struct mlx5_eswitch *esw, struct mlx5_vport_key *skey, u32 key) +{ + struct mlx5_vport_table *e; + + 
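/* Different keys can collide on the same jhash value, so walk every
+	 * entry in this bucket and compare the full packed key. */
+	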
hash_for_each_possible(esw->fdb_table.offloads.vports.table, e, hlist, key) + if (!memcmp(&e->key, skey, sizeof(*skey))) + return e; + + return NULL; +} + +struct mlx5_flow_table * +mlx5_esw_vporttbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *fdb; + struct mlx5_vport_table *e; + struct mlx5_vport_key skey; + u32 hkey; + + mutex_lock(&esw->fdb_table.offloads.vports.lock); + esw_vport_tbl_init(esw, attr->vport_ns); + hkey = flow_attr_to_vport_key(esw, attr, &skey); + e = esw_vport_tbl_lookup(esw, &skey, hkey); + if (e) { + e->num_rules++; + goto out; + } + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) { + fdb = ERR_PTR(-ENOMEM); + goto err_alloc; + } + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!ns) { + esw_warn(dev, "Failed to get FDB namespace\n"); + fdb = ERR_PTR(-ENOENT); + goto err_ns; + } + + fdb = esw_vport_tbl_create(esw, ns, attr->vport_ns); + if (IS_ERR(fdb)) + goto err_ns; + + e->fdb = fdb; + e->num_rules = 1; + e->key = skey; + hash_add(esw->fdb_table.offloads.vports.table, &e->hlist, hkey); +out: + mutex_unlock(&esw->fdb_table.offloads.vports.lock); + return e->fdb; + +err_ns: + kfree(e); +err_alloc: + mutex_unlock(&esw->fdb_table.offloads.vports.lock); + return fdb; +} + +void +mlx5_esw_vporttbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr) +{ + struct mlx5_vport_table *e; + struct mlx5_vport_key key; + u32 hkey; + + mutex_lock(&esw->fdb_table.offloads.vports.lock); + esw_vport_tbl_init(esw, attr->vport_ns); + hkey = flow_attr_to_vport_key(esw, attr, &key); + e = esw_vport_tbl_lookup(esw, &key, hkey); + if (!e || --e->num_rules) + goto out; + + hash_del(&e->hlist); + mlx5_destroy_flow_table(e->fdb); + kfree(e); +out: + mutex_unlock(&esw->fdb_table.offloads.vports.lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c new file mode 100644 index 000000000..48939c72b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -0,0 +1,2075 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/fs.h> +#include <linux/mlx5/mpfs.h> +#include <linux/debugfs.h> +#include "esw/acl/lgcy.h" +#include "esw/legacy.h" +#include "esw/qos.h" +#include "mlx5_core.h" +#include "lib/eq.h" +#include "eswitch.h" +#include "fs_core.h" +#include "devlink.h" +#include "ecpf.h" +#include "en/mod_hdr.h" + +enum { + MLX5_ACTION_NONE = 0, + MLX5_ACTION_ADD = 1, + MLX5_ACTION_DEL = 2, +}; + +/* Vport UC/MC hash node */ +struct vport_addr { + struct l2addr_node node; + u8 action; + u16 vport; + struct mlx5_flow_handle *flow_rule; + bool mpfs; /* UC MAC was added to MPFs */ + /* A flag indicating that mac was added due to mc promiscuous vport */ + bool mc_promisc; +}; + +static int mlx5_eswitch_check(const struct mlx5_core_dev *dev) +{ + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return -EOPNOTSUPP; + + if (!MLX5_ESWITCH_MANAGER(dev)) + return -EOPNOTSUPP; + + return 0; +} + +struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + int err; + + err = mlx5_eswitch_check(dev); + if (err) + return ERR_PTR(err); + + return dev->priv.eswitch; +} + +struct mlx5_vport *__must_check +mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport; + + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager)) + return ERR_PTR(-EPERM); + + vport = xa_load(&esw->vports, vport_num); + if (!vport) { + esw_debug(esw->dev, "vport out of range: num(0x%x)\n", vport_num); + return ERR_PTR(-EINVAL); + } + return vport; +} + +static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, + u32 events_mask) +{ + u32 in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {}; + void *nic_vport_ctx; + + MLX5_SET(modify_nic_vport_context_in, in, + opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, arm_change_event, 1); + + if (events_mask & MLX5_VPORT_UC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_uc_address_change, 1); + if (events_mask & MLX5_VPORT_MC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_mc_address_change, 1); + if (events_mask & MLX5_VPORT_PROMISC_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_promisc_change, 1); + + return mlx5_cmd_exec_in(dev, modify_nic_vport_context, in); +} + +/* E-Switch vport context HW commands */ +int mlx5_eswitch_modify_esw_vport_context(struct mlx5_core_dev *dev, u16 vport, + bool other_vport, void *in) +{ + MLX5_SET(modify_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT); + MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_esw_vport_context_in, in, other_vport, other_vport); + return mlx5_cmd_exec_in(dev, modify_esw_vport_context, in); +} + +static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, + u16 vlan, u8 qos, u8 set_flags) +{ + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; + + if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || + !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) + return -EOPNOTSUPP; + + esw_debug(dev, "Set Vport[%d] 
VLAN %d qos %d set=%x\n", + vport, vlan, qos, set_flags); + + if (set_flags & SET_VLAN_STRIP) + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_strip, 1); + + if (set_flags & SET_VLAN_INSERT) { + if (MLX5_CAP_ESW(dev, vport_cvlan_insert_always)) { + /* insert either if vlan exist in packet or not */ + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_insert, + MLX5_VPORT_CVLAN_INSERT_ALWAYS); + } else { + /* insert only if no vlan in packet */ + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_insert, + MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN); + } + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.cvlan_pcp, qos); + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.cvlan_id, vlan); + } + + MLX5_SET(modify_esw_vport_context_in, in, + field_select.vport_cvlan_strip, 1); + MLX5_SET(modify_esw_vport_context_in, in, + field_select.vport_cvlan_insert, 1); + + return mlx5_eswitch_modify_esw_vport_context(dev, vport, true, in); +} + +/* E-Switch FDB */ +static struct mlx5_flow_handle * +__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u16 vport, bool rx_rule, + u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN]) +{ + int match_header = (is_zero_ether_addr(mac_c) ? 0 : + MLX5_MATCH_OUTER_HEADERS); + struct mlx5_flow_handle *flow_rule = NULL; + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_spec *spec; + void *mv_misc = NULL; + void *mc_misc = NULL; + u8 *dmac_v = NULL; + u8 *dmac_c = NULL; + + if (rx_rule) + match_header |= MLX5_MATCH_MISC_PARAMETERS; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return NULL; + + dmac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dmac_47_16); + dmac_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.dmac_47_16); + + if (match_header & MLX5_MATCH_OUTER_HEADERS) { + ether_addr_copy(dmac_v, mac_v); + ether_addr_copy(dmac_c, mac_c); + } + + if (match_header & MLX5_MATCH_MISC_PARAMETERS) { + mv_misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + mc_misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + MLX5_SET(fte_match_set_misc, mv_misc, source_port, MLX5_VPORT_UPLINK); + MLX5_SET_TO_ONES(fte_match_set_misc, mc_misc, source_port); + } + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = vport; + + esw_debug(esw->dev, + "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n", + dmac_v, dmac_c, vport); + spec->match_criteria_enable = match_header; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_rule = + mlx5_add_flow_rules(esw->fdb_table.legacy.fdb, spec, + &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + esw_warn(esw->dev, + "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", + dmac_v, dmac_c, vport, PTR_ERR(flow_rule)); + flow_rule = NULL; + } + + kvfree(spec); + return flow_rule; +} + +static struct mlx5_flow_handle * +esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u16 vport) +{ + u8 mac_c[ETH_ALEN]; + + eth_broadcast_addr(mac_c); + return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac); +} + +static struct mlx5_flow_handle * +esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u16 vport) +{ + u8 mac_c[ETH_ALEN]; + u8 mac_v[ETH_ALEN]; + + eth_zero_addr(mac_c); + eth_zero_addr(mac_v); + mac_c[0] = 0x01; + mac_v[0] = 0x01; + return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac_v); +} + +static struct mlx5_flow_handle 
* +esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u16 vport) +{ + u8 mac_c[ETH_ALEN]; + u8 mac_v[ETH_ALEN]; + + eth_zero_addr(mac_c); + eth_zero_addr(mac_v); + return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v); +} + +/* E-Switch vport UC/MC lists management */ +typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, + struct vport_addr *vaddr); + +static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + u8 *mac = vaddr->node.addr; + u16 vport = vaddr->vport; + int err; + + /* Skip mlx5_mpfs_add_mac for eswitch_managers, + * it is already done by its netdev in mlx5e_execute_l2_action + */ + if (mlx5_esw_is_manager_vport(esw, vport)) + goto fdb_add; + + err = mlx5_mpfs_add_mac(esw->dev, mac); + if (err) { + esw_warn(esw->dev, + "Failed to add L2 table mac(%pM) for vport(0x%x), err(%d)\n", + mac, vport, err); + return err; + } + vaddr->mpfs = true; + +fdb_add: + /* SRIOV is enabled: Forward UC MAC to vport */ + if (esw->fdb_table.legacy.fdb && esw->mode == MLX5_ESWITCH_LEGACY) + vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); + + esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n", + vport, mac, vaddr->flow_rule); + + return 0; +} + +static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + u8 *mac = vaddr->node.addr; + u16 vport = vaddr->vport; + int err = 0; + + /* Skip mlx5_mpfs_del_mac for eswitch managers, + * it is already done by its netdev in mlx5e_execute_l2_action + */ + if (!vaddr->mpfs || mlx5_esw_is_manager_vport(esw, vport)) + goto fdb_del; + + err = mlx5_mpfs_del_mac(esw->dev, mac); + if (err) + esw_warn(esw->dev, + "Failed to del L2 table mac(%pM) for vport(%d), err(%d)\n", + mac, vport, err); + vaddr->mpfs = false; + +fdb_del: + if (vaddr->flow_rule) + mlx5_del_flow_rules(vaddr->flow_rule); + vaddr->flow_rule = NULL; + + return 0; +} + +static void update_allmulti_vports(struct mlx5_eswitch *esw, + struct vport_addr *vaddr, + struct esw_mc_addr *esw_mc) +{ + u8 *mac = vaddr->node.addr; + struct mlx5_vport *vport; + unsigned long i; + u16 vport_num; + + mlx5_esw_for_each_vport(esw, i, vport) { + struct hlist_head *vport_hash = vport->mc_list; + struct vport_addr *iter_vaddr = + l2addr_hash_find(vport_hash, + mac, + struct vport_addr); + vport_num = vport->vport; + if (IS_ERR_OR_NULL(vport->allmulti_rule) || + vaddr->vport == vport_num) + continue; + switch (vaddr->action) { + case MLX5_ACTION_ADD: + if (iter_vaddr) + continue; + iter_vaddr = l2addr_hash_add(vport_hash, mac, + struct vport_addr, + GFP_KERNEL); + if (!iter_vaddr) { + esw_warn(esw->dev, + "ALL-MULTI: Failed to add MAC(%pM) to vport[%d] DB\n", + mac, vport_num); + continue; + } + iter_vaddr->vport = vport_num; + iter_vaddr->flow_rule = + esw_fdb_set_vport_rule(esw, + mac, + vport_num); + iter_vaddr->mc_promisc = true; + break; + case MLX5_ACTION_DEL: + if (!iter_vaddr) + continue; + mlx5_del_flow_rules(iter_vaddr->flow_rule); + l2addr_hash_del(iter_vaddr); + break; + } + } +} + +static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *mac = vaddr->node.addr; + u16 vport = vaddr->vport; + + if (!esw->fdb_table.legacy.fdb) + return 0; + + esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr); + if (esw_mc) + goto add; + + esw_mc = l2addr_hash_add(hash, mac, struct esw_mc_addr, GFP_KERNEL); + if (!esw_mc) + return -ENOMEM; + + esw_mc->uplink_rule = /* Forward MC MAC to Uplink */ + esw_fdb_set_vport_rule(esw, mac, 
MLX5_VPORT_UPLINK); + + /* Add this multicast mac to all the mc promiscuous vports */ + update_allmulti_vports(esw, vaddr, esw_mc); + +add: + /* If the multicast mac is added as a result of mc promiscuous vport, + * don't increment the multicast ref count + */ + if (!vaddr->mc_promisc) + esw_mc->refcnt++; + + /* Forward MC MAC to vport */ + vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); + esw_debug(esw->dev, + "\tADDED MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n", + vport, mac, vaddr->flow_rule, + esw_mc->refcnt, esw_mc->uplink_rule); + return 0; +} + +static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *mac = vaddr->node.addr; + u16 vport = vaddr->vport; + + if (!esw->fdb_table.legacy.fdb) + return 0; + + esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr); + if (!esw_mc) { + esw_warn(esw->dev, + "Failed to find eswitch MC addr for MAC(%pM) vport(%d)", + mac, vport); + return -EINVAL; + } + esw_debug(esw->dev, + "\tDELETE MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n", + vport, mac, vaddr->flow_rule, esw_mc->refcnt, + esw_mc->uplink_rule); + + if (vaddr->flow_rule) + mlx5_del_flow_rules(vaddr->flow_rule); + vaddr->flow_rule = NULL; + + /* If the multicast mac is added as a result of mc promiscuous vport, + * don't decrement the multicast ref count. + */ + if (vaddr->mc_promisc || (--esw_mc->refcnt > 0)) + return 0; + + /* Remove this multicast mac from all the mc promiscuous vports */ + update_allmulti_vports(esw, vaddr, esw_mc); + + if (esw_mc->uplink_rule) + mlx5_del_flow_rules(esw_mc->uplink_rule); + + l2addr_hash_del(esw_mc); + return 0; +} + +/* Apply vport UC/MC list to HW l2 table and FDB table */ +static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, int list_type) +{ + bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; + vport_addr_action vport_addr_add; + vport_addr_action vport_addr_del; + struct vport_addr *addr; + struct l2addr_node *node; + struct hlist_head *hash; + struct hlist_node *tmp; + int hi; + + vport_addr_add = is_uc ? esw_add_uc_addr : + esw_add_mc_addr; + vport_addr_del = is_uc ? esw_del_uc_addr : + esw_del_mc_addr; + + hash = is_uc ? vport->uc_list : vport->mc_list; + for_each_l2hash_node(node, tmp, hash, hi) { + addr = container_of(node, struct vport_addr, node); + switch (addr->action) { + case MLX5_ACTION_ADD: + vport_addr_add(esw, addr); + addr->action = MLX5_ACTION_NONE; + break; + case MLX5_ACTION_DEL: + vport_addr_del(esw, addr); + l2addr_hash_del(addr); + break; + } + } +} + +/* Sync vport UC/MC list from vport context */ +static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, int list_type) +{ + bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; + u8 (*mac_list)[ETH_ALEN]; + struct l2addr_node *node; + struct vport_addr *addr; + struct hlist_head *hash; + struct hlist_node *tmp; + int size; + int err; + int hi; + int i; + + size = is_uc ? MLX5_MAX_UC_PER_VPORT(esw->dev) : + MLX5_MAX_MC_PER_VPORT(esw->dev); + + mac_list = kcalloc(size, ETH_ALEN, GFP_KERNEL); + if (!mac_list) + return; + + hash = is_uc ? 
vport->uc_list : vport->mc_list; + + for_each_l2hash_node(node, tmp, hash, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + + if (!vport->enabled) + goto out; + + err = mlx5_query_nic_vport_mac_list(esw->dev, vport->vport, list_type, + mac_list, &size); + if (err) + goto out; + esw_debug(esw->dev, "vport[%d] context update %s list size (%d)\n", + vport->vport, is_uc ? "UC" : "MC", size); + + for (i = 0; i < size; i++) { + if (is_uc && !is_valid_ether_addr(mac_list[i])) + continue; + + if (!is_uc && !is_multicast_ether_addr(mac_list[i])) + continue; + + addr = l2addr_hash_find(hash, mac_list[i], struct vport_addr); + if (addr) { + addr->action = MLX5_ACTION_NONE; + /* If this mac was previously added because of allmulti + * promiscuous rx mode, its now converted to be original + * vport mac. + */ + if (addr->mc_promisc) { + struct esw_mc_addr *esw_mc = + l2addr_hash_find(esw->mc_table, + mac_list[i], + struct esw_mc_addr); + if (!esw_mc) { + esw_warn(esw->dev, + "Failed to MAC(%pM) in mcast DB\n", + mac_list[i]); + continue; + } + esw_mc->refcnt++; + addr->mc_promisc = false; + } + continue; + } + + addr = l2addr_hash_add(hash, mac_list[i], struct vport_addr, + GFP_KERNEL); + if (!addr) { + esw_warn(esw->dev, + "Failed to add MAC(%pM) to vport[%d] DB\n", + mac_list[i], vport->vport); + continue; + } + addr->vport = vport->vport; + addr->action = MLX5_ACTION_ADD; + } +out: + kfree(mac_list); +} + +/* Sync vport UC/MC list from vport context + * Must be called after esw_update_vport_addr_list + */ +static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + struct l2addr_node *node; + struct vport_addr *addr; + struct hlist_head *hash; + struct hlist_node *tmp; + int hi; + + hash = vport->mc_list; + + for_each_l2hash_node(node, tmp, esw->mc_table, hi) { + u8 *mac = node->addr; + + addr = l2addr_hash_find(hash, mac, struct vport_addr); + if (addr) { + if (addr->action == MLX5_ACTION_DEL) + addr->action = MLX5_ACTION_NONE; + continue; + } + addr = l2addr_hash_add(hash, mac, struct vport_addr, + GFP_KERNEL); + if (!addr) { + esw_warn(esw->dev, + "Failed to add allmulti MAC(%pM) to vport[%d] DB\n", + mac, vport->vport); + continue; + } + addr->vport = vport->vport; + addr->action = MLX5_ACTION_ADD; + addr->mc_promisc = true; + } +} + +/* Apply vport rx mode to HW FDB table */ +static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + bool promisc, bool mc_promisc) +{ + struct esw_mc_addr *allmulti_addr = &esw->mc_promisc; + + if (IS_ERR_OR_NULL(vport->allmulti_rule) != mc_promisc) + goto promisc; + + if (mc_promisc) { + vport->allmulti_rule = + esw_fdb_set_vport_allmulti_rule(esw, vport->vport); + if (!allmulti_addr->uplink_rule) + allmulti_addr->uplink_rule = + esw_fdb_set_vport_allmulti_rule(esw, + MLX5_VPORT_UPLINK); + allmulti_addr->refcnt++; + } else if (vport->allmulti_rule) { + mlx5_del_flow_rules(vport->allmulti_rule); + vport->allmulti_rule = NULL; + + if (--allmulti_addr->refcnt > 0) + goto promisc; + + if (allmulti_addr->uplink_rule) + mlx5_del_flow_rules(allmulti_addr->uplink_rule); + allmulti_addr->uplink_rule = NULL; + } + +promisc: + if (IS_ERR_OR_NULL(vport->promisc_rule) != promisc) + return; + + if (promisc) { + vport->promisc_rule = + esw_fdb_set_vport_promisc_rule(esw, vport->vport); + } else if (vport->promisc_rule) { + mlx5_del_flow_rules(vport->promisc_rule); + vport->promisc_rule = NULL; + } +} + +/* Sync vport rx mode from vport context */ +static 
void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int promisc_all = 0; + int promisc_uc = 0; + int promisc_mc = 0; + int err; + + err = mlx5_query_nic_vport_promisc(esw->dev, + vport->vport, + &promisc_uc, + &promisc_mc, + &promisc_all); + if (err) + return; + esw_debug(esw->dev, "vport[%d] context update rx mode promisc_all=%d, all_multi=%d\n", + vport->vport, promisc_all, promisc_mc); + + if (!vport->info.trusted || !vport->enabled) { + promisc_uc = 0; + promisc_mc = 0; + promisc_all = 0; + } + + esw_apply_vport_rx_mode(esw, vport, promisc_all, + (promisc_all || promisc_mc)); +} + +void esw_vport_change_handle_locked(struct mlx5_vport *vport) +{ + struct mlx5_core_dev *dev = vport->dev; + struct mlx5_eswitch *esw = dev->priv.eswitch; + u8 mac[ETH_ALEN]; + + mlx5_query_nic_vport_mac_address(dev, vport->vport, true, mac); + esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n", + vport->vport, mac); + + if (vport->enabled_events & MLX5_VPORT_UC_ADDR_CHANGE) { + esw_update_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_UC); + esw_apply_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_UC); + } + + if (vport->enabled_events & MLX5_VPORT_MC_ADDR_CHANGE) + esw_update_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_MC); + + if (vport->enabled_events & MLX5_VPORT_PROMISC_CHANGE) { + esw_update_vport_rx_mode(esw, vport); + if (!IS_ERR_OR_NULL(vport->allmulti_rule)) + esw_update_vport_mc_promisc(esw, vport); + } + + if (vport->enabled_events & (MLX5_VPORT_PROMISC_CHANGE | MLX5_VPORT_MC_ADDR_CHANGE)) + esw_apply_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_MC); + + esw_debug(esw->dev, "vport[%d] Context Changed: Done\n", vport->vport); + if (vport->enabled) + arm_vport_context_events_cmd(dev, vport->vport, + vport->enabled_events); +} + +static void esw_vport_change_handler(struct work_struct *work) +{ + struct mlx5_vport *vport = + container_of(work, struct mlx5_vport, vport_change_handler); + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; + + mutex_lock(&esw->state_lock); + esw_vport_change_handle_locked(vport); + mutex_unlock(&esw->state_lock); +} + +static void node_guid_gen_from_mac(u64 *node_guid, const u8 *mac) +{ + ((u8 *)node_guid)[7] = mac[0]; + ((u8 *)node_guid)[6] = mac[1]; + ((u8 *)node_guid)[5] = mac[2]; + ((u8 *)node_guid)[4] = 0xff; + ((u8 *)node_guid)[3] = 0xfe; + ((u8 *)node_guid)[2] = mac[3]; + ((u8 *)node_guid)[1] = mac[4]; + ((u8 *)node_guid)[0] = mac[5]; +} + +static int esw_vport_setup_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (esw->mode == MLX5_ESWITCH_LEGACY) + return esw_legacy_vport_acl_setup(esw, vport); + else + return esw_vport_create_offloads_acl_tables(esw, vport); +} + +static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (esw->mode == MLX5_ESWITCH_LEGACY) + esw_legacy_vport_acl_cleanup(esw, vport); + else + esw_vport_destroy_offloads_acl_tables(esw, vport); +} + +static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + bool vst_mode_steering = esw_vst_mode_is_steering(esw); + u16 vport_num = vport->vport; + int flags; + int err; + + err = esw_vport_setup_acl(esw, vport); + if (err) + return err; + + if (mlx5_esw_is_manager_vport(esw, vport_num)) + return 0; + + mlx5_modify_vport_admin_state(esw->dev, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + vport_num, 1, + vport->info.link_state); + + /* Host PF has its own mac/guid. 
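+	 * The MAC and node GUID below are therefore only programmed for
+	 * non-zero vport numbers.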
*/ + if (vport_num) { + mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, + vport->info.mac); + mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, + vport->info.node_guid); + } + + flags = (vport->info.vlan || vport->info.qos) ? + SET_VLAN_STRIP | SET_VLAN_INSERT : 0; + if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering) + modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, + vport->info.qos, flags); + + return 0; +} + +/* Don't cleanup vport->info, it's needed to restore vport configuration */ +static void esw_vport_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + u16 vport_num = vport->vport; + + if (!mlx5_esw_is_manager_vport(esw, vport_num)) + mlx5_modify_vport_admin_state(esw->dev, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + vport_num, 1, + MLX5_VPORT_ADMIN_STATE_DOWN); + + mlx5_esw_qos_vport_disable(esw, vport); + esw_vport_cleanup_acl(esw, vport); +} + +int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events) +{ + struct mlx5_vport *vport; + int ret; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + mutex_lock(&esw->state_lock); + WARN_ON(vport->enabled); + + esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); + + ret = esw_vport_setup(esw, vport); + if (ret) + goto done; + + /* Sync with current vport context */ + vport->enabled_events = enabled_events; + vport->enabled = true; + + /* Esw manager is trusted by default. Host PF (vport 0) is trusted as well + * in smartNIC as it's a vport group manager. + */ + if (mlx5_esw_is_manager_vport(esw, vport_num) || + (!vport_num && mlx5_core_is_ecpf(esw->dev))) + vport->info.trusted = true; + + if (!mlx5_esw_is_manager_vport(esw, vport->vport) && + MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) { + ret = mlx5_esw_vport_vhca_id_set(esw, vport_num); + if (ret) + goto err_vhca_mapping; + } + + /* External controller host PF has factory programmed MAC. + * Read it from the device. + */ + if (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF) + mlx5_query_nic_vport_mac_address(esw->dev, vport_num, true, vport->info.mac); + + esw_vport_change_handle_locked(vport); + + esw->enabled_vports++; + esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num); +done: + mutex_unlock(&esw->state_lock); + return ret; + +err_vhca_mapping: + esw_vport_cleanup(esw, vport); + mutex_unlock(&esw->state_lock); + return ret; +} + +void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return; + + mutex_lock(&esw->state_lock); + if (!vport->enabled) + goto done; + + esw_debug(esw->dev, "Disabling vport(%d)\n", vport_num); + /* Mark this vport as disabled to discard new events */ + vport->enabled = false; + + /* Disable events from this vport */ + arm_vport_context_events_cmd(esw->dev, vport->vport, 0); + + if (!mlx5_esw_is_manager_vport(esw, vport->vport) && + MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) + mlx5_esw_vport_vhca_id_clear(esw, vport_num); + + /* We don't assume VFs will cleanup after themselves. + * Calling vport change handler while vport is disabled will cleanup + * the vport resources. 
+ */ + esw_vport_change_handle_locked(vport); + vport->enabled_events = 0; + esw_apply_vport_rx_mode(esw, vport, false, false); + esw_vport_cleanup(esw, vport); + esw->enabled_vports--; + +done: + mutex_unlock(&esw->state_lock); +} + +static int eswitch_vport_event(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_eswitch *esw = mlx5_nb_cof(nb, struct mlx5_eswitch, nb); + struct mlx5_eqe *eqe = data; + struct mlx5_vport *vport; + u16 vport_num; + + vport_num = be16_to_cpu(eqe->data.vport_change.vport_num); + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (!IS_ERR(vport)) + queue_work(esw->work_queue, &vport->vport_change_handler); + return NOTIFY_OK; +} + +/** + * mlx5_esw_query_functions - Returns raw output about functions state + * @dev: Pointer to device to query + * + * mlx5_esw_query_functions() allocates and returns functions changed + * raw output memory pointer from device on success. Otherwise returns ERR_PTR. + * Caller must free the memory using kvfree() when valid pointer is returned. + */ +const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) +{ + int outlen = MLX5_ST_SZ_BYTES(query_esw_functions_out); + u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {}; + u32 *out; + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return ERR_PTR(-ENOMEM); + + MLX5_SET(query_esw_functions_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_FUNCTIONS); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); + if (!err) + return out; + + kvfree(out); + return ERR_PTR(err); +} + +static void mlx5_eswitch_event_handler_register(struct mlx5_eswitch *esw) +{ + if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) { + MLX5_NB_INIT(&esw->esw_funcs.nb, mlx5_esw_funcs_changed_handler, + ESW_FUNCTIONS_CHANGED); + mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb); + } +} + +static void mlx5_eswitch_event_handler_unregister(struct mlx5_eswitch *esw) +{ + if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) + mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb); + + flush_workqueue(esw->work_queue); +} + +static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) { + memset(&vport->qos, 0, sizeof(vport->qos)); + memset(&vport->info, 0, sizeof(vport->info)); + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + } +} + +/* Public E-Switch API */ +int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events) +{ + int err; + + err = mlx5_esw_vport_enable(esw, vport_num, enabled_events); + if (err) + return err; + + mlx5_esw_vport_debugfs_create(esw, vport_num, false, 0); + err = esw_offloads_load_rep(esw, vport_num); + if (err) + goto err_rep; + + return err; + +err_rep: + mlx5_esw_vport_debugfs_destroy(esw, vport_num); + mlx5_esw_vport_disable(esw, vport_num); + return err; +} + +void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + esw_offloads_unload_rep(esw, vport_num); + mlx5_esw_vport_debugfs_destroy(esw, vport_num); + mlx5_esw_vport_disable(esw, vport_num); +} + +void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) { + if (!vport->enabled) + continue; + mlx5_eswitch_unload_vport(esw, vport->vport); + } +} + +int mlx5_eswitch_load_vf_vports(struct 
mlx5_eswitch *esw, u16 num_vfs, + enum mlx5_eswitch_vport_event enabled_events) +{ + struct mlx5_vport *vport; + unsigned long i; + int err; + + mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) { + err = mlx5_eswitch_load_vport(esw, vport->vport, enabled_events); + if (err) + goto vf_err; + } + + return 0; + +vf_err: + mlx5_eswitch_unload_vf_vports(esw, num_vfs); + return err; +} + +static int host_pf_enable_hca(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_ecpf(dev)) + return 0; + + /* Once vport and representor are ready, take out the external host PF + * out of initializing state. Enabling HCA clears the iser->initializing + * bit and host PF driver loading can progress. + */ + return mlx5_cmd_host_pf_enable_hca(dev); +} + +static void host_pf_disable_hca(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_ecpf(dev)) + return; + + mlx5_cmd_host_pf_disable_hca(dev); +} + +/* mlx5_eswitch_enable_pf_vf_vports() enables vports of PF, ECPF and VFs + * whichever are present on the eswitch. + */ +int +mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, + enum mlx5_eswitch_vport_event enabled_events) +{ + int ret; + + /* Enable PF vport */ + ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_PF, enabled_events); + if (ret) + return ret; + + /* Enable external host PF HCA */ + ret = host_pf_enable_hca(esw->dev); + if (ret) + goto pf_hca_err; + + /* Enable ECPF vport */ + if (mlx5_ecpf_vport_exists(esw->dev)) { + ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_ECPF, enabled_events); + if (ret) + goto ecpf_err; + } + + /* Enable VF vports */ + ret = mlx5_eswitch_load_vf_vports(esw, esw->esw_funcs.num_vfs, + enabled_events); + if (ret) + goto vf_err; + return 0; + +vf_err: + if (mlx5_ecpf_vport_exists(esw->dev)) + mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF); +ecpf_err: + host_pf_disable_hca(esw->dev); +pf_hca_err: + mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF); + return ret; +} + +/* mlx5_eswitch_disable_pf_vf_vports() disables vports of PF, ECPF and VFs + * whichever are previously enabled on the eswitch. 
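+ * Teardown runs in the reverse order of mlx5_eswitch_enable_pf_vf_vports():
+ * VF vports first, then the ECPF vport, the external host PF HCA and
+ * finally the PF vport.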
+ */ +void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) +{ + mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); + + if (mlx5_ecpf_vport_exists(esw->dev)) + mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF); + + host_pf_disable_hca(esw->dev); + mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF); +} + +static void mlx5_eswitch_get_devlink_param(struct mlx5_eswitch *esw) +{ + struct devlink *devlink = priv_to_devlink(esw->dev); + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(devlink, + MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, + &val); + if (!err) { + esw->params.large_group_num = val.vu32; + } else { + esw_warn(esw->dev, + "Devlink can't get param fdb_large_groups, uses default (%d).\n", + ESW_OFFLOADS_DEFAULT_NUM_GROUPS); + esw->params.large_group_num = ESW_OFFLOADS_DEFAULT_NUM_GROUPS; + } +} + +static void +mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, int num_vfs) +{ + const u32 *out; + + if (num_vfs < 0) + return; + + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) { + esw->esw_funcs.num_vfs = num_vfs; + return; + } + + out = mlx5_esw_query_functions(esw->dev); + if (IS_ERR(out)) + return; + + esw->esw_funcs.num_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_num_of_vfs); + kvfree(out); +} + +static void mlx5_esw_mode_change_notify(struct mlx5_eswitch *esw, u16 mode) +{ + struct mlx5_esw_event_info info = {}; + + info.new_mode = mode; + + blocking_notifier_call_chain(&esw->n_head, 0, &info); +} + +static int mlx5_esw_acls_ns_init(struct mlx5_eswitch *esw) +{ + struct mlx5_core_dev *dev = esw->dev; + int total_vports; + int err; + + if (esw->flags & MLX5_ESWITCH_VPORT_ACL_NS_CREATED) + return 0; + + total_vports = mlx5_eswitch_get_total_vports(dev); + + if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) { + err = mlx5_fs_egress_acls_init(dev, total_vports); + if (err) + return err; + } else { + esw_warn(dev, "engress ACL is not supported by FW\n"); + } + + if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) { + err = mlx5_fs_ingress_acls_init(dev, total_vports); + if (err) + goto err; + } else { + esw_warn(dev, "ingress ACL is not supported by FW\n"); + } + esw->flags |= MLX5_ESWITCH_VPORT_ACL_NS_CREATED; + return 0; + +err: + if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) + mlx5_fs_egress_acls_cleanup(dev); + return err; +} + +static void mlx5_esw_acls_ns_cleanup(struct mlx5_eswitch *esw) +{ + struct mlx5_core_dev *dev = esw->dev; + + esw->flags &= ~MLX5_ESWITCH_VPORT_ACL_NS_CREATED; + if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) + mlx5_fs_ingress_acls_cleanup(dev); + if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) + mlx5_fs_egress_acls_cleanup(dev); +} + +/** + * mlx5_eswitch_enable_locked - Enable eswitch + * @esw: Pointer to eswitch + * @num_vfs: Enable eswitch for given number of VFs. This is optional. + * Valid value are 0, > 0 and MLX5_ESWITCH_IGNORE_NUM_VFS. + * Caller should pass num_vfs > 0 when enabling eswitch for + * vf vports. Caller should pass num_vfs = 0, when eswitch + * is enabled without sriov VFs or when caller + * is unaware of the sriov state of the host PF on ECPF based + * eswitch. Caller should pass < 0 when num_vfs should be + * completely ignored. This is typically the case when eswitch + * is enabled without sriov regardless of PF/ECPF system. + * mlx5_eswitch_enable_locked() Enables eswitch in either legacy or offloads + * mode. If num_vfs >=0 is provided, it setup VF related eswitch vports. + * It returns 0 on success or error code on failure. 
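+ *
+ * Illustrative call sequence (see mlx5_eswitch_enable() below for one such
+ * caller); the eswitch mode lock must be held for write:
+ *
+ *	down_write(&esw->mode_lock);
+ *	err = mlx5_eswitch_enable_locked(esw, num_vfs);
+ *	up_write(&esw->mode_lock);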
+ */ +int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) +{ + int err; + + lockdep_assert_held(&esw->mode_lock); + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { + esw_warn(esw->dev, "FDB is not supported, aborting ...\n"); + return -EOPNOTSUPP; + } + + mlx5_eswitch_get_devlink_param(esw); + + err = mlx5_esw_acls_ns_init(esw); + if (err) + return err; + + mlx5_eswitch_update_num_of_vfs(esw, num_vfs); + + MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); + mlx5_eq_notifier_register(esw->dev, &esw->nb); + + if (esw->mode == MLX5_ESWITCH_LEGACY) { + err = esw_legacy_enable(esw); + } else { + mlx5_rescan_drivers(esw->dev); + err = esw_offloads_enable(esw); + } + + if (err) + goto abort; + + esw->fdb_table.flags |= MLX5_ESW_FDB_CREATED; + + mlx5_eswitch_event_handler_register(esw); + + esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), active vports(%d)\n", + esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", + esw->esw_funcs.num_vfs, esw->enabled_vports); + + mlx5_esw_mode_change_notify(esw, esw->mode); + + return 0; + +abort: + mlx5_esw_acls_ns_cleanup(esw); + return err; +} + +/** + * mlx5_eswitch_enable - Enable eswitch + * @esw: Pointer to eswitch + * @num_vfs: Enable eswitch switch for given number of VFs. + * Caller must pass num_vfs > 0 when enabling eswitch for + * vf vports. + * mlx5_eswitch_enable() returns 0 on success or error code on failure. + */ +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) +{ + bool toggle_lag; + int ret; + + if (!mlx5_esw_allowed(esw)) + return 0; + + devl_assert_locked(priv_to_devlink(esw->dev)); + + toggle_lag = !mlx5_esw_is_fdb_created(esw); + + if (toggle_lag) + mlx5_lag_disable_change(esw->dev); + + down_write(&esw->mode_lock); + if (!mlx5_esw_is_fdb_created(esw)) { + ret = mlx5_eswitch_enable_locked(esw, num_vfs); + } else { + enum mlx5_eswitch_vport_event vport_events; + + vport_events = (esw->mode == MLX5_ESWITCH_LEGACY) ? + MLX5_LEGACY_SRIOV_VPORT_EVENTS : MLX5_VPORT_UC_ADDR_CHANGE; + ret = mlx5_eswitch_load_vf_vports(esw, num_vfs, vport_events); + if (!ret) + esw->esw_funcs.num_vfs = num_vfs; + } + up_write(&esw->mode_lock); + + if (toggle_lag) + mlx5_lag_enable_change(esw->dev); + + return ret; +} + +/* When disabling sriov, free driver level resources. */ +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) +{ + if (!mlx5_esw_allowed(esw)) + return; + + devl_assert_locked(priv_to_devlink(esw->dev)); + down_write(&esw->mode_lock); + /* If driver is unloaded, this function is called twice by remove_one() + * and mlx5_unload(). Prevent the second call. + */ + if (!esw->esw_funcs.num_vfs && !clear_vf) + goto unlock; + + esw_info(esw->dev, "Unload vfs: mode(%s), nvfs(%d), active vports(%d)\n", + esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", + esw->esw_funcs.num_vfs, esw->enabled_vports); + + mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); + if (clear_vf) + mlx5_eswitch_clear_vf_vports_info(esw); + /* If disabling sriov in switchdev mode, free meta rules here + * because it depends on num_vfs. + */ + if (esw->mode == MLX5_ESWITCH_OFFLOADS) { + struct devlink *devlink = priv_to_devlink(esw->dev); + + devl_rate_nodes_destroy(devlink); + } + /* Destroy legacy fdb when disabling sriov in legacy mode. */ + if (esw->mode == MLX5_ESWITCH_LEGACY) + mlx5_eswitch_disable_locked(esw); + + esw->esw_funcs.num_vfs = 0; + +unlock: + up_write(&esw->mode_lock); +} + +/* Free resources for corresponding eswitch mode. 
It is called by devlink + * when changing eswitch mode or modprobe when unloading driver. + */ +void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw) +{ + struct devlink *devlink = priv_to_devlink(esw->dev); + + /* Notify eswitch users that it is exiting from current mode. + * So that it can do necessary cleanup before the eswitch is disabled. + */ + mlx5_esw_mode_change_notify(esw, MLX5_ESWITCH_LEGACY); + + mlx5_eq_notifier_unregister(esw->dev, &esw->nb); + mlx5_eswitch_event_handler_unregister(esw); + + esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n", + esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", + esw->esw_funcs.num_vfs, esw->enabled_vports); + + if (esw->fdb_table.flags & MLX5_ESW_FDB_CREATED) { + esw->fdb_table.flags &= ~MLX5_ESW_FDB_CREATED; + if (esw->mode == MLX5_ESWITCH_OFFLOADS) + esw_offloads_disable(esw); + else if (esw->mode == MLX5_ESWITCH_LEGACY) + esw_legacy_disable(esw); + mlx5_esw_acls_ns_cleanup(esw); + } + + if (esw->mode == MLX5_ESWITCH_OFFLOADS) + devl_rate_nodes_destroy(devlink); +} + +void mlx5_eswitch_disable(struct mlx5_eswitch *esw) +{ + if (!mlx5_esw_allowed(esw)) + return; + + devl_assert_locked(priv_to_devlink(esw->dev)); + mlx5_lag_disable_change(esw->dev); + down_write(&esw->mode_lock); + mlx5_eswitch_disable_locked(esw); + esw->mode = MLX5_ESWITCH_LEGACY; + up_write(&esw->mode_lock); + mlx5_lag_enable_change(esw->dev); +} + +static int mlx5_query_hca_cap_host_pf(struct mlx5_core_dev *dev, void *out) +{ + u16 opmod = (MLX5_CAP_GENERAL << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01); + u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {}; + + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); + MLX5_SET(query_hca_cap_in, in, op_mod, opmod); + MLX5_SET(query_hca_cap_in, in, function_id, MLX5_VPORT_PF); + MLX5_SET(query_hca_cap_in, in, other_function, true); + return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out); +} + +int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id) + +{ + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *query_ctx; + void *hca_caps; + int err; + + if (!mlx5_core_is_ecpf(dev)) { + *max_sfs = 0; + return 0; + } + + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); + if (!query_ctx) + return -ENOMEM; + + err = mlx5_query_hca_cap_host_pf(dev, query_ctx); + if (err) + goto out_free; + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + *max_sfs = MLX5_GET(cmd_hca_cap, hca_caps, max_num_sf); + *sf_base_id = MLX5_GET(cmd_hca_cap, hca_caps, sf_base_id); + +out_free: + kfree(query_ctx); + return err; +} + +static int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, struct mlx5_core_dev *dev, + int index, u16 vport_num) +{ + struct mlx5_vport *vport; + int err; + + vport = kzalloc(sizeof(*vport), GFP_KERNEL); + if (!vport) + return -ENOMEM; + + vport->dev = esw->dev; + vport->vport = vport_num; + vport->index = index; + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + INIT_WORK(&vport->vport_change_handler, esw_vport_change_handler); + err = xa_insert(&esw->vports, vport_num, vport, GFP_KERNEL); + if (err) + goto insert_err; + + esw->total_vports++; + return 0; + +insert_err: + kfree(vport); + return err; +} + +static void mlx5_esw_vport_free(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + xa_erase(&esw->vports, vport->vport); + kfree(vport); +} + +static void mlx5_esw_vports_cleanup(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_esw_for_each_vport(esw, i, vport) + 
mlx5_esw_vport_free(esw, vport); + xa_destroy(&esw->vports); +} + +static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) +{ + struct mlx5_core_dev *dev = esw->dev; + u16 max_host_pf_sfs; + u16 base_sf_num; + int idx = 0; + int err; + int i; + + xa_init(&esw->vports); + + err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_PF); + if (err) + goto err; + if (esw->first_host_vport == MLX5_VPORT_PF) + xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN); + idx++; + + for (i = 0; i < mlx5_core_max_vfs(dev); i++) { + err = mlx5_esw_vport_alloc(esw, dev, idx, idx); + if (err) + goto err; + xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_VF); + xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN); + idx++; + } + base_sf_num = mlx5_sf_start_function_id(dev); + for (i = 0; i < mlx5_sf_max_functions(dev); i++) { + err = mlx5_esw_vport_alloc(esw, dev, idx, base_sf_num + i); + if (err) + goto err; + xa_set_mark(&esw->vports, base_sf_num + i, MLX5_ESW_VPT_SF); + idx++; + } + + err = mlx5_esw_sf_max_hpf_functions(dev, &max_host_pf_sfs, &base_sf_num); + if (err) + goto err; + for (i = 0; i < max_host_pf_sfs; i++) { + err = mlx5_esw_vport_alloc(esw, dev, idx, base_sf_num + i); + if (err) + goto err; + xa_set_mark(&esw->vports, base_sf_num + i, MLX5_ESW_VPT_SF); + idx++; + } + + if (mlx5_ecpf_vport_exists(dev)) { + err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_ECPF); + if (err) + goto err; + idx++; + } + err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_UPLINK); + if (err) + goto err; + return 0; + +err: + mlx5_esw_vports_cleanup(esw); + return err; +} + +int mlx5_eswitch_init(struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw; + int err; + + if (!MLX5_VPORT_MANAGER(dev)) + return 0; + + esw = kzalloc(sizeof(*esw), GFP_KERNEL); + if (!esw) + return -ENOMEM; + + esw->dev = dev; + esw->manager_vport = mlx5_eswitch_manager_vport(dev); + esw->first_host_vport = mlx5_eswitch_first_host_vport_num(dev); + + esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); + if (!esw->work_queue) { + err = -ENOMEM; + goto abort; + } + + err = mlx5_esw_vports_init(esw); + if (err) + goto abort; + + err = esw_offloads_init_reps(esw); + if (err) + goto reps_err; + + mutex_init(&esw->offloads.encap_tbl_lock); + hash_init(esw->offloads.encap_tbl); + mutex_init(&esw->offloads.decap_tbl_lock); + hash_init(esw->offloads.decap_tbl); + mlx5e_mod_hdr_tbl_init(&esw->offloads.mod_hdr); + atomic64_set(&esw->offloads.num_flows, 0); + ida_init(&esw->offloads.vport_metadata_ida); + xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC); + mutex_init(&esw->state_lock); + init_rwsem(&esw->mode_lock); + refcount_set(&esw->qos.refcnt, 0); + + esw->enabled_vports = 0; + esw->mode = MLX5_ESWITCH_LEGACY; + esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) && + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) + esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC; + else + esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; + if (MLX5_ESWITCH_MANAGER(dev) && + mlx5_esw_vport_match_metadata_supported(esw)) + esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + + dev->priv.eswitch = esw; + BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head); + + esw->dbgfs = debugfs_create_dir("esw", mlx5_debugfs_get_dev_root(esw->dev)); + esw_info(dev, + "Total vports %d, per vport: max uc(%d) max mc(%d)\n", + esw->total_vports, + MLX5_MAX_UC_PER_VPORT(dev), + MLX5_MAX_MC_PER_VPORT(dev)); + return 0; + +reps_err: + mlx5_esw_vports_cleanup(esw); +abort: + if (esw->work_queue) + 
destroy_workqueue(esw->work_queue); + kfree(esw); + return err; +} + +void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) +{ + if (!esw || !MLX5_VPORT_MANAGER(esw->dev)) + return; + + esw_info(esw->dev, "cleanup\n"); + + debugfs_remove_recursive(esw->dbgfs); + esw->dev->priv.eswitch = NULL; + destroy_workqueue(esw->work_queue); + WARN_ON(refcount_read(&esw->qos.refcnt)); + mutex_destroy(&esw->state_lock); + WARN_ON(!xa_empty(&esw->offloads.vhca_map)); + xa_destroy(&esw->offloads.vhca_map); + ida_destroy(&esw->offloads.vport_metadata_ida); + mlx5e_mod_hdr_tbl_destroy(&esw->offloads.mod_hdr); + mutex_destroy(&esw->offloads.encap_tbl_lock); + mutex_destroy(&esw->offloads.decap_tbl_lock); + esw_offloads_cleanup_reps(esw); + mlx5_esw_vports_cleanup(esw); + kfree(esw); +} + +/* Vport Administration */ +static int +mlx5_esw_set_vport_mac_locked(struct mlx5_eswitch *esw, + struct mlx5_vport *evport, const u8 *mac) +{ + u16 vport_num = evport->vport; + u64 node_guid; + int err = 0; + + if (is_multicast_ether_addr(mac)) + return -EINVAL; + + if (evport->info.spoofchk && !is_valid_ether_addr(mac)) + mlx5_core_warn(esw->dev, + "Set invalid MAC while spoofchk is on, vport(%d)\n", + vport_num); + + err = mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, mac); + if (err) { + mlx5_core_warn(esw->dev, + "Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n", + vport_num, err); + return err; + } + + node_guid_gen_from_mac(&node_guid, mac); + err = mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, node_guid); + if (err) + mlx5_core_warn(esw->dev, + "Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n", + vport_num, err); + + ether_addr_copy(evport->info.mac, mac); + evport->info.node_guid = node_guid; + if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) + err = esw_acl_ingress_lgcy_setup(esw, evport); + + return err; +} + +int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, + u16 vport, const u8 *mac) +{ + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + int err = 0; + + if (IS_ERR(evport)) + return PTR_ERR(evport); + + mutex_lock(&esw->state_lock); + err = mlx5_esw_set_vport_mac_locked(esw, evport, mac); + mutex_unlock(&esw->state_lock); + return err; +} + +static bool mlx5_esw_check_port_type(struct mlx5_eswitch *esw, u16 vport_num, xa_mark_t mark) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return false; + + return xa_get_mark(&esw->vports, vport_num, mark); +} + +bool mlx5_eswitch_is_vf_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_VF); +} + +bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_SF); +} + +int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, + u16 vport, int link_state) +{ + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + int opmod = MLX5_VPORT_STATE_OP_MOD_ESW_VPORT; + int other_vport = 1; + int err = 0; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + if (IS_ERR(evport)) + return PTR_ERR(evport); + + if (vport == MLX5_VPORT_UPLINK) { + opmod = MLX5_VPORT_STATE_OP_MOD_UPLINK; + other_vport = 0; + vport = 0; + } + mutex_lock(&esw->state_lock); + if (esw->mode != MLX5_ESWITCH_LEGACY) { + err = -EOPNOTSUPP; + goto unlock; + } + + err = mlx5_modify_vport_admin_state(esw->dev, opmod, vport, other_vport, link_state); + if (err) { + mlx5_core_warn(esw->dev, "Failed to set vport %d 
link state, opmod = %d, err = %d", + vport, opmod, err); + goto unlock; + } + + evport->info.link_state = link_state; + +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, + u16 vport, struct ifla_vf_info *ivi) +{ + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + + if (IS_ERR(evport)) + return PTR_ERR(evport); + + memset(ivi, 0, sizeof(*ivi)); + ivi->vf = vport - 1; + + mutex_lock(&esw->state_lock); + ether_addr_copy(ivi->mac, evport->info.mac); + ivi->linkstate = evport->info.link_state; + ivi->vlan = evport->info.vlan; + ivi->qos = evport->info.qos; + ivi->spoofchk = evport->info.spoofchk; + ivi->trusted = evport->info.trusted; + if (evport->qos.enabled) { + ivi->min_tx_rate = evport->qos.min_rate; + ivi->max_tx_rate = evport->qos.max_rate; + } + mutex_unlock(&esw->state_lock); + + return 0; +} + +int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + u16 vport, u16 vlan, u8 qos, u8 set_flags) +{ + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + bool vst_mode_steering = esw_vst_mode_is_steering(esw); + int err = 0; + + if (IS_ERR(evport)) + return PTR_ERR(evport); + if (vlan > 4095 || qos > 7) + return -EINVAL; + + if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering) { + err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); + if (err) + return err; + } + + evport->info.vlan = vlan; + evport->info.qos = qos; + if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) { + err = esw_acl_ingress_lgcy_setup(esw, evport); + if (err) + return err; + err = esw_acl_egress_lgcy_setup(esw, evport); + } + + return err; +} + +int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, + u16 vport_num, + struct ifla_vf_stats *vf_stats) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {}; + struct mlx5_vport_drop_stats stats = {}; + int err = 0; + u32 *out; + + if (IS_ERR(vport)) + return PTR_ERR(vport); + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_vport_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_COUNTER); + MLX5_SET(query_vport_counter_in, in, op_mod, 0); + MLX5_SET(query_vport_counter_in, in, vport_number, vport->vport); + MLX5_SET(query_vport_counter_in, in, other_vport, 1); + + err = mlx5_cmd_exec_inout(esw->dev, query_vport_counter, in, out); + if (err) + goto free_out; + + #define MLX5_GET_CTR(p, x) \ + MLX5_GET64(query_vport_counter_out, p, x) + + memset(vf_stats, 0, sizeof(*vf_stats)); + vf_stats->rx_packets = + MLX5_GET_CTR(out, received_eth_unicast.packets) + + MLX5_GET_CTR(out, received_ib_unicast.packets) + + MLX5_GET_CTR(out, received_eth_multicast.packets) + + MLX5_GET_CTR(out, received_ib_multicast.packets) + + MLX5_GET_CTR(out, received_eth_broadcast.packets); + + vf_stats->rx_bytes = + MLX5_GET_CTR(out, received_eth_unicast.octets) + + MLX5_GET_CTR(out, received_ib_unicast.octets) + + MLX5_GET_CTR(out, received_eth_multicast.octets) + + MLX5_GET_CTR(out, received_ib_multicast.octets) + + MLX5_GET_CTR(out, received_eth_broadcast.octets); + + vf_stats->tx_packets = + MLX5_GET_CTR(out, transmitted_eth_unicast.packets) + + MLX5_GET_CTR(out, transmitted_ib_unicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_multicast.packets) + + MLX5_GET_CTR(out, transmitted_ib_multicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); + + vf_stats->tx_bytes = + 
MLX5_GET_CTR(out, transmitted_eth_unicast.octets) + + MLX5_GET_CTR(out, transmitted_ib_unicast.octets) + + MLX5_GET_CTR(out, transmitted_eth_multicast.octets) + + MLX5_GET_CTR(out, transmitted_ib_multicast.octets) + + MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); + + vf_stats->multicast = + MLX5_GET_CTR(out, received_eth_multicast.packets) + + MLX5_GET_CTR(out, received_ib_multicast.packets); + + vf_stats->broadcast = + MLX5_GET_CTR(out, received_eth_broadcast.packets); + + err = mlx5_esw_query_vport_drop_stats(esw->dev, vport, &stats); + if (err) + goto free_out; + vf_stats->rx_dropped = stats.rx_dropped; + vf_stats->tx_dropped = stats.tx_dropped; + +free_out: + kvfree(out); + return err; +} + +u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + + return mlx5_esw_allowed(esw) ? esw->mode : MLX5_ESWITCH_LEGACY; +} +EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); + +enum devlink_eswitch_encap_mode +mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw; + + esw = dev->priv.eswitch; + return (mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS) ? esw->offloads.encap : + DEVLINK_ESWITCH_ENCAP_MODE_NONE; +} +EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode); + +bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, + struct mlx5_core_dev *dev1) +{ + return (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS && + dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS); +} + +int mlx5_esw_event_notifier_register(struct mlx5_eswitch *esw, struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&esw->n_head, nb); +} + +void mlx5_esw_event_notifier_unregister(struct mlx5_eswitch *esw, struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&esw->n_head, nb); +} + +/** + * mlx5_esw_hold() - Try to take a read lock on esw mode lock. + * @mdev: mlx5 core device. + * + * Should be called by esw resources callers. + * + * Return: true on success or false. + */ +bool mlx5_esw_hold(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + + /* e.g. VF doesn't have eswitch so nothing to do */ + if (!mlx5_esw_allowed(esw)) + return true; + + if (down_read_trylock(&esw->mode_lock) != 0) + return true; + + return false; +} + +/** + * mlx5_esw_release() - Release a read lock on esw mode lock. + * @mdev: mlx5 core device. + */ +void mlx5_esw_release(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + + if (mlx5_esw_allowed(esw)) + up_read(&esw->mode_lock); +} + +/** + * mlx5_esw_get() - Increase esw user count. + * @mdev: mlx5 core device. + */ +void mlx5_esw_get(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + + if (mlx5_esw_allowed(esw)) + atomic64_inc(&esw->user_count); +} + +/** + * mlx5_esw_put() - Decrease esw user count. + * @mdev: mlx5 core device. + */ +void mlx5_esw_put(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + + if (mlx5_esw_allowed(esw)) + atomic64_dec_if_positive(&esw->user_count); +} + +/** + * mlx5_esw_try_lock() - Take a write lock on esw mode lock. + * @esw: eswitch device. + * + * Should be called by esw mode change routine. + * + * Return: + * * 0 - esw mode if successfully locked and refcount is 0. + * * -EBUSY - refcount is not 0. + * * -EINVAL - In the middle of switching mode or lock is already held. 
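+ *
+ * Typical usage might look like the sketch below (do_mode_change() is a
+ * made-up placeholder for the caller's own work, not a function in this
+ * file):
+ *
+ *	int err, mode;
+ *
+ *	mode = mlx5_esw_try_lock(esw);
+ *	if (mode < 0)
+ *		return mode;
+ *	err = do_mode_change(esw, mode);
+ *	mlx5_esw_unlock(esw);
+ *	return err;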
+ */ +int mlx5_esw_try_lock(struct mlx5_eswitch *esw) +{ + if (down_write_trylock(&esw->mode_lock) == 0) + return -EINVAL; + + if (atomic64_read(&esw->user_count) > 0) { + up_write(&esw->mode_lock); + return -EBUSY; + } + + return esw->mode; +} + +/** + * mlx5_esw_unlock() - Release write lock on esw mode lock + * @esw: eswitch device. + */ +void mlx5_esw_unlock(struct mlx5_eswitch *esw) +{ + up_write(&esw->mode_lock); +} + +/** + * mlx5_eswitch_get_total_vports - Get total vports of the eswitch + * + * @dev: Pointer to core device + * + * mlx5_eswitch_get_total_vports returns total number of eswitch vports. + */ +u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw; + + esw = dev->priv.eswitch; + return mlx5_esw_allowed(esw) ? esw->total_vports : 0; +} +EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports); + +/** + * mlx5_eswitch_get_core_dev - Get the mdev device + * @esw : eswitch device. + * + * Return the mellanox core device which manages the eswitch. + */ +struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw) +{ + return mlx5_esw_allowed(esw) ? esw->dev : NULL; +} +EXPORT_SYMBOL(mlx5_eswitch_get_core_dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h new file mode 100644 index 000000000..a3daca44f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -0,0 +1,804 @@ +/* + * Copyright (c) 2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __MLX5_ESWITCH_H__ +#define __MLX5_ESWITCH_H__ + +#include <linux/if_ether.h> +#include <linux/if_link.h> +#include <linux/atomic.h> +#include <linux/xarray.h> +#include <net/devlink.h> +#include <linux/mlx5/device.h> +#include <linux/mlx5/eswitch.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/fs.h> +#include "lib/mpfs.h" +#include "lib/fs_chains.h" +#include "sf/sf.h" +#include "en/tc_ct.h" +#include "en/tc/sample.h" + +enum mlx5_mapped_obj_type { + MLX5_MAPPED_OBJ_CHAIN, + MLX5_MAPPED_OBJ_SAMPLE, + MLX5_MAPPED_OBJ_INT_PORT_METADATA, +}; + +struct mlx5_mapped_obj { + enum mlx5_mapped_obj_type type; + union { + u32 chain; + struct { + u32 group_id; + u32 rate; + u32 trunc_size; + u32 tunnel_id; + } sample; + u32 int_port_metadata; + }; +}; + +#ifdef CONFIG_MLX5_ESWITCH + +#define ESW_OFFLOADS_DEFAULT_NUM_GROUPS 15 + +#define MLX5_MAX_UC_PER_VPORT(dev) \ + (1 << MLX5_CAP_GEN(dev, log_max_current_uc_list)) + +#define MLX5_MAX_MC_PER_VPORT(dev) \ + (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list)) + +#define mlx5_esw_has_fwd_fdb(dev) \ + MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table) + +#define esw_chains(esw) \ + ((esw)->fdb_table.offloads.esw_chains_priv) + +enum { + MAPPING_TYPE_CHAIN, + MAPPING_TYPE_TUNNEL, + MAPPING_TYPE_TUNNEL_ENC_OPTS, + MAPPING_TYPE_LABELS, + MAPPING_TYPE_ZONE, + MAPPING_TYPE_INT_PORT, +}; + +struct vport_ingress { + struct mlx5_flow_table *acl; + struct mlx5_flow_handle *allow_rule; + struct { + struct mlx5_flow_group *allow_spoofchk_only_grp; + struct mlx5_flow_group *allow_untagged_spoofchk_grp; + struct mlx5_flow_group *allow_untagged_only_grp; + struct mlx5_flow_group *drop_grp; + struct mlx5_flow_handle *drop_rule; + struct mlx5_fc *drop_counter; + } legacy; + struct { + /* Optional group to add an FTE to do internal priority + * tagging on ingress packets. + */ + struct mlx5_flow_group *metadata_prio_tag_grp; + /* Group to add default match-all FTE entry to tag ingress + * packet with metadata. 
+ */ + struct mlx5_flow_group *metadata_allmatch_grp; + /* Optional group to add a drop all rule */ + struct mlx5_flow_group *drop_grp; + struct mlx5_modify_hdr *modify_metadata; + struct mlx5_flow_handle *modify_metadata_rule; + struct mlx5_flow_handle *drop_rule; + } offloads; +}; + +struct vport_egress { + struct mlx5_flow_table *acl; + struct mlx5_flow_handle *allowed_vlan; + struct mlx5_flow_group *vlan_grp; + union { + struct { + struct mlx5_flow_group *drop_grp; + struct mlx5_flow_handle *drop_rule; + struct mlx5_fc *drop_counter; + } legacy; + struct { + struct mlx5_flow_group *fwd_grp; + struct mlx5_flow_handle *fwd_rule; + struct mlx5_flow_handle *bounce_rule; + struct mlx5_flow_group *bounce_grp; + } offloads; + }; +}; + +struct mlx5_vport_drop_stats { + u64 rx_dropped; + u64 tx_dropped; +}; + +struct mlx5_vport_info { + u8 mac[ETH_ALEN]; + u16 vlan; + u64 node_guid; + int link_state; + u8 qos; + u8 spoofchk: 1; + u8 trusted: 1; +}; + +/* Vport context events */ +enum mlx5_eswitch_vport_event { + MLX5_VPORT_UC_ADDR_CHANGE = BIT(0), + MLX5_VPORT_MC_ADDR_CHANGE = BIT(1), + MLX5_VPORT_PROMISC_CHANGE = BIT(3), +}; + +struct mlx5_vport { + struct mlx5_core_dev *dev; + struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct mlx5_flow_handle *promisc_rule; + struct mlx5_flow_handle *allmulti_rule; + struct work_struct vport_change_handler; + + struct vport_ingress ingress; + struct vport_egress egress; + u32 default_metadata; + u32 metadata; + + struct mlx5_vport_info info; + + struct { + bool enabled; + u32 esw_tsar_ix; + u32 bw_share; + u32 min_rate; + u32 max_rate; + struct mlx5_esw_rate_group *group; + } qos; + + u16 vport; + bool enabled; + enum mlx5_eswitch_vport_event enabled_events; + int index; + struct devlink_port *dl_port; + struct dentry *dbgfs; +}; + +struct mlx5_esw_indir_table; + +struct mlx5_eswitch_fdb { + union { + struct legacy_fdb { + struct mlx5_flow_table *fdb; + struct mlx5_flow_group *addr_grp; + struct mlx5_flow_group *allmulti_grp; + struct mlx5_flow_group *promisc_grp; + struct mlx5_flow_table *vepa_fdb; + struct mlx5_flow_handle *vepa_uplink_rule; + struct mlx5_flow_handle *vepa_star_rule; + } legacy; + + struct offloads_fdb { + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *tc_miss_table; + struct mlx5_flow_table *slow_fdb; + struct mlx5_flow_group *send_to_vport_grp; + struct mlx5_flow_group *send_to_vport_meta_grp; + struct mlx5_flow_group *peer_miss_grp; + struct mlx5_flow_handle **peer_miss_rules; + struct mlx5_flow_group *miss_grp; + struct mlx5_flow_handle **send_to_vport_meta_rules; + struct mlx5_flow_handle *miss_rule_uni; + struct mlx5_flow_handle *miss_rule_multi; + int vlan_push_pop_refcount; + + struct mlx5_fs_chains *esw_chains_priv; + struct { + DECLARE_HASHTABLE(table, 8); + /* Protects vports.table */ + struct mutex lock; + } vports; + + struct mlx5_esw_indir_table *indir; + + } offloads; + }; + u32 flags; +}; + +struct mlx5_esw_offload { + struct mlx5_flow_table *ft_offloads_restore; + struct mlx5_flow_group *restore_group; + struct mlx5_modify_hdr *restore_copy_hdr_id; + struct mapping_ctx *reg_c0_obj_pool; + + struct mlx5_flow_table *ft_offloads; + struct mlx5_flow_group *vport_rx_group; + struct mlx5_flow_group *vport_rx_drop_group; + struct mlx5_flow_handle *vport_rx_drop_rule; + struct xarray vport_reps; + struct list_head peer_flows; + struct mutex peer_mutex; + struct mutex encap_tbl_lock; /* protects encap_tbl */ + DECLARE_HASHTABLE(encap_tbl, 8); + struct mutex 
decap_tbl_lock; /* protects decap_tbl */ + DECLARE_HASHTABLE(decap_tbl, 8); + struct mod_hdr_tbl mod_hdr; + DECLARE_HASHTABLE(termtbl_tbl, 8); + struct mutex termtbl_mutex; /* protects termtbl hash */ + struct xarray vhca_map; + const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES]; + u8 inline_mode; + atomic64_t num_flows; + enum devlink_eswitch_encap_mode encap; + struct ida vport_metadata_ida; + unsigned int host_number; /* ECPF supports one external host */ +}; + +/* E-Switch MC FDB table hash node */ +struct esw_mc_addr { /* SRIOV only */ + struct l2addr_node node; + struct mlx5_flow_handle *uplink_rule; /* Forward to uplink rule */ + u32 refcnt; +}; + +struct mlx5_host_work { + struct work_struct work; + struct mlx5_eswitch *esw; +}; + +struct mlx5_esw_functions { + struct mlx5_nb nb; + u16 num_vfs; +}; + +enum { + MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0), + MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED = BIT(1), + MLX5_ESWITCH_VPORT_ACL_NS_CREATED = BIT(2), +}; + +struct mlx5_esw_bridge_offloads; + +enum { + MLX5_ESW_FDB_CREATED = BIT(0), +}; + +struct mlx5_eswitch { + struct mlx5_core_dev *dev; + struct mlx5_nb nb; + struct mlx5_eswitch_fdb fdb_table; + /* legacy data structures */ + struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; + struct esw_mc_addr mc_promisc; + /* end of legacy */ + struct workqueue_struct *work_queue; + struct xarray vports; + u32 flags; + int total_vports; + int enabled_vports; + /* Synchronize between vport change events + * and async SRIOV admin state changes + */ + struct mutex state_lock; + + /* Protects eswitch mode change that occurs via one or more + * user commands, i.e. sriov state change, devlink commands. + */ + struct rw_semaphore mode_lock; + atomic64_t user_count; + + struct { + u32 root_tsar_ix; + struct mlx5_esw_rate_group *group0; + struct list_head groups; /* Protected by esw->state_lock */ + + /* Protected by esw->state_lock. + * Initially 0, meaning no QoS users and QoS is disabled. 
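+ * mlx5_eswitch_cleanup() WARNs if this refcount is still non-zero at
+ * teardown.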
+ */ + refcount_t refcnt; + } qos; + + struct mlx5_esw_bridge_offloads *br_offloads; + struct mlx5_esw_offload offloads; + int mode; + u16 manager_vport; + u16 first_host_vport; + struct mlx5_esw_functions esw_funcs; + struct { + u32 large_group_num; + } params; + struct blocking_notifier_head n_head; + struct dentry *dbgfs; + bool paired[MLX5_MAX_PORTS]; +}; + +void esw_offloads_disable(struct mlx5_eswitch *esw); +int esw_offloads_enable(struct mlx5_eswitch *esw); +void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); +int esw_offloads_init_reps(struct mlx5_eswitch *esw); + +struct mlx5_flow_handle * +mlx5_eswitch_add_send_to_vport_meta_rule(struct mlx5_eswitch *esw, u16 vport_num); +void mlx5_eswitch_del_send_to_vport_meta_rule(struct mlx5_flow_handle *rule); + +bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw); +int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable); +u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw); +void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata); + +int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps); + +/* E-Switch API */ +int mlx5_eswitch_init(struct mlx5_core_dev *dev); +void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); + +#define MLX5_ESWITCH_IGNORE_NUM_VFS (-1) +int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs); +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs); +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf); +void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw); +void mlx5_eswitch_disable(struct mlx5_eswitch *esw); +void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw); +void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw); +int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, + u16 vport, const u8 *mac); +int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, + u16 vport, int link_state); +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + u16 vport, u16 vlan, u8 qos); +int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, + u16 vport, bool spoofchk); +int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, + u16 vport_num, bool setting); +int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, + u32 max_rate, u32 min_rate); +int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack); +int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting); +int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting); +int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, + u16 vport, struct ifla_vf_info *ivi); +int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, + u16 vport, + struct ifla_vf_stats *vf_stats); +void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule); + +int mlx5_eswitch_modify_esw_vport_context(struct mlx5_core_dev *dev, u16 vport, + bool other_vport, void *in); + +struct mlx5_flow_spec; +struct mlx5_esw_flow_attr; +struct mlx5_termtbl_handle; + +bool +mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_spec *spec); + +struct mlx5_flow_handle * +mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int 
num_dest); + +void +mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, + struct mlx5_termtbl_handle *tt); + +void +mlx5_eswitch_clear_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec); + +struct mlx5_flow_handle * +mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); +struct mlx5_flow_handle * +mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); +void +mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); +void +mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + +struct mlx5_flow_handle * +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, + struct mlx5_flow_destination *dest); + +enum { + SET_VLAN_STRIP = BIT(0), + SET_VLAN_INSERT = BIT(1) +}; + +enum mlx5_flow_match_level { + MLX5_MATCH_NONE = MLX5_INLINE_MODE_NONE, + MLX5_MATCH_L2 = MLX5_INLINE_MODE_L2, + MLX5_MATCH_L3 = MLX5_INLINE_MODE_IP, + MLX5_MATCH_L4 = MLX5_INLINE_MODE_TCP_UDP, +}; + +/* current maximum for flow based vport multicasting */ +#define MLX5_MAX_FLOW_FWD_VPORTS 32 + +enum { + MLX5_ESW_DEST_ENCAP = BIT(0), + MLX5_ESW_DEST_ENCAP_VALID = BIT(1), + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE = BIT(2), +}; + +struct mlx5_esw_flow_attr { + struct mlx5_eswitch_rep *in_rep; + struct mlx5_core_dev *in_mdev; + struct mlx5_core_dev *counter_dev; + struct mlx5e_tc_int_port *dest_int_port; + struct mlx5e_tc_int_port *int_port; + + int split_count; + int out_count; + + __be16 vlan_proto[MLX5_FS_VLAN_DEPTH]; + u16 vlan_vid[MLX5_FS_VLAN_DEPTH]; + u8 vlan_prio[MLX5_FS_VLAN_DEPTH]; + u8 total_vlan; + struct { + u32 flags; + struct mlx5_eswitch_rep *rep; + struct mlx5_pkt_reformat *pkt_reformat; + struct mlx5_core_dev *mdev; + struct mlx5_termtbl_handle *termtbl; + int src_port_rewrite_act_id; + } dests[MLX5_MAX_FLOW_FWD_VPORTS]; + struct mlx5_rx_tun_attr *rx_tun_attr; + struct ethhdr eth; + struct mlx5_pkt_reformat *decap_pkt_reformat; +}; + +int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, + struct netlink_ext_ack *extack); +int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); +int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, + struct netlink_ext_ack *extack); +int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode); +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, + enum devlink_eswitch_encap_mode encap, + struct netlink_ext_ack *extack); +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, + enum devlink_eswitch_encap_mode *encap); +int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port, + u8 *hw_addr, int *hw_addr_len, + struct netlink_ext_ack *extack); +int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port, + const u8 *hw_addr, int hw_addr_len, + struct netlink_ext_ack *extack); + +void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type); + +int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr); +int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr); +int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + u16 vport, u16 vlan, u8 qos, u8 set_flags); + +static inline bool esw_vst_mode_is_steering(struct mlx5_eswitch *esw) +{ + return (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, pop_vlan) && + 
MLX5_CAP_ESW_INGRESS_ACL(esw->dev, push_vlan)); +} + +static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev, + u8 vlan_depth) +{ + bool ret = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, pop_vlan) && + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan); + + if (vlan_depth == 1) + return ret; + + return ret && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, pop_vlan_2) && + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2); +} + +bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, + struct mlx5_core_dev *dev1); + +const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev); + +#define MLX5_DEBUG_ESWITCH_MASK BIT(3) + +#define esw_info(__dev, format, ...) \ + dev_info((__dev)->device, "E-Switch: " format, ##__VA_ARGS__) + +#define esw_warn(__dev, format, ...) \ + dev_warn((__dev)->device, "E-Switch: " format, ##__VA_ARGS__) + +#define esw_debug(dev, format, ...) \ + mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) + +static inline bool mlx5_esw_allowed(const struct mlx5_eswitch *esw) +{ + return esw && MLX5_ESWITCH_MANAGER(esw->dev); +} + +/* The returned number is valid only when the dev is eswitch manager. */ +static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf_esw_manager(dev) ? + MLX5_VPORT_ECPF : MLX5_VPORT_PF; +} + +static inline bool +mlx5_esw_is_manager_vport(const struct mlx5_eswitch *esw, u16 vport_num) +{ + return esw->manager_vport == vport_num; +} + +static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf_esw_manager(dev) ? + MLX5_VPORT_PF : MLX5_VPORT_FIRST_VF; +} + +static inline bool mlx5_eswitch_is_funcs_handler(const struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf_esw_manager(dev); +} + +static inline unsigned int +mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev, + u16 vport_num) +{ + return (MLX5_CAP_GEN(dev, vhca_id) << 16) | vport_num; +} + +static inline u16 +mlx5_esw_devlink_port_index_to_vport_num(unsigned int dl_port_index) +{ + return dl_port_index & 0xffff; +} + +static inline bool mlx5_esw_is_fdb_created(struct mlx5_eswitch *esw) +{ + return esw->fdb_table.flags & MLX5_ESW_FDB_CREATED; +} + +/* TODO: This mlx5e_tc function shouldn't be called by eswitch */ +void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); + +/* Each mark identifies eswitch vport type. + * MLX5_ESW_VPT_HOST_FN is used to identify both PF and VF ports using + * a single mark. + * MLX5_ESW_VPT_VF identifies a SRIOV VF vport. + * MLX5_ESW_VPT_SF identifies SF vport. + */ +#define MLX5_ESW_VPT_HOST_FN XA_MARK_0 +#define MLX5_ESW_VPT_VF XA_MARK_1 +#define MLX5_ESW_VPT_SF XA_MARK_2 + +/* The vport iterator is valid only after vport are initialized in mlx5_eswitch_init. + * Borrowed the idea from xa_for_each_marked() but with support for desired last element. 
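+ *
+ * Illustrative sketch of iterating the VF vports (the variable names are
+ * only an example):
+ *
+ *	unsigned long i;
+ *	struct mlx5_vport *vport;
+ *
+ *	mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs)
+ *		esw_debug(esw->dev, "VF vport 0x%x\n", vport->vport);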
+ */ + +#define mlx5_esw_for_each_vport(esw, index, vport) \ + xa_for_each(&((esw)->vports), index, vport) + +#define mlx5_esw_for_each_entry_marked(xa, index, entry, last, filter) \ + for (index = 0, entry = xa_find(xa, &index, last, filter); \ + entry; entry = xa_find_after(xa, &index, last, filter)) + +#define mlx5_esw_for_each_vport_marked(esw, index, vport, last, filter) \ + mlx5_esw_for_each_entry_marked(&((esw)->vports), index, vport, last, filter) + +#define mlx5_esw_for_each_vf_vport(esw, index, vport, last) \ + mlx5_esw_for_each_vport_marked(esw, index, vport, last, MLX5_ESW_VPT_VF) + +#define mlx5_esw_for_each_host_func_vport(esw, index, vport, last) \ + mlx5_esw_for_each_vport_marked(esw, index, vport, last, MLX5_ESW_VPT_HOST_FN) + +struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink); +struct mlx5_vport *__must_check +mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num); + +bool mlx5_eswitch_is_vf_vport(struct mlx5_eswitch *esw, u16 vport_num); +bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num); + +int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data); + +int +mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, + enum mlx5_eswitch_vport_event enabled_events); +void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw); + +int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events); +void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num); + +int +esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); +void +esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); + +struct esw_vport_tbl_namespace { + int max_fte; + int max_num_groups; + u32 flags; +}; + +struct mlx5_vport_tbl_attr { + u32 chain; + u16 prio; + u16 vport; + struct esw_vport_tbl_namespace *vport_ns; +}; + +struct mlx5_flow_table * +mlx5_esw_vporttbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr); +void +mlx5_esw_vporttbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr); + +struct mlx5_flow_handle * +esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag); + +int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num); +void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num); + +int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num); +void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num); + +int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events); +void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num); + +int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, + enum mlx5_eswitch_vport_event enabled_events); +void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs); + +int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num); +void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num); +struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num); + +void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num); +void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num); + +int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port, + u16 vport_num, u32 controller, u32 sfnum); +void 
mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num); + +int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port, + u16 vport_num, u32 controller, u32 sfnum); +void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num); +int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id); + +int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num); +void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num); +int mlx5_eswitch_vhca_id_to_vport(struct mlx5_eswitch *esw, u16 vhca_id, u16 *vport_num); + +/** + * mlx5_esw_event_info - Indicates eswitch mode changed/changing. + * + * @new_mode: New mode of eswitch. + */ +struct mlx5_esw_event_info { + u16 new_mode; +}; + +int mlx5_esw_event_notifier_register(struct mlx5_eswitch *esw, struct notifier_block *n); +void mlx5_esw_event_notifier_unregister(struct mlx5_eswitch *esw, struct notifier_block *n); + +bool mlx5_esw_hold(struct mlx5_core_dev *dev); +void mlx5_esw_release(struct mlx5_core_dev *dev); +void mlx5_esw_get(struct mlx5_core_dev *dev); +void mlx5_esw_put(struct mlx5_core_dev *dev); +int mlx5_esw_try_lock(struct mlx5_eswitch *esw); +void mlx5_esw_unlock(struct mlx5_eswitch *esw); + +void esw_vport_change_handle_locked(struct mlx5_vport *vport); + +bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller); + +int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw); +void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw); +int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw); + +static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw) +{ + if (mlx5_esw_allowed(esw)) + return esw->esw_funcs.num_vfs; + + return 0; +} + +#else /* CONFIG_MLX5_ESWITCH */ +/* eswitch API stubs */ +static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} +static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; } +static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) {} +static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {} +static inline void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw) {} +static inline void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) {} +static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } +static inline +int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { return 0; } +static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline struct mlx5_flow_handle * +esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline unsigned int +mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev, + u16 vport_num) +{ + return vport_num; +} + +static inline int +mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw) +{ + return 0; +} + +static inline void +mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw) {} + +static inline int +mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw) +{ + return 0; +} +#endif /* CONFIG_MLX5_ESWITCH */ + +#endif /* __MLX5_ESWITCH_H__ 
*/ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c new file mode 100644 index 000000000..433cdd0a2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -0,0 +1,4040 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/etherdevice.h> +#include <linux/idr.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/fs.h> +#include "mlx5_core.h" +#include "eswitch.h" +#include "esw/indir_table.h" +#include "esw/acl/ofld.h" +#include "rdma.h" +#include "en.h" +#include "fs_core.h" +#include "lib/devcom.h" +#include "lib/eq.h" +#include "lib/fs_chains.h" +#include "en_tc.h" +#include "en/mapping.h" +#include "devlink.h" +#include "lag/lag.h" + +#define mlx5_esw_for_each_rep(esw, i, rep) \ + xa_for_each(&((esw)->offloads.vport_reps), i, rep) + +#define mlx5_esw_for_each_sf_rep(esw, i, rep) \ + xa_for_each_marked(&((esw)->offloads.vport_reps), i, rep, MLX5_ESW_VPT_SF) + +#define mlx5_esw_for_each_vf_rep(esw, index, rep) \ + mlx5_esw_for_each_entry_marked(&((esw)->offloads.vport_reps), index, \ + rep, (esw)->esw_funcs.num_vfs, MLX5_ESW_VPT_VF) + +/* There are two match-all miss flows, one for unicast dst mac and + * one for multicast. 
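+ * MLX5_ESW_MISS_FLOWS below reserves room for these two rules when the
+ * slow path FDB is sized.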
+ */ +#define MLX5_ESW_MISS_FLOWS (2) +#define UPLINK_REP_INDEX 0 + +#define MLX5_ESW_VPORT_TBL_SIZE 128 +#define MLX5_ESW_VPORT_TBL_NUM_GROUPS 4 + +#define MLX5_ESW_FT_OFFLOADS_DROP_RULE (1) + +static struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = { + .max_fte = MLX5_ESW_VPORT_TBL_SIZE, + .max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS, + .flags = 0, +}; + +static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, + u16 vport_num) +{ + return xa_load(&esw->offloads.vport_reps, vport_num); +} + +static void +mlx5_eswitch_set_rule_flow_source(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr) +{ + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) || !attr || !attr->in_rep) + return; + + if (attr->int_port) { + spec->flow_context.flow_source = mlx5e_tc_int_port_get_flow_source(attr->int_port); + + return; + } + + spec->flow_context.flow_source = (attr->in_rep->vport == MLX5_VPORT_UPLINK) ? + MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK : + MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; +} + +/* Actually only the upper 16 bits of reg c0 need to be cleared, but the lower 16 bits + * are not needed as well in the following process. So clear them all for simplicity. + */ +void +mlx5_eswitch_clear_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec) +{ + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + void *misc2; + + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, 0); + + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, 0); + + if (!memchr_inv(misc2, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc2))) + spec->match_criteria_enable &= ~MLX5_MATCH_MISC_PARAMETERS_2; + } +} + +static void +mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, + struct mlx5_eswitch *src_esw, + u16 vport) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + u32 metadata; + void *misc2; + void *misc; + + /* Use metadata matching because vport is not represented by single + * VHCA in dual-port RoCE mode, and matching on source vport may fail. 
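+ * With metadata matching the rule keys on
+ * misc_parameters_2.metadata_reg_c_0; otherwise it keys on
+ * misc_parameters.source_port and, on merged-eswitch devices, also on
+ * source_eswitch_owner_vhca_id.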
+ */ + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + if (mlx5_esw_indir_table_decap_vport(attr)) + vport = mlx5_esw_indir_table_decap_vport(attr); + + if (!attr->chain && esw_attr && esw_attr->int_port) + metadata = + mlx5e_tc_int_port_get_metadata_for_match(esw_attr->int_port); + else + metadata = + mlx5_eswitch_get_vport_metadata_for_match(src_esw, vport); + + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, metadata); + + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + MLX5_SET(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(src_esw->dev, vhca_id)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + } +} + +static int +esw_setup_decap_indir(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr) +{ + struct mlx5_flow_table *ft; + + if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE)) + return -EOPNOTSUPP; + + ft = mlx5_esw_indir_table_get(esw, attr, + mlx5_esw_indir_table_decap_vport(attr), true); + return PTR_ERR_OR_ZERO(ft); +} + +static void +esw_cleanup_decap_indir(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr) +{ + if (mlx5_esw_indir_table_decap_vport(attr)) + mlx5_esw_indir_table_put(esw, + mlx5_esw_indir_table_decap_vport(attr), + true); +} + +static int +esw_setup_sampler_dest(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + u32 sampler_id, + int i) +{ + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER; + dest[i].sampler_id = sampler_id; + + return 0; +} + +static int +esw_setup_ft_dest(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + int i) +{ + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = attr->dest_ft; + + if (mlx5_esw_indir_table_decap_vport(attr)) + return esw_setup_decap_indir(esw, attr); + return 0; +} + +static void +esw_setup_accept_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, + struct mlx5_fs_chains *chains, int i) +{ + if (mlx5_chains_ignore_flow_level_supported(chains)) + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = mlx5_chains_get_tc_end_ft(chains); +} + +static void +esw_setup_slow_path_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, int i) +{ + if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = esw->fdb_table.offloads.slow_fdb; +} + +static int +esw_setup_chain_dest(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_fs_chains *chains, + u32 
chain, u32 prio, u32 level, + int i) +{ + struct mlx5_flow_table *ft; + + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + ft = mlx5_chains_get_table(chains, chain, prio, level); + if (IS_ERR(ft)) + return PTR_ERR(ft); + + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = ft; + return 0; +} + +static void esw_put_dest_tables_loop(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr, + int from, int to) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + int i; + + for (i = from; i < to; i++) + if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) + mlx5_chains_put_table(chains, 0, 1, 0); + else if (mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport, + esw_attr->dests[i].mdev)) + mlx5_esw_indir_table_put(esw, esw_attr->dests[i].rep->vport, + false); +} + +static bool +esw_is_chain_src_port_rewrite(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr) +{ + int i; + + for (i = esw_attr->split_count; i < esw_attr->out_count; i++) + if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) + return true; + return false; +} + +static int +esw_setup_chain_src_port_rewrite(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, + struct mlx5_fs_chains *chains, + struct mlx5_flow_attr *attr, + int *i) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + int err; + + if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE)) + return -EOPNOTSUPP; + + /* flow steering cannot handle more than one dest with the same ft + * in a single flow + */ + if (esw_attr->out_count - esw_attr->split_count > 1) + return -EOPNOTSUPP; + + err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, 1, 0, *i); + if (err) + return err; + + if (esw_attr->dests[esw_attr->split_count].pkt_reformat) { + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act->pkt_reformat = esw_attr->dests[esw_attr->split_count].pkt_reformat; + } + (*i)++; + + return 0; +} + +static void esw_cleanup_chain_src_port_rewrite(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + + esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, esw_attr->out_count); +} + +static bool +esw_is_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + bool result = false; + int i; + + /* Indirect table is supported only for flows with in_port uplink + * and the destination is vport on the same eswitch as the uplink, + * return false in case at least one of destinations doesn't meet + * this criteria. 
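+ * A single destination in [split_count, out_count) that fails
+ * mlx5_esw_indir_table_needed() therefore disables the indirect table
+ * for the whole rule.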
+ */ + for (i = esw_attr->split_count; i < esw_attr->out_count; i++) { + if (esw_attr->dests[i].rep && + mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport, + esw_attr->dests[i].mdev)) { + result = true; + } else { + result = false; + break; + } + } + return result; +} + +static int +esw_setup_indir_table(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + bool ignore_flow_lvl, + int *i) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + int j, err; + + if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE)) + return -EOPNOTSUPP; + + for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) { + if (ignore_flow_lvl) + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[*i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + + dest[*i].ft = mlx5_esw_indir_table_get(esw, attr, + esw_attr->dests[j].rep->vport, false); + if (IS_ERR(dest[*i].ft)) { + err = PTR_ERR(dest[*i].ft); + goto err_indir_tbl_get; + } + } + + if (mlx5_esw_indir_table_decap_vport(attr)) { + err = esw_setup_decap_indir(esw, attr); + if (err) + goto err_indir_tbl_get; + } + + return 0; + +err_indir_tbl_get: + esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, j); + return err; +} + +static void esw_cleanup_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + + esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, esw_attr->out_count); + esw_cleanup_decap_indir(esw, attr); +} + +static void +esw_cleanup_chain_dest(struct mlx5_fs_chains *chains, u32 chain, u32 prio, u32 level) +{ + mlx5_chains_put_table(chains, chain, prio, level); +} + +static void +esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr, + int attr_idx, int dest_idx, bool pkt_reformat) +{ + dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest[dest_idx].vport.num = esw_attr->dests[attr_idx].rep->vport; + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { + dest[dest_idx].vport.vhca_id = + MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id); + dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + if (dest[dest_idx].vport.num == MLX5_VPORT_UPLINK && + mlx5_lag_mpesw_is_activated(esw->dev)) + dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; + } + if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) { + if (pkt_reformat) { + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act->pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat; + } + dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; + dest[dest_idx].vport.pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat; + } +} + +static int +esw_setup_vport_dests(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr, + int i) +{ + int j; + + for (j = esw_attr->split_count; j < esw_attr->out_count; j++, i++) + esw_setup_vport_dest(dest, flow_act, esw, esw_attr, j, i, true); + return i; +} + +static bool +esw_src_port_rewrite_supported(struct mlx5_eswitch *esw) +{ + return MLX5_CAP_GEN(esw->dev, reg_c_preserve) && + mlx5_eswitch_vport_match_metadata_enabled(esw) && + MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level); +} + +static int +esw_setup_dests(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + 
struct mlx5_flow_spec *spec, + int *i) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + int err = 0; + + if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) && + esw_src_port_rewrite_supported(esw)) + attr->flags |= MLX5_ATTR_FLAG_SRC_REWRITE; + + if (attr->flags & MLX5_ATTR_FLAG_SAMPLE && + !(attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)) { + esw_setup_sampler_dest(dest, flow_act, attr->sample_attr.sampler_id, *i); + (*i)++; + } else if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) { + esw_setup_slow_path_dest(dest, flow_act, esw, *i); + (*i)++; + } else if (attr->flags & MLX5_ATTR_FLAG_ACCEPT) { + esw_setup_accept_dest(dest, flow_act, chains, *i); + (*i)++; + } else if (esw_is_indir_table(esw, attr)) { + err = esw_setup_indir_table(dest, flow_act, esw, attr, true, i); + } else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) { + err = esw_setup_chain_src_port_rewrite(dest, flow_act, esw, chains, attr, i); + } else { + *i = esw_setup_vport_dests(dest, flow_act, esw, esw_attr, *i); + + if (attr->dest_ft) { + err = esw_setup_ft_dest(dest, flow_act, esw, attr, *i); + (*i)++; + } else if (attr->dest_chain) { + err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, + 1, 0, *i); + (*i)++; + } + } + + return err; +} + +static void +esw_cleanup_dests(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + + if (attr->dest_ft) { + esw_cleanup_decap_indir(esw, attr); + } else if (!mlx5e_tc_attr_flags_skip(attr->flags)) { + if (attr->dest_chain) + esw_cleanup_chain_dest(chains, attr->dest_chain, 1, 0); + else if (esw_is_indir_table(esw, attr)) + esw_cleanup_indir_table(esw, attr); + else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) + esw_cleanup_chain_src_port_rewrite(esw, attr); + } +} + +static void +esw_setup_meter(struct mlx5_flow_attr *attr, struct mlx5_flow_act *flow_act) +{ + struct mlx5e_flow_meter_handle *meter; + + meter = attr->meter_attr.meter; + flow_act->exe_aso.type = attr->exe_aso_type; + flow_act->exe_aso.object_id = meter->obj_id; + flow_act->exe_aso.flow_meter.meter_idx = meter->idx; + flow_act->exe_aso.flow_meter.init_color = MLX5_FLOW_METER_COLOR_GREEN; + /* use metadata reg 5 for packet color */ + flow_act->exe_aso.return_reg_id = 5; +} + +struct mlx5_flow_handle * +mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr) +{ + struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + bool split = !!(esw_attr->split_count); + struct mlx5_vport_tbl_attr fwd_attr; + struct mlx5_flow_destination *dest; + struct mlx5_flow_handle *rule; + struct mlx5_flow_table *fdb; + int i = 0; + + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return ERR_PTR(-EOPNOTSUPP); + + dest = kcalloc(MLX5_MAX_FLOW_FWD_VPORTS + 1, sizeof(*dest), GFP_KERNEL); + if (!dest) + return ERR_PTR(-ENOMEM); + + flow_act.action = attr->action; + /* if per flow vlan pop/push is emulated, don't set that into the firmware */ + if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) + flow_act.action &= ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | + MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); + else if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) { + flow_act.vlan[0].ethtype = ntohs(esw_attr->vlan_proto[0]); + flow_act.vlan[0].vid = esw_attr->vlan_vid[0]; + 
flow_act.vlan[0].prio = esw_attr->vlan_prio[0]; + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) { + flow_act.vlan[1].ethtype = ntohs(esw_attr->vlan_proto[1]); + flow_act.vlan[1].vid = esw_attr->vlan_vid[1]; + flow_act.vlan[1].prio = esw_attr->vlan_prio[1]; + } + } + + mlx5_eswitch_set_rule_flow_source(esw, spec, esw_attr); + + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + int err; + + err = esw_setup_dests(dest, &flow_act, esw, attr, spec, &i); + if (err) { + rule = ERR_PTR(err); + goto err_create_goto_table; + } + } + + if (esw_attr->decap_pkt_reformat) + flow_act.pkt_reformat = esw_attr->decap_pkt_reformat; + + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[i].counter_id = mlx5_fc_id(attr->counter); + i++; + } + + if (attr->outer_match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + if (attr->inner_match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; + + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + flow_act.modify_hdr = attr->modify_hdr; + + if ((flow_act.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) && + attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER) + esw_setup_meter(attr, &flow_act); + + if (split) { + fwd_attr.chain = attr->chain; + fwd_attr.prio = attr->prio; + fwd_attr.vport = esw_attr->in_rep->vport; + fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; + + fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr); + } else { + if (attr->chain || attr->prio) + fdb = mlx5_chains_get_table(chains, attr->chain, + attr->prio, 0); + else + fdb = attr->ft; + + if (!(attr->flags & MLX5_ATTR_FLAG_NO_IN_PORT)) + mlx5_eswitch_set_rule_source_port(esw, spec, attr, + esw_attr->in_mdev->priv.eswitch, + esw_attr->in_rep->vport); + } + if (IS_ERR(fdb)) { + rule = ERR_CAST(fdb); + goto err_esw_get; + } + + if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec)) + rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, esw_attr, + &flow_act, dest, i); + else + rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); + if (IS_ERR(rule)) + goto err_add_rule; + else + atomic64_inc(&esw->offloads.num_flows); + + kfree(dest); + return rule; + +err_add_rule: + if (split) + mlx5_esw_vporttbl_put(esw, &fwd_attr); + else if (attr->chain || attr->prio) + mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); +err_esw_get: + esw_cleanup_dests(esw, attr); +err_create_goto_table: + kfree(dest); + return rule; +} + +struct mlx5_flow_handle * +mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr) +{ + struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + struct mlx5_vport_tbl_attr fwd_attr; + struct mlx5_flow_destination *dest; + struct mlx5_flow_table *fast_fdb; + struct mlx5_flow_table *fwd_fdb; + struct mlx5_flow_handle *rule; + int i, err = 0; + + dest = kcalloc(MLX5_MAX_FLOW_FWD_VPORTS + 1, sizeof(*dest), GFP_KERNEL); + if (!dest) + return ERR_PTR(-ENOMEM); + + fast_fdb = mlx5_chains_get_table(chains, attr->chain, attr->prio, 0); + if (IS_ERR(fast_fdb)) { + rule = ERR_CAST(fast_fdb); + goto err_get_fast; + } + + fwd_attr.chain = attr->chain; + fwd_attr.prio = attr->prio; + fwd_attr.vport = esw_attr->in_rep->vport; + fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; + fwd_fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr); + if (IS_ERR(fwd_fdb)) { + rule = 
ERR_CAST(fwd_fdb); + goto err_get_fwd; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + for (i = 0; i < esw_attr->split_count; i++) { + if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) + /* Source port rewrite (forward to ovs internal port or stack device) isn't + * supported in the rule of split action. + */ + err = -EOPNOTSUPP; + else + esw_setup_vport_dest(dest, &flow_act, esw, esw_attr, i, i, false); + + if (err) { + rule = ERR_PTR(err); + goto err_chain_src_rewrite; + } + } + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = fwd_fdb; + i++; + + mlx5_eswitch_set_rule_source_port(esw, spec, attr, + esw_attr->in_mdev->priv.eswitch, + esw_attr->in_rep->vport); + + if (attr->outer_match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); + + if (IS_ERR(rule)) { + i = esw_attr->split_count; + goto err_chain_src_rewrite; + } + + atomic64_inc(&esw->offloads.num_flows); + + kfree(dest); + return rule; +err_chain_src_rewrite: + mlx5_esw_vporttbl_put(esw, &fwd_attr); +err_get_fwd: + mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); +err_get_fast: + kfree(dest); + return rule; +} + +static void +__mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr, + bool fwd_rule) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + bool split = (esw_attr->split_count > 0); + struct mlx5_vport_tbl_attr fwd_attr; + int i; + + mlx5_del_flow_rules(rule); + + if (!mlx5e_tc_attr_flags_skip(attr->flags)) { + /* unref the term table */ + for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { + if (esw_attr->dests[i].termtbl) + mlx5_eswitch_termtbl_put(esw, esw_attr->dests[i].termtbl); + } + } + + atomic64_dec(&esw->offloads.num_flows); + + if (fwd_rule || split) { + fwd_attr.chain = attr->chain; + fwd_attr.prio = attr->prio; + fwd_attr.vport = esw_attr->in_rep->vport; + fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; + } + + if (fwd_rule) { + mlx5_esw_vporttbl_put(esw, &fwd_attr); + mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); + } else { + if (split) + mlx5_esw_vporttbl_put(esw, &fwd_attr); + else if (attr->chain || attr->prio) + mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); + esw_cleanup_dests(esw, attr); + } +} + +void +mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr) +{ + __mlx5_eswitch_del_rule(esw, rule, attr, false); +} + +void +mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr) +{ + __mlx5_eswitch_del_rule(esw, rule, attr, true); +} + +static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) +{ + struct mlx5_eswitch_rep *rep; + unsigned long i; + int err = 0; + + esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ?
"pop" : "none"); + mlx5_esw_for_each_host_func_vport(esw, i, rep, esw->esw_funcs.num_vfs) { + if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED) + continue; + + err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val); + if (err) + goto out; + } + +out: + return err; +} + +static struct mlx5_eswitch_rep * +esw_vlan_action_get_vport(struct mlx5_esw_flow_attr *attr, bool push, bool pop) +{ + struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL; + + in_rep = attr->in_rep; + out_rep = attr->dests[0].rep; + + if (push) + vport = in_rep; + else if (pop) + vport = out_rep; + else + vport = in_rep; + + return vport; +} + +static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr, + bool push, bool pop, bool fwd) +{ + struct mlx5_eswitch_rep *in_rep, *out_rep; + + if ((push || pop) && !fwd) + goto out_notsupp; + + in_rep = attr->in_rep; + out_rep = attr->dests[0].rep; + + if (push && in_rep->vport == MLX5_VPORT_UPLINK) + goto out_notsupp; + + if (pop && out_rep->vport == MLX5_VPORT_UPLINK) + goto out_notsupp; + + /* vport has vlan push configured, can't offload VF --> wire rules w.o it */ + if (!push && !pop && fwd) + if (in_rep->vlan && out_rep->vport == MLX5_VPORT_UPLINK) + goto out_notsupp; + + /* protects against (1) setting rules with different vlans to push and + * (2) setting rules w.o vlans (attr->vlan = 0) && w. vlans to push (!= 0) + */ + if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan_vid[0])) + goto out_notsupp; + + return 0; + +out_notsupp: + return -EOPNOTSUPP; +} + +int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr) +{ + struct offloads_fdb *offloads = &esw->fdb_table.offloads; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_eswitch_rep *vport = NULL; + bool push, pop, fwd; + int err = 0; + + /* nop if we're on the vlan push/pop non emulation mode */ + if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) + return 0; + + push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); + pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); + fwd = !!((attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && + !attr->dest_chain); + + mutex_lock(&esw->state_lock); + + err = esw_add_vlan_action_check(esw_attr, push, pop, fwd); + if (err) + goto unlock; + + attr->flags &= ~MLX5_ATTR_FLAG_VLAN_HANDLED; + + vport = esw_vlan_action_get_vport(esw_attr, push, pop); + + if (!push && !pop && fwd) { + /* tracks VF --> wire rules without vlan push action */ + if (esw_attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) { + vport->vlan_refcount++; + attr->flags |= MLX5_ATTR_FLAG_VLAN_HANDLED; + } + + goto unlock; + } + + if (!push && !pop) + goto unlock; + + if (!(offloads->vlan_push_pop_refcount)) { + /* it's the 1st vlan rule, apply global vlan pop policy */ + err = esw_set_global_vlan_pop(esw, SET_VLAN_STRIP); + if (err) + goto out; + } + offloads->vlan_push_pop_refcount++; + + if (push) { + if (vport->vlan_refcount) + goto skip_set_push; + + err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, esw_attr->vlan_vid[0], + 0, SET_VLAN_INSERT | SET_VLAN_STRIP); + if (err) + goto out; + vport->vlan = esw_attr->vlan_vid[0]; +skip_set_push: + vport->vlan_refcount++; + } +out: + if (!err) + attr->flags |= MLX5_ATTR_FLAG_VLAN_HANDLED; +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr) +{ + struct offloads_fdb *offloads = &esw->fdb_table.offloads; + struct mlx5_esw_flow_attr *esw_attr = 
attr->esw_attr; + struct mlx5_eswitch_rep *vport = NULL; + bool push, pop, fwd; + int err = 0; + + /* nop if we're on the vlan push/pop non emulation mode */ + if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) + return 0; + + if (!(attr->flags & MLX5_ATTR_FLAG_VLAN_HANDLED)) + return 0; + + push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); + pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); + fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + + mutex_lock(&esw->state_lock); + + vport = esw_vlan_action_get_vport(esw_attr, push, pop); + + if (!push && !pop && fwd) { + /* tracks VF --> wire rules without vlan push action */ + if (esw_attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) + vport->vlan_refcount--; + + goto out; + } + + if (push) { + vport->vlan_refcount--; + if (vport->vlan_refcount) + goto skip_unset_push; + + vport->vlan = 0; + err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, + 0, 0, SET_VLAN_STRIP); + if (err) + goto out; + } + +skip_unset_push: + offloads->vlan_push_pop_refcount--; + if (offloads->vlan_push_pop_refcount) + goto out; + + /* no more vlan rules, stop global vlan pop policy */ + err = esw_set_global_vlan_pop(esw, 0); + +out: + mutex_unlock(&esw->state_lock); + return err; +} + +struct mlx5_flow_handle * +mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, + struct mlx5_eswitch *from_esw, + struct mlx5_eswitch_rep *rep, + u32 sqn) +{ + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_spec *spec; + void *misc; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + flow_rule = ERR_PTR(-ENOMEM); + goto out; + } + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn); + /* source vport is the esw manager */ + MLX5_SET(fte_match_set_misc, misc, source_port, from_esw->manager_vport); + if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch)) + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(from_esw->dev, vhca_id)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch)) + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = rep->vport; + dest.vport.vhca_id = MLX5_CAP_GEN(rep->esw->dev, vhca_id); + dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + if (MLX5_CAP_ESW_FLOWTABLE(on_esw->dev, flow_source) && + rep->vport == MLX5_VPORT_UPLINK) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; + + flow_rule = mlx5_add_flow_rules(on_esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) + esw_warn(on_esw->dev, "FDB: Failed to add send to vport rule err %ld\n", + PTR_ERR(flow_rule)); +out: + kvfree(spec); + return flow_rule; +} +EXPORT_SYMBOL(mlx5_eswitch_add_send_to_vport_rule); + +void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) +{ + mlx5_del_flow_rules(rule); +} + +void mlx5_eswitch_del_send_to_vport_meta_rule(struct mlx5_flow_handle *rule) +{ + if (rule) + mlx5_del_flow_rules(rule); +} + +struct mlx5_flow_handle * 
+mlx5_eswitch_add_send_to_vport_meta_rule(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_spec *spec; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + MLX5_SET(fte_match_param, spec->match_criteria, + misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(fte_match_param, spec->match_criteria, + misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_1, + ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num)); + dest.vport.num = vport_num; + + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) + esw_warn(esw->dev, "FDB: Failed to add send to vport meta rule vport %d, err %ld\n", + vport_num, PTR_ERR(flow_rule)); + + kvfree(spec); + return flow_rule; +} + +static bool mlx5_eswitch_reg_c1_loopback_supported(struct mlx5_eswitch *esw) +{ + return MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) & + MLX5_FDB_TO_VPORT_REG_C_1; +} + +static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; + u32 min[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; + u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {}; + u8 curr, wanted; + int err; + + if (!mlx5_eswitch_reg_c1_loopback_supported(esw) && + !mlx5_eswitch_vport_match_metadata_enabled(esw)) + return 0; + + MLX5_SET(query_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); + err = mlx5_cmd_exec_inout(esw->dev, query_esw_vport_context, in, out); + if (err) + return err; + + curr = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.fdb_to_vport_reg_c_id); + wanted = MLX5_FDB_TO_VPORT_REG_C_0; + if (mlx5_eswitch_reg_c1_loopback_supported(esw)) + wanted |= MLX5_FDB_TO_VPORT_REG_C_1; + + if (enable) + curr |= wanted; + else + curr &= ~wanted; + + MLX5_SET(modify_esw_vport_context_in, min, + esw_vport_context.fdb_to_vport_reg_c_id, curr); + MLX5_SET(modify_esw_vport_context_in, min, + field_select.fdb_to_vport_reg_c_id, 1); + + err = mlx5_eswitch_modify_esw_vport_context(esw->dev, 0, false, min); + if (!err) { + if (enable && (curr & MLX5_FDB_TO_VPORT_REG_C_1)) + esw->flags |= MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED; + else + esw->flags &= ~MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED; + } + + return err; +} + +static void peer_miss_rules_setup(struct mlx5_eswitch *esw, + struct mlx5_core_dev *peer_dev, + struct mlx5_flow_spec *spec, + struct mlx5_flow_destination *dest) +{ + void *misc; + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(peer_dev, vhca_id)); + + 
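+ /* Without vport match metadata, identify the peer by source port and eswitch owner vhca_id. */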
spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + } + + dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest->vport.num = peer_dev->priv.eswitch->manager_vport; + dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id); + dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; +} + +static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw, + struct mlx5_flow_spec *spec, + u16 vport) +{ + void *misc; + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(peer_esw, + vport)); + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + } +} + +static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, + struct mlx5_core_dev *peer_dev) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_handle **flows; + /* total vports is the same for both e-switches */ + int nvports = esw->total_vports; + struct mlx5_flow_handle *flow; + struct mlx5_flow_spec *spec; + struct mlx5_vport *vport; + unsigned long i; + void *misc; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + peer_miss_rules_setup(esw, peer_dev, spec, &dest); + + flows = kvcalloc(nvports, sizeof(*flows), GFP_KERNEL); + if (!flows) { + err = -ENOMEM; + goto alloc_flows_err; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); + esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, + spec, MLX5_VPORT_PF); + + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_pf_flow_err; + } + flows[vport->index] = flow; + } + + if (mlx5_ecpf_vport_exists(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_ecpf_flow_err; + } + flows[vport->index] = flow; + } + + mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) { + esw_set_peer_miss_rule_source_port(esw, + peer_dev->priv.eswitch, + spec, vport->vport); + + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_vf_flow_err; + } + flows[vport->index] = flow; + } + + esw->fdb_table.offloads.peer_miss_rules = flows; + + kvfree(spec); + return 0; + +add_vf_flow_err: + mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) { + if (!flows[vport->index]) + continue; + mlx5_del_flow_rules(flows[vport->index]); + } + if (mlx5_ecpf_vport_exists(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); + mlx5_del_flow_rules(flows[vport->index]); + } +add_ecpf_flow_err: + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + vport = 
mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); + mlx5_del_flow_rules(flows[vport->index]); + } +add_pf_flow_err: + esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err); + kvfree(flows); +alloc_flows_err: + kvfree(spec); + return err; +} + +static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_handle **flows; + struct mlx5_vport *vport; + unsigned long i; + + flows = esw->fdb_table.offloads.peer_miss_rules; + + mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) + mlx5_del_flow_rules(flows[vport->index]); + + if (mlx5_ecpf_vport_exists(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); + mlx5_del_flow_rules(flows[vport->index]); + } + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); + mlx5_del_flow_rules(flows[vport->index]); + } + kvfree(flows); +} + +static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_handle *flow_rule = NULL; + struct mlx5_flow_spec *spec; + void *headers_c; + void *headers_v; + int err = 0; + u8 *dmac_c; + u8 *dmac_v; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto out; + } + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, + outer_headers.dmac_47_16); + dmac_c[0] = 0x01; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = esw->manager_vport; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + esw_warn(esw->dev, "FDB: Failed to add unicast miss flow rule err %d\n", err); + goto out; + } + + esw->fdb_table.offloads.miss_rule_uni = flow_rule; + + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, + outer_headers.dmac_47_16); + dmac_v[0] = 0x01; + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err); + mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni); + goto out; + } + + esw->fdb_table.offloads.miss_rule_multi = flow_rule; + +out: + kvfree(spec); + return err; +} + +struct mlx5_flow_handle * +esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) +{ + struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; + struct mlx5_flow_table *ft = esw->offloads.ft_offloads_restore; + struct mlx5_flow_context *flow_context; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_destination dest; + struct mlx5_flow_spec *spec; + void *misc; + + if (!mlx5_eswitch_reg_c1_loopback_supported(esw)) + return ERR_PTR(-EOPNOTSUPP); + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + ESW_REG_C0_USER_DATA_METADATA_MASK); + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, tag); + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + flow_act.action 
= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.modify_hdr = esw->offloads.restore_copy_hdr_id; + + flow_context = &spec->flow_context; + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; + flow_context->flow_tag = tag; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = esw->offloads.ft_offloads; + + flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + kvfree(spec); + + if (IS_ERR(flow_rule)) + esw_warn(esw->dev, + "Failed to create restore rule for tag: %d, err(%d)\n", + tag, (int)PTR_ERR(flow_rule)); + + return flow_rule; +} + +#define MAX_PF_SQ 256 +#define MAX_SQ_NVPORTS 32 + +static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw, + u32 *flow_group_in) +{ + void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, + match_criteria); + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + } else { + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_port); + } +} + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) +static void esw_vport_tbl_put(struct mlx5_eswitch *esw) +{ + struct mlx5_vport_tbl_attr attr; + struct mlx5_vport *vport; + unsigned long i; + + attr.chain = 0; + attr.prio = 1; + mlx5_esw_for_each_vport(esw, i, vport) { + attr.vport = vport->vport; + attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; + mlx5_esw_vporttbl_put(esw, &attr); + } +} + +static int esw_vport_tbl_get(struct mlx5_eswitch *esw) +{ + struct mlx5_vport_tbl_attr attr; + struct mlx5_flow_table *fdb; + struct mlx5_vport *vport; + unsigned long i; + + attr.chain = 0; + attr.prio = 1; + mlx5_esw_for_each_vport(esw, i, vport) { + attr.vport = vport->vport; + attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; + fdb = mlx5_esw_vporttbl_get(esw, &attr); + if (IS_ERR(fdb)) + goto out; + } + return 0; + +out: + esw_vport_tbl_put(esw); + return PTR_ERR(fdb); +} + +#define fdb_modify_header_fwd_to_table_supported(esw) \ + (MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table)) +static void esw_init_chains_offload_flags(struct mlx5_eswitch *esw, u32 *flags) +{ + struct mlx5_core_dev *dev = esw->dev; + + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ignore_flow_level)) + *flags |= MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; + + if (!MLX5_CAP_ESW_FLOWTABLE(dev, multi_fdb_encap) && + esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { + *flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED; + esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n"); + } else if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { + *flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED; + esw_warn(dev, "Tc chains and priorities offload aren't supported\n"); + } else if (!fdb_modify_header_fwd_to_table_supported(esw)) { + /* Disabled when ttl workaround is needed, e.g + * when ESWITCH_IPV4_TTL_MODIFY_ENABLE = true in mlxconfig + */ + esw_warn(dev, + "Tc chains and priorities offload aren't supported, check firmware version, or mlxconfig settings\n"); + *flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED; + } else { + *flags |= MLX5_CHAINS_AND_PRIOS_SUPPORTED; + esw_info(dev, "Supported tc chains and prios offload\n"); + } + + if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) + *flags |= 
MLX5_CHAINS_FT_TUNNEL_SUPPORTED; +} + +static int +esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_table *nf_ft, *ft; + struct mlx5_chains_attr attr = {}; + struct mlx5_fs_chains *chains; + u32 fdb_max; + int err; + + fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); + + esw_init_chains_offload_flags(esw, &attr.flags); + attr.ns = MLX5_FLOW_NAMESPACE_FDB; + attr.max_ft_sz = fdb_max; + attr.max_grp_num = esw->params.large_group_num; + attr.default_ft = miss_fdb; + attr.mapping = esw->offloads.reg_c0_obj_pool; + + chains = mlx5_chains_create(dev, &attr); + if (IS_ERR(chains)) { + err = PTR_ERR(chains); + esw_warn(dev, "Failed to create fdb chains err(%d)\n", err); + return err; + } + + esw->fdb_table.offloads.esw_chains_priv = chains; + + /* Create tc_end_ft which is the always created ft chain */ + nf_ft = mlx5_chains_get_table(chains, mlx5_chains_get_nf_ft_chain(chains), + 1, 0); + if (IS_ERR(nf_ft)) { + err = PTR_ERR(nf_ft); + goto nf_ft_err; + } + + /* Always open the root for fast path */ + ft = mlx5_chains_get_table(chains, 0, 1, 0); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto level_0_err; + } + + /* Open level 1 for split fdb rules now if prios isn't supported */ + if (!mlx5_chains_prios_supported(chains)) { + err = esw_vport_tbl_get(esw); + if (err) + goto level_1_err; + } + + mlx5_chains_set_end_ft(chains, nf_ft); + + return 0; + +level_1_err: + mlx5_chains_put_table(chains, 0, 1, 0); +level_0_err: + mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0); +nf_ft_err: + mlx5_chains_destroy(chains); + esw->fdb_table.offloads.esw_chains_priv = NULL; + + return err; +} + +static void +esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains) +{ + if (!mlx5_chains_prios_supported(chains)) + esw_vport_tbl_put(esw); + mlx5_chains_put_table(chains, 0, 1, 0); + mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0); + mlx5_chains_destroy(chains); +} + +#else /* CONFIG_MLX5_CLS_ACT */ + +static int +esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb) +{ return 0; } + +static void +esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains) +{} + +#endif + +static int +esw_create_send_to_vport_group(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb, + u32 *flow_group_in, + int *ix) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + void *match_criteria; + int count, err = 0; + + memset(flow_group_in, 0, inlen); + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + } + + /* See comment at table_size calculation */ + count = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, *ix + count - 1); + *ix += count; + + g = mlx5_create_flow_group(fdb, flow_group_in); + if 
(IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(esw->dev, "Failed to create send-to-vport flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.offloads.send_to_vport_grp = g; + +out: + return err; +} + +static int +esw_create_meta_send_to_vport_group(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb, + u32 *flow_group_in, + int *ix) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + void *match_criteria; + int err = 0; + + if (!esw_src_port_rewrite_supported(esw)) + return 0; + + memset(flow_group_in, 0, inlen); + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix); + MLX5_SET(create_flow_group_in, flow_group_in, + end_flow_index, *ix + esw->total_vports - 1); + *ix += esw->total_vports; + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(esw->dev, + "Failed to create send-to-vport meta flow group err(%d)\n", err); + goto send_vport_meta_err; + } + esw->fdb_table.offloads.send_to_vport_meta_grp = g; + + return 0; + +send_vport_meta_err: + return err; +} + +static int +esw_create_peer_esw_miss_group(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb, + u32 *flow_group_in, + int *ix) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + void *match_criteria; + int err = 0; + + if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) + return 0; + + memset(flow_group_in, 0, inlen); + + esw_set_flow_group_source_port(esw, flow_group_in); + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) { + match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, + match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + } + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + *ix + esw->total_vports - 1); + *ix += esw->total_vports; + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(esw->dev, "Failed to create peer miss flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.offloads.peer_miss_grp = g; + +out: + return err; +} + +static int +esw_create_miss_group(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb, + u32 *flow_group_in, + int *ix) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + void *match_criteria; + int err = 0; + u8 *dmac; + + memset(flow_group_in, 0, inlen); + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers.dmac_47_16); + dmac[0] = 0x01; + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + *ix + MLX5_ESW_MISS_FLOWS); + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + 
err = PTR_ERR(g); + esw_warn(esw->dev, "Failed to create miss flow group err(%d)\n", err); + goto miss_err; + } + esw->fdb_table.offloads.miss_grp = g; + + err = esw_add_fdb_miss_rule(esw); + if (err) + goto miss_rule_err; + + return 0; + +miss_rule_err: + mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); +miss_err: + return err; +} + +static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb = NULL; + int table_size, ix = 0, err = 0; + u32 flags = 0, *flow_group_in; + + esw_debug(esw->dev, "Create offloads FDB Tables\n"); + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + err = -EOPNOTSUPP; + goto ns_err; + } + esw->fdb_table.offloads.ns = root_ns; + err = mlx5_flow_namespace_set_mode(root_ns, + esw->dev->priv.steering->mode); + if (err) { + esw_warn(dev, "Failed to set FDB namespace steering mode\n"); + goto ns_err; + } + + /* To be strictly correct: + * MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) + * should be: + * esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ + + * peer_esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ + * but as the peer device might not be in switchdev mode, it's not + * possible. We use the fact that by default FW sets max vfs and max sfs + * to the same value on both devices. If this needs to be changed in the future, note that + * the peer miss group should also be created based on the number of + * total vports of the peer (currently it also uses esw->total_vports). + */ + table_size = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) + + esw->total_vports * 2 + MLX5_ESW_MISS_FLOWS; + + /* create the slow path fdb with encap set, so further table instances + * can be created at run time while VFs are probed if the FW allows that. + */ + if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) + flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + + ft_attr.flags = flags; + ft_attr.max_fte = table_size; + ft_attr.prio = FDB_SLOW_PATH; + + fdb = mlx5_create_flow_table(root_ns, &ft_attr); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err); + goto slow_fdb_err; + } + esw->fdb_table.offloads.slow_fdb = fdb; + + /* Create empty TC-miss managed table. This allows plugging in following + * priorities without directly exposing their level 0 table to + * eswitch_offloads and passing it as miss_fdb to the following call to + * esw_chains_create().
+ */ + memset(&ft_attr, 0, sizeof(ft_attr)); + ft_attr.prio = FDB_TC_MISS; + esw->fdb_table.offloads.tc_miss_table = mlx5_create_flow_table(root_ns, &ft_attr); + if (IS_ERR(esw->fdb_table.offloads.tc_miss_table)) { + err = PTR_ERR(esw->fdb_table.offloads.tc_miss_table); + esw_warn(dev, "Failed to create TC miss FDB Table err %d\n", err); + goto tc_miss_table_err; + } + + err = esw_chains_create(esw, esw->fdb_table.offloads.tc_miss_table); + if (err) { + esw_warn(dev, "Failed to open fdb chains err(%d)\n", err); + goto fdb_chains_err; + } + + err = esw_create_send_to_vport_group(esw, fdb, flow_group_in, &ix); + if (err) + goto send_vport_err; + + err = esw_create_meta_send_to_vport_group(esw, fdb, flow_group_in, &ix); + if (err) + goto send_vport_meta_err; + + err = esw_create_peer_esw_miss_group(esw, fdb, flow_group_in, &ix); + if (err) + goto peer_miss_err; + + err = esw_create_miss_group(esw, fdb, flow_group_in, &ix); + if (err) + goto miss_err; + + kvfree(flow_group_in); + return 0; + +miss_err: + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); +peer_miss_err: + if (esw->fdb_table.offloads.send_to_vport_meta_grp) + mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp); +send_vport_meta_err: + mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); +send_vport_err: + esw_chains_destroy(esw, esw_chains(esw)); +fdb_chains_err: + mlx5_destroy_flow_table(esw->fdb_table.offloads.tc_miss_table); +tc_miss_table_err: + mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); +slow_fdb_err: + /* Holds true only as long as DMFS is the default */ + mlx5_flow_namespace_set_mode(root_ns, MLX5_FLOW_STEERING_MODE_DMFS); +ns_err: + kvfree(flow_group_in); + return err; +} + +static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) +{ + if (!esw->fdb_table.offloads.slow_fdb) + return; + + esw_debug(esw->dev, "Destroy offloads FDB Tables\n"); + mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi); + mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni); + mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); + if (esw->fdb_table.offloads.send_to_vport_meta_grp) + mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); + mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); + + esw_chains_destroy(esw, esw_chains(esw)); + + mlx5_destroy_flow_table(esw->fdb_table.offloads.tc_miss_table); + mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); + /* Holds true only as long as DMFS is the default */ + mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns, + MLX5_FLOW_STEERING_MODE_DMFS); + atomic64_set(&esw->user_count, 0); +} + +static int esw_get_nr_ft_offloads_steering_src_ports(struct mlx5_eswitch *esw) +{ + int nvports; + + nvports = esw->total_vports + MLX5_ESW_MISS_FLOWS; + if (mlx5e_tc_int_port_supported(esw)) + nvports += MLX5E_TC_MAX_INT_PORT_NUM; + + return nvports; +} + +static int esw_create_offloads_table(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_table *ft_offloads; + struct mlx5_flow_namespace *ns; + int err = 0; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS); + if (!ns) { + esw_warn(esw->dev, "Failed to get offloads flow namespace\n"); + return -EOPNOTSUPP; + } + + ft_attr.max_fte = 
esw_get_nr_ft_offloads_steering_src_ports(esw) + + MLX5_ESW_FT_OFFLOADS_DROP_RULE; + ft_attr.prio = 1; + + ft_offloads = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft_offloads)) { + err = PTR_ERR(ft_offloads); + esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err); + return err; + } + + esw->offloads.ft_offloads = ft_offloads; + return 0; +} + +static void esw_destroy_offloads_table(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + + mlx5_destroy_flow_table(offloads->ft_offloads); +} + +static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + u32 *flow_group_in; + int nvports; + int err = 0; + + nvports = esw_get_nr_ft_offloads_steering_src_ports(esw); + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + /* create vport rx group */ + esw_set_flow_group_source_port(esw, flow_group_in); + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1); + + g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in); + + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_warn(esw->dev, "Failed to create vport rx group err %d\n", err); + goto out; + } + + esw->offloads.vport_rx_group = g; +out: + kvfree(flow_group_in); + return err; +} + +static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw) +{ + mlx5_destroy_flow_group(esw->offloads.vport_rx_group); +} + +static int esw_create_vport_rx_drop_rule_index(struct mlx5_eswitch *esw) +{ + /* ft_offloads table is enlarged by MLX5_ESW_FT_OFFLOADS_DROP_RULE (1) + * for the drop rule, which is placed at the end of the table. + * So return the total of vport and int_port as rule index. 
+ */ + return esw_get_nr_ft_offloads_steering_src_ports(esw); +} + +static int esw_create_vport_rx_drop_group(struct mlx5_eswitch *esw) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + u32 *flow_group_in; + int flow_index; + int err = 0; + + flow_index = esw_create_vport_rx_drop_rule_index(esw); + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index); + + g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in); + + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_warn(esw->dev, "Failed to create vport rx drop group err %d\n", err); + goto out; + } + + esw->offloads.vport_rx_drop_group = g; +out: + kvfree(flow_group_in); + return err; +} + +static void esw_destroy_vport_rx_drop_group(struct mlx5_eswitch *esw) +{ + if (esw->offloads.vport_rx_drop_group) + mlx5_destroy_flow_group(esw->offloads.vport_rx_drop_group); +} + +struct mlx5_flow_handle * +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_spec *spec; + void *misc; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + flow_rule = ERR_PTR(-ENOMEM); + goto out; + } + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, + &flow_act, dest, 1); + if (IS_ERR(flow_rule)) { + esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule)); + goto out; + } + +out: + kvfree(spec); + return flow_rule; +} + +static int esw_create_vport_rx_drop_rule(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *flow_rule; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, NULL, + &flow_act, NULL, 0); + if (IS_ERR(flow_rule)) { + esw_warn(esw->dev, + "fs offloads: Failed to add vport rx drop rule err %ld\n", + PTR_ERR(flow_rule)); + return PTR_ERR(flow_rule); + } + + esw->offloads.vport_rx_drop_rule = flow_rule; + + return 0; +} + +static void esw_destroy_vport_rx_drop_rule(struct mlx5_eswitch *esw) +{ + if (esw->offloads.vport_rx_drop_rule) + mlx5_del_flow_rules(esw->offloads.vport_rx_drop_rule); +} + +static int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode) +{ + u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_vport *vport; + unsigned long i; + + if 
(!MLX5_CAP_GEN(dev, vport_group_manager)) + return -EOPNOTSUPP; + + if (!mlx5_esw_is_fdb_created(esw)) + return -EOPNOTSUPP; + + switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: + mlx5_mode = MLX5_INLINE_MODE_NONE; + goto out; + case MLX5_CAP_INLINE_MODE_L2: + mlx5_mode = MLX5_INLINE_MODE_L2; + goto out; + case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + goto query_vports; + } + +query_vports: + mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode); + mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) { + mlx5_query_nic_vport_min_inline(dev, vport->vport, &mlx5_mode); + if (prev_mlx5_mode != mlx5_mode) + return -EINVAL; + prev_mlx5_mode = mlx5_mode; + } + +out: + *mode = mlx5_mode; + return 0; +} + +static void esw_destroy_restore_table(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + + if (!mlx5_eswitch_reg_c1_loopback_supported(esw)) + return; + + mlx5_modify_header_dealloc(esw->dev, offloads->restore_copy_hdr_id); + mlx5_destroy_flow_group(offloads->restore_group); + mlx5_destroy_flow_table(offloads->ft_offloads_restore); +} + +static int esw_create_restore_table(struct mlx5_eswitch *esw) +{ + u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *ns; + struct mlx5_modify_hdr *mod_hdr; + void *match_criteria, *misc; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *g; + u32 *flow_group_in; + int err = 0; + + if (!mlx5_eswitch_reg_c1_loopback_supported(esw)) + return 0; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS); + if (!ns) { + esw_warn(esw->dev, "Failed to get offloads flow namespace\n"); + return -EOPNOTSUPP; + } + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) { + err = -ENOMEM; + goto out_free; + } + + ft_attr.max_fte = 1 << ESW_REG_C0_USER_DATA_METADATA_BITS; + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + esw_warn(esw->dev, "Failed to create restore table, err %d\n", + err); + goto out_free; + } + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + misc = MLX5_ADDR_OF(fte_match_param, match_criteria, + misc_parameters_2); + + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + ESW_REG_C0_USER_DATA_METADATA_MASK); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ft_attr.max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + g = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create restore flow group, err: %d\n", + err); + goto err_group; + } + + MLX5_SET(copy_action_in, modact, action_type, MLX5_ACTION_TYPE_COPY); + MLX5_SET(copy_action_in, modact, src_field, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_1); + MLX5_SET(copy_action_in, modact, dst_field, + MLX5_ACTION_IN_FIELD_METADATA_REG_B); + mod_hdr = mlx5_modify_header_alloc(esw->dev, + MLX5_FLOW_NAMESPACE_KERNEL, 1, + modact); + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + esw_warn(dev, "Failed to create restore mod header, err: %d\n", + err); + goto err_mod_hdr; + } + + esw->offloads.ft_offloads_restore = ft; + esw->offloads.restore_group = g; + esw->offloads.restore_copy_hdr_id = mod_hdr; + + 
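+ /* The restore table, its flow group and the reg_c_1 to reg_b copy header are released in esw_destroy_restore_table(). */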
kvfree(flow_group_in); + + return 0; + +err_mod_hdr: + mlx5_destroy_flow_group(g); +err_group: + mlx5_destroy_flow_table(ft); +out_free: + kvfree(flow_group_in); + + return err; +} + +static int esw_offloads_start(struct mlx5_eswitch *esw, + struct netlink_ext_ack *extack) +{ + int err; + + esw->mode = MLX5_ESWITCH_OFFLOADS; + err = mlx5_eswitch_enable_locked(esw, esw->dev->priv.sriov.num_vfs); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed setting eswitch to offloads"); + esw->mode = MLX5_ESWITCH_LEGACY; + mlx5_rescan_drivers(esw->dev); + } + if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) { + if (mlx5_eswitch_inline_mode_get(esw, + &esw->offloads.inline_mode)) { + esw->offloads.inline_mode = MLX5_INLINE_MODE_L2; + NL_SET_ERR_MSG_MOD(extack, + "Inline mode is different between vports"); + } + } + return err; +} + +static void mlx5_esw_offloads_rep_mark_set(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + xa_mark_t mark) +{ + bool mark_set; + + /* Copy the mark from vport to its rep */ + mark_set = xa_get_mark(&esw->vports, rep->vport, mark); + if (mark_set) + xa_set_mark(&esw->offloads.vport_reps, rep->vport, mark); +} + +static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx5_vport *vport) +{ + struct mlx5_eswitch_rep *rep; + int rep_type; + int err; + + rep = kzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) + return -ENOMEM; + + rep->vport = vport->vport; + rep->vport_index = vport->index; + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) + atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED); + + err = xa_insert(&esw->offloads.vport_reps, rep->vport, rep, GFP_KERNEL); + if (err) + goto insert_err; + + mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_HOST_FN); + mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_VF); + mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_SF); + return 0; + +insert_err: + kfree(rep); + return err; +} + +static void mlx5_esw_offloads_rep_cleanup(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + xa_erase(&esw->offloads.vport_reps, rep->vport); + kfree(rep); +} + +void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw) +{ + struct mlx5_eswitch_rep *rep; + unsigned long i; + + mlx5_esw_for_each_rep(esw, i, rep) + mlx5_esw_offloads_rep_cleanup(esw, rep); + xa_destroy(&esw->offloads.vport_reps); +} + +int esw_offloads_init_reps(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + int err; + + xa_init(&esw->offloads.vport_reps); + + mlx5_esw_for_each_vport(esw, i, vport) { + err = mlx5_esw_offloads_rep_init(esw, vport); + if (err) + goto err; + } + return 0; + +err: + esw_offloads_cleanup_reps(esw); + return err; +} + +static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, u8 rep_type) +{ + if (atomic_cmpxchg(&rep->rep_data[rep_type].state, + REP_LOADED, REP_REGISTERED) == REP_LOADED) + esw->offloads.rep_ops[rep_type]->unload(rep); +} + +static void __unload_reps_sf_vport(struct mlx5_eswitch *esw, u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + unsigned long i; + + mlx5_esw_for_each_sf_rep(esw, i, rep) + __esw_offloads_unload_rep(esw, rep, rep_type); +} + +static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + unsigned long i; + + __unload_reps_sf_vport(esw, rep_type); + + mlx5_esw_for_each_vf_rep(esw, i, rep) + __esw_offloads_unload_rep(esw, rep, rep_type); + + if (mlx5_ecpf_vport_exists(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); 
+ __esw_offloads_unload_rep(esw, rep, rep_type); + } + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + __esw_offloads_unload_rep(esw, rep, rep_type); + } + + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); + __esw_offloads_unload_rep(esw, rep, rep_type); +} + +int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_eswitch_rep *rep; + int rep_type; + int err; + + rep = mlx5_eswitch_get_rep(esw, vport_num); + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) + if (atomic_cmpxchg(&rep->rep_data[rep_type].state, + REP_REGISTERED, REP_LOADED) == REP_REGISTERED) { + err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep); + if (err) + goto err_reps; + } + + return 0; + +err_reps: + atomic_set(&rep->rep_data[rep_type].state, REP_REGISTERED); + for (--rep_type; rep_type >= 0; rep_type--) + __esw_offloads_unload_rep(esw, rep, rep_type); + return err; +} + +void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_eswitch_rep *rep; + int rep_type; + + rep = mlx5_eswitch_get_rep(esw, vport_num); + for (rep_type = NUM_REP_TYPES - 1; rep_type >= 0; rep_type--) + __esw_offloads_unload_rep(esw, rep, rep_type); +} + +int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num) +{ + int err; + + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return 0; + + if (vport_num != MLX5_VPORT_UPLINK) { + err = mlx5_esw_offloads_devlink_port_register(esw, vport_num); + if (err) + return err; + } + + err = mlx5_esw_offloads_rep_load(esw, vport_num); + if (err) + goto load_err; + return err; + +load_err: + if (vport_num != MLX5_VPORT_UPLINK) + mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); + return err; +} + +void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num) +{ + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return; + + mlx5_esw_offloads_rep_unload(esw, vport_num); + + if (vport_num != MLX5_VPORT_UPLINK) + mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); +} + +static int esw_set_slave_root_fdb(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave) +{ + u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {}; + u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {}; + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_namespace *ns; + int err; + + MLX5_SET(set_flow_table_root_in, in, opcode, + MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); + MLX5_SET(set_flow_table_root_in, in, table_type, + FS_FT_FDB); + + if (master) { + ns = mlx5_get_flow_namespace(master, + MLX5_FLOW_NAMESPACE_FDB); + root = find_root(&ns->node); + mutex_lock(&root->chain_lock); + MLX5_SET(set_flow_table_root_in, in, + table_eswitch_owner_vhca_id_valid, 1); + MLX5_SET(set_flow_table_root_in, in, + table_eswitch_owner_vhca_id, + MLX5_CAP_GEN(master, vhca_id)); + MLX5_SET(set_flow_table_root_in, in, table_id, + root->root_ft->id); + } else { + ns = mlx5_get_flow_namespace(slave, + MLX5_FLOW_NAMESPACE_FDB); + root = find_root(&ns->node); + mutex_lock(&root->chain_lock); + MLX5_SET(set_flow_table_root_in, in, table_id, + root->root_ft->id); + } + + err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out)); + mutex_unlock(&root->chain_lock); + + return err; +} + +static int __esw_set_master_egress_rule(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave, + struct mlx5_vport *vport, + struct mlx5_flow_table *acl) +{ + struct mlx5_flow_handle *flow_rule = NULL; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec; + int err = 
0; + void *misc; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK); + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(slave, vhca_id)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = slave->priv.eswitch->manager_vport; + dest.vport.vhca_id = MLX5_CAP_GEN(slave, vhca_id); + dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + + flow_rule = mlx5_add_flow_rules(acl, spec, &flow_act, + &dest, 1); + if (IS_ERR(flow_rule)) + err = PTR_ERR(flow_rule); + else + vport->egress.offloads.bounce_rule = flow_rule; + + kvfree(spec); + return err; +} + +static int esw_set_master_egress_rule(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_eswitch *esw = master->priv.eswitch; + struct mlx5_flow_table_attr ft_attr = { + .max_fte = 1, .prio = 0, .level = 0, + .flags = MLX5_FLOW_TABLE_OTHER_VPORT, + }; + struct mlx5_flow_namespace *egress_ns; + struct mlx5_flow_table *acl; + struct mlx5_flow_group *g; + struct mlx5_vport *vport; + void *match_criteria; + u32 *flow_group_in; + int err; + + vport = mlx5_eswitch_get_vport(esw, esw->manager_vport); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + egress_ns = mlx5_get_flow_vport_acl_namespace(master, + MLX5_FLOW_NAMESPACE_ESW_EGRESS, + vport->index); + if (!egress_ns) + return -EINVAL; + + if (vport->egress.acl) + return -EINVAL; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + acl = mlx5_create_vport_flow_table(egress_ns, &ft_attr, vport->vport); + if (IS_ERR(acl)) { + err = PTR_ERR(acl); + goto out; + } + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_port); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + + g = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + goto err_group; + } + + err = __esw_set_master_egress_rule(master, slave, vport, acl); + if (err) + goto err_rule; + + vport->egress.acl = acl; + vport->egress.offloads.bounce_grp = g; + + kvfree(flow_group_in); + + return 0; + +err_rule: + mlx5_destroy_flow_group(g); +err_group: + mlx5_destroy_flow_table(acl); +out: + kvfree(flow_group_in); + return err; +} + +static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(dev->priv.eswitch, + dev->priv.eswitch->manager_vport); + + esw_acl_egress_ofld_cleanup(vport); +} + +int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch 
*slave_esw) +{ + int err; + + err = esw_set_slave_root_fdb(master_esw->dev, + slave_esw->dev); + if (err) + return err; + + err = esw_set_master_egress_rule(master_esw->dev, + slave_esw->dev); + if (err) + goto err_acl; + + return err; + +err_acl: + esw_set_slave_root_fdb(NULL, slave_esw->dev); + + return err; +} + +void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw) +{ + esw_unset_master_egress_rule(master_esw->dev); + esw_set_slave_root_fdb(NULL, slave_esw->dev); +} + +#define ESW_OFFLOADS_DEVCOM_PAIR (0) +#define ESW_OFFLOADS_DEVCOM_UNPAIR (1) + +static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw) +{ + const struct mlx5_eswitch_rep_ops *ops; + struct mlx5_eswitch_rep *rep; + unsigned long i; + u8 rep_type; + + mlx5_esw_for_each_rep(esw, i, rep) { + rep_type = NUM_REP_TYPES; + while (rep_type--) { + ops = esw->offloads.rep_ops[rep_type]; + if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED && + ops->event) + ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_UNPAIR, NULL); + } + } +} + +static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) +{ +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + mlx5e_tc_clean_fdb_peer_flows(esw); +#endif + mlx5_esw_offloads_rep_event_unpair(esw); + esw_del_fdb_peer_miss_rules(esw); +} + +static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw) +{ + const struct mlx5_eswitch_rep_ops *ops; + struct mlx5_eswitch_rep *rep; + unsigned long i; + u8 rep_type; + int err; + + err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev); + if (err) + return err; + + mlx5_esw_for_each_rep(esw, i, rep) { + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { + ops = esw->offloads.rep_ops[rep_type]; + if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED && + ops->event) { + err = ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_PAIR, peer_esw); + if (err) + goto err_out; + } + } + } + + return 0; + +err_out: + mlx5_esw_offloads_unpair(esw); + return err; +} + +static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw, + bool pair) +{ + struct mlx5_flow_root_namespace *peer_ns; + struct mlx5_flow_root_namespace *ns; + int err; + + peer_ns = peer_esw->dev->priv.steering->fdb_root_ns; + ns = esw->dev->priv.steering->fdb_root_ns; + + if (pair) { + err = mlx5_flow_namespace_set_peer(ns, peer_ns); + if (err) + return err; + + err = mlx5_flow_namespace_set_peer(peer_ns, ns); + if (err) { + mlx5_flow_namespace_set_peer(ns, NULL); + return err; + } + } else { + mlx5_flow_namespace_set_peer(ns, NULL); + mlx5_flow_namespace_set_peer(peer_ns, NULL); + } + + return 0; +} + +static int mlx5_esw_offloads_devcom_event(int event, + void *my_data, + void *event_data) +{ + struct mlx5_eswitch *esw = my_data; + struct mlx5_devcom *devcom = esw->dev->priv.devcom; + struct mlx5_eswitch *peer_esw = event_data; + int err; + + switch (event) { + case ESW_OFFLOADS_DEVCOM_PAIR: + if (mlx5_eswitch_vport_match_metadata_enabled(esw) != + mlx5_eswitch_vport_match_metadata_enabled(peer_esw)) + break; + + if (esw->paired[mlx5_get_dev_index(peer_esw->dev)]) + break; + + err = mlx5_esw_offloads_set_ns_peer(esw, peer_esw, true); + if (err) + goto err_out; + err = mlx5_esw_offloads_pair(esw, peer_esw); + if (err) + goto err_peer; + + err = mlx5_esw_offloads_pair(peer_esw, esw); + if (err) + goto err_pair; + + esw->paired[mlx5_get_dev_index(peer_esw->dev)] = true; + peer_esw->paired[mlx5_get_dev_index(esw->dev)] = true; + 
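+ /* Pairing succeeded in both directions; advertise the paired state over devcom. */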
mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true); + break; + + case ESW_OFFLOADS_DEVCOM_UNPAIR: + if (!esw->paired[mlx5_get_dev_index(peer_esw->dev)]) + break; + + mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false); + esw->paired[mlx5_get_dev_index(peer_esw->dev)] = false; + peer_esw->paired[mlx5_get_dev_index(esw->dev)] = false; + mlx5_esw_offloads_unpair(peer_esw); + mlx5_esw_offloads_unpair(esw); + mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false); + break; + } + + return 0; + +err_pair: + mlx5_esw_offloads_unpair(esw); +err_peer: + mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false); +err_out: + mlx5_core_err(esw->dev, "esw offloads devcom event failure, event %u err %d", + event, err); + return err; +} + +void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw) +{ + struct mlx5_devcom *devcom = esw->dev->priv.devcom; + + INIT_LIST_HEAD(&esw->offloads.peer_flows); + mutex_init(&esw->offloads.peer_mutex); + + if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) + return; + + if (!mlx5_is_lag_supported(esw->dev)) + return; + + mlx5_devcom_register_component(devcom, + MLX5_DEVCOM_ESW_OFFLOADS, + mlx5_esw_offloads_devcom_event, + esw); + + mlx5_devcom_send_event(devcom, + MLX5_DEVCOM_ESW_OFFLOADS, + ESW_OFFLOADS_DEVCOM_PAIR, esw); +} + +void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) +{ + struct mlx5_devcom *devcom = esw->dev->priv.devcom; + + if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) + return; + + if (!mlx5_is_lag_supported(esw->dev)) + return; + + mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS, + ESW_OFFLOADS_DEVCOM_UNPAIR, esw); + + mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS); +} + +bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw) +{ + if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl)) + return false; + + if (!(MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) & + MLX5_FDB_TO_VPORT_REG_C_0)) + return false; + + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) + return false; + + return true; +} + +#define MLX5_ESW_METADATA_RSVD_UPLINK 1 + +/* Share the same metadata for uplinks. This is fine because: + * (a) In shared FDB mode (LAG) both uplinks are treated the + * same and tagged with the same metadata. + * (b) In non-shared FDB mode, packets from physical port0 + * cannot hit eswitch of PF1 and vice versa.
+ */ +static u32 mlx5_esw_match_metadata_reserved(struct mlx5_eswitch *esw) +{ + return MLX5_ESW_METADATA_RSVD_UPLINK; +} + +u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw) +{ + u32 vport_end_ida = (1 << ESW_VPORT_BITS) - 1; + /* Reserve 0xf for internal port offload */ + u32 max_pf_num = (1 << ESW_PFNUM_BITS) - 2; + u32 pf_num; + int id; + + /* Only 4 bits of pf_num */ + pf_num = mlx5_get_dev_index(esw->dev); + if (pf_num > max_pf_num) + return 0; + + /* Metadata is 4 bits of PFNUM and 12 bits of unique id */ + /* Use only non-zero vport_id (2-4095) for all PF's */ + id = ida_alloc_range(&esw->offloads.vport_metadata_ida, + MLX5_ESW_METADATA_RSVD_UPLINK + 1, + vport_end_ida, GFP_KERNEL); + if (id < 0) + return 0; + id = (pf_num << ESW_VPORT_BITS) | id; + return id; +} + +void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata) +{ + u32 vport_bit_mask = (1 << ESW_VPORT_BITS) - 1; + + /* Metadata contains only 12 bits of actual ida id */ + ida_free(&esw->offloads.vport_metadata_ida, metadata & vport_bit_mask); +} + +static int esw_offloads_vport_metadata_setup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (vport->vport == MLX5_VPORT_UPLINK) + vport->default_metadata = mlx5_esw_match_metadata_reserved(esw); + else + vport->default_metadata = mlx5_esw_match_metadata_alloc(esw); + + vport->metadata = vport->default_metadata; + return vport->metadata ? 0 : -ENOSPC; +} + +static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (!vport->default_metadata) + return; + + if (vport->vport == MLX5_VPORT_UPLINK) + return; + + WARN_ON(vport->metadata != vport->default_metadata); + mlx5_esw_match_metadata_free(esw, vport->default_metadata); +} + +static void esw_offloads_metadata_uninit(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) + return; + + mlx5_esw_for_each_vport(esw, i, vport) + esw_offloads_vport_metadata_cleanup(esw, vport); +} + +static int esw_offloads_metadata_init(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + int err; + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) + return 0; + + mlx5_esw_for_each_vport(esw, i, vport) { + err = esw_offloads_vport_metadata_setup(esw, vport); + if (err) + goto metadata_err; + } + + return 0; + +metadata_err: + esw_offloads_metadata_uninit(esw); + return err; +} + +int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable) +{ + int err = 0; + + down_write(&esw->mode_lock); + if (mlx5_esw_is_fdb_created(esw)) { + err = -EBUSY; + goto done; + } + if (!mlx5_esw_vport_match_metadata_supported(esw)) { + err = -EOPNOTSUPP; + goto done; + } + if (enable) + esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + else + esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; +done: + up_write(&esw->mode_lock); + return err; +} + +int +esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int err; + + err = esw_acl_ingress_ofld_setup(esw, vport); + if (err) + return err; + + err = esw_acl_egress_ofld_setup(esw, vport); + if (err) + goto egress_err; + + return 0; + +egress_err: + esw_acl_ingress_ofld_cleanup(esw, vport); + return err; +} + +void +esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + esw_acl_egress_ofld_cleanup(vport); + esw_acl_ingress_ofld_cleanup(esw, vport); +} + +static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch 
*esw) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + return esw_vport_create_offloads_acl_tables(esw, vport); +} + +static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + if (IS_ERR(vport)) + return; + + esw_vport_destroy_offloads_acl_tables(esw, vport); +} + +int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw) +{ + struct mlx5_eswitch_rep *rep; + unsigned long i; + int ret; + + if (!esw || esw->mode != MLX5_ESWITCH_OFFLOADS) + return 0; + + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); + if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED) + return 0; + + ret = mlx5_esw_offloads_rep_load(esw, MLX5_VPORT_UPLINK); + if (ret) + return ret; + + mlx5_esw_for_each_rep(esw, i, rep) { + if (atomic_read(&rep->rep_data[REP_ETH].state) == REP_LOADED) + mlx5_esw_offloads_rep_load(esw, rep->vport); + } + + return 0; +} + +static int esw_offloads_steering_init(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_indir_table *indir; + int err; + + memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); + mutex_init(&esw->fdb_table.offloads.vports.lock); + hash_init(esw->fdb_table.offloads.vports.table); + atomic64_set(&esw->user_count, 0); + + indir = mlx5_esw_indir_table_init(); + if (IS_ERR(indir)) { + err = PTR_ERR(indir); + goto create_indir_err; + } + esw->fdb_table.offloads.indir = indir; + + err = esw_create_uplink_offloads_acl_tables(esw); + if (err) + goto create_acl_err; + + err = esw_create_offloads_table(esw); + if (err) + goto create_offloads_err; + + err = esw_create_restore_table(esw); + if (err) + goto create_restore_err; + + err = esw_create_offloads_fdb_tables(esw); + if (err) + goto create_fdb_err; + + err = esw_create_vport_rx_group(esw); + if (err) + goto create_fg_err; + + err = esw_create_vport_rx_drop_group(esw); + if (err) + goto create_rx_drop_fg_err; + + err = esw_create_vport_rx_drop_rule(esw); + if (err) + goto create_rx_drop_rule_err; + + return 0; + +create_rx_drop_rule_err: + esw_destroy_vport_rx_drop_group(esw); +create_rx_drop_fg_err: + esw_destroy_vport_rx_group(esw); +create_fg_err: + esw_destroy_offloads_fdb_tables(esw); +create_fdb_err: + esw_destroy_restore_table(esw); +create_restore_err: + esw_destroy_offloads_table(esw); +create_offloads_err: + esw_destroy_uplink_offloads_acl_tables(esw); +create_acl_err: + mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir); +create_indir_err: + mutex_destroy(&esw->fdb_table.offloads.vports.lock); + return err; +} + +static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) +{ + esw_destroy_vport_rx_drop_rule(esw); + esw_destroy_vport_rx_drop_group(esw); + esw_destroy_vport_rx_group(esw); + esw_destroy_offloads_fdb_tables(esw); + esw_destroy_restore_table(esw); + esw_destroy_offloads_table(esw); + esw_destroy_uplink_offloads_acl_tables(esw); + mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir); + mutex_destroy(&esw->fdb_table.offloads.vports.lock); +} + +static void +esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out) +{ + struct devlink *devlink; + bool host_pf_disabled; + u16 new_num_vfs; + + new_num_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_num_of_vfs); + host_pf_disabled = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_pf_disabled); + + if (new_num_vfs == esw->esw_funcs.num_vfs || host_pf_disabled) + 
return; + + devlink = priv_to_devlink(esw->dev); + devl_lock(devlink); + /* Number of VFs can only change from "0 to x" or "x to 0". */ + if (esw->esw_funcs.num_vfs > 0) { + mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); + } else { + int err; + + err = mlx5_eswitch_load_vf_vports(esw, new_num_vfs, + MLX5_VPORT_UC_ADDR_CHANGE); + if (err) { + devl_unlock(devlink); + return; + } + } + esw->esw_funcs.num_vfs = new_num_vfs; + devl_unlock(devlink); +} + +static void esw_functions_changed_event_handler(struct work_struct *work) +{ + struct mlx5_host_work *host_work; + struct mlx5_eswitch *esw; + const u32 *out; + + host_work = container_of(work, struct mlx5_host_work, work); + esw = host_work->esw; + + out = mlx5_esw_query_functions(esw->dev); + if (IS_ERR(out)) + goto out; + + esw_vfs_changed_event_handler(esw, out); + kvfree(out); +out: + kfree(host_work); +} + +int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data) +{ + struct mlx5_esw_functions *esw_funcs; + struct mlx5_host_work *host_work; + struct mlx5_eswitch *esw; + + host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC); + if (!host_work) + return NOTIFY_DONE; + + esw_funcs = mlx5_nb_cof(nb, struct mlx5_esw_functions, nb); + esw = container_of(esw_funcs, struct mlx5_eswitch, esw_funcs); + + host_work->esw = esw; + + INIT_WORK(&host_work->work, esw_functions_changed_event_handler); + queue_work(esw->work_queue, &host_work->work); + + return NOTIFY_OK; +} + +static int mlx5_esw_host_number_init(struct mlx5_eswitch *esw) +{ + const u32 *query_host_out; + + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) + return 0; + + query_host_out = mlx5_esw_query_functions(esw->dev); + if (IS_ERR(query_host_out)) + return PTR_ERR(query_host_out); + + /* Mark non local controller with non zero controller number. */ + esw->offloads.host_number = MLX5_GET(query_esw_functions_out, query_host_out, + host_params_context.host_number); + kvfree(query_host_out); + return 0; +} + +bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller) +{ + /* Local controller is always valid */ + if (controller == 0) + return true; + + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) + return false; + + /* External host number starts with zero in device */ + return (controller == esw->offloads.host_number + 1); +} + +int esw_offloads_enable(struct mlx5_eswitch *esw) +{ + struct mapping_ctx *reg_c0_obj_pool; + struct mlx5_vport *vport; + unsigned long i; + u64 mapping_id; + int err; + + mutex_init(&esw->offloads.termtbl_mutex); + mlx5_rdma_enable_roce(esw->dev); + + err = mlx5_esw_host_number_init(esw); + if (err) + goto err_metadata; + + err = esw_offloads_metadata_init(esw); + if (err) + goto err_metadata; + + err = esw_set_passing_vport_metadata(esw, true); + if (err) + goto err_vport_metadata; + + mapping_id = mlx5_query_nic_system_image_guid(esw->dev); + + reg_c0_obj_pool = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN, + sizeof(struct mlx5_mapped_obj), + ESW_REG_C0_USER_DATA_METADATA_MASK, + true); + + if (IS_ERR(reg_c0_obj_pool)) { + err = PTR_ERR(reg_c0_obj_pool); + goto err_pool; + } + esw->offloads.reg_c0_obj_pool = reg_c0_obj_pool; + + err = esw_offloads_steering_init(esw); + if (err) + goto err_steering_init; + + /* Representor will control the vport link state */ + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN; + + /* Uplink vport rep must load first. 
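mlx5_esw_funcs_changed_handler() above is called from the async-EQ notifier chain, which is why it allocates with GFP_ATOMIC and only queues a work item; the sleeping QUERY_ESW_FUNCTIONS command runs later from esw_functions_changed_event_handler() on the eswitch workqueue. A condensed sketch of that deferral pattern, using generic kernel APIs and hypothetical "my_"-prefixed names rather than the mlx5 internals:

/* Illustrative sketch only, not part of this patch. */
#include <linux/kernel.h>
#include <linux/notifier.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct my_deferred_event {
	struct work_struct work;
	void *ctx;			/* e.g. the eswitch instance */
};

static void my_event_work(struct work_struct *work)
{
	struct my_deferred_event *ev =
		container_of(work, struct my_deferred_event, work);

	/* sleeping work (FW queries, locking) belongs here, not in the notifier */
	kfree(ev);
}

static int my_event_notifier(struct notifier_block *nb, unsigned long type,
			     void *data)
{
	struct my_deferred_event *ev = kzalloc(sizeof(*ev), GFP_ATOMIC);

	if (!ev)
		return NOTIFY_DONE;

	ev->ctx = data;
	INIT_WORK(&ev->work, my_event_work);
	queue_work(system_wq, &ev->work);	/* mlx5 uses its own esw->work_queue */
	return NOTIFY_OK;
}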
*/ + err = esw_offloads_load_rep(esw, MLX5_VPORT_UPLINK); + if (err) + goto err_uplink; + + err = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE); + if (err) + goto err_vports; + + return 0; + +err_vports: + esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); +err_uplink: + esw_offloads_steering_cleanup(esw); +err_steering_init: + mapping_destroy(reg_c0_obj_pool); +err_pool: + esw_set_passing_vport_metadata(esw, false); +err_vport_metadata: + esw_offloads_metadata_uninit(esw); +err_metadata: + mlx5_rdma_disable_roce(esw->dev); + mutex_destroy(&esw->offloads.termtbl_mutex); + return err; +} + +static int esw_offloads_stop(struct mlx5_eswitch *esw, + struct netlink_ext_ack *extack) +{ + int err; + + esw->mode = MLX5_ESWITCH_LEGACY; + + /* If changing from switchdev to legacy mode without sriov enabled, + * no need to create legacy fdb. + */ + if (!mlx5_sriov_is_enabled(esw->dev)) + return 0; + + err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_IGNORE_NUM_VFS); + if (err) + NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); + + return err; +} + +void esw_offloads_disable(struct mlx5_eswitch *esw) +{ + mlx5_eswitch_disable_pf_vf_vports(esw); + esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); + esw_set_passing_vport_metadata(esw, false); + esw_offloads_steering_cleanup(esw); + mapping_destroy(esw->offloads.reg_c0_obj_pool); + esw_offloads_metadata_uninit(esw); + mlx5_rdma_disable_roce(esw->dev); + mutex_destroy(&esw->offloads.termtbl_mutex); +} + +static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) +{ + switch (mode) { + case DEVLINK_ESWITCH_MODE_LEGACY: + *mlx5_mode = MLX5_ESWITCH_LEGACY; + break; + case DEVLINK_ESWITCH_MODE_SWITCHDEV: + *mlx5_mode = MLX5_ESWITCH_OFFLOADS; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode) +{ + switch (mlx5_mode) { + case MLX5_ESWITCH_LEGACY: + *mode = DEVLINK_ESWITCH_MODE_LEGACY; + break; + case MLX5_ESWITCH_OFFLOADS: + *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int esw_inline_mode_from_devlink(u8 mode, u8 *mlx5_mode) +{ + switch (mode) { + case DEVLINK_ESWITCH_INLINE_MODE_NONE: + *mlx5_mode = MLX5_INLINE_MODE_NONE; + break; + case DEVLINK_ESWITCH_INLINE_MODE_LINK: + *mlx5_mode = MLX5_INLINE_MODE_L2; + break; + case DEVLINK_ESWITCH_INLINE_MODE_NETWORK: + *mlx5_mode = MLX5_INLINE_MODE_IP; + break; + case DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT: + *mlx5_mode = MLX5_INLINE_MODE_TCP_UDP; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode) +{ + switch (mlx5_mode) { + case MLX5_INLINE_MODE_NONE: + *mode = DEVLINK_ESWITCH_INLINE_MODE_NONE; + break; + case MLX5_INLINE_MODE_L2: + *mode = DEVLINK_ESWITCH_INLINE_MODE_LINK; + break; + case MLX5_INLINE_MODE_IP: + *mode = DEVLINK_ESWITCH_INLINE_MODE_NETWORK; + break; + case MLX5_INLINE_MODE_TCP_UDP: + *mode = DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT; + break; + default: + return -EINVAL; + } + + return 0; +} + +static bool esw_offloads_devlink_ns_eq_netdev_ns(struct devlink *devlink) +{ + struct net *devl_net, *netdev_net; + struct mlx5_eswitch *esw; + + esw = mlx5_devlink_eswitch_get(devlink); + netdev_net = dev_net(esw->dev->mlx5e_res.uplink_netdev); + devl_net = devlink_net(devlink); + + return net_eq(devl_net, netdev_net); +} + +int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, + struct netlink_ext_ack *extack) +{ + u16 cur_mlx5_mode, mlx5_mode = 0; + struct 
mlx5_eswitch *esw; + int err = 0; + + esw = mlx5_devlink_eswitch_get(devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + if (esw_mode_from_devlink(mode, &mlx5_mode)) + return -EINVAL; + + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && + !esw_offloads_devlink_ns_eq_netdev_ns(devlink)) { + NL_SET_ERR_MSG_MOD(extack, + "Can't change E-Switch mode to switchdev when netdev net namespace has diverged from the devlink's."); + return -EPERM; + } + + mlx5_lag_disable_change(esw->dev); + err = mlx5_esw_try_lock(esw); + if (err < 0) { + NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy"); + goto enable_lag; + } + cur_mlx5_mode = err; + err = 0; + + if (cur_mlx5_mode == mlx5_mode) + goto unlock; + + mlx5_eswitch_disable_locked(esw); + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) { + if (mlx5_devlink_trap_get_num_active(esw->dev)) { + NL_SET_ERR_MSG_MOD(extack, + "Can't change mode while devlink traps are active"); + err = -EOPNOTSUPP; + goto unlock; + } + err = esw_offloads_start(esw, extack); + } else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) { + err = esw_offloads_stop(esw, extack); + mlx5_rescan_drivers(esw->dev); + } else { + err = -EINVAL; + } + +unlock: + mlx5_esw_unlock(esw); +enable_lag: + mlx5_lag_enable_change(esw->dev); + return err; +} + +int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) +{ + struct mlx5_eswitch *esw; + int err; + + esw = mlx5_devlink_eswitch_get(devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + down_write(&esw->mode_lock); + err = esw_mode_to_devlink(esw->mode, mode); + up_write(&esw->mode_lock); + return err; +} + +static int mlx5_esw_vports_inline_set(struct mlx5_eswitch *esw, u8 mlx5_mode, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_vport *vport; + u16 err_vport_num = 0; + unsigned long i; + int err = 0; + + mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) { + err = mlx5_modify_nic_vport_min_inline(dev, vport->vport, mlx5_mode); + if (err) { + err_vport_num = vport->vport; + NL_SET_ERR_MSG_MOD(extack, + "Failed to set min inline on vport"); + goto revert_inline_mode; + } + } + return 0; + +revert_inline_mode: + mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) { + if (vport->vport == err_vport_num) + break; + mlx5_modify_nic_vport_min_inline(dev, + vport->vport, + esw->offloads.inline_mode); + } + return err; +} + +int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw; + u8 mlx5_mode; + int err; + + esw = mlx5_devlink_eswitch_get(devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + down_write(&esw->mode_lock); + + switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: + if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) { + err = 0; + goto out; + } + + fallthrough; + case MLX5_CAP_INLINE_MODE_L2: + NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set"); + err = -EOPNOTSUPP; + goto out; + case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + break; + } + + if (atomic64_read(&esw->offloads.num_flows) > 0) { + NL_SET_ERR_MSG_MOD(extack, + "Can't set inline mode when flows are configured"); + err = -EOPNOTSUPP; + goto out; + } + + err = esw_inline_mode_from_devlink(mode, &mlx5_mode); + if (err) + goto out; + + err = mlx5_esw_vports_inline_set(esw, mlx5_mode, extack); + if (err) + goto out; + + esw->offloads.inline_mode = mlx5_mode; + up_write(&esw->mode_lock); + return 0; + +out: + 
up_write(&esw->mode_lock); + return err; +} + +int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) +{ + struct mlx5_eswitch *esw; + int err; + + esw = mlx5_devlink_eswitch_get(devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + down_write(&esw->mode_lock); + err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); + up_write(&esw->mode_lock); + return err; +} + +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, + enum devlink_eswitch_encap_mode encap, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw; + int err = 0; + + esw = mlx5_devlink_eswitch_get(devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + down_write(&esw->mode_lock); + + if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE && + (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) || + !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) { + err = -EOPNOTSUPP; + goto unlock; + } + + if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC) { + err = -EOPNOTSUPP; + goto unlock; + } + + if (esw->mode == MLX5_ESWITCH_LEGACY) { + esw->offloads.encap = encap; + goto unlock; + } + + if (esw->offloads.encap == encap) + goto unlock; + + if (atomic64_read(&esw->offloads.num_flows) > 0) { + NL_SET_ERR_MSG_MOD(extack, + "Can't set encapsulation when flows are configured"); + err = -EOPNOTSUPP; + goto unlock; + } + + esw_destroy_offloads_fdb_tables(esw); + + esw->offloads.encap = encap; + + err = esw_create_offloads_fdb_tables(esw); + + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed re-creating fast FDB table"); + esw->offloads.encap = !encap; + (void)esw_create_offloads_fdb_tables(esw); + } + +unlock: + up_write(&esw->mode_lock); + return err; +} + +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, + enum devlink_eswitch_encap_mode *encap) +{ + struct mlx5_eswitch *esw; + + esw = mlx5_devlink_eswitch_get(devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + down_write(&esw->mode_lock); + *encap = esw->offloads.encap; + up_write(&esw->mode_lock); + return 0; +} + +static bool +mlx5_eswitch_vport_has_rep(const struct mlx5_eswitch *esw, u16 vport_num) +{ + /* Currently, only ECPF based device has representor for host PF. 
*/ + if (vport_num == MLX5_VPORT_PF && + !mlx5_core_is_ecpf_esw_manager(esw->dev)) + return false; + + if (vport_num == MLX5_VPORT_ECPF && + !mlx5_ecpf_vport_exists(esw->dev)) + return false; + + return true; +} + +void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, + const struct mlx5_eswitch_rep_ops *ops, + u8 rep_type) +{ + struct mlx5_eswitch_rep_data *rep_data; + struct mlx5_eswitch_rep *rep; + unsigned long i; + + esw->offloads.rep_ops[rep_type] = ops; + mlx5_esw_for_each_rep(esw, i, rep) { + if (likely(mlx5_eswitch_vport_has_rep(esw, rep->vport))) { + rep->esw = esw; + rep_data = &rep->rep_data[rep_type]; + atomic_set(&rep_data->state, REP_REGISTERED); + } + } +} +EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps); + +void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + unsigned long i; + + if (esw->mode == MLX5_ESWITCH_OFFLOADS) + __unload_reps_all_vport(esw, rep_type); + + mlx5_esw_for_each_rep(esw, i, rep) + atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED); +} +EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps); + +void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); + return rep->rep_data[rep_type].priv; +} + +void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, + u16 vport, + u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + + rep = mlx5_eswitch_get_rep(esw, vport); + + if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED && + esw->offloads.rep_ops[rep_type]->get_proto_dev) + return esw->offloads.rep_ops[rep_type]->get_proto_dev(rep); + return NULL; +} +EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev); + +void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type) +{ + return mlx5_eswitch_get_proto_dev(esw, MLX5_VPORT_UPLINK, rep_type); +} +EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev); + +struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, + u16 vport) +{ + return mlx5_eswitch_get_rep(esw, vport); +} +EXPORT_SYMBOL(mlx5_eswitch_vport_rep); + +bool mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw) +{ + return !!(esw->flags & MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED); +} +EXPORT_SYMBOL(mlx5_eswitch_reg_c1_loopback_enabled); + +bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) +{ + return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA); +} +EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled); + +u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, + u16 vport_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + + if (WARN_ON_ONCE(IS_ERR(vport))) + return 0; + + return vport->metadata << (32 - ESW_SOURCE_PORT_METADATA_BITS); +} +EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); + +int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port, + u16 vport_num, u32 controller, u32 sfnum) +{ + int err; + + err = mlx5_esw_vport_enable(esw, vport_num, MLX5_VPORT_UC_ADDR_CHANGE); + if (err) + return err; + + err = mlx5_esw_devlink_sf_port_register(esw, dl_port, vport_num, controller, sfnum); + if (err) + goto devlink_err; + + mlx5_esw_vport_debugfs_create(esw, vport_num, true, sfnum); + err = mlx5_esw_offloads_rep_load(esw, vport_num); + if (err) + goto rep_err; + return 0; + +rep_err: + mlx5_esw_vport_debugfs_destroy(esw, vport_num); + mlx5_esw_devlink_sf_port_unregister(esw, vport_num); +devlink_err: + 
mlx5_esw_vport_disable(esw, vport_num); + return err; +} + +void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) +{ + mlx5_esw_offloads_rep_unload(esw, vport_num); + mlx5_esw_vport_debugfs_destroy(esw, vport_num); + mlx5_esw_devlink_sf_port_unregister(esw, vport_num); + mlx5_esw_vport_disable(esw, vport_num); +} + +static int mlx5_esw_query_vport_vhca_id(struct mlx5_eswitch *esw, u16 vport_num, u16 *vhca_id) +{ + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *query_ctx; + void *hca_caps; + int err; + + *vhca_id = 0; + if (mlx5_esw_is_manager_vport(esw, vport_num) || + !MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) + return -EPERM; + + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); + if (!query_ctx) + return -ENOMEM; + + err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx); + if (err) + goto out_free; + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + *vhca_id = MLX5_GET(cmd_hca_cap, hca_caps, vhca_id); + +out_free: + kfree(query_ctx); + return err; +} + +int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num) +{ + u16 *old_entry, *vhca_map_entry, vhca_id; + int err; + + err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id); + if (err) { + esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%u,err=%d)\n", + vport_num, err); + return err; + } + + vhca_map_entry = kmalloc(sizeof(*vhca_map_entry), GFP_KERNEL); + if (!vhca_map_entry) + return -ENOMEM; + + *vhca_map_entry = vport_num; + old_entry = xa_store(&esw->offloads.vhca_map, vhca_id, vhca_map_entry, GFP_KERNEL); + if (xa_is_err(old_entry)) { + kfree(vhca_map_entry); + return xa_err(old_entry); + } + kfree(old_entry); + return 0; +} + +void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num) +{ + u16 *vhca_map_entry, vhca_id; + int err; + + err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id); + if (err) + esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%hu,err=%d)\n", + vport_num, err); + + vhca_map_entry = xa_erase(&esw->offloads.vhca_map, vhca_id); + kfree(vhca_map_entry); +} + +int mlx5_eswitch_vhca_id_to_vport(struct mlx5_eswitch *esw, u16 vhca_id, u16 *vport_num) +{ + u16 *res = xa_load(&esw->offloads.vhca_map, vhca_id); + + if (!res) + return -ENOENT; + + *vport_num = *res; + return 0; +} + +u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, + u16 vport_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + + if (WARN_ON_ONCE(IS_ERR(vport))) + return 0; + + return vport->metadata; +} +EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_set); + +static bool +is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num) +{ + return vport_num == MLX5_VPORT_PF || + mlx5_eswitch_is_vf_vport(esw, vport_num) || + mlx5_esw_is_sf_vport(esw, vport_num); +} + +int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port, + u8 *hw_addr, int *hw_addr_len, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw; + struct mlx5_vport *vport; + u16 vport_num; + + esw = mlx5_devlink_eswitch_get(port->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index); + if (!is_port_function_supported(esw, vport_num)) + return -EOPNOTSUPP; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid port"); + return PTR_ERR(vport); + } + + mutex_lock(&esw->state_lock); + ether_addr_copy(hw_addr, vport->info.mac); + *hw_addr_len = 
ETH_ALEN; + mutex_unlock(&esw->state_lock); + return 0; +} + +int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port, + const u8 *hw_addr, int hw_addr_len, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw; + u16 vport_num; + + esw = mlx5_devlink_eswitch_get(port->devlink); + if (IS_ERR(esw)) { + NL_SET_ERR_MSG_MOD(extack, "Eswitch doesn't support set hw_addr"); + return PTR_ERR(esw); + } + + vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index); + if (!is_port_function_supported(esw, vport_num)) { + NL_SET_ERR_MSG_MOD(extack, "Port doesn't support set hw_addr"); + return -EINVAL; + } + + return mlx5_eswitch_set_vport_mac(esw, vport_num, hw_addr); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c new file mode 100644 index 000000000..edd910258 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. + +#include <linux/mlx5/fs.h> +#include "eswitch.h" +#include "en_tc.h" +#include "fs_core.h" + +struct mlx5_termtbl_handle { + struct hlist_node termtbl_hlist; + + struct mlx5_flow_table *termtbl; + struct mlx5_flow_act flow_act; + struct mlx5_flow_destination dest; + + struct mlx5_flow_handle *rule; + int ref_count; +}; + +static u32 +mlx5_eswitch_termtbl_hash(struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest) +{ + u32 hash; + + hash = jhash_1word(flow_act->action, 0); + hash = jhash((const void *)&flow_act->vlan, + sizeof(flow_act->vlan), hash); + hash = jhash((const void *)&dest->vport.num, + sizeof(dest->vport.num), hash); + hash = jhash((const void *)&dest->vport.vhca_id, + sizeof(dest->vport.num), hash); + if (flow_act->pkt_reformat) + hash = jhash(flow_act->pkt_reformat, + sizeof(*flow_act->pkt_reformat), + hash); + return hash; +} + +static int +mlx5_eswitch_termtbl_cmp(struct mlx5_flow_act *flow_act1, + struct mlx5_flow_destination *dest1, + struct mlx5_flow_act *flow_act2, + struct mlx5_flow_destination *dest2) +{ + int ret; + + ret = flow_act1->action != flow_act2->action || + dest1->vport.num != dest2->vport.num || + dest1->vport.vhca_id != dest2->vport.vhca_id || + memcmp(&flow_act1->vlan, &flow_act2->vlan, + sizeof(flow_act1->vlan)); + if (ret) + return ret; + + if (flow_act1->pkt_reformat && flow_act2->pkt_reformat) + return memcmp(flow_act1->pkt_reformat, flow_act2->pkt_reformat, + sizeof(*flow_act1->pkt_reformat)); + + return !(flow_act1->pkt_reformat == flow_act2->pkt_reformat); +} + +static int +mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, + struct mlx5_termtbl_handle *tt, + struct mlx5_flow_act *flow_act) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *root_ns; + int err, err2; + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + return -EOPNOTSUPP; + } + + /* As this is the terminating action then the termination table is the + * same prio as the slow path + */ + ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | MLX5_FLOW_TABLE_UNMANAGED | + MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + ft_attr.prio = FDB_TC_OFFLOAD; + ft_attr.max_fte = 1; + ft_attr.level = 1; + ft_attr.autogroup.max_num_groups = 1; + tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); + if (IS_ERR(tt->termtbl)) { + err = PTR_ERR(tt->termtbl); + esw_warn(dev, "Failed to 
create termination table, err %pe\n", tt->termtbl); + return err; + } + + tt->rule = mlx5_add_flow_rules(tt->termtbl, NULL, flow_act, + &tt->dest, 1); + if (IS_ERR(tt->rule)) { + err = PTR_ERR(tt->rule); + esw_warn(dev, "Failed to create termination table rule, err %pe\n", tt->rule); + goto add_flow_err; + } + return 0; + +add_flow_err: + err2 = mlx5_destroy_flow_table(tt->termtbl); + if (err2) + esw_warn(dev, "Failed to destroy termination table, err %d\n", err2); + + return err; +} + +static struct mlx5_termtbl_handle * +mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + struct mlx5_esw_flow_attr *attr) +{ + struct mlx5_termtbl_handle *tt; + bool found = false; + u32 hash_key; + int err; + + mutex_lock(&esw->offloads.termtbl_mutex); + hash_key = mlx5_eswitch_termtbl_hash(flow_act, dest); + hash_for_each_possible(esw->offloads.termtbl_tbl, tt, + termtbl_hlist, hash_key) { + if (!mlx5_eswitch_termtbl_cmp(&tt->flow_act, &tt->dest, + flow_act, dest)) { + found = true; + break; + } + } + if (found) + goto tt_add_ref; + + tt = kzalloc(sizeof(*tt), GFP_KERNEL); + if (!tt) { + err = -ENOMEM; + goto tt_create_err; + } + + tt->dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + tt->dest.vport.num = dest->vport.num; + tt->dest.vport.vhca_id = dest->vport.vhca_id; + tt->dest.vport.flags = dest->vport.flags; + memcpy(&tt->flow_act, flow_act, sizeof(*flow_act)); + + err = mlx5_eswitch_termtbl_create(esw->dev, tt, flow_act); + if (err) + goto tt_create_err; + + hash_add(esw->offloads.termtbl_tbl, &tt->termtbl_hlist, hash_key); +tt_add_ref: + tt->ref_count++; + mutex_unlock(&esw->offloads.termtbl_mutex); + return tt; +tt_create_err: + kfree(tt); + mutex_unlock(&esw->offloads.termtbl_mutex); + return ERR_PTR(err); +} + +void +mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, + struct mlx5_termtbl_handle *tt) +{ + mutex_lock(&esw->offloads.termtbl_mutex); + if (--tt->ref_count == 0) + hash_del(&tt->termtbl_hlist); + mutex_unlock(&esw->offloads.termtbl_mutex); + + if (!tt->ref_count) { + mlx5_del_flow_rules(tt->rule); + mlx5_destroy_flow_table(tt->termtbl); + kfree(tt); + } +} + +static void +mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src, + struct mlx5_flow_act *dst) +{ + if (src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) { + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + memcpy(&dst->vlan[0], &src->vlan[0], sizeof(src->vlan[0])); + memset(&src->vlan[0], 0, sizeof(src->vlan[0])); + + if (src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) { + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + memcpy(&dst->vlan[1], &src->vlan[1], sizeof(src->vlan[1])); + memset(&src->vlan[1], 0, sizeof(src->vlan[1])); + } + } +} + +static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw, + const struct mlx5_flow_spec *spec) +{ + u16 port_mask, port_value; + + if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) + return spec->flow_context.flow_source == + MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + + port_mask = MLX5_GET(fte_match_param, spec->match_criteria, + misc_parameters.source_port); + port_value = MLX5_GET(fte_match_param, spec->match_value, + misc_parameters.source_port); + return (port_mask & port_value) == MLX5_VPORT_UPLINK; +} + +bool +mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_spec *spec) 
+{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + int i; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) || + !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level) || + mlx5e_tc_attr_flags_skip(attr->flags) || + (!mlx5_eswitch_offload_is_uplink_port(esw, spec) && !esw_attr->int_port)) + return false; + + /* push vlan on RX */ + if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH && + !(mlx5_fs_get_capabilities(esw->dev, MLX5_FLOW_NAMESPACE_FDB) & + MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX)) + return true; + + /* hairpin */ + for (i = esw_attr->split_count; i < esw_attr->out_count; i++) + if (!esw_attr->dest_int_port && esw_attr->dests[i].rep && + esw_attr->dests[i].rep->vport == MLX5_VPORT_UPLINK) + return true; + + return false; +} + +struct mlx5_flow_handle * +mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int num_dest) +{ + struct mlx5_flow_act term_tbl_act = {}; + struct mlx5_flow_handle *rule = NULL; + bool term_table_created = false; + int num_vport_dests = 0; + int i, curr_dest; + + mlx5_eswitch_termtbl_actions_move(flow_act, &term_tbl_act); + term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + for (i = 0; i < num_dest; i++) { + struct mlx5_termtbl_handle *tt; + + /* only vport destinations can be terminated */ + if (dest[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT) + continue; + + if (attr->dests[num_vport_dests].flags & MLX5_ESW_DEST_ENCAP) { + term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + term_tbl_act.pkt_reformat = attr->dests[num_vport_dests].pkt_reformat; + } else { + term_tbl_act.action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + term_tbl_act.pkt_reformat = NULL; + } + + /* get the terminating table for the action list */ + tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act, + &dest[i], attr); + if (IS_ERR(tt)) { + esw_warn(esw->dev, "Failed to get termination table, err %pe\n", tt); + goto revert_changes; + } + attr->dests[num_vport_dests].termtbl = tt; + num_vport_dests++; + + /* link the destination with the termination table */ + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = tt->termtbl; + term_table_created = true; + } + + /* at least one destination should reference a termination table */ + if (!term_table_created) + goto revert_changes; + + /* create the FTE */ + flow_act->action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act->pkt_reformat = NULL; + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest); + if (IS_ERR(rule)) + goto revert_changes; + + goto out; + +revert_changes: + /* revert the changes that were made to the original flow_act + * and fall-back to the original rule actions + */ + mlx5_eswitch_termtbl_actions_move(&term_tbl_act, flow_act); + + for (curr_dest = 0; curr_dest < num_vport_dests; curr_dest++) { + struct mlx5_termtbl_handle *tt = attr->dests[curr_dest].termtbl; + + attr->dests[curr_dest].termtbl = NULL; + + /* search for the destination associated with the + * current term table + */ + for (i = 0; i < num_dest; i++) { + if (dest[i].ft != tt->termtbl) + continue; + + memset(&dest[i], 0, sizeof(dest[i])); + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest[i].vport.num = tt->dest.vport.num; + dest[i].vport.vhca_id = tt->dest.vport.vhca_id; + mlx5_eswitch_termtbl_put(esw, tt); + break; + } + } + rule = 
mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest); +out: + return rule; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c new file mode 100644 index 000000000..9459e56ee --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -0,0 +1,446 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2018 Mellanox Technologies + +#include <linux/mlx5/driver.h> + +#include "mlx5_core.h" +#include "lib/eq.h" +#include "lib/mlx5.h" + +struct mlx5_event_nb { + struct mlx5_nb nb; + void *ctx; +}; + +/* General events handlers for the low level mlx5_core driver + * + * Other Major feature specific events such as + * clock/eswitch/fpga/FW trace and many others, are handled elsewhere, with + * separate notifiers callbacks, specifically by those mlx5 components. + */ +static int any_notifier(struct notifier_block *, unsigned long, void *); +static int temp_warn(struct notifier_block *, unsigned long, void *); +static int port_module(struct notifier_block *, unsigned long, void *); +static int pcie_core(struct notifier_block *, unsigned long, void *); + +/* handler which forwards the event to events->fw_nh, driver notifiers */ +static int forward_event(struct notifier_block *, unsigned long, void *); + +static struct mlx5_nb events_nbs_ref[] = { + /* Events to be processed by mlx5_core */ + {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY }, + {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT }, + {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT }, + {.nb.notifier_call = pcie_core, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT }, + + /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */ + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_OBJECT_CHANGE }, + /* QP/WQ resource events to forward */ + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_COMM_EST }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SQ_DRAINED }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR }, + /* SRQ events */ + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT }, +}; + +struct mlx5_events { + struct mlx5_core_dev *dev; + struct workqueue_struct *wq; + struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)]; + /* driver notifier chain for fw events */ + struct atomic_notifier_head fw_nh; + /* port module events stats */ + struct mlx5_pme_stats pme_stats; + /*pcie_core*/ + struct work_struct pcie_core_work; + /* driver notifier chain for sw events */ + struct blocking_notifier_head sw_nh; +}; + +static const char *eqe_type_str(u8 type) +{ + switch 
(type) { + case MLX5_EVENT_TYPE_COMP: + return "MLX5_EVENT_TYPE_COMP"; + case MLX5_EVENT_TYPE_PATH_MIG: + return "MLX5_EVENT_TYPE_PATH_MIG"; + case MLX5_EVENT_TYPE_COMM_EST: + return "MLX5_EVENT_TYPE_COMM_EST"; + case MLX5_EVENT_TYPE_SQ_DRAINED: + return "MLX5_EVENT_TYPE_SQ_DRAINED"; + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + return "MLX5_EVENT_TYPE_SRQ_LAST_WQE"; + case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: + return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT"; + case MLX5_EVENT_TYPE_CQ_ERROR: + return "MLX5_EVENT_TYPE_CQ_ERROR"; + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR"; + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + return "MLX5_EVENT_TYPE_PATH_MIG_FAILED"; + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR"; + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR"; + case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: + return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR"; + case MLX5_EVENT_TYPE_INTERNAL_ERROR: + return "MLX5_EVENT_TYPE_INTERNAL_ERROR"; + case MLX5_EVENT_TYPE_PORT_CHANGE: + return "MLX5_EVENT_TYPE_PORT_CHANGE"; + case MLX5_EVENT_TYPE_GPIO_EVENT: + return "MLX5_EVENT_TYPE_GPIO_EVENT"; + case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: + return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT"; + case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: + return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT"; + case MLX5_EVENT_TYPE_REMOTE_CONFIG: + return "MLX5_EVENT_TYPE_REMOTE_CONFIG"; + case MLX5_EVENT_TYPE_DB_BF_CONGESTION: + return "MLX5_EVENT_TYPE_DB_BF_CONGESTION"; + case MLX5_EVENT_TYPE_STALL_EVENT: + return "MLX5_EVENT_TYPE_STALL_EVENT"; + case MLX5_EVENT_TYPE_CMD: + return "MLX5_EVENT_TYPE_CMD"; + case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED: + return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED"; + case MLX5_EVENT_TYPE_VHCA_STATE_CHANGE: + return "MLX5_EVENT_TYPE_VHCA_STATE_CHANGE"; + case MLX5_EVENT_TYPE_PAGE_REQUEST: + return "MLX5_EVENT_TYPE_PAGE_REQUEST"; + case MLX5_EVENT_TYPE_PAGE_FAULT: + return "MLX5_EVENT_TYPE_PAGE_FAULT"; + case MLX5_EVENT_TYPE_PPS_EVENT: + return "MLX5_EVENT_TYPE_PPS_EVENT"; + case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: + return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE"; + case MLX5_EVENT_TYPE_FPGA_ERROR: + return "MLX5_EVENT_TYPE_FPGA_ERROR"; + case MLX5_EVENT_TYPE_FPGA_QP_ERROR: + return "MLX5_EVENT_TYPE_FPGA_QP_ERROR"; + case MLX5_EVENT_TYPE_GENERAL_EVENT: + return "MLX5_EVENT_TYPE_GENERAL_EVENT"; + case MLX5_EVENT_TYPE_MONITOR_COUNTER: + return "MLX5_EVENT_TYPE_MONITOR_COUNTER"; + case MLX5_EVENT_TYPE_DEVICE_TRACER: + return "MLX5_EVENT_TYPE_DEVICE_TRACER"; + case MLX5_EVENT_TYPE_OBJECT_CHANGE: + return "MLX5_EVENT_TYPE_OBJECT_CHANGE"; + default: + return "Unrecognized event"; + } +} + +/* handles all FW events, type == eqe->type */ +static int any_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + + mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n", + eqe_type_str(eqe->type), eqe->sub_type); + return NOTIFY_OK; +} + +/* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */ +static int temp_warn(struct notifier_block *nb, unsigned long type, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + u64 value_lsb; + u64 value_msb; + + value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); + value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); 
+ + mlx5_core_warn(events->dev, + "High temperature on sensors with bit set %llx %llx", + value_msb, value_lsb); + + return NOTIFY_OK; +} + +/* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */ +static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status) +{ + switch (status) { + case MLX5_MODULE_STATUS_PLUGGED: + return "Cable plugged"; + case MLX5_MODULE_STATUS_UNPLUGGED: + return "Cable unplugged"; + case MLX5_MODULE_STATUS_ERROR: + return "Cable error"; + case MLX5_MODULE_STATUS_DISABLED: + return "Cable disabled"; + default: + return "Unknown status"; + } +} + +static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error) +{ + switch (error) { + case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED: + return "Power budget exceeded"; + case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX: + return "Long Range for non MLNX cable"; + case MLX5_MODULE_EVENT_ERROR_BUS_STUCK: + return "Bus stuck (I2C or data shorted)"; + case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT: + return "No EEPROM/retry timeout"; + case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST: + return "Enforce part number list"; + case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER: + return "Unknown identifier"; + case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE: + return "High Temperature"; + case MLX5_MODULE_EVENT_ERROR_BAD_CABLE: + return "Bad or shorted cable/module"; + case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED: + return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot"; + default: + return "Unknown error"; + } +} + +/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */ +static int port_module(struct notifier_block *nb, unsigned long type, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + + enum port_module_event_status_type module_status; + enum port_module_event_error_type error_type; + struct mlx5_eqe_port_module *module_event_eqe; + const char *status_str; + u8 module_num; + + module_event_eqe = &eqe->data.port_module; + module_status = module_event_eqe->module_status & + PORT_MODULE_EVENT_MODULE_STATUS_MASK; + error_type = module_event_eqe->error_type & + PORT_MODULE_EVENT_ERROR_TYPE_MASK; + + if (module_status < MLX5_MODULE_STATUS_NUM) + events->pme_stats.status_counters[module_status]++; + + if (module_status == MLX5_MODULE_STATUS_ERROR) + if (error_type < MLX5_MODULE_EVENT_ERROR_NUM) + events->pme_stats.error_counters[error_type]++; + + if (!printk_ratelimit()) + return NOTIFY_OK; + + module_num = module_event_eqe->module; + status_str = mlx5_pme_status_to_string(module_status); + if (module_status == MLX5_MODULE_STATUS_ERROR) { + const char *error_str = mlx5_pme_error_to_string(error_type); + + mlx5_core_err(events->dev, + "Port module event[error]: module %u, %s, %s\n", + module_num, status_str, error_str); + } else { + mlx5_core_info(events->dev, + "Port module event: module %u, %s\n", + module_num, status_str); + } + + return NOTIFY_OK; +} + +enum { + MLX5_PCI_POWER_COULD_NOT_BE_READ = 0x0, + MLX5_PCI_POWER_SUFFICIENT_REPORTED = 0x1, + MLX5_PCI_POWER_INSUFFICIENT_REPORTED = 0x2, +}; + +static void mlx5_pcie_event(struct work_struct *work) +{ + u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {0}; + u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {0}; + struct mlx5_events *events; + struct mlx5_core_dev *dev; + u8 power_status; + u16 pci_power; + + events = container_of(work, struct mlx5_events, pcie_core_work); + dev = 
events->dev; + + if (!MLX5_CAP_MCAM_FEATURE(dev, pci_status_and_power)) + return; + + mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MPEIN, 0, 0); + power_status = MLX5_GET(mpein_reg, out, pwr_status); + pci_power = MLX5_GET(mpein_reg, out, pci_power); + + switch (power_status) { + case MLX5_PCI_POWER_COULD_NOT_BE_READ: + mlx5_core_info_rl(dev, + "PCIe slot power capability was not advertised.\n"); + break; + case MLX5_PCI_POWER_INSUFFICIENT_REPORTED: + mlx5_core_warn_rl(dev, + "Detected insufficient power on the PCIe slot (%uW).\n", + pci_power); + break; + case MLX5_PCI_POWER_SUFFICIENT_REPORTED: + mlx5_core_info_rl(dev, + "PCIe slot advertised sufficient power (%uW).\n", + pci_power); + break; + } +} + +static int pcie_core(struct notifier_block *nb, unsigned long type, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, + struct mlx5_event_nb, + nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + + switch (eqe->sub_type) { + case MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT: + queue_work(events->wq, &events->pcie_core_work); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats) +{ + *stats = dev->priv.events->pme_stats; +} + +/* forward event as is to registered interfaces (mlx5e/mlx5_ib) */ +static int forward_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + + mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n", + eqe_type_str(eqe->type), eqe->sub_type); + atomic_notifier_call_chain(&events->fw_nh, event, data); + return NOTIFY_OK; +} + +int mlx5_events_init(struct mlx5_core_dev *dev) +{ + struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL); + + if (!events) + return -ENOMEM; + + ATOMIC_INIT_NOTIFIER_HEAD(&events->fw_nh); + events->dev = dev; + dev->priv.events = events; + events->wq = create_singlethread_workqueue("mlx5_events"); + if (!events->wq) { + kfree(events); + return -ENOMEM; + } + INIT_WORK(&events->pcie_core_work, mlx5_pcie_event); + BLOCKING_INIT_NOTIFIER_HEAD(&events->sw_nh); + + return 0; +} + +void mlx5_events_cleanup(struct mlx5_core_dev *dev) +{ + destroy_workqueue(dev->priv.events->wq); + kvfree(dev->priv.events); +} + +void mlx5_events_start(struct mlx5_core_dev *dev) +{ + struct mlx5_events *events = dev->priv.events; + int i; + + for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) { + events->notifiers[i].nb = events_nbs_ref[i]; + events->notifiers[i].ctx = events; + mlx5_eq_notifier_register(dev, &events->notifiers[i].nb); + } +} + +void mlx5_events_stop(struct mlx5_core_dev *dev) +{ + struct mlx5_events *events = dev->priv.events; + int i; + + for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--) + mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb); + flush_workqueue(events->wq); +} + +/* This API is used only for processing and forwarding firmware + * events to mlx5 consumer. 
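The exported helpers below, mlx5_notifier_register() and mlx5_notifier_unregister(), are how mlx5 interface drivers (mlx5e, mlx5_ib) subscribe to the firmware events forwarded by forward_event() above; the callback receives the forwarded EQE as its data argument. A minimal consumer sketch with hypothetical "my_"-prefixed names:

/* Illustrative consumer sketch, not part of this patch. */
#include <linux/notifier.h>
#include <linux/mlx5/driver.h>

static int my_port_change_handler(struct notifier_block *nb,
				  unsigned long event, void *data)
{
	struct mlx5_eqe *eqe = data;

	if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
		return NOTIFY_DONE;

	/* called from the atomic notifier chain; do not sleep here */
	pr_debug("port change event, subtype %d\n", eqe->sub_type);
	return NOTIFY_OK;
}

static struct notifier_block my_nb = {
	.notifier_call = my_port_change_handler,
};

/* Typically from the consumer's attach path:
 *	mlx5_notifier_register(mdev, &my_nb);
 * and mlx5_notifier_unregister(mdev, &my_nb) on teardown.
 */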
+ */ +int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + struct mlx5_events *events = dev->priv.events; + + return atomic_notifier_chain_register(&events->fw_nh, nb); +} +EXPORT_SYMBOL(mlx5_notifier_register); + +int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + struct mlx5_events *events = dev->priv.events; + + return atomic_notifier_chain_unregister(&events->fw_nh, nb); +} +EXPORT_SYMBOL(mlx5_notifier_unregister); + +int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data) +{ + return atomic_notifier_call_chain(&events->fw_nh, event, data); +} + +/* This API is used only for processing and forwarding driver-specific + * events to mlx5 consumers. + */ +int mlx5_blocking_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + struct mlx5_events *events = dev->priv.events; + + return blocking_notifier_chain_register(&events->sw_nh, nb); +} + +int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + struct mlx5_events *events = dev->priv.events; + + return blocking_notifier_chain_unregister(&events->sw_nh, nb); +} + +int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int event, + void *data) +{ + struct mlx5_events *events = dev->priv.events; + + return blocking_notifier_call_chain(&events->sw_nh, event, data); +} + +void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work) +{ + queue_work(dev->priv.events->wq, work); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c new file mode 100644 index 000000000..9a3707715 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/device.h> + +#include "mlx5_core.h" +#include "fpga/cmd.h" + +#define MLX5_FPGA_ACCESS_REG_SZ (MLX5_ST_SZ_DW(fpga_access_reg) + \ + MLX5_FPGA_ACCESS_REG_SIZE_MAX) + +int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr, + void *buf, bool write) +{ + u32 in[MLX5_FPGA_ACCESS_REG_SZ] = {0}; + u32 out[MLX5_FPGA_ACCESS_REG_SZ]; + int err; + + if (size & 3) + return -EINVAL; + if (addr & 3) + return -EINVAL; + if (size > MLX5_FPGA_ACCESS_REG_SIZE_MAX) + return -EINVAL; + + MLX5_SET(fpga_access_reg, in, size, size); + MLX5_SET64(fpga_access_reg, in, address, addr); + if (write) + memcpy(MLX5_ADDR_OF(fpga_access_reg, in, data), buf, size); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_FPGA_ACCESS_REG, 0, write); + if (err) + return err; + + if (!write) + memcpy(buf, MLX5_ADDR_OF(fpga_access_reg, out, data), size); + + return 0; +} + +int mlx5_fpga_caps(struct mlx5_core_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(fpga_cap)] = {0}; + + return mlx5_core_access_reg(dev, in, sizeof(in), dev->caps.fpga, + MLX5_ST_SZ_BYTES(fpga_cap), + MLX5_REG_FPGA_CAP, 0, 0); +} + +int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op) +{ + u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_ctrl)]; + + MLX5_SET(fpga_ctrl, in, operation, op); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_FPGA_CTRL, 0, true); +} + +int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size) +{ + unsigned int cap_size = MLX5_CAP_FPGA(dev, sandbox_extended_caps_len); + u64 addr = MLX5_CAP64_FPGA(dev, sandbox_extended_caps_addr); + unsigned int read; + int ret = 0; + + if (cap_size > size) { + mlx5_core_warn(dev, "Not enough buffer %u for FPGA SBU caps %u", + size, cap_size); + return -EINVAL; + } + + while (cap_size > 0) { + read = min_t(unsigned int, cap_size, + MLX5_FPGA_ACCESS_REG_SIZE_MAX); + + ret = mlx5_fpga_access_reg(dev, read, addr, caps, false); + if (ret) { + mlx5_core_warn(dev, "Error reading FPGA SBU caps %u bytes at address 0x%llx: %d", + read, addr, ret); + return ret; + } + + cap_size -= read; + addr += read; + caps += read; + } + + return ret; +} + +int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query) +{ + u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_ctrl)]; + int err; + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_FPGA_CTRL, 0, false); + if (err) + return err; + + query->status = MLX5_GET(fpga_ctrl, out, status); + query->admin_image = MLX5_GET(fpga_ctrl, out, flash_select_admin); + query->oper_image = MLX5_GET(fpga_ctrl, out, flash_select_oper); + return 0; +} + +int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc, + u32 *fpga_qpn) +{ + u32 out[MLX5_ST_SZ_DW(fpga_create_qp_out)] = {}; + u32 in[MLX5_ST_SZ_DW(fpga_create_qp_in)] = {}; + int ret; + + MLX5_SET(fpga_create_qp_in, in, opcode, MLX5_CMD_OP_FPGA_CREATE_QP); + memcpy(MLX5_ADDR_OF(fpga_create_qp_in, in, fpga_qpc), fpga_qpc, + MLX5_FLD_SZ_BYTES(fpga_create_qp_in, fpga_qpc)); + + ret = mlx5_cmd_exec_inout(dev, fpga_create_qp, in, out); + if (ret) + return ret; + + memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_create_qp_out, out, fpga_qpc), + MLX5_FLD_SZ_BYTES(fpga_create_qp_out, fpga_qpc)); + *fpga_qpn = MLX5_GET(fpga_create_qp_out, out, fpga_qpn); + return ret; +} + +int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn, + enum mlx5_fpga_qpc_field_select fields, + void 
*fpga_qpc) +{ + u32 in[MLX5_ST_SZ_DW(fpga_modify_qp_in)] = {}; + + MLX5_SET(fpga_modify_qp_in, in, opcode, MLX5_CMD_OP_FPGA_MODIFY_QP); + MLX5_SET(fpga_modify_qp_in, in, field_select, fields); + MLX5_SET(fpga_modify_qp_in, in, fpga_qpn, fpga_qpn); + memcpy(MLX5_ADDR_OF(fpga_modify_qp_in, in, fpga_qpc), fpga_qpc, + MLX5_FLD_SZ_BYTES(fpga_modify_qp_in, fpga_qpc)); + + return mlx5_cmd_exec_in(dev, fpga_modify_qp, in); +} + +int mlx5_fpga_query_qp(struct mlx5_core_dev *dev, + u32 fpga_qpn, void *fpga_qpc) +{ + u32 out[MLX5_ST_SZ_DW(fpga_query_qp_out)] = {}; + u32 in[MLX5_ST_SZ_DW(fpga_query_qp_in)] = {}; + int ret; + + MLX5_SET(fpga_query_qp_in, in, opcode, MLX5_CMD_OP_FPGA_QUERY_QP); + MLX5_SET(fpga_query_qp_in, in, fpga_qpn, fpga_qpn); + + ret = mlx5_cmd_exec_inout(dev, fpga_query_qp, in, out); + if (ret) + return ret; + + memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_query_qp_out, out, fpga_qpc), + MLX5_FLD_SZ_BYTES(fpga_query_qp_out, fpga_qpc)); + return ret; +} + +int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn) +{ + u32 in[MLX5_ST_SZ_DW(fpga_destroy_qp_in)] = {}; + + MLX5_SET(fpga_destroy_qp_in, in, opcode, MLX5_CMD_OP_FPGA_DESTROY_QP); + MLX5_SET(fpga_destroy_qp_in, in, fpga_qpn, fpga_qpn); + + return mlx5_cmd_exec_in(dev, fpga_destroy_qp, in); +} + +int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn, + bool clear, struct mlx5_fpga_qp_counters *data) +{ + u32 out[MLX5_ST_SZ_DW(fpga_query_qp_counters_out)] = {}; + u32 in[MLX5_ST_SZ_DW(fpga_query_qp_counters_in)] = {}; + int ret; + + MLX5_SET(fpga_query_qp_counters_in, in, opcode, + MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS); + MLX5_SET(fpga_query_qp_counters_in, in, clear, clear); + MLX5_SET(fpga_query_qp_counters_in, in, fpga_qpn, fpga_qpn); + + ret = mlx5_cmd_exec_inout(dev, fpga_query_qp_counters, in, out); + if (ret) + return ret; + + data->rx_ack_packets = MLX5_GET64(fpga_query_qp_counters_out, out, + rx_ack_packets); + data->rx_send_packets = MLX5_GET64(fpga_query_qp_counters_out, out, + rx_send_packets); + data->tx_ack_packets = MLX5_GET64(fpga_query_qp_counters_out, out, + tx_ack_packets); + data->tx_send_packets = MLX5_GET64(fpga_query_qp_counters_out, out, + tx_send_packets); + data->rx_total_drop = MLX5_GET64(fpga_query_qp_counters_out, out, + rx_total_drop); + + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h new file mode 100644 index 000000000..11621d265 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_FPGA_H__ +#define __MLX5_FPGA_H__ + +#include <linux/mlx5/driver.h> + +enum mlx5_fpga_id { + MLX5_FPGA_NEWTON = 0, + MLX5_FPGA_EDISON = 1, + MLX5_FPGA_MORSE = 2, + MLX5_FPGA_MORSEQ = 3, +}; + +enum mlx5_fpga_image { + MLX5_FPGA_IMAGE_USER = 0, + MLX5_FPGA_IMAGE_FACTORY, +}; + +enum mlx5_fpga_status { + MLX5_FPGA_STATUS_SUCCESS = 0, + MLX5_FPGA_STATUS_FAILURE = 1, + MLX5_FPGA_STATUS_IN_PROGRESS = 2, + MLX5_FPGA_STATUS_NONE = 0xFFFF, +}; + +struct mlx5_fpga_query { + enum mlx5_fpga_image admin_image; + enum mlx5_fpga_image oper_image; + enum mlx5_fpga_status status; +}; + +enum mlx5_fpga_qpc_field_select { + MLX5_FPGA_QPC_STATE = BIT(0), +}; + +struct mlx5_fpga_qp_counters { + u64 rx_ack_packets; + u64 rx_send_packets; + u64 tx_ack_packets; + u64 tx_send_packets; + u64 rx_total_drop; +}; + +int mlx5_fpga_caps(struct mlx5_core_dev *dev); +int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query); +int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op); +int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr, + void *buf, bool write); +int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size); + +int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc, + u32 *fpga_qpn); +int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn, + enum mlx5_fpga_qpc_field_select fields, void *fpga_qpc); +int mlx5_fpga_query_qp(struct mlx5_core_dev *dev, u32 fpga_qpn, void *fpga_qpc); +int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn, + bool clear, struct mlx5_fpga_qp_counters *data); +int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn); + +#endif /* __MLX5_FPGA_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c new file mode 100644 index 000000000..12abe9915 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -0,0 +1,1001 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <net/addrconf.h> +#include <linux/etherdevice.h> +#include <linux/mlx5/vport.h> + +#include "mlx5_core.h" +#include "lib/mlx5.h" +#include "fpga/conn.h" + +#define MLX5_FPGA_PKEY 0xFFFF +#define MLX5_FPGA_PKEY_INDEX 0 /* RoCE PKEY 0xFFFF is always at index 0 */ +#define MLX5_FPGA_RECV_SIZE 2048 +#define MLX5_FPGA_PORT_NUM 1 +#define MLX5_FPGA_CQ_BUDGET 64 + +static int mlx5_fpga_conn_map_buf(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + struct device *dma_device; + int err = 0; + + if (unlikely(!buf->sg[0].data)) + goto out; + + dma_device = mlx5_core_dma_dev(conn->fdev->mdev); + buf->sg[0].dma_addr = dma_map_single(dma_device, buf->sg[0].data, + buf->sg[0].size, buf->dma_dir); + err = dma_mapping_error(dma_device, buf->sg[0].dma_addr); + if (unlikely(err)) { + mlx5_fpga_warn(conn->fdev, "DMA error on sg 0: %d\n", err); + err = -ENOMEM; + goto out; + } + + if (!buf->sg[1].data) + goto out; + + buf->sg[1].dma_addr = dma_map_single(dma_device, buf->sg[1].data, + buf->sg[1].size, buf->dma_dir); + err = dma_mapping_error(dma_device, buf->sg[1].dma_addr); + if (unlikely(err)) { + mlx5_fpga_warn(conn->fdev, "DMA error on sg 1: %d\n", err); + dma_unmap_single(dma_device, buf->sg[0].dma_addr, + buf->sg[0].size, buf->dma_dir); + err = -ENOMEM; + } + +out: + return err; +} + +static void mlx5_fpga_conn_unmap_buf(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + struct device *dma_device; + + dma_device = mlx5_core_dma_dev(conn->fdev->mdev); + if (buf->sg[1].data) + dma_unmap_single(dma_device, buf->sg[1].dma_addr, + buf->sg[1].size, buf->dma_dir); + + if (likely(buf->sg[0].data)) + dma_unmap_single(dma_device, buf->sg[0].dma_addr, + buf->sg[0].size, buf->dma_dir); +} + +static int mlx5_fpga_conn_post_recv(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + struct mlx5_wqe_data_seg *data; + unsigned int ix; + int err = 0; + + err = mlx5_fpga_conn_map_buf(conn, buf); + if (unlikely(err)) + goto out; + + if (unlikely(conn->qp.rq.pc - conn->qp.rq.cc >= conn->qp.rq.size)) { + mlx5_fpga_conn_unmap_buf(conn, buf); + return -EBUSY; + } + + ix = conn->qp.rq.pc & (conn->qp.rq.size - 1); + data = mlx5_wq_cyc_get_wqe(&conn->qp.wq.rq, ix); + data->byte_count = cpu_to_be32(buf->sg[0].size); + data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey); + data->addr = cpu_to_be64(buf->sg[0].dma_addr); + + conn->qp.rq.pc++; + conn->qp.rq.bufs[ix] = buf; + + /* Make sure that descriptors are written before doorbell record. 
*/ + dma_wmb(); + *conn->qp.wq.rq.db = cpu_to_be32(conn->qp.rq.pc & 0xffff); +out: + return err; +} + +static void mlx5_fpga_conn_notify_hw(struct mlx5_fpga_conn *conn, void *wqe) +{ + /* ensure wqe is visible to device before updating doorbell record */ + dma_wmb(); + *conn->qp.wq.sq.db = cpu_to_be32(conn->qp.sq.pc); + /* Make sure that doorbell record is visible before ringing */ + wmb(); + mlx5_write64(wqe, conn->fdev->conn_res.uar->map + MLX5_BF_OFFSET); +} + +static void mlx5_fpga_conn_post_send(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + struct mlx5_wqe_ctrl_seg *ctrl; + struct mlx5_wqe_data_seg *data; + unsigned int ix, sgi; + int size = 1; + + ix = conn->qp.sq.pc & (conn->qp.sq.size - 1); + + ctrl = mlx5_wq_cyc_get_wqe(&conn->qp.wq.sq, ix); + data = (void *)(ctrl + 1); + + for (sgi = 0; sgi < ARRAY_SIZE(buf->sg); sgi++) { + if (!buf->sg[sgi].data) + break; + data->byte_count = cpu_to_be32(buf->sg[sgi].size); + data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey); + data->addr = cpu_to_be64(buf->sg[sgi].dma_addr); + data++; + size++; + } + + ctrl->imm = 0; + ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + ctrl->opmod_idx_opcode = cpu_to_be32(((conn->qp.sq.pc & 0xffff) << 8) | + MLX5_OPCODE_SEND); + ctrl->qpn_ds = cpu_to_be32(size | (conn->qp.qpn << 8)); + + conn->qp.sq.pc++; + conn->qp.sq.bufs[ix] = buf; + mlx5_fpga_conn_notify_hw(conn, ctrl); +} + +int mlx5_fpga_conn_send(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + unsigned long flags; + int err; + + if (!conn->qp.active) + return -ENOTCONN; + + buf->dma_dir = DMA_TO_DEVICE; + err = mlx5_fpga_conn_map_buf(conn, buf); + if (err) + return err; + + spin_lock_irqsave(&conn->qp.sq.lock, flags); + + if (conn->qp.sq.pc - conn->qp.sq.cc >= conn->qp.sq.size) { + list_add_tail(&buf->list, &conn->qp.sq.backlog); + goto out_unlock; + } + + mlx5_fpga_conn_post_send(conn, buf); + +out_unlock: + spin_unlock_irqrestore(&conn->qp.sq.lock, flags); + return err; +} + +static int mlx5_fpga_conn_post_recv_buf(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_dma_buf *buf; + int err; + + buf = kzalloc(sizeof(*buf) + MLX5_FPGA_RECV_SIZE, 0); + if (!buf) + return -ENOMEM; + + buf->sg[0].data = (void *)(buf + 1); + buf->sg[0].size = MLX5_FPGA_RECV_SIZE; + buf->dma_dir = DMA_FROM_DEVICE; + + err = mlx5_fpga_conn_post_recv(conn, buf); + if (err) + kfree(buf); + + return err; +} + +static int mlx5_fpga_conn_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, + u32 *mkey) +{ + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + void *mkc; + u32 *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET(mkc, mkc, pd, pdn); + MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + + err = mlx5_core_create_mkey(mdev, mkey, in, inlen); + + kvfree(in); + return err; +} + +static void mlx5_fpga_conn_rq_cqe(struct mlx5_fpga_conn *conn, + struct mlx5_cqe64 *cqe, u8 status) +{ + struct mlx5_fpga_dma_buf *buf; + int ix, err; + + ix = be16_to_cpu(cqe->wqe_counter) & (conn->qp.rq.size - 1); + buf = conn->qp.rq.bufs[ix]; + conn->qp.rq.bufs[ix] = NULL; + conn->qp.rq.cc++; + + if (unlikely(status && (status != MLX5_CQE_SYNDROME_WR_FLUSH_ERR))) + mlx5_fpga_warn(conn->fdev, "RQ buf %p on FPGA QP %u completion status %d\n", + buf, conn->fpga_qpn, status); + else + mlx5_fpga_dbg(conn->fdev, "RQ buf %p on 
FPGA QP %u completion status %d\n", + buf, conn->fpga_qpn, status); + + mlx5_fpga_conn_unmap_buf(conn, buf); + + if (unlikely(status || !conn->qp.active)) { + conn->qp.active = false; + kfree(buf); + return; + } + + buf->sg[0].size = be32_to_cpu(cqe->byte_cnt); + mlx5_fpga_dbg(conn->fdev, "Message with %u bytes received successfully\n", + buf->sg[0].size); + conn->recv_cb(conn->cb_arg, buf); + + buf->sg[0].size = MLX5_FPGA_RECV_SIZE; + err = mlx5_fpga_conn_post_recv(conn, buf); + if (unlikely(err)) { + mlx5_fpga_warn(conn->fdev, + "Failed to re-post recv buf: %d\n", err); + kfree(buf); + } +} + +static void mlx5_fpga_conn_sq_cqe(struct mlx5_fpga_conn *conn, + struct mlx5_cqe64 *cqe, u8 status) +{ + struct mlx5_fpga_dma_buf *buf, *nextbuf; + unsigned long flags; + int ix; + + spin_lock_irqsave(&conn->qp.sq.lock, flags); + + ix = be16_to_cpu(cqe->wqe_counter) & (conn->qp.sq.size - 1); + buf = conn->qp.sq.bufs[ix]; + conn->qp.sq.bufs[ix] = NULL; + conn->qp.sq.cc++; + + /* Handle backlog still under the spinlock to ensure message post order */ + if (unlikely(!list_empty(&conn->qp.sq.backlog))) { + if (likely(conn->qp.active)) { + nextbuf = list_first_entry(&conn->qp.sq.backlog, + struct mlx5_fpga_dma_buf, list); + list_del(&nextbuf->list); + mlx5_fpga_conn_post_send(conn, nextbuf); + } + } + + spin_unlock_irqrestore(&conn->qp.sq.lock, flags); + + if (unlikely(status && (status != MLX5_CQE_SYNDROME_WR_FLUSH_ERR))) + mlx5_fpga_warn(conn->fdev, "SQ buf %p on FPGA QP %u completion status %d\n", + buf, conn->fpga_qpn, status); + else + mlx5_fpga_dbg(conn->fdev, "SQ buf %p on FPGA QP %u completion status %d\n", + buf, conn->fpga_qpn, status); + + mlx5_fpga_conn_unmap_buf(conn, buf); + + if (likely(buf->complete)) + buf->complete(conn, conn->fdev, buf, status); + + if (unlikely(status)) + conn->qp.active = false; +} + +static void mlx5_fpga_conn_handle_cqe(struct mlx5_fpga_conn *conn, + struct mlx5_cqe64 *cqe) +{ + u8 opcode, status = 0; + + opcode = get_cqe_opcode(cqe); + + switch (opcode) { + case MLX5_CQE_REQ_ERR: + status = ((struct mlx5_err_cqe *)cqe)->syndrome; + fallthrough; + case MLX5_CQE_REQ: + mlx5_fpga_conn_sq_cqe(conn, cqe, status); + break; + + case MLX5_CQE_RESP_ERR: + status = ((struct mlx5_err_cqe *)cqe)->syndrome; + fallthrough; + case MLX5_CQE_RESP_SEND: + mlx5_fpga_conn_rq_cqe(conn, cqe, status); + break; + default: + mlx5_fpga_warn(conn->fdev, "Unexpected cqe opcode %u\n", + opcode); + } +} + +static void mlx5_fpga_conn_arm_cq(struct mlx5_fpga_conn *conn) +{ + mlx5_cq_arm(&conn->cq.mcq, MLX5_CQ_DB_REQ_NOT, + conn->fdev->conn_res.uar->map, conn->cq.wq.cc); +} + +static inline void mlx5_fpga_conn_cqes(struct mlx5_fpga_conn *conn, + unsigned int budget) +{ + struct mlx5_cqe64 *cqe; + + while (budget) { + cqe = mlx5_cqwq_get_cqe(&conn->cq.wq); + if (!cqe) + break; + + budget--; + mlx5_cqwq_pop(&conn->cq.wq); + mlx5_fpga_conn_handle_cqe(conn, cqe); + mlx5_cqwq_update_db_record(&conn->cq.wq); + } + if (!budget) { + tasklet_schedule(&conn->cq.tasklet); + return; + } + + mlx5_fpga_dbg(conn->fdev, "Re-arming CQ with cc# %u\n", conn->cq.wq.cc); + /* ensure cq space is freed before enabling more cqes */ + wmb(); + mlx5_fpga_conn_arm_cq(conn); +} + +static void mlx5_fpga_conn_cq_tasklet(struct tasklet_struct *t) +{ + struct mlx5_fpga_conn *conn = from_tasklet(conn, t, cq.tasklet); + + if (unlikely(!conn->qp.active)) + return; + mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET); +} + +static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq, + struct mlx5_eqe *eqe) +{ + struct 
mlx5_fpga_conn *conn; + + conn = container_of(mcq, struct mlx5_fpga_conn, cq.mcq); + if (unlikely(!conn->qp.active)) + return; + mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET); +} + +static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {0}; + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; + struct mlx5_wq_param wqp; + struct mlx5_cqe64 *cqe; + int inlen, err, eqn; + void *cqc, *in; + __be64 *pas; + u32 i; + + cq_size = roundup_pow_of_two(cq_size); + MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(cq_size)); + + wqp.buf_numa_node = mdev->priv.numa_node; + wqp.db_numa_node = mdev->priv.numa_node; + + err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &conn->cq.wq, + &conn->cq.wq_ctrl); + if (err) + return err; + + for (i = 0; i < mlx5_cqwq_get_size(&conn->cq.wq); i++) { + cqe = mlx5_cqwq_get_wqe(&conn->cq.wq, i); + cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK; + } + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + sizeof(u64) * conn->cq.wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_cqwq; + } + + err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn); + if (err) { + kvfree(in); + goto err_cqwq; + } + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size)); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); + MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index); + MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(cqc, cqc, dbr_addr, conn->cq.wq_ctrl.db.dma); + + pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); + mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.buf, pas); + + err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen, out, sizeof(out)); + kvfree(in); + + if (err) + goto err_cqwq; + + conn->cq.mcq.cqe_sz = 64; + conn->cq.mcq.set_ci_db = conn->cq.wq_ctrl.db.db; + conn->cq.mcq.arm_db = conn->cq.wq_ctrl.db.db + 1; + *conn->cq.mcq.set_ci_db = 0; + *conn->cq.mcq.arm_db = 0; + conn->cq.mcq.vector = 0; + conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete; + conn->cq.mcq.uar = fdev->conn_res.uar; + tasklet_setup(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet); + + mlx5_fpga_dbg(fdev, "Created CQ #0x%x\n", conn->cq.mcq.cqn); + + goto out; + +err_cqwq: + mlx5_wq_destroy(&conn->cq.wq_ctrl); +out: + return err; +} + +static void mlx5_fpga_conn_destroy_cq(struct mlx5_fpga_conn *conn) +{ + tasklet_disable(&conn->cq.tasklet); + tasklet_kill(&conn->cq.tasklet); + mlx5_core_destroy_cq(conn->fdev->mdev, &conn->cq.mcq); + mlx5_wq_destroy(&conn->cq.wq_ctrl); +} + +static int mlx5_fpga_conn_create_wq(struct mlx5_fpga_conn *conn, void *qpc) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + struct mlx5_wq_param wqp; + + wqp.buf_numa_node = mdev->priv.numa_node; + wqp.db_numa_node = mdev->priv.numa_node; + + return mlx5_wq_qp_create(mdev, &wqp, qpc, &conn->qp.wq, + &conn->qp.wq_ctrl); +} + +static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn, + unsigned int tx_size, unsigned int rx_size) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {}; + void *in = NULL, *qpc; + int err, inlen; + + conn->qp.rq.pc = 0; + conn->qp.rq.cc = 0; + conn->qp.rq.size = roundup_pow_of_two(rx_size); + conn->qp.sq.pc = 0; + conn->qp.sq.cc = 0; + conn->qp.sq.size 
= roundup_pow_of_two(tx_size); + + MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); + MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(conn->qp.rq.size)); + MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(conn->qp.sq.size)); + err = mlx5_fpga_conn_create_wq(conn, temp_qpc); + if (err) + goto out; + + conn->qp.rq.bufs = kvcalloc(conn->qp.rq.size, + sizeof(conn->qp.rq.bufs[0]), + GFP_KERNEL); + if (!conn->qp.rq.bufs) { + err = -ENOMEM; + goto err_wq; + } + + conn->qp.sq.bufs = kvcalloc(conn->qp.sq.size, + sizeof(conn->qp.sq.bufs[0]), + GFP_KERNEL); + if (!conn->qp.sq.bufs) { + err = -ENOMEM; + goto err_rq_bufs; + } + + inlen = MLX5_ST_SZ_BYTES(create_qp_in) + + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * + conn->qp.wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_sq_bufs; + } + + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, uar_page, fdev->conn_res.uar->index); + MLX5_SET(qpc, qpc, log_page_size, + conn->qp.wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, fre, 1); + MLX5_SET(qpc, qpc, rlky, 1); + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, pd, fdev->conn_res.pdn); + MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); + MLX5_SET(qpc, qpc, log_rq_size, ilog2(conn->qp.rq.size)); + MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); + MLX5_SET(qpc, qpc, log_sq_size, ilog2(conn->qp.sq.size)); + MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn); + MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn); + MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev)); + MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma); + if (MLX5_CAP_GEN(mdev, cqe_version) == 1) + MLX5_SET(qpc, qpc, user_index, 0xFFFFFF); + + mlx5_fill_page_frag_array(&conn->qp.wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas)); + + MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + if (err) + goto err_sq_bufs; + + conn->qp.qpn = MLX5_GET(create_qp_out, out, qpn); + mlx5_fpga_dbg(fdev, "Created QP #0x%x\n", conn->qp.qpn); + + goto out; + +err_sq_bufs: + kvfree(conn->qp.sq.bufs); +err_rq_bufs: + kvfree(conn->qp.rq.bufs); +err_wq: + mlx5_wq_destroy(&conn->qp.wq_ctrl); +out: + kvfree(in); + return err; +} + +static void mlx5_fpga_conn_free_recv_bufs(struct mlx5_fpga_conn *conn) +{ + int ix; + + for (ix = 0; ix < conn->qp.rq.size; ix++) { + if (!conn->qp.rq.bufs[ix]) + continue; + mlx5_fpga_conn_unmap_buf(conn, conn->qp.rq.bufs[ix]); + kfree(conn->qp.rq.bufs[ix]); + conn->qp.rq.bufs[ix] = NULL; + } +} + +static void mlx5_fpga_conn_flush_send_bufs(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_dma_buf *buf, *temp; + int ix; + + for (ix = 0; ix < conn->qp.sq.size; ix++) { + buf = conn->qp.sq.bufs[ix]; + if (!buf) + continue; + conn->qp.sq.bufs[ix] = NULL; + mlx5_fpga_conn_unmap_buf(conn, buf); + if (!buf->complete) + continue; + buf->complete(conn, conn->fdev, buf, MLX5_CQE_SYNDROME_WR_FLUSH_ERR); + } + list_for_each_entry_safe(buf, temp, &conn->qp.sq.backlog, list) { + mlx5_fpga_conn_unmap_buf(conn, buf); + if (!buf->complete) + continue; + buf->complete(conn, conn->fdev, buf, MLX5_CQE_SYNDROME_WR_FLUSH_ERR); + } +} + +static void mlx5_fpga_conn_destroy_qp(struct mlx5_fpga_conn *conn) +{ + struct mlx5_core_dev *dev = conn->fdev->mdev; + u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {}; + + MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, in, qpn, conn->qp.qpn); + 
mlx5_cmd_exec_in(dev, destroy_qp, in); + + mlx5_fpga_conn_free_recv_bufs(conn); + mlx5_fpga_conn_flush_send_bufs(conn); + kvfree(conn->qp.sq.bufs); + kvfree(conn->qp.rq.bufs); + mlx5_wq_destroy(&conn->qp.wq_ctrl); +} + +static int mlx5_fpga_conn_reset_qp(struct mlx5_fpga_conn *conn) +{ + struct mlx5_core_dev *mdev = conn->fdev->mdev; + u32 in[MLX5_ST_SZ_DW(qp_2rst_in)] = {}; + + mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to RST\n", conn->qp.qpn); + + MLX5_SET(qp_2rst_in, in, opcode, MLX5_CMD_OP_2RST_QP); + MLX5_SET(qp_2rst_in, in, qpn, conn->qp.qpn); + + return mlx5_cmd_exec_in(mdev, qp_2rst, in); +} + +static int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn) +{ + u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {}; + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 *qpc; + + mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to INIT\n", conn->qp.qpn); + + qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc); + + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX); + MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, MLX5_FPGA_PORT_NUM); + MLX5_SET(qpc, qpc, pd, conn->fdev->conn_res.pdn); + MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn); + MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn); + MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma); + + MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP); + MLX5_SET(rst2init_qp_in, in, qpn, conn->qp.qpn); + + return mlx5_cmd_exec_in(mdev, rst2init_qp, in); +} + +static int mlx5_fpga_conn_rtr_qp(struct mlx5_fpga_conn *conn) +{ + u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {}; + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 *qpc; + + mlx5_fpga_dbg(conn->fdev, "QP RTR\n"); + + qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc); + + MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_1K_BYTES); + MLX5_SET(qpc, qpc, log_msg_max, (u8)MLX5_CAP_GEN(mdev, log_max_msg)); + MLX5_SET(qpc, qpc, remote_qpn, conn->fpga_qpn); + MLX5_SET(qpc, qpc, next_rcv_psn, + MLX5_GET(fpga_qpc, conn->fpga_qpc, next_send_psn)); + MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX); + MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, MLX5_FPGA_PORT_NUM); + ether_addr_copy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32), + MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_mac_47_32)); + MLX5_SET(qpc, qpc, primary_address_path.udp_sport, + MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port)); + MLX5_SET(qpc, qpc, primary_address_path.src_addr_index, + conn->qp.sgid_index); + MLX5_SET(qpc, qpc, primary_address_path.hop_limit, 0); + memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip), + MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_ip), + MLX5_FLD_SZ_BYTES(qpc, primary_address_path.rgid_rip)); + + MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP); + MLX5_SET(init2rtr_qp_in, in, qpn, conn->qp.qpn); + + return mlx5_cmd_exec_in(mdev, init2rtr_qp, in); +} + +static int mlx5_fpga_conn_rts_qp(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {}; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 *qpc; + + mlx5_fpga_dbg(conn->fdev, "QP RTS\n"); + + qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc); + + MLX5_SET(qpc, qpc, log_ack_req_freq, 8); + MLX5_SET(qpc, qpc, min_rnr_nak, 0x12); + MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x12); /* ~1.07s */ + MLX5_SET(qpc, qpc, next_send_psn, + MLX5_GET(fpga_qpc, 
conn->fpga_qpc, next_rcv_psn)); + MLX5_SET(qpc, qpc, retry_count, 7); + MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */ + + MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP); + MLX5_SET(rtr2rts_qp_in, in, qpn, conn->qp.qpn); + MLX5_SET(rtr2rts_qp_in, in, opt_param_mask, MLX5_QP_OPTPAR_RNR_TIMEOUT); + + return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in); +} + +static int mlx5_fpga_conn_connect(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + int err; + + MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_ACTIVE); + err = mlx5_fpga_modify_qp(conn->fdev->mdev, conn->fpga_qpn, + MLX5_FPGA_QPC_STATE, &conn->fpga_qpc); + if (err) { + mlx5_fpga_err(fdev, "Failed to activate FPGA RC QP: %d\n", err); + goto out; + } + + err = mlx5_fpga_conn_reset_qp(conn); + if (err) { + mlx5_fpga_err(fdev, "Failed to change QP state to reset\n"); + goto err_fpga_qp; + } + + err = mlx5_fpga_conn_init_qp(conn); + if (err) { + mlx5_fpga_err(fdev, "Failed to modify QP from RESET to INIT\n"); + goto err_fpga_qp; + } + conn->qp.active = true; + + while (!mlx5_fpga_conn_post_recv_buf(conn)) + ; + + err = mlx5_fpga_conn_rtr_qp(conn); + if (err) { + mlx5_fpga_err(fdev, "Failed to change QP state from INIT to RTR\n"); + goto err_recv_bufs; + } + + err = mlx5_fpga_conn_rts_qp(conn); + if (err) { + mlx5_fpga_err(fdev, "Failed to change QP state from RTR to RTS\n"); + goto err_recv_bufs; + } + goto out; + +err_recv_bufs: + mlx5_fpga_conn_free_recv_bufs(conn); +err_fpga_qp: + MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_INIT); + if (mlx5_fpga_modify_qp(conn->fdev->mdev, conn->fpga_qpn, + MLX5_FPGA_QPC_STATE, &conn->fpga_qpc)) + mlx5_fpga_err(fdev, "Failed to revert FPGA QP to INIT\n"); +out: + return err; +} + +struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_conn_attr *attr, + enum mlx5_ifc_fpga_qp_type qp_type) +{ + struct mlx5_fpga_conn *ret, *conn; + u8 *remote_mac, *remote_ip; + int err; + + if (!attr->recv_cb) + return ERR_PTR(-EINVAL); + + conn = kzalloc(sizeof(*conn), GFP_KERNEL); + if (!conn) + return ERR_PTR(-ENOMEM); + + conn->fdev = fdev; + INIT_LIST_HEAD(&conn->qp.sq.backlog); + + spin_lock_init(&conn->qp.sq.lock); + + conn->recv_cb = attr->recv_cb; + conn->cb_arg = attr->cb_arg; + + remote_mac = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_mac_47_32); + err = mlx5_query_mac_address(fdev->mdev, remote_mac); + if (err) { + mlx5_fpga_err(fdev, "Failed to query local MAC: %d\n", err); + ret = ERR_PTR(err); + goto err; + } + + /* Build Modified EUI-64 IPv6 address from the MAC address */ + remote_ip = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_ip); + remote_ip[0] = 0xfe; + remote_ip[1] = 0x80; + addrconf_addr_eui48(&remote_ip[8], remote_mac); + + err = mlx5_core_reserved_gid_alloc(fdev->mdev, &conn->qp.sgid_index); + if (err) { + mlx5_fpga_err(fdev, "Failed to allocate SGID: %d\n", err); + ret = ERR_PTR(err); + goto err; + } + + err = mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, + MLX5_ROCE_VERSION_2, + MLX5_ROCE_L3_TYPE_IPV6, + remote_ip, remote_mac, true, 0, + MLX5_FPGA_PORT_NUM); + if (err) { + mlx5_fpga_err(fdev, "Failed to set SGID: %d\n", err); + ret = ERR_PTR(err); + goto err_rsvd_gid; + } + mlx5_fpga_dbg(fdev, "Reserved SGID index %u\n", conn->qp.sgid_index); + + /* Allow for one cqe per rx/tx wqe, plus one cqe for the next wqe, + * created during processing of the cqe + */ + err = mlx5_fpga_conn_create_cq(conn, + (attr->tx_size + attr->rx_size) * 2); + if (err) { + 
mlx5_fpga_err(fdev, "Failed to create CQ: %d\n", err); + ret = ERR_PTR(err); + goto err_gid; + } + + mlx5_fpga_conn_arm_cq(conn); + + err = mlx5_fpga_conn_create_qp(conn, attr->tx_size, attr->rx_size); + if (err) { + mlx5_fpga_err(fdev, "Failed to create QP: %d\n", err); + ret = ERR_PTR(err); + goto err_cq; + } + + MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_INIT); + MLX5_SET(fpga_qpc, conn->fpga_qpc, qp_type, qp_type); + MLX5_SET(fpga_qpc, conn->fpga_qpc, st, MLX5_FPGA_QPC_ST_RC); + MLX5_SET(fpga_qpc, conn->fpga_qpc, ether_type, ETH_P_8021Q); + MLX5_SET(fpga_qpc, conn->fpga_qpc, vid, 0); + MLX5_SET(fpga_qpc, conn->fpga_qpc, next_rcv_psn, 1); + MLX5_SET(fpga_qpc, conn->fpga_qpc, next_send_psn, 0); + MLX5_SET(fpga_qpc, conn->fpga_qpc, pkey, MLX5_FPGA_PKEY); + MLX5_SET(fpga_qpc, conn->fpga_qpc, remote_qpn, conn->qp.qpn); + MLX5_SET(fpga_qpc, conn->fpga_qpc, rnr_retry, 7); + MLX5_SET(fpga_qpc, conn->fpga_qpc, retry_count, 7); + + err = mlx5_fpga_create_qp(fdev->mdev, &conn->fpga_qpc, + &conn->fpga_qpn); + if (err) { + mlx5_fpga_err(fdev, "Failed to create FPGA RC QP: %d\n", err); + ret = ERR_PTR(err); + goto err_qp; + } + + err = mlx5_fpga_conn_connect(conn); + if (err) { + ret = ERR_PTR(err); + goto err_conn; + } + + mlx5_fpga_dbg(fdev, "FPGA QPN is %u\n", conn->fpga_qpn); + ret = conn; + goto out; + +err_conn: + mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn); +err_qp: + mlx5_fpga_conn_destroy_qp(conn); +err_cq: + mlx5_fpga_conn_destroy_cq(conn); +err_gid: + mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, 0, 0, NULL, + NULL, false, 0, MLX5_FPGA_PORT_NUM); +err_rsvd_gid: + mlx5_core_reserved_gid_free(fdev->mdev, conn->qp.sgid_index); +err: + kfree(conn); +out: + return ret; +} + +void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn) +{ + conn->qp.active = false; + tasklet_disable(&conn->cq.tasklet); + synchronize_irq(conn->cq.mcq.irqn); + + mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn); + mlx5_fpga_conn_destroy_qp(conn); + mlx5_fpga_conn_destroy_cq(conn); + + mlx5_core_roce_gid_set(conn->fdev->mdev, conn->qp.sgid_index, 0, 0, + NULL, NULL, false, 0, MLX5_FPGA_PORT_NUM); + mlx5_core_reserved_gid_free(conn->fdev->mdev, conn->qp.sgid_index); + kfree(conn); +} + +int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev) +{ + int err; + + err = mlx5_nic_vport_enable_roce(fdev->mdev); + if (err) { + mlx5_fpga_err(fdev, "Failed to enable RoCE: %d\n", err); + goto out; + } + + fdev->conn_res.uar = mlx5_get_uars_page(fdev->mdev); + if (IS_ERR(fdev->conn_res.uar)) { + err = PTR_ERR(fdev->conn_res.uar); + mlx5_fpga_err(fdev, "get_uars_page failed, %d\n", err); + goto err_roce; + } + mlx5_fpga_dbg(fdev, "Allocated UAR index %u\n", + fdev->conn_res.uar->index); + + err = mlx5_core_alloc_pd(fdev->mdev, &fdev->conn_res.pdn); + if (err) { + mlx5_fpga_err(fdev, "alloc pd failed, %d\n", err); + goto err_uar; + } + mlx5_fpga_dbg(fdev, "Allocated PD %u\n", fdev->conn_res.pdn); + + err = mlx5_fpga_conn_create_mkey(fdev->mdev, fdev->conn_res.pdn, + &fdev->conn_res.mkey); + if (err) { + mlx5_fpga_err(fdev, "create mkey failed, %d\n", err); + goto err_dealloc_pd; + } + mlx5_fpga_dbg(fdev, "Created mkey 0x%x\n", fdev->conn_res.mkey); + + return 0; + +err_dealloc_pd: + mlx5_core_dealloc_pd(fdev->mdev, fdev->conn_res.pdn); +err_uar: + mlx5_put_uars_page(fdev->mdev, fdev->conn_res.uar); +err_roce: + mlx5_nic_vport_disable_roce(fdev->mdev); +out: + return err; +} + +void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev) +{ + mlx5_core_destroy_mkey(fdev->mdev, 
fdev->conn_res.mkey); + mlx5_core_dealloc_pd(fdev->mdev, fdev->conn_res.pdn); + mlx5_put_uars_page(fdev->mdev, fdev->conn_res.uar); + mlx5_nic_vport_disable_roce(fdev->mdev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h new file mode 100644 index 000000000..5116e869a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef __MLX5_FPGA_CONN_H__ +#define __MLX5_FPGA_CONN_H__ + +#include <linux/mlx5/cq.h> +#include <linux/mlx5/qp.h> + +#include "fpga/core.h" +#include "fpga/sdk.h" +#include "wq.h" + +struct mlx5_fpga_conn { + struct mlx5_fpga_device *fdev; + + void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf); + void *cb_arg; + + /* FPGA QP */ + u32 fpga_qpc[MLX5_ST_SZ_DW(fpga_qpc)]; + u32 fpga_qpn; + + /* CQ */ + struct { + struct mlx5_cqwq wq; + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5_core_cq mcq; + struct tasklet_struct tasklet; + } cq; + + /* QP */ + struct { + bool active; + int sgid_index; + struct mlx5_wq_qp wq; + struct mlx5_wq_ctrl wq_ctrl; + u32 qpn; + struct { + spinlock_t lock; /* Protects all SQ state */ + unsigned int pc; + unsigned int cc; + unsigned int size; + struct mlx5_fpga_dma_buf **bufs; + struct list_head backlog; + } sq; + struct { + unsigned int pc; + unsigned int cc; + unsigned int size; + struct mlx5_fpga_dma_buf **bufs; + } rq; + } qp; +}; + +int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev); +void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev); +struct mlx5_fpga_conn * +mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_conn_attr *attr, + enum mlx5_ifc_fpga_qp_type qp_type); +void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn); +int mlx5_fpga_conn_send(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf); + +#endif /* __MLX5_FPGA_CONN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c new file mode 100644 index 000000000..39c03dcbd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c @@ -0,0 +1,375 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> + +#include "mlx5_core.h" +#include "lib/mlx5.h" +#include "lib/eq.h" +#include "fpga/core.h" +#include "fpga/conn.h" + +static const char *const mlx5_fpga_error_strings[] = { + "Null Syndrome", + "Corrupted DDR", + "Flash Timeout", + "Internal Link Error", + "Watchdog HW Failure", + "I2C Failure", + "Image Changed", + "Temperature Critical", +}; + +static const char * const mlx5_fpga_qp_error_strings[] = { + "Null Syndrome", + "Retry Counter Expired", + "RNR Expired", +}; +static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void) +{ + struct mlx5_fpga_device *fdev = NULL; + + fdev = kzalloc(sizeof(*fdev), GFP_KERNEL); + if (!fdev) + return NULL; + + spin_lock_init(&fdev->state_lock); + fdev->state = MLX5_FPGA_STATUS_NONE; + return fdev; +} + +static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image) +{ + switch (image) { + case MLX5_FPGA_IMAGE_USER: + return "user"; + case MLX5_FPGA_IMAGE_FACTORY: + return "factory"; + default: + return "unknown"; + } +} + +static const char *mlx5_fpga_name(u32 fpga_id) +{ + static char ret[32]; + + switch (fpga_id) { + case MLX5_FPGA_NEWTON: + return "Newton"; + case MLX5_FPGA_EDISON: + return "Edison"; + case MLX5_FPGA_MORSE: + return "Morse"; + case MLX5_FPGA_MORSEQ: + return "MorseQ"; + } + + snprintf(ret, sizeof(ret), "Unknown %d", fpga_id); + return ret; +} + +static int mlx5_is_fpga_lookaside(u32 fpga_id) +{ + return fpga_id != MLX5_FPGA_NEWTON && fpga_id != MLX5_FPGA_EDISON; +} + +static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev) +{ + struct mlx5_fpga_query query; + int err; + + err = mlx5_fpga_query(fdev->mdev, &query); + if (err) { + mlx5_fpga_err(fdev, "Failed to query status: %d\n", err); + return err; + } + + fdev->last_admin_image = query.admin_image; + fdev->last_oper_image = query.oper_image; + + mlx5_fpga_info(fdev, "Status %u; Admin image %u; Oper image %u\n", + query.status, query.admin_image, query.oper_image); + + /* for FPGA lookaside projects FPGA load status is not important */ + if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id))) + return 0; + + if (query.status != MLX5_FPGA_STATUS_SUCCESS) { + mlx5_fpga_err(fdev, "%s image failed to load; status %u\n", + mlx5_fpga_image_name(fdev->last_oper_image), + query.status); + return -EIO; + } + + return 0; +} + +static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev) +{ + int err; + struct mlx5_core_dev *mdev = fdev->mdev; + + err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON); + if (err) { + mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err); + return err; + } + err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX); + if (err) { + mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err); + return err; + } + err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF); + if (err) { + mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err); + return err; + } + return 0; +} + +static int mlx5_fpga_event(struct mlx5_fpga_device *, unsigned long, void *); + +static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe) +{ + struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb); + + return mlx5_fpga_event(fdev, event, eqe); +} + +static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe) +{ + struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb); + + return mlx5_fpga_event(fdev, event, eqe); +} + 
+int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + unsigned int max_num_qps; + unsigned long flags; + u32 fpga_id; + int err; + + if (!fdev) + return 0; + + err = mlx5_fpga_caps(fdev->mdev); + if (err) + goto out; + + err = mlx5_fpga_device_load_check(fdev); + if (err) + goto out; + + fpga_id = MLX5_CAP_FPGA(fdev->mdev, fpga_id); + mlx5_fpga_info(fdev, "FPGA card %s:%u\n", mlx5_fpga_name(fpga_id), fpga_id); + + /* No QPs if FPGA does not participate in net processing */ + if (mlx5_is_fpga_lookaside(fpga_id)) + goto out; + + mlx5_fpga_info(fdev, "%s(%d): image, version %u; SBU %06x:%04x version %d\n", + mlx5_fpga_image_name(fdev->last_oper_image), + fdev->last_oper_image, + MLX5_CAP_FPGA(fdev->mdev, image_version), + MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id), + MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id), + MLX5_CAP_FPGA(fdev->mdev, sandbox_product_version)); + + max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps); + if (!max_num_qps) { + mlx5_fpga_err(fdev, "FPGA reports 0 QPs in SHELL_CAPS\n"); + err = -ENOTSUPP; + goto out; + } + + err = mlx5_core_reserve_gids(mdev, max_num_qps); + if (err) + goto out; + + MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR); + MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR); + mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb); + mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb); + + err = mlx5_fpga_conn_device_init(fdev); + if (err) + goto err_rsvd_gid; + + if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) { + err = mlx5_fpga_device_brb(fdev); + if (err) + goto err_conn_init; + } + + goto out; + +err_conn_init: + mlx5_fpga_conn_device_cleanup(fdev); + +err_rsvd_gid: + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb); + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb); + mlx5_core_unreserve_gids(mdev, max_num_qps); +out: + spin_lock_irqsave(&fdev->state_lock, flags); + fdev->state = err ? 
MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS; + spin_unlock_irqrestore(&fdev->state_lock, flags); + return err; +} + +int mlx5_fpga_init(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = NULL; + + if (!MLX5_CAP_GEN(mdev, fpga)) { + mlx5_core_dbg(mdev, "FPGA capability not present\n"); + return 0; + } + + mlx5_core_dbg(mdev, "Initializing FPGA\n"); + + fdev = mlx5_fpga_device_alloc(); + if (!fdev) + return -ENOMEM; + + fdev->mdev = mdev; + mdev->fpga = fdev; + + return 0; +} + +void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + unsigned int max_num_qps; + unsigned long flags; + int err; + + if (!fdev) + return; + + if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id))) + return; + + spin_lock_irqsave(&fdev->state_lock, flags); + if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) { + spin_unlock_irqrestore(&fdev->state_lock, flags); + return; + } + fdev->state = MLX5_FPGA_STATUS_NONE; + spin_unlock_irqrestore(&fdev->state_lock, flags); + + if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) { + err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON); + if (err) + mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n", + err); + } + + mlx5_fpga_conn_device_cleanup(fdev); + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb); + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb); + + max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps); + mlx5_core_unreserve_gids(mdev, max_num_qps); +} + +void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + + mlx5_fpga_device_stop(mdev); + kfree(fdev); + mdev->fpga = NULL; +} + +static const char *mlx5_fpga_syndrome_to_string(u8 syndrome) +{ + if (syndrome < ARRAY_SIZE(mlx5_fpga_error_strings)) + return mlx5_fpga_error_strings[syndrome]; + return "Unknown"; +} + +static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome) +{ + if (syndrome < ARRAY_SIZE(mlx5_fpga_qp_error_strings)) + return mlx5_fpga_qp_error_strings[syndrome]; + return "Unknown"; +} + +static int mlx5_fpga_event(struct mlx5_fpga_device *fdev, + unsigned long event, void *eqe) +{ + void *data = ((struct mlx5_eqe *)eqe)->data.raw; + const char *event_name; + bool teardown = false; + unsigned long flags; + u8 syndrome; + + switch (event) { + case MLX5_EVENT_TYPE_FPGA_ERROR: + syndrome = MLX5_GET(fpga_error_event, data, syndrome); + event_name = mlx5_fpga_syndrome_to_string(syndrome); + break; + case MLX5_EVENT_TYPE_FPGA_QP_ERROR: + syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome); + event_name = mlx5_fpga_qp_syndrome_to_string(syndrome); + break; + default: + return NOTIFY_DONE; + } + + spin_lock_irqsave(&fdev->state_lock, flags); + switch (fdev->state) { + case MLX5_FPGA_STATUS_SUCCESS: + mlx5_fpga_warn(fdev, "Error %u: %s\n", syndrome, event_name); + teardown = true; + break; + default: + mlx5_fpga_warn_ratelimited(fdev, "Unexpected error event %u: %s\n", + syndrome, event_name); + } + spin_unlock_irqrestore(&fdev->state_lock, flags); + /* We tear-down the card's interfaces and functionality because + * the FPGA bump-on-the-wire is misbehaving and we lose ability + * to communicate with the network. 
User may still be able to + * recover by re-programming or debugging the FPGA + */ + if (teardown) + mlx5_trigger_health_work(fdev->mdev); + + return NOTIFY_OK; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h new file mode 100644 index 000000000..750c32050 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_FPGA_CORE_H__ +#define __MLX5_FPGA_CORE_H__ + +#ifdef CONFIG_MLX5_FPGA + +#include <linux/mlx5/eq.h> + +#include "mlx5_core.h" +#include "lib/eq.h" +#include "fpga/cmd.h" + +/* Represents an Innova device */ +struct mlx5_fpga_device { + struct mlx5_core_dev *mdev; + struct mlx5_nb fpga_err_nb; + struct mlx5_nb fpga_qp_err_nb; + spinlock_t state_lock; /* Protects state transitions */ + enum mlx5_fpga_status state; + enum mlx5_fpga_image last_admin_image; + enum mlx5_fpga_image last_oper_image; + + /* QP Connection resources */ + struct { + u32 pdn; + u32 mkey; + struct mlx5_uars_page *uar; + } conn_res; +}; + +#define mlx5_fpga_dbg(__adev, format, ...) \ + mlx5_core_dbg((__adev)->mdev, "FPGA: %s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, ##__VA_ARGS__) + +#define mlx5_fpga_err(__adev, format, ...) \ + mlx5_core_err((__adev)->mdev, "FPGA: %s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, ##__VA_ARGS__) + +#define mlx5_fpga_warn(__adev, format, ...) \ + mlx5_core_warn((__adev)->mdev, "FPGA: %s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, ##__VA_ARGS__) + +#define mlx5_fpga_warn_ratelimited(__adev, format, ...) \ + mlx5_core_err_rl((__adev)->mdev, "FPGA: %s:%d: " \ + format, __func__, __LINE__, ##__VA_ARGS__) + +#define mlx5_fpga_notice(__adev, format, ...) \ + mlx5_core_info((__adev)->mdev, "FPGA: " format, ##__VA_ARGS__) + +#define mlx5_fpga_info(__adev, format, ...) 
\ + mlx5_core_info((__adev)->mdev, "FPGA: " format, ##__VA_ARGS__) + +int mlx5_fpga_init(struct mlx5_core_dev *mdev); +void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev); +int mlx5_fpga_device_start(struct mlx5_core_dev *mdev); +void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev); + +#else + +static inline int mlx5_fpga_init(struct mlx5_core_dev *mdev) +{ + return 0; +} + +static inline void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev) +{ +} + +static inline int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) +{ + return 0; +} + +static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev) +{ +} + +#endif + +#endif /* __MLX5_FPGA_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c new file mode 100644 index 000000000..14962969c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#include <linux/mlx5/device.h> + +#include "fpga/core.h" +#include "fpga/conn.h" +#include "fpga/sdk.h" + +struct mlx5_fpga_conn * +mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_conn_attr *attr) +{ + return mlx5_fpga_conn_create(fdev, attr, MLX5_FPGA_QPC_QP_TYPE_SANDBOX_QP); +} +EXPORT_SYMBOL(mlx5_fpga_sbu_conn_create); + +void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn) +{ + mlx5_fpga_conn_destroy(conn); +} +EXPORT_SYMBOL(mlx5_fpga_sbu_conn_destroy); + +int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + return mlx5_fpga_conn_send(conn, buf); +} +EXPORT_SYMBOL(mlx5_fpga_sbu_conn_sendmsg); + +static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size, + u64 addr, u8 *buf) +{ + size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX; + size_t bytes_done = 0; + u8 actual_size; + int err; + + if (!size) + return -EINVAL; + + if (!fdev->mdev) + return -ENOTCONN; + + while (bytes_done < size) { + actual_size = min(max_size, (size - bytes_done)); + + err = mlx5_fpga_access_reg(fdev->mdev, actual_size, + addr + bytes_done, + buf + bytes_done, false); + if (err) { + mlx5_fpga_err(fdev, "Failed to read over I2C: %d\n", + err); + break; + } + + bytes_done += actual_size; + } + + return err; +} + +static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size, + u64 addr, u8 *buf) +{ + size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX; + size_t bytes_done = 0; + u8 actual_size; + int err; + + if (!size) + return -EINVAL; + + if (!fdev->mdev) + return -ENOTCONN; + + while (bytes_done < size) { + actual_size = min(max_size, (size - bytes_done)); + + err = mlx5_fpga_access_reg(fdev->mdev, actual_size, + addr + bytes_done, + buf + bytes_done, true); + if (err) { + mlx5_fpga_err(fdev, "Failed to write FPGA crspace\n"); + break; + } + + bytes_done += actual_size; + } + + return err; +} + +int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr, + void *buf, enum mlx5_fpga_access_type access_type) +{ + int ret; + + switch (access_type) { + case MLX5_FPGA_ACCESS_TYPE_I2C: + ret = mlx5_fpga_mem_read_i2c(fdev, size, addr, buf); + if (ret) + return ret; + break; + default: + mlx5_fpga_warn(fdev, "Unexpected read access_type %u\n", + access_type); + return -EACCES; + } + + return size; +} +EXPORT_SYMBOL(mlx5_fpga_mem_read); + +int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr, + void *buf, enum mlx5_fpga_access_type access_type) +{ + int ret; + + switch (access_type) { + case MLX5_FPGA_ACCESS_TYPE_I2C: + ret = mlx5_fpga_mem_write_i2c(fdev, size, addr, buf); + if (ret) + return ret; + break; + default: + mlx5_fpga_warn(fdev, "Unexpected write access_type %u\n", + access_type); + return -EACCES; + } + + return size; +} +EXPORT_SYMBOL(mlx5_fpga_mem_write); + +int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf) +{ + return mlx5_fpga_sbu_caps(fdev->mdev, buf, size); +} +EXPORT_SYMBOL(mlx5_fpga_get_sbu_caps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h new file mode 100644 index 000000000..89ef59265 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef MLX5_FPGA_SDK_H +#define MLX5_FPGA_SDK_H + +#include <linux/types.h> +#include <linux/dma-direction.h> + +/** + * DOC: Innova SDK + * This header defines the in-kernel API for Innova FPGA client drivers. + */ +#define SBU_QP_QUEUE_SIZE 8 +#define MLX5_FPGA_CMD_TIMEOUT_MSEC (60 * 1000) + +/** + * enum mlx5_fpga_access_type - Enumerated the different methods possible for + * accessing the device memory address space + * + * @MLX5_FPGA_ACCESS_TYPE_I2C: Use the slow CX-FPGA I2C bus + * @MLX5_FPGA_ACCESS_TYPE_DONTCARE: Use the fastest available method + */ +enum mlx5_fpga_access_type { + MLX5_FPGA_ACCESS_TYPE_I2C = 0x0, + MLX5_FPGA_ACCESS_TYPE_DONTCARE = 0x0, +}; + +struct mlx5_fpga_conn; +struct mlx5_fpga_device; + +/** + * struct mlx5_fpga_dma_entry - A scatter-gather DMA entry + */ +struct mlx5_fpga_dma_entry { + /** @data: Virtual address pointer to the data */ + void *data; + /** @size: Size in bytes of the data */ + unsigned int size; + /** @dma_addr: Private member. 
Physical DMA-mapped address of the data */ + dma_addr_t dma_addr; +}; + +/** + * struct mlx5_fpga_dma_buf - A packet buffer + * May contain up to 2 scatter-gather data entries + */ +struct mlx5_fpga_dma_buf { + /** @dma_dir: DMA direction */ + enum dma_data_direction dma_dir; + /** @sg: Scatter-gather entries pointing to the data in memory */ + struct mlx5_fpga_dma_entry sg[2]; + /** @list: Item in SQ backlog, for TX packets */ + struct list_head list; + /** + * @complete: Completion routine, for TX packets + * @conn: FPGA Connection this packet was sent to + * @fdev: FPGA device this packet was sent to + * @buf: The packet buffer + * @status: 0 if successful, or an error code otherwise + */ + void (*complete)(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_device *fdev, + struct mlx5_fpga_dma_buf *buf, u8 status); +}; + +/** + * struct mlx5_fpga_conn_attr - FPGA connection attributes + * Describes the attributes of a connection + */ +struct mlx5_fpga_conn_attr { + /** @tx_size: Size of connection TX queue, in packets */ + unsigned int tx_size; + /** @rx_size: Size of connection RX queue, in packets */ + unsigned int rx_size; + /** + * @recv_cb: Callback function which is called for received packets + * @cb_arg: The value provided in mlx5_fpga_conn_attr.cb_arg + * @buf: A buffer containing a received packet + * + * buf is guaranteed to only contain a single scatter-gather entry. + * The size of the actual packet received is specified in buf.sg[0].size + * When this callback returns, the packet buffer may be re-used for + * subsequent receives. + */ + void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf); + /** @cb_arg: A context to be passed to recv_cb callback */ + void *cb_arg; +}; + +/** + * mlx5_fpga_sbu_conn_create() - Initialize a new FPGA SBU connection + * @fdev: The FPGA device + * @attr: Attributes of the new connection + * + * Sets up a new FPGA SBU connection with the specified attributes. + * The receive callback function may be called for incoming messages even + * before this function returns. + * + * The caller must eventually destroy the connection by calling + * mlx5_fpga_sbu_conn_destroy. + * + * Return: A new connection, or ERR_PTR() error value otherwise. + */ +struct mlx5_fpga_conn * +mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_conn_attr *attr); + +/** + * mlx5_fpga_sbu_conn_destroy() - Destroy an FPGA SBU connection + * @conn: The FPGA SBU connection to destroy + * + * Cleans up an FPGA SBU connection which was previously created with + * mlx5_fpga_sbu_conn_create. + */ +void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn); + +/** + * mlx5_fpga_sbu_conn_sendmsg() - Queue the transmission of a packet + * @conn: An FPGA SBU connection + * @buf: The packet buffer + * + * Queues a packet for transmission over an FPGA SBU connection. + * The buffer should not be modified or freed until completion. + * Upon completion, the buf's complete() callback is invoked, indicating the + * success or error status of the transmission. + * + * Return: 0 if successful, or an error value otherwise. + */ +int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf); + +/** + * mlx5_fpga_mem_read() - Read from FPGA memory address space + * @fdev: The FPGA device + * @size: Size of chunk to read, in bytes + * @addr: Starting address to read from, in FPGA address space + * @buf: Buffer to read into + * @access_type: Method for reading + * + * Reads from the specified address into the specified buffer. 
+ * The address may point to configuration space or to DDR. + * Large reads may be performed internally as several non-atomic operations. + * This function may sleep, so should not be called from atomic contexts. + * + * Return: 0 if successful, or an error value otherwise. + */ +int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr, + void *buf, enum mlx5_fpga_access_type access_type); + +/** + * mlx5_fpga_mem_write() - Write to FPGA memory address space + * @fdev: The FPGA device + * @size: Size of chunk to write, in bytes + * @addr: Starting address to write to, in FPGA address space + * @buf: Buffer which contains data to write + * @access_type: Method for writing + * + * Writes the specified buffer data to FPGA memory at the specified address. + * The address may point to configuration space or to DDR. + * Large writes may be performed internally as several non-atomic operations. + * This function may sleep, so should not be called from atomic contexts. + * + * Return: 0 if successful, or an error value otherwise. + */ +int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr, + void *buf, enum mlx5_fpga_access_type access_type); + +/** + * mlx5_fpga_get_sbu_caps() - Read the SBU capabilities + * @fdev: The FPGA device + * @size: Size of the buffer to read into + * @buf: Buffer to read the capabilities into + * + * Reads the FPGA SBU capabilities into the specified buffer. + * The format of the capabilities buffer is SBU-dependent. + * + * Return: 0 if successful + * -EINVAL if the buffer is not large enough to contain SBU caps + * or any other error value otherwise. + */ +int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf); + +#endif /* MLX5_FPGA_SDK_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c new file mode 100644 index 000000000..32d4c9674 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -0,0 +1,1107 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/device.h> +#include <linux/mlx5/mlx5_ifc.h> + +#include "fs_core.h" +#include "fs_cmd.h" +#include "fs_ft_pool.h" +#include "mlx5_core.h" +#include "eswitch.h" + +static int mlx5_cmd_stub_update_root_ft(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + u32 underlay_qpn, + bool disconnect) +{ + return 0; +} + +static int mlx5_cmd_stub_create_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table_attr *ft_attr, + struct mlx5_flow_table *next_ft) +{ + int max_fte = ft_attr->max_fte; + + ft->max_fte = max_fte ? roundup_pow_of_two(max_fte) : 1; + + return 0; +} + +static int mlx5_cmd_stub_destroy_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft) +{ + return 0; +} + +static int mlx5_cmd_stub_modify_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + return 0; +} + +static int mlx5_cmd_stub_create_flow_group(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + u32 *in, + struct mlx5_flow_group *fg) +{ + return 0; +} + +static int mlx5_cmd_stub_destroy_flow_group(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg) +{ + return 0; +} + +static int mlx5_cmd_stub_create_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + struct fs_fte *fte) +{ + return 0; +} + +static int mlx5_cmd_stub_update_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + int modify_mask, + struct fs_fte *fte) +{ + return -EOPNOTSUPP; +} + +static int mlx5_cmd_stub_delete_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct fs_fte *fte) +{ + return 0; +} + +static int mlx5_cmd_stub_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat_params *params, + enum mlx5_flow_namespace_type namespace, + struct mlx5_pkt_reformat *pkt_reformat) +{ + return 0; +} + +static void mlx5_cmd_stub_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat *pkt_reformat) +{ +} + +static int mlx5_cmd_stub_modify_header_alloc(struct mlx5_flow_root_namespace *ns, + u8 namespace, u8 num_actions, + void *modify_actions, + struct mlx5_modify_hdr *modify_hdr) +{ + return 0; +} + +static void mlx5_cmd_stub_modify_header_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_modify_hdr *modify_hdr) +{ +} + +static int mlx5_cmd_stub_set_peer(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns) +{ + return 0; +} + +static int mlx5_cmd_stub_create_ns(struct mlx5_flow_root_namespace *ns) +{ + return 0; +} + +static int mlx5_cmd_stub_destroy_ns(struct mlx5_flow_root_namespace *ns) +{ + return 0; +} + +static u32 mlx5_cmd_stub_get_capabilities(struct mlx5_flow_root_namespace *ns, + enum fs_flow_table_type ft_type) +{ + return 0; +} + +static int mlx5_cmd_set_slave_root_fdb(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave, + bool ft_id_valid, + u32 ft_id) +{ + u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {}; + u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {}; + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_namespace *ns; + + MLX5_SET(set_flow_table_root_in, in, opcode, + MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); + MLX5_SET(set_flow_table_root_in, in, table_type, + FS_FT_FDB); + if (ft_id_valid) { + MLX5_SET(set_flow_table_root_in, in, + 
table_eswitch_owner_vhca_id_valid, 1); + MLX5_SET(set_flow_table_root_in, in, + table_eswitch_owner_vhca_id, + MLX5_CAP_GEN(master, vhca_id)); + MLX5_SET(set_flow_table_root_in, in, table_id, + ft_id); + } else { + ns = mlx5_get_flow_namespace(slave, + MLX5_FLOW_NAMESPACE_FDB); + root = find_root(&ns->node); + MLX5_SET(set_flow_table_root_in, in, table_id, + root->root_ft->id); + } + + return mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out)); +} + +static int +mlx5_cmd_stub_destroy_match_definer(struct mlx5_flow_root_namespace *ns, + int definer_id) +{ + return 0; +} + +static int +mlx5_cmd_stub_create_match_definer(struct mlx5_flow_root_namespace *ns, + u16 format_id, u32 *match_mask) +{ + return 0; +} + +static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, u32 underlay_qpn, + bool disconnect) +{ + u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {}; + struct mlx5_core_dev *dev = ns->dev; + int err; + + if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && + underlay_qpn == 0) + return 0; + + if (ft->type == FS_FT_FDB && + mlx5_lag_is_shared_fdb(dev) && + !mlx5_lag_is_master(dev)) + return 0; + + MLX5_SET(set_flow_table_root_in, in, opcode, + MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); + MLX5_SET(set_flow_table_root_in, in, table_type, ft->type); + + if (disconnect) + MLX5_SET(set_flow_table_root_in, in, op_mod, 1); + else + MLX5_SET(set_flow_table_root_in, in, table_id, ft->id); + + MLX5_SET(set_flow_table_root_in, in, underlay_qpn, underlay_qpn); + MLX5_SET(set_flow_table_root_in, in, vport_number, ft->vport); + MLX5_SET(set_flow_table_root_in, in, other_vport, + !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT)); + + err = mlx5_cmd_exec_in(dev, set_flow_table_root, in); + if (!err && + ft->type == FS_FT_FDB && + mlx5_lag_is_shared_fdb(dev) && + mlx5_lag_is_master(dev)) { + err = mlx5_cmd_set_slave_root_fdb(dev, + mlx5_lag_get_peer_mdev(dev), + !disconnect, (!disconnect) ? + ft->id : 0); + if (err && !disconnect) { + MLX5_SET(set_flow_table_root_in, in, op_mod, 0); + MLX5_SET(set_flow_table_root_in, in, table_id, + ns->root_ft->id); + mlx5_cmd_exec_in(dev, set_flow_table_root, in); + } + } + + return err; +} + +static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table_attr *ft_attr, + struct mlx5_flow_table *next_ft) +{ + int en_encap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT); + int en_decap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + int term = !!(ft->flags & MLX5_FLOW_TABLE_TERMINATION); + u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {}; + u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {}; + struct mlx5_core_dev *dev = ns->dev; + unsigned int size; + int err; + + if (ft_attr->max_fte != POOL_NEXT_SIZE) + size = roundup_pow_of_two(ft_attr->max_fte); + size = mlx5_ft_pool_get_avail_sz(dev, ft->type, ft_attr->max_fte); + if (!size) + return -ENOSPC; + + MLX5_SET(create_flow_table_in, in, opcode, + MLX5_CMD_OP_CREATE_FLOW_TABLE); + + MLX5_SET(create_flow_table_in, in, uid, ft_attr->uid); + MLX5_SET(create_flow_table_in, in, table_type, ft->type); + MLX5_SET(create_flow_table_in, in, flow_table_context.level, ft->level); + MLX5_SET(create_flow_table_in, in, flow_table_context.log_size, size ? 
ilog2(size) : 0); + MLX5_SET(create_flow_table_in, in, vport_number, ft->vport); + MLX5_SET(create_flow_table_in, in, other_vport, + !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT)); + + MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en, + en_decap); + MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en, + en_encap); + MLX5_SET(create_flow_table_in, in, flow_table_context.termination_table, + term); + + switch (ft->op_mod) { + case FS_FT_OP_MOD_NORMAL: + if (next_ft) { + MLX5_SET(create_flow_table_in, in, + flow_table_context.table_miss_action, + MLX5_FLOW_TABLE_MISS_ACTION_FWD); + MLX5_SET(create_flow_table_in, in, + flow_table_context.table_miss_id, next_ft->id); + } else { + MLX5_SET(create_flow_table_in, in, + flow_table_context.table_miss_action, + ft->def_miss_action); + } + break; + + case FS_FT_OP_MOD_LAG_DEMUX: + MLX5_SET(create_flow_table_in, in, op_mod, 0x1); + if (next_ft) + MLX5_SET(create_flow_table_in, in, + flow_table_context.lag_master_next_table_id, + next_ft->id); + break; + } + + err = mlx5_cmd_exec_inout(dev, create_flow_table, in, out); + if (!err) { + ft->id = MLX5_GET(create_flow_table_out, out, + table_id); + ft->max_fte = size; + } else { + mlx5_ft_pool_put_sz(ns->dev, size); + } + + return err; +} + +static int mlx5_cmd_destroy_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {}; + struct mlx5_core_dev *dev = ns->dev; + int err; + + MLX5_SET(destroy_flow_table_in, in, opcode, + MLX5_CMD_OP_DESTROY_FLOW_TABLE); + MLX5_SET(destroy_flow_table_in, in, table_type, ft->type); + MLX5_SET(destroy_flow_table_in, in, table_id, ft->id); + MLX5_SET(destroy_flow_table_in, in, vport_number, ft->vport); + MLX5_SET(destroy_flow_table_in, in, other_vport, + !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT)); + + err = mlx5_cmd_exec_in(dev, destroy_flow_table, in); + if (!err) + mlx5_ft_pool_put_sz(ns->dev, ft->max_fte); + + return err; +} + +static int mlx5_cmd_modify_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)] = {}; + struct mlx5_core_dev *dev = ns->dev; + + MLX5_SET(modify_flow_table_in, in, opcode, + MLX5_CMD_OP_MODIFY_FLOW_TABLE); + MLX5_SET(modify_flow_table_in, in, table_type, ft->type); + MLX5_SET(modify_flow_table_in, in, table_id, ft->id); + + if (ft->op_mod == FS_FT_OP_MOD_LAG_DEMUX) { + MLX5_SET(modify_flow_table_in, in, modify_field_select, + MLX5_MODIFY_FLOW_TABLE_LAG_NEXT_TABLE_ID); + if (next_ft) { + MLX5_SET(modify_flow_table_in, in, + flow_table_context.lag_master_next_table_id, next_ft->id); + } else { + MLX5_SET(modify_flow_table_in, in, + flow_table_context.lag_master_next_table_id, 0); + } + } else { + MLX5_SET(modify_flow_table_in, in, vport_number, ft->vport); + MLX5_SET(modify_flow_table_in, in, other_vport, + !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT)); + MLX5_SET(modify_flow_table_in, in, modify_field_select, + MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID); + if (next_ft) { + MLX5_SET(modify_flow_table_in, in, + flow_table_context.table_miss_action, + MLX5_FLOW_TABLE_MISS_ACTION_FWD); + MLX5_SET(modify_flow_table_in, in, + flow_table_context.table_miss_id, + next_ft->id); + } else { + MLX5_SET(modify_flow_table_in, in, + flow_table_context.table_miss_action, + ft->def_miss_action); + } + } + + return mlx5_cmd_exec_in(dev, modify_flow_table, in); +} + +static int mlx5_cmd_create_flow_group(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table 
*ft, + u32 *in, + struct mlx5_flow_group *fg) +{ + u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {}; + struct mlx5_core_dev *dev = ns->dev; + int err; + + MLX5_SET(create_flow_group_in, in, opcode, + MLX5_CMD_OP_CREATE_FLOW_GROUP); + MLX5_SET(create_flow_group_in, in, table_type, ft->type); + MLX5_SET(create_flow_group_in, in, table_id, ft->id); + if (ft->vport) { + MLX5_SET(create_flow_group_in, in, vport_number, ft->vport); + MLX5_SET(create_flow_group_in, in, other_vport, 1); + } + + MLX5_SET(create_flow_group_in, in, vport_number, ft->vport); + MLX5_SET(create_flow_group_in, in, other_vport, + !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT)); + err = mlx5_cmd_exec_inout(dev, create_flow_group, in, out); + if (!err) + fg->id = MLX5_GET(create_flow_group_out, out, + group_id); + return err; +} + +static int mlx5_cmd_destroy_flow_group(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {}; + struct mlx5_core_dev *dev = ns->dev; + + MLX5_SET(destroy_flow_group_in, in, opcode, + MLX5_CMD_OP_DESTROY_FLOW_GROUP); + MLX5_SET(destroy_flow_group_in, in, table_type, ft->type); + MLX5_SET(destroy_flow_group_in, in, table_id, ft->id); + MLX5_SET(destroy_flow_group_in, in, group_id, fg->id); + MLX5_SET(destroy_flow_group_in, in, vport_number, ft->vport); + MLX5_SET(destroy_flow_group_in, in, other_vport, + !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT)); + return mlx5_cmd_exec_in(dev, destroy_flow_group, in); +} + +static int mlx5_set_extended_dest(struct mlx5_core_dev *dev, + struct fs_fte *fte, bool *extended_dest) +{ + int fw_log_max_fdb_encap_uplink = + MLX5_CAP_ESW(dev, log_max_fdb_encap_uplink); + int num_fwd_destinations = 0; + struct mlx5_flow_rule *dst; + int num_encap = 0; + + *extended_dest = false; + if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return 0; + + list_for_each_entry(dst, &fte->node.children, node.list) { + if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER || + dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_NONE) + continue; + if ((dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT || + dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) && + dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) + num_encap++; + num_fwd_destinations++; + } + if (num_fwd_destinations > 1 && num_encap > 0) + *extended_dest = true; + + if (*extended_dest && !fw_log_max_fdb_encap_uplink) { + mlx5_core_warn(dev, "FW does not support extended destination"); + return -EOPNOTSUPP; + } + if (num_encap > (1 << fw_log_max_fdb_encap_uplink)) { + mlx5_core_warn(dev, "FW does not support more than %d encaps", + 1 << fw_log_max_fdb_encap_uplink); + return -EOPNOTSUPP; + } + + return 0; +} + +static void +mlx5_cmd_set_fte_flow_meter(struct fs_fte *fte, void *in_flow_context) +{ + void *exe_aso_ctrl; + void *execute_aso; + + execute_aso = MLX5_ADDR_OF(flow_context, in_flow_context, + execute_aso[0]); + MLX5_SET(execute_aso, execute_aso, valid, 1); + MLX5_SET(execute_aso, execute_aso, aso_object_id, + fte->action.exe_aso.object_id); + + exe_aso_ctrl = MLX5_ADDR_OF(execute_aso, execute_aso, exe_aso_ctrl); + MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, return_reg_id, + fte->action.exe_aso.return_reg_id); + MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, aso_type, + fte->action.exe_aso.type); + MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, init_color, + fte->action.exe_aso.flow_meter.init_color); + MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, meter_id, + 
fte->action.exe_aso.flow_meter.meter_idx); +} + +static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, + int opmod, int modify_mask, + struct mlx5_flow_table *ft, + unsigned group_id, + struct fs_fte *fte) +{ + u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0}; + bool extended_dest = false; + struct mlx5_flow_rule *dst; + void *in_flow_context, *vlan; + void *in_match_value; + unsigned int inlen; + int dst_cnt_size; + void *in_dests; + u32 *in; + int err; + + if (mlx5_set_extended_dest(dev, fte, &extended_dest)) + return -EOPNOTSUPP; + + if (!extended_dest) + dst_cnt_size = MLX5_ST_SZ_BYTES(dest_format_struct); + else + dst_cnt_size = MLX5_ST_SZ_BYTES(extended_dest_format); + + inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * dst_cnt_size; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); + MLX5_SET(set_fte_in, in, op_mod, opmod); + MLX5_SET(set_fte_in, in, modify_enable_mask, modify_mask); + MLX5_SET(set_fte_in, in, table_type, ft->type); + MLX5_SET(set_fte_in, in, table_id, ft->id); + MLX5_SET(set_fte_in, in, flow_index, fte->index); + MLX5_SET(set_fte_in, in, ignore_flow_level, + !!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL)); + + MLX5_SET(set_fte_in, in, vport_number, ft->vport); + MLX5_SET(set_fte_in, in, other_vport, + !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT)); + + in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); + MLX5_SET(flow_context, in_flow_context, group_id, group_id); + + MLX5_SET(flow_context, in_flow_context, flow_tag, + fte->flow_context.flow_tag); + MLX5_SET(flow_context, in_flow_context, flow_source, + fte->flow_context.flow_source); + + MLX5_SET(flow_context, in_flow_context, extended_destination, + extended_dest); + if (extended_dest) { + u32 action; + + action = fte->action.action & + ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + MLX5_SET(flow_context, in_flow_context, action, action); + } else { + MLX5_SET(flow_context, in_flow_context, action, + fte->action.action); + if (fte->action.pkt_reformat) + MLX5_SET(flow_context, in_flow_context, packet_reformat_id, + fte->action.pkt_reformat->id); + } + if (fte->action.modify_hdr) + MLX5_SET(flow_context, in_flow_context, modify_header_id, + fte->action.modify_hdr->id); + + MLX5_SET(flow_context, in_flow_context, encrypt_decrypt_type, + fte->action.crypto.type); + MLX5_SET(flow_context, in_flow_context, encrypt_decrypt_obj_id, + fte->action.crypto.obj_id); + + vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan); + + MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[0].ethtype); + MLX5_SET(vlan, vlan, vid, fte->action.vlan[0].vid); + MLX5_SET(vlan, vlan, prio, fte->action.vlan[0].prio); + + vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan_2); + + MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[1].ethtype); + MLX5_SET(vlan, vlan, vid, fte->action.vlan[1].vid); + MLX5_SET(vlan, vlan, prio, fte->action.vlan[1].prio); + + in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, + match_value); + memcpy(in_match_value, &fte->val, sizeof(fte->val)); + + in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + int list_size = 0; + + list_for_each_entry(dst, &fte->node.children, node.list) { + enum mlx5_flow_destination_type type = dst->dest_attr.type; + enum mlx5_ifc_flow_destination_type ifc_type; + unsigned int id; + + if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + switch (type) { + case 
MLX5_FLOW_DESTINATION_TYPE_NONE: + continue; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: + id = dst->dest_attr.ft_num; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE; + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + id = dst->dest_attr.ft->id; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE; + break; + case MLX5_FLOW_DESTINATION_TYPE_UPLINK: + case MLX5_FLOW_DESTINATION_TYPE_VPORT: + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id_valid, + !!(dst->dest_attr.vport.flags & + MLX5_FLOW_DEST_VPORT_VHCA_ID)); + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id, + dst->dest_attr.vport.vhca_id); + if (type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) { + /* destination_id is reserved */ + id = 0; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_UPLINK; + break; + } + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT; + id = dst->dest_attr.vport.num; + if (extended_dest && + dst->dest_attr.vport.pkt_reformat) { + MLX5_SET(dest_format_struct, in_dests, + packet_reformat, + !!(dst->dest_attr.vport.flags & + MLX5_FLOW_DEST_VPORT_REFORMAT_ID)); + MLX5_SET(extended_dest_format, in_dests, + packet_reformat_id, + dst->dest_attr.vport.pkt_reformat->id); + } + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER: + id = dst->dest_attr.sampler_id; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_SAMPLER; + break; + default: + id = dst->dest_attr.tir_num; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TIR; + } + + MLX5_SET(dest_format_struct, in_dests, destination_type, + ifc_type); + MLX5_SET(dest_format_struct, in_dests, destination_id, id); + in_dests += dst_cnt_size; + list_size++; + } + + MLX5_SET(flow_context, in_flow_context, destination_list_size, + list_size); + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev, + log_max_flow_counter, + ft->type)); + int list_size = 0; + + list_for_each_entry(dst, &fte->node.children, node.list) { + if (dst->dest_attr.type != + MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + MLX5_SET(flow_counter_list, in_dests, flow_counter_id, + dst->dest_attr.counter_id); + in_dests += dst_cnt_size; + list_size++; + } + if (list_size > max_list_size) { + err = -EINVAL; + goto err_out; + } + + MLX5_SET(flow_context, in_flow_context, flow_counter_list_size, + list_size); + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) { + if (fte->action.exe_aso.type == MLX5_EXE_ASO_FLOW_METER) { + mlx5_cmd_set_fte_flow_meter(fte, in_flow_context); + } else { + err = -EOPNOTSUPP; + goto err_out; + } + } + + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +err_out: + kvfree(in); + return err; +} + +static int mlx5_cmd_create_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + struct fs_fte *fte) +{ + struct mlx5_core_dev *dev = ns->dev; + unsigned int group_id = group->id; + + return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte); +} + +static int mlx5_cmd_update_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg, + int modify_mask, + struct fs_fte *fte) +{ + int opmod; + struct mlx5_core_dev *dev = ns->dev; + int atomic_mod_cap = MLX5_CAP_FLOWTABLE(dev, + flow_table_properties_nic_receive. 
+ flow_modify_en); + if (!atomic_mod_cap) + return -EOPNOTSUPP; + opmod = 1; + + return mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, fg->id, fte); +} + +static int mlx5_cmd_delete_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct fs_fte *fte) +{ + u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {}; + struct mlx5_core_dev *dev = ns->dev; + + MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); + MLX5_SET(delete_fte_in, in, table_type, ft->type); + MLX5_SET(delete_fte_in, in, table_id, ft->id); + MLX5_SET(delete_fte_in, in, flow_index, fte->index); + MLX5_SET(delete_fte_in, in, vport_number, ft->vport); + MLX5_SET(delete_fte_in, in, other_vport, + !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT)); + + return mlx5_cmd_exec_in(dev, delete_fte, in); +} + +int mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, + enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask, + u32 *id) +{ + u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)] = {}; + int err; + + MLX5_SET(alloc_flow_counter_in, in, opcode, + MLX5_CMD_OP_ALLOC_FLOW_COUNTER); + MLX5_SET(alloc_flow_counter_in, in, flow_counter_bulk, alloc_bitmask); + + err = mlx5_cmd_exec_inout(dev, alloc_flow_counter, in, out); + if (!err) + *id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id); + return err; +} + +int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id) +{ + return mlx5_cmd_fc_bulk_alloc(dev, 0, id); +} + +int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)] = {}; + + MLX5_SET(dealloc_flow_counter_in, in, opcode, + MLX5_CMD_OP_DEALLOC_FLOW_COUNTER); + MLX5_SET(dealloc_flow_counter_in, in, flow_counter_id, id); + return mlx5_cmd_exec_in(dev, dealloc_flow_counter, in); +} + +int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id, + u64 *packets, u64 *bytes) +{ + u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) + + MLX5_ST_SZ_BYTES(traffic_counter)] = {}; + u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {}; + void *stats; + int err = 0; + + MLX5_SET(query_flow_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_FLOW_COUNTER); + MLX5_SET(query_flow_counter_in, in, op_mod, 0); + MLX5_SET(query_flow_counter_in, in, flow_counter_id, id); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + stats = MLX5_ADDR_OF(query_flow_counter_out, out, flow_statistics); + *packets = MLX5_GET64(traffic_counter, stats, packets); + *bytes = MLX5_GET64(traffic_counter, stats, octets); + return 0; +} + +int mlx5_cmd_fc_get_bulk_query_out_len(int bulk_len) +{ + return MLX5_ST_SZ_BYTES(query_flow_counter_out) + + MLX5_ST_SZ_BYTES(traffic_counter) * bulk_len; +} + +int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len, + u32 *out) +{ + int outlen = mlx5_cmd_fc_get_bulk_query_out_len(bulk_len); + u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {}; + + MLX5_SET(query_flow_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_FLOW_COUNTER); + MLX5_SET(query_flow_counter_in, in, flow_counter_id, base_id); + MLX5_SET(query_flow_counter_in, in, num_of_counters, bulk_len); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat_params *params, + enum mlx5_flow_namespace_type namespace, + struct mlx5_pkt_reformat *pkt_reformat) +{ + u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)] = {}; + struct mlx5_core_dev *dev = ns->dev; + void *packet_reformat_context_in; + int 
max_encap_size; + void *reformat; + int inlen; + int err; + u32 *in; + + if (namespace == MLX5_FLOW_NAMESPACE_FDB || + namespace == MLX5_FLOW_NAMESPACE_FDB_BYPASS) + max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size); + else + max_encap_size = MLX5_CAP_FLOWTABLE(dev, max_encap_header_size); + + if (params->size > max_encap_size) { + mlx5_core_warn(dev, "encap size %zd too big, max supported is %d\n", + params->size, max_encap_size); + return -EINVAL; + } + + in = kzalloc(MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in) + + params->size, GFP_KERNEL); + if (!in) + return -ENOMEM; + + packet_reformat_context_in = MLX5_ADDR_OF(alloc_packet_reformat_context_in, + in, packet_reformat_context); + reformat = MLX5_ADDR_OF(packet_reformat_context_in, + packet_reformat_context_in, + reformat_data); + inlen = reformat - (void *)in + params->size; + + MLX5_SET(alloc_packet_reformat_context_in, in, opcode, + MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_SET(packet_reformat_context_in, packet_reformat_context_in, + reformat_data_size, params->size); + MLX5_SET(packet_reformat_context_in, packet_reformat_context_in, + reformat_type, params->type); + MLX5_SET(packet_reformat_context_in, packet_reformat_context_in, + reformat_param_0, params->param_0); + MLX5_SET(packet_reformat_context_in, packet_reformat_context_in, + reformat_param_1, params->param_1); + if (params->data && params->size) + memcpy(reformat, params->data, params->size); + + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + + pkt_reformat->id = MLX5_GET(alloc_packet_reformat_context_out, + out, packet_reformat_id); + kfree(in); + return err; +} + +static void mlx5_cmd_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat *pkt_reformat) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)] = {}; + struct mlx5_core_dev *dev = ns->dev; + + MLX5_SET(dealloc_packet_reformat_context_in, in, opcode, + MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id, + pkt_reformat->id); + + mlx5_cmd_exec_in(dev, dealloc_packet_reformat_context, in); +} + +static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns, + u8 namespace, u8 num_actions, + void *modify_actions, + struct mlx5_modify_hdr *modify_hdr) +{ + u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)] = {}; + int max_actions, actions_size, inlen, err; + struct mlx5_core_dev *dev = ns->dev; + void *actions_in; + u8 table_type; + u32 *in; + + switch (namespace) { + case MLX5_FLOW_NAMESPACE_FDB: + case MLX5_FLOW_NAMESPACE_FDB_BYPASS: + max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, max_modify_header_actions); + table_type = FS_FT_FDB; + break; + case MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC: + case MLX5_FLOW_NAMESPACE_KERNEL: + case MLX5_FLOW_NAMESPACE_BYPASS: + max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(dev, max_modify_header_actions); + table_type = FS_FT_NIC_RX; + break; + case MLX5_FLOW_NAMESPACE_EGRESS: + case MLX5_FLOW_NAMESPACE_EGRESS_IPSEC: + case MLX5_FLOW_NAMESPACE_EGRESS_MACSEC: + max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions); + table_type = FS_FT_NIC_TX; + break; + case MLX5_FLOW_NAMESPACE_ESW_INGRESS: + max_actions = MLX5_CAP_ESW_INGRESS_ACL(dev, max_modify_header_actions); + table_type = FS_FT_ESW_INGRESS_ACL; + break; + case MLX5_FLOW_NAMESPACE_RDMA_TX: + max_actions = MLX5_CAP_FLOWTABLE_RDMA_TX(dev, max_modify_header_actions); + table_type = FS_FT_RDMA_TX; + break; + default: + return -EOPNOTSUPP; + } + + if (num_actions > 
max_actions) { + mlx5_core_warn(dev, "too many modify header actions %d, max supported %d\n", + num_actions, max_actions); + return -EOPNOTSUPP; + } + + actions_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) * num_actions; + inlen = MLX5_ST_SZ_BYTES(alloc_modify_header_context_in) + actions_size; + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(alloc_modify_header_context_in, in, opcode, + MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(alloc_modify_header_context_in, in, table_type, table_type); + MLX5_SET(alloc_modify_header_context_in, in, num_of_actions, num_actions); + + actions_in = MLX5_ADDR_OF(alloc_modify_header_context_in, in, actions); + memcpy(actions_in, modify_actions, actions_size); + + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + + modify_hdr->id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id); + kfree(in); + return err; +} + +static void mlx5_cmd_modify_header_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_modify_hdr *modify_hdr) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)] = {}; + struct mlx5_core_dev *dev = ns->dev; + + MLX5_SET(dealloc_modify_header_context_in, in, opcode, + MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id, + modify_hdr->id); + + mlx5_cmd_exec_in(dev, dealloc_modify_header_context, in); +} + +static int mlx5_cmd_destroy_match_definer(struct mlx5_flow_root_namespace *ns, + int definer_id) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_OBJ_TYPE_MATCH_DEFINER); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, definer_id); + + return mlx5_cmd_exec(ns->dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_cmd_create_match_definer(struct mlx5_flow_root_namespace *ns, + u16 format_id, u32 *match_mask) +{ + u32 out[MLX5_ST_SZ_DW(create_match_definer_out)] = {}; + u32 in[MLX5_ST_SZ_DW(create_match_definer_in)] = {}; + struct mlx5_core_dev *dev = ns->dev; + void *ptr; + int err; + + MLX5_SET(create_match_definer_in, in, general_obj_in_cmd_hdr.opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(create_match_definer_in, in, general_obj_in_cmd_hdr.obj_type, + MLX5_OBJ_TYPE_MATCH_DEFINER); + + ptr = MLX5_ADDR_OF(create_match_definer_in, in, obj_context); + MLX5_SET(match_definer, ptr, format_id, format_id); + + ptr = MLX5_ADDR_OF(match_definer, ptr, match_mask); + memcpy(ptr, match_mask, MLX5_FLD_SZ_BYTES(match_definer, match_mask)); + + err = mlx5_cmd_exec_inout(dev, create_match_definer, in, out); + return err ? 
err : MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); +} + +static u32 mlx5_cmd_get_capabilities(struct mlx5_flow_root_namespace *ns, + enum fs_flow_table_type ft_type) +{ + return 0; +} + +static const struct mlx5_flow_cmds mlx5_flow_cmds = { + .create_flow_table = mlx5_cmd_create_flow_table, + .destroy_flow_table = mlx5_cmd_destroy_flow_table, + .modify_flow_table = mlx5_cmd_modify_flow_table, + .create_flow_group = mlx5_cmd_create_flow_group, + .destroy_flow_group = mlx5_cmd_destroy_flow_group, + .create_fte = mlx5_cmd_create_fte, + .update_fte = mlx5_cmd_update_fte, + .delete_fte = mlx5_cmd_delete_fte, + .update_root_ft = mlx5_cmd_update_root_ft, + .packet_reformat_alloc = mlx5_cmd_packet_reformat_alloc, + .packet_reformat_dealloc = mlx5_cmd_packet_reformat_dealloc, + .modify_header_alloc = mlx5_cmd_modify_header_alloc, + .modify_header_dealloc = mlx5_cmd_modify_header_dealloc, + .create_match_definer = mlx5_cmd_create_match_definer, + .destroy_match_definer = mlx5_cmd_destroy_match_definer, + .set_peer = mlx5_cmd_stub_set_peer, + .create_ns = mlx5_cmd_stub_create_ns, + .destroy_ns = mlx5_cmd_stub_destroy_ns, + .get_capabilities = mlx5_cmd_get_capabilities, +}; + +static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = { + .create_flow_table = mlx5_cmd_stub_create_flow_table, + .destroy_flow_table = mlx5_cmd_stub_destroy_flow_table, + .modify_flow_table = mlx5_cmd_stub_modify_flow_table, + .create_flow_group = mlx5_cmd_stub_create_flow_group, + .destroy_flow_group = mlx5_cmd_stub_destroy_flow_group, + .create_fte = mlx5_cmd_stub_create_fte, + .update_fte = mlx5_cmd_stub_update_fte, + .delete_fte = mlx5_cmd_stub_delete_fte, + .update_root_ft = mlx5_cmd_stub_update_root_ft, + .packet_reformat_alloc = mlx5_cmd_stub_packet_reformat_alloc, + .packet_reformat_dealloc = mlx5_cmd_stub_packet_reformat_dealloc, + .modify_header_alloc = mlx5_cmd_stub_modify_header_alloc, + .modify_header_dealloc = mlx5_cmd_stub_modify_header_dealloc, + .create_match_definer = mlx5_cmd_stub_create_match_definer, + .destroy_match_definer = mlx5_cmd_stub_destroy_match_definer, + .set_peer = mlx5_cmd_stub_set_peer, + .create_ns = mlx5_cmd_stub_create_ns, + .destroy_ns = mlx5_cmd_stub_destroy_ns, + .get_capabilities = mlx5_cmd_stub_get_capabilities, +}; + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void) +{ + return &mlx5_flow_cmds; +} + +static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_stub_cmds(void) +{ + return &mlx5_flow_cmd_stubs; +} + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type) +{ + switch (type) { + case FS_FT_NIC_RX: + case FS_FT_ESW_EGRESS_ACL: + case FS_FT_ESW_INGRESS_ACL: + case FS_FT_FDB: + case FS_FT_SNIFFER_RX: + case FS_FT_SNIFFER_TX: + case FS_FT_NIC_TX: + case FS_FT_RDMA_RX: + case FS_FT_RDMA_TX: + case FS_FT_PORT_SEL: + return mlx5_fs_cmd_get_fw_cmds(); + default: + return mlx5_fs_cmd_get_stub_cmds(); + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h new file mode 100644 index 000000000..8ef4254b9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _MLX5_FS_CMD_ +#define _MLX5_FS_CMD_ + +#include "fs_core.h" + +struct mlx5_flow_cmds { + int (*create_flow_table)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table_attr *ft_attr, + struct mlx5_flow_table *next_ft); + int (*destroy_flow_table)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft); + + int (*modify_flow_table)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft); + + int (*create_flow_group)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + u32 *in, + struct mlx5_flow_group *fg); + + int (*destroy_flow_group)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg); + + int (*create_fte)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg, + struct fs_fte *fte); + + int (*update_fte)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg, + int modify_mask, + struct fs_fte *fte); + + int (*delete_fte)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct fs_fte *fte); + + int (*update_root_ft)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + u32 underlay_qpn, + bool disconnect); + + int (*packet_reformat_alloc)(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat_params *params, + enum mlx5_flow_namespace_type namespace, + struct mlx5_pkt_reformat *pkt_reformat); + + void (*packet_reformat_dealloc)(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat *pkt_reformat); + + int (*modify_header_alloc)(struct mlx5_flow_root_namespace *ns, + u8 namespace, u8 num_actions, + void *modify_actions, + struct mlx5_modify_hdr *modify_hdr); + + void (*modify_header_dealloc)(struct mlx5_flow_root_namespace *ns, + struct mlx5_modify_hdr *modify_hdr); + + int (*set_peer)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns); + + int (*create_ns)(struct mlx5_flow_root_namespace *ns); + int (*destroy_ns)(struct mlx5_flow_root_namespace *ns); + int (*create_match_definer)(struct mlx5_flow_root_namespace *ns, + u16 format_id, u32 *match_mask); + int (*destroy_match_definer)(struct mlx5_flow_root_namespace *ns, + int 
definer_id); + + u32 (*get_capabilities)(struct mlx5_flow_root_namespace *ns, + enum fs_flow_table_type ft_type); +}; + +int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id); +int mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, + enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask, + u32 *id); +int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id); +int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id, + u64 *packets, u64 *bytes); + +int mlx5_cmd_fc_get_bulk_query_out_len(int bulk_len); +int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len, + u32 *out); + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type); +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c new file mode 100644 index 000000000..e6674118b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -0,0 +1,3612 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mutex.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/eswitch.h> + +#include "mlx5_core.h" +#include "fs_core.h" +#include "fs_cmd.h" +#include "fs_ft_pool.h" +#include "diag/fs_tracepoint.h" + +#define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ + sizeof(struct init_tree_node)) + +#define ADD_PRIO(num_prios_val, min_level_val, num_levels_val, caps_val,\ + ...) {.type = FS_TYPE_PRIO,\ + .min_ft_level = min_level_val,\ + .num_levels = num_levels_val,\ + .num_leaf_prios = num_prios_val,\ + .caps = caps_val,\ + .children = (struct init_tree_node[]) {__VA_ARGS__},\ + .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ +} + +#define ADD_MULTIPLE_PRIO(num_prios_val, num_levels_val, ...)\ + ADD_PRIO(num_prios_val, 0, num_levels_val, {},\ + __VA_ARGS__)\ + +#define ADD_NS(def_miss_act, ...) {.type = FS_TYPE_NAMESPACE, \ + .def_miss_action = def_miss_act,\ + .children = (struct init_tree_node[]) {__VA_ARGS__},\ + .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ +} + +#define INIT_CAPS_ARRAY_SIZE(...) 
(sizeof((long[]){__VA_ARGS__}) /\ + sizeof(long)) + +#define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap)) + +#define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \ + .caps = (long[]) {__VA_ARGS__} } + +#define FS_CHAINING_CAPS FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), \ + FS_CAP(flow_table_properties_nic_receive.modify_root), \ + FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \ + FS_CAP(flow_table_properties_nic_receive.flow_table_modify)) + +#define FS_CHAINING_CAPS_EGRESS \ + FS_REQUIRED_CAPS( \ + FS_CAP(flow_table_properties_nic_transmit.flow_modify_en), \ + FS_CAP(flow_table_properties_nic_transmit.modify_root), \ + FS_CAP(flow_table_properties_nic_transmit \ + .identified_miss_table_mode), \ + FS_CAP(flow_table_properties_nic_transmit.flow_table_modify)) + +#define FS_CHAINING_CAPS_RDMA_TX \ + FS_REQUIRED_CAPS( \ + FS_CAP(flow_table_properties_nic_transmit_rdma.flow_modify_en), \ + FS_CAP(flow_table_properties_nic_transmit_rdma.modify_root), \ + FS_CAP(flow_table_properties_nic_transmit_rdma \ + .identified_miss_table_mode), \ + FS_CAP(flow_table_properties_nic_transmit_rdma \ + .flow_table_modify)) + +#define LEFTOVERS_NUM_LEVELS 1 +#define LEFTOVERS_NUM_PRIOS 1 + +#define RDMA_RX_COUNTERS_PRIO_NUM_LEVELS 1 +#define RDMA_TX_COUNTERS_PRIO_NUM_LEVELS 1 + +#define BY_PASS_PRIO_NUM_LEVELS 1 +#define BY_PASS_MIN_LEVEL (ETHTOOL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\ + LEFTOVERS_NUM_PRIOS) + +#define KERNEL_RX_MACSEC_NUM_PRIOS 1 +#define KERNEL_RX_MACSEC_NUM_LEVELS 2 +#define KERNEL_RX_MACSEC_MIN_LEVEL (BY_PASS_MIN_LEVEL + KERNEL_RX_MACSEC_NUM_PRIOS) + +#define ETHTOOL_PRIO_NUM_LEVELS 1 +#define ETHTOOL_NUM_PRIOS 11 +#define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) +/* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}} */ +#define KERNEL_NIC_PRIO_NUM_LEVELS 7 +#define KERNEL_NIC_NUM_PRIOS 1 +/* One more level for tc */ +#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) + +#define KERNEL_NIC_TC_NUM_PRIOS 1 +#define KERNEL_NIC_TC_NUM_LEVELS 3 + +#define ANCHOR_NUM_LEVELS 1 +#define ANCHOR_NUM_PRIOS 1 +#define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) + +#define OFFLOADS_MAX_FT 2 +#define OFFLOADS_NUM_PRIOS 2 +#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + OFFLOADS_NUM_PRIOS) + +#define LAG_PRIO_NUM_LEVELS 1 +#define LAG_NUM_PRIOS 1 +#define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + KERNEL_RX_MACSEC_MIN_LEVEL + 1) + +#define KERNEL_TX_IPSEC_NUM_PRIOS 1 +#define KERNEL_TX_IPSEC_NUM_LEVELS 1 +#define KERNEL_TX_IPSEC_MIN_LEVEL (KERNEL_TX_IPSEC_NUM_LEVELS) + +#define KERNEL_TX_MACSEC_NUM_PRIOS 1 +#define KERNEL_TX_MACSEC_NUM_LEVELS 2 +#define KERNEL_TX_MACSEC_MIN_LEVEL (KERNEL_TX_IPSEC_MIN_LEVEL + KERNEL_TX_MACSEC_NUM_PRIOS) + +struct node_caps { + size_t arr_sz; + long *caps; +}; + +static struct init_tree_node { + enum fs_node_type type; + struct init_tree_node *children; + int ar_size; + struct node_caps caps; + int min_ft_level; + int num_leaf_prios; + int prio; + int num_levels; + enum mlx5_flow_table_miss_action def_miss_action; +} root_fs = { + .type = FS_TYPE_NAMESPACE, + .ar_size = 8, + .children = (struct init_tree_node[]){ + ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, + BY_PASS_PRIO_NUM_LEVELS))), + ADD_PRIO(0, KERNEL_RX_MACSEC_MIN_LEVEL, 0, FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(KERNEL_RX_MACSEC_NUM_PRIOS, + 
KERNEL_RX_MACSEC_NUM_LEVELS))), + ADD_PRIO(0, LAG_MIN_LEVEL, 0, FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS, + LAG_PRIO_NUM_LEVELS))), + ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, + OFFLOADS_MAX_FT))), + ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0, FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS, + ETHTOOL_PRIO_NUM_LEVELS))), + ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {}, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS, + KERNEL_NIC_TC_NUM_LEVELS), + ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS, + KERNEL_NIC_PRIO_NUM_LEVELS))), + ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, + LEFTOVERS_NUM_LEVELS))), + ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {}, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, + ANCHOR_NUM_LEVELS))), + } +}; + +static struct init_tree_node egress_root_fs = { + .type = FS_TYPE_NAMESPACE, + .ar_size = 3, + .children = (struct init_tree_node[]) { + ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0, + FS_CHAINING_CAPS_EGRESS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, + BY_PASS_PRIO_NUM_LEVELS))), + ADD_PRIO(0, KERNEL_TX_IPSEC_MIN_LEVEL, 0, + FS_CHAINING_CAPS_EGRESS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(KERNEL_TX_IPSEC_NUM_PRIOS, + KERNEL_TX_IPSEC_NUM_LEVELS))), + ADD_PRIO(0, KERNEL_TX_MACSEC_MIN_LEVEL, 0, + FS_CHAINING_CAPS_EGRESS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(KERNEL_TX_MACSEC_NUM_PRIOS, + KERNEL_TX_MACSEC_NUM_LEVELS))), + } +}; + +enum { + RDMA_RX_COUNTERS_PRIO, + RDMA_RX_BYPASS_PRIO, + RDMA_RX_KERNEL_PRIO, +}; + +#define RDMA_RX_BYPASS_MIN_LEVEL MLX5_BY_PASS_NUM_REGULAR_PRIOS +#define RDMA_RX_KERNEL_MIN_LEVEL (RDMA_RX_BYPASS_MIN_LEVEL + 1) +#define RDMA_RX_COUNTERS_MIN_LEVEL (RDMA_RX_KERNEL_MIN_LEVEL + 2) + +static struct init_tree_node rdma_rx_root_fs = { + .type = FS_TYPE_NAMESPACE, + .ar_size = 3, + .children = (struct init_tree_node[]) { + [RDMA_RX_COUNTERS_PRIO] = + ADD_PRIO(0, RDMA_RX_COUNTERS_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(MLX5_RDMA_RX_NUM_COUNTERS_PRIOS, + RDMA_RX_COUNTERS_PRIO_NUM_LEVELS))), + [RDMA_RX_BYPASS_PRIO] = + ADD_PRIO(0, RDMA_RX_BYPASS_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_REGULAR_PRIOS, + BY_PASS_PRIO_NUM_LEVELS))), + [RDMA_RX_KERNEL_PRIO] = + ADD_PRIO(0, RDMA_RX_KERNEL_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_SWITCH_DOMAIN, + ADD_MULTIPLE_PRIO(1, 1))), + } +}; + +enum { + RDMA_TX_COUNTERS_PRIO, + RDMA_TX_BYPASS_PRIO, +}; + +#define RDMA_TX_BYPASS_MIN_LEVEL MLX5_BY_PASS_NUM_PRIOS +#define RDMA_TX_COUNTERS_MIN_LEVEL (RDMA_TX_BYPASS_MIN_LEVEL + 1) + +static struct init_tree_node rdma_tx_root_fs = { + .type = FS_TYPE_NAMESPACE, + .ar_size = 2, + .children = (struct init_tree_node[]) { + [RDMA_TX_COUNTERS_PRIO] = + ADD_PRIO(0, RDMA_TX_COUNTERS_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(MLX5_RDMA_TX_NUM_COUNTERS_PRIOS, + RDMA_TX_COUNTERS_PRIO_NUM_LEVELS))), + [RDMA_TX_BYPASS_PRIO] = + ADD_PRIO(0, RDMA_TX_BYPASS_MIN_LEVEL, 0, + FS_CHAINING_CAPS_RDMA_TX, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(RDMA_TX_BYPASS_MIN_LEVEL, + 
BY_PASS_PRIO_NUM_LEVELS))), + } +}; + +enum fs_i_lock_class { + FS_LOCK_GRANDPARENT, + FS_LOCK_PARENT, + FS_LOCK_CHILD +}; + +static const struct rhashtable_params rhash_fte = { + .key_len = sizeof_field(struct fs_fte, val), + .key_offset = offsetof(struct fs_fte, val), + .head_offset = offsetof(struct fs_fte, hash), + .automatic_shrinking = true, + .min_size = 1, +}; + +static const struct rhashtable_params rhash_fg = { + .key_len = sizeof_field(struct mlx5_flow_group, mask), + .key_offset = offsetof(struct mlx5_flow_group, mask), + .head_offset = offsetof(struct mlx5_flow_group, hash), + .automatic_shrinking = true, + .min_size = 1, + +}; + +static void del_hw_flow_table(struct fs_node *node); +static void del_hw_flow_group(struct fs_node *node); +static void del_hw_fte(struct fs_node *node); +static void del_sw_flow_table(struct fs_node *node); +static void del_sw_flow_group(struct fs_node *node); +static void del_sw_fte(struct fs_node *node); +static void del_sw_prio(struct fs_node *node); +static void del_sw_ns(struct fs_node *node); +/* Delete rule (destination) is special case that + * requires to lock the FTE for all the deletion process. + */ +static void del_sw_hw_rule(struct fs_node *node); +static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, + struct mlx5_flow_destination *d2); +static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns); +static struct mlx5_flow_rule * +find_flow_rule(struct fs_fte *fte, + struct mlx5_flow_destination *dest); + +static void tree_init_node(struct fs_node *node, + void (*del_hw_func)(struct fs_node *), + void (*del_sw_func)(struct fs_node *)) +{ + refcount_set(&node->refcount, 1); + INIT_LIST_HEAD(&node->list); + INIT_LIST_HEAD(&node->children); + init_rwsem(&node->lock); + node->del_hw_func = del_hw_func; + node->del_sw_func = del_sw_func; + node->active = false; +} + +static void tree_add_node(struct fs_node *node, struct fs_node *parent) +{ + if (parent) + refcount_inc(&parent->refcount); + node->parent = parent; + + /* Parent is the root */ + if (!parent) + node->root = node; + else + node->root = parent->root; +} + +static int tree_get_node(struct fs_node *node) +{ + return refcount_inc_not_zero(&node->refcount); +} + +static void nested_down_read_ref_node(struct fs_node *node, + enum fs_i_lock_class class) +{ + if (node) { + down_read_nested(&node->lock, class); + refcount_inc(&node->refcount); + } +} + +static void nested_down_write_ref_node(struct fs_node *node, + enum fs_i_lock_class class) +{ + if (node) { + down_write_nested(&node->lock, class); + refcount_inc(&node->refcount); + } +} + +static void down_write_ref_node(struct fs_node *node, bool locked) +{ + if (node) { + if (!locked) + down_write(&node->lock); + refcount_inc(&node->refcount); + } +} + +static void up_read_ref_node(struct fs_node *node) +{ + refcount_dec(&node->refcount); + up_read(&node->lock); +} + +static void up_write_ref_node(struct fs_node *node, bool locked) +{ + refcount_dec(&node->refcount); + if (!locked) + up_write(&node->lock); +} + +static void tree_put_node(struct fs_node *node, bool locked) +{ + struct fs_node *parent_node = node->parent; + + if (refcount_dec_and_test(&node->refcount)) { + if (node->del_hw_func) + node->del_hw_func(node); + if (parent_node) { + down_write_ref_node(parent_node, locked); + list_del_init(&node->list); + } + node->del_sw_func(node); + if (parent_node) + up_write_ref_node(parent_node, locked); + node = NULL; + } + if (!node && parent_node) + tree_put_node(parent_node, locked); +} + +static int 
tree_remove_node(struct fs_node *node, bool locked) +{ + if (refcount_read(&node->refcount) > 1) { + refcount_dec(&node->refcount); + return -EEXIST; + } + tree_put_node(node, locked); + return 0; +} + +static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns, + unsigned int prio) +{ + struct fs_prio *iter_prio; + + fs_for_each_prio(iter_prio, ns) { + if (iter_prio->prio == prio) + return iter_prio; + } + + return NULL; +} + +static bool is_fwd_next_action(u32 action) +{ + return action & (MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO | + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS); +} + +static bool is_fwd_dest_type(enum mlx5_flow_destination_type type) +{ + return type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM || + type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE || + type == MLX5_FLOW_DESTINATION_TYPE_UPLINK || + type == MLX5_FLOW_DESTINATION_TYPE_VPORT || + type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER || + type == MLX5_FLOW_DESTINATION_TYPE_TIR; +} + +static bool check_valid_spec(const struct mlx5_flow_spec *spec) +{ + int i; + + for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++) + if (spec->match_value[i] & ~spec->match_criteria[i]) { + pr_warn("mlx5_core: match_value differs from match_criteria\n"); + return false; + } + + return true; +} + +struct mlx5_flow_root_namespace *find_root(struct fs_node *node) +{ + struct fs_node *root; + struct mlx5_flow_namespace *ns; + + root = node->root; + + if (WARN_ON(root->type != FS_TYPE_NAMESPACE)) { + pr_warn("mlx5: flow steering node is not in tree or garbaged\n"); + return NULL; + } + + ns = container_of(root, struct mlx5_flow_namespace, node); + return container_of(ns, struct mlx5_flow_root_namespace, ns); +} + +static inline struct mlx5_flow_steering *get_steering(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root = find_root(node); + + if (root) + return root->dev->priv.steering; + return NULL; +} + +static inline struct mlx5_core_dev *get_dev(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root = find_root(node); + + if (root) + return root->dev; + return NULL; +} + +static void del_sw_ns(struct fs_node *node) +{ + kfree(node); +} + +static void del_sw_prio(struct fs_node *node) +{ + kfree(node); +} + +static void del_hw_flow_table(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_table *ft; + struct mlx5_core_dev *dev; + int err; + + fs_get_obj(ft, node); + dev = get_dev(&ft->node); + root = find_root(&ft->node); + trace_mlx5_fs_del_ft(ft); + + if (node->active) { + err = root->cmds->destroy_flow_table(root, ft); + if (err) + mlx5_core_warn(dev, "flow steering can't destroy ft\n"); + } +} + +static void del_sw_flow_table(struct fs_node *node) +{ + struct mlx5_flow_table *ft; + struct fs_prio *prio; + + fs_get_obj(ft, node); + + rhltable_destroy(&ft->fgs_hash); + if (ft->node.parent) { + fs_get_obj(prio, ft->node.parent); + prio->num_ft--; + } + kfree(ft); +} + +static void modify_fte(struct fs_fte *fte) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct mlx5_core_dev *dev; + int err; + + fs_get_obj(fg, fte->node.parent); + fs_get_obj(ft, fg->node.parent); + dev = get_dev(&fte->node); + + root = find_root(&ft->node); + err = root->cmds->update_fte(root, ft, fg, fte->modify_mask, fte); + if (err) + mlx5_core_warn(dev, + "%s can't del rule fg id=%d fte_index=%d\n", + __func__, fg->id, fte->index); + fte->modify_mask = 0; +} + +static void del_sw_hw_rule(struct fs_node *node) +{ + struct mlx5_flow_rule *rule; + struct fs_fte 
*fte; + + fs_get_obj(rule, node); + fs_get_obj(fte, rule->node.parent); + trace_mlx5_fs_del_rule(rule); + if (is_fwd_next_action(rule->sw_action)) { + mutex_lock(&rule->dest_attr.ft->lock); + list_del(&rule->next_ft); + mutex_unlock(&rule->dest_attr.ft->lock); + } + + if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) { + --fte->dests_size; + fte->modify_mask |= + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) | + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS); + fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT; + goto out; + } + + if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_PORT) { + --fte->dests_size; + fte->modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION); + fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_ALLOW; + goto out; + } + + if (is_fwd_dest_type(rule->dest_attr.type)) { + --fte->dests_size; + --fte->fwd_dests; + + if (!fte->fwd_dests) + fte->action.action &= + ~MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + fte->modify_mask |= + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); + goto out; + } +out: + kfree(rule); +} + +static void del_hw_fte(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct mlx5_core_dev *dev; + struct fs_fte *fte; + int err; + + fs_get_obj(fte, node); + fs_get_obj(fg, fte->node.parent); + fs_get_obj(ft, fg->node.parent); + + trace_mlx5_fs_del_fte(fte); + WARN_ON(fte->dests_size); + dev = get_dev(&ft->node); + root = find_root(&ft->node); + if (node->active) { + err = root->cmds->delete_fte(root, ft, fte); + if (err) + mlx5_core_warn(dev, + "flow steering can't delete fte in index %d of flow group id %d\n", + fte->index, fg->id); + node->active = false; + } +} + +static void del_sw_fte(struct fs_node *node) +{ + struct mlx5_flow_steering *steering = get_steering(node); + struct mlx5_flow_group *fg; + struct fs_fte *fte; + int err; + + fs_get_obj(fte, node); + fs_get_obj(fg, fte->node.parent); + + err = rhashtable_remove_fast(&fg->ftes_hash, + &fte->hash, + rhash_fte); + WARN_ON(err); + ida_free(&fg->fte_allocator, fte->index - fg->start_index); + kmem_cache_free(steering->ftes_cache, fte); +} + +static void del_hw_flow_group(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_group *fg; + struct mlx5_flow_table *ft; + struct mlx5_core_dev *dev; + + fs_get_obj(fg, node); + fs_get_obj(ft, fg->node.parent); + dev = get_dev(&ft->node); + trace_mlx5_fs_del_fg(fg); + + root = find_root(&ft->node); + if (fg->node.active && root->cmds->destroy_flow_group(root, ft, fg)) + mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", + fg->id, ft->id); +} + +static void del_sw_flow_group(struct fs_node *node) +{ + struct mlx5_flow_steering *steering = get_steering(node); + struct mlx5_flow_group *fg; + struct mlx5_flow_table *ft; + int err; + + fs_get_obj(fg, node); + fs_get_obj(ft, fg->node.parent); + + rhashtable_destroy(&fg->ftes_hash); + ida_destroy(&fg->fte_allocator); + if (ft->autogroup.active && + fg->max_ftes == ft->autogroup.group_size && + fg->start_index < ft->autogroup.max_fte) + ft->autogroup.num_groups--; + err = rhltable_remove(&ft->fgs_hash, + &fg->hash, + rhash_fg); + WARN_ON(err); + kmem_cache_free(steering->fgs_cache, fg); +} + +static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte) +{ + int index; + int ret; + + index = ida_alloc_max(&fg->fte_allocator, fg->max_ftes - 1, GFP_KERNEL); + if (index < 0) + return index; + + fte->index = index + fg->start_index; + ret = 
rhashtable_insert_fast(&fg->ftes_hash, + &fte->hash, + rhash_fte); + if (ret) + goto err_ida_remove; + + tree_add_node(&fte->node, &fg->node); + list_add_tail(&fte->node.list, &fg->node.children); + return 0; + +err_ida_remove: + ida_free(&fg->fte_allocator, index); + return ret; +} + +static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, + const struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act) +{ + struct mlx5_flow_steering *steering = get_steering(&ft->node); + struct fs_fte *fte; + + fte = kmem_cache_zalloc(steering->ftes_cache, GFP_KERNEL); + if (!fte) + return ERR_PTR(-ENOMEM); + + memcpy(fte->val, &spec->match_value, sizeof(fte->val)); + fte->node.type = FS_TYPE_FLOW_ENTRY; + fte->action = *flow_act; + fte->flow_context = spec->flow_context; + + tree_init_node(&fte->node, del_hw_fte, del_sw_fte); + + return fte; +} + +static void dealloc_flow_group(struct mlx5_flow_steering *steering, + struct mlx5_flow_group *fg) +{ + rhashtable_destroy(&fg->ftes_hash); + kmem_cache_free(steering->fgs_cache, fg); +} + +static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering, + u8 match_criteria_enable, + const void *match_criteria, + int start_index, + int end_index) +{ + struct mlx5_flow_group *fg; + int ret; + + fg = kmem_cache_zalloc(steering->fgs_cache, GFP_KERNEL); + if (!fg) + return ERR_PTR(-ENOMEM); + + ret = rhashtable_init(&fg->ftes_hash, &rhash_fte); + if (ret) { + kmem_cache_free(steering->fgs_cache, fg); + return ERR_PTR(ret); + } + + ida_init(&fg->fte_allocator); + fg->mask.match_criteria_enable = match_criteria_enable; + memcpy(&fg->mask.match_criteria, match_criteria, + sizeof(fg->mask.match_criteria)); + fg->node.type = FS_TYPE_FLOW_GROUP; + fg->start_index = start_index; + fg->max_ftes = end_index - start_index + 1; + + return fg; +} + +static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + const void *match_criteria, + int start_index, + int end_index, + struct list_head *prev) +{ + struct mlx5_flow_steering *steering = get_steering(&ft->node); + struct mlx5_flow_group *fg; + int ret; + + fg = alloc_flow_group(steering, match_criteria_enable, match_criteria, + start_index, end_index); + if (IS_ERR(fg)) + return fg; + + /* initialize refcnt, add to parent list */ + ret = rhltable_insert(&ft->fgs_hash, + &fg->hash, + rhash_fg); + if (ret) { + dealloc_flow_group(steering, fg); + return ERR_PTR(ret); + } + + tree_init_node(&fg->node, del_hw_flow_group, del_sw_flow_group); + tree_add_node(&fg->node, &ft->node); + /* Add node to group list */ + list_add(&fg->node.list, prev); + atomic_inc(&ft->node.version); + + return fg; +} + +static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, + enum fs_flow_table_type table_type, + enum fs_flow_table_op_mod op_mod, + u32 flags) +{ + struct mlx5_flow_table *ft; + int ret; + + ft = kzalloc(sizeof(*ft), GFP_KERNEL); + if (!ft) + return ERR_PTR(-ENOMEM); + + ret = rhltable_init(&ft->fgs_hash, &rhash_fg); + if (ret) { + kfree(ft); + return ERR_PTR(ret); + } + + ft->level = level; + ft->node.type = FS_TYPE_FLOW_TABLE; + ft->op_mod = op_mod; + ft->type = table_type; + ft->vport = vport; + ft->flags = flags; + INIT_LIST_HEAD(&ft->fwd_rules); + mutex_init(&ft->lock); + + return ft; +} + +/* If reverse is false, then we search for the first flow table in the + * root sub-tree from start(closest from right), else we search for the + * last flow table in the root sub-tree till start(closest from left). 
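+ *
+ * Illustrative example (editorial note, not from the original source):
+ * for a parent whose children are [FT0, sub-prio{FT1, FT2}, FT3], a
+ * forward search starting from FT0's list entry descends into the
+ * sub-prio and returns FT1, while a reverse search starting from FT3's
+ * entry returns FT2.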
+ */ +static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root, + struct list_head *start, + bool reverse) +{ +#define list_advance_entry(pos, reverse) \ + ((reverse) ? list_prev_entry(pos, list) : list_next_entry(pos, list)) + +#define list_for_each_advance_continue(pos, head, reverse) \ + for (pos = list_advance_entry(pos, reverse); \ + &pos->list != (head); \ + pos = list_advance_entry(pos, reverse)) + + struct fs_node *iter = list_entry(start, struct fs_node, list); + struct mlx5_flow_table *ft = NULL; + + if (!root) + return NULL; + + list_for_each_advance_continue(iter, &root->children, reverse) { + if (iter->type == FS_TYPE_FLOW_TABLE) { + fs_get_obj(ft, iter); + return ft; + } + ft = find_closest_ft_recursive(iter, &iter->children, reverse); + if (ft) + return ft; + } + + return ft; +} + +static struct fs_node *find_prio_chains_parent(struct fs_node *parent, + struct fs_node **child) +{ + struct fs_node *node = NULL; + + while (parent && parent->type != FS_TYPE_PRIO_CHAINS) { + node = parent; + parent = parent->parent; + } + + if (child) + *child = node; + + return parent; +} + +/* If reverse is false then return the first flow table next to the passed node + * in the tree, else return the last flow table before the node in the tree. + * If skip is true, skip the flow tables in the same prio_chains prio. + */ +static struct mlx5_flow_table *find_closest_ft(struct fs_node *node, bool reverse, + bool skip) +{ + struct fs_node *prio_chains_parent = NULL; + struct mlx5_flow_table *ft = NULL; + struct fs_node *curr_node; + struct fs_node *parent; + + if (skip) + prio_chains_parent = find_prio_chains_parent(node, NULL); + parent = node->parent; + curr_node = node; + while (!ft && parent) { + if (parent != prio_chains_parent) + ft = find_closest_ft_recursive(parent, &curr_node->list, + reverse); + curr_node = parent; + parent = curr_node->parent; + } + return ft; +} + +/* Assuming all the tree is locked by mutex chain lock */ +static struct mlx5_flow_table *find_next_chained_ft(struct fs_node *node) +{ + return find_closest_ft(node, false, true); +} + +/* Assuming all the tree is locked by mutex chain lock */ +static struct mlx5_flow_table *find_prev_chained_ft(struct fs_node *node) +{ + return find_closest_ft(node, true, true); +} + +static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft, + struct mlx5_flow_act *flow_act) +{ + struct fs_prio *prio; + bool next_ns; + + next_ns = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS; + fs_get_obj(prio, next_ns ? 
ft->ns->node.parent : ft->node.parent); + + return find_next_chained_ft(&prio->node); +} + +static int connect_fts_in_prio(struct mlx5_core_dev *dev, + struct fs_prio *prio, + struct mlx5_flow_table *ft) +{ + struct mlx5_flow_root_namespace *root = find_root(&prio->node); + struct mlx5_flow_table *iter; + int err; + + fs_for_each_ft(iter, prio) { + err = root->cmds->modify_flow_table(root, iter, ft); + if (err) { + mlx5_core_err(dev, + "Failed to modify flow table id %d, type %d, err %d\n", + iter->id, iter->type, err); + /* The driver is out of sync with the FW */ + return err; + } + } + return 0; +} + +static struct mlx5_flow_table *find_closet_ft_prio_chains(struct fs_node *node, + struct fs_node *parent, + struct fs_node **child, + bool reverse) +{ + struct mlx5_flow_table *ft; + + ft = find_closest_ft(node, reverse, false); + + if (ft && parent == find_prio_chains_parent(&ft->node, child)) + return ft; + + return NULL; +} + +/* Connect flow tables from previous priority of prio to ft */ +static int connect_prev_fts(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct fs_prio *prio) +{ + struct fs_node *prio_parent, *parent = NULL, *child, *node; + struct mlx5_flow_table *prev_ft; + int err = 0; + + prio_parent = find_prio_chains_parent(&prio->node, &child); + + /* return directly if not under the first sub ns of prio_chains prio */ + if (prio_parent && !list_is_first(&child->list, &prio_parent->children)) + return 0; + + prev_ft = find_prev_chained_ft(&prio->node); + while (prev_ft) { + struct fs_prio *prev_prio; + + fs_get_obj(prev_prio, prev_ft->node.parent); + err = connect_fts_in_prio(dev, prev_prio, ft); + if (err) + break; + + if (!parent) { + parent = find_prio_chains_parent(&prev_prio->node, &child); + if (!parent) + break; + } + + node = child; + prev_ft = find_closet_ft_prio_chains(node, parent, &child, true); + } + return err; +} + +static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio + *prio) +{ + struct mlx5_flow_root_namespace *root = find_root(&prio->node); + struct mlx5_ft_underlay_qp *uqp; + int min_level = INT_MAX; + int err = 0; + u32 qpn; + + if (root->root_ft) + min_level = root->root_ft->level; + + if (ft->level >= min_level) + return 0; + + if (list_empty(&root->underlay_qpns)) { + /* Don't set any QPN (zero) in case QPN list is empty */ + qpn = 0; + err = root->cmds->update_root_ft(root, ft, qpn, false); + } else { + list_for_each_entry(uqp, &root->underlay_qpns, list) { + qpn = uqp->qpn; + err = root->cmds->update_root_ft(root, ft, + qpn, false); + if (err) + break; + } + } + + if (err) + mlx5_core_warn(root->dev, + "Update root flow table of id(%u) qpn(%d) failed\n", + ft->id, qpn); + else + root->root_ft = ft; + + return err; +} + +static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct fs_fte *fte; + int modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); + int err = 0; + + fs_get_obj(fte, rule->node.parent); + if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return -EINVAL; + down_write_ref_node(&fte->node, false); + fs_get_obj(fg, fte->node.parent); + fs_get_obj(ft, fg->node.parent); + + memcpy(&rule->dest_attr, dest, sizeof(*dest)); + root = find_root(&ft->node); + err = root->cmds->update_fte(root, ft, fg, + modify_mask, fte); + up_write_ref_node(&fte->node, false); + + return err; +} + +int 
mlx5_modify_rule_destination(struct mlx5_flow_handle *handle, + struct mlx5_flow_destination *new_dest, + struct mlx5_flow_destination *old_dest) +{ + int i; + + if (!old_dest) { + if (handle->num_rules != 1) + return -EINVAL; + return _mlx5_modify_rule_destination(handle->rule[0], + new_dest); + } + + for (i = 0; i < handle->num_rules; i++) { + if (mlx5_flow_dests_cmp(new_dest, &handle->rule[i]->dest_attr)) + return _mlx5_modify_rule_destination(handle->rule[i], + new_dest); + } + + return -EINVAL; +} + +/* Modify/set FWD rules that point on old_next_ft to point on new_next_ft */ +static int connect_fwd_rules(struct mlx5_core_dev *dev, + struct mlx5_flow_table *new_next_ft, + struct mlx5_flow_table *old_next_ft) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_rule *iter; + int err = 0; + + /* new_next_ft and old_next_ft could be NULL only + * when we create/destroy the anchor flow table. + */ + if (!new_next_ft || !old_next_ft) + return 0; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = new_next_ft; + + mutex_lock(&old_next_ft->lock); + list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules); + mutex_unlock(&old_next_ft->lock); + list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) { + if ((iter->sw_action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS) && + iter->ft->ns == new_next_ft->ns) + continue; + + err = _mlx5_modify_rule_destination(iter, &dest); + if (err) + pr_err("mlx5_core: failed to modify rule to point on flow table %d\n", + new_next_ft->id); + } + return 0; +} + +static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, + struct fs_prio *prio) +{ + struct mlx5_flow_table *next_ft, *first_ft; + int err = 0; + + /* Connect_prev_fts and update_root_ft_create are mutually exclusive */ + + first_ft = list_first_entry_or_null(&prio->node.children, + struct mlx5_flow_table, node.list); + if (!first_ft || first_ft->level > ft->level) { + err = connect_prev_fts(dev, ft, prio); + if (err) + return err; + + next_ft = first_ft ? first_ft : find_next_chained_ft(&prio->node); + err = connect_fwd_rules(dev, ft, next_ft); + if (err) + return err; + } + + if (MLX5_CAP_FLOWTABLE(dev, + flow_table_properties_nic_receive.modify_root)) + err = update_root_ft_create(ft, prio); + return err; +} + +static void list_add_flow_table(struct mlx5_flow_table *ft, + struct fs_prio *prio) +{ + struct list_head *prev = &prio->node.children; + struct mlx5_flow_table *iter; + + fs_for_each_ft(iter, prio) { + if (iter->level > ft->level) + break; + prev = &iter->node.list; + } + list_add(&ft->node.list, prev); +} + +static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns, + struct mlx5_flow_table_attr *ft_attr, + enum fs_flow_table_op_mod op_mod, + u16 vport) +{ + struct mlx5_flow_root_namespace *root = find_root(&ns->node); + bool unmanaged = ft_attr->flags & MLX5_FLOW_TABLE_UNMANAGED; + struct mlx5_flow_table *next_ft; + struct fs_prio *fs_prio = NULL; + struct mlx5_flow_table *ft; + int err; + + if (!root) { + pr_err("mlx5: flow steering failed to find root of namespace\n"); + return ERR_PTR(-ENODEV); + } + + mutex_lock(&root->chain_lock); + fs_prio = find_prio(ns, ft_attr->prio); + if (!fs_prio) { + err = -EINVAL; + goto unlock_root; + } + if (!unmanaged) { + /* The level is related to the + * priority level range. 
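+ * The caller supplies a level relative to this priority; it is
+ * range-checked against fs_prio->num_levels and then translated to an
+ * absolute level by adding fs_prio->start_level (editorial example:
+ * with start_level 4 and num_levels 4, a requested level 2 becomes
+ * absolute level 6). Unmanaged tables skip both steps.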
+ */ + if (ft_attr->level >= fs_prio->num_levels) { + err = -ENOSPC; + goto unlock_root; + } + + ft_attr->level += fs_prio->start_level; + } + + /* The level is related to the + * priority level range. + */ + ft = alloc_flow_table(ft_attr->level, + vport, + root->table_type, + op_mod, ft_attr->flags); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto unlock_root; + } + + tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table); + next_ft = unmanaged ? ft_attr->next_ft : + find_next_chained_ft(&fs_prio->node); + ft->def_miss_action = ns->def_miss_action; + ft->ns = ns; + err = root->cmds->create_flow_table(root, ft, ft_attr, next_ft); + if (err) + goto free_ft; + + if (!unmanaged) { + err = connect_flow_table(root->dev, ft, fs_prio); + if (err) + goto destroy_ft; + } + + ft->node.active = true; + down_write_ref_node(&fs_prio->node, false); + if (!unmanaged) { + tree_add_node(&ft->node, &fs_prio->node); + list_add_flow_table(ft, fs_prio); + } else { + ft->node.root = fs_prio->node.root; + } + fs_prio->num_ft++; + up_write_ref_node(&fs_prio->node, false); + mutex_unlock(&root->chain_lock); + trace_mlx5_fs_add_ft(ft); + return ft; +destroy_ft: + root->cmds->destroy_flow_table(root, ft); +free_ft: + rhltable_destroy(&ft->fgs_hash); + kfree(ft); +unlock_root: + mutex_unlock(&root->chain_lock); + return ERR_PTR(err); +} + +struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, + struct mlx5_flow_table_attr *ft_attr) +{ + return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_NORMAL, 0); +} +EXPORT_SYMBOL(mlx5_create_flow_table); + +u32 mlx5_flow_table_id(struct mlx5_flow_table *ft) +{ + return ft->id; +} +EXPORT_SYMBOL(mlx5_flow_table_id); + +struct mlx5_flow_table * +mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, + struct mlx5_flow_table_attr *ft_attr, u16 vport) +{ + return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_NORMAL, vport); +} + +struct mlx5_flow_table* +mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns, + int prio, u32 level) +{ + struct mlx5_flow_table_attr ft_attr = {}; + + ft_attr.level = level; + ft_attr.prio = prio; + ft_attr.max_fte = 1; + + return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0); +} +EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table); + +#define MAX_FLOW_GROUP_SIZE BIT(24) +struct mlx5_flow_table* +mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, + struct mlx5_flow_table_attr *ft_attr) +{ + int num_reserved_entries = ft_attr->autogroup.num_reserved_entries; + int max_num_groups = ft_attr->autogroup.max_num_groups; + struct mlx5_flow_table *ft; + int autogroups_max_fte; + + ft = mlx5_create_flow_table(ns, ft_attr); + if (IS_ERR(ft)) + return ft; + + autogroups_max_fte = ft->max_fte - num_reserved_entries; + if (max_num_groups > autogroups_max_fte) + goto err_validate; + if (num_reserved_entries > ft->max_fte) + goto err_validate; + + /* Align the number of groups according to the largest group size */ + if (autogroups_max_fte / (max_num_groups + 1) > MAX_FLOW_GROUP_SIZE) + max_num_groups = (autogroups_max_fte / MAX_FLOW_GROUP_SIZE) - 1; + + ft->autogroup.active = true; + ft->autogroup.required_groups = max_num_groups; + ft->autogroup.max_fte = autogroups_max_fte; + /* We save place for flow groups in addition to max types */ + ft->autogroup.group_size = autogroups_max_fte / (max_num_groups + 1); + + return ft; + +err_validate: + mlx5_destroy_flow_table(ft); + return ERR_PTR(-ENOSPC); +} +EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table); + +struct mlx5_flow_group 
*mlx5_create_flow_group(struct mlx5_flow_table *ft, + u32 *fg_in) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, + fg_in, match_criteria); + u8 match_criteria_enable = MLX5_GET(create_flow_group_in, + fg_in, + match_criteria_enable); + int start_index = MLX5_GET(create_flow_group_in, fg_in, + start_flow_index); + int end_index = MLX5_GET(create_flow_group_in, fg_in, + end_flow_index); + struct mlx5_flow_group *fg; + int err; + + if (ft->autogroup.active && start_index < ft->autogroup.max_fte) + return ERR_PTR(-EPERM); + + down_write_ref_node(&ft->node, false); + fg = alloc_insert_flow_group(ft, match_criteria_enable, match_criteria, + start_index, end_index, + ft->node.children.prev); + up_write_ref_node(&ft->node, false); + if (IS_ERR(fg)) + return fg; + + err = root->cmds->create_flow_group(root, ft, fg_in, fg); + if (err) { + tree_put_node(&fg->node, false); + return ERR_PTR(err); + } + trace_mlx5_fs_add_fg(fg); + fg->node.active = true; + + return fg; +} +EXPORT_SYMBOL(mlx5_create_flow_group); + +static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_rule *rule; + + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) + return NULL; + + INIT_LIST_HEAD(&rule->next_ft); + rule->node.type = FS_TYPE_FLOW_DEST; + if (dest) + memcpy(&rule->dest_attr, dest, sizeof(*dest)); + else + rule->dest_attr.type = MLX5_FLOW_DESTINATION_TYPE_NONE; + + return rule; +} + +static struct mlx5_flow_handle *alloc_handle(int num_rules) +{ + struct mlx5_flow_handle *handle; + + handle = kzalloc(struct_size(handle, rule, num_rules), GFP_KERNEL); + if (!handle) + return NULL; + + handle->num_rules = num_rules; + + return handle; +} + +static void destroy_flow_handle(struct fs_fte *fte, + struct mlx5_flow_handle *handle, + struct mlx5_flow_destination *dest, + int i) +{ + for (; --i >= 0;) { + if (refcount_dec_and_test(&handle->rule[i]->node.refcount)) { + fte->dests_size--; + list_del(&handle->rule[i]->node.list); + kfree(handle->rule[i]); + } + } + kfree(handle); +} + +static struct mlx5_flow_handle * +create_flow_handle(struct fs_fte *fte, + struct mlx5_flow_destination *dest, + int dest_num, + int *modify_mask, + bool *new_rule) +{ + struct mlx5_flow_handle *handle; + struct mlx5_flow_rule *rule = NULL; + static int count = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS); + static int dst = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); + int type; + int i = 0; + + handle = alloc_handle((dest_num) ? dest_num : 1); + if (!handle) + return ERR_PTR(-ENOMEM); + + do { + if (dest) { + rule = find_flow_rule(fte, dest + i); + if (rule) { + refcount_inc(&rule->node.refcount); + goto rule_found; + } + } + + *new_rule = true; + rule = alloc_rule(dest + i); + if (!rule) + goto free_rules; + + /* Add dest to dests list- we need flow tables to be in the + * end of the list for forward to next prio rules. + */ + tree_init_node(&rule->node, NULL, del_sw_hw_rule); + if (dest && + dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) + list_add(&rule->node.list, &fte->node.children); + else + list_add_tail(&rule->node.list, &fte->node.children); + if (dest) { + fte->dests_size++; + + if (is_fwd_dest_type(dest[i].type)) + fte->fwd_dests++; + + type = dest[i].type == + MLX5_FLOW_DESTINATION_TYPE_COUNTER; + *modify_mask |= type ? 
count : dst; + } +rule_found: + handle->rule[i] = rule; + } while (++i < dest_num); + + return handle; + +free_rules: + destroy_flow_handle(fte, handle, dest, i); + return ERR_PTR(-ENOMEM); +} + +/* fte should not be deleted while calling this function */ +static struct mlx5_flow_handle * +add_rule_fte(struct fs_fte *fte, + struct mlx5_flow_group *fg, + struct mlx5_flow_destination *dest, + int dest_num, + bool update_action) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_handle *handle; + struct mlx5_flow_table *ft; + int modify_mask = 0; + int err; + bool new_rule = false; + + handle = create_flow_handle(fte, dest, dest_num, &modify_mask, + &new_rule); + if (IS_ERR(handle) || !new_rule) + goto out; + + if (update_action) + modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION); + + fs_get_obj(ft, fg->node.parent); + root = find_root(&fg->node); + if (!(fte->status & FS_FTE_STATUS_EXISTING)) + err = root->cmds->create_fte(root, ft, fg, fte); + else + err = root->cmds->update_fte(root, ft, fg, modify_mask, fte); + if (err) + goto free_handle; + + fte->node.active = true; + fte->status |= FS_FTE_STATUS_EXISTING; + atomic_inc(&fg->node.version); + +out: + return handle; + +free_handle: + destroy_flow_handle(fte, handle, dest, handle->num_rules); + return ERR_PTR(err); +} + +static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft, + const struct mlx5_flow_spec *spec) +{ + struct list_head *prev = &ft->node.children; + u32 max_fte = ft->autogroup.max_fte; + unsigned int candidate_index = 0; + unsigned int group_size = 0; + struct mlx5_flow_group *fg; + + if (!ft->autogroup.active) + return ERR_PTR(-ENOENT); + + if (ft->autogroup.num_groups < ft->autogroup.required_groups) + group_size = ft->autogroup.group_size; + + /* max_fte == ft->autogroup.max_types */ + if (group_size == 0) + group_size = 1; + + /* sorted by start_index */ + fs_for_each_fg(fg, ft) { + if (candidate_index + group_size > fg->start_index) + candidate_index = fg->start_index + fg->max_ftes; + else + break; + prev = &fg->node.list; + } + + if (candidate_index + group_size > max_fte) + return ERR_PTR(-ENOSPC); + + fg = alloc_insert_flow_group(ft, + spec->match_criteria_enable, + spec->match_criteria, + candidate_index, + candidate_index + group_size - 1, + prev); + if (IS_ERR(fg)) + goto out; + + if (group_size == ft->autogroup.group_size) + ft->autogroup.num_groups++; + +out: + return fg; +} + +static int create_auto_flow_group(struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *match_criteria_addr; + u8 src_esw_owner_mask_on; + void *misc; + int err; + u32 *in; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, + fg->mask.match_criteria_enable); + MLX5_SET(create_flow_group_in, in, start_flow_index, fg->start_index); + MLX5_SET(create_flow_group_in, in, end_flow_index, fg->start_index + + fg->max_ftes - 1); + + misc = MLX5_ADDR_OF(fte_match_param, fg->mask.match_criteria, + misc_parameters); + src_esw_owner_mask_on = !!MLX5_GET(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + MLX5_SET(create_flow_group_in, in, + source_eswitch_owner_vhca_id_valid, src_esw_owner_mask_on); + + match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in, + in, match_criteria); + memcpy(match_criteria_addr, fg->mask.match_criteria, + sizeof(fg->mask.match_criteria)); + + err = 
root->cmds->create_flow_group(root, ft, in, fg); + if (!err) { + fg->node.active = true; + trace_mlx5_fs_add_fg(fg); + } + + kvfree(in); + return err; +} + +static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, + struct mlx5_flow_destination *d2) +{ + if (d1->type == d2->type) { + if (((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT || + d1->type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) && + d1->vport.num == d2->vport.num && + d1->vport.flags == d2->vport.flags && + ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ? + (d1->vport.vhca_id == d2->vport.vhca_id) : true) && + ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ? + (d1->vport.pkt_reformat->id == + d2->vport.pkt_reformat->id) : true)) || + (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && + d1->ft == d2->ft) || + (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR && + d1->tir_num == d2->tir_num) || + (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM && + d1->ft_num == d2->ft_num) || + (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER && + d1->sampler_id == d2->sampler_id)) + return true; + } + + return false; +} + +static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_rule *rule; + + list_for_each_entry(rule, &fte->node.children, node.list) { + if (mlx5_flow_dests_cmp(&rule->dest_attr, dest)) + return rule; + } + return NULL; +} + +static bool check_conflicting_actions_vlan(const struct mlx5_fs_vlan *vlan0, + const struct mlx5_fs_vlan *vlan1) +{ + return vlan0->ethtype != vlan1->ethtype || + vlan0->vid != vlan1->vid || + vlan0->prio != vlan1->prio; +} + +static bool check_conflicting_actions(const struct mlx5_flow_act *act1, + const struct mlx5_flow_act *act2) +{ + u32 action1 = act1->action; + u32 action2 = act2->action; + u32 xored_actions; + + xored_actions = action1 ^ action2; + + /* if one rule only wants to count, it's ok */ + if (action1 == MLX5_FLOW_CONTEXT_ACTION_COUNT || + action2 == MLX5_FLOW_CONTEXT_ACTION_COUNT) + return false; + + if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | + MLX5_FLOW_CONTEXT_ACTION_DECAP | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | + MLX5_FLOW_CONTEXT_ACTION_VLAN_POP | + MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | + MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2 | + MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2)) + return true; + + if (action1 & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT && + act1->pkt_reformat != act2->pkt_reformat) + return true; + + if (action1 & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + act1->modify_hdr != act2->modify_hdr) + return true; + + if (action1 & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH && + check_conflicting_actions_vlan(&act1->vlan[0], &act2->vlan[0])) + return true; + + if (action1 & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 && + check_conflicting_actions_vlan(&act1->vlan[1], &act2->vlan[1])) + return true; + + return false; +} + +static int check_conflicting_ftes(struct fs_fte *fte, + const struct mlx5_flow_context *flow_context, + const struct mlx5_flow_act *flow_act) +{ + if (check_conflicting_actions(flow_act, &fte->action)) { + mlx5_core_warn(get_dev(&fte->node), + "Found two FTEs with conflicting actions\n"); + return -EEXIST; + } + + if ((flow_context->flags & FLOW_CONTEXT_HAS_TAG) && + fte->flow_context.flow_tag != flow_context->flow_tag) { + mlx5_core_warn(get_dev(&fte->node), + "FTE flow tag %u already exists with different flow tag %u\n", + fte->flow_context.flow_tag, + flow_context->flow_tag); + return -EEXIST; + } + + return 0; +} + +static struct 
mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, + const struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int dest_num, + struct fs_fte *fte) +{ + struct mlx5_flow_handle *handle; + int old_action; + int i; + int ret; + + ret = check_conflicting_ftes(fte, &spec->flow_context, flow_act); + if (ret) + return ERR_PTR(ret); + + old_action = fte->action.action; + fte->action.action |= flow_act->action; + handle = add_rule_fte(fte, fg, dest, dest_num, + old_action != flow_act->action); + if (IS_ERR(handle)) { + fte->action.action = old_action; + return handle; + } + trace_mlx5_fs_set_fte(fte, false); + + for (i = 0; i < handle->num_rules; i++) { + if (refcount_read(&handle->rule[i]->node.refcount) == 1) { + tree_add_node(&handle->rule[i]->node, &fte->node); + trace_mlx5_fs_add_rule(handle->rule[i]); + } + } + return handle; +} + +static bool counter_is_valid(u32 action) +{ + return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_ALLOW | + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)); +} + +static bool dest_is_valid(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_table *ft) +{ + bool ignore_level = flow_act->flags & FLOW_ACT_IGNORE_FLOW_LEVEL; + u32 action = flow_act->action; + + if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)) + return counter_is_valid(action); + + if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return true; + + if (ignore_level) { + if (ft->type != FS_FT_FDB && + ft->type != FS_FT_NIC_RX) + return false; + + if (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && + ft->type != dest->ft->type) + return false; + } + + if (!dest || ((dest->type == + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) && + (dest->ft->level <= ft->level && !ignore_level))) + return false; + return true; +} + +struct match_list { + struct list_head list; + struct mlx5_flow_group *g; +}; + +static void free_match_list(struct match_list *head, bool ft_locked) +{ + struct match_list *iter, *match_tmp; + + list_for_each_entry_safe(iter, match_tmp, &head->list, + list) { + tree_put_node(&iter->g->node, ft_locked); + list_del(&iter->list); + kfree(iter); + } +} + +static int build_match_list(struct match_list *match_head, + struct mlx5_flow_table *ft, + const struct mlx5_flow_spec *spec, + struct mlx5_flow_group *fg, + bool ft_locked) +{ + struct rhlist_head *tmp, *list; + struct mlx5_flow_group *g; + int err = 0; + + rcu_read_lock(); + INIT_LIST_HEAD(&match_head->list); + /* Collect all fgs which has a matching match_criteria */ + list = rhltable_lookup(&ft->fgs_hash, spec, rhash_fg); + /* RCU is atomic, we can't execute FW commands here */ + rhl_for_each_entry_rcu(g, tmp, list, hash) { + struct match_list *curr_match; + + if (fg && fg != g) + continue; + + if (unlikely(!tree_get_node(&g->node))) + continue; + + curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); + if (!curr_match) { + rcu_read_unlock(); + free_match_list(match_head, ft_locked); + return -ENOMEM; + } + curr_match->g = g; + list_add_tail(&curr_match->list, &match_head->list); + } + rcu_read_unlock(); + return err; +} + +static u64 matched_fgs_get_version(struct list_head *match_head) +{ + struct match_list *iter; + u64 version = 0; + + list_for_each_entry(iter, match_head, list) + version += (u64)atomic_read(&iter->g->node.version); + return version; +} + +static struct fs_fte * +lookup_fte_locked(struct mlx5_flow_group *g, + const u32 *match_value, + bool take_write) +{ + struct fs_fte *fte_tmp; + + if 
(take_write) + nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); + else + nested_down_read_ref_node(&g->node, FS_LOCK_PARENT); + fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, match_value, + rhash_fte); + if (!fte_tmp || !tree_get_node(&fte_tmp->node)) { + fte_tmp = NULL; + goto out; + } + if (!fte_tmp->node.active) { + tree_put_node(&fte_tmp->node, false); + fte_tmp = NULL; + goto out; + } + + nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD); +out: + if (take_write) + up_write_ref_node(&g->node, false); + else + up_read_ref_node(&g->node); + return fte_tmp; +} + +static struct mlx5_flow_handle * +try_add_to_existing_fg(struct mlx5_flow_table *ft, + struct list_head *match_head, + const struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int dest_num, + int ft_version) +{ + struct mlx5_flow_steering *steering = get_steering(&ft->node); + struct mlx5_flow_group *g; + struct mlx5_flow_handle *rule; + struct match_list *iter; + bool take_write = false; + struct fs_fte *fte; + u64 version = 0; + int err; + + fte = alloc_fte(ft, spec, flow_act); + if (IS_ERR(fte)) + return ERR_PTR(-ENOMEM); + +search_again_locked: + if (flow_act->flags & FLOW_ACT_NO_APPEND) + goto skip_search; + version = matched_fgs_get_version(match_head); + /* Try to find an fte with identical match value and attempt update its + * action. + */ + list_for_each_entry(iter, match_head, list) { + struct fs_fte *fte_tmp; + + g = iter->g; + fte_tmp = lookup_fte_locked(g, spec->match_value, take_write); + if (!fte_tmp) + continue; + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp); + /* No error check needed here, because insert_fte() is not called */ + up_write_ref_node(&fte_tmp->node, false); + tree_put_node(&fte_tmp->node, false); + kmem_cache_free(steering->ftes_cache, fte); + return rule; + } + +skip_search: + /* No group with matching fte found, or we skipped the search. + * Try to add a new fte to any matching fg. + */ + + /* Check the ft version, for case that new flow group + * was added while the fgs weren't locked + */ + if (atomic_read(&ft->node.version) != ft_version) { + rule = ERR_PTR(-EAGAIN); + goto out; + } + + /* Check the fgs version. If version have changed it could be that an + * FTE with the same match value was added while the fgs weren't + * locked. 
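+ * In that case the lookup is retried with take_write set, so the
+ * groups are write-locked and a concurrently added FTE is either
+ * found or cannot race with the insertion below.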
+ */ + if (!(flow_act->flags & FLOW_ACT_NO_APPEND) && + version != matched_fgs_get_version(match_head)) { + take_write = true; + goto search_again_locked; + } + + list_for_each_entry(iter, match_head, list) { + g = iter->g; + + nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); + + if (!g->node.active) { + up_write_ref_node(&g->node, false); + continue; + } + + err = insert_fte(g, fte); + if (err) { + up_write_ref_node(&g->node, false); + if (err == -ENOSPC) + continue; + kmem_cache_free(steering->ftes_cache, fte); + return ERR_PTR(err); + } + + nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); + up_write_ref_node(&g->node, false); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); + up_write_ref_node(&fte->node, false); + if (IS_ERR(rule)) + tree_put_node(&fte->node, false); + return rule; + } + rule = ERR_PTR(-ENOENT); +out: + kmem_cache_free(steering->ftes_cache, fte); + return rule; +} + +static struct mlx5_flow_handle * +_mlx5_add_flow_rules(struct mlx5_flow_table *ft, + const struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int dest_num) + +{ + struct mlx5_flow_steering *steering = get_steering(&ft->node); + struct mlx5_flow_handle *rule; + struct match_list match_head; + struct mlx5_flow_group *g; + bool take_write = false; + struct fs_fte *fte; + int version; + int err; + int i; + + if (!check_valid_spec(spec)) + return ERR_PTR(-EINVAL); + + if (flow_act->fg && ft->autogroup.active) + return ERR_PTR(-EINVAL); + + for (i = 0; i < dest_num; i++) { + if (!dest_is_valid(&dest[i], flow_act, ft)) + return ERR_PTR(-EINVAL); + } + nested_down_read_ref_node(&ft->node, FS_LOCK_GRANDPARENT); +search_again_locked: + version = atomic_read(&ft->node.version); + + /* Collect all fgs which has a matching match_criteria */ + err = build_match_list(&match_head, ft, spec, flow_act->fg, take_write); + if (err) { + if (take_write) + up_write_ref_node(&ft->node, false); + else + up_read_ref_node(&ft->node); + return ERR_PTR(err); + } + + if (!take_write) + up_read_ref_node(&ft->node); + + rule = try_add_to_existing_fg(ft, &match_head.list, spec, flow_act, dest, + dest_num, version); + free_match_list(&match_head, take_write); + if (!IS_ERR(rule) || + (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) { + if (take_write) + up_write_ref_node(&ft->node, false); + return rule; + } + + if (!take_write) { + nested_down_write_ref_node(&ft->node, FS_LOCK_GRANDPARENT); + take_write = true; + } + + if (PTR_ERR(rule) == -EAGAIN || + version != atomic_read(&ft->node.version)) + goto search_again_locked; + + g = alloc_auto_flow_group(ft, spec); + if (IS_ERR(g)) { + rule = ERR_CAST(g); + up_write_ref_node(&ft->node, false); + return rule; + } + + fte = alloc_fte(ft, spec, flow_act); + if (IS_ERR(fte)) { + up_write_ref_node(&ft->node, false); + err = PTR_ERR(fte); + goto err_alloc_fte; + } + + nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); + up_write_ref_node(&ft->node, false); + + err = create_auto_flow_group(ft, g); + if (err) + goto err_release_fg; + + err = insert_fte(g, fte); + if (err) + goto err_release_fg; + + nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); + up_write_ref_node(&g->node, false); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); + up_write_ref_node(&fte->node, false); + if (IS_ERR(rule)) + tree_put_node(&fte->node, false); + tree_put_node(&g->node, false); + return rule; + +err_release_fg: + up_write_ref_node(&g->node, false); + kmem_cache_free(steering->ftes_cache, fte); +err_alloc_fte: + 
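+ /* Editorial note (not from the original source): this put drops the
+ * only reference held on the freshly created auto group, so its
+ * del_hw/del_sw callbacks run and the group is torn down;
+ * err_release_fg falls through to here after unlocking the group.
+ */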
tree_put_node(&g->node, false); + return ERR_PTR(err); +} + +static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) +{ + return ((ft->type == FS_FT_NIC_RX) && + (MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs))); +} + +struct mlx5_flow_handle * +mlx5_add_flow_rules(struct mlx5_flow_table *ft, + const struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int num_dest) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + static const struct mlx5_flow_spec zero_spec = {}; + struct mlx5_flow_destination *gen_dest = NULL; + struct mlx5_flow_table *next_ft = NULL; + struct mlx5_flow_handle *handle = NULL; + u32 sw_action = flow_act->action; + int i; + + if (!spec) + spec = &zero_spec; + + if (!is_fwd_next_action(sw_action)) + return _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest); + + if (!fwd_next_prio_supported(ft)) + return ERR_PTR(-EOPNOTSUPP); + + mutex_lock(&root->chain_lock); + next_ft = find_next_fwd_ft(ft, flow_act); + if (!next_ft) { + handle = ERR_PTR(-EOPNOTSUPP); + goto unlock; + } + + gen_dest = kcalloc(num_dest + 1, sizeof(*dest), + GFP_KERNEL); + if (!gen_dest) { + handle = ERR_PTR(-ENOMEM); + goto unlock; + } + for (i = 0; i < num_dest; i++) + gen_dest[i] = dest[i]; + gen_dest[i].type = + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + gen_dest[i].ft = next_ft; + dest = gen_dest; + num_dest++; + flow_act->action &= ~(MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO | + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS); + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest); + if (IS_ERR(handle)) + goto unlock; + + if (list_empty(&handle->rule[num_dest - 1]->next_ft)) { + mutex_lock(&next_ft->lock); + list_add(&handle->rule[num_dest - 1]->next_ft, + &next_ft->fwd_rules); + mutex_unlock(&next_ft->lock); + handle->rule[num_dest - 1]->sw_action = sw_action; + handle->rule[num_dest - 1]->ft = ft; + } +unlock: + mutex_unlock(&root->chain_lock); + kfree(gen_dest); + return handle; +} +EXPORT_SYMBOL(mlx5_add_flow_rules); + +void mlx5_del_flow_rules(struct mlx5_flow_handle *handle) +{ + struct fs_fte *fte; + int i; + + /* In order to consolidate the HW changes we lock the FTE for other + * changes, and increase its refcount, in order not to perform the + * "del" functions of the FTE. Will handle them here. + * The removal of the rules is done under locked FTE. + * After removing all the handle's rules, if there are remaining + * rules, it means we just need to modify the FTE in FW, and + * unlock/decrease the refcount we increased before. + * Otherwise, it means the FTE should be deleted. First delete the + * FTE in FW. Then, unlock the FTE, and proceed the tree_put_node of + * the FTE, which will handle the last decrease of the refcount, as + * well as required handling of its parent. 
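+ *
+ * Caller-side sketch (editorial illustration, not part of the original
+ * file): a handle obtained from mlx5_add_flow_rules() is released with
+ * a single call, regardless of how many rules it aggregates:
+ *
+ *	handle = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ *	if (IS_ERR(handle))
+ *		return PTR_ERR(handle);
+ *	...
+ *	mlx5_del_flow_rules(handle);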
+ */ + fs_get_obj(fte, handle->rule[0]->node.parent); + down_write_ref_node(&fte->node, false); + for (i = handle->num_rules - 1; i >= 0; i--) + tree_remove_node(&handle->rule[i]->node, true); + if (list_empty(&fte->node.children)) { + fte->node.del_hw_func(&fte->node); + /* Avoid double call to del_hw_fte */ + fte->node.del_hw_func = NULL; + up_write_ref_node(&fte->node, false); + tree_put_node(&fte->node, false); + } else if (fte->dests_size) { + if (fte->modify_mask) + modify_fte(fte); + up_write_ref_node(&fte->node, false); + } else { + up_write_ref_node(&fte->node, false); + } + kfree(handle); +} +EXPORT_SYMBOL(mlx5_del_flow_rules); + +/* Assuming prio->node.children(flow tables) is sorted by level */ +static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft) +{ + struct fs_node *prio_parent, *child; + struct fs_prio *prio; + + fs_get_obj(prio, ft->node.parent); + + if (!list_is_last(&ft->node.list, &prio->node.children)) + return list_next_entry(ft, node.list); + + prio_parent = find_prio_chains_parent(&prio->node, &child); + + if (prio_parent && list_is_first(&child->list, &prio_parent->children)) + return find_closest_ft(&prio->node, false, false); + + return find_next_chained_ft(&prio->node); +} + +static int update_root_ft_destroy(struct mlx5_flow_table *ft) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + struct mlx5_ft_underlay_qp *uqp; + struct mlx5_flow_table *new_root_ft = NULL; + int err = 0; + u32 qpn; + + if (root->root_ft != ft) + return 0; + + new_root_ft = find_next_ft(ft); + if (!new_root_ft) { + root->root_ft = NULL; + return 0; + } + + if (list_empty(&root->underlay_qpns)) { + /* Don't set any QPN (zero) in case QPN list is empty */ + qpn = 0; + err = root->cmds->update_root_ft(root, new_root_ft, + qpn, false); + } else { + list_for_each_entry(uqp, &root->underlay_qpns, list) { + qpn = uqp->qpn; + err = root->cmds->update_root_ft(root, + new_root_ft, qpn, + false); + if (err) + break; + } + } + + if (err) + mlx5_core_warn(root->dev, + "Update root flow table of id(%u) qpn(%d) failed\n", + ft->id, qpn); + else + root->root_ft = new_root_ft; + + return 0; +} + +/* Connect flow table from previous priority to + * the next flow table. 
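+ * In other words, when the first table of a priority is being
+ * destroyed, forward rules that pointed at it and the tables of the
+ * previous priority are re-wired to the table that now follows it in
+ * the chain.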
+ */ +static int disconnect_flow_table(struct mlx5_flow_table *ft) +{ + struct mlx5_core_dev *dev = get_dev(&ft->node); + struct mlx5_flow_table *next_ft; + struct fs_prio *prio; + int err = 0; + + err = update_root_ft_destroy(ft); + if (err) + return err; + + fs_get_obj(prio, ft->node.parent); + if (!(list_first_entry(&prio->node.children, + struct mlx5_flow_table, + node.list) == ft)) + return 0; + + next_ft = find_next_ft(ft); + err = connect_fwd_rules(dev, next_ft, ft); + if (err) + return err; + + err = connect_prev_fts(dev, next_ft, prio); + if (err) + mlx5_core_warn(dev, "Failed to disconnect flow table %d\n", + ft->id); + return err; +} + +int mlx5_destroy_flow_table(struct mlx5_flow_table *ft) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + int err = 0; + + mutex_lock(&root->chain_lock); + if (!(ft->flags & MLX5_FLOW_TABLE_UNMANAGED)) + err = disconnect_flow_table(ft); + if (err) { + mutex_unlock(&root->chain_lock); + return err; + } + if (tree_remove_node(&ft->node, false)) + mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n", + ft->id); + mutex_unlock(&root->chain_lock); + + return err; +} +EXPORT_SYMBOL(mlx5_destroy_flow_table); + +void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) +{ + if (tree_remove_node(&fg->node, false)) + mlx5_core_warn(get_dev(&fg->node), "Flow group %d wasn't destroyed, refcount > 1\n", + fg->id); +} +EXPORT_SYMBOL(mlx5_destroy_flow_group); + +struct mlx5_flow_namespace *mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev, + int n) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + + if (!steering || !steering->fdb_sub_ns) + return NULL; + + return steering->fdb_sub_ns[n]; +} +EXPORT_SYMBOL(mlx5_get_fdb_sub_ns); + +static bool is_nic_rx_ns(enum mlx5_flow_namespace_type type) +{ + switch (type) { + case MLX5_FLOW_NAMESPACE_BYPASS: + case MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC: + case MLX5_FLOW_NAMESPACE_LAG: + case MLX5_FLOW_NAMESPACE_OFFLOADS: + case MLX5_FLOW_NAMESPACE_ETHTOOL: + case MLX5_FLOW_NAMESPACE_KERNEL: + case MLX5_FLOW_NAMESPACE_LEFTOVERS: + case MLX5_FLOW_NAMESPACE_ANCHOR: + return true; + default: + return false; + } +} + +struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type type) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + struct mlx5_flow_root_namespace *root_ns; + int prio = 0; + struct fs_prio *fs_prio; + struct mlx5_flow_namespace *ns; + + if (!steering) + return NULL; + + switch (type) { + case MLX5_FLOW_NAMESPACE_FDB: + if (steering->fdb_root_ns) + return &steering->fdb_root_ns->ns; + return NULL; + case MLX5_FLOW_NAMESPACE_PORT_SEL: + if (steering->port_sel_root_ns) + return &steering->port_sel_root_ns->ns; + return NULL; + case MLX5_FLOW_NAMESPACE_SNIFFER_RX: + if (steering->sniffer_rx_root_ns) + return &steering->sniffer_rx_root_ns->ns; + return NULL; + case MLX5_FLOW_NAMESPACE_SNIFFER_TX: + if (steering->sniffer_tx_root_ns) + return &steering->sniffer_tx_root_ns->ns; + return NULL; + case MLX5_FLOW_NAMESPACE_FDB_BYPASS: + root_ns = steering->fdb_root_ns; + prio = FDB_BYPASS_PATH; + break; + case MLX5_FLOW_NAMESPACE_EGRESS: + case MLX5_FLOW_NAMESPACE_EGRESS_IPSEC: + case MLX5_FLOW_NAMESPACE_EGRESS_MACSEC: + root_ns = steering->egress_root_ns; + prio = type - MLX5_FLOW_NAMESPACE_EGRESS; + break; + case MLX5_FLOW_NAMESPACE_RDMA_RX: + root_ns = steering->rdma_rx_root_ns; + prio = RDMA_RX_BYPASS_PRIO; + break; + case MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL: + root_ns = steering->rdma_rx_root_ns; + 
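+ /* Editorial note (not from the original source): the RDMA RX bypass,
+ * kernel and counters namespaces all resolve to the same rdma_rx root
+ * namespace; they differ only in the priority selected in this switch,
+ * e.g. MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS picks RDMA_RX_COUNTERS_PRIO
+ * below.
+ */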
prio = RDMA_RX_KERNEL_PRIO; + break; + case MLX5_FLOW_NAMESPACE_RDMA_TX: + root_ns = steering->rdma_tx_root_ns; + break; + case MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS: + root_ns = steering->rdma_rx_root_ns; + prio = RDMA_RX_COUNTERS_PRIO; + break; + case MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS: + root_ns = steering->rdma_tx_root_ns; + prio = RDMA_TX_COUNTERS_PRIO; + break; + default: /* Must be NIC RX */ + WARN_ON(!is_nic_rx_ns(type)); + root_ns = steering->root_ns; + prio = type; + break; + } + + if (!root_ns) + return NULL; + + fs_prio = find_prio(&root_ns->ns, prio); + if (!fs_prio) + return NULL; + + ns = list_first_entry(&fs_prio->node.children, + typeof(*ns), + node.list); + + return ns; +} +EXPORT_SYMBOL(mlx5_get_flow_namespace); + +struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type type, + int vport) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + + if (!steering) + return NULL; + + switch (type) { + case MLX5_FLOW_NAMESPACE_ESW_EGRESS: + if (vport >= steering->esw_egress_acl_vports) + return NULL; + if (steering->esw_egress_root_ns && + steering->esw_egress_root_ns[vport]) + return &steering->esw_egress_root_ns[vport]->ns; + else + return NULL; + case MLX5_FLOW_NAMESPACE_ESW_INGRESS: + if (vport >= steering->esw_ingress_acl_vports) + return NULL; + if (steering->esw_ingress_root_ns && + steering->esw_ingress_root_ns[vport]) + return &steering->esw_ingress_root_ns[vport]->ns; + else + return NULL; + default: + return NULL; + } +} + +static struct fs_prio *_fs_create_prio(struct mlx5_flow_namespace *ns, + unsigned int prio, + int num_levels, + enum fs_node_type type) +{ + struct fs_prio *fs_prio; + + fs_prio = kzalloc(sizeof(*fs_prio), GFP_KERNEL); + if (!fs_prio) + return ERR_PTR(-ENOMEM); + + fs_prio->node.type = type; + tree_init_node(&fs_prio->node, NULL, del_sw_prio); + tree_add_node(&fs_prio->node, &ns->node); + fs_prio->num_levels = num_levels; + fs_prio->prio = prio; + list_add_tail(&fs_prio->node.list, &ns->node.children); + + return fs_prio; +} + +static struct fs_prio *fs_create_prio_chained(struct mlx5_flow_namespace *ns, + unsigned int prio, + int num_levels) +{ + return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO_CHAINS); +} + +static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, + unsigned int prio, int num_levels) +{ + return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO); +} + +static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace + *ns) +{ + ns->node.type = FS_TYPE_NAMESPACE; + + return ns; +} + +static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio, + int def_miss_act) +{ + struct mlx5_flow_namespace *ns; + + ns = kzalloc(sizeof(*ns), GFP_KERNEL); + if (!ns) + return ERR_PTR(-ENOMEM); + + fs_init_namespace(ns); + ns->def_miss_action = def_miss_act; + tree_init_node(&ns->node, NULL, del_sw_ns); + tree_add_node(&ns->node, &prio->node); + list_add_tail(&ns->node.list, &prio->node.children); + + return ns; +} + +static int create_leaf_prios(struct mlx5_flow_namespace *ns, int prio, + struct init_tree_node *prio_metadata) +{ + struct fs_prio *fs_prio; + int i; + + for (i = 0; i < prio_metadata->num_leaf_prios; i++) { + fs_prio = fs_create_prio(ns, prio++, prio_metadata->num_levels); + if (IS_ERR(fs_prio)) + return PTR_ERR(fs_prio); + } + return 0; +} + +#define FLOW_TABLE_BIT_SZ 1 +#define GET_FLOW_TABLE_CAP(dev, offset) \ + ((be32_to_cpu(*((__be32 *)(dev->caps.hca[MLX5_CAP_FLOW_TABLE]->cur) + \ + offset / 32)) 
>> \ + (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ) +static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps) +{ + int i; + + for (i = 0; i < caps->arr_sz; i++) { + if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i])) + return false; + } + return true; +} + +static int init_root_tree_recursive(struct mlx5_flow_steering *steering, + struct init_tree_node *init_node, + struct fs_node *fs_parent_node, + struct init_tree_node *init_parent_node, + int prio) +{ + int max_ft_level = MLX5_CAP_FLOWTABLE(steering->dev, + flow_table_properties_nic_receive. + max_ft_level); + struct mlx5_flow_namespace *fs_ns; + struct fs_prio *fs_prio; + struct fs_node *base; + int i; + int err; + + if (init_node->type == FS_TYPE_PRIO) { + if ((init_node->min_ft_level > max_ft_level) || + !has_required_caps(steering->dev, &init_node->caps)) + return 0; + + fs_get_obj(fs_ns, fs_parent_node); + if (init_node->num_leaf_prios) + return create_leaf_prios(fs_ns, prio, init_node); + fs_prio = fs_create_prio(fs_ns, prio, init_node->num_levels); + if (IS_ERR(fs_prio)) + return PTR_ERR(fs_prio); + base = &fs_prio->node; + } else if (init_node->type == FS_TYPE_NAMESPACE) { + fs_get_obj(fs_prio, fs_parent_node); + fs_ns = fs_create_namespace(fs_prio, init_node->def_miss_action); + if (IS_ERR(fs_ns)) + return PTR_ERR(fs_ns); + base = &fs_ns->node; + } else { + return -EINVAL; + } + prio = 0; + for (i = 0; i < init_node->ar_size; i++) { + err = init_root_tree_recursive(steering, &init_node->children[i], + base, init_node, prio); + if (err) + return err; + if (init_node->children[i].type == FS_TYPE_PRIO && + init_node->children[i].num_leaf_prios) { + prio += init_node->children[i].num_leaf_prios; + } + } + + return 0; +} + +static int init_root_tree(struct mlx5_flow_steering *steering, + struct init_tree_node *init_node, + struct fs_node *fs_parent_node) +{ + int err; + int i; + + for (i = 0; i < init_node->ar_size; i++) { + err = init_root_tree_recursive(steering, &init_node->children[i], + fs_parent_node, + init_node, i); + if (err) + return err; + } + return 0; +} + +static void del_sw_root_ns(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root_ns; + struct mlx5_flow_namespace *ns; + + fs_get_obj(ns, node); + root_ns = container_of(ns, struct mlx5_flow_root_namespace, ns); + mutex_destroy(&root_ns->chain_lock); + kfree(node); +} + +static struct mlx5_flow_root_namespace +*create_root_ns(struct mlx5_flow_steering *steering, + enum fs_flow_table_type table_type) +{ + const struct mlx5_flow_cmds *cmds = mlx5_fs_cmd_get_default(table_type); + struct mlx5_flow_root_namespace *root_ns; + struct mlx5_flow_namespace *ns; + + /* Create the root namespace */ + root_ns = kzalloc(sizeof(*root_ns), GFP_KERNEL); + if (!root_ns) + return NULL; + + root_ns->dev = steering->dev; + root_ns->table_type = table_type; + root_ns->cmds = cmds; + + INIT_LIST_HEAD(&root_ns->underlay_qpns); + + ns = &root_ns->ns; + fs_init_namespace(ns); + mutex_init(&root_ns->chain_lock); + tree_init_node(&ns->node, NULL, del_sw_root_ns); + tree_add_node(&ns->node, NULL); + + return root_ns; +} + +static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level); + +static int set_prio_attrs_in_ns(struct mlx5_flow_namespace *ns, int acc_level) +{ + struct fs_prio *prio; + + fs_for_each_prio(prio, ns) { + /* This updates prio start_level and num_levels */ + set_prio_attrs_in_prio(prio, acc_level); + acc_level += prio->num_levels; + } + return acc_level; +} + +static void set_prio_attrs_in_prio(struct fs_prio *prio, 
int acc_level) +{ + struct mlx5_flow_namespace *ns; + int acc_level_ns = acc_level; + + prio->start_level = acc_level; + fs_for_each_ns(ns, prio) { + /* This updates start_level and num_levels of ns's priority descendants */ + acc_level_ns = set_prio_attrs_in_ns(ns, acc_level); + + /* If this a prio with chains, and we can jump from one chain + * (namespace) to another, so we accumulate the levels + */ + if (prio->node.type == FS_TYPE_PRIO_CHAINS) + acc_level = acc_level_ns; + } + + if (!prio->num_levels) + prio->num_levels = acc_level_ns - prio->start_level; + WARN_ON(prio->num_levels < acc_level_ns - prio->start_level); +} + +static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns) +{ + struct mlx5_flow_namespace *ns = &root_ns->ns; + struct fs_prio *prio; + int start_level = 0; + + fs_for_each_prio(prio, ns) { + set_prio_attrs_in_prio(prio, start_level); + start_level += prio->num_levels; + } +} + +#define ANCHOR_PRIO 0 +#define ANCHOR_SIZE 1 +#define ANCHOR_LEVEL 0 +static int create_anchor_flow_table(struct mlx5_flow_steering *steering) +{ + struct mlx5_flow_namespace *ns = NULL; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_table *ft; + + ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR); + if (WARN_ON(!ns)) + return -EINVAL; + + ft_attr.max_fte = ANCHOR_SIZE; + ft_attr.level = ANCHOR_LEVEL; + ft_attr.prio = ANCHOR_PRIO; + + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + mlx5_core_err(steering->dev, "Failed to create last anchor flow table"); + return PTR_ERR(ft); + } + return 0; +} + +static int init_root_ns(struct mlx5_flow_steering *steering) +{ + int err; + + steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX); + if (!steering->root_ns) + return -ENOMEM; + + err = init_root_tree(steering, &root_fs, &steering->root_ns->ns.node); + if (err) + goto out_err; + + set_prio_attrs(steering->root_ns); + err = create_anchor_flow_table(steering); + if (err) + goto out_err; + + return 0; + +out_err: + cleanup_root_ns(steering->root_ns); + steering->root_ns = NULL; + return err; +} + +static void clean_tree(struct fs_node *node) +{ + if (node) { + struct fs_node *iter; + struct fs_node *temp; + + tree_get_node(node); + list_for_each_entry_safe(iter, temp, &node->children, list) + clean_tree(iter); + tree_put_node(node, false); + tree_remove_node(node, false); + } +} + +static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns) +{ + if (!root_ns) + return; + + clean_tree(&root_ns->ns.node); +} + +static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *prio; + + steering->sniffer_tx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_TX); + if (!steering->sniffer_tx_root_ns) + return -ENOMEM; + + /* Create single prio */ + prio = fs_create_prio(&steering->sniffer_tx_root_ns->ns, 0, 1); + return PTR_ERR_OR_ZERO(prio); +} + +static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *prio; + + steering->sniffer_rx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_RX); + if (!steering->sniffer_rx_root_ns) + return -ENOMEM; + + /* Create single prio */ + prio = fs_create_prio(&steering->sniffer_rx_root_ns->ns, 0, 1); + return PTR_ERR_OR_ZERO(prio); +} + +#define PORT_SEL_NUM_LEVELS 3 +static int init_port_sel_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *prio; + + steering->port_sel_root_ns = create_root_ns(steering, FS_FT_PORT_SEL); + if (!steering->port_sel_root_ns) + return -ENOMEM; + + /* Create single prio */ + prio = 
fs_create_prio(&steering->port_sel_root_ns->ns, 0, + PORT_SEL_NUM_LEVELS); + return PTR_ERR_OR_ZERO(prio); +} + +static int init_rdma_rx_root_ns(struct mlx5_flow_steering *steering) +{ + int err; + + steering->rdma_rx_root_ns = create_root_ns(steering, FS_FT_RDMA_RX); + if (!steering->rdma_rx_root_ns) + return -ENOMEM; + + err = init_root_tree(steering, &rdma_rx_root_fs, + &steering->rdma_rx_root_ns->ns.node); + if (err) + goto out_err; + + set_prio_attrs(steering->rdma_rx_root_ns); + + return 0; + +out_err: + cleanup_root_ns(steering->rdma_rx_root_ns); + steering->rdma_rx_root_ns = NULL; + return err; +} + +static int init_rdma_tx_root_ns(struct mlx5_flow_steering *steering) +{ + int err; + + steering->rdma_tx_root_ns = create_root_ns(steering, FS_FT_RDMA_TX); + if (!steering->rdma_tx_root_ns) + return -ENOMEM; + + err = init_root_tree(steering, &rdma_tx_root_fs, + &steering->rdma_tx_root_ns->ns.node); + if (err) + goto out_err; + + set_prio_attrs(steering->rdma_tx_root_ns); + + return 0; + +out_err: + cleanup_root_ns(steering->rdma_tx_root_ns); + steering->rdma_tx_root_ns = NULL; + return err; +} + +/* FT and tc chains are stored in the same array so we can re-use the + * mlx5_get_fdb_sub_ns() and tc api for FT chains. + * When creating a new ns for each chain store it in the first available slot. + * Assume tc chains are created and stored first and only then the FT chain. + */ +static void store_fdb_sub_ns_prio_chain(struct mlx5_flow_steering *steering, + struct mlx5_flow_namespace *ns) +{ + int chain = 0; + + while (steering->fdb_sub_ns[chain]) + ++chain; + + steering->fdb_sub_ns[chain] = ns; +} + +static int create_fdb_sub_ns_prio_chain(struct mlx5_flow_steering *steering, + struct fs_prio *maj_prio) +{ + struct mlx5_flow_namespace *ns; + struct fs_prio *min_prio; + int prio; + + ns = fs_create_namespace(maj_prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF); + if (IS_ERR(ns)) + return PTR_ERR(ns); + + for (prio = 0; prio < FDB_TC_MAX_PRIO; prio++) { + min_prio = fs_create_prio(ns, prio, FDB_TC_LEVELS_PER_PRIO); + if (IS_ERR(min_prio)) + return PTR_ERR(min_prio); + } + + store_fdb_sub_ns_prio_chain(steering, ns); + + return 0; +} + +static int create_fdb_chains(struct mlx5_flow_steering *steering, + int fs_prio, + int chains) +{ + struct fs_prio *maj_prio; + int levels; + int chain; + int err; + + levels = FDB_TC_LEVELS_PER_PRIO * FDB_TC_MAX_PRIO * chains; + maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns, + fs_prio, + levels); + if (IS_ERR(maj_prio)) + return PTR_ERR(maj_prio); + + for (chain = 0; chain < chains; chain++) { + err = create_fdb_sub_ns_prio_chain(steering, maj_prio); + if (err) + return err; + } + + return 0; +} + +static int create_fdb_fast_path(struct mlx5_flow_steering *steering) +{ + int err; + + steering->fdb_sub_ns = kcalloc(FDB_NUM_CHAINS, + sizeof(*steering->fdb_sub_ns), + GFP_KERNEL); + if (!steering->fdb_sub_ns) + return -ENOMEM; + + err = create_fdb_chains(steering, FDB_TC_OFFLOAD, FDB_TC_MAX_CHAIN + 1); + if (err) + return err; + + err = create_fdb_chains(steering, FDB_FT_OFFLOAD, 1); + if (err) + return err; + + return 0; +} + +static int create_fdb_bypass(struct mlx5_flow_steering *steering) +{ + struct mlx5_flow_namespace *ns; + struct fs_prio *prio; + int i; + + prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BYPASS_PATH, 0); + if (IS_ERR(prio)) + return PTR_ERR(prio); + + ns = fs_create_namespace(prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF); + if (IS_ERR(ns)) + return PTR_ERR(ns); + + for (i = 0; i < MLX5_BY_PASS_NUM_REGULAR_PRIOS; i++) { + prio = 
fs_create_prio(ns, i, 1); + if (IS_ERR(prio)) + return PTR_ERR(prio); + } + return 0; +} + +static void cleanup_fdb_root_ns(struct mlx5_flow_steering *steering) +{ + cleanup_root_ns(steering->fdb_root_ns); + steering->fdb_root_ns = NULL; + kfree(steering->fdb_sub_ns); + steering->fdb_sub_ns = NULL; +} + +static int init_fdb_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *maj_prio; + int err; + + steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB); + if (!steering->fdb_root_ns) + return -ENOMEM; + + err = create_fdb_bypass(steering); + if (err) + goto out_err; + + err = create_fdb_fast_path(steering); + if (err) + goto out_err; + + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_TC_MISS, 1); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); + goto out_err; + } + + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BR_OFFLOAD, 3); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); + goto out_err; + } + + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_SLOW_PATH, 1); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); + goto out_err; + } + + /* We put this priority last, knowing that nothing will get here + * unless explicitly forwarded to. This is possible because the + * slow path tables have catch all rules and nothing gets passed + * those tables. + */ + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_PER_VPORT, 1); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); + goto out_err; + } + + set_prio_attrs(steering->fdb_root_ns); + return 0; + +out_err: + cleanup_fdb_root_ns(steering); + return err; +} + +static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering, int vport) +{ + struct fs_prio *prio; + + steering->esw_egress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_EGRESS_ACL); + if (!steering->esw_egress_root_ns[vport]) + return -ENOMEM; + + /* create 1 prio*/ + prio = fs_create_prio(&steering->esw_egress_root_ns[vport]->ns, 0, 1); + return PTR_ERR_OR_ZERO(prio); +} + +static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vport) +{ + struct fs_prio *prio; + + steering->esw_ingress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_INGRESS_ACL); + if (!steering->esw_ingress_root_ns[vport]) + return -ENOMEM; + + /* create 1 prio*/ + prio = fs_create_prio(&steering->esw_ingress_root_ns[vport]->ns, 0, 1); + return PTR_ERR_OR_ZERO(prio); +} + +int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int err; + int i; + + steering->esw_egress_root_ns = + kcalloc(total_vports, + sizeof(*steering->esw_egress_root_ns), + GFP_KERNEL); + if (!steering->esw_egress_root_ns) + return -ENOMEM; + + for (i = 0; i < total_vports; i++) { + err = init_egress_acl_root_ns(steering, i); + if (err) + goto cleanup_root_ns; + } + steering->esw_egress_acl_vports = total_vports; + return 0; + +cleanup_root_ns: + for (i--; i >= 0; i--) + cleanup_root_ns(steering->esw_egress_root_ns[i]); + kfree(steering->esw_egress_root_ns); + steering->esw_egress_root_ns = NULL; + return err; +} + +void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int i; + + if (!steering->esw_egress_root_ns) + return; + + for (i = 0; i < steering->esw_egress_acl_vports; i++) + cleanup_root_ns(steering->esw_egress_root_ns[i]); + + kfree(steering->esw_egress_root_ns); + steering->esw_egress_root_ns = NULL; +} + +int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int 
total_vports) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int err; + int i; + + steering->esw_ingress_root_ns = + kcalloc(total_vports, + sizeof(*steering->esw_ingress_root_ns), + GFP_KERNEL); + if (!steering->esw_ingress_root_ns) + return -ENOMEM; + + for (i = 0; i < total_vports; i++) { + err = init_ingress_acl_root_ns(steering, i); + if (err) + goto cleanup_root_ns; + } + steering->esw_ingress_acl_vports = total_vports; + return 0; + +cleanup_root_ns: + for (i--; i >= 0; i--) + cleanup_root_ns(steering->esw_ingress_root_ns[i]); + kfree(steering->esw_ingress_root_ns); + steering->esw_ingress_root_ns = NULL; + return err; +} + +void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int i; + + if (!steering->esw_ingress_root_ns) + return; + + for (i = 0; i < steering->esw_ingress_acl_vports; i++) + cleanup_root_ns(steering->esw_ingress_root_ns[i]); + + kfree(steering->esw_ingress_root_ns); + steering->esw_ingress_root_ns = NULL; +} + +u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_namespace *ns; + + ns = mlx5_get_flow_namespace(dev, type); + if (!ns) + return 0; + + root = find_root(&ns->node); + if (!root) + return 0; + + return root->cmds->get_capabilities(root, root->table_type); +} + +static int init_egress_root_ns(struct mlx5_flow_steering *steering) +{ + int err; + + steering->egress_root_ns = create_root_ns(steering, + FS_FT_NIC_TX); + if (!steering->egress_root_ns) + return -ENOMEM; + + err = init_root_tree(steering, &egress_root_fs, + &steering->egress_root_ns->ns.node); + if (err) + goto cleanup; + set_prio_attrs(steering->egress_root_ns); + return 0; +cleanup: + cleanup_root_ns(steering->egress_root_ns); + steering->egress_root_ns = NULL; + return err; +} + +void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + + cleanup_root_ns(steering->root_ns); + cleanup_fdb_root_ns(steering); + cleanup_root_ns(steering->port_sel_root_ns); + cleanup_root_ns(steering->sniffer_rx_root_ns); + cleanup_root_ns(steering->sniffer_tx_root_ns); + cleanup_root_ns(steering->rdma_rx_root_ns); + cleanup_root_ns(steering->rdma_tx_root_ns); + cleanup_root_ns(steering->egress_root_ns); +} + +int mlx5_fs_core_init(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int err = 0; + + if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && + (MLX5_CAP_GEN(dev, nic_flow_table))) || + ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && + MLX5_CAP_GEN(dev, ipoib_enhanced_offloads))) && + MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) { + err = init_root_ns(steering); + if (err) + goto err; + } + + if (MLX5_ESWITCH_MANAGER(dev)) { + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) { + err = init_fdb_root_ns(steering); + if (err) + goto err; + } + } + + if (MLX5_CAP_FLOWTABLE_SNIFFER_RX(dev, ft_support)) { + err = init_sniffer_rx_root_ns(steering); + if (err) + goto err; + } + + if (MLX5_CAP_FLOWTABLE_SNIFFER_TX(dev, ft_support)) { + err = init_sniffer_tx_root_ns(steering); + if (err) + goto err; + } + + if (MLX5_CAP_FLOWTABLE_PORT_SELECTION(dev, ft_support)) { + err = init_port_sel_root_ns(steering); + if (err) + goto err; + } + + if (MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) && + MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain)) { + err = init_rdma_rx_root_ns(steering); + if (err) + goto 
err; + } + + if (MLX5_CAP_FLOWTABLE_RDMA_TX(dev, ft_support)) { + err = init_rdma_tx_root_ns(steering); + if (err) + goto err; + } + + if (MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) { + err = init_egress_root_ns(steering); + if (err) + goto err; + } + + return 0; + +err: + mlx5_fs_core_cleanup(dev); + return err; +} + +void mlx5_fs_core_free(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + + kmem_cache_destroy(steering->ftes_cache); + kmem_cache_destroy(steering->fgs_cache); + kfree(steering); + mlx5_ft_pool_destroy(dev); + mlx5_cleanup_fc_stats(dev); +} + +int mlx5_fs_core_alloc(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering; + int err = 0; + + err = mlx5_init_fc_stats(dev); + if (err) + return err; + + err = mlx5_ft_pool_init(dev); + if (err) + goto err; + + steering = kzalloc(sizeof(*steering), GFP_KERNEL); + if (!steering) { + err = -ENOMEM; + goto err; + } + + steering->dev = dev; + dev->priv.steering = steering; + + if (mlx5_fs_dr_is_supported(dev)) + steering->mode = MLX5_FLOW_STEERING_MODE_SMFS; + else + steering->mode = MLX5_FLOW_STEERING_MODE_DMFS; + + steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs", + sizeof(struct mlx5_flow_group), 0, + 0, NULL); + steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0, + 0, NULL); + if (!steering->ftes_cache || !steering->fgs_cache) { + err = -ENOMEM; + goto err; + } + + return 0; + +err: + mlx5_fs_core_free(dev); + return err; +} + +int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn) +{ + struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns; + struct mlx5_ft_underlay_qp *new_uqp; + int err = 0; + + new_uqp = kzalloc(sizeof(*new_uqp), GFP_KERNEL); + if (!new_uqp) + return -ENOMEM; + + mutex_lock(&root->chain_lock); + + if (!root->root_ft) { + err = -EINVAL; + goto update_ft_fail; + } + + err = root->cmds->update_root_ft(root, root->root_ft, underlay_qpn, + false); + if (err) { + mlx5_core_warn(dev, "Failed adding underlay QPN (%u) to root FT err(%d)\n", + underlay_qpn, err); + goto update_ft_fail; + } + + new_uqp->qpn = underlay_qpn; + list_add_tail(&new_uqp->list, &root->underlay_qpns); + + mutex_unlock(&root->chain_lock); + + return 0; + +update_ft_fail: + mutex_unlock(&root->chain_lock); + kfree(new_uqp); + return err; +} +EXPORT_SYMBOL(mlx5_fs_add_rx_underlay_qpn); + +int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn) +{ + struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns; + struct mlx5_ft_underlay_qp *uqp; + bool found = false; + int err = 0; + + mutex_lock(&root->chain_lock); + list_for_each_entry(uqp, &root->underlay_qpns, list) { + if (uqp->qpn == underlay_qpn) { + found = true; + break; + } + } + + if (!found) { + mlx5_core_warn(dev, "Failed finding underlay qp (%u) in qpn list\n", + underlay_qpn); + err = -EINVAL; + goto out; + } + + err = root->cmds->update_root_ft(root, root->root_ft, underlay_qpn, + true); + if (err) + mlx5_core_warn(dev, "Failed removing underlay QPN (%u) from root FT err(%d)\n", + underlay_qpn, err); + + list_del(&uqp->list); + mutex_unlock(&root->chain_lock); + kfree(uqp); + + return 0; + +out: + mutex_unlock(&root->chain_lock); + return err; +} +EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn); + +static struct mlx5_flow_root_namespace +*get_root_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type ns_type) +{ + struct mlx5_flow_namespace *ns; + + if (ns_type == MLX5_FLOW_NAMESPACE_ESW_EGRESS || + ns_type == 
MLX5_FLOW_NAMESPACE_ESW_INGRESS) + ns = mlx5_get_flow_vport_acl_namespace(dev, ns_type, 0); + else + ns = mlx5_get_flow_namespace(dev, ns_type); + if (!ns) + return NULL; + + return find_root(&ns->node); +} + +struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev, + u8 ns_type, u8 num_actions, + void *modify_actions) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_modify_hdr *modify_hdr; + int err; + + root = get_root_namespace(dev, ns_type); + if (!root) + return ERR_PTR(-EOPNOTSUPP); + + modify_hdr = kzalloc(sizeof(*modify_hdr), GFP_KERNEL); + if (!modify_hdr) + return ERR_PTR(-ENOMEM); + + modify_hdr->ns_type = ns_type; + err = root->cmds->modify_header_alloc(root, ns_type, num_actions, + modify_actions, modify_hdr); + if (err) { + kfree(modify_hdr); + return ERR_PTR(err); + } + + return modify_hdr; +} +EXPORT_SYMBOL(mlx5_modify_header_alloc); + +void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, + struct mlx5_modify_hdr *modify_hdr) +{ + struct mlx5_flow_root_namespace *root; + + root = get_root_namespace(dev, modify_hdr->ns_type); + if (WARN_ON(!root)) + return; + root->cmds->modify_header_dealloc(root, modify_hdr); + kfree(modify_hdr); +} +EXPORT_SYMBOL(mlx5_modify_header_dealloc); + +struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, + struct mlx5_pkt_reformat_params *params, + enum mlx5_flow_namespace_type ns_type) +{ + struct mlx5_pkt_reformat *pkt_reformat; + struct mlx5_flow_root_namespace *root; + int err; + + root = get_root_namespace(dev, ns_type); + if (!root) + return ERR_PTR(-EOPNOTSUPP); + + pkt_reformat = kzalloc(sizeof(*pkt_reformat), GFP_KERNEL); + if (!pkt_reformat) + return ERR_PTR(-ENOMEM); + + pkt_reformat->ns_type = ns_type; + pkt_reformat->reformat_type = params->type; + err = root->cmds->packet_reformat_alloc(root, params, ns_type, + pkt_reformat); + if (err) { + kfree(pkt_reformat); + return ERR_PTR(err); + } + + return pkt_reformat; +} +EXPORT_SYMBOL(mlx5_packet_reformat_alloc); + +void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev, + struct mlx5_pkt_reformat *pkt_reformat) +{ + struct mlx5_flow_root_namespace *root; + + root = get_root_namespace(dev, pkt_reformat->ns_type); + if (WARN_ON(!root)) + return; + root->cmds->packet_reformat_dealloc(root, pkt_reformat); + kfree(pkt_reformat); +} +EXPORT_SYMBOL(mlx5_packet_reformat_dealloc); + +int mlx5_get_match_definer_id(struct mlx5_flow_definer *definer) +{ + return definer->id; +} + +struct mlx5_flow_definer * +mlx5_create_match_definer(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type ns_type, u16 format_id, + u32 *match_mask) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_definer *definer; + int id; + + root = get_root_namespace(dev, ns_type); + if (!root) + return ERR_PTR(-EOPNOTSUPP); + + definer = kzalloc(sizeof(*definer), GFP_KERNEL); + if (!definer) + return ERR_PTR(-ENOMEM); + + definer->ns_type = ns_type; + id = root->cmds->create_match_definer(root, format_id, match_mask); + if (id < 0) { + mlx5_core_warn(root->dev, "Failed to create match definer (%d)\n", id); + kfree(definer); + return ERR_PTR(id); + } + definer->id = id; + return definer; +} + +void mlx5_destroy_match_definer(struct mlx5_core_dev *dev, + struct mlx5_flow_definer *definer) +{ + struct mlx5_flow_root_namespace *root; + + root = get_root_namespace(dev, definer->ns_type); + if (WARN_ON(!root)) + return; + + root->cmds->destroy_match_definer(root, definer->id); + kfree(definer); +} + +int mlx5_flow_namespace_set_peer(struct 
mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns) +{ + if (peer_ns && ns->mode != peer_ns->mode) { + mlx5_core_err(ns->dev, + "Can't peer namespace of different steering mode\n"); + return -EINVAL; + } + + return ns->cmds->set_peer(ns, peer_ns); +} + +/* This function should be called only at init stage of the namespace. + * It is not safe to call this function while steering operations + * are executed in the namespace. + */ +int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns, + enum mlx5_flow_steering_mode mode) +{ + struct mlx5_flow_root_namespace *root; + const struct mlx5_flow_cmds *cmds; + int err; + + root = find_root(&ns->node); + if (&root->ns != ns) + /* Can't set cmds to non root namespace */ + return -EINVAL; + + if (root->table_type != FS_FT_FDB) + return -EOPNOTSUPP; + + if (root->mode == mode) + return 0; + + if (mode == MLX5_FLOW_STEERING_MODE_SMFS) + cmds = mlx5_fs_cmd_get_dr_cmds(); + else + cmds = mlx5_fs_cmd_get_fw_cmds(); + if (!cmds) + return -EOPNOTSUPP; + + err = cmds->create_ns(root); + if (err) { + mlx5_core_err(root->dev, "Failed to create flow namespace (%d)\n", + err); + return err; + } + + root->cmds->destroy_ns(root); + root->cmds = cmds; + root->mode = mode; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h new file mode 100644 index 000000000..3af50fd04 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -0,0 +1,365 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _MLX5_FS_CORE_ +#define _MLX5_FS_CORE_ + +#include <linux/refcount.h> +#include <linux/mlx5/fs.h> +#include <linux/rhashtable.h> +#include <linux/llist.h> +#include <steering/fs_dr.h> + +#define FDB_TC_MAX_CHAIN 3 +#define FDB_FT_CHAIN (FDB_TC_MAX_CHAIN + 1) +#define FDB_TC_SLOW_PATH_CHAIN (FDB_FT_CHAIN + 1) + +/* The index of the last real chain (FT) + 1 as chain zero is valid as well */ +#define FDB_NUM_CHAINS (FDB_FT_CHAIN + 1) + +#define FDB_TC_MAX_PRIO 16 +#define FDB_TC_LEVELS_PER_PRIO 2 + +struct mlx5_flow_definer { + enum mlx5_flow_namespace_type ns_type; + u32 id; +}; + +struct mlx5_modify_hdr { + enum mlx5_flow_namespace_type ns_type; + union { + struct mlx5_fs_dr_action action; + u32 id; + }; +}; + +struct mlx5_pkt_reformat { + enum mlx5_flow_namespace_type ns_type; + int reformat_type; /* from mlx5_ifc */ + union { + struct mlx5_fs_dr_action action; + u32 id; + }; +}; + +/* FS_TYPE_PRIO_CHAINS is a PRIO that will have namespaces only, + * and those are in parallel to one another when going over them to connect + * a new flow table. Meaning the last flow table in a TYPE_PRIO prio in one + * parallel namespace will not automatically connect to the first flow table + * found in any prio in any next namespace, but skip the entire containing + * TYPE_PRIO_CHAINS prio. + * + * This is used to implement tc chains, each chain of prios is a different + * namespace inside a containing TYPE_PRIO_CHAINS prio. + */ + +enum fs_node_type { + FS_TYPE_NAMESPACE, + FS_TYPE_PRIO, + FS_TYPE_PRIO_CHAINS, + FS_TYPE_FLOW_TABLE, + FS_TYPE_FLOW_GROUP, + FS_TYPE_FLOW_ENTRY, + FS_TYPE_FLOW_DEST +}; + +enum fs_flow_table_type { + FS_FT_NIC_RX = 0x0, + FS_FT_NIC_TX = 0x1, + FS_FT_ESW_EGRESS_ACL = 0x2, + FS_FT_ESW_INGRESS_ACL = 0x3, + FS_FT_FDB = 0X4, + FS_FT_SNIFFER_RX = 0X5, + FS_FT_SNIFFER_TX = 0X6, + FS_FT_RDMA_RX = 0X7, + FS_FT_RDMA_TX = 0X8, + FS_FT_PORT_SEL = 0X9, + FS_FT_MAX_TYPE = FS_FT_PORT_SEL, +}; + +enum fs_flow_table_op_mod { + FS_FT_OP_MOD_NORMAL, + FS_FT_OP_MOD_LAG_DEMUX, +}; + +enum fs_fte_status { + FS_FTE_STATUS_EXISTING = 1UL << 0, +}; + +enum mlx5_flow_steering_mode { + MLX5_FLOW_STEERING_MODE_DMFS, + MLX5_FLOW_STEERING_MODE_SMFS +}; + +enum mlx5_flow_steering_capabilty { + MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX = 1UL << 0, + MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX = 1UL << 1, +}; + +struct mlx5_flow_steering { + struct mlx5_core_dev *dev; + enum mlx5_flow_steering_mode mode; + struct kmem_cache *fgs_cache; + struct kmem_cache *ftes_cache; + struct mlx5_flow_root_namespace *root_ns; + struct mlx5_flow_root_namespace *fdb_root_ns; + struct mlx5_flow_namespace **fdb_sub_ns; + struct mlx5_flow_root_namespace **esw_egress_root_ns; + struct mlx5_flow_root_namespace **esw_ingress_root_ns; + struct mlx5_flow_root_namespace *sniffer_tx_root_ns; + struct mlx5_flow_root_namespace *sniffer_rx_root_ns; + struct mlx5_flow_root_namespace *rdma_rx_root_ns; + struct mlx5_flow_root_namespace *rdma_tx_root_ns; + struct mlx5_flow_root_namespace *egress_root_ns; + struct mlx5_flow_root_namespace *port_sel_root_ns; + int esw_egress_acl_vports; + int esw_ingress_acl_vports; +}; + +struct fs_node { + struct list_head list; + struct list_head children; + enum fs_node_type type; + struct fs_node *parent; + struct fs_node *root; + /* lock the node for writing and traversing */ + struct rw_semaphore lock; + refcount_t refcount; + bool active; + void (*del_hw_func)(struct fs_node *); + void (*del_sw_func)(struct fs_node *); + atomic_t version; +}; + +struct mlx5_flow_rule { + struct fs_node node; + 
struct mlx5_flow_table *ft; + struct mlx5_flow_destination dest_attr; + /* next_ft should be accessed under chain_lock and only of + * destination type is FWD_NEXT_fT. + */ + struct list_head next_ft; + u32 sw_action; +}; + +struct mlx5_flow_handle { + int num_rules; + struct mlx5_flow_rule *rule[]; +}; + +/* Type of children is mlx5_flow_group */ +struct mlx5_flow_table { + struct fs_node node; + struct mlx5_fs_dr_table fs_dr_table; + u32 id; + u16 vport; + unsigned int max_fte; + unsigned int level; + enum fs_flow_table_type type; + enum fs_flow_table_op_mod op_mod; + struct { + bool active; + unsigned int required_groups; + unsigned int group_size; + unsigned int num_groups; + unsigned int max_fte; + } autogroup; + /* Protect fwd_rules */ + struct mutex lock; + /* FWD rules that point on this flow table */ + struct list_head fwd_rules; + u32 flags; + struct rhltable fgs_hash; + enum mlx5_flow_table_miss_action def_miss_action; + struct mlx5_flow_namespace *ns; +}; + +struct mlx5_ft_underlay_qp { + struct list_head list; + u32 qpn; +}; + +#define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_e00 +/* Calculate the fte_match_param length and without the reserved length. + * Make sure the reserved field is the last. + */ +#define MLX5_ST_SZ_DW_MATCH_PARAM \ + ((MLX5_BYTE_OFF(fte_match_param, MLX5_FTE_MATCH_PARAM_RESERVED) / sizeof(u32)) + \ + BUILD_BUG_ON_ZERO(MLX5_ST_SZ_BYTES(fte_match_param) != \ + MLX5_FLD_SZ_BYTES(fte_match_param, \ + MLX5_FTE_MATCH_PARAM_RESERVED) +\ + MLX5_BYTE_OFF(fte_match_param, \ + MLX5_FTE_MATCH_PARAM_RESERVED))) + +/* Type of children is mlx5_flow_rule */ +struct fs_fte { + struct fs_node node; + struct mlx5_fs_dr_rule fs_dr_rule; + u32 val[MLX5_ST_SZ_DW_MATCH_PARAM]; + u32 dests_size; + u32 fwd_dests; + u32 index; + struct mlx5_flow_context flow_context; + struct mlx5_flow_act action; + enum fs_fte_status status; + struct mlx5_fc *counter; + struct rhash_head hash; + int modify_mask; +}; + +/* Type of children is mlx5_flow_table/namespace */ +struct fs_prio { + struct fs_node node; + unsigned int num_levels; + unsigned int start_level; + unsigned int prio; + unsigned int num_ft; +}; + +/* Type of children is fs_prio */ +struct mlx5_flow_namespace { + /* parent == NULL => root ns */ + struct fs_node node; + enum mlx5_flow_table_miss_action def_miss_action; +}; + +struct mlx5_flow_group_mask { + u8 match_criteria_enable; + u32 match_criteria[MLX5_ST_SZ_DW_MATCH_PARAM]; +}; + +/* Type of children is fs_fte */ +struct mlx5_flow_group { + struct fs_node node; + struct mlx5_fs_dr_matcher fs_dr_matcher; + struct mlx5_flow_group_mask mask; + u32 start_index; + u32 max_ftes; + struct ida fte_allocator; + u32 id; + struct rhashtable ftes_hash; + struct rhlist_head hash; +}; + +struct mlx5_flow_root_namespace { + struct mlx5_flow_namespace ns; + enum mlx5_flow_steering_mode mode; + struct mlx5_fs_dr_domain fs_dr_domain; + enum fs_flow_table_type table_type; + struct mlx5_core_dev *dev; + struct mlx5_flow_table *root_ft; + /* Should be held when chaining flow tables */ + struct mutex chain_lock; + struct list_head underlay_qpns; + const struct mlx5_flow_cmds *cmds; +}; + +int mlx5_init_fc_stats(struct mlx5_core_dev *dev); +void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev); +void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev, + struct delayed_work *dwork, + unsigned long delay); +void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, + unsigned long interval); + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void); + +int 
mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns); + +int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns, + enum mlx5_flow_steering_mode mode); + +int mlx5_fs_core_alloc(struct mlx5_core_dev *dev); +void mlx5_fs_core_free(struct mlx5_core_dev *dev); +int mlx5_fs_core_init(struct mlx5_core_dev *dev); +void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev); + +int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports); +void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev); +int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports); +void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev); + +u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type); + +struct mlx5_flow_root_namespace *find_root(struct fs_node *node); + +#define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); } + +#define fs_list_for_each_entry(pos, root) \ + list_for_each_entry(pos, root, node.list) + +#define fs_list_for_each_entry_safe(pos, tmp, root) \ + list_for_each_entry_safe(pos, tmp, root, node.list) + +#define fs_for_each_ns_or_ft_reverse(pos, prio) \ + list_for_each_entry_reverse(pos, &(prio)->node.children, list) + +#define fs_for_each_ns_or_ft(pos, prio) \ + list_for_each_entry(pos, (&(prio)->node.children), list) + +#define fs_for_each_prio(pos, ns) \ + fs_list_for_each_entry(pos, &(ns)->node.children) + +#define fs_for_each_ns(pos, prio) \ + fs_list_for_each_entry(pos, &(prio)->node.children) + +#define fs_for_each_ft(pos, prio) \ + fs_list_for_each_entry(pos, &(prio)->node.children) + +#define fs_for_each_ft_safe(pos, tmp, prio) \ + fs_list_for_each_entry_safe(pos, tmp, &(prio)->node.children) + +#define fs_for_each_fg(pos, ft) \ + fs_list_for_each_entry(pos, &(ft)->node.children) + +#define fs_for_each_fte(pos, fg) \ + fs_list_for_each_entry(pos, &(fg)->node.children) + +#define fs_for_each_dst(pos, fte) \ + fs_list_for_each_entry(pos, &(fte)->node.children) + +#define MLX5_CAP_FLOWTABLE_TYPE(mdev, cap, type) ( \ + (type == FS_FT_NIC_RX) ? MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) : \ + (type == FS_FT_NIC_TX) ? MLX5_CAP_FLOWTABLE_NIC_TX(mdev, cap) : \ + (type == FS_FT_ESW_EGRESS_ACL) ? MLX5_CAP_ESW_EGRESS_ACL(mdev, cap) : \ + (type == FS_FT_ESW_INGRESS_ACL) ? MLX5_CAP_ESW_INGRESS_ACL(mdev, cap) : \ + (type == FS_FT_FDB) ? MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) : \ + (type == FS_FT_SNIFFER_RX) ? MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) : \ + (type == FS_FT_SNIFFER_TX) ? MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) : \ + (type == FS_FT_RDMA_RX) ? MLX5_CAP_FLOWTABLE_RDMA_RX(mdev, cap) : \ + (type == FS_FT_RDMA_TX) ? MLX5_CAP_FLOWTABLE_RDMA_TX(mdev, cap) : \ + (type == FS_FT_PORT_SEL) ? MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) : \ + (BUILD_BUG_ON_ZERO(FS_FT_PORT_SEL != FS_FT_MAX_TYPE))\ + ) + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c new file mode 100644 index 000000000..b406e0367 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -0,0 +1,762 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/fs.h> +#include <linux/rbtree.h> +#include "mlx5_core.h" +#include "fs_core.h" +#include "fs_cmd.h" + +#define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000) +#define MLX5_FC_BULK_QUERY_ALLOC_PERIOD msecs_to_jiffies(180 * 1000) +/* Max number of counters to query in bulk read is 32K */ +#define MLX5_SW_MAX_COUNTERS_BULK BIT(15) +#define MLX5_INIT_COUNTERS_BULK 8 +#define MLX5_FC_POOL_MAX_THRESHOLD BIT(18) +#define MLX5_FC_POOL_USED_BUFF_RATIO 10 + +struct mlx5_fc_cache { + u64 packets; + u64 bytes; + u64 lastuse; +}; + +struct mlx5_fc { + struct list_head list; + struct llist_node addlist; + struct llist_node dellist; + + /* last{packets,bytes} members are used when calculating the delta since + * last reading + */ + u64 lastpackets; + u64 lastbytes; + + struct mlx5_fc_bulk *bulk; + u32 id; + bool aging; + + struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; +}; + +static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev); +static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool); +static struct mlx5_fc *mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool); +static void mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc); + +/* locking scheme: + * + * It is the responsibility of the user to prevent concurrent calls or bad + * ordering to mlx5_fc_create(), mlx5_fc_destroy() and accessing a reference + * to struct mlx5_fc. + * e.g en_tc.c is protected by RTNL lock of its caller, and will never call a + * dump (access to struct mlx5_fc) after a counter is destroyed. + * + * access to counter list: + * - create (user context) + * - mlx5_fc_create() only adds to an addlist to be used by + * mlx5_fc_stats_work(). addlist is a lockless single linked list + * that doesn't require any additional synchronization when adding single + * node. 
+ * - spawn thread to do the actual destroy + * + * - destroy (user context) + * - add a counter to lockless dellist + * - spawn thread to do the actual del + * + * - dump (user context) + * user should not call dump after destroy + * + * - query (single thread workqueue context) + * destroy/dump - no conflict (see destroy) + * query/dump - packets and bytes might be inconsistent (since update is not + * atomic) + * query/create - no conflict (see create) + * since every create/destroy spawn the work, only after necessary time has + * elapsed, the thread will actually query the hardware. + */ + +static struct list_head *mlx5_fc_counters_lookup_next(struct mlx5_core_dev *dev, + u32 id) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + unsigned long next_id = (unsigned long)id + 1; + struct mlx5_fc *counter; + unsigned long tmp; + + rcu_read_lock(); + /* skip counters that are in idr, but not yet in counters list */ + idr_for_each_entry_continue_ul(&fc_stats->counters_idr, + counter, tmp, next_id) { + if (!list_empty(&counter->list)) + break; + } + rcu_read_unlock(); + + return counter ? &counter->list : &fc_stats->counters; +} + +static void mlx5_fc_stats_insert(struct mlx5_core_dev *dev, + struct mlx5_fc *counter) +{ + struct list_head *next = mlx5_fc_counters_lookup_next(dev, counter->id); + + list_add_tail(&counter->list, next); +} + +static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev, + struct mlx5_fc *counter) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + list_del(&counter->list); + + spin_lock(&fc_stats->counters_idr_lock); + WARN_ON(!idr_remove(&fc_stats->counters_idr, counter->id)); + spin_unlock(&fc_stats->counters_idr_lock); +} + +static int get_init_bulk_query_len(struct mlx5_core_dev *dev) +{ + return min_t(int, MLX5_INIT_COUNTERS_BULK, + (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk))); +} + +static int get_max_bulk_query_len(struct mlx5_core_dev *dev) +{ + return min_t(int, MLX5_SW_MAX_COUNTERS_BULK, + (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk))); +} + +static void update_counter_cache(int index, u32 *bulk_raw_data, + struct mlx5_fc_cache *cache) +{ + void *stats = MLX5_ADDR_OF(query_flow_counter_out, bulk_raw_data, + flow_statistics[index]); + u64 packets = MLX5_GET64(traffic_counter, stats, packets); + u64 bytes = MLX5_GET64(traffic_counter, stats, octets); + + if (cache->packets == packets) + return; + + cache->packets = packets; + cache->bytes = bytes; + cache->lastuse = jiffies; +} + +static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev, + struct mlx5_fc *first, + u32 last_id) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + bool query_more_counters = (first->id <= last_id); + int cur_bulk_len = fc_stats->bulk_query_len; + u32 *data = fc_stats->bulk_query_out; + struct mlx5_fc *counter = first; + u32 bulk_base_id; + int bulk_len; + int err; + + while (query_more_counters) { + /* first id must be aligned to 4 when using bulk query */ + bulk_base_id = counter->id & ~0x3; + + /* number of counters to query inc. 
the last counter */ + bulk_len = min_t(int, cur_bulk_len, + ALIGN(last_id - bulk_base_id + 1, 4)); + + err = mlx5_cmd_fc_bulk_query(dev, bulk_base_id, bulk_len, + data); + if (err) { + mlx5_core_err(dev, "Error doing bulk query: %d\n", err); + return; + } + query_more_counters = false; + + list_for_each_entry_from(counter, &fc_stats->counters, list) { + int counter_index = counter->id - bulk_base_id; + struct mlx5_fc_cache *cache = &counter->cache; + + if (counter->id >= bulk_base_id + bulk_len) { + query_more_counters = true; + break; + } + + update_counter_cache(counter_index, data, cache); + } + } +} + +static void mlx5_fc_free(struct mlx5_core_dev *dev, struct mlx5_fc *counter) +{ + mlx5_cmd_fc_free(dev, counter->id); + kfree(counter); +} + +static void mlx5_fc_release(struct mlx5_core_dev *dev, struct mlx5_fc *counter) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + if (counter->bulk) + mlx5_fc_pool_release_counter(&fc_stats->fc_pool, counter); + else + mlx5_fc_free(dev, counter); +} + +static void mlx5_fc_stats_bulk_query_size_increase(struct mlx5_core_dev *dev) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + int max_bulk_len = get_max_bulk_query_len(dev); + unsigned long now = jiffies; + u32 *bulk_query_out_tmp; + int max_out_len; + + if (fc_stats->bulk_query_alloc_failed && + time_before(now, fc_stats->next_bulk_query_alloc)) + return; + + max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len); + bulk_query_out_tmp = kzalloc(max_out_len, GFP_KERNEL); + if (!bulk_query_out_tmp) { + mlx5_core_warn_once(dev, + "Can't increase flow counters bulk query buffer size, insufficient memory, bulk_size(%d)\n", + max_bulk_len); + fc_stats->bulk_query_alloc_failed = true; + fc_stats->next_bulk_query_alloc = + now + MLX5_FC_BULK_QUERY_ALLOC_PERIOD; + return; + } + + kfree(fc_stats->bulk_query_out); + fc_stats->bulk_query_out = bulk_query_out_tmp; + fc_stats->bulk_query_len = max_bulk_len; + if (fc_stats->bulk_query_alloc_failed) { + mlx5_core_info(dev, + "Flow counters bulk query buffer size increased, bulk_size(%d)\n", + max_bulk_len); + fc_stats->bulk_query_alloc_failed = false; + } +} + +static void mlx5_fc_stats_work(struct work_struct *work) +{ + struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev, + priv.fc_stats.work.work); + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + /* Take dellist first to ensure that counters cannot be deleted before + * they are inserted. 
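+	 * (A counter can only reach the dellist after mlx5_fc_create() has
+	 * already put it on the addlist, so snapshotting the dellist first
+	 * guarantees that every entry drained here is covered by this addlist
+	 * snapshot, or by one processed in an earlier run; the insert pass
+	 * below therefore always runs before the matching remove.)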
+ */ + struct llist_node *dellist = llist_del_all(&fc_stats->dellist); + struct llist_node *addlist = llist_del_all(&fc_stats->addlist); + struct mlx5_fc *counter = NULL, *last = NULL, *tmp; + unsigned long now = jiffies; + + if (addlist || !list_empty(&fc_stats->counters)) + queue_delayed_work(fc_stats->wq, &fc_stats->work, + fc_stats->sampling_interval); + + llist_for_each_entry(counter, addlist, addlist) { + mlx5_fc_stats_insert(dev, counter); + fc_stats->num_counters++; + } + + llist_for_each_entry_safe(counter, tmp, dellist, dellist) { + mlx5_fc_stats_remove(dev, counter); + + mlx5_fc_release(dev, counter); + fc_stats->num_counters--; + } + + if (fc_stats->bulk_query_len < get_max_bulk_query_len(dev) && + fc_stats->num_counters > get_init_bulk_query_len(dev)) + mlx5_fc_stats_bulk_query_size_increase(dev); + + if (time_before(now, fc_stats->next_query) || + list_empty(&fc_stats->counters)) + return; + last = list_last_entry(&fc_stats->counters, struct mlx5_fc, list); + + counter = list_first_entry(&fc_stats->counters, struct mlx5_fc, + list); + if (counter) + mlx5_fc_stats_query_counter_range(dev, counter, last->id); + + fc_stats->next_query = now + fc_stats->sampling_interval; +} + +static struct mlx5_fc *mlx5_fc_single_alloc(struct mlx5_core_dev *dev) +{ + struct mlx5_fc *counter; + int err; + + counter = kzalloc(sizeof(*counter), GFP_KERNEL); + if (!counter) + return ERR_PTR(-ENOMEM); + + err = mlx5_cmd_fc_alloc(dev, &counter->id); + if (err) { + kfree(counter); + return ERR_PTR(err); + } + + return counter; +} + +static struct mlx5_fc *mlx5_fc_acquire(struct mlx5_core_dev *dev, bool aging) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc *counter; + + if (aging && MLX5_CAP_GEN(dev, flow_counter_bulk_alloc) != 0) { + counter = mlx5_fc_pool_acquire_counter(&fc_stats->fc_pool); + if (!IS_ERR(counter)) + return counter; + } + + return mlx5_fc_single_alloc(dev); +} + +struct mlx5_fc *mlx5_fc_create_ex(struct mlx5_core_dev *dev, bool aging) +{ + struct mlx5_fc *counter = mlx5_fc_acquire(dev, aging); + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + int err; + + if (IS_ERR(counter)) + return counter; + + INIT_LIST_HEAD(&counter->list); + counter->aging = aging; + + if (aging) { + u32 id = counter->id; + + counter->cache.lastuse = jiffies; + counter->lastbytes = counter->cache.bytes; + counter->lastpackets = counter->cache.packets; + + idr_preload(GFP_KERNEL); + spin_lock(&fc_stats->counters_idr_lock); + + err = idr_alloc_u32(&fc_stats->counters_idr, counter, &id, id, + GFP_NOWAIT); + + spin_unlock(&fc_stats->counters_idr_lock); + idr_preload_end(); + if (err) + goto err_out_alloc; + + llist_add(&counter->addlist, &fc_stats->addlist); + } + + return counter; + +err_out_alloc: + mlx5_fc_release(dev, counter); + return ERR_PTR(err); +} + +struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) +{ + struct mlx5_fc *counter = mlx5_fc_create_ex(dev, aging); + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + if (aging) + mod_delayed_work(fc_stats->wq, &fc_stats->work, 0); + return counter; +} +EXPORT_SYMBOL(mlx5_fc_create); + +u32 mlx5_fc_id(struct mlx5_fc *counter) +{ + return counter->id; +} +EXPORT_SYMBOL(mlx5_fc_id); + +void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + if (!counter) + return; + + if (counter->aging) { + llist_add(&counter->dellist, &fc_stats->dellist); + mod_delayed_work(fc_stats->wq, &fc_stats->work, 0); + return; + } + + 
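+	/* Non-aging counters were never added to the addlist or the counters
+	 * IDR, so there is nothing for the stats work to tear down; they are
+	 * single-allocated (never pool-backed) and can be released directly.
+	 */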
mlx5_fc_release(dev, counter); +} +EXPORT_SYMBOL(mlx5_fc_destroy); + +int mlx5_init_fc_stats(struct mlx5_core_dev *dev) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + int init_bulk_len; + int init_out_len; + + spin_lock_init(&fc_stats->counters_idr_lock); + idr_init(&fc_stats->counters_idr); + INIT_LIST_HEAD(&fc_stats->counters); + init_llist_head(&fc_stats->addlist); + init_llist_head(&fc_stats->dellist); + + init_bulk_len = get_init_bulk_query_len(dev); + init_out_len = mlx5_cmd_fc_get_bulk_query_out_len(init_bulk_len); + fc_stats->bulk_query_out = kzalloc(init_out_len, GFP_KERNEL); + if (!fc_stats->bulk_query_out) + return -ENOMEM; + fc_stats->bulk_query_len = init_bulk_len; + + fc_stats->wq = create_singlethread_workqueue("mlx5_fc"); + if (!fc_stats->wq) + goto err_wq_create; + + fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD; + INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work); + + mlx5_fc_pool_init(&fc_stats->fc_pool, dev); + return 0; + +err_wq_create: + kfree(fc_stats->bulk_query_out); + return -ENOMEM; +} + +void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct llist_node *tmplist; + struct mlx5_fc *counter; + struct mlx5_fc *tmp; + + cancel_delayed_work_sync(&dev->priv.fc_stats.work); + destroy_workqueue(dev->priv.fc_stats.wq); + dev->priv.fc_stats.wq = NULL; + + tmplist = llist_del_all(&fc_stats->addlist); + llist_for_each_entry_safe(counter, tmp, tmplist, addlist) + mlx5_fc_release(dev, counter); + + list_for_each_entry_safe(counter, tmp, &fc_stats->counters, list) + mlx5_fc_release(dev, counter); + + mlx5_fc_pool_cleanup(&fc_stats->fc_pool); + idr_destroy(&fc_stats->counters_idr); + kfree(fc_stats->bulk_query_out); +} + +int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter, + u64 *packets, u64 *bytes) +{ + return mlx5_cmd_fc_query(dev, counter->id, packets, bytes); +} +EXPORT_SYMBOL(mlx5_fc_query); + +u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter) +{ + return counter->cache.lastuse; +} + +void mlx5_fc_query_cached(struct mlx5_fc *counter, + u64 *bytes, u64 *packets, u64 *lastuse) +{ + struct mlx5_fc_cache c; + + c = counter->cache; + + *bytes = c.bytes - counter->lastbytes; + *packets = c.packets - counter->lastpackets; + *lastuse = c.lastuse; + + counter->lastbytes = c.bytes; + counter->lastpackets = c.packets; +} + +void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev, + struct delayed_work *dwork, + unsigned long delay) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + queue_delayed_work(fc_stats->wq, dwork, delay); +} + +void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, + unsigned long interval) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + fc_stats->sampling_interval = min_t(unsigned long, interval, + fc_stats->sampling_interval); +} + +/* Flow counter bluks */ + +struct mlx5_fc_bulk { + struct list_head pool_list; + u32 base_id; + int bulk_len; + unsigned long *bitmask; + struct mlx5_fc fcs[]; +}; + +static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk, + u32 id) +{ + counter->bulk = bulk; + counter->id = id; +} + +static int mlx5_fc_bulk_get_free_fcs_amount(struct mlx5_fc_bulk *bulk) +{ + return bitmap_weight(bulk->bitmask, bulk->bulk_len); +} + +static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev) +{ + enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask; + struct mlx5_fc_bulk *bulk; + int err = -ENOMEM; + int bulk_len; + u32 base_id; + int i; + + alloc_bitmask = 
MLX5_CAP_GEN(dev, flow_counter_bulk_alloc); + bulk_len = alloc_bitmask > 0 ? MLX5_FC_BULK_NUM_FCS(alloc_bitmask) : 1; + + bulk = kvzalloc(struct_size(bulk, fcs, bulk_len), GFP_KERNEL); + if (!bulk) + goto err_alloc_bulk; + + bulk->bitmask = kvcalloc(BITS_TO_LONGS(bulk_len), sizeof(unsigned long), + GFP_KERNEL); + if (!bulk->bitmask) + goto err_alloc_bitmask; + + err = mlx5_cmd_fc_bulk_alloc(dev, alloc_bitmask, &base_id); + if (err) + goto err_mlx5_cmd_bulk_alloc; + + bulk->base_id = base_id; + bulk->bulk_len = bulk_len; + for (i = 0; i < bulk_len; i++) { + mlx5_fc_init(&bulk->fcs[i], bulk, base_id + i); + set_bit(i, bulk->bitmask); + } + + return bulk; + +err_mlx5_cmd_bulk_alloc: + kvfree(bulk->bitmask); +err_alloc_bitmask: + kvfree(bulk); +err_alloc_bulk: + return ERR_PTR(err); +} + +static int +mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fc_bulk *bulk) +{ + if (mlx5_fc_bulk_get_free_fcs_amount(bulk) < bulk->bulk_len) { + mlx5_core_err(dev, "Freeing bulk before all counters were released\n"); + return -EBUSY; + } + + mlx5_cmd_fc_free(dev, bulk->base_id); + kvfree(bulk->bitmask); + kvfree(bulk); + + return 0; +} + +static struct mlx5_fc *mlx5_fc_bulk_acquire_fc(struct mlx5_fc_bulk *bulk) +{ + int free_fc_index = find_first_bit(bulk->bitmask, bulk->bulk_len); + + if (free_fc_index >= bulk->bulk_len) + return ERR_PTR(-ENOSPC); + + clear_bit(free_fc_index, bulk->bitmask); + return &bulk->fcs[free_fc_index]; +} + +static int mlx5_fc_bulk_release_fc(struct mlx5_fc_bulk *bulk, struct mlx5_fc *fc) +{ + int fc_index = fc->id - bulk->base_id; + + if (test_bit(fc_index, bulk->bitmask)) + return -EINVAL; + + set_bit(fc_index, bulk->bitmask); + return 0; +} + +/* Flow counters pool API */ + +static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev) +{ + fc_pool->dev = dev; + mutex_init(&fc_pool->pool_lock); + INIT_LIST_HEAD(&fc_pool->fully_used); + INIT_LIST_HEAD(&fc_pool->partially_used); + INIT_LIST_HEAD(&fc_pool->unused); + fc_pool->available_fcs = 0; + fc_pool->used_fcs = 0; + fc_pool->threshold = 0; +} + +static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool) +{ + struct mlx5_core_dev *dev = fc_pool->dev; + struct mlx5_fc_bulk *bulk; + struct mlx5_fc_bulk *tmp; + + list_for_each_entry_safe(bulk, tmp, &fc_pool->fully_used, pool_list) + mlx5_fc_bulk_destroy(dev, bulk); + list_for_each_entry_safe(bulk, tmp, &fc_pool->partially_used, pool_list) + mlx5_fc_bulk_destroy(dev, bulk); + list_for_each_entry_safe(bulk, tmp, &fc_pool->unused, pool_list) + mlx5_fc_bulk_destroy(dev, bulk); +} + +static void mlx5_fc_pool_update_threshold(struct mlx5_fc_pool *fc_pool) +{ + fc_pool->threshold = min_t(int, MLX5_FC_POOL_MAX_THRESHOLD, + fc_pool->used_fcs / MLX5_FC_POOL_USED_BUFF_RATIO); +} + +static struct mlx5_fc_bulk * +mlx5_fc_pool_alloc_new_bulk(struct mlx5_fc_pool *fc_pool) +{ + struct mlx5_core_dev *dev = fc_pool->dev; + struct mlx5_fc_bulk *new_bulk; + + new_bulk = mlx5_fc_bulk_create(dev); + if (!IS_ERR(new_bulk)) + fc_pool->available_fcs += new_bulk->bulk_len; + mlx5_fc_pool_update_threshold(fc_pool); + return new_bulk; +} + +static void +mlx5_fc_pool_free_bulk(struct mlx5_fc_pool *fc_pool, struct mlx5_fc_bulk *bulk) +{ + struct mlx5_core_dev *dev = fc_pool->dev; + + fc_pool->available_fcs -= bulk->bulk_len; + mlx5_fc_bulk_destroy(dev, bulk); + mlx5_fc_pool_update_threshold(fc_pool); +} + +static struct mlx5_fc * +mlx5_fc_pool_acquire_from_list(struct list_head *src_list, + struct list_head *next_list, + bool move_non_full_bulk) +{ + struct mlx5_fc_bulk 
*bulk; + struct mlx5_fc *fc; + + if (list_empty(src_list)) + return ERR_PTR(-ENODATA); + + bulk = list_first_entry(src_list, struct mlx5_fc_bulk, pool_list); + fc = mlx5_fc_bulk_acquire_fc(bulk); + if (move_non_full_bulk || mlx5_fc_bulk_get_free_fcs_amount(bulk) == 0) + list_move(&bulk->pool_list, next_list); + return fc; +} + +static struct mlx5_fc * +mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool) +{ + struct mlx5_fc_bulk *new_bulk; + struct mlx5_fc *fc; + + mutex_lock(&fc_pool->pool_lock); + + fc = mlx5_fc_pool_acquire_from_list(&fc_pool->partially_used, + &fc_pool->fully_used, false); + if (IS_ERR(fc)) + fc = mlx5_fc_pool_acquire_from_list(&fc_pool->unused, + &fc_pool->partially_used, + true); + if (IS_ERR(fc)) { + new_bulk = mlx5_fc_pool_alloc_new_bulk(fc_pool); + if (IS_ERR(new_bulk)) { + fc = ERR_CAST(new_bulk); + goto out; + } + fc = mlx5_fc_bulk_acquire_fc(new_bulk); + list_add(&new_bulk->pool_list, &fc_pool->partially_used); + } + fc_pool->available_fcs--; + fc_pool->used_fcs++; + +out: + mutex_unlock(&fc_pool->pool_lock); + return fc; +} + +static void +mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc) +{ + struct mlx5_core_dev *dev = fc_pool->dev; + struct mlx5_fc_bulk *bulk = fc->bulk; + int bulk_free_fcs_amount; + + mutex_lock(&fc_pool->pool_lock); + + if (mlx5_fc_bulk_release_fc(bulk, fc)) { + mlx5_core_warn(dev, "Attempted to release a counter which is not acquired\n"); + goto unlock; + } + + fc_pool->available_fcs++; + fc_pool->used_fcs--; + + bulk_free_fcs_amount = mlx5_fc_bulk_get_free_fcs_amount(bulk); + if (bulk_free_fcs_amount == 1) + list_move_tail(&bulk->pool_list, &fc_pool->partially_used); + if (bulk_free_fcs_amount == bulk->bulk_len) { + list_del(&bulk->pool_list); + if (fc_pool->available_fcs > fc_pool->threshold) + mlx5_fc_pool_free_bulk(fc_pool, bulk); + else + list_add(&bulk->pool_list, &fc_pool->unused); + } + +unlock: + mutex_unlock(&fc_pool->pool_lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c new file mode 100644 index 000000000..c14590acc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021 Mellanox Technologies. */ + +#include "fs_ft_pool.h" + +/* Firmware currently has 4 pool of 4 sizes that it supports (FT_POOLS), + * and a virtual memory region of 16M (MLX5_FT_SIZE), this region is duplicated + * for each flow table pool. We can allocate up to 16M of each pool, + * and we keep track of how much we used via mlx5_ft_pool_get_avail_sz. + * Firmware doesn't report any of this for now. + * ESW_POOL is expected to be sorted from large to small and match firmware + * pools. 
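+ * For example, a request for room for 3000 FTEs is served from the 64K
+ * pool (the smallest size that both fits the request and stays within the
+ * device's log_max_ft_size), while a POOL_NEXT_SIZE request is granted the
+ * largest size that still has budget left.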
+ */ +#define FT_SIZE (16 * 1024 * 1024) +static const unsigned int FT_POOLS[] = { 4 * 1024 * 1024, + 1 * 1024 * 1024, + 64 * 1024, + 128, + 1 /* size for termination tables */ }; +struct mlx5_ft_pool { + int ft_left[ARRAY_SIZE(FT_POOLS)]; +}; + +int mlx5_ft_pool_init(struct mlx5_core_dev *dev) +{ + struct mlx5_ft_pool *ft_pool; + int i; + + ft_pool = kzalloc(sizeof(*ft_pool), GFP_KERNEL); + if (!ft_pool) + return -ENOMEM; + + for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) + ft_pool->ft_left[i] = FT_SIZE / FT_POOLS[i]; + + dev->priv.ft_pool = ft_pool; + return 0; +} + +void mlx5_ft_pool_destroy(struct mlx5_core_dev *dev) +{ + kfree(dev->priv.ft_pool); +} + +int +mlx5_ft_pool_get_avail_sz(struct mlx5_core_dev *dev, enum fs_flow_table_type table_type, + int desired_size) +{ + u32 max_ft_size = 1 << MLX5_CAP_FLOWTABLE_TYPE(dev, log_max_ft_size, table_type); + int i, found_i = -1; + + for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) { + if (dev->priv.ft_pool->ft_left[i] && FT_POOLS[i] >= desired_size && + FT_POOLS[i] <= max_ft_size) { + found_i = i; + if (desired_size != POOL_NEXT_SIZE) + break; + } + } + + if (found_i != -1) { + --dev->priv.ft_pool->ft_left[found_i]; + return FT_POOLS[found_i]; + } + + return 0; +} + +void +mlx5_ft_pool_put_sz(struct mlx5_core_dev *dev, int sz) +{ + int i; + + if (!sz) + return; + + for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) { + if (sz == FT_POOLS[i]) { + ++dev->priv.ft_pool->ft_left[i]; + return; + } + } + + WARN_ONCE(1, "Couldn't find size %d in flow table size pool", sz); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h new file mode 100644 index 000000000..25f4274b3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#ifndef __MLX5_FS_FT_POOL_H__ +#define __MLX5_FS_FT_POOL_H__ + +#include <linux/mlx5/driver.h> +#include "fs_core.h" + +#define POOL_NEXT_SIZE 0 + +int mlx5_ft_pool_init(struct mlx5_core_dev *dev); +void mlx5_ft_pool_destroy(struct mlx5_core_dev *dev); + +int +mlx5_ft_pool_get_avail_sz(struct mlx5_core_dev *dev, enum fs_flow_table_type table_type, + int desired_size); +void +mlx5_ft_pool_put_sz(struct mlx5_core_dev *dev, int sz); + +#endif /* __MLX5_FS_FT_POOL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c new file mode 100644 index 000000000..f34e758a2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -0,0 +1,851 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/eswitch.h> +#include "mlx5_core.h" +#include "../../mlxfw/mlxfw.h" +#include "lib/tout.h" + +enum { + MCQS_IDENTIFIER_BOOT_IMG = 0x1, + MCQS_IDENTIFIER_OEM_NVCONFIG = 0x4, + MCQS_IDENTIFIER_MLNX_NVCONFIG = 0x5, + MCQS_IDENTIFIER_CS_TOKEN = 0x6, + MCQS_IDENTIFIER_DBG_TOKEN = 0x7, + MCQS_IDENTIFIER_GEARBOX = 0xA, +}; + +enum { + MCQS_UPDATE_STATE_IDLE, + MCQS_UPDATE_STATE_IN_PROGRESS, + MCQS_UPDATE_STATE_APPLIED, + MCQS_UPDATE_STATE_ACTIVE, + MCQS_UPDATE_STATE_ACTIVE_PENDING_RESET, + MCQS_UPDATE_STATE_FAILED, + MCQS_UPDATE_STATE_CANCELED, + MCQS_UPDATE_STATE_BUSY, +}; + +enum { + MCQI_INFO_TYPE_CAPABILITIES = 0x0, + MCQI_INFO_TYPE_VERSION = 0x1, + MCQI_INFO_TYPE_ACTIVATION_METHOD = 0x5, +}; + +enum { + MCQI_FW_RUNNING_VERSION = 0, + MCQI_FW_STORED_VERSION = 1, +}; + +int mlx5_query_board_id(struct mlx5_core_dev *dev) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_adapter_out); + u32 in[MLX5_ST_SZ_DW(query_adapter_in)] = {}; + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_adapter_in, in, opcode, MLX5_CMD_OP_QUERY_ADAPTER); + err = mlx5_cmd_exec_inout(dev, query_adapter, in, out); + if (err) + goto out; + + memcpy(dev->board_id, + MLX5_ADDR_OF(query_adapter_out, out, + query_adapter_struct.vsd_contd_psid), + MLX5_FLD_SZ_BYTES(query_adapter_out, + query_adapter_struct.vsd_contd_psid)); + +out: + kfree(out); + return err; +} + +int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_adapter_out); + u32 in[MLX5_ST_SZ_DW(query_adapter_in)] = {}; + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_adapter_in, in, opcode, MLX5_CMD_OP_QUERY_ADAPTER); + err = mlx5_cmd_exec_inout(mdev, query_adapter, in, out); + if (err) + goto out; + + *vendor_id = MLX5_GET(query_adapter_out, out, + query_adapter_struct.ieee_vendor_id); +out: + kfree(out); + return err; +} +EXPORT_SYMBOL(mlx5_core_query_vendor_id); + +static int mlx5_get_pcam_reg(struct mlx5_core_dev *dev) +{ + return mlx5_query_pcam_reg(dev, dev->caps.pcam, + MLX5_PCAM_FEATURE_ENHANCED_FEATURES, + MLX5_PCAM_REGS_5000_TO_507F); +} + +static int mlx5_get_mcam_access_reg_group(struct mlx5_core_dev *dev, + enum mlx5_mcam_reg_groups group) +{ + return mlx5_query_mcam_reg(dev, dev->caps.mcam[group], + MLX5_MCAM_FEATURE_ENHANCED_FEATURES, group); +} + +static int mlx5_get_qcam_reg(struct mlx5_core_dev *dev) +{ + return mlx5_query_qcam_reg(dev, dev->caps.qcam, + MLX5_QCAM_FEATURE_ENHANCED_FEATURES, + MLX5_QCAM_REGS_FIRST_128); +} + +int mlx5_query_hca_caps(struct mlx5_core_dev *dev) +{ + int err; + + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); + if (err) + return err; + + if (MLX5_CAP_GEN(dev, port_selection_cap)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_PORT_SELECTION); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, hca_cap_2)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL_2); + if (err) + return err; + } + + if 
(MLX5_CAP_GEN(dev, eth_net_offloads)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_IPOIB_ENHANCED_OFFLOADS); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, pg)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ODP); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, atomic)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, roce)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, nic_flow_table) || + MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, vport_group_manager) && + MLX5_ESWITCH_MANAGER(dev)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE); + if (err) + return err; + } + + if (MLX5_ESWITCH_MANAGER(dev)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, vector_calc)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_VECTOR_CALC); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, qos)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_QOS); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, debug)) + mlx5_core_get_caps(dev, MLX5_CAP_DEBUG); + + if (MLX5_CAP_GEN(dev, pcam_reg)) + mlx5_get_pcam_reg(dev); + + if (MLX5_CAP_GEN(dev, mcam_reg)) { + mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_FIRST_128); + mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_0x9080_0x90FF); + mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_0x9100_0x917F); + } + + if (MLX5_CAP_GEN(dev, qcam_reg)) + mlx5_get_qcam_reg(dev); + + if (MLX5_CAP_GEN(dev, device_memory)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_MEM); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, event_cap)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_EVENT); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, tls_tx) || MLX5_CAP_GEN(dev, tls_rx)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_TLS); + if (err) + return err; + } + + if (MLX5_CAP_GEN_64(dev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) { + err = mlx5_core_get_caps(dev, MLX5_CAP_VDPA_EMULATION); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, ipsec_offload)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_IPSEC); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, shampo)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_SHAMPO); + if (err) + return err; + } + + if (MLX5_CAP_GEN_64(dev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD) { + err = mlx5_core_get_caps(dev, MLX5_CAP_MACSEC); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, adv_virtualization)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ADV_VIRTUALIZATION); + if (err) + return err; + } + + return 0; +} + +int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id) +{ + u32 in[MLX5_ST_SZ_DW(init_hca_in)] = {}; + int i; + + MLX5_SET(init_hca_in, in, opcode, MLX5_CMD_OP_INIT_HCA); + + if (MLX5_CAP_GEN(dev, sw_owner_id)) { + for (i = 0; i < 4; i++) + MLX5_ARRAY_SET(init_hca_in, in, sw_owner_id, i, + sw_owner_id[i]); + } + + if (MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) && + dev->priv.sw_vhca_id > 0) + MLX5_SET(init_hca_in, in, sw_vhca_id, dev->priv.sw_vhca_id); + + return mlx5_cmd_exec_in(dev, init_hca, in); +} + +int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {}; + + 
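+ /* The profile field is left at zero here, i.e. a regular (graceful)
+ * close. The force and fast teardown variants below reuse this command
+ * with an explicit profile and then check the state the firmware
+ * returns.
+ */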
MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA); + return mlx5_cmd_exec_in(dev, teardown_hca, in); +} + +int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0}; + int force_state; + int ret; + + if (!MLX5_CAP_GEN(dev, force_teardown)) { + mlx5_core_dbg(dev, "force teardown is not supported in the firmware\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA); + MLX5_SET(teardown_hca_in, in, profile, MLX5_TEARDOWN_HCA_IN_PROFILE_FORCE_CLOSE); + + ret = mlx5_cmd_exec_polling(dev, in, sizeof(in), out, sizeof(out)); + if (ret) + return ret; + + force_state = MLX5_GET(teardown_hca_out, out, state); + if (force_state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) { + mlx5_core_warn(dev, "teardown with force mode failed, doing normal teardown\n"); + return -EIO; + } + + return 0; +} + +int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev) +{ + unsigned long end, delay_ms = mlx5_tout_ms(dev, TEARDOWN); + u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {}; + int state; + int ret; + + if (!MLX5_CAP_GEN(dev, fast_teardown)) { + mlx5_core_dbg(dev, "fast teardown is not supported in the firmware\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA); + MLX5_SET(teardown_hca_in, in, profile, + MLX5_TEARDOWN_HCA_IN_PROFILE_PREPARE_FAST_TEARDOWN); + + ret = mlx5_cmd_exec_inout(dev, teardown_hca, in, out); + if (ret) + return ret; + + state = MLX5_GET(teardown_hca_out, out, state); + if (state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) { + mlx5_core_warn(dev, "teardown with fast mode failed\n"); + return -EIO; + } + + mlx5_set_nic_state(dev, MLX5_NIC_IFC_DISABLED); + + /* Loop until device state turns to disable */ + end = jiffies + msecs_to_jiffies(delay_ms); + do { + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) + break; + + cond_resched(); + } while (!time_after(jiffies, end)); + + if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) { + dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n", + mlx5_get_nic_state(dev), delay_ms); + return -EIO; + } + + return 0; +} + +enum mlxsw_reg_mcc_instruction { + MLX5_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE = 0x01, + MLX5_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE = 0x02, + MLX5_REG_MCC_INSTRUCTION_UPDATE_COMPONENT = 0x03, + MLX5_REG_MCC_INSTRUCTION_VERIFY_COMPONENT = 0x04, + MLX5_REG_MCC_INSTRUCTION_ACTIVATE = 0x06, + MLX5_REG_MCC_INSTRUCTION_CANCEL = 0x08, +}; + +static int mlx5_reg_mcc_set(struct mlx5_core_dev *dev, + enum mlxsw_reg_mcc_instruction instr, + u16 component_index, u32 update_handle, + u32 component_size) +{ + u32 out[MLX5_ST_SZ_DW(mcc_reg)]; + u32 in[MLX5_ST_SZ_DW(mcc_reg)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(mcc_reg, in, instruction, instr); + MLX5_SET(mcc_reg, in, component_index, component_index); + MLX5_SET(mcc_reg, in, update_handle, update_handle); + MLX5_SET(mcc_reg, in, component_size, component_size); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCC, 0, 1); +} + +static int mlx5_reg_mcc_query(struct mlx5_core_dev *dev, + u32 *update_handle, u8 *error_code, + u8 *control_state) +{ + u32 out[MLX5_ST_SZ_DW(mcc_reg)]; + u32 in[MLX5_ST_SZ_DW(mcc_reg)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + MLX5_SET(mcc_reg, in, update_handle, *update_handle); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + 
sizeof(out), MLX5_REG_MCC, 0, 0); + if (err) + goto out; + + *update_handle = MLX5_GET(mcc_reg, out, update_handle); + *error_code = MLX5_GET(mcc_reg, out, error_code); + *control_state = MLX5_GET(mcc_reg, out, control_state); + +out: + return err; +} + +static int mlx5_reg_mcda_set(struct mlx5_core_dev *dev, + u32 update_handle, + u32 offset, u16 size, + u8 *data) +{ + int err, in_size = MLX5_ST_SZ_BYTES(mcda_reg) + size; + u32 out[MLX5_ST_SZ_DW(mcda_reg)]; + int i, j, dw_size = size >> 2; + __be32 data_element; + u32 *in; + + in = kzalloc(in_size, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(mcda_reg, in, update_handle, update_handle); + MLX5_SET(mcda_reg, in, offset, offset); + MLX5_SET(mcda_reg, in, size, size); + + for (i = 0; i < dw_size; i++) { + j = i * 4; + data_element = htonl(*(u32 *)&data[j]); + memcpy(MLX5_ADDR_OF(mcda_reg, in, data) + j, &data_element, 4); + } + + err = mlx5_core_access_reg(dev, in, in_size, out, + sizeof(out), MLX5_REG_MCDA, 0, 1); + kfree(in); + return err; +} + +static int mlx5_reg_mcqi_query(struct mlx5_core_dev *dev, + u16 component_index, bool read_pending, + u8 info_type, u16 data_size, void *mcqi_data) +{ + u32 out[MLX5_ST_SZ_DW(mcqi_reg) + MLX5_UN_SZ_DW(mcqi_reg_data)] = {}; + u32 in[MLX5_ST_SZ_DW(mcqi_reg)] = {}; + void *data; + int err; + + MLX5_SET(mcqi_reg, in, component_index, component_index); + MLX5_SET(mcqi_reg, in, read_pending_component, read_pending); + MLX5_SET(mcqi_reg, in, info_type, info_type); + MLX5_SET(mcqi_reg, in, data_size, data_size); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + MLX5_ST_SZ_BYTES(mcqi_reg) + data_size, + MLX5_REG_MCQI, 0, 0); + if (err) + return err; + + data = MLX5_ADDR_OF(mcqi_reg, out, data); + memcpy(mcqi_data, data, data_size); + + return 0; +} + +static int mlx5_reg_mcqi_caps_query(struct mlx5_core_dev *dev, u16 component_index, + u32 *max_component_size, u8 *log_mcda_word_size, + u16 *mcda_max_write_size) +{ + u32 mcqi_reg[MLX5_ST_SZ_DW(mcqi_cap)] = {}; + int err; + + err = mlx5_reg_mcqi_query(dev, component_index, 0, + MCQI_INFO_TYPE_CAPABILITIES, + MLX5_ST_SZ_BYTES(mcqi_cap), mcqi_reg); + if (err) + return err; + + *max_component_size = MLX5_GET(mcqi_cap, mcqi_reg, max_component_size); + *log_mcda_word_size = MLX5_GET(mcqi_cap, mcqi_reg, log_mcda_word_size); + *mcda_max_write_size = MLX5_GET(mcqi_cap, mcqi_reg, mcda_max_write_size); + + return 0; +} + +struct mlx5_mlxfw_dev { + struct mlxfw_dev mlxfw_dev; + struct mlx5_core_dev *mlx5_core_dev; +}; + +static int mlx5_component_query(struct mlxfw_dev *mlxfw_dev, + u16 component_index, u32 *p_max_size, + u8 *p_align_bits, u16 *p_max_write_size) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi)) { + mlx5_core_warn(dev, "caps query isn't supported by running FW\n"); + return -EOPNOTSUPP; + } + + return mlx5_reg_mcqi_caps_query(dev, component_index, p_max_size, + p_align_bits, p_max_write_size); +} + +static int mlx5_fsm_lock(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + u8 control_state, error_code; + int err; + + *fwhandle = 0; + err = mlx5_reg_mcc_query(dev, fwhandle, &error_code, &control_state); + if (err) + return err; + + if (control_state != MLXFW_FSM_STATE_IDLE) + return -EBUSY; + 
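+ /* The FSM is idle, so lock the update handle returned by the query.
+ * The mlxfw core then drives the rest of the flash flow through the
+ * ops below: UPDATE_COMPONENT, repeated MCDA block downloads,
+ * VERIFY_COMPONENT, ACTIVATE and finally RELEASE_UPDATE_HANDLE
+ * (or CANCEL on error).
+ */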
+ return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE, + 0, *fwhandle, 0); +} + +static int mlx5_fsm_component_update(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u16 component_index, u32 component_size) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_UPDATE_COMPONENT, + component_index, fwhandle, component_size); +} + +static int mlx5_fsm_block_download(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u8 *data, u16 size, u32 offset) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcda_set(dev, fwhandle, offset, size, data); +} + +static int mlx5_fsm_component_verify(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u16 component_index) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_VERIFY_COMPONENT, + component_index, fwhandle, 0); +} + +static int mlx5_fsm_activate(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_ACTIVATE, 0, + fwhandle, 0); +} + +static int mlx5_fsm_query_state(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + enum mlxfw_fsm_state *fsm_state, + enum mlxfw_fsm_state_err *fsm_state_err) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + u8 control_state, error_code; + int err; + + err = mlx5_reg_mcc_query(dev, &fwhandle, &error_code, &control_state); + if (err) + return err; + + *fsm_state = control_state; + *fsm_state_err = min_t(enum mlxfw_fsm_state_err, error_code, + MLXFW_FSM_STATE_ERR_MAX); + return 0; +} + +static void mlx5_fsm_cancel(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_CANCEL, 0, fwhandle, 0); +} + +static void mlx5_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE, 0, + fwhandle, 0); +} + +static int mlx5_fsm_reactivate(struct mlxfw_dev *mlxfw_dev, u8 *status) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + u32 out[MLX5_ST_SZ_DW(mirc_reg)]; + u32 in[MLX5_ST_SZ_DW(mirc_reg)]; + unsigned long exp_time; + int err; + + exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FSM_REACTIVATE)); + + if (!MLX5_CAP_MCAM_REG2(dev, mirc)) + return -EOPNOTSUPP; + + memset(in, 0, sizeof(in)); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MIRC, 0, 1); + if (err) + return err; + + do { + memset(out, 0, sizeof(out)); + err = 
mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MIRC, 0, 0); + if (err) + return err; + + *status = MLX5_GET(mirc_reg, out, status_code); + if (*status != MLXFW_FSM_REACTIVATE_STATUS_BUSY) + return 0; + + msleep(20); + } while (time_before(jiffies, exp_time)); + + return 0; +} + +static const struct mlxfw_dev_ops mlx5_mlxfw_dev_ops = { + .component_query = mlx5_component_query, + .fsm_lock = mlx5_fsm_lock, + .fsm_component_update = mlx5_fsm_component_update, + .fsm_block_download = mlx5_fsm_block_download, + .fsm_component_verify = mlx5_fsm_component_verify, + .fsm_activate = mlx5_fsm_activate, + .fsm_reactivate = mlx5_fsm_reactivate, + .fsm_query_state = mlx5_fsm_query_state, + .fsm_cancel = mlx5_fsm_cancel, + .fsm_release = mlx5_fsm_release +}; + +int mlx5_firmware_flash(struct mlx5_core_dev *dev, + const struct firmware *firmware, + struct netlink_ext_ack *extack) +{ + struct mlx5_mlxfw_dev mlx5_mlxfw_dev = { + .mlxfw_dev = { + .ops = &mlx5_mlxfw_dev_ops, + .psid = dev->board_id, + .psid_size = strlen(dev->board_id), + .devlink = priv_to_devlink(dev), + }, + .mlx5_core_dev = dev + }; + + if (!MLX5_CAP_GEN(dev, mcam_reg) || + !MLX5_CAP_MCAM_REG(dev, mcqi) || + !MLX5_CAP_MCAM_REG(dev, mcc) || + !MLX5_CAP_MCAM_REG(dev, mcda)) { + pr_info("%s flashing isn't supported by the running FW\n", __func__); + return -EOPNOTSUPP; + } + + return mlxfw_firmware_flash(&mlx5_mlxfw_dev.mlxfw_dev, + firmware, extack); +} + +static int mlx5_reg_mcqi_version_query(struct mlx5_core_dev *dev, + u16 component_index, bool read_pending, + u32 *mcqi_version_out) +{ + return mlx5_reg_mcqi_query(dev, component_index, read_pending, + MCQI_INFO_TYPE_VERSION, + MLX5_ST_SZ_BYTES(mcqi_version), + mcqi_version_out); +} + +static int mlx5_reg_mcqs_query(struct mlx5_core_dev *dev, u32 *out, + u16 component_index) +{ + u8 out_sz = MLX5_ST_SZ_BYTES(mcqs_reg); + u32 in[MLX5_ST_SZ_DW(mcqs_reg)] = {}; + int err; + + memset(out, 0, out_sz); + + MLX5_SET(mcqs_reg, in, component_index, component_index); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + out_sz, MLX5_REG_MCQS, 0, 0); + return err; +} + +/* scans component index sequentially, to find the boot img index */ +static int mlx5_get_boot_img_component_index(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(mcqs_reg)] = {}; + u16 identifier, component_idx = 0; + bool quit; + int err; + + do { + err = mlx5_reg_mcqs_query(dev, out, component_idx); + if (err) + return err; + + identifier = MLX5_GET(mcqs_reg, out, identifier); + quit = !!MLX5_GET(mcqs_reg, out, last_index_flag); + quit |= identifier == MCQS_IDENTIFIER_BOOT_IMG; + } while (!quit && ++component_idx); + + if (identifier != MCQS_IDENTIFIER_BOOT_IMG) { + mlx5_core_warn(dev, "mcqs: can't find boot_img component ix, last scanned idx %d\n", + component_idx); + return -EOPNOTSUPP; + } + + return component_idx; +} + +static int +mlx5_fw_image_pending(struct mlx5_core_dev *dev, + int component_index, + bool *pending_version_exists) +{ + u32 out[MLX5_ST_SZ_DW(mcqs_reg)]; + u8 component_update_state; + int err; + + err = mlx5_reg_mcqs_query(dev, out, component_index); + if (err) + return err; + + component_update_state = MLX5_GET(mcqs_reg, out, component_update_state); + + if (component_update_state == MCQS_UPDATE_STATE_IDLE) { + *pending_version_exists = false; + } else if (component_update_state == MCQS_UPDATE_STATE_ACTIVE_PENDING_RESET) { + *pending_version_exists = true; + } else { + mlx5_core_warn(dev, + "mcqs: can't read pending fw version while fw state is %d\n", + 
component_update_state); + return -ENODATA; + } + return 0; +} + +int mlx5_fw_version_query(struct mlx5_core_dev *dev, + u32 *running_ver, u32 *pending_ver) +{ + u32 reg_mcqi_version[MLX5_ST_SZ_DW(mcqi_version)] = {}; + bool pending_version_exists; + int component_index; + int err; + + if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi) || + !MLX5_CAP_MCAM_REG(dev, mcqs)) { + mlx5_core_warn(dev, "fw query isn't supported by the FW\n"); + return -EOPNOTSUPP; + } + + component_index = mlx5_get_boot_img_component_index(dev); + if (component_index < 0) + return component_index; + + err = mlx5_reg_mcqi_version_query(dev, component_index, + MCQI_FW_RUNNING_VERSION, + reg_mcqi_version); + if (err) + return err; + + *running_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version); + + err = mlx5_fw_image_pending(dev, component_index, &pending_version_exists); + if (err) + return err; + + if (!pending_version_exists) { + *pending_ver = 0; + return 0; + } + + err = mlx5_reg_mcqi_version_query(dev, component_index, + MCQI_FW_STORED_VERSION, + reg_mcqi_version); + if (err) + return err; + + *pending_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c new file mode 100644 index 000000000..dec1492da --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -0,0 +1,546 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#include "fw_reset.h" +#include "diag/fw_tracer.h" +#include "lib/tout.h" + +enum { + MLX5_FW_RESET_FLAGS_RESET_REQUESTED, + MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, + MLX5_FW_RESET_FLAGS_PENDING_COMP, + MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, + MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED +}; + +struct mlx5_fw_reset { + struct mlx5_core_dev *dev; + struct mlx5_nb nb; + struct workqueue_struct *wq; + struct work_struct fw_live_patch_work; + struct work_struct reset_request_work; + struct work_struct reset_reload_work; + struct work_struct reset_now_work; + struct work_struct reset_abort_work; + unsigned long reset_flags; + struct timer_list timer; + struct completion done; + int ret; +}; + +void mlx5_fw_reset_enable_remote_dev_reset_set(struct mlx5_core_dev *dev, bool enable) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + if (enable) + clear_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags); + else + set_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags); +} + +bool mlx5_fw_reset_enable_remote_dev_reset_get(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + return !test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags); +} + +static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level, + u8 reset_type_sel, u8 sync_resp, bool sync_start) +{ + u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + + MLX5_SET(mfrl_reg, in, reset_level, reset_level); + MLX5_SET(mfrl_reg, in, rst_type_sel, reset_type_sel); + MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_resp, sync_resp); + MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_start, sync_start); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MFRL, 0, 1); +} + +static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, + u8 *reset_type, u8 *reset_state) +{ + u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + u32 
in[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + int err; + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MFRL, 0, 0); + if (err) + return err; + + if (reset_level) + *reset_level = MLX5_GET(mfrl_reg, out, reset_level); + if (reset_type) + *reset_type = MLX5_GET(mfrl_reg, out, reset_type); + if (reset_state) + *reset_state = MLX5_GET(mfrl_reg, out, reset_state); + + return 0; +} + +int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type) +{ + return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL); +} + +static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev, + struct netlink_ext_ack *extack) +{ + u8 reset_state; + + if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state)) + goto out; + + switch (reset_state) { + case MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION: + case MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS: + NL_SET_ERR_MSG_MOD(extack, "Sync reset was already triggered"); + return -EBUSY; + case MLX5_MFRL_REG_RESET_STATE_TIMEOUT: + NL_SET_ERR_MSG_MOD(extack, "Sync reset got timeout"); + return -ETIMEDOUT; + case MLX5_MFRL_REG_RESET_STATE_NACK: + NL_SET_ERR_MSG_MOD(extack, "One of the hosts disabled reset"); + return -EPERM; + } + +out: + NL_SET_ERR_MSG_MOD(extack, "Sync reset failed"); + return -EIO; +} + +int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel, + struct netlink_ext_ack *extack) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + int err; + + set_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); + + MLX5_SET(mfrl_reg, in, reset_level, MLX5_MFRL_REG_RESET_LEVEL3); + MLX5_SET(mfrl_reg, in, rst_type_sel, reset_type_sel); + MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_start, 1); + err = mlx5_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MFRL, 0, 1, false); + if (!err) + return 0; + + clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); + if (err == -EREMOTEIO && MLX5_CAP_MCAM_FEATURE(dev, reset_state)) + return mlx5_fw_reset_get_reset_state_err(dev, extack); + + NL_SET_ERR_MSG_MOD(extack, "Sync reset command failed"); + return mlx5_cmd_check(dev, err, in, out); +} + +int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev) +{ + return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL0, 0, 0, false); +} + +static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + /* if this is the driver that initiated the fw reset, devlink completed the reload */ + if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { + complete(&fw_reset->done); + } else { + mlx5_unload_one(dev, false); + if (mlx5_health_wait_pci_up(dev)) + mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); + else + mlx5_load_one(dev, true); + devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0, + BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | + BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); + } +} + +static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + del_timer_sync(&fw_reset->timer); +} + +static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + if (!test_and_clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) { + mlx5_core_warn(dev, "Reset request was already cleared\n"); + return 
-EALREADY; + } + + mlx5_stop_sync_reset_poll(dev); + if (poll_health) + mlx5_start_health_poll(dev); + return 0; +} + +static void mlx5_sync_reset_reload_work(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_reload_work); + struct mlx5_core_dev *dev = fw_reset->dev; + + mlx5_sync_reset_clear_reset_requested(dev, false); + mlx5_enter_error_state(dev, true); + mlx5_fw_reset_complete_reload(dev); +} + +#define MLX5_RESET_POLL_INTERVAL (HZ / 10) +static void poll_sync_reset(struct timer_list *t) +{ + struct mlx5_fw_reset *fw_reset = from_timer(fw_reset, t, timer); + struct mlx5_core_dev *dev = fw_reset->dev; + u32 fatal_error; + + if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) + return; + + fatal_error = mlx5_health_check_fatal_sensors(dev); + + if (fatal_error) { + mlx5_core_warn(dev, "Got Device Reset\n"); + if (!test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags)) + queue_work(fw_reset->wq, &fw_reset->reset_reload_work); + else + mlx5_core_err(dev, "Device is being removed, Drop new reset work\n"); + return; + } + + mod_timer(&fw_reset->timer, round_jiffies(jiffies + MLX5_RESET_POLL_INTERVAL)); +} + +static void mlx5_start_sync_reset_poll(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + timer_setup(&fw_reset->timer, poll_sync_reset, 0); + fw_reset->timer.expires = round_jiffies(jiffies + MLX5_RESET_POLL_INTERVAL); + add_timer(&fw_reset->timer); +} + +static int mlx5_fw_reset_set_reset_sync_ack(struct mlx5_core_dev *dev) +{ + return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 1, false); +} + +static int mlx5_fw_reset_set_reset_sync_nack(struct mlx5_core_dev *dev) +{ + return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false); +} + +static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + if (test_and_set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) { + mlx5_core_warn(dev, "Reset request was already set\n"); + return -EALREADY; + } + mlx5_stop_health_poll(dev, true); + mlx5_start_sync_reset_poll(dev); + return 0; +} + +static void mlx5_fw_live_patch_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + fw_live_patch_work); + struct mlx5_core_dev *dev = fw_reset->dev; + + mlx5_core_info(dev, "Live patch updated firmware version: %d.%d.%d\n", fw_rev_maj(dev), + fw_rev_min(dev), fw_rev_sub(dev)); + + if (mlx5_fw_tracer_reload(dev->tracer)) + mlx5_core_err(dev, "Failed to reload FW tracer\n"); +} + +static void mlx5_sync_reset_request_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_request_work); + struct mlx5_core_dev *dev = fw_reset->dev; + int err; + + if (test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags)) { + err = mlx5_fw_reset_set_reset_sync_nack(dev); + mlx5_core_warn(dev, "PCI Sync FW Update Reset Nack %s", + err ? "Failed" : "Sent"); + return; + } + if (mlx5_sync_reset_set_reset_requested(dev)) + return; + + err = mlx5_fw_reset_set_reset_sync_ack(dev); + if (err) + mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err); + else + mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. 
Device reset is expected.\n"); +} + +static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev) +{ + struct pci_bus *bridge_bus = dev->pdev->bus; + struct pci_dev *bridge = bridge_bus->self; + u16 reg16, dev_id, sdev_id; + unsigned long timeout; + struct pci_dev *sdev; + int cap, err; + u32 reg32; + + /* Check that all functions under the pci bridge are PFs of + * this device otherwise fail this function. + */ + err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id); + if (err) + return err; + list_for_each_entry(sdev, &bridge_bus->devices, bus_list) { + err = pci_read_config_word(sdev, PCI_DEVICE_ID, &sdev_id); + if (err) + return err; + if (sdev_id != dev_id) + return -EPERM; + } + + cap = pci_find_capability(bridge, PCI_CAP_ID_EXP); + if (!cap) + return -EOPNOTSUPP; + + list_for_each_entry(sdev, &bridge_bus->devices, bus_list) { + pci_save_state(sdev); + pci_cfg_access_lock(sdev); + } + /* PCI link toggle */ + err = pcie_capability_set_word(bridge, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_LD); + if (err) + return err; + msleep(500); + err = pcie_capability_clear_word(bridge, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_LD); + if (err) + return err; + + /* Check link */ + err = pci_read_config_dword(bridge, cap + PCI_EXP_LNKCAP, ®32); + if (err) + return err; + if (!(reg32 & PCI_EXP_LNKCAP_DLLLARC)) { + mlx5_core_warn(dev, "No PCI link reporting capability (0x%08x)\n", reg32); + msleep(1000); + goto restore; + } + + timeout = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, PCI_TOGGLE)); + do { + err = pci_read_config_word(bridge, cap + PCI_EXP_LNKSTA, ®16); + if (err) + return err; + if (reg16 & PCI_EXP_LNKSTA_DLLLA) + break; + msleep(20); + } while (!time_after(jiffies, timeout)); + + if (reg16 & PCI_EXP_LNKSTA_DLLLA) { + mlx5_core_info(dev, "PCI Link up\n"); + } else { + mlx5_core_err(dev, "PCI link not ready (0x%04x) after %llu ms\n", + reg16, mlx5_tout_ms(dev, PCI_TOGGLE)); + err = -ETIMEDOUT; + } + + do { + err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, ®16); + if (err) + return err; + if (reg16 == dev_id) + break; + msleep(20); + } while (!time_after(jiffies, timeout)); + + if (reg16 == dev_id) { + mlx5_core_info(dev, "Firmware responds to PCI config cycles again\n"); + } else { + mlx5_core_err(dev, "Firmware is not responsive (0x%04x) after %llu ms\n", + reg16, mlx5_tout_ms(dev, PCI_TOGGLE)); + err = -ETIMEDOUT; + } + +restore: + list_for_each_entry(sdev, &bridge_bus->devices, bus_list) { + pci_cfg_access_unlock(sdev); + pci_restore_state(sdev); + } + + return err; +} + +static void mlx5_sync_reset_now_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_now_work); + struct mlx5_core_dev *dev = fw_reset->dev; + int err; + + if (mlx5_sync_reset_clear_reset_requested(dev, false)) + return; + + mlx5_core_warn(dev, "Sync Reset now. 
Device is going to reset.\n"); + + err = mlx5_cmd_fast_teardown_hca(dev); + if (err) { + mlx5_core_warn(dev, "Fast teardown failed, no reset done, err %d\n", err); + goto done; + } + + err = mlx5_pci_link_toggle(dev); + if (err) { + mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err); + set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags); + } + + mlx5_enter_error_state(dev, true); +done: + fw_reset->ret = err; + mlx5_fw_reset_complete_reload(dev); +} + +static void mlx5_sync_reset_abort_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_abort_work); + struct mlx5_core_dev *dev = fw_reset->dev; + + if (mlx5_sync_reset_clear_reset_requested(dev, true)) + return; + mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n"); +} + +static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct mlx5_eqe *eqe) +{ + struct mlx5_eqe_sync_fw_update *sync_fw_update_eqe; + u8 sync_event_rst_type; + + sync_fw_update_eqe = &eqe->data.sync_fw_update; + sync_event_rst_type = sync_fw_update_eqe->sync_rst_state & SYNC_RST_STATE_MASK; + switch (sync_event_rst_type) { + case MLX5_SYNC_RST_STATE_RESET_REQUEST: + queue_work(fw_reset->wq, &fw_reset->reset_request_work); + break; + case MLX5_SYNC_RST_STATE_RESET_NOW: + queue_work(fw_reset->wq, &fw_reset->reset_now_work); + break; + case MLX5_SYNC_RST_STATE_RESET_ABORT: + queue_work(fw_reset->wq, &fw_reset->reset_abort_work); + break; + } +} + +static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long action, void *data) +{ + struct mlx5_fw_reset *fw_reset = mlx5_nb_cof(nb, struct mlx5_fw_reset, nb); + struct mlx5_eqe *eqe = data; + + if (test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags)) + return NOTIFY_DONE; + + switch (eqe->sub_type) { + case MLX5_GENERAL_SUBTYPE_FW_LIVE_PATCH_EVENT: + queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work); + break; + case MLX5_GENERAL_SUBTYPE_PCI_SYNC_FOR_FW_UPDATE_EVENT: + mlx5_sync_reset_events_handle(fw_reset, eqe); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev) +{ + unsigned long pci_sync_update_timeout = mlx5_tout_ms(dev, PCI_SYNC_UPDATE); + unsigned long timeout = msecs_to_jiffies(pci_sync_update_timeout); + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + int err; + + if (!wait_for_completion_timeout(&fw_reset->done, timeout)) { + mlx5_core_warn(dev, "FW sync reset timeout after %lu seconds\n", + pci_sync_update_timeout / 1000); + err = -ETIMEDOUT; + goto out; + } + err = fw_reset->ret; + if (test_and_clear_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags)) { + mlx5_unload_one_devl_locked(dev, false); + mlx5_load_one_devl_locked(dev, true); + } +out: + clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); + return err; +} + +void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + MLX5_NB_INIT(&fw_reset->nb, fw_reset_event_notifier, GENERAL_EVENT); + mlx5_eq_notifier_register(dev, &fw_reset->nb); +} + +void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev) +{ + mlx5_eq_notifier_unregister(dev, &dev->priv.fw_reset->nb); +} + +void mlx5_drain_fw_reset(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + set_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags); + cancel_work_sync(&fw_reset->fw_live_patch_work); + 
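+ /* With MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS set above, no new reset
+ * work can be queued; the remaining works are flushed below so none of
+ * them runs while the device is being torn down.
+ */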
cancel_work_sync(&fw_reset->reset_request_work); + cancel_work_sync(&fw_reset->reset_reload_work); + cancel_work_sync(&fw_reset->reset_now_work); + cancel_work_sync(&fw_reset->reset_abort_work); +} + +int mlx5_fw_reset_init(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL); + + if (!fw_reset) + return -ENOMEM; + fw_reset->wq = create_singlethread_workqueue("mlx5_fw_reset_events"); + if (!fw_reset->wq) { + kfree(fw_reset); + return -ENOMEM; + } + + fw_reset->dev = dev; + dev->priv.fw_reset = fw_reset; + + INIT_WORK(&fw_reset->fw_live_patch_work, mlx5_fw_live_patch_event); + INIT_WORK(&fw_reset->reset_request_work, mlx5_sync_reset_request_event); + INIT_WORK(&fw_reset->reset_reload_work, mlx5_sync_reset_reload_work); + INIT_WORK(&fw_reset->reset_now_work, mlx5_sync_reset_now_event); + INIT_WORK(&fw_reset->reset_abort_work, mlx5_sync_reset_abort_event); + + init_completion(&fw_reset->done); + return 0; +} + +void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + destroy_workqueue(fw_reset->wq); + kfree(dev->priv.fw_reset); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h new file mode 100644 index 000000000..dc141c7e6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_FW_RESET_H +#define __MLX5_FW_RESET_H + +#include "mlx5_core.h" + +void mlx5_fw_reset_enable_remote_dev_reset_set(struct mlx5_core_dev *dev, bool enable); +bool mlx5_fw_reset_enable_remote_dev_reset_get(struct mlx5_core_dev *dev); +int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type); +int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel, + struct netlink_ext_ack *extack); +int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev); + +int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev); +void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev); +void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev); +void mlx5_drain_fw_reset(struct mlx5_core_dev *dev); +int mlx5_fw_reset_init(struct mlx5_core_dev *dev); +void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c new file mode 100644 index 000000000..e42e4ac23 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -0,0 +1,941 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/random.h> +#include <linux/vmalloc.h> +#include <linux/hardirq.h> +#include <linux/mlx5/driver.h> +#include <linux/kern_levels.h> +#include "mlx5_core.h" +#include "lib/eq.h" +#include "lib/mlx5.h" +#include "lib/pci_vsc.h" +#include "lib/tout.h" +#include "diag/fw_tracer.h" + +enum { + MAX_MISSES = 3, +}; + +enum { + MLX5_HEALTH_SYNDR_FW_ERR = 0x1, + MLX5_HEALTH_SYNDR_IRISC_ERR = 0x7, + MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR = 0x8, + MLX5_HEALTH_SYNDR_CRC_ERR = 0x9, + MLX5_HEALTH_SYNDR_FETCH_PCI_ERR = 0xa, + MLX5_HEALTH_SYNDR_HW_FTL_ERR = 0xb, + MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR = 0xc, + MLX5_HEALTH_SYNDR_EQ_ERR = 0xd, + MLX5_HEALTH_SYNDR_EQ_INV = 0xe, + MLX5_HEALTH_SYNDR_FFSER_ERR = 0xf, + MLX5_HEALTH_SYNDR_HIGH_TEMP = 0x10 +}; + +enum { + MLX5_DROP_NEW_HEALTH_WORK, +}; + +enum { + MLX5_SENSOR_NO_ERR = 0, + MLX5_SENSOR_PCI_COMM_ERR = 1, + MLX5_SENSOR_PCI_ERR = 2, + MLX5_SENSOR_NIC_DISABLED = 3, + MLX5_SENSOR_NIC_SW_RESET = 4, + MLX5_SENSOR_FW_SYND_RFR = 5, +}; + +enum { + MLX5_SEVERITY_MASK = 0x7, + MLX5_SEVERITY_VALID_MASK = 0x8, +}; + +u8 mlx5_get_nic_state(struct mlx5_core_dev *dev) +{ + return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7; +} + +void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) +{ + u32 cur_cmdq_addr_l_sz; + + cur_cmdq_addr_l_sz = ioread32be(&dev->iseg->cmdq_addr_l_sz); + iowrite32be((cur_cmdq_addr_l_sz & 0xFFFFF000) | + state << MLX5_NIC_IFC_OFFSET, + &dev->iseg->cmdq_addr_l_sz); +} + +static bool sensor_pci_not_working(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + + /* Offline PCI reads return 0xffffffff */ + return (ioread32be(&h->fw_ver) == 0xffffffff); +} + +static int mlx5_health_get_rfr(u8 rfr_severity) +{ + return rfr_severity >> MLX5_RFR_BIT_OFFSET; +} + +static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u8 synd = ioread8(&h->synd); + u8 rfr; + + rfr = mlx5_health_get_rfr(ioread8(&h->rfr_severity)); + + if (rfr && synd) + mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd); + return rfr && synd; +} + +u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev) +{ + if (sensor_pci_not_working(dev)) + return MLX5_SENSOR_PCI_COMM_ERR; + if (pci_channel_offline(dev->pdev)) + return MLX5_SENSOR_PCI_ERR; + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) + return MLX5_SENSOR_NIC_DISABLED; + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET) + return MLX5_SENSOR_NIC_SW_RESET; + if (sensor_fw_synd_rfr(dev)) + return MLX5_SENSOR_FW_SYND_RFR; + + return MLX5_SENSOR_NO_ERR; +} + +static int lock_sem_sw_reset(struct mlx5_core_dev *dev, bool lock) +{ + enum 
mlx5_vsc_state state; + int ret; + + if (!mlx5_core_is_pf(dev)) + return -EBUSY; + + /* Try to lock GW access, this stage doesn't return + * EBUSY because locked GW does not mean that other PF + * already started the reset. + */ + ret = mlx5_vsc_gw_lock(dev); + if (ret == -EBUSY) + return -EINVAL; + if (ret) + return ret; + + state = lock ? MLX5_VSC_LOCK : MLX5_VSC_UNLOCK; + /* At this stage, if the return status == EBUSY, then we know + * for sure that another PF started the reset, so don't allow + * another reset. + */ + ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, state); + if (ret) + mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n"); + + /* Unlock GW access */ + mlx5_vsc_gw_unlock(dev); + + return ret; +} + +static bool reset_fw_if_needed(struct mlx5_core_dev *dev) +{ + bool supported = (ioread32be(&dev->iseg->initializing) >> + MLX5_FW_RESET_SUPPORTED_OFFSET) & 1; + u32 fatal_error; + + if (!supported) + return false; + + /* The reset only needs to be issued by one PF. The health buffer is + * shared between all functions, and will be cleared during a reset. + * Check again to avoid a redundant 2nd reset. If the fatal errors was + * PCI related a reset won't help. + */ + fatal_error = mlx5_health_check_fatal_sensors(dev); + if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR || + fatal_error == MLX5_SENSOR_NIC_DISABLED || + fatal_error == MLX5_SENSOR_NIC_SW_RESET) { + mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help."); + return false; + } + + mlx5_core_warn(dev, "Issuing FW Reset\n"); + /* Write the NIC interface field to initiate the reset, the command + * interface address also resides here, don't overwrite it. + */ + mlx5_set_nic_state(dev, MLX5_NIC_IFC_SW_RESET); + + return true; +} + +static void enter_error_state(struct mlx5_core_dev *dev, bool force) +{ + if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */ + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + mlx5_cmd_flush(dev); + } + + mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1); +} + +void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) +{ + bool err_detected = false; + + /* Mark the device as fatal in order to abort FW commands */ + if ((mlx5_health_check_fatal_sensors(dev) || force) && + dev->state == MLX5_DEVICE_STATE_UP) { + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + err_detected = true; + } + mutex_lock(&dev->intf_state_mutex); + if (!err_detected && dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + goto unlock;/* a previous error is still being handled */ + + enter_error_state(dev, force); +unlock: + mutex_unlock(&dev->intf_state_mutex); +} + +void mlx5_error_sw_reset(struct mlx5_core_dev *dev) +{ + unsigned long end, delay_ms = mlx5_tout_ms(dev, PCI_TOGGLE); + int lock = -EBUSY; + + mutex_lock(&dev->intf_state_mutex); + if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) + goto unlock; + + mlx5_core_err(dev, "start\n"); + + if (mlx5_health_check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) { + /* Get cr-dump and reset FW semaphore */ + lock = lock_sem_sw_reset(dev, true); + + if (lock == -EBUSY) { + delay_ms = mlx5_tout_ms(dev, FULL_CRDUMP); + goto recover_from_sw_reset; + } + /* Execute SW reset */ + reset_fw_if_needed(dev); + } + +recover_from_sw_reset: + /* Recover from SW reset */ + end = jiffies + msecs_to_jiffies(delay_ms); + do { + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) + break; + + msleep(20); + } while (!time_after(jiffies, end)); + + if (mlx5_get_nic_state(dev) 
!= MLX5_NIC_IFC_DISABLED) { + dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n", + mlx5_get_nic_state(dev), delay_ms); + } + + /* Release FW semaphore if you are the lock owner */ + if (!lock) + lock_sem_sw_reset(dev, false); + + mlx5_core_err(dev, "end\n"); + +unlock: + mutex_unlock(&dev->intf_state_mutex); +} + +static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) +{ + u8 nic_interface = mlx5_get_nic_state(dev); + + switch (nic_interface) { + case MLX5_NIC_IFC_FULL: + mlx5_core_warn(dev, "Expected to see disabled NIC but it is full driver\n"); + break; + + case MLX5_NIC_IFC_DISABLED: + mlx5_core_warn(dev, "starting teardown\n"); + break; + + case MLX5_NIC_IFC_NO_DRAM_NIC: + mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n"); + break; + + case MLX5_NIC_IFC_SW_RESET: + /* The IFC mode field is 3 bits, so it will read 0x7 in 2 cases: + * 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded + * and this is a VF), this is not recoverable by SW reset. + * Logging of this is handled elsewhere. + * 2. FW reset has been issued by another function, driver can + * be reloaded to recover after the mode switches to + * MLX5_NIC_IFC_DISABLED. + */ + if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR) + mlx5_core_warn(dev, "NIC SW reset in progress\n"); + break; + + default: + mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n", + nic_interface); + } + + mlx5_disable_device(dev); +} + +int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev) +{ + unsigned long end; + + end = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FW_RESET)); + while (sensor_pci_not_working(dev)) { + if (time_after(jiffies, end)) + return -ETIMEDOUT; + msleep(100); + } + return 0; +} + +static int mlx5_health_try_recover(struct mlx5_core_dev *dev) +{ + mlx5_core_warn(dev, "handling bad device here\n"); + mlx5_handle_bad_state(dev); + if (mlx5_health_wait_pci_up(dev)) { + mlx5_core_err(dev, "health recovery flow aborted, PCI reads still not working\n"); + return -EIO; + } + mlx5_core_err(dev, "starting health recovery flow\n"); + if (mlx5_recover_device(dev) || mlx5_health_check_fatal_sensors(dev)) { + mlx5_core_err(dev, "health recovery failed\n"); + return -EIO; + } + + mlx5_core_info(dev, "health recovery succeeded\n"); + return 0; +} + +static const char *hsynd_str(u8 synd) +{ + switch (synd) { + case MLX5_HEALTH_SYNDR_FW_ERR: + return "firmware internal error"; + case MLX5_HEALTH_SYNDR_IRISC_ERR: + return "irisc not responding"; + case MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR: + return "unrecoverable hardware error"; + case MLX5_HEALTH_SYNDR_CRC_ERR: + return "firmware CRC error"; + case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR: + return "ICM fetch PCI error"; + case MLX5_HEALTH_SYNDR_HW_FTL_ERR: + return "HW fatal error\n"; + case MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR: + return "async EQ buffer overrun"; + case MLX5_HEALTH_SYNDR_EQ_ERR: + return "EQ error"; + case MLX5_HEALTH_SYNDR_EQ_INV: + return "Invalid EQ referenced"; + case MLX5_HEALTH_SYNDR_FFSER_ERR: + return "FFSER error"; + case MLX5_HEALTH_SYNDR_HIGH_TEMP: + return "High temperature"; + default: + return "unrecognized error"; + } +} + +static const char *mlx5_loglevel_str(int level) +{ + switch (level) { + case LOGLEVEL_EMERG: + return "EMERGENCY"; + case LOGLEVEL_ALERT: + return "ALERT"; + case LOGLEVEL_CRIT: + return "CRITICAL"; + case LOGLEVEL_ERR: + return "ERROR"; + case LOGLEVEL_WARNING: + return "WARNING"; + case LOGLEVEL_NOTICE: + return "NOTICE"; + case LOGLEVEL_INFO: + return 
"INFO"; + case LOGLEVEL_DEBUG: + return "DEBUG"; + } + return "Unknown log level"; +} + +static int mlx5_health_get_severity(u8 rfr_severity) +{ + return rfr_severity & MLX5_SEVERITY_VALID_MASK ? + rfr_severity & MLX5_SEVERITY_MASK : LOGLEVEL_ERR; +} + +static void print_health_info(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u8 rfr_severity; + int severity; + int i; + + /* If the syndrome is 0, the device is OK and no need to print buffer */ + if (!ioread8(&h->synd)) + return; + + if (ioread32be(&h->fw_ver) == 0xFFFFFFFF) { + mlx5_log(dev, LOGLEVEL_ERR, "PCI slot is unavailable\n"); + return; + } + + rfr_severity = ioread8(&h->rfr_severity); + severity = mlx5_health_get_severity(rfr_severity); + mlx5_log(dev, severity, "Health issue observed, %s, severity(%d) %s:\n", + hsynd_str(ioread8(&h->synd)), severity, mlx5_loglevel_str(severity)); + + for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) + mlx5_log(dev, severity, "assert_var[%d] 0x%08x\n", i, + ioread32be(h->assert_var + i)); + + mlx5_log(dev, severity, "assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr)); + mlx5_log(dev, severity, "assert_callra 0x%08x\n", ioread32be(&h->assert_callra)); + mlx5_log(dev, severity, "fw_ver %d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev), + fw_rev_sub(dev)); + mlx5_log(dev, severity, "time %u\n", ioread32be(&h->time)); + mlx5_log(dev, severity, "hw_id 0x%08x\n", ioread32be(&h->hw_id)); + mlx5_log(dev, severity, "rfr %d\n", mlx5_health_get_rfr(rfr_severity)); + mlx5_log(dev, severity, "severity %d (%s)\n", severity, mlx5_loglevel_str(severity)); + mlx5_log(dev, severity, "irisc_index %d\n", ioread8(&h->irisc_index)); + mlx5_log(dev, severity, "synd 0x%x: %s\n", ioread8(&h->synd), + hsynd_str(ioread8(&h->synd))); + mlx5_log(dev, severity, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd)); + mlx5_log(dev, severity, "raw fw_ver 0x%08x\n", ioread32be(&h->fw_ver)); +} + +static int +mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u8 synd; + int err; + + synd = ioread8(&h->synd); + err = devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd); + if (err || !synd) + return err; + return devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd)); +} + +struct mlx5_fw_reporter_ctx { + u8 err_synd; + int miss_counter; +}; + +static int +mlx5_fw_reporter_ctx_pairs_put(struct devlink_fmsg *fmsg, + struct mlx5_fw_reporter_ctx *fw_reporter_ctx) +{ + int err; + + err = devlink_fmsg_u8_pair_put(fmsg, "syndrome", + fw_reporter_ctx->err_synd); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "fw_miss_counter", + fw_reporter_ctx->miss_counter); + if (err) + return err; + return 0; +} + +static int +mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev, + struct devlink_fmsg *fmsg) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u8 rfr_severity; + int err; + int i; + + if (!ioread8(&h->synd)) + return 0; + + err = devlink_fmsg_pair_nest_start(fmsg, "health buffer"); + if (err) + return err; + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + err = devlink_fmsg_arr_pair_nest_start(fmsg, "assert_var"); + if (err) + return err; + + for (i = 0; i < 
ARRAY_SIZE(h->assert_var); i++) { + err = devlink_fmsg_u32_put(fmsg, ioread32be(h->assert_var + i)); + if (err) + return err; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "assert_exit_ptr", + ioread32be(&h->assert_exit_ptr)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "assert_callra", + ioread32be(&h->assert_callra)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "time", ioread32be(&h->time)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "hw_id", ioread32be(&h->hw_id)); + if (err) + return err; + rfr_severity = ioread8(&h->rfr_severity); + err = devlink_fmsg_u8_pair_put(fmsg, "rfr", mlx5_health_get_rfr(rfr_severity)); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, "severity", mlx5_health_get_severity(rfr_severity)); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, "irisc_index", + ioread8(&h->irisc_index)); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, "synd", ioread8(&h->synd)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "ext_synd", + ioread16be(&h->ext_synd)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "raw_fw_ver", + ioread32be(&h->fw_ver)); + if (err) + return err; + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + return devlink_fmsg_pair_nest_end(fmsg); +} + +static int +mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + int err; + + err = mlx5_fw_tracer_trigger_core_dump_general(dev); + if (err) + return err; + + if (priv_ctx) { + struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx; + + err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx); + if (err) + return err; + } + + err = mlx5_fw_reporter_heath_buffer_data_put(dev, fmsg); + if (err) + return err; + return mlx5_fw_tracer_get_saved_traces_objects(dev->tracer, fmsg); +} + +static void mlx5_fw_reporter_err_work(struct work_struct *work) +{ + struct mlx5_fw_reporter_ctx fw_reporter_ctx; + struct mlx5_core_health *health; + + health = container_of(work, struct mlx5_core_health, report_work); + + if (IS_ERR_OR_NULL(health->fw_reporter)) + return; + + fw_reporter_ctx.err_synd = health->synd; + fw_reporter_ctx.miss_counter = health->miss_counter; + if (fw_reporter_ctx.err_synd) { + devlink_health_report(health->fw_reporter, + "FW syndrome reported", &fw_reporter_ctx); + return; + } + if (fw_reporter_ctx.miss_counter) + devlink_health_report(health->fw_reporter, + "FW miss counter reported", + &fw_reporter_ctx); +} + +static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = { + .name = "fw", + .diagnose = mlx5_fw_reporter_diagnose, + .dump = mlx5_fw_reporter_dump, +}; + +static int +mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter, + void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + + return mlx5_health_try_recover(dev); +} + +static int +mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + u32 crdump_size = dev->priv.health.crdump_size; + u32 *cr_data; + int err; + + if (!mlx5_core_is_pf(dev)) + return -EPERM; + + cr_data = kvmalloc(crdump_size, 
GFP_KERNEL); + if (!cr_data) + return -ENOMEM; + err = mlx5_crdump_collect(dev, cr_data); + if (err) + goto free_data; + + if (priv_ctx) { + struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx; + + err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx); + if (err) + goto free_data; + } + + err = devlink_fmsg_binary_pair_put(fmsg, "crdump_data", cr_data, crdump_size); + +free_data: + kvfree(cr_data); + return err; +} + +static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) +{ + struct mlx5_fw_reporter_ctx fw_reporter_ctx; + struct mlx5_core_health *health; + struct mlx5_core_dev *dev; + struct devlink *devlink; + struct mlx5_priv *priv; + + health = container_of(work, struct mlx5_core_health, fatal_report_work); + priv = container_of(health, struct mlx5_priv, health); + dev = container_of(priv, struct mlx5_core_dev, priv); + devlink = priv_to_devlink(dev); + + mutex_lock(&dev->intf_state_mutex); + if (test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) { + mlx5_core_err(dev, "health works are not permitted at this stage\n"); + mutex_unlock(&dev->intf_state_mutex); + return; + } + mutex_unlock(&dev->intf_state_mutex); + enter_error_state(dev, false); + if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) { + devl_lock(devlink); + if (mlx5_health_try_recover(dev)) + mlx5_core_err(dev, "health recovery failed\n"); + devl_unlock(devlink); + return; + } + fw_reporter_ctx.err_synd = health->synd; + fw_reporter_ctx.miss_counter = health->miss_counter; + if (devlink_health_report(health->fw_fatal_reporter, + "FW fatal error reported", &fw_reporter_ctx) == -ECANCELED) { + /* If recovery wasn't performed, due to grace period, + * unload the driver. This ensures that the driver + * closes all its resources and it is not subjected to + * requests from the kernel. + */ + mlx5_core_err(dev, "Driver is in error state. 
Unloading\n"); + mlx5_unload_one(dev, false); + } +} + +static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = { + .name = "fw_fatal", + .recover = mlx5_fw_fatal_reporter_recover, + .dump = mlx5_fw_fatal_reporter_dump, +}; + +#define MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD 180000 +#define MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD 60000 +#define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000 +#define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD + +static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct devlink *devlink = priv_to_devlink(dev); + u64 grace_period; + + if (mlx5_core_is_ecpf(dev)) { + grace_period = MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD; + } else if (mlx5_core_is_pf(dev)) { + grace_period = MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD; + } else { + /* VF or SF */ + grace_period = MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD; + } + + health->fw_reporter = + devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops, + 0, dev); + if (IS_ERR(health->fw_reporter)) + mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n", + PTR_ERR(health->fw_reporter)); + + health->fw_fatal_reporter = + devlink_health_reporter_create(devlink, + &mlx5_fw_fatal_reporter_ops, + grace_period, + dev); + if (IS_ERR(health->fw_fatal_reporter)) + mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n", + PTR_ERR(health->fw_fatal_reporter)); +} + +static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + if (!IS_ERR_OR_NULL(health->fw_reporter)) + devlink_health_reporter_destroy(health->fw_reporter); + + if (!IS_ERR_OR_NULL(health->fw_fatal_reporter)) + devlink_health_reporter_destroy(health->fw_fatal_reporter); +} + +static unsigned long get_next_poll_jiffies(struct mlx5_core_dev *dev) +{ + unsigned long next; + + get_random_bytes(&next, sizeof(next)); + next %= HZ; + next += jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL)); + + return next; +} + +void mlx5_trigger_health_work(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + unsigned long flags; + + spin_lock_irqsave(&health->wq_lock, flags); + if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) + queue_work(health->wq, &health->fatal_report_work); + else + mlx5_core_err(dev, "new health works are not permitted at this stage\n"); + spin_unlock_irqrestore(&health->wq_lock, flags); +} + +#define MLX5_MSEC_PER_HOUR (MSEC_PER_SEC * 60 * 60) +static void mlx5_health_log_ts_update(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + u32 out[MLX5_ST_SZ_DW(mrtc_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(mrtc_reg)] = {}; + struct mlx5_core_health *health; + struct mlx5_core_dev *dev; + struct mlx5_priv *priv; + u64 now_us; + + health = container_of(dwork, struct mlx5_core_health, update_fw_log_ts_work); + priv = container_of(health, struct mlx5_priv, health); + dev = container_of(priv, struct mlx5_core_dev, priv); + + now_us = ktime_to_us(ktime_get_real()); + + MLX5_SET(mrtc_reg, in, time_h, now_us >> 32); + MLX5_SET(mrtc_reg, in, time_l, now_us & 0xFFFFFFFF); + mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MRTC, 0, 1); + + queue_delayed_work(health->wq, &health->update_fw_log_ts_work, + msecs_to_jiffies(MLX5_MSEC_PER_HOUR)); +} + +static void poll_health(struct timer_list *t) +{ + struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer); + struct 
mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u32 fatal_error; + u8 prev_synd; + u32 count; + + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + goto out; + + fatal_error = mlx5_health_check_fatal_sensors(dev); + + if (fatal_error && !health->fatal_error) { + mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error); + dev->priv.health.fatal_error = fatal_error; + print_health_info(dev); + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + mlx5_trigger_health_work(dev); + return; + } + + count = ioread32be(health->health_counter); + if (count == health->prev) + ++health->miss_counter; + else + health->miss_counter = 0; + + health->prev = count; + if (health->miss_counter == MAX_MISSES) { + mlx5_core_err(dev, "device's health compromised - reached miss count\n"); + print_health_info(dev); + queue_work(health->wq, &health->report_work); + } + + prev_synd = health->synd; + health->synd = ioread8(&h->synd); + if (health->synd && health->synd != prev_synd) + queue_work(health->wq, &health->report_work); + +out: + mod_timer(&health->timer, get_next_poll_jiffies(dev)); +} + +void mlx5_start_health_poll(struct mlx5_core_dev *dev) +{ + u64 poll_interval_ms = mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL); + struct mlx5_core_health *health = &dev->priv.health; + + timer_setup(&health->timer, poll_health, 0); + health->fatal_error = MLX5_SENSOR_NO_ERR; + clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); + health->health = &dev->iseg->health; + health->health_counter = &dev->iseg->health_counter; + + health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms); + add_timer(&health->timer); +} + +void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) +{ + struct mlx5_core_health *health = &dev->priv.health; + unsigned long flags; + + if (disable_health) { + spin_lock_irqsave(&health->wq_lock, flags); + set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); + spin_unlock_irqrestore(&health->wq_lock, flags); + } + + del_timer_sync(&health->timer); +} + +void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc)) + queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0); +} + +void mlx5_drain_health_wq(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + unsigned long flags; + + spin_lock_irqsave(&health->wq_lock, flags); + set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); + spin_unlock_irqrestore(&health->wq_lock, flags); + cancel_delayed_work_sync(&health->update_fw_log_ts_work); + cancel_work_sync(&health->report_work); + cancel_work_sync(&health->fatal_report_work); +} + +void mlx5_health_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + cancel_delayed_work_sync(&health->update_fw_log_ts_work); + destroy_workqueue(health->wq); + mlx5_fw_reporters_destroy(dev); +} + +int mlx5_health_init(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health; + char *name; + + mlx5_fw_reporters_create(dev); + + health = &dev->priv.health; + name = kmalloc(64, GFP_KERNEL); + if (!name) + goto out_err; + + strcpy(name, "mlx5_health"); + strcat(name, dev_name(dev->device)); + health->wq = create_singlethread_workqueue(name); + kfree(name); + if (!health->wq) + goto out_err; + spin_lock_init(&health->wq_lock); + INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work); + INIT_WORK(&health->report_work, 
mlx5_fw_reporter_err_work); + INIT_DELAYED_WORK(&health->update_fw_log_ts_work, mlx5_health_log_ts_update); + + return 0; + +out_err: + mlx5_fw_reporters_destroy(dev); + return -ENOMEM; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c new file mode 100644 index 000000000..e09518f88 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "en.h" +#include "ipoib.h" +#include "en/fs_ethtool.h" + +static void mlx5i_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_drvinfo(priv, drvinfo); + strscpy(drvinfo->driver, KBUILD_MODNAME "[ib_ipoib]", + sizeof(drvinfo->driver)); +} + +static void mlx5i_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_strings(priv, stringset, data); +} + +static int mlx5i_get_sset_count(struct net_device *dev, int sset) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + return mlx5e_ethtool_get_sset_count(priv, sset); +} + +static void mlx5i_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + u64 *data) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_ethtool_stats(priv, stats, data); +} + +static int mlx5i_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + return mlx5e_ethtool_set_ringparam(priv, param); +} + +static void mlx5i_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_ringparam(priv, param, kernel_param); +} + +static int mlx5i_set_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5i_priv *ipriv = netdev_priv(dev); + struct mlx5e_priv *epriv = mlx5i_epriv(dev); + + /* rtnl lock protects from race between this ethtool op and sub + * interface ndo_init/uninit. + */ + ASSERT_RTNL(); + if (ipriv->num_sub_interfaces > 0) { + mlx5_core_warn(epriv->mdev, + "can't change number of channels for interfaces with sub interfaces (%u)\n", + ipriv->num_sub_interfaces); + return -EINVAL; + } + + return mlx5e_ethtool_set_channels(epriv, ch); +} + +static void mlx5i_get_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_channels(priv, ch); +} + +static int mlx5i_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + return mlx5e_ethtool_set_coalesce(priv, coal, kernel_coal, extack); +} + +static int mlx5i_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal); +} + +static int mlx5i_get_ts_info(struct net_device *netdev, + struct ethtool_ts_info *info) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + return mlx5e_ethtool_get_ts_info(priv, info); +} + +static int mlx5i_flash_device(struct net_device *netdev, + struct ethtool_flash *flash) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + return mlx5e_ethtool_flash_device(priv, flash); +} + +static inline int mlx5_ptys_width_enum_to_int(enum mlx5_ptys_width width) +{ + switch (width) { + case MLX5_PTYS_WIDTH_1X: return 1; + case MLX5_PTYS_WIDTH_2X: return 2; + case MLX5_PTYS_WIDTH_4X: return 4; + case MLX5_PTYS_WIDTH_8X: return 8; + case MLX5_PTYS_WIDTH_12X: return 12; + default: return -1; + } +} + +enum 
mlx5_ptys_rate { + MLX5_PTYS_RATE_SDR = 1 << 0, + MLX5_PTYS_RATE_DDR = 1 << 1, + MLX5_PTYS_RATE_QDR = 1 << 2, + MLX5_PTYS_RATE_FDR10 = 1 << 3, + MLX5_PTYS_RATE_FDR = 1 << 4, + MLX5_PTYS_RATE_EDR = 1 << 5, + MLX5_PTYS_RATE_HDR = 1 << 6, + MLX5_PTYS_RATE_NDR = 1 << 7, +}; + +static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate) +{ + switch (rate) { + case MLX5_PTYS_RATE_SDR: return 2500; + case MLX5_PTYS_RATE_DDR: return 5000; + case MLX5_PTYS_RATE_QDR: + case MLX5_PTYS_RATE_FDR10: return 10000; + case MLX5_PTYS_RATE_FDR: return 14000; + case MLX5_PTYS_RATE_EDR: return 25000; + case MLX5_PTYS_RATE_HDR: return 50000; + case MLX5_PTYS_RATE_NDR: return 100000; + default: return -1; + } +} + +static u32 mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper) +{ + int rate, width; + + rate = mlx5_ptys_rate_enum_to_int(ib_proto_oper); + if (rate < 0) + return SPEED_UNKNOWN; + width = mlx5_ptys_width_enum_to_int(ib_link_width_oper); + if (width < 0) + return SPEED_UNKNOWN; + + return rate * width; +} + +static int mlx5i_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u16 ib_link_width_oper; + u16 ib_proto_oper; + int speed, ret; + + ret = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper, &ib_proto_oper, + 1); + if (ret) + return ret; + + ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); + ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); + + speed = mlx5i_get_speed_settings(ib_link_width_oper, ib_proto_oper); + link_ksettings->base.speed = speed; + link_ksettings->base.duplex = speed == SPEED_UNKNOWN ? DUPLEX_UNKNOWN : DUPLEX_FULL; + + link_ksettings->base.port = PORT_OTHER; + + link_ksettings->base.autoneg = AUTONEG_DISABLE; + + return 0; +} + +static u32 mlx5i_flow_type_mask(u32 flow_type) +{ + return flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS); +} + +static int mlx5i_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct ethtool_rx_flow_spec *fs = &cmd->fs; + + if (mlx5i_flow_type_mask(fs->flow_type) == ETHER_FLOW) + return -EINVAL; + + return mlx5e_ethtool_set_rxnfc(priv, cmd); +} + +static int mlx5i_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, + u32 *rule_locs) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + /* ETHTOOL_GRXRINGS is needed by ethtool -x which is not part + * of rxnfc. We keep this logic out of mlx5e_ethtool_get_rxnfc, + * to avoid breaking "ethtool -x" when mlx5e_ethtool_get_rxnfc + * is compiled out via CONFIG_MLX5_EN_RXNFC=n. 
+ */ + if (info->cmd == ETHTOOL_GRXRINGS) { + info->data = priv->channels.params.num_channels; + return 0; + } + + return mlx5e_ethtool_get_rxnfc(priv, info, rule_locs); +} + +const struct ethtool_ops mlx5i_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USE_ADAPTIVE, + .get_drvinfo = mlx5i_get_drvinfo, + .get_strings = mlx5i_get_strings, + .get_sset_count = mlx5i_get_sset_count, + .get_ethtool_stats = mlx5i_get_ethtool_stats, + .get_ringparam = mlx5i_get_ringparam, + .set_ringparam = mlx5i_set_ringparam, + .flash_device = mlx5i_flash_device, + .get_channels = mlx5i_get_channels, + .set_channels = mlx5i_set_channels, + .get_coalesce = mlx5i_get_coalesce, + .set_coalesce = mlx5i_set_coalesce, + .get_ts_info = mlx5i_get_ts_info, + .get_rxnfc = mlx5i_get_rxnfc, + .set_rxnfc = mlx5i_set_rxnfc, + .get_link_ksettings = mlx5i_get_link_ksettings, + .get_link = ethtool_op_get_link, +}; + +const struct ethtool_ops mlx5i_pkey_ethtool_ops = { + .get_drvinfo = mlx5i_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_ts_info = mlx5i_get_ts_info, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c new file mode 100644 index 000000000..aed4e8961 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -0,0 +1,842 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <rdma/ib_verbs.h> +#include <linux/mlx5/fs.h> +#include "en.h" +#include "en/params.h" +#include "ipoib.h" +#include "en/fs_ethtool.h" + +#define IB_DEFAULT_Q_KEY 0xb1b +#define MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE 9 + +static int mlx5i_open(struct net_device *netdev); +static int mlx5i_close(struct net_device *netdev); +static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu); + +static const struct net_device_ops mlx5i_netdev_ops = { + .ndo_open = mlx5i_open, + .ndo_stop = mlx5i_close, + .ndo_get_stats64 = mlx5i_get_stats, + .ndo_init = mlx5i_dev_init, + .ndo_uninit = mlx5i_dev_cleanup, + .ndo_change_mtu = mlx5i_change_mtu, + .ndo_eth_ioctl = mlx5i_ioctl, +}; + +/* IPoIB mlx5 netdev profile */ +static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */ + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, false); + mlx5e_set_rq_type(mdev, params); + mlx5e_init_rq_type_params(mdev, params); + + /* RQ size in ipoib by default is 512 */ + params->log_rq_mtu_frames = is_kdump_kernel() ? + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : + MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE; + + params->packet_merge.type = MLX5E_PACKET_MERGE_NONE; + params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN; + + /* CQE compression is not supported for IPoIB */ + params->rx_cqe_compress_def = false; + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); +} + +/* Called directly after IPoIB netdevice was created to initialize SW structs */ +int mlx5i_init(struct mlx5_core_dev *mdev, struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + netif_carrier_off(netdev); + mlx5e_set_netdev_mtu_boundaries(priv); + netdev->mtu = netdev->max_mtu; + + mlx5e_build_nic_params(priv, NULL, netdev->mtu); + mlx5i_build_nic_params(mdev, &priv->channels.params); + + mlx5e_timestamp_init(priv); + + /* netdev init */ + netdev->hw_features |= NETIF_F_SG; + netdev->hw_features |= NETIF_F_IP_CSUM; + netdev->hw_features |= NETIF_F_IPV6_CSUM; + netdev->hw_features |= NETIF_F_GRO; + netdev->hw_features |= NETIF_F_TSO; + netdev->hw_features |= NETIF_F_TSO6; + netdev->hw_features |= NETIF_F_RXCSUM; + netdev->hw_features |= NETIF_F_RXHASH; + + netdev->netdev_ops = &mlx5i_netdev_ops; + netdev->ethtool_ops = &mlx5i_ethtool_ops; + + return 0; +} + +/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */ +void mlx5i_cleanup(struct mlx5e_priv *priv) +{ + mlx5e_priv_cleanup(priv); +} + +static void mlx5i_grp_sw_update_stats(struct mlx5e_priv *priv) +{ + struct rtnl_link_stats64 s = {}; + int i, j; + + for (i = 0; i < priv->stats_nch; i++) { + struct mlx5e_channel_stats *channel_stats; + struct mlx5e_rq_stats *rq_stats; + + channel_stats = priv->channel_stats[i]; + rq_stats = &channel_stats->rq; + + s.rx_packets += rq_stats->packets; + s.rx_bytes += rq_stats->bytes; + + for (j = 0; j < priv->max_opened_tc; j++) { + struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; + + s.tx_packets += sq_stats->packets; + s.tx_bytes += sq_stats->bytes; + s.tx_dropped += sq_stats->dropped; + } + } + + memset(&priv->stats.sw, 0, sizeof(s)); + + priv->stats.sw.rx_packets = s.rx_packets; + priv->stats.sw.rx_bytes = s.rx_bytes; + priv->stats.sw.tx_packets = s.tx_packets; + priv->stats.sw.tx_bytes = s.tx_bytes; + priv->stats.sw.tx_queue_dropped = s.tx_dropped; +} + +void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ + struct mlx5e_priv 
*priv = mlx5i_epriv(dev); + struct mlx5e_sw_stats *sstats = &priv->stats.sw; + + mlx5i_grp_sw_update_stats(priv); + + stats->rx_packets = sstats->rx_packets; + stats->rx_bytes = sstats->rx_bytes; + stats->tx_packets = sstats->tx_packets; + stats->tx_bytes = sstats->tx_bytes; + stats->tx_dropped = sstats->tx_queue_dropped; +} + +struct net_device *mlx5i_parent_get(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv, *parent_ipriv; + struct net_device *parent_dev; + int parent_ifindex; + + ipriv = priv->ppriv; + + parent_ifindex = netdev->netdev_ops->ndo_get_iflink(netdev); + parent_dev = dev_get_by_index(dev_net(netdev), parent_ifindex); + if (!parent_dev) + return NULL; + + parent_ipriv = netdev_priv(parent_dev); + + ASSERT_RTNL(); + parent_ipriv->num_sub_interfaces++; + + ipriv->parent_dev = parent_dev; + + return parent_dev; +} + +void mlx5i_parent_put(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv, *parent_ipriv; + + ipriv = priv->ppriv; + parent_ipriv = netdev_priv(ipriv->parent_dev); + + ASSERT_RTNL(); + parent_ipriv->num_sub_interfaces--; + + dev_put(ipriv->parent_dev); +} + +int mlx5i_init_underlay_qp(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5i_priv *ipriv = priv->ppriv; + int ret; + + { + u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {}; + u32 *qpc; + + qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc); + + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, primary_address_path.pkey_index, + ipriv->pkey_index); + MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1); + MLX5_SET(qpc, qpc, q_key, IB_DEFAULT_Q_KEY); + + MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP); + MLX5_SET(rst2init_qp_in, in, qpn, ipriv->qpn); + ret = mlx5_cmd_exec_in(mdev, rst2init_qp, in); + if (ret) + goto err_qp_modify_to_err; + } + { + u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {}; + + MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP); + MLX5_SET(init2rtr_qp_in, in, qpn, ipriv->qpn); + ret = mlx5_cmd_exec_in(mdev, init2rtr_qp, in); + if (ret) + goto err_qp_modify_to_err; + } + { + u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {}; + + MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP); + MLX5_SET(rtr2rts_qp_in, in, qpn, ipriv->qpn); + ret = mlx5_cmd_exec_in(mdev, rtr2rts_qp, in); + if (ret) + goto err_qp_modify_to_err; + } + return 0; + +err_qp_modify_to_err: + { + u32 in[MLX5_ST_SZ_DW(qp_2err_in)] = {}; + + MLX5_SET(qp_2err_in, in, opcode, MLX5_CMD_OP_2ERR_QP); + MLX5_SET(qp_2err_in, in, qpn, ipriv->qpn); + mlx5_cmd_exec_in(mdev, qp_2err, in); + } + return ret; +} + +void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(qp_2rst_in)] = {}; + + MLX5_SET(qp_2rst_in, in, opcode, MLX5_CMD_OP_2RST_QP); + MLX5_SET(qp_2rst_in, in, qpn, ipriv->qpn); + mlx5_cmd_exec_in(mdev, qp_2rst, in); +} + +#define MLX5_QP_ENHANCED_ULP_STATELESS_MODE 2 + +int mlx5i_create_underlay_qp(struct mlx5e_priv *priv) +{ + const unsigned char *dev_addr = priv->netdev->dev_addr; + u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; + u32 in[MLX5_ST_SZ_DW(create_qp_in)] = {}; + struct mlx5i_priv *ipriv = priv->ppriv; + void *addr_path; + int qpn = 0; + int ret = 0; + void *qpc; + + if (MLX5_CAP_GEN(priv->mdev, mkey_by_name)) { + qpn = (dev_addr[1] << 16) + (dev_addr[2] << 8) + dev_addr[3]; + MLX5_SET(create_qp_in, in, input_qpn, qpn); + } + + qpc = 
MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(priv->mdev)); + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_UD); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, + MLX5_QP_ENHANCED_ULP_STATELESS_MODE); + + addr_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path); + MLX5_SET(ads, addr_path, vhca_port_num, 1); + MLX5_SET(ads, addr_path, grh, 1); + + MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); + ret = mlx5_cmd_exec_inout(priv->mdev, create_qp, in, out); + if (ret) + return ret; + + ipriv->qpn = MLX5_GET(create_qp_out, out, qpn); + + return 0; +} + +void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, u32 qpn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {}; + + MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, in, qpn, qpn); + mlx5_cmd_exec_in(mdev, destroy_qp, in); +} + +int mlx5i_update_nic_rx(struct mlx5e_priv *priv) +{ + return mlx5e_refresh_tirs(priv, true, true); +} + +int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn) +{ + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + void *tisc; + + tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn); + + return mlx5e_create_tis(mdev, in, tisn); +} + +static int mlx5i_init_tx(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + int err; + + err = mlx5i_create_underlay_qp(priv); + if (err) { + mlx5_core_warn(priv->mdev, "create underlay QP failed, %d\n", err); + return err; + } + + err = mlx5i_create_tis(priv->mdev, ipriv->qpn, &priv->tisn[0][0]); + if (err) { + mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err); + goto err_destroy_underlay_qp; + } + + return 0; + +err_destroy_underlay_qp: + mlx5i_destroy_underlay_qp(priv->mdev, ipriv->qpn); + return err; +} + +static void mlx5i_cleanup_tx(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + + mlx5e_destroy_tis(priv->mdev, priv->tisn[0][0]); + mlx5i_destroy_underlay_qp(priv->mdev, ipriv->qpn); +} + +static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) +{ + struct mlx5_flow_namespace *ns = + mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); + int err; + + + if (!ns) + return -EINVAL; + + mlx5e_fs_set_ns(priv->fs, ns, false); + err = mlx5e_arfs_create_tables(priv->fs, priv->rx_res, + !!(priv->netdev->hw_features & NETIF_F_NTUPLE)); + if (err) { + netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n", + err); + priv->netdev->hw_features &= ~NETIF_F_NTUPLE; + } + + err = mlx5e_create_ttc_table(priv->fs, priv->rx_res); + if (err) { + netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", + err); + goto err_destroy_arfs_tables; + } + + mlx5e_ethtool_init_steering(priv->fs); + + return 0; + +err_destroy_arfs_tables: + mlx5e_arfs_destroy_tables(priv->fs, + !!(priv->netdev->hw_features & NETIF_F_NTUPLE)); + + return err; +} + +static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) +{ + mlx5e_destroy_ttc_table(priv->fs); + mlx5e_arfs_destroy_tables(priv->fs, + !!(priv->netdev->hw_features & NETIF_F_NTUPLE)); + mlx5e_ethtool_cleanup_steering(priv->fs); +} + +static int mlx5i_init_rx(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + priv->fs = mlx5e_fs_init(priv->profile, mdev, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + if (!priv->fs) { + netdev_err(priv->netdev, "FS allocation failed\n"); + return -ENOMEM; + } + + priv->rx_res = mlx5e_rx_res_alloc(); + 
if (!priv->rx_res) { + err = -ENOMEM; + goto err_free_fs; + } + + mlx5e_create_q_counters(priv); + + err = mlx5e_open_drop_rq(priv, &priv->drop_rq); + if (err) { + mlx5_core_err(mdev, "open drop rq failed, %d\n", err); + goto err_destroy_q_counters; + } + + err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0, + priv->max_nch, priv->drop_rq.rqn, + &priv->channels.params.packet_merge, + priv->channels.params.num_channels); + if (err) + goto err_close_drop_rq; + + err = mlx5i_create_flow_steering(priv); + if (err) + goto err_destroy_rx_res; + + return 0; + +err_destroy_rx_res: + mlx5e_rx_res_destroy(priv->rx_res); +err_close_drop_rq: + mlx5e_close_drop_rq(&priv->drop_rq); +err_destroy_q_counters: + mlx5e_destroy_q_counters(priv); + mlx5e_rx_res_free(priv->rx_res); + priv->rx_res = NULL; +err_free_fs: + mlx5e_fs_cleanup(priv->fs); + return err; +} + +static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) +{ + mlx5i_destroy_flow_steering(priv); + mlx5e_rx_res_destroy(priv->rx_res); + mlx5e_close_drop_rq(&priv->drop_rq); + mlx5e_destroy_q_counters(priv); + mlx5e_rx_res_free(priv->rx_res); + priv->rx_res = NULL; + mlx5e_fs_cleanup(priv->fs); +} + +/* The stats groups order is opposite to the update_stats() order calls */ +static mlx5e_stats_grp_t mlx5i_stats_grps[] = { + &MLX5E_STATS_GRP(sw), + &MLX5E_STATS_GRP(qcnt), + &MLX5E_STATS_GRP(vnic_env), + &MLX5E_STATS_GRP(vport), + &MLX5E_STATS_GRP(802_3), + &MLX5E_STATS_GRP(2863), + &MLX5E_STATS_GRP(2819), + &MLX5E_STATS_GRP(phy), + &MLX5E_STATS_GRP(pcie), + &MLX5E_STATS_GRP(per_prio), + &MLX5E_STATS_GRP(pme), + &MLX5E_STATS_GRP(channels), + &MLX5E_STATS_GRP(per_port_buff_congest), +}; + +static unsigned int mlx5i_stats_grps_num(struct mlx5e_priv *priv) +{ + return ARRAY_SIZE(mlx5i_stats_grps); +} + +static const struct mlx5e_profile mlx5i_nic_profile = { + .init = mlx5i_init, + .cleanup = mlx5i_cleanup, + .init_tx = mlx5i_init_tx, + .cleanup_tx = mlx5i_cleanup_tx, + .init_rx = mlx5i_init_rx, + .cleanup_rx = mlx5i_cleanup_rx, + .enable = NULL, /* mlx5i_enable */ + .disable = NULL, /* mlx5i_disable */ + .update_rx = mlx5i_update_nic_rx, + .update_stats = NULL, /* mlx5i_update_stats */ + .update_carrier = NULL, /* no HW update in IB link */ + .rx_handlers = &mlx5i_rx_handlers, + .max_tc = MLX5I_MAX_NUM_TC, + .stats_grps = mlx5i_stats_grps, + .stats_grps_num = mlx5i_stats_grps_num, +}; + +/* mlx5i netdev NDos */ + +static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5e_params new_params; + int err = 0; + + mutex_lock(&priv->state_lock); + + new_params = priv->channels.params; + new_params.sw_mtu = new_mtu; + + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); + if (err) + goto out; + + netdev->mtu = new_params.sw_mtu; + +out: + mutex_unlock(&priv->state_lock); + return err; +} + +int mlx5i_dev_init(struct net_device *dev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5i_priv *ipriv = priv->ppriv; + u8 addr_mod[3]; + + /* Set dev address using underlay QP */ + addr_mod[0] = (ipriv->qpn >> 16) & 0xff; + addr_mod[1] = (ipriv->qpn >> 8) & 0xff; + addr_mod[2] = (ipriv->qpn) & 0xff; + dev_addr_mod(dev, 1, addr_mod, sizeof(addr_mod)); + + /* Add QPN to net-device mapping to HT */ + mlx5i_pkey_add_qpn(dev, ipriv->qpn); + + return 0; +} + +int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + switch (cmd) { + case SIOCSHWTSTAMP: + return mlx5e_hwstamp_set(priv, ifr); + case 
SIOCGHWTSTAMP: + return mlx5e_hwstamp_get(priv, ifr); + default: + return -EOPNOTSUPP; + } +} + +void mlx5i_dev_cleanup(struct net_device *dev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5i_priv *ipriv = priv->ppriv; + + mlx5i_uninit_underlay_qp(priv); + + /* Delete QPN to net-device mapping from HT */ + mlx5i_pkey_del_qpn(dev, ipriv->qpn); +} + +static int mlx5i_open(struct net_device *netdev) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5_core_dev *mdev = epriv->mdev; + int err; + + mutex_lock(&epriv->state_lock); + + set_bit(MLX5E_STATE_OPENED, &epriv->state); + + err = mlx5i_init_underlay_qp(epriv); + if (err) { + mlx5_core_warn(mdev, "prepare underlay qp state failed, %d\n", err); + goto err_clear_state_opened_flag; + } + + err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qpn); + if (err) { + mlx5_core_warn(mdev, "attach underlay qp to ft failed, %d\n", err); + goto err_reset_qp; + } + + err = mlx5e_open_channels(epriv, &epriv->channels); + if (err) + goto err_remove_fs_underlay_qp; + + epriv->profile->update_rx(epriv); + mlx5e_activate_priv_channels(epriv); + + mutex_unlock(&epriv->state_lock); + return 0; + +err_remove_fs_underlay_qp: + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qpn); +err_reset_qp: + mlx5i_uninit_underlay_qp(epriv); +err_clear_state_opened_flag: + clear_bit(MLX5E_STATE_OPENED, &epriv->state); + mutex_unlock(&epriv->state_lock); + return err; +} + +static int mlx5i_close(struct net_device *netdev) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5_core_dev *mdev = epriv->mdev; + + /* May already be CLOSED in case a previous configuration operation + * (e.g RX/TX queue size change) that involves close&open failed. 
+ */ + mutex_lock(&epriv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &epriv->state)) + goto unlock; + + clear_bit(MLX5E_STATE_OPENED, &epriv->state); + + netif_carrier_off(epriv->netdev); + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qpn); + mlx5e_deactivate_priv_channels(epriv); + mlx5e_close_channels(&epriv->channels); + mlx5i_uninit_underlay_qp(epriv); +unlock: + mutex_unlock(&epriv->state_lock); + return 0; +} + +/* IPoIB RDMA netdev callbacks */ +static int mlx5i_attach_mcast(struct net_device *netdev, struct ib_device *hca, + union ib_gid *gid, u16 lid, int set_qkey, + u32 qkey) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = epriv->mdev; + struct mlx5i_priv *ipriv = epriv->ppriv; + int err; + + mlx5_core_dbg(mdev, "attaching QPN 0x%x, MGID %pI6\n", ipriv->qpn, + gid->raw); + err = mlx5_core_attach_mcg(mdev, gid, ipriv->qpn); + if (err) + mlx5_core_warn(mdev, "failed attaching QPN 0x%x, MGID %pI6\n", + ipriv->qpn, gid->raw); + + if (set_qkey) { + mlx5_core_dbg(mdev, "%s setting qkey 0x%x\n", + netdev->name, qkey); + ipriv->qkey = qkey; + } + + return err; +} + +static int mlx5i_detach_mcast(struct net_device *netdev, struct ib_device *hca, + union ib_gid *gid, u16 lid) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = epriv->mdev; + struct mlx5i_priv *ipriv = epriv->ppriv; + int err; + + mlx5_core_dbg(mdev, "detaching QPN 0x%x, MGID %pI6\n", ipriv->qpn, + gid->raw); + + err = mlx5_core_detach_mcg(mdev, gid, ipriv->qpn); + if (err) + mlx5_core_dbg(mdev, "failed detaching QPN 0x%x, MGID %pI6\n", + ipriv->qpn, gid->raw); + + return err; +} + +static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb, + struct ib_ah *address, u32 dqpn) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(dev); + struct mlx5e_txqsq *sq = epriv->txq2sq[skb_get_queue_mapping(skb)]; + struct mlx5_ib_ah *mah = to_mah(address); + struct mlx5i_priv *ipriv = epriv->ppriv; + + mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey, netdev_xmit_more()); + + return NETDEV_TX_OK; +} + +static void mlx5i_set_pkey_index(struct net_device *netdev, int id) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + + ipriv->pkey_index = (u16)id; +} + +static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev) +{ + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_IB) + return -EOPNOTSUPP; + + if (!MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) { + mlx5_core_warn(mdev, "IPoIB enhanced offloads are not supported\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +static void mlx5_rdma_netdev_free(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5i_priv *ipriv = priv->ppriv; + const struct mlx5e_profile *profile = priv->profile; + + mlx5e_detach_netdev(priv); + profile->cleanup(priv); + + if (!ipriv->sub_interface) { + mlx5i_pkey_qpn_ht_cleanup(netdev); + mlx5e_destroy_mdev_resources(mdev); + } +} + +static bool mlx5_is_sub_interface(struct mlx5_core_dev *mdev) +{ + return mdev->mlx5e_res.hw_objs.pdn != 0; +} + +static const struct mlx5e_profile *mlx5_get_profile(struct mlx5_core_dev *mdev) +{ + if (mlx5_is_sub_interface(mdev)) + return mlx5i_pkey_get_profile(); + return &mlx5i_nic_profile; +} + +static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u32 port_num, + struct net_device *netdev, void *param) +{ + struct mlx5_core_dev *mdev = (struct mlx5_core_dev *)param; + const struct mlx5e_profile *prof = mlx5_get_profile(mdev); + struct mlx5i_priv *ipriv; + 
struct mlx5e_priv *epriv; + struct rdma_netdev *rn; + int err; + + ipriv = netdev_priv(netdev); + epriv = mlx5i_epriv(netdev); + + ipriv->sub_interface = mlx5_is_sub_interface(mdev); + if (!ipriv->sub_interface) { + err = mlx5i_pkey_qpn_ht_init(netdev); + if (err) { + mlx5_core_warn(mdev, "allocate qpn_to_netdev ht failed\n"); + return err; + } + + /* This should only be called once per mdev */ + err = mlx5e_create_mdev_resources(mdev); + if (err) + goto destroy_ht; + } + + err = mlx5e_priv_init(epriv, prof, netdev, mdev); + if (err) + goto destroy_mdev_resources; + + epriv->profile = prof; + epriv->ppriv = ipriv; + + prof->init(mdev, netdev); + + err = mlx5e_attach_netdev(epriv); + if (err) + goto detach; + netif_carrier_off(netdev); + + /* set rdma_netdev func pointers */ + rn = &ipriv->rn; + rn->hca = ibdev; + rn->send = mlx5i_xmit; + rn->attach_mcast = mlx5i_attach_mcast; + rn->detach_mcast = mlx5i_detach_mcast; + rn->set_id = mlx5i_set_pkey_index; + + netdev->priv_destructor = mlx5_rdma_netdev_free; + netdev->needs_free_netdev = 1; + + return 0; + +detach: + prof->cleanup(epriv); + if (ipriv->sub_interface) + return err; +destroy_mdev_resources: + mlx5e_destroy_mdev_resources(mdev); +destroy_ht: + mlx5i_pkey_qpn_ht_cleanup(netdev); + return err; +} + +int mlx5_rdma_rn_get_params(struct mlx5_core_dev *mdev, + struct ib_device *device, + struct rdma_netdev_alloc_params *params) +{ + int nch; + int rc; + + rc = mlx5i_check_required_hca_cap(mdev); + if (rc) + return rc; + + nch = mlx5e_get_max_num_channels(mdev); + + *params = (struct rdma_netdev_alloc_params){ + .sizeof_priv = sizeof(struct mlx5i_priv) + + sizeof(struct mlx5e_priv), + .txqs = nch * MLX5E_MAX_NUM_TC, + .rxqs = nch, + .param = mdev, + .initialize_rdma_netdev = mlx5_rdma_setup_rn, + }; + + return 0; +} +EXPORT_SYMBOL(mlx5_rdma_rn_get_params); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h new file mode 100644 index 000000000..f3f2af972 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __MLX5E_IPOB_H__ +#define __MLX5E_IPOB_H__ + +#ifdef CONFIG_MLX5_CORE_IPOIB + +#include <linux/mlx5/fs.h> +#include "en.h" + +#define MLX5I_MAX_NUM_TC 1 + +extern const struct ethtool_ops mlx5i_ethtool_ops; +extern const struct ethtool_ops mlx5i_pkey_ethtool_ops; +extern const struct mlx5e_rx_handlers mlx5i_rx_handlers; + +#define MLX5_IB_GRH_BYTES 40 +#define MLX5_IPOIB_ENCAP_LEN 4 +#define MLX5_IPOIB_PSEUDO_LEN 20 +#define MLX5_IPOIB_HARD_LEN (MLX5_IPOIB_PSEUDO_LEN + MLX5_IPOIB_ENCAP_LEN) + +/* ipoib rdma netdev's private data structure */ +struct mlx5i_priv { + struct rdma_netdev rn; /* keep this first */ + u32 qpn; + bool sub_interface; + u32 num_sub_interfaces; + u32 qkey; + u16 pkey_index; + struct mlx5i_pkey_qpn_ht *qpn_htbl; + struct net_device *parent_dev; + char *mlx5e_priv[]; +}; + +int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn); + +/* Underlay QP create/destroy functions */ +int mlx5i_create_underlay_qp(struct mlx5e_priv *priv); +void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, u32 qpn); + +/* Underlay QP state modification init/uninit functions */ +int mlx5i_init_underlay_qp(struct mlx5e_priv *priv); +void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv); + +/* Allocate/Free underlay QPN to net-device hash table */ +int mlx5i_pkey_qpn_ht_init(struct net_device *netdev); +void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev); + +/* Add/Remove an underlay QPN to net-device mapping to/from the hash table */ +int mlx5i_pkey_add_qpn(struct net_device *netdev, u32 qpn); +int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn); + +/* Get the net-device corresponding to the given underlay QPN */ +struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn); + +/* Shared ndo functions */ +int mlx5i_dev_init(struct net_device *dev); +void mlx5i_dev_cleanup(struct net_device *dev); +int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); + +/* Parent profile functions */ +int mlx5i_init(struct mlx5_core_dev *mdev, struct net_device *netdev); +void mlx5i_cleanup(struct mlx5e_priv *priv); + +int mlx5i_update_nic_rx(struct mlx5e_priv *priv); + +/* Get child interface nic profile */ +const struct mlx5e_profile *mlx5i_pkey_get_profile(void); + +/* Extract mlx5e_priv from IPoIB netdev */ +#define mlx5i_epriv(netdev) ((void *)(((struct mlx5i_priv *)netdev_priv(netdev))->mlx5e_priv)) + +struct mlx5_wqe_eth_pad { + u8 rsvd0[16]; +}; + +struct mlx5i_tx_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_datagram_seg datagram; + struct mlx5_wqe_eth_pad pad; + struct mlx5_wqe_eth_seg eth; + struct mlx5_wqe_data_seg data[]; +}; + +#define MLX5I_SQ_FETCH_WQE(sq, pi) \ + ((struct mlx5i_tx_wqe *)mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5i_tx_wqe))) + +void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more); +void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); + +/* Reference management for child to parent interfaces. 
*/ +struct net_device *mlx5i_parent_get(struct net_device *netdev); +void mlx5i_parent_put(struct net_device *netdev); + +#endif /* CONFIG_MLX5_CORE_IPOIB */ +#endif /* __MLX5E_IPOB_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c new file mode 100644 index 000000000..0cf4eaf85 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -0,0 +1,365 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/hash.h> +#include "ipoib.h" + +#define MLX5I_MAX_LOG_PKEY_SUP 7 + +struct qpn_to_netdev { + struct net_device *netdev; + struct hlist_node hlist; + u32 underlay_qpn; +}; + +struct mlx5i_pkey_qpn_ht { + struct hlist_head buckets[1 << MLX5I_MAX_LOG_PKEY_SUP]; + spinlock_t ht_lock; /* Synchronise with NAPI */ +}; + +int mlx5i_pkey_qpn_ht_init(struct net_device *netdev) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + struct mlx5i_pkey_qpn_ht *qpn_htbl; + + qpn_htbl = kzalloc(sizeof(*qpn_htbl), GFP_KERNEL); + if (!qpn_htbl) + return -ENOMEM; + + ipriv->qpn_htbl = qpn_htbl; + spin_lock_init(&qpn_htbl->ht_lock); + + return 0; +} + +void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + + kfree(ipriv->qpn_htbl); +} + +static struct qpn_to_netdev *mlx5i_find_qpn_to_netdev_node(struct hlist_head *buckets, + u32 qpn) +{ + struct hlist_head *h = &buckets[hash_32(qpn, MLX5I_MAX_LOG_PKEY_SUP)]; + struct qpn_to_netdev *node; + + hlist_for_each_entry(node, h, hlist) { + if (node->underlay_qpn == qpn) + return node; + } + + return NULL; +} + +int mlx5i_pkey_add_qpn(struct net_device *netdev, u32 qpn) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + struct mlx5i_pkey_qpn_ht *ht = ipriv->qpn_htbl; + u8 key = hash_32(qpn, MLX5I_MAX_LOG_PKEY_SUP); + struct qpn_to_netdev *new_node; + + new_node = kzalloc(sizeof(*new_node), GFP_KERNEL); + if (!new_node) + return -ENOMEM; + + new_node->netdev = netdev; + new_node->underlay_qpn = qpn; + spin_lock_bh(&ht->ht_lock); + hlist_add_head(&new_node->hlist, &ht->buckets[key]); + spin_unlock_bh(&ht->ht_lock); + + return 0; +} + +int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5i_pkey_qpn_ht *ht = ipriv->qpn_htbl; + struct qpn_to_netdev *node; + + node = mlx5i_find_qpn_to_netdev_node(ht->buckets, qpn); + if (!node) { + mlx5_core_warn(epriv->mdev, "QPN to netdev delete from HT failed\n"); + return -EINVAL; + } + + spin_lock_bh(&ht->ht_lock); + hlist_del_init(&node->hlist); + spin_unlock_bh(&ht->ht_lock); + kfree(node); + + return 0; +} + +struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + struct qpn_to_netdev *node; + + node = mlx5i_find_qpn_to_netdev_node(ipriv->qpn_htbl->buckets, qpn); + if (!node) + return NULL; + + return node->netdev; +} + +static int mlx5i_pkey_open(struct net_device *netdev); +static int mlx5i_pkey_close(struct net_device *netdev); +static int mlx5i_pkey_dev_init(struct net_device *dev); +static void mlx5i_pkey_dev_cleanup(struct net_device *netdev); +static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu); +static int mlx5i_pkey_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); + +static const struct net_device_ops mlx5i_pkey_netdev_ops = { + .ndo_open = mlx5i_pkey_open, + .ndo_stop = mlx5i_pkey_close, + .ndo_init = mlx5i_pkey_dev_init, + .ndo_get_stats64 = mlx5i_get_stats, + .ndo_uninit = mlx5i_pkey_dev_cleanup, + .ndo_change_mtu = mlx5i_pkey_change_mtu, + .ndo_eth_ioctl = mlx5i_pkey_ioctl, +}; + +/* Child NDOs */ +static int mlx5i_pkey_dev_init(struct net_device *dev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5i_priv *ipriv, *parent_ipriv; + struct net_device *parent_dev; + + ipriv = priv->ppriv; + + /* Link to parent */ + parent_dev = mlx5i_parent_get(dev); + if (!parent_dev) { + mlx5_core_warn(priv->mdev, "failed 
to get parent device\n"); + return -EINVAL; + } + + if (dev->num_rx_queues < parent_dev->real_num_rx_queues) { + mlx5_core_warn(priv->mdev, + "failed to create child device with rx queues [%d] less than parent's [%d]\n", + dev->num_rx_queues, + parent_dev->real_num_rx_queues); + mlx5i_parent_put(dev); + return -EINVAL; + } + + /* Get QPN to netdevice hash table from parent */ + parent_ipriv = netdev_priv(parent_dev); + ipriv->qpn_htbl = parent_ipriv->qpn_htbl; + + return mlx5i_dev_init(dev); +} + +static int mlx5i_pkey_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + return mlx5i_ioctl(dev, ifr, cmd); +} + +static void mlx5i_pkey_dev_cleanup(struct net_device *netdev) +{ + mlx5i_parent_put(netdev); + return mlx5i_dev_cleanup(netdev); +} + +static int mlx5i_pkey_open(struct net_device *netdev) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5_core_dev *mdev = epriv->mdev; + int err; + + mutex_lock(&epriv->state_lock); + + set_bit(MLX5E_STATE_OPENED, &epriv->state); + + err = mlx5i_init_underlay_qp(epriv); + if (err) { + mlx5_core_warn(mdev, "prepare child underlay qp state failed, %d\n", err); + goto err_release_lock; + } + + err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qpn); + if (err) { + mlx5_core_warn(mdev, "attach child underlay qp to ft failed, %d\n", err); + goto err_unint_underlay_qp; + } + + err = mlx5i_create_tis(mdev, ipriv->qpn, &epriv->tisn[0][0]); + if (err) { + mlx5_core_warn(mdev, "create child tis failed, %d\n", err); + goto err_remove_rx_uderlay_qp; + } + + err = mlx5e_open_channels(epriv, &epriv->channels); + if (err) { + mlx5_core_warn(mdev, "opening child channels failed, %d\n", err); + goto err_clear_state_opened_flag; + } + epriv->profile->update_rx(epriv); + mlx5e_activate_priv_channels(epriv); + mutex_unlock(&epriv->state_lock); + + return 0; + +err_clear_state_opened_flag: + mlx5e_destroy_tis(mdev, epriv->tisn[0][0]); +err_remove_rx_uderlay_qp: + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qpn); +err_unint_underlay_qp: + mlx5i_uninit_underlay_qp(epriv); +err_release_lock: + clear_bit(MLX5E_STATE_OPENED, &epriv->state); + mutex_unlock(&epriv->state_lock); + return err; +} + +static int mlx5i_pkey_close(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = priv->ppriv; + struct mlx5_core_dev *mdev = priv->mdev; + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + clear_bit(MLX5E_STATE_OPENED, &priv->state); + + netif_carrier_off(priv->netdev); + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qpn); + mlx5i_uninit_underlay_qp(priv); + mlx5e_deactivate_priv_channels(priv); + mlx5e_close_channels(&priv->channels); + mlx5e_destroy_tis(mdev, priv->tisn[0][0]); +unlock: + mutex_unlock(&priv->state_lock); + return 0; +} + +static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + mutex_lock(&priv->state_lock); + netdev->mtu = new_mtu; + mutex_unlock(&priv->state_lock); + + return 0; +} + +/* Called directly after IPoIB netdevice was created to initialize SW structs */ +static int mlx5i_pkey_init(struct mlx5_core_dev *mdev, + struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + int err; + + err = mlx5i_init(mdev, netdev); + if (err) + return err; + + /* Override parent ndo */ + netdev->netdev_ops = &mlx5i_pkey_netdev_ops; + + /* Set child limited ethtool support */ + netdev->ethtool_ops = 
&mlx5i_pkey_ethtool_ops; + + /* Use dummy rqs */ + priv->channels.params.log_rq_mtu_frames = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; + + return 0; +} + +/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */ +static void mlx5i_pkey_cleanup(struct mlx5e_priv *priv) +{ + mlx5i_cleanup(priv); +} + +static int mlx5i_pkey_init_tx(struct mlx5e_priv *priv) +{ + int err; + + err = mlx5i_create_underlay_qp(priv); + if (err) + mlx5_core_warn(priv->mdev, "create child underlay QP failed, %d\n", err); + + return err; +} + +static void mlx5i_pkey_cleanup_tx(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + + mlx5i_destroy_underlay_qp(priv->mdev, ipriv->qpn); +} + +static int mlx5i_pkey_init_rx(struct mlx5e_priv *priv) +{ + /* Since the rx resources are shared between child and parent, the + * parent interface is taking care of rx resource allocation and init + */ + return 0; +} + +static void mlx5i_pkey_cleanup_rx(struct mlx5e_priv *priv) +{ + /* Since the rx resources are shared between child and parent, the + * parent interface is taking care of rx resource free and de-init + */ +} + +static const struct mlx5e_profile mlx5i_pkey_nic_profile = { + .init = mlx5i_pkey_init, + .cleanup = mlx5i_pkey_cleanup, + .init_tx = mlx5i_pkey_init_tx, + .cleanup_tx = mlx5i_pkey_cleanup_tx, + .init_rx = mlx5i_pkey_init_rx, + .cleanup_rx = mlx5i_pkey_cleanup_rx, + .enable = NULL, + .disable = NULL, + .update_rx = mlx5i_update_nic_rx, + .update_stats = NULL, + .rx_handlers = &mlx5i_rx_handlers, + .max_tc = MLX5I_MAX_NUM_TC, +}; + +const struct mlx5e_profile *mlx5i_pkey_get_profile(void) +{ + return &mlx5i_pkey_nic_profile; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c new file mode 100644 index 000000000..380a208ab --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "mlx5_core.h" +#include "mlx5_irq.h" +#include "pci_irq.h" + +static void cpu_put(struct mlx5_irq_pool *pool, int cpu) +{ + pool->irqs_per_cpu[cpu]--; +} + +static void cpu_get(struct mlx5_irq_pool *pool, int cpu) +{ + pool->irqs_per_cpu[cpu]++; +} + +/* Gets the least loaded CPU. e.g.: the CPU with least IRQs bound to it */ +static int cpu_get_least_loaded(struct mlx5_irq_pool *pool, + const struct cpumask *req_mask) +{ + int best_cpu = -1; + int cpu; + + for_each_cpu_and(cpu, req_mask, cpu_online_mask) { + /* CPU has zero IRQs on it. No need to search any more CPUs. 
*/ + if (!pool->irqs_per_cpu[cpu]) { + best_cpu = cpu; + break; + } + if (best_cpu < 0) + best_cpu = cpu; + if (pool->irqs_per_cpu[cpu] < pool->irqs_per_cpu[best_cpu]) + best_cpu = cpu; + } + if (best_cpu == -1) { + /* There are no online CPUs in req_mask */ + mlx5_core_err(pool->dev, "NO online CPUs in req_mask (%*pbl)\n", + cpumask_pr_args(req_mask)); + best_cpu = cpumask_first(cpu_online_mask); + } + pool->irqs_per_cpu[best_cpu]++; + return best_cpu; +} + +/* Creating an IRQ from irq_pool */ +static struct mlx5_irq * +irq_pool_request_irq(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) +{ + cpumask_var_t auto_mask; + struct mlx5_irq *irq; + u32 irq_index; + int err; + + if (!zalloc_cpumask_var(&auto_mask, GFP_KERNEL)) + return ERR_PTR(-ENOMEM); + err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, GFP_KERNEL); + if (err) + return ERR_PTR(err); + if (pool->irqs_per_cpu) { + if (cpumask_weight(req_mask) > 1) + /* if req_mask contains more than one CPU, set the least loaded CPU + * of req_mask + */ + cpumask_set_cpu(cpu_get_least_loaded(pool, req_mask), auto_mask); + else + cpu_get(pool, cpumask_first(req_mask)); + } + irq = mlx5_irq_alloc(pool, irq_index, cpumask_empty(auto_mask) ? req_mask : auto_mask); + free_cpumask_var(auto_mask); + return irq; +} + +/* Looking for the IRQ with the smallest refcount that fits req_mask. + * If pool is sf_comp_pool, then we are looking for an IRQ with any of the + * requested CPUs in req_mask. + * for example: req_mask = 0xf, irq0_mask = 0x10, irq1_mask = 0x1. irq0_mask + * isn't a subset of req_mask, so we will skip it. irq1_mask is a subset of + * req_mask, so we don't skip it. + * If pool is sf_ctrl_pool, then all IRQs have the same mask, so any IRQ will + * fit. And since a mask is a subset of itself, we will pass the first if below. + */ +static struct mlx5_irq * +irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) +{ + int start = pool->xa_num_irqs.min; + int end = pool->xa_num_irqs.max; + struct mlx5_irq *irq = NULL; + struct mlx5_irq *iter; + int irq_refcount = 0; + unsigned long index; + + lockdep_assert_held(&pool->lock); + xa_for_each_range(&pool->irqs, index, iter, start, end) { + struct cpumask *iter_mask = mlx5_irq_get_affinity_mask(iter); + int iter_refcount = mlx5_irq_read_locked(iter); + + if (!cpumask_subset(iter_mask, req_mask)) + /* skip IRQs with a mask which is not a subset of req_mask */ + continue; + if (iter_refcount < pool->min_threshold) + /* If we found an IRQ with less than min_thres, return it */ + return iter; + if (!irq || iter_refcount < irq_refcount) { + /* In case we won't find an IRQ with less than min_thres, + * keep a pointer to the least used IRQ + */ + irq_refcount = iter_refcount; + irq = iter; + } + } + return irq; +} + +/** + * mlx5_irq_affinity_request - request an IRQ according to the given mask. + * @pool: IRQ pool to request from. + * @req_mask: cpumask requested for this IRQ. + * + * This function returns a pointer to IRQ, or ERR_PTR in case of error. 
+ */ +struct mlx5_irq * +mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) +{ + struct mlx5_irq *least_loaded_irq, *new_irq; + + mutex_lock(&pool->lock); + least_loaded_irq = irq_pool_find_least_loaded(pool, req_mask); + if (least_loaded_irq && + mlx5_irq_read_locked(least_loaded_irq) < pool->min_threshold) + goto out; + /* We didn't find an IRQ with less than min_thres, try to allocate a new IRQ */ + new_irq = irq_pool_request_irq(pool, req_mask); + if (IS_ERR(new_irq)) { + if (!least_loaded_irq) { + /* We failed to create an IRQ and we didn't find an IRQ */ + mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n", + PTR_ERR(new_irq)); + mutex_unlock(&pool->lock); + return new_irq; + } + /* We failed to create a new IRQ for the requested affinity, + * sharing existing IRQ. + */ + goto out; + } + least_loaded_irq = new_irq; + goto unlock; +out: + mlx5_irq_get_locked(least_loaded_irq); + if (mlx5_irq_read_locked(least_loaded_irq) > pool->max_threshold) + mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n", + pci_irq_vector(pool->dev->pdev, + mlx5_irq_get_index(least_loaded_irq)), pool->name, + mlx5_irq_read_locked(least_loaded_irq) / MLX5_EQ_REFS_PER_IRQ); +unlock: + mutex_unlock(&pool->lock); + return least_loaded_irq; +} + +void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs, + int num_irqs) +{ + struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); + int i; + + for (i = 0; i < num_irqs; i++) { + int cpu = cpumask_first(mlx5_irq_get_affinity_mask(irqs[i])); + + synchronize_irq(pci_irq_vector(pool->dev->pdev, + mlx5_irq_get_index(irqs[i]))); + if (mlx5_irq_put(irqs[i])) + if (pool->irqs_per_cpu) + cpu_put(pool, cpu); + } +} + +/** + * mlx5_irq_affinity_irqs_request_auto - request one or more IRQs for mlx5 device. + * @dev: mlx5 device that is requesting the IRQs. + * @nirqs: number of IRQs to request. + * @irqs: an output array of IRQs pointers. + * + * Each IRQ is bounded to at most 1 CPU. + * This function is requesting IRQs according to the default assignment. + * The default assignment policy is: + * - in each iteration, request the least loaded IRQ which is not bound to any + * CPU of the previous IRQs requested. + * + * This function returns the number of IRQs requested, (which might be smaller than + * @nirqs), if successful, or a negative error code in case of an error. + */ +int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs, + struct mlx5_irq **irqs) +{ + struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); + cpumask_var_t req_mask; + struct mlx5_irq *irq; + int i = 0; + + if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) + return -ENOMEM; + cpumask_copy(req_mask, cpu_online_mask); + for (i = 0; i < nirqs; i++) { + if (mlx5_irq_pool_is_sf_pool(pool)) + irq = mlx5_irq_affinity_request(pool, req_mask); + else + /* In case SF pool doesn't exists, fallback to the PF IRQs. + * The PF IRQs are already allocated and binded to CPU + * at this point. Hence, only an index is needed. 
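The kernel-doc above for mlx5_irq_affinity_irqs_request_auto() describes a policy where each iteration requests the least loaded IRQ on a CPU that the previous vectors did not take. A compact userspace model of that loop, using plain arrays instead of cpumask_t and a made-up load table, might look as follows; pick_cpu and all of the numbers are illustrative only.

/* Spreading N vectors over distinct, least loaded CPUs (illustration only). */
#include <stdio.h>

#define NCPUS 8

static int pick_cpu(const int load[NCPUS], const int candidate[NCPUS])
{
	int best = -1;
	int cpu;

	for (cpu = 0; cpu < NCPUS; cpu++) {
		if (!candidate[cpu])
			continue;
		if (best < 0 || load[cpu] < load[best])
			best = cpu;
	}
	return best;
}

int main(void)
{
	int load[NCPUS]      = { 2, 0, 1, 3, 0, 1, 2, 4 };	/* made-up IRQ counts */
	int candidate[NCPUS] = { 1, 1, 1, 1, 1, 1, 1, 1 };	/* all CPUs allowed */
	int nvec = 4;
	int i;

	for (i = 0; i < nvec; i++) {
		int cpu = pick_cpu(load, candidate);

		if (cpu < 0)
			break;
		candidate[cpu] = 0;	/* the next vector must use a different CPU */
		printf("vector %d -> cpu %d\n", i, cpu);
	}
	return 0;
}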
+ */ + irq = mlx5_irq_request(dev, i, NULL); + if (IS_ERR(irq)) + break; + irqs[i] = irq; + cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), req_mask); + mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n", + pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)), + cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)), + mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ); + } + free_cpumask_var(req_mask); + if (!i) + return PTR_ERR(irq); + return i; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c new file mode 100644 index 000000000..b8feaf0f5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "lag.h" + +static char *get_str_mode_type(struct mlx5_lag *ldev) +{ + switch (ldev->mode) { + case MLX5_LAG_MODE_ROCE: return "roce"; + case MLX5_LAG_MODE_SRIOV: return "switchdev"; + case MLX5_LAG_MODE_MULTIPATH: return "multipath"; + case MLX5_LAG_MODE_MPESW: return "multiport_eswitch"; + default: return "invalid"; + } + + return NULL; +} + +static int type_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + struct mlx5_lag *ldev; + char *mode = NULL; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + if (__mlx5_lag_is_active(ldev)) + mode = get_str_mode_type(ldev); + mutex_unlock(&ldev->lock); + if (!mode) + return -EINVAL; + seq_printf(file, "%s\n", mode); + + return 0; +} + +static int port_sel_mode_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + struct mlx5_lag *ldev; + int ret = 0; + char *mode; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + if (__mlx5_lag_is_active(ldev)) + mode = mlx5_get_str_port_sel_mode(ldev->mode, ldev->mode_flags); + else + ret = -EINVAL; + mutex_unlock(&ldev->lock); + if (ret) + return ret; + + seq_printf(file, "%s\n", mode); + return 0; +} + +static int state_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + struct mlx5_lag *ldev; + bool active; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + active = __mlx5_lag_is_active(ldev); + mutex_unlock(&ldev->lock); + seq_printf(file, "%s\n", active ? "active" : "disabled"); + return 0; +} + +static int flags_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + bool fdb_sel_mode_native; + struct mlx5_lag *ldev; + bool shared_fdb; + bool lag_active; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + lag_active = __mlx5_lag_is_active(ldev); + if (!lag_active) + goto unlock; + + shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); + fdb_sel_mode_native = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, + &ldev->mode_flags); + +unlock: + mutex_unlock(&ldev->lock); + if (!lag_active) + return -EINVAL; + + seq_printf(file, "%s:%s\n", "shared_fdb", shared_fdb ? "on" : "off"); + seq_printf(file, "%s:%s\n", "fdb_selection_mode", + fdb_sel_mode_native ? 
"native" : "affinity"); + return 0; +} + +static int mapping_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + u8 ports[MLX5_MAX_PORTS] = {}; + struct mlx5_lag *ldev; + bool hash = false; + bool lag_active; + int num_ports; + int i; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + lag_active = __mlx5_lag_is_active(ldev); + if (lag_active) { + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) { + mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, ports, + &num_ports); + hash = true; + } else { + for (i = 0; i < ldev->ports; i++) + ports[i] = ldev->v2p_map[i]; + num_ports = ldev->ports; + } + } + mutex_unlock(&ldev->lock); + if (!lag_active) + return -EINVAL; + + for (i = 0; i < num_ports; i++) { + if (hash) + seq_printf(file, "%d\n", ports[i] + 1); + else + seq_printf(file, "%d:%d\n", i + 1, ports[i]); + } + + return 0; +} + +static int members_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + struct mlx5_lag *ldev; + int i; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + for (i = 0; i < ldev->ports; i++) { + if (!ldev->pf[i].dev) + continue; + seq_printf(file, "%s\n", dev_name(ldev->pf[i].dev->device)); + } + mutex_unlock(&ldev->lock); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(type); +DEFINE_SHOW_ATTRIBUTE(port_sel_mode); +DEFINE_SHOW_ATTRIBUTE(state); +DEFINE_SHOW_ATTRIBUTE(flags); +DEFINE_SHOW_ATTRIBUTE(mapping); +DEFINE_SHOW_ATTRIBUTE(members); + +void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev) +{ + struct dentry *dbg; + + dbg = debugfs_create_dir("lag", mlx5_debugfs_get_dev_root(dev)); + dev->priv.dbg.lag_debugfs = dbg; + + debugfs_create_file("type", 0444, dbg, dev, &type_fops); + debugfs_create_file("port_sel_mode", 0444, dbg, dev, &port_sel_mode_fops); + debugfs_create_file("state", 0444, dbg, dev, &state_fops); + debugfs_create_file("flags", 0444, dbg, dev, &flags_fops); + debugfs_create_file("mapping", 0444, dbg, dev, &mapping_fops); + debugfs_create_file("members", 0444, dbg, dev, &members_fops); +} + +void mlx5_ldev_remove_debugfs(struct dentry *dbg) +{ + debugfs_remove_recursive(dbg); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c new file mode 100644 index 000000000..ad32b80e8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -0,0 +1,1580 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/netdevice.h> +#include <net/bonding.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/eswitch.h> +#include <linux/mlx5/vport.h> +#include "lib/devcom.h" +#include "mlx5_core.h" +#include "eswitch.h" +#include "esw/acl/ofld.h" +#include "lag.h" +#include "mp.h" +#include "mpesw.h" + +enum { + MLX5_LAG_EGRESS_PORT_1 = 1, + MLX5_LAG_EGRESS_PORT_2, +}; + +/* General purpose, use for short periods of time. + * Beware of lock dependencies (preferably, no locks should be acquired + * under it). + */ +static DEFINE_SPINLOCK(lag_lock); + +static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags) +{ + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) + return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT; + + if (mode == MLX5_LAG_MODE_MPESW) + return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW; + + return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY; +} + +static u8 lag_active_port_bits(struct mlx5_lag *ldev) +{ + u8 enabled_ports[MLX5_MAX_PORTS] = {}; + u8 active_port = 0; + int num_enabled; + int idx; + + mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, enabled_ports, + &num_enabled); + for (idx = 0; idx < num_enabled; idx++) + active_port |= BIT_MASK(enabled_ports[idx]); + + return active_port; +} + +static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode, + unsigned long flags) +{ + bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, + &flags); + int port_sel_mode = get_port_sel_mode(mode, flags); + u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {}; + void *lag_ctx; + + lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx); + MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG); + MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode); + + switch (port_sel_mode) { + case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]); + break; + case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: + if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass)) + break; + + MLX5_SET(lagc, lag_ctx, active_port, + lag_active_port_bits(mlx5_lag_dev(dev))); + break; + default: + break; + } + MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode); + + return mlx5_cmd_exec_in(dev, create_lag, in); +} + +static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports, + u8 *ports) +{ + u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {}; + void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx); + + MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG); + MLX5_SET(modify_lag_in, in, field_select, 0x1); + + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]); + + return mlx5_cmd_exec_in(dev, modify_lag, in); +} + +int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {}; + + MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG); + + return mlx5_cmd_exec_in(dev, create_vport_lag, in); +} +EXPORT_SYMBOL(mlx5_cmd_create_vport_lag); + +int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {}; + + MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG); + + return 
mlx5_cmd_exec_in(dev, destroy_vport_lag, in); +} +EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag); + +static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports, + u8 *ports, int *num_disabled) +{ + int i; + + *num_disabled = 0; + for (i = 0; i < num_ports; i++) { + if (!tracker->netdev_state[i].tx_enabled || + !tracker->netdev_state[i].link_up) + ports[(*num_disabled)++] = i; + } +} + +void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, + u8 *ports, int *num_enabled) +{ + int i; + + *num_enabled = 0; + for (i = 0; i < num_ports; i++) { + if (tracker->netdev_state[i].tx_enabled && + tracker->netdev_state[i].link_up) + ports[(*num_enabled)++] = i; + } + + if (*num_enabled == 0) + mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled); +} + +static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev, + struct mlx5_lag *ldev, + struct lag_tracker *tracker, + unsigned long flags) +{ + char buf[MLX5_MAX_PORTS * 10 + 1] = {}; + u8 enabled_ports[MLX5_MAX_PORTS] = {}; + int written = 0; + int num_enabled; + int idx; + int err; + int i; + int j; + + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) { + mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports, + &num_enabled); + for (i = 0; i < num_enabled; i++) { + err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1); + if (err != 3) + return; + written += err; + } + buf[written - 2] = 0; + mlx5_core_info(dev, "lag map active ports: %s\n", buf); + } else { + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + idx = i * ldev->buckets + j; + err = scnprintf(buf + written, 10, + " port %d:%d", i + 1, ldev->v2p_map[idx]); + if (err != 9) + return; + written += err; + } + } + mlx5_core_info(dev, "lag map:%s\n", buf); + } +} + +static int mlx5_lag_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr); +static void mlx5_do_bond_work(struct work_struct *work); + +static void mlx5_ldev_free(struct kref *ref) +{ + struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref); + + if (ldev->nb.notifier_call) + unregister_netdevice_notifier_net(&init_net, &ldev->nb); + mlx5_lag_mp_cleanup(ldev); + cancel_delayed_work_sync(&ldev->bond_work); + destroy_workqueue(ldev->wq); + mlx5_lag_mpesw_cleanup(ldev); + mutex_destroy(&ldev->lock); + kfree(ldev); +} + +static void mlx5_ldev_put(struct mlx5_lag *ldev) +{ + kref_put(&ldev->ref, mlx5_ldev_free); +} + +static void mlx5_ldev_get(struct mlx5_lag *ldev) +{ + kref_get(&ldev->ref); +} + +static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + int err; + + ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); + if (!ldev) + return NULL; + + ldev->wq = create_singlethread_workqueue("mlx5_lag"); + if (!ldev->wq) { + kfree(ldev); + return NULL; + } + + kref_init(&ldev->ref); + mutex_init(&ldev->lock); + INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); + + ldev->nb.notifier_call = mlx5_lag_netdev_event; + if (register_netdevice_notifier_net(&init_net, &ldev->nb)) { + ldev->nb.notifier_call = NULL; + mlx5_core_err(dev, "Failed to register LAG netdev notifier\n"); + } + ldev->mode = MLX5_LAG_MODE_NONE; + + err = mlx5_lag_mp_init(ldev); + if (err) + mlx5_core_err(dev, "Failed to init multipath lag err=%d\n", + err); + + mlx5_lag_mpesw_init(ldev); + ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports); + ldev->buckets = 1; + + return ldev; +} + +int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, + struct net_device *ndev) +{ + int i; + + for (i = 0; i < ldev->ports; i++) + if 
(ldev->pf[i].netdev == ndev) + return i; + + return -ENOENT; +} + +static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev) +{ + return ldev->mode == MLX5_LAG_MODE_ROCE; +} + +static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev) +{ + return ldev->mode == MLX5_LAG_MODE_SRIOV; +} + +/* Create a mapping between steering slots and active ports. + * As we have ldev->buckets slots per port first assume the native + * mapping should be used. + * If there are ports that are disabled fill the relevant slots + * with mapping that points to active ports. + */ +static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, + u8 num_ports, + u8 buckets, + u8 *ports) +{ + int disabled[MLX5_MAX_PORTS] = {}; + int enabled[MLX5_MAX_PORTS] = {}; + int disabled_ports_num = 0; + int enabled_ports_num = 0; + int idx; + u32 rand; + int i; + int j; + + for (i = 0; i < num_ports; i++) { + if (tracker->netdev_state[i].tx_enabled && + tracker->netdev_state[i].link_up) + enabled[enabled_ports_num++] = i; + else + disabled[disabled_ports_num++] = i; + } + + /* Use native mapping by default where each port's buckets + * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc + */ + for (i = 0; i < num_ports; i++) + for (j = 0; j < buckets; j++) { + idx = i * buckets + j; + ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i; + } + + /* If all ports are disabled/enabled keep native mapping */ + if (enabled_ports_num == num_ports || + disabled_ports_num == num_ports) + return; + + /* Go over the disabled ports and for each assign a random active port */ + for (i = 0; i < disabled_ports_num; i++) { + for (j = 0; j < buckets; j++) { + get_random_bytes(&rand, 4); + ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1; + } + } +} + +static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev) +{ + int i; + + for (i = 0; i < ldev->ports; i++) + if (ldev->pf[i].has_drop) + return true; + return false; +} + +static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev) +{ + int i; + + for (i = 0; i < ldev->ports; i++) { + if (!ldev->pf[i].has_drop) + continue; + + mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch, + MLX5_VPORT_UPLINK); + ldev->pf[i].has_drop = false; + } +} + +static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev, + struct lag_tracker *tracker) +{ + u8 disabled_ports[MLX5_MAX_PORTS] = {}; + struct mlx5_core_dev *dev; + int disabled_index; + int num_disabled; + int err; + int i; + + /* First delete the current drop rule so there won't be any dropped + * packets + */ + mlx5_lag_drop_rule_cleanup(ldev); + + if (!ldev->tracker.has_inactive) + return; + + mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled); + + for (i = 0; i < num_disabled; i++) { + disabled_index = disabled_ports[i]; + dev = ldev->pf[disabled_index].dev; + err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch, + MLX5_VPORT_UPLINK); + if (!err) + ldev->pf[disabled_index].has_drop = true; + else + mlx5_core_err(dev, + "Failed to create lag drop rule, error: %d", err); + } +} + +static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports) +{ + u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {}; + void *lag_ctx; + + lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx); + + MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG); + MLX5_SET(modify_lag_in, in, field_select, 0x2); + + MLX5_SET(lagc, lag_ctx, active_port, ports); + + return mlx5_cmd_exec_in(dev, modify_lag, in); +} + +static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports) +{ + struct 
mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + u8 active_ports; + int ret; + + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) { + ret = mlx5_lag_port_sel_modify(ldev, ports); + if (ret || + !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass)) + return ret; + + active_ports = lag_active_port_bits(ldev); + + return mlx5_cmd_modify_active_port(dev0, active_ports); + } + return mlx5_cmd_modify_lag(dev0, ldev->ports, ports); +} + +void mlx5_modify_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker) +{ + u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {}; + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + int idx; + int err; + int i; + int j; + + mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports); + + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + idx = i * ldev->buckets + j; + if (ports[idx] == ldev->v2p_map[idx]) + continue; + err = _mlx5_modify_lag(ldev, ports); + if (err) { + mlx5_core_err(dev0, + "Failed to modify LAG (%d)\n", + err); + return; + } + memcpy(ldev->v2p_map, ports, sizeof(ports)); + + mlx5_lag_print_mapping(dev0, ldev, tracker, + ldev->mode_flags); + break; + } + } + + if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && + !(ldev->mode == MLX5_LAG_MODE_ROCE)) + mlx5_lag_drop_rule_setup(ldev, tracker); +} + +static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev, + unsigned long *flags) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + + if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) { + if (ldev->ports > 2) + return -EINVAL; + return 0; + } + + if (ldev->ports > 2) + ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS; + + set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags); + + return 0; +} + +static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + enum mlx5_lag_mode mode, + unsigned long *flags) +{ + struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1]; + + if (mode == MLX5_LAG_MODE_MPESW) + return; + + if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) && + tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH) + set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags); +} + +static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode, + struct lag_tracker *tracker, bool shared_fdb, + unsigned long *flags) +{ + bool roce_lag = mode == MLX5_LAG_MODE_ROCE; + + *flags = 0; + if (shared_fdb) { + set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags); + set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags); + } + + if (mode == MLX5_LAG_MODE_MPESW) + set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags); + + if (roce_lag) + return mlx5_lag_set_port_sel_mode_roce(ldev, flags); + + mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags); + return 0; +} + +char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags) +{ + int port_sel_mode = get_port_sel_mode(mode, flags); + + switch (port_sel_mode) { + case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity"; + case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash"; + case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw"; + default: return "invalid"; + } +} + +static int mlx5_create_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + enum mlx5_lag_mode mode, + unsigned long flags) +{ + bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + u32 
in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; + int err; + + if (tracker) + mlx5_lag_print_mapping(dev0, ldev, tracker, flags); + mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n", + shared_fdb, mlx5_get_str_port_sel_mode(mode, flags)); + + err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags); + if (err) { + mlx5_core_err(dev0, + "Failed to create LAG (%d)\n", + err); + return err; + } + + if (shared_fdb) { + err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch, + dev1->priv.eswitch); + if (err) + mlx5_core_err(dev0, "Can't enable single FDB mode\n"); + else + mlx5_core_info(dev0, "Operation mode is single FDB\n"); + } + + if (err) { + MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); + if (mlx5_cmd_exec_in(dev0, destroy_lag, in)) + mlx5_core_err(dev0, + "Failed to deactivate RoCE LAG; driver restart required\n"); + } + + return err; +} + +int mlx5_activate_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + enum mlx5_lag_mode mode, + bool shared_fdb) +{ + bool roce_lag = mode == MLX5_LAG_MODE_ROCE; + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + unsigned long flags = 0; + int err; + + err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags); + if (err) + return err; + + if (mode != MLX5_LAG_MODE_MPESW) { + mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map); + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) { + err = mlx5_lag_port_sel_create(ldev, tracker->hash_type, + ldev->v2p_map); + if (err) { + mlx5_core_err(dev0, + "Failed to create LAG port selection(%d)\n", + err); + return err; + } + } + } + + err = mlx5_create_lag(ldev, tracker, mode, flags); + if (err) { + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) + mlx5_lag_port_sel_destroy(ldev); + if (roce_lag) + mlx5_core_err(dev0, + "Failed to activate RoCE LAG\n"); + else + mlx5_core_err(dev0, + "Failed to activate VF LAG\n" + "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); + return err; + } + + if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && + !roce_lag) + mlx5_lag_drop_rule_setup(ldev, tracker); + + ldev->mode = mode; + ldev->mode_flags = flags; + return 0; +} + +static int mlx5_deactivate_lag(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; + bool roce_lag = __mlx5_lag_is_roce(ldev); + unsigned long flags = ldev->mode_flags; + int err; + + ldev->mode = MLX5_LAG_MODE_NONE; + ldev->mode_flags = 0; + mlx5_lag_mp_reset(ldev); + + if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) { + mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch, + dev1->priv.eswitch); + clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); + } + + MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); + err = mlx5_cmd_exec_in(dev0, destroy_lag, in); + if (err) { + if (roce_lag) { + mlx5_core_err(dev0, + "Failed to deactivate RoCE LAG; driver restart required\n"); + } else { + mlx5_core_err(dev0, + "Failed to deactivate VF LAG; driver restart required\n" + "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); + } + return err; + } + + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) + mlx5_lag_port_sel_destroy(ldev); + if (mlx5_lag_has_drop_rule(ldev)) + mlx5_lag_drop_rule_cleanup(ldev); + + return 0; +} + +#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2 +static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) +{ +#ifdef CONFIG_MLX5_ESWITCH + 
struct mlx5_core_dev *dev; + u8 mode; +#endif + int i; + + for (i = 0; i < ldev->ports; i++) + if (!ldev->pf[i].dev) + return false; + +#ifdef CONFIG_MLX5_ESWITCH + for (i = 0; i < ldev->ports; i++) { + dev = ldev->pf[i].dev; + if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev)) + return false; + } + + dev = ldev->pf[MLX5_LAG_P1].dev; + mode = mlx5_eswitch_mode(dev); + for (i = 0; i < ldev->ports; i++) + if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode) + return false; + + if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports != MLX5_LAG_OFFLOADS_SUPPORTED_PORTS) + return false; +#else + for (i = 0; i < ldev->ports; i++) + if (mlx5_sriov_is_enabled(ldev->pf[i].dev)) + return false; +#endif + return true; +} + +static void mlx5_lag_add_devices(struct mlx5_lag *ldev) +{ + int i; + + for (i = 0; i < ldev->ports; i++) { + if (!ldev->pf[i].dev) + continue; + + if (ldev->pf[i].dev->priv.flags & + MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) + continue; + + ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(ldev->pf[i].dev); + } +} + +static void mlx5_lag_remove_devices(struct mlx5_lag *ldev) +{ + int i; + + for (i = 0; i < ldev->ports; i++) { + if (!ldev->pf[i].dev) + continue; + + if (ldev->pf[i].dev->priv.flags & + MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) + continue; + + ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(ldev->pf[i].dev); + } +} + +void mlx5_disable_lag(struct mlx5_lag *ldev) +{ + bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + bool roce_lag; + int err; + int i; + + roce_lag = __mlx5_lag_is_roce(ldev); + + if (shared_fdb) { + mlx5_lag_remove_devices(ldev); + } else if (roce_lag) { + if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) { + dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + } + for (i = 1; i < ldev->ports; i++) + mlx5_nic_vport_disable_roce(ldev->pf[i].dev); + } + + err = mlx5_deactivate_lag(ldev); + if (err) + return; + + if (shared_fdb || roce_lag) + mlx5_lag_add_devices(ldev); + + if (shared_fdb) { + if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) + mlx5_eswitch_reload_reps(dev0->priv.eswitch); + if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) + mlx5_eswitch_reload_reps(dev1->priv.eswitch); + } +} + +bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + + if (is_mdev_switchdev_mode(dev0) && + is_mdev_switchdev_mode(dev1) && + mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) && + mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) && + mlx5_devcom_is_paired(dev0->priv.devcom, + MLX5_DEVCOM_ESW_OFFLOADS) && + MLX5_CAP_GEN(dev1, lag_native_fdb_selection) && + MLX5_CAP_ESW(dev1, root_ft_on_other_esw) && + MLX5_CAP_ESW(dev0, esw_shared_ingress_acl)) + return true; + + return false; +} + +static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev) +{ + bool roce_lag = true; + int i; + + for (i = 0; i < ldev->ports; i++) + roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev); + +#ifdef CONFIG_MLX5_ESWITCH + for (i = 0; i < ldev->ports; i++) + roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev); +#endif + + return roce_lag; +} + +static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond) +{ + 
return do_bond && __mlx5_lag_is_active(ldev) && + ldev->mode != MLX5_LAG_MODE_MPESW; +} + +static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond) +{ + return !do_bond && __mlx5_lag_is_active(ldev) && + ldev->mode != MLX5_LAG_MODE_MPESW; +} + +static void mlx5_do_bond(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + struct lag_tracker tracker = { }; + bool do_bond, roce_lag; + int err; + int i; + + if (!mlx5_lag_is_ready(ldev)) { + do_bond = false; + } else { + /* VF LAG is in multipath mode, ignore bond change requests */ + if (mlx5_lag_is_multipath(dev0)) + return; + + tracker = ldev->tracker; + + do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); + } + + if (do_bond && !__mlx5_lag_is_active(ldev)) { + bool shared_fdb = mlx5_shared_fdb_supported(ldev); + + roce_lag = mlx5_lag_is_roce_lag(ldev); + + if (shared_fdb || roce_lag) + mlx5_lag_remove_devices(ldev); + + err = mlx5_activate_lag(ldev, &tracker, + roce_lag ? MLX5_LAG_MODE_ROCE : + MLX5_LAG_MODE_SRIOV, + shared_fdb); + if (err) { + if (shared_fdb || roce_lag) + mlx5_lag_add_devices(ldev); + + return; + } else if (roce_lag) { + dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + for (i = 1; i < ldev->ports; i++) + mlx5_nic_vport_enable_roce(ldev->pf[i].dev); + } else if (shared_fdb) { + dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + + err = mlx5_eswitch_reload_reps(dev0->priv.eswitch); + if (!err) + err = mlx5_eswitch_reload_reps(dev1->priv.eswitch); + + if (err) { + dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + mlx5_deactivate_lag(ldev); + mlx5_lag_add_devices(ldev); + mlx5_eswitch_reload_reps(dev0->priv.eswitch); + mlx5_eswitch_reload_reps(dev1->priv.eswitch); + mlx5_core_err(dev0, "Failed to enable lag\n"); + return; + } + } + } else if (mlx5_lag_should_modify_lag(ldev, do_bond)) { + mlx5_modify_lag(ldev, &tracker); + } else if (mlx5_lag_should_disable_lag(ldev, do_bond)) { + mlx5_disable_lag(ldev); + } +} + +static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay) +{ + queue_delayed_work(ldev->wq, &ldev->bond_work, delay); +} + +static void mlx5_do_bond_work(struct work_struct *work) +{ + struct delayed_work *delayed_work = to_delayed_work(work); + struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag, + bond_work); + int status; + + status = mlx5_dev_list_trylock(); + if (!status) { + mlx5_queue_bond_work(ldev, HZ); + return; + } + + mutex_lock(&ldev->lock); + if (ldev->mode_changes_in_progress) { + mutex_unlock(&ldev->lock); + mlx5_dev_list_unlock(); + mlx5_queue_bond_work(ldev, HZ); + return; + } + + mlx5_do_bond(ldev); + mutex_unlock(&ldev->lock); + mlx5_dev_list_unlock(); +} + +static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *upper = info->upper_dev, *ndev_tmp; + struct netdev_lag_upper_info *lag_upper_info = NULL; + bool is_bonded, is_in_lag, mode_supported; + bool has_inactive = 0; + struct slave *slave; + u8 bond_status = 0; + int num_slaves = 0; + int changed = 0; + int idx; + + if (!netif_is_lag_master(upper)) + return 0; + + if (info->linking) + lag_upper_info = info->upper_info; + + /* The event may still be of interest if the slave does not belong to + * us, but is enslaved to a master which has one or more of our 
netdevs + * as slaves (e.g., if a new slave is added to a master that bonds two + * of our netdevs, we should unbond). + */ + rcu_read_lock(); + for_each_netdev_in_bond_rcu(upper, ndev_tmp) { + idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); + if (idx >= 0) { + slave = bond_slave_get_rcu(ndev_tmp); + if (slave) + has_inactive |= bond_is_slave_inactive(slave); + bond_status |= (1 << idx); + } + + num_slaves++; + } + rcu_read_unlock(); + + /* None of this lagdev's netdevs are slaves of this master. */ + if (!(bond_status & GENMASK(ldev->ports - 1, 0))) + return 0; + + if (lag_upper_info) { + tracker->tx_type = lag_upper_info->tx_type; + tracker->hash_type = lag_upper_info->hash_type; + } + + tracker->has_inactive = has_inactive; + /* Determine bonding status: + * A device is considered bonded if both its physical ports are slaves + * of the same lag master, and only them. + */ + is_in_lag = num_slaves == ldev->ports && + bond_status == GENMASK(ldev->ports - 1, 0); + + /* Lag mode must be activebackup or hash. */ + mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP || + tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH; + + is_bonded = is_in_lag && mode_supported; + if (tracker->is_bonded != is_bonded) { + tracker->is_bonded = is_bonded; + changed = 1; + } + + if (!is_in_lag) + return changed; + + if (!mlx5_lag_is_ready(ldev)) + NL_SET_ERR_MSG_MOD(info->info.extack, + "Can't activate LAG offload, PF is configured with more than 64 VFs"); + else if (!mode_supported) + NL_SET_ERR_MSG_MOD(info->info.extack, + "Can't activate LAG offload, TX type isn't supported"); + + return changed; +} + +static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + struct net_device *ndev, + struct netdev_notifier_changelowerstate_info *info) +{ + struct netdev_lag_lower_state_info *lag_lower_info; + int idx; + + if (!netif_is_lag_port(ndev)) + return 0; + + idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev); + if (idx < 0) + return 0; + + /* This information is used to determine virtual to physical + * port mapping. 
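The changeupper handling above ultimately reduces to a bitmask test: the bond can be offloaded only when every slave of the upper device is one of our ports and all of our ports are enslaved. A self-contained model of that decision, with GENMASK() expanded by hand and hypothetical inputs, could read:

/* Model of the "is this bond exactly our ports?" decision (illustration only). */
#include <stdio.h>

static int bond_is_ours(unsigned int bond_status, int num_slaves, int num_ports)
{
	unsigned int all_ports = (1u << num_ports) - 1;	/* GENMASK(num_ports - 1, 0) */

	return num_slaves == num_ports && bond_status == all_ports;
}

int main(void)
{
	/* Both of our ports enslaved and nothing else in the bond: offloadable. */
	printf("%d\n", bond_is_ours(0x3, 2, 2));
	/* Our two ports plus a foreign slave: not offloadable. */
	printf("%d\n", bond_is_ours(0x3, 3, 2));
	/* Only one of our ports enslaved: not offloadable. */
	printf("%d\n", bond_is_ours(0x1, 1, 2));
	return 0;
}

The driver additionally requires the bond TX type to be active-backup or hash before it marks the tracker as bonded.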
+ */ + lag_lower_info = info->lower_state_info; + if (!lag_lower_info) + return 0; + + tracker->netdev_state[idx] = *lag_lower_info; + + return 1; +} + +static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + struct net_device *ndev) +{ + struct net_device *ndev_tmp; + struct slave *slave; + bool has_inactive = 0; + int idx; + + if (!netif_is_lag_master(ndev)) + return 0; + + rcu_read_lock(); + for_each_netdev_in_bond_rcu(ndev, ndev_tmp) { + idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); + if (idx < 0) + continue; + + slave = bond_slave_get_rcu(ndev_tmp); + if (slave) + has_inactive |= bond_is_slave_inactive(slave); + } + rcu_read_unlock(); + + if (tracker->has_inactive == has_inactive) + return 0; + + tracker->has_inactive = has_inactive; + + return 1; +} + +/* this handler is always registered to netdev events */ +static int mlx5_lag_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct lag_tracker tracker; + struct mlx5_lag *ldev; + int changed = 0; + + if (event != NETDEV_CHANGEUPPER && + event != NETDEV_CHANGELOWERSTATE && + event != NETDEV_CHANGEINFODATA) + return NOTIFY_DONE; + + ldev = container_of(this, struct mlx5_lag, nb); + + tracker = ldev->tracker; + + switch (event) { + case NETDEV_CHANGEUPPER: + changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr); + break; + case NETDEV_CHANGELOWERSTATE: + changed = mlx5_handle_changelowerstate_event(ldev, &tracker, + ndev, ptr); + break; + case NETDEV_CHANGEINFODATA: + changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev); + break; + } + + ldev->tracker = tracker; + + if (changed) + mlx5_queue_bond_work(ldev, 0); + + return NOTIFY_DONE; +} + +static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev, + struct mlx5_core_dev *dev, + struct net_device *netdev) +{ + unsigned int fn = mlx5_get_dev_index(dev); + unsigned long flags; + + if (fn >= ldev->ports) + return; + + spin_lock_irqsave(&lag_lock, flags); + ldev->pf[fn].netdev = netdev; + ldev->tracker.netdev_state[fn].link_up = 0; + ldev->tracker.netdev_state[fn].tx_enabled = 0; + spin_unlock_irqrestore(&lag_lock, flags); +} + +static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev, + struct net_device *netdev) +{ + unsigned long flags; + int i; + + spin_lock_irqsave(&lag_lock, flags); + for (i = 0; i < ldev->ports; i++) { + if (ldev->pf[i].netdev == netdev) { + ldev->pf[i].netdev = NULL; + break; + } + } + spin_unlock_irqrestore(&lag_lock, flags); +} + +static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev, + struct mlx5_core_dev *dev) +{ + unsigned int fn = mlx5_get_dev_index(dev); + + if (fn >= ldev->ports) + return; + + ldev->pf[fn].dev = dev; + dev->priv.lag = ldev; +} + +static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev, + struct mlx5_core_dev *dev) +{ + int i; + + for (i = 0; i < ldev->ports; i++) + if (ldev->pf[i].dev == dev) + break; + + if (i == ldev->ports) + return; + + ldev->pf[i].dev = NULL; + dev->priv.lag = NULL; +} + +/* Must be called with intf_mutex held */ +static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev = NULL; + struct mlx5_core_dev *tmp_dev; + + tmp_dev = mlx5_get_next_phys_dev_lag(dev); + if (tmp_dev) + ldev = tmp_dev->priv.lag; + + if (!ldev) { + ldev = mlx5_lag_dev_alloc(dev); + if (!ldev) { + mlx5_core_err(dev, "Failed to alloc lag dev\n"); + return 0; + } + mlx5_ldev_add_mdev(ldev, dev); + return 0; + } + + mutex_lock(&ldev->lock); + if 
(ldev->mode_changes_in_progress) { + mutex_unlock(&ldev->lock); + return -EAGAIN; + } + mlx5_ldev_get(ldev); + mlx5_ldev_add_mdev(ldev, dev); + mutex_unlock(&ldev->lock); + + return 0; +} + +void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return; + + /* mdev is being removed, might as well remove debugfs + * as early as possible. + */ + mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs); +recheck: + mutex_lock(&ldev->lock); + if (ldev->mode_changes_in_progress) { + mutex_unlock(&ldev->lock); + msleep(100); + goto recheck; + } + mlx5_ldev_remove_mdev(ldev, dev); + mutex_unlock(&ldev->lock); + mlx5_ldev_put(ldev); +} + +void mlx5_lag_add_mdev(struct mlx5_core_dev *dev) +{ + int err; + + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + !MLX5_CAP_GEN(dev, lag_master) || + (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS || + MLX5_CAP_GEN(dev, num_lag_ports) <= 1)) + return; + +recheck: + mlx5_dev_list_lock(); + err = __mlx5_lag_dev_add_mdev(dev); + mlx5_dev_list_unlock(); + + if (err) { + msleep(100); + goto recheck; + } + mlx5_ldev_add_debugfs(dev); +} + +void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, + struct net_device *netdev) +{ + struct mlx5_lag *ldev; + bool lag_is_active; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return; + + mutex_lock(&ldev->lock); + mlx5_ldev_remove_netdev(ldev, netdev); + clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); + + lag_is_active = __mlx5_lag_is_active(ldev); + mutex_unlock(&ldev->lock); + + if (lag_is_active) + mlx5_queue_bond_work(ldev, 0); +} + +void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, + struct net_device *netdev) +{ + struct mlx5_lag *ldev; + int i; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return; + + mutex_lock(&ldev->lock); + mlx5_ldev_add_netdev(ldev, dev, netdev); + + for (i = 0; i < ldev->ports; i++) + if (!ldev->pf[i].netdev) + break; + + if (i >= ldev->ports) + set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); + mutex_unlock(&ldev->lock); + mlx5_queue_bond_work(ldev, 0); +} + +bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + unsigned long flags; + bool res; + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_roce(ldev); + spin_unlock_irqrestore(&lag_lock, flags); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_roce); + +bool mlx5_lag_is_active(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + unsigned long flags; + bool res; + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_active(ldev); + spin_unlock_irqrestore(&lag_lock, flags); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_active); + +bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + unsigned long flags; + bool res = 0; + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + if (ldev) + res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags); + spin_unlock_irqrestore(&lag_lock, flags); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_mode_is_hash); + +bool mlx5_lag_is_master(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + unsigned long flags; + bool res; + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_active(ldev) && + dev == ldev->pf[MLX5_LAG_P1].dev; + spin_unlock_irqrestore(&lag_lock, flags); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_master); + +bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + 
unsigned long flags; + bool res; + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_sriov(ldev); + spin_unlock_irqrestore(&lag_lock, flags); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_sriov); + +bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + unsigned long flags; + bool res; + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_sriov(ldev) && + test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); + spin_unlock_irqrestore(&lag_lock, flags); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_shared_fdb); + +void mlx5_lag_disable_change(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return; + + mlx5_dev_list_lock(); + mutex_lock(&ldev->lock); + + ldev->mode_changes_in_progress++; + if (__mlx5_lag_is_active(ldev)) + mlx5_disable_lag(ldev); + + mutex_unlock(&ldev->lock); + mlx5_dev_list_unlock(); +} + +void mlx5_lag_enable_change(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return; + + mutex_lock(&ldev->lock); + ldev->mode_changes_in_progress--; + mutex_unlock(&ldev->lock); + mlx5_queue_bond_work(ldev, 0); +} + +struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) +{ + struct net_device *ndev = NULL; + struct mlx5_lag *ldev; + unsigned long flags; + int i; + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + + if (!(ldev && __mlx5_lag_is_roce(ldev))) + goto unlock; + + if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { + for (i = 0; i < ldev->ports; i++) + if (ldev->tracker.netdev_state[i].tx_enabled) + ndev = ldev->pf[i].netdev; + if (!ndev) + ndev = ldev->pf[ldev->ports - 1].netdev; + } else { + ndev = ldev->pf[MLX5_LAG_P1].netdev; + } + if (ndev) + dev_hold(ndev); + +unlock: + spin_unlock_irqrestore(&lag_lock, flags); + + return ndev; +} +EXPORT_SYMBOL(mlx5_lag_get_roce_netdev); + +u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, + struct net_device *slave) +{ + struct mlx5_lag *ldev; + unsigned long flags; + u8 port = 0; + int i; + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + if (!(ldev && __mlx5_lag_is_roce(ldev))) + goto unlock; + + for (i = 0; i < ldev->ports; i++) { + if (ldev->pf[MLX5_LAG_P1].netdev == slave) { + port = i; + break; + } + } + + port = ldev->v2p_map[port * ldev->buckets]; + +unlock: + spin_unlock_irqrestore(&lag_lock, flags); + return port; +} +EXPORT_SYMBOL(mlx5_lag_get_slave_port); + +u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return 0; + + return ldev->ports; +} +EXPORT_SYMBOL(mlx5_lag_get_num_ports); + +struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev) +{ + struct mlx5_core_dev *peer_dev = NULL; + struct mlx5_lag *ldev; + unsigned long flags; + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + if (!ldev) + goto unlock; + + peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ? 
+ ldev->pf[MLX5_LAG_P2].dev : + ldev->pf[MLX5_LAG_P1].dev; + +unlock: + spin_unlock_irqrestore(&lag_lock, flags); + return peer_dev; +} +EXPORT_SYMBOL(mlx5_lag_get_peer_mdev); + +int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, + u64 *values, + int num_counters, + size_t *offsets) +{ + int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out); + struct mlx5_core_dev **mdev; + struct mlx5_lag *ldev; + unsigned long flags; + int num_ports; + int ret, i, j; + void *out; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL); + if (!mdev) { + ret = -ENOMEM; + goto free_out; + } + + memset(values, 0, sizeof(*values) * num_counters); + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + if (ldev && __mlx5_lag_is_active(ldev)) { + num_ports = ldev->ports; + for (i = 0; i < ldev->ports; i++) + mdev[i] = ldev->pf[i].dev; + } else { + num_ports = 1; + mdev[MLX5_LAG_P1] = dev; + } + spin_unlock_irqrestore(&lag_lock, flags); + + for (i = 0; i < num_ports; ++i) { + u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {}; + + MLX5_SET(query_cong_statistics_in, in, opcode, + MLX5_CMD_OP_QUERY_CONG_STATISTICS); + ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in, + out); + if (ret) + goto free_mdev; + + for (j = 0; j < num_counters; ++j) + values[j] += be64_to_cpup((__be64 *)(out + offsets[j])); + } + +free_mdev: + kvfree(mdev); +free_out: + kvfree(out); + return ret; +} +EXPORT_SYMBOL(mlx5_lag_query_cong_counters); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h new file mode 100644 index 000000000..f30ac2de6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_LAG_H__ +#define __MLX5_LAG_H__ + +#include <linux/debugfs.h> + +#define MLX5_LAG_MAX_HASH_BUCKETS 16 +#include "mlx5_core.h" +#include "mp.h" +#include "port_sel.h" +#include "mpesw.h" + +enum { + MLX5_LAG_P1, + MLX5_LAG_P2, +}; + +enum { + MLX5_LAG_FLAG_NDEVS_READY, +}; + +enum { + MLX5_LAG_MODE_FLAG_HASH_BASED, + MLX5_LAG_MODE_FLAG_SHARED_FDB, + MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, +}; + +enum mlx5_lag_mode { + MLX5_LAG_MODE_NONE, + MLX5_LAG_MODE_ROCE, + MLX5_LAG_MODE_SRIOV, + MLX5_LAG_MODE_MULTIPATH, + MLX5_LAG_MODE_MPESW, +}; + +struct lag_func { + struct mlx5_core_dev *dev; + struct net_device *netdev; + bool has_drop; +}; + +/* Used for collection of netdev event info. */ +struct lag_tracker { + enum netdev_lag_tx_type tx_type; + struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS]; + unsigned int is_bonded:1; + unsigned int has_inactive:1; + enum netdev_lag_hash hash_type; +}; + +enum mpesw_op { + MLX5_MPESW_OP_ENABLE, + MLX5_MPESW_OP_DISABLE, +}; + +struct mlx5_mpesw_work_st { + struct work_struct work; + struct mlx5_lag *lag; + enum mpesw_op op; + struct completion comp; + int result; +}; + +/* LAG data of a ConnectX card. + * It serves both its phys functions. 
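mlx5_lag_query_cong_counters() above is, at its core, a per-counter sum across the ports that make up the bond. The short standalone sketch below shows just that aggregation step with fabricated per-port values in host byte order; the real code pulls big-endian values out of a firmware mailbox and falls back to querying a single device when LAG is not active.

/* Aggregating per-port counters into one view (illustration only). */
#include <stdio.h>
#include <stdint.h>

#define NUM_PORTS    2
#define NUM_COUNTERS 3

int main(void)
{
	/* Hypothetical per-port counter snapshots. */
	uint64_t per_port[NUM_PORTS][NUM_COUNTERS] = {
		{ 100, 5, 7 },
		{  40, 2, 9 },
	};
	uint64_t values[NUM_COUNTERS] = { 0 };
	int i, j;

	for (i = 0; i < NUM_PORTS; i++)
		for (j = 0; j < NUM_COUNTERS; j++)
			values[j] += per_port[i][j];

	for (j = 0; j < NUM_COUNTERS; j++)
		printf("counter %d = %llu\n", j, (unsigned long long)values[j]);
	return 0;
}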
+ */ +struct mlx5_lag { + enum mlx5_lag_mode mode; + unsigned long mode_flags; + unsigned long state_flags; + u8 ports; + u8 buckets; + int mode_changes_in_progress; + u8 v2p_map[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS]; + struct kref ref; + struct lag_func pf[MLX5_MAX_PORTS]; + struct lag_tracker tracker; + struct workqueue_struct *wq; + struct delayed_work bond_work; + struct notifier_block nb; + struct lag_mp lag_mp; + struct mlx5_lag_port_sel port_sel; + /* Protect lag fields/state changes */ + struct mutex lock; + struct lag_mpesw lag_mpesw; +}; + +static inline bool mlx5_is_lag_supported(struct mlx5_core_dev *dev) +{ + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + !MLX5_CAP_GEN(dev, lag_master) || + MLX5_CAP_GEN(dev, num_lag_ports) < 2 || + MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS) + return false; + return true; +} + +static inline struct mlx5_lag * +mlx5_lag_dev(struct mlx5_core_dev *dev) +{ + return dev->priv.lag; +} + +static inline bool +__mlx5_lag_is_active(struct mlx5_lag *ldev) +{ + return ldev->mode != MLX5_LAG_MODE_NONE; +} + +static inline bool +mlx5_lag_is_ready(struct mlx5_lag *ldev) +{ + return test_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); +} + +void mlx5_modify_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker); +int mlx5_activate_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + enum mlx5_lag_mode mode, + bool shared_fdb); +int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, + struct net_device *ndev); +bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev); +void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev); +int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev); + +char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags); +void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, + u8 *ports, int *num_enabled); + +void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev); +void mlx5_ldev_remove_debugfs(struct dentry *dbg); +void mlx5_disable_lag(struct mlx5_lag *ldev); + +#endif /* __MLX5_LAG_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c new file mode 100644 index 000000000..0259a149a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -0,0 +1,368 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <linux/netdevice.h> +#include <net/nexthop.h> +#include "lag/lag.h" +#include "lag/mp.h" +#include "mlx5_core.h" +#include "eswitch.h" +#include "lib/mlx5.h" + +static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev) +{ + return ldev->mode == MLX5_LAG_MODE_MULTIPATH; +} + +static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev) +{ + if (!mlx5_lag_is_ready(ldev)) + return false; + + if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev)) + return false; + + return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev, + ldev->pf[MLX5_LAG_P2].dev); +} + +bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + bool res; + + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_multipath(ldev); + + return res; +} + +/** + * mlx5_lag_set_port_affinity + * + * @ldev: lag device + * @port: + * 0 - set normal affinity. + * 1 - set affinity to port 1. + * 2 - set affinity to port 2. 
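The three affinity values listed above map onto a simple per-port tx_enabled/link_up pattern, which the function following this comment builds into a temporary lag_tracker. As a rough standalone illustration with two fixed ports and plain structs (none of these names are driver API):

/* Modeling normal/port-1/port-2 affinity as per-port state (illustration only). */
#include <stdio.h>
#include <stdbool.h>

enum port_affinity { NORMAL_AFFINITY, P1_AFFINITY, P2_AFFINITY };

struct port_state { bool tx_enabled; bool link_up; };

static void set_affinity(struct port_state s[2], enum port_affinity aff)
{
	s[0].tx_enabled = s[0].link_up = (aff != P2_AFFINITY);
	s[1].tx_enabled = s[1].link_up = (aff != P1_AFFINITY);
}

int main(void)
{
	static const char * const names[] = { "normal", "port1", "port2" };
	struct port_state s[2];
	int aff;

	for (aff = NORMAL_AFFINITY; aff <= P2_AFFINITY; aff++) {
		set_affinity(s, aff);
		printf("%s: p1 tx=%d p2 tx=%d\n", names[aff],
		       s[0].tx_enabled, s[1].tx_enabled);
	}
	return 0;
}

Affinity 1 and 2 mark the peer port as down and not transmitting, while normal affinity leaves both ports eligible for transmit.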
+ * + **/ +static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, + enum mlx5_lag_port_affinity port) +{ + struct lag_tracker tracker = {}; + + if (!__mlx5_lag_is_multipath(ldev)) + return; + + switch (port) { + case MLX5_LAG_NORMAL_AFFINITY: + tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true; + tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true; + tracker.netdev_state[MLX5_LAG_P1].link_up = true; + tracker.netdev_state[MLX5_LAG_P2].link_up = true; + break; + case MLX5_LAG_P1_AFFINITY: + tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true; + tracker.netdev_state[MLX5_LAG_P1].link_up = true; + tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false; + tracker.netdev_state[MLX5_LAG_P2].link_up = false; + break; + case MLX5_LAG_P2_AFFINITY: + tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false; + tracker.netdev_state[MLX5_LAG_P1].link_up = false; + tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true; + tracker.netdev_state[MLX5_LAG_P2].link_up = true; + break; + default: + mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev, + "Invalid affinity port %d", port); + return; + } + + if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled) + mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events, + MLX5_DEV_EVENT_PORT_AFFINITY, + (void *)0); + + if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled) + mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events, + MLX5_DEV_EVENT_PORT_AFFINITY, + (void *)0); + + mlx5_modify_lag(ldev, &tracker); +} + +static void mlx5_lag_fib_event_flush(struct notifier_block *nb) +{ + struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb); + + flush_workqueue(mp->wq); +} + +static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len) +{ + mp->fib.mfi = fi; + mp->fib.priority = fi->fib_priority; + mp->fib.dst = dst; + mp->fib.dst_len = dst_len; +} + +struct mlx5_fib_event_work { + struct work_struct work; + struct mlx5_lag *ldev; + unsigned long event; + union { + struct fib_entry_notifier_info fen_info; + struct fib_nh_notifier_info fnh_info; + }; +}; + +static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, + struct fib_entry_notifier_info *fen_info) +{ + struct fib_info *fi = fen_info->fi; + struct lag_mp *mp = &ldev->lag_mp; + struct fib_nh *fib_nh0, *fib_nh1; + unsigned int nhs; + + /* Handle delete event */ + if (event == FIB_EVENT_ENTRY_DEL) { + /* stop track */ + if (mp->fib.mfi == fi) + mp->fib.mfi = NULL; + return; + } + + /* Handle multipath entry with lower priority value */ + if (mp->fib.mfi && mp->fib.mfi != fi && + (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) && + fi->fib_priority >= mp->fib.priority) + return; + + /* Handle add/replace event */ + nhs = fib_info_num_path(fi); + if (nhs == 1) { + if (__mlx5_lag_is_active(ldev)) { + struct fib_nh *nh = fib_info_nh(fi, 0); + struct net_device *nh_dev = nh->fib_nh_dev; + int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev); + + if (i < 0) + return; + + i++; + mlx5_lag_set_port_affinity(ldev, i); + mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len); + } + + return; + } + + if (nhs != 2) + return; + + /* Verify next hops are ports of the same hca */ + fib_nh0 = fib_info_nh(fi, 0); + fib_nh1 = fib_info_nh(fi, 1); + if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev && + fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) && + !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev && + fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) { + mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev, + "Multipath offload require 
two ports of the same HCA\n"); + return; + } + + /* First time we see multipath route */ + if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) { + struct lag_tracker tracker; + + tracker = ldev->tracker; + mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_MULTIPATH, false); + } + + mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY); + mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len); +} + +static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, + unsigned long event, + struct fib_nh *fib_nh, + struct fib_info *fi) +{ + struct lag_mp *mp = &ldev->lag_mp; + + /* Check the nh event is related to the route */ + if (!mp->fib.mfi || mp->fib.mfi != fi) + return; + + /* nh added/removed */ + if (event == FIB_EVENT_NH_DEL) { + int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev); + + if (i >= 0) { + i = (i + 1) % 2 + 1; /* peer port */ + mlx5_lag_set_port_affinity(ldev, i); + } + } else if (event == FIB_EVENT_NH_ADD && + fib_info_num_path(fi) == 2) { + mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY); + } +} + +static void mlx5_lag_fib_update(struct work_struct *work) +{ + struct mlx5_fib_event_work *fib_work = + container_of(work, struct mlx5_fib_event_work, work); + struct mlx5_lag *ldev = fib_work->ldev; + struct fib_nh *fib_nh; + + /* Protect internal structures from changes */ + rtnl_lock(); + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: + case FIB_EVENT_ENTRY_DEL: + mlx5_lag_fib_route_event(ldev, fib_work->event, + &fib_work->fen_info); + fib_info_put(fib_work->fen_info.fi); + break; + case FIB_EVENT_NH_ADD: + case FIB_EVENT_NH_DEL: + fib_nh = fib_work->fnh_info.fib_nh; + mlx5_lag_fib_nexthop_event(ldev, + fib_work->event, + fib_work->fnh_info.fib_nh, + fib_nh->nh_parent); + fib_info_put(fib_work->fnh_info.fib_nh->nh_parent); + break; + } + + rtnl_unlock(); + kfree(fib_work); +} + +static struct mlx5_fib_event_work * +mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event) +{ + struct mlx5_fib_event_work *fib_work; + + fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC); + if (WARN_ON(!fib_work)) + return NULL; + + INIT_WORK(&fib_work->work, mlx5_lag_fib_update); + fib_work->ldev = ldev; + fib_work->event = event; + + return fib_work; +} + +static int mlx5_lag_fib_event(struct notifier_block *nb, + unsigned long event, + void *ptr) +{ + struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb); + struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp); + struct fib_notifier_info *info = ptr; + struct mlx5_fib_event_work *fib_work; + struct fib_entry_notifier_info *fen_info; + struct fib_nh_notifier_info *fnh_info; + struct net_device *fib_dev; + struct fib_info *fi; + + if (info->family != AF_INET) + return NOTIFY_DONE; + + if (!mlx5_lag_multipath_check_prereq(ldev)) + return NOTIFY_DONE; + + switch (event) { + case FIB_EVENT_ENTRY_REPLACE: + case FIB_EVENT_ENTRY_DEL: + fen_info = container_of(info, struct fib_entry_notifier_info, + info); + fi = fen_info->fi; + if (fi->nh) + return NOTIFY_DONE; + fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; + if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev && + fib_dev != ldev->pf[MLX5_LAG_P2].netdev) { + return NOTIFY_DONE; + } + fib_work = mlx5_lag_init_fib_work(ldev, event); + if (!fib_work) + return NOTIFY_DONE; + fib_work->fen_info = *fen_info; + /* Take reference on fib_info to prevent it from being + * freed while work is queued. Release it afterwards. 
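The route handling above boils down to a small decision: a single nexthop that sits on one of our ports pins traffic to that port, while exactly two nexthops covering both ports yields normal, both-port affinity. A standalone sketch of that decision, with hypothetical device names standing in for the netdev pointers, could be:

/* Deciding port affinity from multipath nexthop devices (illustration only). */
#include <stdio.h>
#include <string.h>

/* Returns 0 for normal (both ports), 1 or 2 to pin a port, -1 when not ours. */
static int route_affinity(const char *p1, const char *p2,
			  const char **nh_dev, int nhs)
{
	if (nhs == 1) {
		if (!strcmp(nh_dev[0], p1))
			return 1;
		if (!strcmp(nh_dev[0], p2))
			return 2;
		return -1;
	}
	if (nhs == 2 &&
	    ((!strcmp(nh_dev[0], p1) && !strcmp(nh_dev[1], p2)) ||
	     (!strcmp(nh_dev[0], p2) && !strcmp(nh_dev[1], p1))))
		return 0;	/* both ports carry the route: normal affinity */
	return -1;
}

int main(void)
{
	const char *single[] = { "eth1" };
	const char *both[]   = { "eth1", "eth2" };
	const char *other[]  = { "eth3", "eth2" };

	printf("%d\n", route_affinity("eth1", "eth2", single, 1));
	printf("%d\n", route_affinity("eth1", "eth2", both, 2));
	printf("%d\n", route_affinity("eth1", "eth2", other, 2));
	return 0;
}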
+ */ + fib_info_hold(fib_work->fen_info.fi); + break; + case FIB_EVENT_NH_ADD: + case FIB_EVENT_NH_DEL: + fnh_info = container_of(info, struct fib_nh_notifier_info, + info); + fib_work = mlx5_lag_init_fib_work(ldev, event); + if (!fib_work) + return NOTIFY_DONE; + fib_work->fnh_info = *fnh_info; + fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent); + break; + default: + return NOTIFY_DONE; + } + + queue_work(mp->wq, &fib_work->work); + + return NOTIFY_DONE; +} + +void mlx5_lag_mp_reset(struct mlx5_lag *ldev) +{ + /* Clear mfi, as it might become stale when a route delete event + * has been missed, see mlx5_lag_fib_route_event(). + */ + ldev->lag_mp.fib.mfi = NULL; +} + +int mlx5_lag_mp_init(struct mlx5_lag *ldev) +{ + struct lag_mp *mp = &ldev->lag_mp; + int err; + + /* always clear mfi, as it might become stale when a route delete event + * has been missed + */ + mp->fib.mfi = NULL; + + if (mp->fib_nb.notifier_call) + return 0; + + mp->wq = create_singlethread_workqueue("mlx5_lag_mp"); + if (!mp->wq) + return -ENOMEM; + + mp->fib_nb.notifier_call = mlx5_lag_fib_event; + err = register_fib_notifier(&init_net, &mp->fib_nb, + mlx5_lag_fib_event_flush, NULL); + if (err) { + destroy_workqueue(mp->wq); + mp->fib_nb.notifier_call = NULL; + } + + return err; +} + +void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) +{ + struct lag_mp *mp = &ldev->lag_mp; + + if (!mp->fib_nb.notifier_call) + return; + + unregister_fib_notifier(&init_net, &mp->fib_nb); + destroy_workqueue(mp->wq); + mp->fib_nb.notifier_call = NULL; + mp->fib.mfi = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h new file mode 100644 index 000000000..056a066da --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_LAG_MP_H__ +#define __MLX5_LAG_MP_H__ + +#include "lag.h" +#include "mlx5_core.h" + +enum mlx5_lag_port_affinity { + MLX5_LAG_NORMAL_AFFINITY, + MLX5_LAG_P1_AFFINITY, + MLX5_LAG_P2_AFFINITY, +}; + +struct lag_mp { + struct notifier_block fib_nb; + struct { + const void *mfi; /* used in tracking fib events */ + u32 priority; + u32 dst; + int dst_len; + } fib; + struct workqueue_struct *wq; +}; + +#ifdef CONFIG_MLX5_ESWITCH + +void mlx5_lag_mp_reset(struct mlx5_lag *ldev); +int mlx5_lag_mp_init(struct mlx5_lag *ldev); +void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev); +bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline void mlx5_lag_mp_reset(struct mlx5_lag *ldev) {}; +static inline int mlx5_lag_mp_init(struct mlx5_lag *ldev) { return 0; } +static inline void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) {} +static inline bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) { return false; } + +#endif /* CONFIG_MLX5_ESWITCH */ +#endif /* __MLX5_LAG_MP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c new file mode 100644 index 000000000..c17e8f1ec --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
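+ *
+ * Multi-port E-Switch (MPESW) mode bookkeeping (descriptive note, not upstream
+ * text): the first mlx5_lag_add_mpesw_rule() call activates the LAG in
+ * MLX5_LAG_MODE_MPESW and the last mlx5_lag_del_mpesw_rule() call disables it
+ * again; both requests are executed from ldev->wq and serialized on ldev->lock.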
*/ + +#include <linux/netdevice.h> +#include <net/nexthop.h> +#include "lag/lag.h" +#include "eswitch.h" +#include "lib/mlx5.h" + +static int add_mpesw_rule(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + int err; + + if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1) + return 0; + + if (ldev->mode != MLX5_LAG_MODE_NONE) { + err = -EINVAL; + goto out_err; + } + + err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false); + if (err) { + mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err); + goto out_err; + } + + return 0; + +out_err: + atomic_dec(&ldev->lag_mpesw.mpesw_rule_count); + return err; +} + +static void del_mpesw_rule(struct mlx5_lag *ldev) +{ + if (!atomic_dec_return(&ldev->lag_mpesw.mpesw_rule_count) && + ldev->mode == MLX5_LAG_MODE_MPESW) + mlx5_disable_lag(ldev); +} + +static void mlx5_mpesw_work(struct work_struct *work) +{ + struct mlx5_mpesw_work_st *mpesww = container_of(work, struct mlx5_mpesw_work_st, work); + struct mlx5_lag *ldev = mpesww->lag; + + mutex_lock(&ldev->lock); + if (mpesww->op == MLX5_MPESW_OP_ENABLE) + mpesww->result = add_mpesw_rule(ldev); + else if (mpesww->op == MLX5_MPESW_OP_DISABLE) + del_mpesw_rule(ldev); + mutex_unlock(&ldev->lock); + + complete(&mpesww->comp); +} + +static int mlx5_lag_mpesw_queue_work(struct mlx5_core_dev *dev, + enum mpesw_op op) +{ + struct mlx5_lag *ldev = dev->priv.lag; + struct mlx5_mpesw_work_st *work; + int err = 0; + + if (!ldev) + return 0; + + work = kzalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return -ENOMEM; + + INIT_WORK(&work->work, mlx5_mpesw_work); + init_completion(&work->comp); + work->op = op; + work->lag = ldev; + + if (!queue_work(ldev->wq, &work->work)) { + mlx5_core_warn(dev, "failed to queue mpesw work\n"); + err = -EINVAL; + goto out; + } + wait_for_completion(&work->comp); + err = work->result; +out: + kfree(work); + return err; +} + +void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev) +{ + mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_DISABLE); +} + +int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev) +{ + return mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_ENABLE); +} + +int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev) +{ + struct mlx5_lag *ldev = mdev->priv.lag; + + if (!netif_is_bond_master(out_dev) || !ldev) + return 0; + + if (ldev->mode == MLX5_LAG_MODE_MPESW) + return -EOPNOTSUPP; + + return 0; +} + +bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev) +{ + bool ret; + + ret = dev->priv.lag && dev->priv.lag->mode == MLX5_LAG_MODE_MPESW; + return ret; +} + +void mlx5_lag_mpesw_init(struct mlx5_lag *ldev) +{ + atomic_set(&ldev->lag_mpesw.mpesw_rule_count, 0); +} + +void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev) +{ + WARN_ON(atomic_read(&ldev->lag_mpesw.mpesw_rule_count)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h new file mode 100644 index 000000000..88e8daffc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#ifndef __MLX5_LAG_MPESW_H__ +#define __MLX5_LAG_MPESW_H__ + +#include "lag.h" +#include "mlx5_core.h" + +struct lag_mpesw { + struct work_struct mpesw_work; + atomic_t mpesw_rule_count; +}; + +int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev); +bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev); +#if IS_ENABLED(CONFIG_MLX5_ESWITCH) +void mlx5_lag_mpesw_init(struct mlx5_lag *ldev); +void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev); +#else +static inline void mlx5_lag_mpesw_init(struct mlx5_lag *ldev) {} +static inline void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev) {} +#endif + +#endif /* __MLX5_LAG_MPESW_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c new file mode 100644 index 000000000..7d9bbb494 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -0,0 +1,637 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */ + +#include <linux/netdevice.h> +#include "lag.h" + +enum { + MLX5_LAG_FT_LEVEL_TTC, + MLX5_LAG_FT_LEVEL_INNER_TTC, + MLX5_LAG_FT_LEVEL_DEFINER, +}; + +static struct mlx5_flow_group * +mlx5_create_hash_flow_group(struct mlx5_flow_table *ft, + struct mlx5_flow_definer *definer, + u8 rules) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_definer_id, + mlx5_get_match_definer_id(definer)); + MLX5_SET(create_flow_group_in, in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, in, end_flow_index, rules - 1); + MLX5_SET(create_flow_group_in, in, group_type, + MLX5_CREATE_FLOW_GROUP_IN_GROUP_TYPE_HASH_SPLIT); + + fg = mlx5_create_flow_group(ft, in); + kvfree(in); + return fg; +} + +static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, + struct mlx5_lag_definer *lag_definer, + u8 *ports) +{ + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_destination dest = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_namespace *ns; + int err, i; + int idx; + int j; + + ft_attr.max_fte = ldev->ports * ldev->buckets; + ft_attr.level = MLX5_LAG_FT_LEVEL_DEFINER; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_PORT_SEL); + if (!ns) { + mlx5_core_warn(dev, "Failed to get port selection namespace\n"); + return -EOPNOTSUPP; + } + + lag_definer->ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(lag_definer->ft)) { + mlx5_core_warn(dev, "Failed to create port selection table\n"); + return PTR_ERR(lag_definer->ft); + } + + lag_definer->fg = mlx5_create_hash_flow_group(lag_definer->ft, + lag_definer->definer, + ft_attr.max_fte); + if (IS_ERR(lag_definer->fg)) { + err = PTR_ERR(lag_definer->fg); + goto destroy_ft; + } + + dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; + dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + flow_act.flags |= FLOW_ACT_NO_APPEND; + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + u8 affinity; + + idx = i * ldev->buckets + j; + affinity = ports[idx]; + + dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[affinity - 1].dev, + vhca_id); + lag_definer->rules[idx] = mlx5_add_flow_rules(lag_definer->ft, + NULL, &flow_act, + &dest, 1); + if (IS_ERR(lag_definer->rules[idx])) { + err = PTR_ERR(lag_definer->rules[idx]); + while (i--) + while (j--) + 
mlx5_del_flow_rules(lag_definer->rules[idx]); + goto destroy_fg; + } + } + } + + return 0; + +destroy_fg: + mlx5_destroy_flow_group(lag_definer->fg); +destroy_ft: + mlx5_destroy_flow_table(lag_definer->ft); + return err; +} + +static int mlx5_lag_set_definer_inner(u32 *match_definer_mask, + enum mlx5_traffic_types tt) +{ + int format_id; + u8 *ipv6; + + switch (tt) { + case MLX5_TT_IPV4_UDP: + case MLX5_TT_IPV4_TCP: + format_id = 23; + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_l4_sport); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_l4_dport); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_ip_src_addr); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_ip_dest_addr); + break; + case MLX5_TT_IPV4: + format_id = 23; + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_l3_type); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_dmac_47_16); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_dmac_15_0); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_smac_47_16); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_smac_15_0); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_ip_src_addr); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_ip_dest_addr); + break; + case MLX5_TT_IPV6_TCP: + case MLX5_TT_IPV6_UDP: + format_id = 31; + MLX5_SET_TO_ONES(match_definer_format_31, match_definer_mask, + inner_l4_sport); + MLX5_SET_TO_ONES(match_definer_format_31, match_definer_mask, + inner_l4_dport); + ipv6 = MLX5_ADDR_OF(match_definer_format_31, match_definer_mask, + inner_ip_dest_addr); + memset(ipv6, 0xff, 16); + ipv6 = MLX5_ADDR_OF(match_definer_format_31, match_definer_mask, + inner_ip_src_addr); + memset(ipv6, 0xff, 16); + break; + case MLX5_TT_IPV6: + format_id = 32; + ipv6 = MLX5_ADDR_OF(match_definer_format_32, match_definer_mask, + inner_ip_dest_addr); + memset(ipv6, 0xff, 16); + ipv6 = MLX5_ADDR_OF(match_definer_format_32, match_definer_mask, + inner_ip_src_addr); + memset(ipv6, 0xff, 16); + MLX5_SET_TO_ONES(match_definer_format_32, match_definer_mask, + inner_dmac_47_16); + MLX5_SET_TO_ONES(match_definer_format_32, match_definer_mask, + inner_dmac_15_0); + MLX5_SET_TO_ONES(match_definer_format_32, match_definer_mask, + inner_smac_47_16); + MLX5_SET_TO_ONES(match_definer_format_32, match_definer_mask, + inner_smac_15_0); + break; + default: + format_id = 23; + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_l3_type); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_dmac_47_16); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_dmac_15_0); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_smac_47_16); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_smac_15_0); + break; + } + + return format_id; +} + +static int mlx5_lag_set_definer(u32 *match_definer_mask, + enum mlx5_traffic_types tt, bool tunnel, + enum netdev_lag_hash hash) +{ + int format_id; + u8 *ipv6; + + if (tunnel) + return mlx5_lag_set_definer_inner(match_definer_mask, tt); + + switch (tt) { + case MLX5_TT_IPV4_UDP: + case MLX5_TT_IPV4_TCP: + format_id = 22; + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_l4_sport); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_l4_dport); + MLX5_SET_TO_ONES(match_definer_format_22, 
match_definer_mask, + outer_ip_src_addr); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_ip_dest_addr); + break; + case MLX5_TT_IPV4: + format_id = 22; + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_l3_type); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_dmac_47_16); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_dmac_15_0); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_smac_47_16); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_smac_15_0); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_ip_src_addr); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_ip_dest_addr); + break; + case MLX5_TT_IPV6_TCP: + case MLX5_TT_IPV6_UDP: + format_id = 29; + MLX5_SET_TO_ONES(match_definer_format_29, match_definer_mask, + outer_l4_sport); + MLX5_SET_TO_ONES(match_definer_format_29, match_definer_mask, + outer_l4_dport); + ipv6 = MLX5_ADDR_OF(match_definer_format_29, match_definer_mask, + outer_ip_dest_addr); + memset(ipv6, 0xff, 16); + ipv6 = MLX5_ADDR_OF(match_definer_format_29, match_definer_mask, + outer_ip_src_addr); + memset(ipv6, 0xff, 16); + break; + case MLX5_TT_IPV6: + format_id = 30; + ipv6 = MLX5_ADDR_OF(match_definer_format_30, match_definer_mask, + outer_ip_dest_addr); + memset(ipv6, 0xff, 16); + ipv6 = MLX5_ADDR_OF(match_definer_format_30, match_definer_mask, + outer_ip_src_addr); + memset(ipv6, 0xff, 16); + MLX5_SET_TO_ONES(match_definer_format_30, match_definer_mask, + outer_dmac_47_16); + MLX5_SET_TO_ONES(match_definer_format_30, match_definer_mask, + outer_dmac_15_0); + MLX5_SET_TO_ONES(match_definer_format_30, match_definer_mask, + outer_smac_47_16); + MLX5_SET_TO_ONES(match_definer_format_30, match_definer_mask, + outer_smac_15_0); + break; + default: + format_id = 0; + MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask, + outer_smac_47_16); + MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask, + outer_smac_15_0); + + if (hash == NETDEV_LAG_HASH_VLAN_SRCMAC) { + MLX5_SET_TO_ONES(match_definer_format_0, + match_definer_mask, + outer_first_vlan_vid); + break; + } + + MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask, + outer_ethertype); + MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask, + outer_dmac_47_16); + MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask, + outer_dmac_15_0); + break; + } + + return format_id; +} + +static struct mlx5_lag_definer * +mlx5_lag_create_definer(struct mlx5_lag *ldev, enum netdev_lag_hash hash, + enum mlx5_traffic_types tt, bool tunnel, u8 *ports) +{ + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_lag_definer *lag_definer; + u32 *match_definer_mask; + int format_id, err; + + lag_definer = kzalloc(sizeof(*lag_definer), GFP_KERNEL); + if (!lag_definer) + return ERR_PTR(-ENOMEM); + + match_definer_mask = kvzalloc(MLX5_FLD_SZ_BYTES(match_definer, + match_mask), + GFP_KERNEL); + if (!match_definer_mask) { + err = -ENOMEM; + goto free_lag_definer; + } + + format_id = mlx5_lag_set_definer(match_definer_mask, tt, tunnel, hash); + lag_definer->definer = + mlx5_create_match_definer(dev, MLX5_FLOW_NAMESPACE_PORT_SEL, + format_id, match_definer_mask); + if (IS_ERR(lag_definer->definer)) { + err = PTR_ERR(lag_definer->definer); + goto free_mask; + } + + err = mlx5_lag_create_port_sel_table(ldev, lag_definer, ports); + if (err) + goto destroy_match_definer; + + kvfree(match_definer_mask); + + return 
lag_definer; + +destroy_match_definer: + mlx5_destroy_match_definer(dev, lag_definer->definer); +free_mask: + kvfree(match_definer_mask); +free_lag_definer: + kfree(lag_definer); + return ERR_PTR(err); +} + +static void mlx5_lag_destroy_definer(struct mlx5_lag *ldev, + struct mlx5_lag_definer *lag_definer) +{ + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + int idx; + int i; + int j; + + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + idx = i * ldev->buckets + j; + mlx5_del_flow_rules(lag_definer->rules[idx]); + } + } + mlx5_destroy_flow_group(lag_definer->fg); + mlx5_destroy_flow_table(lag_definer->ft); + mlx5_destroy_match_definer(dev, lag_definer->definer); + kfree(lag_definer); +} + +static void mlx5_lag_destroy_definers(struct mlx5_lag *ldev) +{ + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + int tt; + + for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { + if (port_sel->outer.definers[tt]) + mlx5_lag_destroy_definer(ldev, + port_sel->outer.definers[tt]); + if (port_sel->inner.definers[tt]) + mlx5_lag_destroy_definer(ldev, + port_sel->inner.definers[tt]); + } +} + +static int mlx5_lag_create_definers(struct mlx5_lag *ldev, + enum netdev_lag_hash hash_type, + u8 *ports) +{ + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + struct mlx5_lag_definer *lag_definer; + int tt, err; + + for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { + lag_definer = mlx5_lag_create_definer(ldev, hash_type, tt, + false, ports); + if (IS_ERR(lag_definer)) { + err = PTR_ERR(lag_definer); + goto destroy_definers; + } + port_sel->outer.definers[tt] = lag_definer; + + if (!port_sel->tunnel) + continue; + + lag_definer = + mlx5_lag_create_definer(ldev, hash_type, tt, + true, ports); + if (IS_ERR(lag_definer)) { + err = PTR_ERR(lag_definer); + goto destroy_definers; + } + port_sel->inner.definers[tt] = lag_definer; + } + + return 0; + +destroy_definers: + mlx5_lag_destroy_definers(ldev); + return err; +} + +static void set_tt_map(struct mlx5_lag_port_sel *port_sel, + enum netdev_lag_hash hash) +{ + port_sel->tunnel = false; + + switch (hash) { + case NETDEV_LAG_HASH_E34: + port_sel->tunnel = true; + fallthrough; + case NETDEV_LAG_HASH_L34: + set_bit(MLX5_TT_IPV4_TCP, port_sel->tt_map); + set_bit(MLX5_TT_IPV4_UDP, port_sel->tt_map); + set_bit(MLX5_TT_IPV6_TCP, port_sel->tt_map); + set_bit(MLX5_TT_IPV6_UDP, port_sel->tt_map); + set_bit(MLX5_TT_IPV4, port_sel->tt_map); + set_bit(MLX5_TT_IPV6, port_sel->tt_map); + set_bit(MLX5_TT_ANY, port_sel->tt_map); + break; + case NETDEV_LAG_HASH_E23: + port_sel->tunnel = true; + fallthrough; + case NETDEV_LAG_HASH_L23: + set_bit(MLX5_TT_IPV4, port_sel->tt_map); + set_bit(MLX5_TT_IPV6, port_sel->tt_map); + set_bit(MLX5_TT_ANY, port_sel->tt_map); + break; + default: + set_bit(MLX5_TT_ANY, port_sel->tt_map); + break; + } +} + +#define SET_IGNORE_DESTS_BITS(tt_map, dests) \ + do { \ + int idx; \ + \ + for_each_clear_bit(idx, tt_map, MLX5_NUM_TT) \ + set_bit(idx, dests); \ + } while (0) + +static void mlx5_lag_set_inner_ttc_params(struct mlx5_lag *ldev, + struct ttc_params *ttc_params) +{ + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + struct mlx5_flow_table_attr *ft_attr; + int tt; + + ttc_params->ns = mlx5_get_flow_namespace(dev, + MLX5_FLOW_NAMESPACE_PORT_SEL); + ft_attr = &ttc_params->ft_attr; + ft_attr->level = MLX5_LAG_FT_LEVEL_INNER_TTC; + + for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { + ttc_params->dests[tt].type = + 
MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + ttc_params->dests[tt].ft = port_sel->inner.definers[tt]->ft; + } + SET_IGNORE_DESTS_BITS(port_sel->tt_map, ttc_params->ignore_dests); +} + +static void mlx5_lag_set_outer_ttc_params(struct mlx5_lag *ldev, + struct ttc_params *ttc_params) +{ + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + struct mlx5_flow_table_attr *ft_attr; + int tt; + + ttc_params->ns = mlx5_get_flow_namespace(dev, + MLX5_FLOW_NAMESPACE_PORT_SEL); + ft_attr = &ttc_params->ft_attr; + ft_attr->level = MLX5_LAG_FT_LEVEL_TTC; + + for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { + ttc_params->dests[tt].type = + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + ttc_params->dests[tt].ft = port_sel->outer.definers[tt]->ft; + } + SET_IGNORE_DESTS_BITS(port_sel->tt_map, ttc_params->ignore_dests); + + ttc_params->inner_ttc = port_sel->tunnel; + if (!port_sel->tunnel) + return; + + for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { + ttc_params->tunnel_dests[tt].type = + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + ttc_params->tunnel_dests[tt].ft = + mlx5_get_ttc_flow_table(port_sel->inner.ttc); + } +} + +static int mlx5_lag_create_ttc_table(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + struct ttc_params ttc_params = {}; + + mlx5_lag_set_outer_ttc_params(ldev, &ttc_params); + port_sel->outer.ttc = mlx5_create_ttc_table(dev, &ttc_params); + if (IS_ERR(port_sel->outer.ttc)) + return PTR_ERR(port_sel->outer.ttc); + + return 0; +} + +static int mlx5_lag_create_inner_ttc_table(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + struct ttc_params ttc_params = {}; + + mlx5_lag_set_inner_ttc_params(ldev, &ttc_params); + port_sel->inner.ttc = mlx5_create_inner_ttc_table(dev, &ttc_params); + if (IS_ERR(port_sel->inner.ttc)) + return PTR_ERR(port_sel->inner.ttc); + + return 0; +} + +int mlx5_lag_port_sel_create(struct mlx5_lag *ldev, + enum netdev_lag_hash hash_type, u8 *ports) +{ + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + int err; + + set_tt_map(port_sel, hash_type); + err = mlx5_lag_create_definers(ldev, hash_type, ports); + if (err) + return err; + + if (port_sel->tunnel) { + err = mlx5_lag_create_inner_ttc_table(ldev); + if (err) + goto destroy_definers; + } + + err = mlx5_lag_create_ttc_table(ldev); + if (err) + goto destroy_inner; + + return 0; + +destroy_inner: + if (port_sel->tunnel) + mlx5_destroy_ttc_table(port_sel->inner.ttc); +destroy_definers: + mlx5_lag_destroy_definers(ldev); + return err; +} + +static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev, + struct mlx5_lag_definer *def, + u8 *ports) +{ + struct mlx5_flow_destination dest = {}; + int idx; + int err; + int i; + int j; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; + dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + idx = i * ldev->buckets + j; + if (ldev->v2p_map[idx] == ports[idx]) + continue; + + dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev, + vhca_id); + err = mlx5_modify_rule_destination(def->rules[idx], &dest, NULL); + if (err) + return err; + } + } + + return 0; +} + +static int +mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev, + struct mlx5_lag_definer **definers, + u8 *ports) +{ + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + int 
err; + int tt; + + for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { + err = __mlx5_lag_modify_definers_destinations(ldev, definers[tt], ports); + if (err) + return err; + } + + return 0; +} + +int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports) +{ + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + int err; + + err = mlx5_lag_modify_definers_destinations(ldev, + port_sel->outer.definers, + ports); + if (err) + return err; + + if (!port_sel->tunnel) + return 0; + + return mlx5_lag_modify_definers_destinations(ldev, + port_sel->inner.definers, + ports); +} + +void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev) +{ + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + + mlx5_destroy_ttc_table(port_sel->outer.ttc); + if (port_sel->tunnel) + mlx5_destroy_ttc_table(port_sel->inner.ttc); + mlx5_lag_destroy_definers(ldev); + memset(port_sel, 0, sizeof(*port_sel)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h new file mode 100644 index 000000000..5ec3af2a3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */ + +#ifndef __MLX5_LAG_FS_H__ +#define __MLX5_LAG_FS_H__ + +#include "lib/fs_ttc.h" + +struct mlx5_lag_definer { + struct mlx5_flow_definer *definer; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + /* Each port has ldev->buckets number of rules and they are arrange in + * [port * buckets .. port * buckets + buckets) locations + */ + struct mlx5_flow_handle *rules[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS]; +}; + +struct mlx5_lag_ttc { + struct mlx5_ttc_table *ttc; + struct mlx5_lag_definer *definers[MLX5_NUM_TT]; +}; + +struct mlx5_lag_port_sel { + DECLARE_BITMAP(tt_map, MLX5_NUM_TT); + bool tunnel; + struct mlx5_lag_ttc outer; + struct mlx5_lag_ttc inner; +}; + +#ifdef CONFIG_MLX5_ESWITCH + +int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports); +void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev); +int mlx5_lag_port_sel_create(struct mlx5_lag *ldev, + enum netdev_lag_hash hash_type, u8 *ports); + +#else /* CONFIG_MLX5_ESWITCH */ +static inline int mlx5_lag_port_sel_create(struct mlx5_lag *ldev, + enum netdev_lag_hash hash_type, + u8 *ports) +{ + return 0; +} + +static inline int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports) +{ + return 0; +} + +static inline void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev) {} +#endif /* CONFIG_MLX5_ESWITCH */ +#endif /* __MLX5_LAG_FS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c new file mode 100644 index 000000000..c215252f2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c @@ -0,0 +1,432 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
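+//
+// ASO (Advanced Steering Operation) helper: creates a dedicated SQ/CQ pair so
+// callers can build ACCESS_ASO work queue entries, post them to the device and
+// poll for their completions.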
+ +#include <linux/mlx5/device.h> +#include <linux/mlx5/transobj.h> +#include "clock.h" +#include "aso.h" +#include "wq.h" + +struct mlx5_aso_cq { + /* data path - accessed per cqe */ + struct mlx5_cqwq wq; + + /* data path - accessed per napi poll */ + struct mlx5_core_cq mcq; + + /* control */ + struct mlx5_core_dev *mdev; + struct mlx5_wq_ctrl wq_ctrl; +} ____cacheline_aligned_in_smp; + +struct mlx5_aso { + /* data path */ + u16 cc; + u16 pc; + + struct mlx5_wqe_ctrl_seg *doorbell_cseg; + struct mlx5_aso_cq cq; + + /* read only */ + struct mlx5_wq_cyc wq; + void __iomem *uar_map; + u32 sqn; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + +} ____cacheline_aligned_in_smp; + +static void mlx5_aso_free_cq(struct mlx5_aso_cq *cq) +{ + mlx5_wq_destroy(&cq->wq_ctrl); +} + +static int mlx5_aso_alloc_cq(struct mlx5_core_dev *mdev, int numa_node, + void *cqc_data, struct mlx5_aso_cq *cq) +{ + struct mlx5_core_cq *mcq = &cq->mcq; + struct mlx5_wq_param param; + int err; + u32 i; + + param.buf_numa_node = numa_node; + param.db_numa_node = numa_node; + + err = mlx5_cqwq_create(mdev, ¶m, cqc_data, &cq->wq, &cq->wq_ctrl); + if (err) + return err; + + mcq->cqe_sz = 64; + mcq->set_ci_db = cq->wq_ctrl.db.db; + mcq->arm_db = cq->wq_ctrl.db.db + 1; + + for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); + + cqe->op_own = 0xf1; + } + + cq->mdev = mdev; + + return 0; +} + +static int create_aso_cq(struct mlx5_aso_cq *cq, void *cqc_data) +{ + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; + struct mlx5_core_dev *mdev = cq->mdev; + struct mlx5_core_cq *mcq = &cq->mcq; + void *in, *cqc; + int inlen, eqn; + int err; + + err = mlx5_vector2eqn(mdev, 0, &eqn); + if (err) + return err; + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + sizeof(u64) * cq->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + + memcpy(cqc, cqc_data, MLX5_ST_SZ_BYTES(cqc)); + + mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); + + MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); + MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); + MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); + + err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out)); + + kvfree(in); + + return err; +} + +static void mlx5_aso_destroy_cq(struct mlx5_aso_cq *cq) +{ + mlx5_core_destroy_cq(cq->mdev, &cq->mcq); + mlx5_wq_destroy(&cq->wq_ctrl); +} + +static int mlx5_aso_create_cq(struct mlx5_core_dev *mdev, int numa_node, + struct mlx5_aso_cq *cq) +{ + void *cqc_data; + int err; + + cqc_data = kvzalloc(MLX5_ST_SZ_BYTES(cqc), GFP_KERNEL); + if (!cqc_data) + return -ENOMEM; + + MLX5_SET(cqc, cqc_data, log_cq_size, 1); + MLX5_SET(cqc, cqc_data, uar_page, mdev->priv.uar->index); + if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128) + MLX5_SET(cqc, cqc_data, cqe_sz, CQE_STRIDE_128_PAD); + + err = mlx5_aso_alloc_cq(mdev, numa_node, cqc_data, cq); + if (err) { + mlx5_core_err(mdev, "Failed to alloc aso wq cq, err=%d\n", err); + goto err_out; + } + + err = create_aso_cq(cq, cqc_data); + if (err) { + mlx5_core_err(mdev, "Failed to create aso wq cq, err=%d\n", err); + goto err_free_cq; + } + + kvfree(cqc_data); + return 0; + +err_free_cq: + mlx5_aso_free_cq(cq); +err_out: + kvfree(cqc_data); + return err; 
+} + +static int mlx5_aso_alloc_sq(struct mlx5_core_dev *mdev, int numa_node, + void *sqc_data, struct mlx5_aso *sq) +{ + void *sqc_wq = MLX5_ADDR_OF(sqc, sqc_data, wq); + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5_wq_param param; + int err; + + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + + param.db_numa_node = numa_node; + param.buf_numa_node = numa_node; + err = mlx5_wq_cyc_create(mdev, ¶m, sqc_wq, wq, &sq->wq_ctrl); + if (err) + return err; + wq->db = &wq->db[MLX5_SND_DBR]; + + return 0; +} + +static int create_aso_sq(struct mlx5_core_dev *mdev, int pdn, + void *sqc_data, struct mlx5_aso *sq) +{ + void *in, *sqc, *wq; + int inlen, err; + u8 ts_format; + + inlen = MLX5_ST_SZ_BYTES(create_sq_in) + + sizeof(u64) * sq->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); + wq = MLX5_ADDR_OF(sqc, sqc, wq); + + memcpy(sqc, sqc_data, MLX5_ST_SZ_BYTES(sqc)); + MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); + + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + MLX5_SET(sqc, sqc, flush_in_error_en, 1); + + ts_format = mlx5_is_real_time_sq(mdev) ? + MLX5_TIMESTAMP_FORMAT_REAL_TIME : + MLX5_TIMESTAMP_FORMAT_FREE_RUNNING; + MLX5_SET(sqc, sqc, ts_format, ts_format); + + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.hw_objs.bfreg.index); + MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma); + + mlx5_fill_page_frag_array(&sq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); + + err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn); + + kvfree(in); + + return err; +} + +static int mlx5_aso_set_sq_rdy(struct mlx5_core_dev *mdev, u32 sqn) +{ + void *in, *sqc; + int inlen, err; + + inlen = MLX5_ST_SZ_BYTES(modify_sq_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RST); + sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY); + + err = mlx5_core_modify_sq(mdev, sqn, in); + + kvfree(in); + + return err; +} + +static int mlx5_aso_create_sq_rdy(struct mlx5_core_dev *mdev, u32 pdn, + void *sqc_data, struct mlx5_aso *sq) +{ + int err; + + err = create_aso_sq(mdev, pdn, sqc_data, sq); + if (err) + return err; + + err = mlx5_aso_set_sq_rdy(mdev, sq->sqn); + if (err) + mlx5_core_destroy_sq(mdev, sq->sqn); + + return err; +} + +static void mlx5_aso_free_sq(struct mlx5_aso *sq) +{ + mlx5_wq_destroy(&sq->wq_ctrl); +} + +static void mlx5_aso_destroy_sq(struct mlx5_aso *sq) +{ + mlx5_core_destroy_sq(sq->cq.mdev, sq->sqn); + mlx5_aso_free_sq(sq); +} + +static int mlx5_aso_create_sq(struct mlx5_core_dev *mdev, int numa_node, + u32 pdn, struct mlx5_aso *sq) +{ + void *sqc_data, *wq; + int err; + + sqc_data = kvzalloc(MLX5_ST_SZ_BYTES(sqc), GFP_KERNEL); + if (!sqc_data) + return -ENOMEM; + + wq = MLX5_ADDR_OF(sqc, sqc_data, wq); + MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); + MLX5_SET(wq, wq, pd, pdn); + MLX5_SET(wq, wq, log_wq_sz, 1); + + err = mlx5_aso_alloc_sq(mdev, numa_node, sqc_data, sq); + if (err) { + mlx5_core_err(mdev, "Failed to alloc aso wq sq, err=%d\n", err); + goto err_out; + } + + err = mlx5_aso_create_sq_rdy(mdev, pdn, sqc_data, sq); + if (err) { + mlx5_core_err(mdev, "Failed to open aso wq sq, err=%d\n", err); + goto err_free_asosq; + } + + mlx5_core_dbg(mdev, "aso sq->sqn = 0x%x\n", sq->sqn); + + kvfree(sqc_data); + return 0; + +err_free_asosq: + 
mlx5_aso_free_sq(sq); +err_out: + kvfree(sqc_data); + return err; +} + +struct mlx5_aso *mlx5_aso_create(struct mlx5_core_dev *mdev, u32 pdn) +{ + int numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); + struct mlx5_aso *aso; + int err; + + aso = kzalloc(sizeof(*aso), GFP_KERNEL); + if (!aso) + return ERR_PTR(-ENOMEM); + + err = mlx5_aso_create_cq(mdev, numa_node, &aso->cq); + if (err) + goto err_cq; + + err = mlx5_aso_create_sq(mdev, numa_node, pdn, aso); + if (err) + goto err_sq; + + return aso; + +err_sq: + mlx5_aso_destroy_cq(&aso->cq); +err_cq: + kfree(aso); + return ERR_PTR(err); +} + +void mlx5_aso_destroy(struct mlx5_aso *aso) +{ + if (IS_ERR_OR_NULL(aso)) + return; + + mlx5_aso_destroy_sq(aso); + mlx5_aso_destroy_cq(&aso->cq); + kfree(aso); +} + +void mlx5_aso_build_wqe(struct mlx5_aso *aso, u8 ds_cnt, + struct mlx5_aso_wqe *aso_wqe, + u32 obj_id, u32 opc_mode) +{ + struct mlx5_wqe_ctrl_seg *cseg = &aso_wqe->ctrl; + + cseg->opmod_idx_opcode = cpu_to_be32((opc_mode << MLX5_WQE_CTRL_WQE_OPC_MOD_SHIFT) | + (aso->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | + MLX5_OPCODE_ACCESS_ASO); + cseg->qpn_ds = cpu_to_be32((aso->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | ds_cnt); + cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + cseg->general_id = cpu_to_be32(obj_id); +} + +void *mlx5_aso_get_wqe(struct mlx5_aso *aso) +{ + u16 pi; + + pi = mlx5_wq_cyc_ctr2ix(&aso->wq, aso->pc); + return mlx5_wq_cyc_get_wqe(&aso->wq, pi); +} + +void mlx5_aso_post_wqe(struct mlx5_aso *aso, bool with_data, + struct mlx5_wqe_ctrl_seg *doorbell_cseg) +{ + doorbell_cseg->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE; + /* ensure wqe is visible to device before updating doorbell record */ + dma_wmb(); + + if (with_data) + aso->pc += MLX5_ASO_WQEBBS_DATA; + else + aso->pc += MLX5_ASO_WQEBBS; + *aso->wq.db = cpu_to_be32(aso->pc); + + /* ensure doorbell record is visible to device before ringing the + * doorbell + */ + wmb(); + + mlx5_write64((__be32 *)doorbell_cseg, aso->uar_map); + + /* Ensure doorbell is written on uar_page before poll_cq */ + WRITE_ONCE(doorbell_cseg, NULL); +} + +int mlx5_aso_poll_cq(struct mlx5_aso *aso, bool with_data) +{ + struct mlx5_aso_cq *cq = &aso->cq; + struct mlx5_cqe64 *cqe; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) + return -ETIMEDOUT; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + mlx5_cqwq_pop(&cq->wq); + + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { + struct mlx5_err_cqe *err_cqe; + + mlx5_core_err(cq->mdev, "Bad OP in ASOSQ CQE: 0x%x\n", + get_cqe_opcode(cqe)); + + err_cqe = (struct mlx5_err_cqe *)cqe; + mlx5_core_err(cq->mdev, "vendor_err_synd=%x\n", + err_cqe->vendor_err_synd); + mlx5_core_err(cq->mdev, "syndrome=%x\n", + err_cqe->syndrome); + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, + 16, 1, err_cqe, + sizeof(*err_cqe), false); + } + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + if (with_data) + aso->cc += MLX5_ASO_WQEBBS_DATA; + else + aso->cc += MLX5_ASO_WQEBBS; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h new file mode 100644 index 000000000..2d40dcf9d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#ifndef __MLX5_LIB_ASO_H__ +#define __MLX5_LIB_ASO_H__ + +#include <linux/mlx5/qp.h> +#include "mlx5_core.h" + +#define MLX5_ASO_WQEBBS \ + (DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe), MLX5_SEND_WQE_BB)) +#define MLX5_ASO_WQEBBS_DATA \ + (DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe_data), MLX5_SEND_WQE_BB)) +#define ASO_CTRL_READ_EN BIT(0) +#define MLX5_WQE_CTRL_WQE_OPC_MOD_SHIFT 24 +#define MLX5_MACSEC_ASO_DS_CNT (DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe), MLX5_SEND_WQE_DS)) + +struct mlx5_wqe_aso_ctrl_seg { + __be32 va_h; + __be32 va_l; /* include read_enable */ + __be32 l_key; + u8 data_mask_mode; + u8 condition_1_0_operand; + u8 condition_1_0_offset; + u8 data_offset_condition_operand; + __be32 condition_0_data; + __be32 condition_0_mask; + __be32 condition_1_data; + __be32 condition_1_mask; + __be64 bitwise_data; + __be64 data_mask; +}; + +struct mlx5_wqe_aso_data_seg { + __be32 bytewise_data[16]; +}; + +struct mlx5_aso_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_aso_ctrl_seg aso_ctrl; +}; + +struct mlx5_aso_wqe_data { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_aso_ctrl_seg aso_ctrl; + struct mlx5_wqe_aso_data_seg aso_data; +}; + +enum { + MLX5_ASO_LOGICAL_AND, + MLX5_ASO_LOGICAL_OR, +}; + +enum { + MLX5_ASO_ALWAYS_FALSE, + MLX5_ASO_ALWAYS_TRUE, + MLX5_ASO_EQUAL, + MLX5_ASO_NOT_EQUAL, + MLX5_ASO_GREATER_OR_EQUAL, + MLX5_ASO_LESSER_OR_EQUAL, + MLX5_ASO_LESSER, + MLX5_ASO_GREATER, + MLX5_ASO_CYCLIC_GREATER, + MLX5_ASO_CYCLIC_LESSER, +}; + +enum { + MLX5_ASO_DATA_MASK_MODE_BITWISE_64BIT, + MLX5_ASO_DATA_MASK_MODE_BYTEWISE_64BYTE, + MLX5_ASO_DATA_MASK_MODE_CALCULATED_64BYTE, +}; + +enum { + MLX5_ACCESS_ASO_OPC_MOD_FLOW_METER = 0x2, + MLX5_ACCESS_ASO_OPC_MOD_MACSEC = 0x5, +}; + +struct mlx5_aso; + +void *mlx5_aso_get_wqe(struct mlx5_aso *aso); +void mlx5_aso_build_wqe(struct mlx5_aso *aso, u8 ds_cnt, + struct mlx5_aso_wqe *aso_wqe, + u32 obj_id, u32 opc_mode); +void mlx5_aso_post_wqe(struct mlx5_aso *aso, bool with_data, + struct mlx5_wqe_ctrl_seg *doorbell_cseg); +int mlx5_aso_poll_cq(struct mlx5_aso *aso, bool with_data); + +struct mlx5_aso *mlx5_aso_create(struct mlx5_core_dev *mdev, u32 pdn); +void mlx5_aso_destroy(struct mlx5_aso *aso); +#endif /* __MLX5_LIB_ASO_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c new file mode 100644 index 000000000..2ac255bb9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -0,0 +1,1026 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/clocksource.h> +#include <linux/highmem.h> +#include <linux/ptp_clock_kernel.h> +#include <rdma/mlx5-abi.h> +#include "lib/eq.h" +#include "en.h" +#include "clock.h" + +enum { + MLX5_CYCLES_SHIFT = 23 +}; + +enum { + MLX5_PIN_MODE_IN = 0x0, + MLX5_PIN_MODE_OUT = 0x1, +}; + +enum { + MLX5_OUT_PATTERN_PULSE = 0x0, + MLX5_OUT_PATTERN_PERIODIC = 0x1, +}; + +enum { + MLX5_EVENT_MODE_DISABLE = 0x0, + MLX5_EVENT_MODE_REPETETIVE = 0x1, + MLX5_EVENT_MODE_ONCE_TILL_ARM = 0x2, +}; + +enum { + MLX5_MTPPS_FS_ENABLE = BIT(0x0), + MLX5_MTPPS_FS_PATTERN = BIT(0x2), + MLX5_MTPPS_FS_PIN_MODE = BIT(0x3), + MLX5_MTPPS_FS_TIME_STAMP = BIT(0x4), + MLX5_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5), + MLX5_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7), + MLX5_MTPPS_FS_NPPS_PERIOD = BIT(0x9), + MLX5_MTPPS_FS_OUT_PULSE_DURATION_NS = BIT(0xa), +}; + +static bool mlx5_real_time_mode(struct mlx5_core_dev *mdev) +{ + return (mlx5_is_real_time_rq(mdev) || mlx5_is_real_time_sq(mdev)); +} + +static bool mlx5_npps_real_time_supported(struct mlx5_core_dev *mdev) +{ + return (mlx5_real_time_mode(mdev) && + MLX5_CAP_MCAM_FEATURE(mdev, npps_period) && + MLX5_CAP_MCAM_FEATURE(mdev, out_pulse_duration_ns)); +} + +static bool mlx5_modify_mtutc_allowed(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_MCAM_FEATURE(mdev, ptpcyc2realtime_modify); +} + +static int mlx5_set_mtutc(struct mlx5_core_dev *dev, u32 *mtutc, u32 size) +{ + u32 out[MLX5_ST_SZ_DW(mtutc_reg)] = {}; + + if (!MLX5_CAP_MCAM_REG(dev, mtutc)) + return -EOPNOTSUPP; + + return mlx5_core_access_reg(dev, mtutc, size, out, sizeof(out), + MLX5_REG_MTUTC, 0, 1); +} + +static u64 mlx5_read_time(struct mlx5_core_dev *dev, + struct ptp_system_timestamp *sts, + bool real_time) +{ + u32 timer_h, timer_h1, timer_l; + + timer_h = ioread32be(real_time ? &dev->iseg->real_time_h : + &dev->iseg->internal_timer_h); + ptp_read_system_prets(sts); + timer_l = ioread32be(real_time ? &dev->iseg->real_time_l : + &dev->iseg->internal_timer_l); + ptp_read_system_postts(sts); + timer_h1 = ioread32be(real_time ? &dev->iseg->real_time_h : + &dev->iseg->internal_timer_h); + if (timer_h != timer_h1) { + /* wrap around */ + ptp_read_system_prets(sts); + timer_l = ioread32be(real_time ? &dev->iseg->real_time_l : + &dev->iseg->internal_timer_l); + ptp_read_system_postts(sts); + } + + return real_time ? 
REAL_TIME_TO_NS(timer_h1, timer_l) : + (u64)timer_l | (u64)timer_h1 << 32; +} + +static u64 read_internal_timer(const struct cyclecounter *cc) +{ + struct mlx5_timer *timer = container_of(cc, struct mlx5_timer, cycles); + struct mlx5_clock *clock = container_of(timer, struct mlx5_clock, timer); + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, + clock); + + return mlx5_read_time(mdev, NULL, false) & cc->mask; +} + +static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev) +{ + struct mlx5_ib_clock_info *clock_info = mdev->clock_info; + struct mlx5_clock *clock = &mdev->clock; + struct mlx5_timer *timer; + u32 sign; + + if (!clock_info) + return; + + sign = smp_load_acquire(&clock_info->sign); + smp_store_mb(clock_info->sign, + sign | MLX5_IB_CLOCK_INFO_KERNEL_UPDATING); + + timer = &clock->timer; + clock_info->cycles = timer->tc.cycle_last; + clock_info->mult = timer->cycles.mult; + clock_info->nsec = timer->tc.nsec; + clock_info->frac = timer->tc.frac; + + smp_store_release(&clock_info->sign, + sign + MLX5_IB_CLOCK_INFO_KERNEL_UPDATING * 2); +} + +static void mlx5_pps_out(struct work_struct *work) +{ + struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps, + out_work); + struct mlx5_clock *clock = container_of(pps_info, struct mlx5_clock, + pps_info); + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, + clock); + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + unsigned long flags; + int i; + + for (i = 0; i < clock->ptp_info.n_pins; i++) { + u64 tstart; + + write_seqlock_irqsave(&clock->lock, flags); + tstart = clock->pps_info.start[i]; + clock->pps_info.start[i] = 0; + write_sequnlock_irqrestore(&clock->lock, flags); + if (!tstart) + continue; + + MLX5_SET(mtpps_reg, in, pin, i); + MLX5_SET64(mtpps_reg, in, time_stamp, tstart); + MLX5_SET(mtpps_reg, in, field_select, MLX5_MTPPS_FS_TIME_STAMP); + mlx5_set_mtpps(mdev, in, sizeof(in)); + } +} + +static void mlx5_timestamp_overflow(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlx5_core_dev *mdev; + struct mlx5_timer *timer; + struct mlx5_clock *clock; + unsigned long flags; + + timer = container_of(dwork, struct mlx5_timer, overflow_work); + clock = container_of(timer, struct mlx5_clock, timer); + mdev = container_of(clock, struct mlx5_core_dev, clock); + + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + goto out; + + write_seqlock_irqsave(&clock->lock, flags); + timecounter_read(&timer->tc); + mlx5_update_clock_info_page(mdev); + write_sequnlock_irqrestore(&clock->lock, flags); + +out: + schedule_delayed_work(&timer->overflow_work, timer->overflow_period); +} + +static int mlx5_ptp_settime_real_time(struct mlx5_core_dev *mdev, + const struct timespec64 *ts) +{ + u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {}; + + if (!mlx5_modify_mtutc_allowed(mdev)) + return 0; + + if (ts->tv_sec < 0 || ts->tv_sec > U32_MAX || + ts->tv_nsec < 0 || ts->tv_nsec > NSEC_PER_SEC) + return -EINVAL; + + MLX5_SET(mtutc_reg, in, operation, MLX5_MTUTC_OPERATION_SET_TIME_IMMEDIATE); + MLX5_SET(mtutc_reg, in, utc_sec, ts->tv_sec); + MLX5_SET(mtutc_reg, in, utc_nsec, ts->tv_nsec); + + return mlx5_set_mtutc(mdev, in, sizeof(in)); +} + +static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_timer *timer = &clock->timer; + struct mlx5_core_dev *mdev; + unsigned long flags; + int err; + + mdev = container_of(clock, struct mlx5_core_dev, clock); + err 
= mlx5_ptp_settime_real_time(mdev, ts); + if (err) + return err; + + write_seqlock_irqsave(&clock->lock, flags); + timecounter_init(&timer->tc, &timer->cycles, timespec64_to_ns(ts)); + mlx5_update_clock_info_page(mdev); + write_sequnlock_irqrestore(&clock->lock, flags); + + return 0; +} + +static +struct timespec64 mlx5_ptp_gettimex_real_time(struct mlx5_core_dev *mdev, + struct ptp_system_timestamp *sts) +{ + struct timespec64 ts; + u64 time; + + time = mlx5_read_time(mdev, sts, true); + ts = ns_to_timespec64(time); + return ts; +} + +static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_timer *timer = &clock->timer; + struct mlx5_core_dev *mdev; + unsigned long flags; + u64 cycles, ns; + + mdev = container_of(clock, struct mlx5_core_dev, clock); + if (mlx5_real_time_mode(mdev)) { + *ts = mlx5_ptp_gettimex_real_time(mdev, sts); + goto out; + } + + write_seqlock_irqsave(&clock->lock, flags); + cycles = mlx5_read_time(mdev, sts, false); + ns = timecounter_cyc2time(&timer->tc, cycles); + write_sequnlock_irqrestore(&clock->lock, flags); + *ts = ns_to_timespec64(ns); +out: + return 0; +} + +static int mlx5_ptp_adjtime_real_time(struct mlx5_core_dev *mdev, s64 delta) +{ + u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {}; + + if (!mlx5_modify_mtutc_allowed(mdev)) + return 0; + + /* HW time adjustment range is s16. If out of range, settime instead */ + if (delta < S16_MIN || delta > S16_MAX) { + struct timespec64 ts; + s64 ns; + + ts = mlx5_ptp_gettimex_real_time(mdev, NULL); + ns = timespec64_to_ns(&ts) + delta; + ts = ns_to_timespec64(ns); + return mlx5_ptp_settime_real_time(mdev, &ts); + } + + MLX5_SET(mtutc_reg, in, operation, MLX5_MTUTC_OPERATION_ADJUST_TIME); + MLX5_SET(mtutc_reg, in, time_adjustment, delta); + + return mlx5_set_mtutc(mdev, in, sizeof(in)); +} + +static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_timer *timer = &clock->timer; + struct mlx5_core_dev *mdev; + unsigned long flags; + int err; + + mdev = container_of(clock, struct mlx5_core_dev, clock); + + err = mlx5_ptp_adjtime_real_time(mdev, delta); + if (err) + return err; + write_seqlock_irqsave(&clock->lock, flags); + timecounter_adjtime(&timer->tc, delta); + mlx5_update_clock_info_page(mdev); + write_sequnlock_irqrestore(&clock->lock, flags); + + return 0; +} + +static int mlx5_ptp_adjfreq_real_time(struct mlx5_core_dev *mdev, s32 freq) +{ + u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {}; + + if (!mlx5_modify_mtutc_allowed(mdev)) + return 0; + + MLX5_SET(mtutc_reg, in, operation, MLX5_MTUTC_OPERATION_ADJUST_FREQ_UTC); + MLX5_SET(mtutc_reg, in, freq_adjustment, freq); + + return mlx5_set_mtutc(mdev, in, sizeof(in)); +} + +static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_timer *timer = &clock->timer; + struct mlx5_core_dev *mdev; + unsigned long flags; + int neg_adj = 0; + u32 diff; + u64 adj; + int err; + + mdev = container_of(clock, struct mlx5_core_dev, clock); + err = mlx5_ptp_adjfreq_real_time(mdev, delta); + if (err) + return err; + + if (delta < 0) { + neg_adj = 1; + delta = -delta; + } + + adj = timer->nominal_c_mult; + adj *= delta; + diff = div_u64(adj, 1000000000ULL); + + write_seqlock_irqsave(&clock->lock, flags); + timecounter_read(&timer->tc); + 
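+	/* delta is in parts per billion; diff = nominal_c_mult * |delta| / 1e9 */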
timer->cycles.mult = neg_adj ? timer->nominal_c_mult - diff : + timer->nominal_c_mult + diff; + mlx5_update_clock_info_page(mdev); + write_sequnlock_irqrestore(&clock->lock, flags); + + return 0; +} + +static int mlx5_extts_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + struct mlx5_clock *clock = + container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev = + container_of(clock, struct mlx5_core_dev, clock); + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + u32 field_select = 0; + u8 pin_mode = 0; + u8 pattern = 0; + int pin = -1; + int err = 0; + + if (!MLX5_PPS_CAP(mdev)) + return -EOPNOTSUPP; + + /* Reject requests with unsupported flags */ + if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | + PTP_RISING_EDGE | + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS)) + return -EOPNOTSUPP; + + /* Reject requests to enable time stamping on both edges. */ + if ((rq->extts.flags & PTP_STRICT_FLAGS) && + (rq->extts.flags & PTP_ENABLE_FEATURE) && + (rq->extts.flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES) + return -EOPNOTSUPP; + + if (rq->extts.index >= clock->ptp_info.n_pins) + return -EINVAL; + + pin = ptp_find_pin(clock->ptp, PTP_PF_EXTTS, rq->extts.index); + if (pin < 0) + return -EBUSY; + + if (on) { + pin_mode = MLX5_PIN_MODE_IN; + pattern = !!(rq->extts.flags & PTP_FALLING_EDGE); + field_select = MLX5_MTPPS_FS_PIN_MODE | + MLX5_MTPPS_FS_PATTERN | + MLX5_MTPPS_FS_ENABLE; + } else { + field_select = MLX5_MTPPS_FS_ENABLE; + } + + MLX5_SET(mtpps_reg, in, pin, pin); + MLX5_SET(mtpps_reg, in, pin_mode, pin_mode); + MLX5_SET(mtpps_reg, in, pattern, pattern); + MLX5_SET(mtpps_reg, in, enable, on); + MLX5_SET(mtpps_reg, in, field_select, field_select); + + err = mlx5_set_mtpps(mdev, in, sizeof(in)); + if (err) + return err; + + return mlx5_set_mtppse(mdev, pin, 0, + MLX5_EVENT_MODE_REPETETIVE & on); +} + +static u64 find_target_cycles(struct mlx5_core_dev *mdev, s64 target_ns) +{ + struct mlx5_clock *clock = &mdev->clock; + u64 cycles_now, cycles_delta; + u64 nsec_now, nsec_delta; + struct mlx5_timer *timer; + unsigned long flags; + + timer = &clock->timer; + + cycles_now = mlx5_read_time(mdev, NULL, false); + write_seqlock_irqsave(&clock->lock, flags); + nsec_now = timecounter_cyc2time(&timer->tc, cycles_now); + nsec_delta = target_ns - nsec_now; + cycles_delta = div64_u64(nsec_delta << timer->cycles.shift, + timer->cycles.mult); + write_sequnlock_irqrestore(&clock->lock, flags); + + return cycles_now + cycles_delta; +} + +static u64 perout_conf_internal_timer(struct mlx5_core_dev *mdev, s64 sec) +{ + struct timespec64 ts = {}; + s64 target_ns; + + ts.tv_sec = sec; + target_ns = timespec64_to_ns(&ts); + + return find_target_cycles(mdev, target_ns); +} + +static u64 perout_conf_real_time(s64 sec, u32 nsec) +{ + return (u64)nsec | (u64)sec << 32; +} + +static int perout_conf_1pps(struct mlx5_core_dev *mdev, struct ptp_clock_request *rq, + u64 *time_stamp, bool real_time) +{ + struct timespec64 ts; + s64 ns; + + ts.tv_nsec = rq->perout.period.nsec; + ts.tv_sec = rq->perout.period.sec; + ns = timespec64_to_ns(&ts); + + if ((ns >> 1) != 500000000LL) + return -EINVAL; + + *time_stamp = real_time ? 
perout_conf_real_time(rq->perout.start.sec, 0) : + perout_conf_internal_timer(mdev, rq->perout.start.sec); + + return 0; +} + +#define MLX5_MAX_PULSE_DURATION (BIT(__mlx5_bit_sz(mtpps_reg, out_pulse_duration_ns)) - 1) +static int mlx5_perout_conf_out_pulse_duration(struct mlx5_core_dev *mdev, + struct ptp_clock_request *rq, + u32 *out_pulse_duration_ns) +{ + struct mlx5_pps *pps_info = &mdev->clock.pps_info; + u32 out_pulse_duration; + struct timespec64 ts; + + if (rq->perout.flags & PTP_PEROUT_DUTY_CYCLE) { + ts.tv_sec = rq->perout.on.sec; + ts.tv_nsec = rq->perout.on.nsec; + out_pulse_duration = (u32)timespec64_to_ns(&ts); + } else { + /* out_pulse_duration_ns should be up to 50% of the + * pulse period as default + */ + ts.tv_sec = rq->perout.period.sec; + ts.tv_nsec = rq->perout.period.nsec; + out_pulse_duration = (u32)timespec64_to_ns(&ts) >> 1; + } + + if (out_pulse_duration < pps_info->min_out_pulse_duration_ns || + out_pulse_duration > MLX5_MAX_PULSE_DURATION) { + mlx5_core_err(mdev, "NPPS pulse duration %u is not in [%llu, %lu]\n", + out_pulse_duration, pps_info->min_out_pulse_duration_ns, + MLX5_MAX_PULSE_DURATION); + return -EINVAL; + } + *out_pulse_duration_ns = out_pulse_duration; + + return 0; +} + +static int perout_conf_npps_real_time(struct mlx5_core_dev *mdev, struct ptp_clock_request *rq, + u32 *field_select, u32 *out_pulse_duration_ns, + u64 *period, u64 *time_stamp) +{ + struct mlx5_pps *pps_info = &mdev->clock.pps_info; + struct ptp_clock_time *time = &rq->perout.start; + struct timespec64 ts; + + ts.tv_sec = rq->perout.period.sec; + ts.tv_nsec = rq->perout.period.nsec; + if (timespec64_to_ns(&ts) < pps_info->min_npps_period) { + mlx5_core_err(mdev, "NPPS period is lower than minimal npps period %llu\n", + pps_info->min_npps_period); + return -EINVAL; + } + *period = perout_conf_real_time(rq->perout.period.sec, rq->perout.period.nsec); + + if (mlx5_perout_conf_out_pulse_duration(mdev, rq, out_pulse_duration_ns)) + return -EINVAL; + + *time_stamp = perout_conf_real_time(time->sec, time->nsec); + *field_select |= MLX5_MTPPS_FS_NPPS_PERIOD | + MLX5_MTPPS_FS_OUT_PULSE_DURATION_NS; + + return 0; +} + +static bool mlx5_perout_verify_flags(struct mlx5_core_dev *mdev, unsigned int flags) +{ + return ((!mlx5_npps_real_time_supported(mdev) && flags) || + (mlx5_npps_real_time_supported(mdev) && flags & ~PTP_PEROUT_DUTY_CYCLE)); +} + +static int mlx5_perout_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + struct mlx5_clock *clock = + container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev = + container_of(clock, struct mlx5_core_dev, clock); + bool rt_mode = mlx5_real_time_mode(mdev); + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + u32 out_pulse_duration_ns = 0; + u32 field_select = 0; + u64 npps_period = 0; + u64 time_stamp = 0; + u8 pin_mode = 0; + u8 pattern = 0; + int pin = -1; + int err = 0; + + if (!MLX5_PPS_CAP(mdev)) + return -EOPNOTSUPP; + + /* Reject requests with unsupported flags */ + if (mlx5_perout_verify_flags(mdev, rq->perout.flags)) + return -EOPNOTSUPP; + + if (rq->perout.index >= clock->ptp_info.n_pins) + return -EINVAL; + + field_select = MLX5_MTPPS_FS_ENABLE; + pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT, rq->perout.index); + if (pin < 0) + return -EBUSY; + + if (on) { + bool rt_mode = mlx5_real_time_mode(mdev); + + pin_mode = MLX5_PIN_MODE_OUT; + pattern = MLX5_OUT_PATTERN_PERIODIC; + + if (rt_mode && rq->perout.start.sec > U32_MAX) + return -EINVAL; + + field_select |= MLX5_MTPPS_FS_PIN_MODE | + 
MLX5_MTPPS_FS_PATTERN | + MLX5_MTPPS_FS_TIME_STAMP; + + if (mlx5_npps_real_time_supported(mdev)) + err = perout_conf_npps_real_time(mdev, rq, &field_select, + &out_pulse_duration_ns, &npps_period, + &time_stamp); + else + err = perout_conf_1pps(mdev, rq, &time_stamp, rt_mode); + if (err) + return err; + } + + MLX5_SET(mtpps_reg, in, pin, pin); + MLX5_SET(mtpps_reg, in, pin_mode, pin_mode); + MLX5_SET(mtpps_reg, in, pattern, pattern); + MLX5_SET(mtpps_reg, in, enable, on); + MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp); + MLX5_SET(mtpps_reg, in, field_select, field_select); + MLX5_SET64(mtpps_reg, in, npps_period, npps_period); + MLX5_SET(mtpps_reg, in, out_pulse_duration_ns, out_pulse_duration_ns); + err = mlx5_set_mtpps(mdev, in, sizeof(in)); + if (err) + return err; + + if (rt_mode) + return 0; + + return mlx5_set_mtppse(mdev, pin, 0, + MLX5_EVENT_MODE_REPETETIVE & on); +} + +static int mlx5_pps_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + struct mlx5_clock *clock = + container_of(ptp, struct mlx5_clock, ptp_info); + + clock->pps_info.enabled = !!on; + return 0; +} + +static int mlx5_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + return mlx5_extts_configure(ptp, rq, on); + case PTP_CLK_REQ_PEROUT: + return mlx5_perout_configure(ptp, rq, on); + case PTP_CLK_REQ_PPS: + return mlx5_pps_configure(ptp, rq, on); + default: + return -EOPNOTSUPP; + } + return 0; +} + +enum { + MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_IN = BIT(0), + MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_OUT = BIT(1), +}; + +static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, + ptp_info); + + switch (func) { + case PTP_PF_NONE: + return 0; + case PTP_PF_EXTTS: + return !(clock->pps_info.pin_caps[pin] & + MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_IN); + case PTP_PF_PEROUT: + return !(clock->pps_info.pin_caps[pin] & + MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_OUT); + default: + return -EOPNOTSUPP; + } +} + +static const struct ptp_clock_info mlx5_ptp_clock_info = { + .owner = THIS_MODULE, + .name = "mlx5_ptp", + .max_adj = 50000000, + .n_alarm = 0, + .n_ext_ts = 0, + .n_per_out = 0, + .n_pins = 0, + .pps = 0, + .adjfreq = mlx5_ptp_adjfreq, + .adjtime = mlx5_ptp_adjtime, + .gettimex64 = mlx5_ptp_gettimex, + .settime64 = mlx5_ptp_settime, + .enable = NULL, + .verify = NULL, +}; + +static int mlx5_query_mtpps_pin_mode(struct mlx5_core_dev *mdev, u8 pin, + u32 *mtpps, u32 mtpps_size) +{ + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {}; + + MLX5_SET(mtpps_reg, in, pin, pin); + + return mlx5_core_access_reg(mdev, in, sizeof(in), mtpps, + mtpps_size, MLX5_REG_MTPPS, 0, 0); +} + +static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin) +{ + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock); + + u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {}; + u8 mode; + int err; + + err = mlx5_query_mtpps_pin_mode(mdev, pin, out, sizeof(out)); + if (err || !MLX5_GET(mtpps_reg, out, enable)) + return PTP_PF_NONE; + + mode = MLX5_GET(mtpps_reg, out, pin_mode); + + if (mode == MLX5_PIN_MODE_IN) + return PTP_PF_EXTTS; + else if (mode == MLX5_PIN_MODE_OUT) + return PTP_PF_PEROUT; + + return PTP_PF_NONE; +} + +static void mlx5_init_pin_config(struct mlx5_clock *clock) +{ + int i; + + if (!clock->ptp_info.n_pins) + return; + + clock->ptp_info.pin_config = + 
kcalloc(clock->ptp_info.n_pins, + sizeof(*clock->ptp_info.pin_config), + GFP_KERNEL); + if (!clock->ptp_info.pin_config) + return; + clock->ptp_info.enable = mlx5_ptp_enable; + clock->ptp_info.verify = mlx5_ptp_verify; + clock->ptp_info.pps = 1; + + for (i = 0; i < clock->ptp_info.n_pins; i++) { + snprintf(clock->ptp_info.pin_config[i].name, + sizeof(clock->ptp_info.pin_config[i].name), + "mlx5_pps%d", i); + clock->ptp_info.pin_config[i].index = i; + clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(clock, i); + clock->ptp_info.pin_config[i].chan = 0; + } +} + +static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + + mlx5_query_mtpps(mdev, out, sizeof(out)); + + clock->ptp_info.n_pins = MLX5_GET(mtpps_reg, out, + cap_number_of_pps_pins); + clock->ptp_info.n_ext_ts = MLX5_GET(mtpps_reg, out, + cap_max_num_of_pps_in_pins); + clock->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out, + cap_max_num_of_pps_out_pins); + + if (MLX5_CAP_MCAM_FEATURE(mdev, npps_period)) + clock->pps_info.min_npps_period = 1 << MLX5_GET(mtpps_reg, out, + cap_log_min_npps_period); + if (MLX5_CAP_MCAM_FEATURE(mdev, out_pulse_duration_ns)) + clock->pps_info.min_out_pulse_duration_ns = 1 << MLX5_GET(mtpps_reg, out, + cap_log_min_out_pulse_duration_ns); + + clock->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode); + clock->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode); + clock->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode); + clock->pps_info.pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode); + clock->pps_info.pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode); + clock->pps_info.pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode); + clock->pps_info.pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode); + clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode); +} + +static void ts_next_sec(struct timespec64 *ts) +{ + ts->tv_sec += 1; + ts->tv_nsec = 0; +} + +static u64 perout_conf_next_event_timer(struct mlx5_core_dev *mdev, + struct mlx5_clock *clock) +{ + struct timespec64 ts; + s64 target_ns; + + mlx5_ptp_gettimex(&clock->ptp_info, &ts, NULL); + ts_next_sec(&ts); + target_ns = timespec64_to_ns(&ts); + + return find_target_cycles(mdev, target_ns); +} + +static int mlx5_pps_event(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb); + struct ptp_clock_event ptp_event; + struct mlx5_eqe *eqe = data; + int pin = eqe->data.pps.pin; + struct mlx5_core_dev *mdev; + unsigned long flags; + u64 ns; + + mdev = container_of(clock, struct mlx5_core_dev, clock); + + switch (clock->ptp_info.pin_config[pin].func) { + case PTP_PF_EXTTS: + ptp_event.index = pin; + ptp_event.timestamp = mlx5_real_time_mode(mdev) ? 
+ mlx5_real_time_cyc2time(clock, + be64_to_cpu(eqe->data.pps.time_stamp)) : + mlx5_timecounter_cyc2time(clock, + be64_to_cpu(eqe->data.pps.time_stamp)); + if (clock->pps_info.enabled) { + ptp_event.type = PTP_CLOCK_PPSUSR; + ptp_event.pps_times.ts_real = + ns_to_timespec64(ptp_event.timestamp); + } else { + ptp_event.type = PTP_CLOCK_EXTTS; + } + /* TODOL clock->ptp can be NULL if ptp_clock_register fails */ + ptp_clock_event(clock->ptp, &ptp_event); + break; + case PTP_PF_PEROUT: + ns = perout_conf_next_event_timer(mdev, clock); + write_seqlock_irqsave(&clock->lock, flags); + clock->pps_info.start[pin] = ns; + write_sequnlock_irqrestore(&clock->lock, flags); + schedule_work(&clock->pps_info.out_work); + break; + default: + mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n", + clock->ptp_info.pin_config[pin].func); + } + + return NOTIFY_OK; +} + +static void mlx5_timecounter_init(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + struct mlx5_timer *timer = &clock->timer; + u32 dev_freq; + + dev_freq = MLX5_CAP_GEN(mdev, device_frequency_khz); + timer->cycles.read = read_internal_timer; + timer->cycles.shift = MLX5_CYCLES_SHIFT; + timer->cycles.mult = clocksource_khz2mult(dev_freq, + timer->cycles.shift); + timer->nominal_c_mult = timer->cycles.mult; + timer->cycles.mask = CLOCKSOURCE_MASK(41); + + timecounter_init(&timer->tc, &timer->cycles, + ktime_to_ns(ktime_get_real())); +} + +static void mlx5_init_overflow_period(struct mlx5_clock *clock) +{ + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock); + struct mlx5_ib_clock_info *clock_info = mdev->clock_info; + struct mlx5_timer *timer = &clock->timer; + u64 overflow_cycles; + u64 frac = 0; + u64 ns; + + /* Calculate period in seconds to call the overflow watchdog - to make + * sure counter is checked at least twice every wrap around. + * The period is calculated as the minimum between max HW cycles count + * (The clock source mask) and max amount of cycles that can be + * multiplied by clock multiplier where the result doesn't exceed + * 64bits. 
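+ *
+ * Rough sketch of the arithmetic below (no particular device assumed):
+ * with the 41-bit mask set up in mlx5_timecounter_init() and a cycle
+ * multiplier M, overflow_cycles = min(2^63 / M, (2^41 - 1) / 3). That
+ * cycle count is converted to nanoseconds and then to jiffies, so the
+ * delayed work checks the counter well before the hardware value wraps.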
+ */ + overflow_cycles = div64_u64(~0ULL >> 1, timer->cycles.mult); + overflow_cycles = min(overflow_cycles, div_u64(timer->cycles.mask, 3)); + + ns = cyclecounter_cyc2ns(&timer->cycles, overflow_cycles, + frac, &frac); + do_div(ns, NSEC_PER_SEC / HZ); + timer->overflow_period = ns; + + INIT_DELAYED_WORK(&timer->overflow_work, mlx5_timestamp_overflow); + if (timer->overflow_period) + schedule_delayed_work(&timer->overflow_work, 0); + else + mlx5_core_warn(mdev, + "invalid overflow period, overflow_work is not scheduled\n"); + + if (clock_info) + clock_info->overflow_period = timer->overflow_period; +} + +static void mlx5_init_clock_info(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + struct mlx5_ib_clock_info *info; + struct mlx5_timer *timer; + + mdev->clock_info = (struct mlx5_ib_clock_info *)get_zeroed_page(GFP_KERNEL); + if (!mdev->clock_info) { + mlx5_core_warn(mdev, "Failed to allocate IB clock info page\n"); + return; + } + + info = mdev->clock_info; + timer = &clock->timer; + + info->nsec = timer->tc.nsec; + info->cycles = timer->tc.cycle_last; + info->mask = timer->cycles.mask; + info->mult = timer->nominal_c_mult; + info->shift = timer->cycles.shift; + info->frac = timer->tc.frac; +} + +static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + + mlx5_timecounter_init(mdev); + mlx5_init_clock_info(mdev); + mlx5_init_overflow_period(clock); + clock->ptp_info = mlx5_ptp_clock_info; + + if (mlx5_real_time_mode(mdev)) { + struct timespec64 ts; + + ktime_get_real_ts64(&ts); + mlx5_ptp_settime(&clock->ptp_info, &ts); + } +} + +static void mlx5_init_pps(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + + if (!MLX5_PPS_CAP(mdev)) + return; + + mlx5_get_pps_caps(mdev); + mlx5_init_pin_config(clock); +} + +void mlx5_init_clock(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) { + mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n"); + return; + } + + seqlock_init(&clock->lock); + mlx5_init_timer_clock(mdev); + INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out); + + /* Configure the PHC */ + clock->ptp_info = mlx5_ptp_clock_info; + + /* Initialize 1PPS data structures */ + mlx5_init_pps(mdev); + + clock->ptp = ptp_clock_register(&clock->ptp_info, + &mdev->pdev->dev); + if (IS_ERR(clock->ptp)) { + mlx5_core_warn(mdev, "ptp_clock_register failed %ld\n", + PTR_ERR(clock->ptp)); + clock->ptp = NULL; + } + + MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT); + mlx5_eq_notifier_register(mdev, &clock->pps_nb); +} + +void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) + return; + + mlx5_eq_notifier_unregister(mdev, &clock->pps_nb); + if (clock->ptp) { + ptp_clock_unregister(clock->ptp); + clock->ptp = NULL; + } + + cancel_work_sync(&clock->pps_info.out_work); + cancel_delayed_work_sync(&clock->timer.overflow_work); + + if (mdev->clock_info) { + free_page((unsigned long)mdev->clock_info); + mdev->clock_info = NULL; + } + + kfree(clock->ptp_info.pin_config); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h new file mode 100644 index 000000000..bd95b9f8d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __LIB_CLOCK_H__ +#define __LIB_CLOCK_H__ + +static inline bool mlx5_is_real_time_rq(struct mlx5_core_dev *mdev) +{ + u8 rq_ts_format_cap = MLX5_CAP_GEN(mdev, rq_ts_format); + + return (rq_ts_format_cap == MLX5_TIMESTAMP_FORMAT_CAP_REAL_TIME || + rq_ts_format_cap == + MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME); +} + +static inline bool mlx5_is_real_time_sq(struct mlx5_core_dev *mdev) +{ + u8 sq_ts_format_cap = MLX5_CAP_GEN(mdev, sq_ts_format); + + return (sq_ts_format_cap == MLX5_TIMESTAMP_FORMAT_CAP_REAL_TIME || + sq_ts_format_cap == + MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME); +} + +typedef ktime_t (*cqe_ts_to_ns)(struct mlx5_clock *, u64); + +#if IS_ENABLED(CONFIG_PTP_1588_CLOCK) +void mlx5_init_clock(struct mlx5_core_dev *mdev); +void mlx5_cleanup_clock(struct mlx5_core_dev *mdev); + +static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) +{ + return mdev->clock.ptp ? ptp_clock_index(mdev->clock.ptp) : -1; +} + +static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock, + u64 timestamp) +{ + struct mlx5_timer *timer = &clock->timer; + unsigned int seq; + u64 nsec; + + do { + seq = read_seqbegin(&clock->lock); + nsec = timecounter_cyc2time(&timer->tc, timestamp); + } while (read_seqretry(&clock->lock, seq)); + + return ns_to_ktime(nsec); +} + +#define REAL_TIME_TO_NS(hi, low) (((u64)hi) * NSEC_PER_SEC + ((u64)low)) + +static inline ktime_t mlx5_real_time_cyc2time(struct mlx5_clock *clock, + u64 timestamp) +{ + u64 time = REAL_TIME_TO_NS(timestamp >> 32, timestamp & 0xFFFFFFFF); + + return ns_to_ktime(time); +} +#else +static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {} +static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {} +static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) +{ + return -1; +} + +static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock, + u64 timestamp) +{ + return 0; +} + +static inline ktime_t mlx5_real_time_cyc2time(struct mlx5_clock *clock, + u64 timestamp) +{ + return 0; +} +#endif + +static inline cqe_ts_to_ns mlx5_rq_ts_translator(struct mlx5_core_dev *mdev) +{ + return mlx5_is_real_time_rq(mdev) ? 
mlx5_real_time_cyc2time : + mlx5_timecounter_cyc2time; +} + +static inline cqe_ts_to_ns mlx5_sq_ts_translator(struct mlx5_core_dev *mdev) +{ + return mlx5_is_real_time_sq(mdev) ? mlx5_real_time_cyc2time : + mlx5_timecounter_cyc2time; +} +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c new file mode 100644 index 000000000..e995f8378 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. + +#include "mlx5_core.h" +#include "lib/mlx5.h" + +int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, + void *key, u32 sz_bytes, + u32 key_type, u32 *p_key_id) +{ + u32 in[MLX5_ST_SZ_DW(create_encryption_key_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + u32 sz_bits = sz_bytes * BITS_PER_BYTE; + u8 general_obj_key_size; + u64 general_obj_types; + void *obj, *key_p; + int err; + + obj = MLX5_ADDR_OF(create_encryption_key_in, in, encryption_key_object); + key_p = MLX5_ADDR_OF(encryption_key_obj, obj, key); + + general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); + if (!(general_obj_types & + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY)) + return -EINVAL; + + switch (sz_bits) { + case 128: + general_obj_key_size = + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_128; + key_p += sz_bytes; + break; + case 256: + general_obj_key_size = + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_256; + break; + default: + return -EINVAL; + } + + memcpy(key_p, key, sz_bytes); + + MLX5_SET(encryption_key_obj, obj, key_size, general_obj_key_size); + MLX5_SET(encryption_key_obj, obj, key_type, key_type); + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY); + MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.hw_objs.pdn); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (!err) + *p_key_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + /* avoid leaking key on the stack */ + memzero_explicit(in, sizeof(in)); + + return err; +} + +void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, key_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c new file mode 100644 index 000000000..b7d779d08 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c @@ -0,0 +1,293 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies */ + +#include <linux/mlx5/vport.h> +#include "lib/devcom.h" +#include "mlx5_core.h" + +static LIST_HEAD(devcom_list); + +#define devcom_for_each_component(priv, comp, iter) \ + for (iter = 0; \ + comp = &(priv)->components[iter], iter < MLX5_DEVCOM_NUM_COMPONENTS; \ + iter++) + +struct mlx5_devcom_component { + struct { + void __rcu *data; + } device[MLX5_DEVCOM_PORTS_SUPPORTED]; + + mlx5_devcom_event_handler_t handler; + struct rw_semaphore sem; + bool paired; +}; + +struct 
mlx5_devcom_list { + struct list_head list; + + struct mlx5_devcom_component components[MLX5_DEVCOM_NUM_COMPONENTS]; + struct mlx5_core_dev *devs[MLX5_DEVCOM_PORTS_SUPPORTED]; +}; + +struct mlx5_devcom { + struct mlx5_devcom_list *priv; + int idx; +}; + +static struct mlx5_devcom_list *mlx5_devcom_list_alloc(void) +{ + struct mlx5_devcom_component *comp; + struct mlx5_devcom_list *priv; + int i; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return NULL; + + devcom_for_each_component(priv, comp, i) + init_rwsem(&comp->sem); + + return priv; +} + +static struct mlx5_devcom *mlx5_devcom_alloc(struct mlx5_devcom_list *priv, + u8 idx) +{ + struct mlx5_devcom *devcom; + + devcom = kzalloc(sizeof(*devcom), GFP_KERNEL); + if (!devcom) + return NULL; + + devcom->priv = priv; + devcom->idx = idx; + return devcom; +} + +/* Must be called with intf_mutex held */ +struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev) +{ + struct mlx5_devcom_list *priv = NULL, *iter; + struct mlx5_devcom *devcom = NULL; + bool new_priv = false; + u64 sguid0, sguid1; + int idx, i; + + if (!mlx5_core_is_pf(dev)) + return NULL; + if (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_DEVCOM_PORTS_SUPPORTED) + return NULL; + + mlx5_dev_list_lock(); + sguid0 = mlx5_query_nic_system_image_guid(dev); + list_for_each_entry(iter, &devcom_list, list) { + struct mlx5_core_dev *tmp_dev = NULL; + + idx = -1; + for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) { + if (iter->devs[i]) + tmp_dev = iter->devs[i]; + else + idx = i; + } + + if (idx == -1) + continue; + + sguid1 = mlx5_query_nic_system_image_guid(tmp_dev); + if (sguid0 != sguid1) + continue; + + priv = iter; + break; + } + + if (!priv) { + priv = mlx5_devcom_list_alloc(); + if (!priv) { + devcom = ERR_PTR(-ENOMEM); + goto out; + } + + idx = 0; + new_priv = true; + } + + priv->devs[idx] = dev; + devcom = mlx5_devcom_alloc(priv, idx); + if (!devcom) { + if (new_priv) + kfree(priv); + devcom = ERR_PTR(-ENOMEM); + goto out; + } + + if (new_priv) + list_add(&priv->list, &devcom_list); +out: + mlx5_dev_list_unlock(); + return devcom; +} + +/* Must be called with intf_mutex held */ +void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom) +{ + struct mlx5_devcom_list *priv; + int i; + + if (IS_ERR_OR_NULL(devcom)) + return; + + mlx5_dev_list_lock(); + priv = devcom->priv; + priv->devs[devcom->idx] = NULL; + + kfree(devcom); + + for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) + if (priv->devs[i]) + break; + + if (i != MLX5_DEVCOM_PORTS_SUPPORTED) + goto out; + + list_del(&priv->list); + kfree(priv); +out: + mlx5_dev_list_unlock(); +} + +void mlx5_devcom_register_component(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + mlx5_devcom_event_handler_t handler, + void *data) +{ + struct mlx5_devcom_component *comp; + + if (IS_ERR_OR_NULL(devcom)) + return; + + WARN_ON(!data); + + comp = &devcom->priv->components[id]; + down_write(&comp->sem); + comp->handler = handler; + rcu_assign_pointer(comp->device[devcom->idx].data, data); + up_write(&comp->sem); +} + +void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) +{ + struct mlx5_devcom_component *comp; + + if (IS_ERR_OR_NULL(devcom)) + return; + + comp = &devcom->priv->components[id]; + down_write(&comp->sem); + RCU_INIT_POINTER(comp->device[devcom->idx].data, NULL); + up_write(&comp->sem); + synchronize_rcu(); +} + +int mlx5_devcom_send_event(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + int event, + void *event_data) +{ + 
struct mlx5_devcom_component *comp; + int err = -ENODEV, i; + + if (IS_ERR_OR_NULL(devcom)) + return err; + + comp = &devcom->priv->components[id]; + down_write(&comp->sem); + for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) { + void *data = rcu_dereference_protected(comp->device[i].data, + lockdep_is_held(&comp->sem)); + + if (i != devcom->idx && data) { + err = comp->handler(event, data, event_data); + break; + } + } + + up_write(&comp->sem); + return err; +} + +void mlx5_devcom_set_paired(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + bool paired) +{ + struct mlx5_devcom_component *comp; + + comp = &devcom->priv->components[id]; + WARN_ON(!rwsem_is_locked(&comp->sem)); + + WRITE_ONCE(comp->paired, paired); +} + +bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) +{ + if (IS_ERR_OR_NULL(devcom)) + return false; + + return READ_ONCE(devcom->priv->components[id].paired); +} + +void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) +{ + struct mlx5_devcom_component *comp; + int i; + + if (IS_ERR_OR_NULL(devcom)) + return NULL; + + comp = &devcom->priv->components[id]; + down_read(&comp->sem); + if (!READ_ONCE(comp->paired)) { + up_read(&comp->sem); + return NULL; + } + + for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) + if (i != devcom->idx) + break; + + return rcu_dereference_protected(comp->device[i].data, lockdep_is_held(&comp->sem)); +} + +void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id) +{ + struct mlx5_devcom_component *comp; + int i; + + if (IS_ERR_OR_NULL(devcom)) + return NULL; + + for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) + if (i != devcom->idx) + break; + + comp = &devcom->priv->components[id]; + /* This can change concurrently, however 'data' pointer will remain + * valid for the duration of RCU read section. 
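+ *
+ * Illustrative caller pattern (use() is a placeholder, not a real API):
+ *
+ *   rcu_read_lock();
+ *   data = mlx5_devcom_get_peer_data_rcu(devcom, id);
+ *   if (data)
+ *           use(data);
+ *   rcu_read_unlock();
+ *
+ * The pointer must not be dereferenced after rcu_read_unlock();
+ * mlx5_devcom_unregister_component() clears the slot and then waits for
+ * readers with synchronize_rcu().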
+ */ + if (!READ_ONCE(comp->paired)) + return NULL; + + return rcu_dereference(comp->device[i].data); +} + +void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) +{ + struct mlx5_devcom_component *comp = &devcom->priv->components[id]; + + up_read(&comp->sem); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h new file mode 100644 index 000000000..9a496f472 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies */ + +#ifndef __LIB_MLX5_DEVCOM_H__ +#define __LIB_MLX5_DEVCOM_H__ + +#include <linux/mlx5/driver.h> + +#define MLX5_DEVCOM_PORTS_SUPPORTED 2 + +enum mlx5_devcom_components { + MLX5_DEVCOM_ESW_OFFLOADS, + + MLX5_DEVCOM_NUM_COMPONENTS, +}; + +typedef int (*mlx5_devcom_event_handler_t)(int event, + void *my_data, + void *event_data); + +struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev); +void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom); + +void mlx5_devcom_register_component(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + mlx5_devcom_event_handler_t handler, + void *data); +void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); + +int mlx5_devcom_send_event(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + int event, + void *event_data); + +void mlx5_devcom_set_paired(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + bool paired); +bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); + +void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); +void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id); +void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); + +#endif + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c new file mode 100644 index 000000000..9482e51ac --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/device.h> + +#include "mlx5_core.h" +#include "lib/mlx5.h" + +struct mlx5_dm { + /* protect access to icm bitmask */ + spinlock_t lock; + unsigned long *steering_sw_icm_alloc_blocks; + unsigned long *header_modify_sw_icm_alloc_blocks; + unsigned long *header_modify_pattern_sw_icm_alloc_blocks; +}; + +struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev) +{ + u64 header_modify_pattern_icm_blocks = 0; + u64 header_modify_icm_blocks = 0; + u64 steering_icm_blocks = 0; + struct mlx5_dm *dm; + bool support_v2; + + if (!(MLX5_CAP_GEN_64(dev, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM)) + return NULL; + + dm = kzalloc(sizeof(*dm), GFP_KERNEL); + if (!dm) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&dm->lock); + + if (MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address)) { + steering_icm_blocks = + BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + + dm->steering_sw_icm_alloc_blocks = + bitmap_zalloc(steering_icm_blocks, GFP_KERNEL); + if (!dm->steering_sw_icm_alloc_blocks) + goto err_steering; + } + + if (MLX5_CAP64_DEV_MEM(dev, 
header_modify_sw_icm_start_address)) { + header_modify_icm_blocks = + BIT(MLX5_CAP_DEV_MEM(dev, log_header_modify_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + + dm->header_modify_sw_icm_alloc_blocks = + bitmap_zalloc(header_modify_icm_blocks, GFP_KERNEL); + if (!dm->header_modify_sw_icm_alloc_blocks) + goto err_modify_hdr; + } + + support_v2 = MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) && + MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2) && + MLX5_CAP64_DEV_MEM(dev, header_modify_pattern_sw_icm_start_address); + + if (support_v2) { + header_modify_pattern_icm_blocks = + BIT(MLX5_CAP_DEV_MEM(dev, log_header_modify_pattern_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + + dm->header_modify_pattern_sw_icm_alloc_blocks = + bitmap_zalloc(header_modify_pattern_icm_blocks, GFP_KERNEL); + if (!dm->header_modify_pattern_sw_icm_alloc_blocks) + goto err_pattern; + } + + return dm; + +err_pattern: + bitmap_free(dm->header_modify_sw_icm_alloc_blocks); + +err_modify_hdr: + bitmap_free(dm->steering_sw_icm_alloc_blocks); + +err_steering: + kfree(dm); + + return ERR_PTR(-ENOMEM); +} + +void mlx5_dm_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_dm *dm = dev->dm; + + if (!dev->dm) + return; + + if (dm->steering_sw_icm_alloc_blocks) { + WARN_ON(!bitmap_empty(dm->steering_sw_icm_alloc_blocks, + BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)))); + bitmap_free(dm->steering_sw_icm_alloc_blocks); + } + + if (dm->header_modify_sw_icm_alloc_blocks) { + WARN_ON(!bitmap_empty(dm->header_modify_sw_icm_alloc_blocks, + BIT(MLX5_CAP_DEV_MEM(dev, + log_header_modify_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)))); + bitmap_free(dm->header_modify_sw_icm_alloc_blocks); + } + + if (dm->header_modify_pattern_sw_icm_alloc_blocks) { + WARN_ON(!bitmap_empty(dm->header_modify_pattern_sw_icm_alloc_blocks, + BIT(MLX5_CAP_DEV_MEM(dev, + log_header_modify_pattern_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)))); + bitmap_free(dm->header_modify_pattern_sw_icm_alloc_blocks); + } + + kfree(dm); +} + +int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, + u64 length, u32 log_alignment, u16 uid, + phys_addr_t *addr, u32 *obj_id) +{ + u32 num_blocks = DIV_ROUND_UP_ULL(length, MLX5_SW_ICM_BLOCK_SIZE(dev)); + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {}; + struct mlx5_dm *dm = dev->dm; + unsigned long *block_map; + u64 icm_start_addr; + u32 log_icm_size; + u64 align_mask; + u32 max_blocks; + u64 block_idx; + void *sw_icm; + int ret; + + if (!dev->dm) + return -EOPNOTSUPP; + + if (!length || (length & (length - 1)) || + length & (MLX5_SW_ICM_BLOCK_SIZE(dev) - 1)) + return -EINVAL; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM); + MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid); + + switch (type) { + case MLX5_SW_ICM_TYPE_STEERING: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address); + log_icm_size = MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size); + block_map = dm->steering_sw_icm_alloc_blocks; + break; + case MLX5_SW_ICM_TYPE_HEADER_MODIFY: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address); + log_icm_size = MLX5_CAP_DEV_MEM(dev, + log_header_modify_sw_icm_size); + block_map = dm->header_modify_sw_icm_alloc_blocks; + break; + case MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, + 
header_modify_pattern_sw_icm_start_address); + log_icm_size = MLX5_CAP_DEV_MEM(dev, + log_header_modify_pattern_sw_icm_size); + block_map = dm->header_modify_pattern_sw_icm_alloc_blocks; + break; + default: + return -EINVAL; + } + + if (!block_map) + return -EOPNOTSUPP; + + max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + + if (log_alignment < MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) + log_alignment = MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + align_mask = BIT(log_alignment - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) - 1; + + spin_lock(&dm->lock); + block_idx = bitmap_find_next_zero_area(block_map, max_blocks, 0, + num_blocks, align_mask); + + if (block_idx < max_blocks) + bitmap_set(block_map, + block_idx, num_blocks); + + spin_unlock(&dm->lock); + + if (block_idx >= max_blocks) + return -ENOMEM; + + sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm); + icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr, + icm_start_addr); + MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length)); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) { + spin_lock(&dm->lock); + bitmap_clear(block_map, + block_idx, num_blocks); + spin_unlock(&dm->lock); + + return ret; + } + + *addr = icm_start_addr; + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_dm_sw_icm_alloc); + +int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, + u64 length, u16 uid, phys_addr_t addr, u32 obj_id) +{ + u32 num_blocks = DIV_ROUND_UP_ULL(length, MLX5_SW_ICM_BLOCK_SIZE(dev)); + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + struct mlx5_dm *dm = dev->dm; + unsigned long *block_map; + u64 icm_start_addr; + u64 start_idx; + int err; + + if (!dev->dm) + return -EOPNOTSUPP; + + switch (type) { + case MLX5_SW_ICM_TYPE_STEERING: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address); + block_map = dm->steering_sw_icm_alloc_blocks; + break; + case MLX5_SW_ICM_TYPE_HEADER_MODIFY: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address); + block_map = dm->header_modify_sw_icm_alloc_blocks; + break; + case MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, + header_modify_pattern_sw_icm_start_address); + block_map = dm->header_modify_pattern_sw_icm_alloc_blocks; + break; + default: + return -EINVAL; + } + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + start_idx = (addr - icm_start_addr) >> MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + spin_lock(&dm->lock); + bitmap_clear(block_map, + start_idx, num_blocks); + spin_unlock(&dm->lock); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_dm_sw_icm_dealloc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h new file mode 100644 index 000000000..d3d628b86 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018-2021, Mellanox Technologies inc. All rights reserved. 
*/ + +#ifndef __LIB_MLX5_EQ_H__ +#define __LIB_MLX5_EQ_H__ +#include <linux/mlx5/driver.h> +#include <linux/mlx5/eq.h> +#include <linux/mlx5/cq.h> + +#define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe)) + +struct mlx5_eq_tasklet { + struct list_head list; + struct list_head process_list; + struct tasklet_struct task; + spinlock_t lock; /* lock completion tasklet list */ +}; + +struct mlx5_cq_table { + spinlock_t lock; /* protect radix tree */ + struct radix_tree_root tree; +}; + +struct mlx5_eq { + struct mlx5_frag_buf_ctrl fbc; + struct mlx5_frag_buf frag_buf; + struct mlx5_core_dev *dev; + struct mlx5_cq_table cq_table; + __be32 __iomem *doorbell; + u32 cons_index; + unsigned int vecidx; + unsigned int irqn; + u8 eqn; + struct mlx5_rsc_debug *dbg; + struct mlx5_irq *irq; +}; + +struct mlx5_eq_async { + struct mlx5_eq core; + struct notifier_block irq_nb; + spinlock_t lock; /* To avoid irq EQ handle races with resiliency flows */ +}; + +struct mlx5_eq_comp { + struct mlx5_eq core; + struct notifier_block irq_nb; + struct mlx5_eq_tasklet tasklet_ctx; + struct list_head list; +}; + +static inline u32 eq_get_size(struct mlx5_eq *eq) +{ + return eq->fbc.sz_m1 + 1; +} + +static inline struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry) +{ + return mlx5_frag_buf_get_wqe(&eq->fbc, entry); +} + +static inline struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq) +{ + struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & eq->fbc.sz_m1); + + return (eqe->owner ^ (eq->cons_index >> eq->fbc.log_sz)) & 1 ? NULL : eqe; +} + +static inline void eq_update_ci(struct mlx5_eq *eq, int arm) +{ + __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2); + u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); + + __raw_writel((__force u32)cpu_to_be32(val), addr); + /* We still want ordering, just not swabbing, so add a barrier */ + mb(); +} + +int mlx5_eq_table_init(struct mlx5_core_dev *dev); +void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev); +int mlx5_eq_table_create(struct mlx5_core_dev *dev); +void mlx5_eq_table_destroy(struct mlx5_core_dev *dev); + +int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); +void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); +struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn); +struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev); +void mlx5_cq_tasklet_cb(struct tasklet_struct *t); +struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix); + +u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq); +void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev); +void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev); +void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev); + +int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +void mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); + +/* This function should only be called after mlx5_cmd_force_teardown_hca */ +void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); + +#ifdef CONFIG_RFS_ACCEL +struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev); +#endif + +int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c new file mode 100644 index 000000000..df58cba37 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -0,0 
+1,810 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2020 Mellanox Technologies. + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/fs.h> + +#include "lib/fs_chains.h" +#include "fs_ft_pool.h" +#include "en/mapping.h" +#include "fs_core.h" +#include "en_tc.h" + +#define chains_lock(chains) ((chains)->lock) +#define chains_ht(chains) ((chains)->chains_ht) +#define prios_ht(chains) ((chains)->prios_ht) +#define tc_default_ft(chains) ((chains)->tc_default_ft) +#define tc_end_ft(chains) ((chains)->tc_end_ft) +#define ns_to_chains_fs_prio(ns) ((ns) == MLX5_FLOW_NAMESPACE_FDB ? \ + FDB_TC_OFFLOAD : MLX5E_TC_PRIO) +#define FT_TBL_SZ (64 * 1024) + +struct mlx5_fs_chains { + struct mlx5_core_dev *dev; + + struct rhashtable chains_ht; + struct rhashtable prios_ht; + /* Protects above chains_ht and prios_ht */ + struct mutex lock; + + struct mlx5_flow_table *tc_default_ft; + struct mlx5_flow_table *tc_end_ft; + struct mapping_ctx *chains_mapping; + + enum mlx5_flow_namespace_type ns; + u32 group_num; + u32 flags; +}; + +struct fs_chain { + struct rhash_head node; + + u32 chain; + + int ref; + int id; + + struct mlx5_fs_chains *chains; + struct list_head prios_list; + struct mlx5_flow_handle *restore_rule; + struct mlx5_modify_hdr *miss_modify_hdr; +}; + +struct prio_key { + u32 chain; + u32 prio; + u32 level; +}; + +struct prio { + struct rhash_head node; + struct list_head list; + + struct prio_key key; + + int ref; + + struct fs_chain *chain; + struct mlx5_flow_table *ft; + struct mlx5_flow_table *next_ft; + struct mlx5_flow_group *miss_group; + struct mlx5_flow_handle *miss_rule; +}; + +static const struct rhashtable_params chain_params = { + .head_offset = offsetof(struct fs_chain, node), + .key_offset = offsetof(struct fs_chain, chain), + .key_len = sizeof_field(struct fs_chain, chain), + .automatic_shrinking = true, +}; + +static const struct rhashtable_params prio_params = { + .head_offset = offsetof(struct prio, node), + .key_offset = offsetof(struct prio, key), + .key_len = sizeof_field(struct prio, key), + .automatic_shrinking = true, +}; + +bool mlx5_chains_prios_supported(struct mlx5_fs_chains *chains) +{ + return chains->flags & MLX5_CHAINS_AND_PRIOS_SUPPORTED; +} + +bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains) +{ + return chains->flags & MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; +} + +bool mlx5_chains_backwards_supported(struct mlx5_fs_chains *chains) +{ + return mlx5_chains_prios_supported(chains) && + mlx5_chains_ignore_flow_level_supported(chains); +} + +u32 mlx5_chains_get_chain_range(struct mlx5_fs_chains *chains) +{ + if (!mlx5_chains_prios_supported(chains)) + return 1; + + if (mlx5_chains_ignore_flow_level_supported(chains)) + return UINT_MAX - 1; + + /* We should get here only for eswitch case */ + return FDB_TC_MAX_CHAIN; +} + +u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains) +{ + return mlx5_chains_get_chain_range(chains) + 1; +} + +u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains) +{ + if (mlx5_chains_ignore_flow_level_supported(chains)) + return UINT_MAX; + + if (!chains->dev->priv.eswitch || + chains->dev->priv.eswitch->mode != MLX5_ESWITCH_OFFLOADS) + return 1; + + /* We should get here only for eswitch case */ + return FDB_TC_MAX_PRIO; +} + +static unsigned int mlx5_chains_get_level_range(struct mlx5_fs_chains *chains) +{ + if (mlx5_chains_ignore_flow_level_supported(chains)) + return UINT_MAX; + + /* Same value for FDB and NIC RX tables */ + return 
FDB_TC_LEVELS_PER_PRIO; +} + +void +mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains, + struct mlx5_flow_table *ft) +{ + tc_end_ft(chains) = ft; +} + +static struct mlx5_flow_table * +mlx5_chains_create_table(struct mlx5_fs_chains *chains, + u32 chain, u32 prio, u32 level) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + int sz; + + if (chains->flags & MLX5_CHAINS_FT_TUNNEL_SUPPORTED) + ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + + sz = (chain == mlx5_chains_get_nf_ft_chain(chains)) ? FT_TBL_SZ : POOL_NEXT_SIZE; + ft_attr.max_fte = sz; + + /* We use tc_default_ft(chains) as the table's next_ft till + * ignore_flow_level is allowed on FT creation and not just for FTEs. + * Instead caller should add an explicit miss rule if needed. + */ + ft_attr.next_ft = tc_default_ft(chains); + + /* The root table(chain 0, prio 1, level 0) is required to be + * connected to the previous fs_core managed prio. + * We always create it, as a managed table, in order to align with + * fs_core logic. + */ + if (!mlx5_chains_ignore_flow_level_supported(chains) || + (chain == 0 && prio == 1 && level == 0)) { + ft_attr.level = level; + ft_attr.prio = prio - 1; + ns = (chains->ns == MLX5_FLOW_NAMESPACE_FDB) ? + mlx5_get_fdb_sub_ns(chains->dev, chain) : + mlx5_get_flow_namespace(chains->dev, chains->ns); + } else { + ft_attr.flags |= MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.prio = ns_to_chains_fs_prio(chains->ns); + /* Firmware doesn't allow us to create another level 0 table, + * so we create all unmanaged tables as level 1. + * + * To connect them, we use explicit miss rules with + * ignore_flow_level. Caller is responsible to create + * these rules (if needed). + */ + ft_attr.level = 1; + ns = mlx5_get_flow_namespace(chains->dev, chains->ns); + } + + ft_attr.autogroup.num_reserved_entries = 2; + ft_attr.autogroup.max_num_groups = chains->group_num; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + mlx5_core_warn(chains->dev, "Failed to create chains table err %d (chain: %d, prio: %d, level: %d, size: %d)\n", + (int)PTR_ERR(ft), chain, prio, level, sz); + return ft; + } + + return ft; +} + +static int +create_chain_restore(struct fs_chain *chain) +{ + struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch; + u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5_fs_chains *chains = chain->chains; + enum mlx5e_tc_attr_to_reg chain_to_reg; + struct mlx5_modify_hdr *mod_hdr; + u32 index; + int err; + + if (chain->chain == mlx5_chains_get_nf_ft_chain(chains) || + !mlx5_chains_prios_supported(chains)) + return 0; + + err = mlx5_chains_get_chain_mapping(chains, chain->chain, &index); + if (err) + return err; + if (index == MLX5_FS_DEFAULT_FLOW_TAG) { + /* we got the special default flow tag id, so we won't know + * if we actually marked the packet with the restore rule + * we create. + * + * This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0. 
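+ *
+ * The handling below requests a fresh mapping and releases the entry
+ * that landed on MLX5_FS_DEFAULT_FLOW_TAG, the intent being that the
+ * reserved tag value is never kept as this chain's restore id.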
+ */ + err = mlx5_chains_get_chain_mapping(chains, chain->chain, &index); + mapping_remove(chains->chains_mapping, MLX5_FS_DEFAULT_FLOW_TAG); + if (err) + return err; + } + + chain->id = index; + + if (chains->ns == MLX5_FLOW_NAMESPACE_FDB) { + chain_to_reg = CHAIN_TO_REG; + chain->restore_rule = esw_add_restore_rule(esw, chain->id); + if (IS_ERR(chain->restore_rule)) { + err = PTR_ERR(chain->restore_rule); + goto err_rule; + } + } else if (chains->ns == MLX5_FLOW_NAMESPACE_KERNEL) { + /* For NIC RX we don't need a restore rule + * since we write the metadata to reg_b + * that is passed to SW directly. + */ + chain_to_reg = NIC_CHAIN_TO_REG; + } else { + err = -EINVAL; + goto err_rule; + } + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, field, + mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mfield); + MLX5_SET(set_action_in, modact, offset, + mlx5e_tc_attr_to_reg_mappings[chain_to_reg].moffset); + MLX5_SET(set_action_in, modact, length, + mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mlen == 32 ? + 0 : mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mlen); + MLX5_SET(set_action_in, modact, data, chain->id); + mod_hdr = mlx5_modify_header_alloc(chains->dev, chains->ns, + 1, modact); + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + goto err_mod_hdr; + } + chain->miss_modify_hdr = mod_hdr; + + return 0; + +err_mod_hdr: + if (!IS_ERR_OR_NULL(chain->restore_rule)) + mlx5_del_flow_rules(chain->restore_rule); +err_rule: + /* Datapath can't find this mapping, so we can safely remove it */ + mapping_remove(chains->chains_mapping, chain->id); + return err; +} + +static void destroy_chain_restore(struct fs_chain *chain) +{ + struct mlx5_fs_chains *chains = chain->chains; + + if (!chain->miss_modify_hdr) + return; + + if (chain->restore_rule) + mlx5_del_flow_rules(chain->restore_rule); + + mlx5_modify_header_dealloc(chains->dev, chain->miss_modify_hdr); + mapping_remove(chains->chains_mapping, chain->id); +} + +static struct fs_chain * +mlx5_chains_create_chain(struct mlx5_fs_chains *chains, u32 chain) +{ + struct fs_chain *chain_s = NULL; + int err; + + chain_s = kvzalloc(sizeof(*chain_s), GFP_KERNEL); + if (!chain_s) + return ERR_PTR(-ENOMEM); + + chain_s->chains = chains; + chain_s->chain = chain; + INIT_LIST_HEAD(&chain_s->prios_list); + + err = create_chain_restore(chain_s); + if (err) + goto err_restore; + + err = rhashtable_insert_fast(&chains_ht(chains), &chain_s->node, + chain_params); + if (err) + goto err_insert; + + return chain_s; + +err_insert: + destroy_chain_restore(chain_s); +err_restore: + kvfree(chain_s); + return ERR_PTR(err); +} + +static void +mlx5_chains_destroy_chain(struct fs_chain *chain) +{ + struct mlx5_fs_chains *chains = chain->chains; + + rhashtable_remove_fast(&chains_ht(chains), &chain->node, + chain_params); + + destroy_chain_restore(chain); + kvfree(chain); +} + +static struct fs_chain * +mlx5_chains_get_chain(struct mlx5_fs_chains *chains, u32 chain) +{ + struct fs_chain *chain_s; + + chain_s = rhashtable_lookup_fast(&chains_ht(chains), &chain, + chain_params); + if (!chain_s) { + chain_s = mlx5_chains_create_chain(chains, chain); + if (IS_ERR(chain_s)) + return chain_s; + } + + chain_s->ref++; + + return chain_s; +} + +static struct mlx5_flow_handle * +mlx5_chains_add_miss_rule(struct fs_chain *chain, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + struct mlx5_fs_chains *chains = chain->chains; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act act = {}; + + act.flags = 
FLOW_ACT_NO_APPEND; + if (mlx5_chains_ignore_flow_level_supported(chain->chains)) + act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + + act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = next_ft; + + if (next_ft == tc_end_ft(chains) && + chain->chain != mlx5_chains_get_nf_ft_chain(chains) && + mlx5_chains_prios_supported(chains)) { + act.modify_hdr = chain->miss_modify_hdr; + act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + } + + return mlx5_add_flow_rules(ft, NULL, &act, &dest, 1); +} + +static int +mlx5_chains_update_prio_prevs(struct prio *prio, + struct mlx5_flow_table *next_ft) +{ + struct mlx5_flow_handle *miss_rules[FDB_TC_LEVELS_PER_PRIO + 1] = {}; + struct fs_chain *chain = prio->chain; + struct prio *pos; + int n = 0, err; + + if (prio->key.level) + return 0; + + /* Iterate in reverse order until reaching the level 0 rule of + * the previous priority, adding all the miss rules first, so we can + * revert them if any of them fails. + */ + pos = prio; + list_for_each_entry_continue_reverse(pos, + &chain->prios_list, + list) { + miss_rules[n] = mlx5_chains_add_miss_rule(chain, + pos->ft, + next_ft); + if (IS_ERR(miss_rules[n])) { + err = PTR_ERR(miss_rules[n]); + goto err_prev_rule; + } + + n++; + if (!pos->key.level) + break; + } + + /* Success, delete old miss rules, and update the pointers. */ + n = 0; + pos = prio; + list_for_each_entry_continue_reverse(pos, + &chain->prios_list, + list) { + mlx5_del_flow_rules(pos->miss_rule); + + pos->miss_rule = miss_rules[n]; + pos->next_ft = next_ft; + + n++; + if (!pos->key.level) + break; + } + + return 0; + +err_prev_rule: + while (--n >= 0) + mlx5_del_flow_rules(miss_rules[n]); + + return err; +} + +static void +mlx5_chains_put_chain(struct fs_chain *chain) +{ + if (--chain->ref == 0) + mlx5_chains_destroy_chain(chain); +} + +static struct prio * +mlx5_chains_create_prio(struct mlx5_fs_chains *chains, + u32 chain, u32 prio, u32 level) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_handle *miss_rule; + struct mlx5_flow_group *miss_group; + struct mlx5_flow_table *next_ft; + struct mlx5_flow_table *ft; + struct fs_chain *chain_s; + struct list_head *pos; + struct prio *prio_s; + u32 *flow_group_in; + int err; + + chain_s = mlx5_chains_get_chain(chains, chain); + if (IS_ERR(chain_s)) + return ERR_CAST(chain_s); + + prio_s = kvzalloc(sizeof(*prio_s), GFP_KERNEL); + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!prio_s || !flow_group_in) { + err = -ENOMEM; + goto err_alloc; + } + + /* Chain's prio list is sorted by prio and level. + * And all levels of some prio point to the next prio's level 0. + * Example list (prio, level): + * (3,0)->(3,1)->(5,0)->(5,1)->(6,1)->(7,0) + * In hardware, we will we have the following pointers: + * (3,0) -> (5,0) -> (7,0) -> Slow path + * (3,1) -> (5,0) + * (5,1) -> (7,0) + * (6,1) -> (7,0) + */ + + /* Default miss for each chain: */ + next_ft = (chain == mlx5_chains_get_nf_ft_chain(chains)) ? + tc_default_ft(chains) : + tc_end_ft(chains); + list_for_each(pos, &chain_s->prios_list) { + struct prio *p = list_entry(pos, struct prio, list); + + /* exit on first pos that is larger */ + if (prio < p->key.prio || (prio == p->key.prio && + level < p->key.level)) { + /* Get next level 0 table */ + next_ft = p->key.level == 0 ? 
p->ft : p->next_ft; + break; + } + } + + ft = mlx5_chains_create_table(chains, chain, prio, level); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_create; + } + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, + ft->max_fte - 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ft->max_fte - 1); + miss_group = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(miss_group)) { + err = PTR_ERR(miss_group); + goto err_group; + } + + /* Add miss rule to next_ft */ + miss_rule = mlx5_chains_add_miss_rule(chain_s, ft, next_ft); + if (IS_ERR(miss_rule)) { + err = PTR_ERR(miss_rule); + goto err_miss_rule; + } + + prio_s->miss_group = miss_group; + prio_s->miss_rule = miss_rule; + prio_s->next_ft = next_ft; + prio_s->chain = chain_s; + prio_s->key.chain = chain; + prio_s->key.prio = prio; + prio_s->key.level = level; + prio_s->ft = ft; + + err = rhashtable_insert_fast(&prios_ht(chains), &prio_s->node, + prio_params); + if (err) + goto err_insert; + + list_add(&prio_s->list, pos->prev); + + /* Table is ready, connect it */ + err = mlx5_chains_update_prio_prevs(prio_s, ft); + if (err) + goto err_update; + + kvfree(flow_group_in); + return prio_s; + +err_update: + list_del(&prio_s->list); + rhashtable_remove_fast(&prios_ht(chains), &prio_s->node, + prio_params); +err_insert: + mlx5_del_flow_rules(miss_rule); +err_miss_rule: + mlx5_destroy_flow_group(miss_group); +err_group: + mlx5_destroy_flow_table(ft); +err_create: +err_alloc: + kvfree(prio_s); + kvfree(flow_group_in); + mlx5_chains_put_chain(chain_s); + return ERR_PTR(err); +} + +static void +mlx5_chains_destroy_prio(struct mlx5_fs_chains *chains, + struct prio *prio) +{ + struct fs_chain *chain = prio->chain; + + WARN_ON(mlx5_chains_update_prio_prevs(prio, + prio->next_ft)); + + list_del(&prio->list); + rhashtable_remove_fast(&prios_ht(chains), &prio->node, + prio_params); + mlx5_del_flow_rules(prio->miss_rule); + mlx5_destroy_flow_group(prio->miss_group); + mlx5_destroy_flow_table(prio->ft); + mlx5_chains_put_chain(chain); + kvfree(prio); +} + +struct mlx5_flow_table * +mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level) +{ + struct mlx5_flow_table *prev_fts; + struct prio *prio_s; + struct prio_key key; + int l = 0; + + if ((chain > mlx5_chains_get_chain_range(chains) && + chain != mlx5_chains_get_nf_ft_chain(chains)) || + prio > mlx5_chains_get_prio_range(chains) || + level > mlx5_chains_get_level_range(chains)) + return ERR_PTR(-EOPNOTSUPP); + + /* create earlier levels for correct fs_core lookup when + * connecting tables. 
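+ *
+ * Example (numbers purely illustrative): a request for
+ * (chain 0, prio 1, level 2) first recurses into levels 0 and 1 of the
+ * same chain/prio, taking a reference on each; mlx5_chains_put_table()
+ * mirrors this by releasing the lower levels after the requested one.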
+ */ + for (l = 0; l < level; l++) { + prev_fts = mlx5_chains_get_table(chains, chain, prio, l); + if (IS_ERR(prev_fts)) { + prio_s = ERR_CAST(prev_fts); + goto err_get_prevs; + } + } + + key.chain = chain; + key.prio = prio; + key.level = level; + + mutex_lock(&chains_lock(chains)); + prio_s = rhashtable_lookup_fast(&prios_ht(chains), &key, + prio_params); + if (!prio_s) { + prio_s = mlx5_chains_create_prio(chains, chain, + prio, level); + if (IS_ERR(prio_s)) + goto err_create_prio; + } + + ++prio_s->ref; + mutex_unlock(&chains_lock(chains)); + + return prio_s->ft; + +err_create_prio: + mutex_unlock(&chains_lock(chains)); +err_get_prevs: + while (--l >= 0) + mlx5_chains_put_table(chains, chain, prio, l); + return ERR_CAST(prio_s); +} + +void +mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level) +{ + struct prio *prio_s; + struct prio_key key; + + key.chain = chain; + key.prio = prio; + key.level = level; + + mutex_lock(&chains_lock(chains)); + prio_s = rhashtable_lookup_fast(&prios_ht(chains), &key, + prio_params); + if (!prio_s) + goto err_get_prio; + + if (--prio_s->ref == 0) + mlx5_chains_destroy_prio(chains, prio_s); + mutex_unlock(&chains_lock(chains)); + + while (level-- > 0) + mlx5_chains_put_table(chains, chain, prio, level); + + return; + +err_get_prio: + mutex_unlock(&chains_lock(chains)); + WARN_ONCE(1, + "Couldn't find table: (chain: %d prio: %d level: %d)", + chain, prio, level); +} + +struct mlx5_flow_table * +mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains) +{ + return tc_end_ft(chains); +} + +struct mlx5_flow_table * +mlx5_chains_create_global_table(struct mlx5_fs_chains *chains) +{ + u32 chain, prio, level; + int err; + + if (!mlx5_chains_ignore_flow_level_supported(chains)) { + err = -EOPNOTSUPP; + + mlx5_core_warn(chains->dev, + "Couldn't create global flow table, ignore_flow_level not supported."); + goto err_ignore; + } + + chain = mlx5_chains_get_chain_range(chains), + prio = mlx5_chains_get_prio_range(chains); + level = mlx5_chains_get_level_range(chains); + + return mlx5_chains_create_table(chains, chain, prio, level); + +err_ignore: + return ERR_PTR(err); +} + +void +mlx5_chains_destroy_global_table(struct mlx5_fs_chains *chains, + struct mlx5_flow_table *ft) +{ + mlx5_destroy_flow_table(ft); +} + +static struct mlx5_fs_chains * +mlx5_chains_init(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr) +{ + struct mlx5_fs_chains *chains_priv; + u32 max_flow_counter; + int err; + + chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL); + if (!chains_priv) + return ERR_PTR(-ENOMEM); + + max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, max_flow_counter_15_0); + + mlx5_core_dbg(dev, + "Init flow table chains, max counters(%d), groups(%d), max flow table size(%d)\n", + max_flow_counter, attr->max_grp_num, attr->max_ft_sz); + + chains_priv->dev = dev; + chains_priv->flags = attr->flags; + chains_priv->ns = attr->ns; + chains_priv->group_num = attr->max_grp_num; + chains_priv->chains_mapping = attr->mapping; + tc_default_ft(chains_priv) = tc_end_ft(chains_priv) = attr->default_ft; + + mlx5_core_info(dev, "Supported tc offload range - chains: %u, prios: %u\n", + mlx5_chains_get_chain_range(chains_priv), + mlx5_chains_get_prio_range(chains_priv)); + + err = rhashtable_init(&chains_ht(chains_priv), &chain_params); + if (err) + goto init_chains_ht_err; + + err = rhashtable_init(&prios_ht(chains_priv), &prio_params); + if (err) + goto init_prios_ht_err; + + 
mutex_init(&chains_lock(chains_priv)); + + return chains_priv; + +init_prios_ht_err: + rhashtable_destroy(&chains_ht(chains_priv)); +init_chains_ht_err: + kfree(chains_priv); + return ERR_PTR(err); +} + +static void +mlx5_chains_cleanup(struct mlx5_fs_chains *chains) +{ + mutex_destroy(&chains_lock(chains)); + rhashtable_destroy(&prios_ht(chains)); + rhashtable_destroy(&chains_ht(chains)); + + kfree(chains); +} + +struct mlx5_fs_chains * +mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr) +{ + struct mlx5_fs_chains *chains; + + chains = mlx5_chains_init(dev, attr); + + return chains; +} + +void +mlx5_chains_destroy(struct mlx5_fs_chains *chains) +{ + mlx5_chains_cleanup(chains); +} + +int +mlx5_chains_get_chain_mapping(struct mlx5_fs_chains *chains, u32 chain, + u32 *chain_mapping) +{ + struct mapping_ctx *ctx = chains->chains_mapping; + struct mlx5_mapped_obj mapped_obj = {}; + + mapped_obj.type = MLX5_MAPPED_OBJ_CHAIN; + mapped_obj.chain = chain; + return mapping_add(ctx, &mapped_obj, chain_mapping); +} + +int +mlx5_chains_put_chain_mapping(struct mlx5_fs_chains *chains, u32 chain_mapping) +{ + struct mapping_ctx *ctx = chains->chains_mapping; + + return mapping_remove(ctx, chain_mapping); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h new file mode 100644 index 000000000..d50bdb226 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. */ + +#ifndef __ML5_ESW_CHAINS_H__ +#define __ML5_ESW_CHAINS_H__ + +#include <linux/mlx5/fs.h> + +struct mlx5_fs_chains; +struct mlx5_mapped_obj; + +enum mlx5_chains_flags { + MLX5_CHAINS_AND_PRIOS_SUPPORTED = BIT(0), + MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED = BIT(1), + MLX5_CHAINS_FT_TUNNEL_SUPPORTED = BIT(2), +}; + +struct mlx5_chains_attr { + enum mlx5_flow_namespace_type ns; + u32 flags; + u32 max_ft_sz; + u32 max_grp_num; + struct mlx5_flow_table *default_ft; + struct mapping_ctx *mapping; +}; + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + +bool +mlx5_chains_prios_supported(struct mlx5_fs_chains *chains); +bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains); +bool +mlx5_chains_backwards_supported(struct mlx5_fs_chains *chains); +u32 +mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains); +u32 +mlx5_chains_get_chain_range(struct mlx5_fs_chains *chains); +u32 +mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains); + +struct mlx5_flow_table * +mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level); +void +mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level); + +struct mlx5_flow_table * +mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains); + +struct mlx5_flow_table * +mlx5_chains_create_global_table(struct mlx5_fs_chains *chains); +void +mlx5_chains_destroy_global_table(struct mlx5_fs_chains *chains, + struct mlx5_flow_table *ft); + +int +mlx5_chains_get_chain_mapping(struct mlx5_fs_chains *chains, u32 chain, + u32 *chain_mapping); +int +mlx5_chains_put_chain_mapping(struct mlx5_fs_chains *chains, + u32 chain_mapping); + +struct mlx5_fs_chains * +mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr); +void mlx5_chains_destroy(struct mlx5_fs_chains *chains); + +void +mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains, + struct mlx5_flow_table *ft); + +#else /* CONFIG_MLX5_CLS_ACT */ + 
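[Editorial note, not part of the patch: before the !CONFIG_MLX5_CLS_ACT stubs that follow, a minimal usage sketch of the chains API above may help. The namespace, group budget and the example_use_chains() wrapper are placeholders invented for illustration; error handling is abbreviated.]

/* Illustrative only: pair mlx5_chains_create()/mlx5_chains_destroy() with
 * reference-counted get/put of the table backing a (chain, prio, level).
 * Values below are placeholders, not taken from this patch.
 */
#include <linux/err.h>
#include "lib/fs_chains.h"

static int example_use_chains(struct mlx5_core_dev *dev,
			      struct mapping_ctx *mapping)
{
	struct mlx5_chains_attr attr = {};
	struct mlx5_fs_chains *chains;
	struct mlx5_flow_table *ft;
	int err = 0;

	attr.ns = MLX5_FLOW_NAMESPACE_FDB;	/* placeholder namespace */
	attr.max_grp_num = 4;			/* placeholder group budget */
	attr.mapping = mapping;

	chains = mlx5_chains_create(dev, &attr);
	if (IS_ERR(chains))
		return PTR_ERR(chains);

	/* Looks up or creates the table for (chain 0, prio 1, level 0);
	 * lower levels are created first by mlx5_chains_get_table() itself.
	 */
	ft = mlx5_chains_get_table(chains, 0, 1, 0);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto out;
	}

	/* ... add flow rules to ft here ... */

	mlx5_chains_put_table(chains, 0, 1, 0);
out:
	mlx5_chains_destroy(chains);
	return err;
}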
+static inline bool +mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains) +{ return false; } + +static inline struct mlx5_flow_table * +mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level) { return ERR_PTR(-EOPNOTSUPP); } +static inline void +mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level) {}; + +static inline struct mlx5_flow_table * +mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains) { return ERR_PTR(-EOPNOTSUPP); } + +static inline struct mlx5_fs_chains * +mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr) +{ return NULL; } +static inline void +mlx5_chains_destroy(struct mlx5_fs_chains *chains) {}; + +#endif /* CONFIG_MLX5_CLS_ACT */ + +#endif /* __ML5_ESW_CHAINS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c new file mode 100644 index 000000000..b78f2ba25 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c @@ -0,0 +1,608 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. + +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/mlx5/fs.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#include "lib/fs_ttc.h" + +#define MLX5_TTC_NUM_GROUPS 3 +#define MLX5_TTC_GROUP1_SIZE (BIT(3) + MLX5_NUM_TUNNEL_TT) +#define MLX5_TTC_GROUP2_SIZE BIT(1) +#define MLX5_TTC_GROUP3_SIZE BIT(0) +#define MLX5_TTC_TABLE_SIZE (MLX5_TTC_GROUP1_SIZE +\ + MLX5_TTC_GROUP2_SIZE +\ + MLX5_TTC_GROUP3_SIZE) + +#define MLX5_INNER_TTC_NUM_GROUPS 3 +#define MLX5_INNER_TTC_GROUP1_SIZE BIT(3) +#define MLX5_INNER_TTC_GROUP2_SIZE BIT(1) +#define MLX5_INNER_TTC_GROUP3_SIZE BIT(0) +#define MLX5_INNER_TTC_TABLE_SIZE (MLX5_INNER_TTC_GROUP1_SIZE +\ + MLX5_INNER_TTC_GROUP2_SIZE +\ + MLX5_INNER_TTC_GROUP3_SIZE) + +/* L3/L4 traffic type classifier */ +struct mlx5_ttc_table { + int num_groups; + struct mlx5_flow_table *t; + struct mlx5_flow_group **g; + struct mlx5_ttc_rule rules[MLX5_NUM_TT]; + struct mlx5_flow_handle *tunnel_rules[MLX5_NUM_TUNNEL_TT]; +}; + +struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc) +{ + return ttc->t; +} + +static void mlx5_cleanup_ttc_rules(struct mlx5_ttc_table *ttc) +{ + int i; + + for (i = 0; i < MLX5_NUM_TT; i++) { + if (!IS_ERR_OR_NULL(ttc->rules[i].rule)) { + mlx5_del_flow_rules(ttc->rules[i].rule); + ttc->rules[i].rule = NULL; + } + } + + for (i = 0; i < MLX5_NUM_TUNNEL_TT; i++) { + if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) { + mlx5_del_flow_rules(ttc->tunnel_rules[i]); + ttc->tunnel_rules[i] = NULL; + } + } +} + +struct mlx5_etype_proto { + u16 etype; + u8 proto; +}; + +static struct mlx5_etype_proto ttc_rules[] = { + [MLX5_TT_IPV4_TCP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_TCP, + }, + [MLX5_TT_IPV6_TCP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_TCP, + }, + [MLX5_TT_IPV4_UDP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_UDP, + }, + [MLX5_TT_IPV6_UDP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_UDP, + }, + [MLX5_TT_IPV4_IPSEC_AH] = { + .etype = ETH_P_IP, + .proto = IPPROTO_AH, + }, + [MLX5_TT_IPV6_IPSEC_AH] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_AH, + }, + [MLX5_TT_IPV4_IPSEC_ESP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_ESP, + }, + [MLX5_TT_IPV6_IPSEC_ESP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_ESP, + }, + [MLX5_TT_IPV4] = { + .etype = ETH_P_IP, + .proto = 0, + }, + [MLX5_TT_IPV6] = { + .etype = ETH_P_IPV6, + .proto = 0, 
+ }, + [MLX5_TT_ANY] = { + .etype = 0, + .proto = 0, + }, +}; + +static struct mlx5_etype_proto ttc_tunnel_rules[] = { + [MLX5_TT_IPV4_GRE] = { + .etype = ETH_P_IP, + .proto = IPPROTO_GRE, + }, + [MLX5_TT_IPV6_GRE] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_GRE, + }, + [MLX5_TT_IPV4_IPIP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_IPIP, + }, + [MLX5_TT_IPV6_IPIP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_IPIP, + }, + [MLX5_TT_IPV4_IPV6] = { + .etype = ETH_P_IP, + .proto = IPPROTO_IPV6, + }, + [MLX5_TT_IPV6_IPV6] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_IPV6, + }, + +}; + +u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt) +{ + return ttc_tunnel_rules[tt].proto; +} + +static bool mlx5_tunnel_proto_supported_rx(struct mlx5_core_dev *mdev, + u8 proto_type) +{ + switch (proto_type) { + case IPPROTO_GRE: + return MLX5_CAP_ETH(mdev, tunnel_stateless_gre); + case IPPROTO_IPIP: + case IPPROTO_IPV6: + return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) || + MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_rx)); + default: + return false; + } +} + +static bool mlx5_tunnel_any_rx_proto_supported(struct mlx5_core_dev *mdev) +{ + int tt; + + for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { + if (mlx5_tunnel_proto_supported_rx(mdev, + ttc_tunnel_rules[tt].proto)) + return true; + } + return false; +} + +bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) +{ + return (mlx5_tunnel_any_rx_proto_supported(mdev) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_ip_version)); +} + +static u8 mlx5_etype_to_ipv(u16 ethertype) +{ + if (ethertype == ETH_P_IP) + return 4; + + if (ethertype == ETH_P_IPV6) + return 6; + + return 0; +} + +static struct mlx5_flow_handle * +mlx5_generate_ttc_rule(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, u16 etype, u8 proto) +{ + int match_ipv_outer = + MLX5_CAP_FLOWTABLE_NIC_RX(dev, + ft_field_support.outer_ip_version); + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + u8 ipv; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + if (proto) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto); + } + + ipv = mlx5_etype_to_ipv(etype); + if (match_ipv_outer && ipv) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv); + } else if (etype) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype); + } + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(dev, "%s: add rule failed\n", __func__); + } + + kvfree(spec); + return err ? 
ERR_PTR(err) : rule; +} + +static int mlx5_generate_ttc_table_rules(struct mlx5_core_dev *dev, + struct ttc_params *params, + struct mlx5_ttc_table *ttc) +{ + struct mlx5_flow_handle **trules; + struct mlx5_ttc_rule *rules; + struct mlx5_flow_table *ft; + int tt; + int err; + + ft = ttc->t; + rules = ttc->rules; + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + struct mlx5_ttc_rule *rule = &rules[tt]; + + if (test_bit(tt, params->ignore_dests)) + continue; + rule->rule = mlx5_generate_ttc_rule(dev, ft, ¶ms->dests[tt], + ttc_rules[tt].etype, + ttc_rules[tt].proto); + if (IS_ERR(rule->rule)) { + err = PTR_ERR(rule->rule); + rule->rule = NULL; + goto del_rules; + } + rule->default_dest = params->dests[tt]; + } + + if (!params->inner_ttc || !mlx5_tunnel_inner_ft_supported(dev)) + return 0; + + trules = ttc->tunnel_rules; + for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { + if (!mlx5_tunnel_proto_supported_rx(dev, + ttc_tunnel_rules[tt].proto)) + continue; + if (test_bit(tt, params->ignore_tunnel_dests)) + continue; + trules[tt] = mlx5_generate_ttc_rule(dev, ft, + ¶ms->tunnel_dests[tt], + ttc_tunnel_rules[tt].etype, + ttc_tunnel_rules[tt].proto); + if (IS_ERR(trules[tt])) { + err = PTR_ERR(trules[tt]); + trules[tt] = NULL; + goto del_rules; + } + } + + return 0; + +del_rules: + mlx5_cleanup_ttc_rules(ttc); + return err; +} + +static int mlx5_create_ttc_table_groups(struct mlx5_ttc_table *ttc, + bool use_ipv) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ttc->g = kcalloc(MLX5_TTC_NUM_GROUPS, sizeof(*ttc->g), GFP_KERNEL); + if (!ttc->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ttc->g); + ttc->g = NULL; + return -ENOMEM; + } + + /* L4 Group */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); + if (use_ipv) + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version); + else + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_TTC_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + /* L3 Group */ + MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_TTC_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + /* Any Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_TTC_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ttc->g[ttc->num_groups]); + ttc->g[ttc->num_groups] = NULL; + kvfree(in); + + return err; +} + +static struct mlx5_flow_handle * +mlx5_generate_inner_ttc_rule(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, + u16 etype, u8 proto) +{ + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + u8 ipv; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + ipv = 
mlx5_etype_to_ipv(etype); + if (etype && ipv) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv); + } + + if (proto) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto); + } + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(dev, "%s: add inner TTC rule failed\n", __func__); + } + + kvfree(spec); + return err ? ERR_PTR(err) : rule; +} + +static int mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev, + struct ttc_params *params, + struct mlx5_ttc_table *ttc) +{ + struct mlx5_ttc_rule *rules; + struct mlx5_flow_table *ft; + int err; + int tt; + + ft = ttc->t; + rules = ttc->rules; + + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + struct mlx5_ttc_rule *rule = &rules[tt]; + + if (test_bit(tt, params->ignore_dests)) + continue; + rule->rule = mlx5_generate_inner_ttc_rule(dev, ft, + ¶ms->dests[tt], + ttc_rules[tt].etype, + ttc_rules[tt].proto); + if (IS_ERR(rule->rule)) { + err = PTR_ERR(rule->rule); + rule->rule = NULL; + goto del_rules; + } + rule->default_dest = params->dests[tt]; + } + + return 0; + +del_rules: + + mlx5_cleanup_ttc_rules(ttc); + return err; +} + +static int mlx5_create_inner_ttc_table_groups(struct mlx5_ttc_table *ttc) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ttc->g = kcalloc(MLX5_INNER_TTC_NUM_GROUPS, sizeof(*ttc->g), + GFP_KERNEL); + if (!ttc->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ttc->g); + ttc->g = NULL; + return -ENOMEM; + } + + /* L4 Group */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_INNER_TTC_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + /* L3 Group */ + MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_INNER_TTC_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + /* Any Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_INNER_TTC_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ttc->g[ttc->num_groups]); + ttc->g[ttc->num_groups] = NULL; + kvfree(in); + + return err; +} + +struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev, + struct ttc_params *params) +{ + struct mlx5_ttc_table *ttc; + int err; + + ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); + if (!ttc) + return ERR_PTR(-ENOMEM); + + WARN_ON_ONCE(params->ft_attr.max_fte); + params->ft_attr.max_fte = 
MLX5_INNER_TTC_TABLE_SIZE; + ttc->t = mlx5_create_flow_table(params->ns, ¶ms->ft_attr); + if (IS_ERR(ttc->t)) { + err = PTR_ERR(ttc->t); + kvfree(ttc); + return ERR_PTR(err); + } + + err = mlx5_create_inner_ttc_table_groups(ttc); + if (err) + goto destroy_ft; + + err = mlx5_generate_inner_ttc_table_rules(dev, params, ttc); + if (err) + goto destroy_ft; + + return ttc; + +destroy_ft: + mlx5_destroy_ttc_table(ttc); + return ERR_PTR(err); +} + +void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc) +{ + int i; + + mlx5_cleanup_ttc_rules(ttc); + for (i = ttc->num_groups - 1; i >= 0; i--) { + if (!IS_ERR_OR_NULL(ttc->g[i])) + mlx5_destroy_flow_group(ttc->g[i]); + ttc->g[i] = NULL; + } + + kfree(ttc->g); + mlx5_destroy_flow_table(ttc->t); + kvfree(ttc); +} + +struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, + struct ttc_params *params) +{ + bool match_ipv_outer = + MLX5_CAP_FLOWTABLE_NIC_RX(dev, + ft_field_support.outer_ip_version); + struct mlx5_ttc_table *ttc; + int err; + + ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); + if (!ttc) + return ERR_PTR(-ENOMEM); + + WARN_ON_ONCE(params->ft_attr.max_fte); + params->ft_attr.max_fte = MLX5_TTC_TABLE_SIZE; + ttc->t = mlx5_create_flow_table(params->ns, ¶ms->ft_attr); + if (IS_ERR(ttc->t)) { + err = PTR_ERR(ttc->t); + kvfree(ttc); + return ERR_PTR(err); + } + + err = mlx5_create_ttc_table_groups(ttc, match_ipv_outer); + if (err) + goto destroy_ft; + + err = mlx5_generate_ttc_table_rules(dev, params, ttc); + if (err) + goto destroy_ft; + + return ttc; + +destroy_ft: + mlx5_destroy_ttc_table(ttc); + return ERR_PTR(err); +} + +int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type, + struct mlx5_flow_destination *new_dest) +{ + return mlx5_modify_rule_destination(ttc->rules[type].rule, new_dest, + NULL); +} + +struct mlx5_flow_destination +mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc, + enum mlx5_traffic_types type) +{ + struct mlx5_flow_destination *dest = &ttc->rules[type].default_dest; + + WARN_ONCE(dest->type != MLX5_FLOW_DESTINATION_TYPE_TIR, + "TTC[%d] default dest is not setup yet", type); + + return *dest; +} + +int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc, + enum mlx5_traffic_types type) +{ + struct mlx5_flow_destination dest = mlx5_ttc_get_default_dest(ttc, type); + + return mlx5_ttc_fwd_dest(ttc, type, &dest); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h new file mode 100644 index 000000000..85fef0cd1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. 
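[Editorial note, not part of the patch: a minimal sketch of how a caller builds a TTC (traffic type classifier) table with the API above. The TIR numbers, table level and the example_create_ttc() wrapper are placeholders; a real caller such as the mlx5e RX steering code fills dests[] from its own TIRs.]

/* Illustrative only: one destination per traffic type, then create. */
#include "lib/fs_ttc.h"

static struct mlx5_ttc_table *
example_create_ttc(struct mlx5_core_dev *dev, struct mlx5_flow_namespace *ns,
		   u32 *tir_num /* indexed by enum mlx5_traffic_types */)
{
	struct ttc_params params = {};
	int tt;

	params.ns = ns;
	params.ft_attr.level = 1;	/* placeholder table level */

	for (tt = 0; tt < MLX5_NUM_TT; tt++) {
		params.dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
		params.dests[tt].tir_num = tir_num[tt];
	}

	/* Skip tunnel (inner header) classification in this sketch;
	 * note max_fte is left to mlx5_create_ttc_table() itself.
	 */
	params.inner_ttc = false;

	return mlx5_create_ttc_table(dev, &params);
}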
*/ + +#ifndef __ML5_FS_TTC_H__ +#define __ML5_FS_TTC_H__ + +#include <linux/mlx5/fs.h> + +enum mlx5_traffic_types { + MLX5_TT_IPV4_TCP, + MLX5_TT_IPV6_TCP, + MLX5_TT_IPV4_UDP, + MLX5_TT_IPV6_UDP, + MLX5_TT_IPV4_IPSEC_AH, + MLX5_TT_IPV6_IPSEC_AH, + MLX5_TT_IPV4_IPSEC_ESP, + MLX5_TT_IPV6_IPSEC_ESP, + MLX5_TT_IPV4, + MLX5_TT_IPV6, + MLX5_TT_ANY, + MLX5_NUM_TT, + MLX5_NUM_INDIR_TIRS = MLX5_TT_ANY, +}; + +enum mlx5_tunnel_types { + MLX5_TT_IPV4_GRE, + MLX5_TT_IPV6_GRE, + MLX5_TT_IPV4_IPIP, + MLX5_TT_IPV6_IPIP, + MLX5_TT_IPV4_IPV6, + MLX5_TT_IPV6_IPV6, + MLX5_NUM_TUNNEL_TT, +}; + +struct mlx5_ttc_rule { + struct mlx5_flow_handle *rule; + struct mlx5_flow_destination default_dest; +}; + +struct mlx5_ttc_table; + +struct ttc_params { + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table_attr ft_attr; + struct mlx5_flow_destination dests[MLX5_NUM_TT]; + DECLARE_BITMAP(ignore_dests, MLX5_NUM_TT); + bool inner_ttc; + DECLARE_BITMAP(ignore_tunnel_dests, MLX5_NUM_TUNNEL_TT); + struct mlx5_flow_destination tunnel_dests[MLX5_NUM_TUNNEL_TT]; +}; + +struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc); + +struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, + struct ttc_params *params); +void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc); + +struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev, + struct ttc_params *params); + +int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type, + struct mlx5_flow_destination *new_dest); +struct mlx5_flow_destination +mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc, + enum mlx5_traffic_types type); +int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc, + enum mlx5_traffic_types type); + +bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev); +u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt); + +#endif /* __MLX5_FS_TTC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c new file mode 100644 index 000000000..6dc83e871 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
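[Editorial note, not part of the patch: a sketch of redirecting one traffic type at runtime and later restoring the default destination recorded at table-creation time. The new_ft argument and example_redirect_tcp() are placeholders for whatever table a caller wants to bounce traffic through.]

/* Illustrative only: temporary per-traffic-type redirect, then restore. */
static int example_redirect_tcp(struct mlx5_ttc_table *ttc,
				struct mlx5_flow_table *new_ft)
{
	struct mlx5_flow_destination dest = {
		.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE,
		.ft = new_ft,
	};
	int err;

	err = mlx5_ttc_fwd_dest(ttc, MLX5_TT_IPV4_TCP, &dest);
	if (err)
		return err;

	/* ... later, undo the redirect ... */
	return mlx5_ttc_fwd_default_dest(ttc, MLX5_TT_IPV4_TCP);
}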
*/ + +#include <linux/kernel.h> +#include "mlx5_core.h" +#include "geneve.h" + +struct mlx5_geneve { + struct mlx5_core_dev *mdev; + __be16 opt_class; + u8 opt_type; + u32 obj_id; + struct mutex sync_lock; /* protect GENEVE obj operations */ + u32 refcount; +}; + +static int mlx5_geneve_tlv_option_create(struct mlx5_core_dev *mdev, + __be16 class, + u8 type, + u8 len) +{ + u32 in[MLX5_ST_SZ_DW(create_geneve_tlv_option_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u64 general_obj_types; + void *hdr, *opt; + u16 obj_id; + int err; + + general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); + if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT)) + return -EINVAL; + + hdr = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, hdr); + opt = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, geneve_tlv_opt); + + MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT); + + MLX5_SET(geneve_tlv_option, opt, option_class, be16_to_cpu(class)); + MLX5_SET(geneve_tlv_option, opt, option_type, type); + MLX5_SET(geneve_tlv_option, opt, option_data_length, len); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + return obj_id; +} + +static void mlx5_geneve_tlv_option_destroy(struct mlx5_core_dev *mdev, u16 obj_id) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt) +{ + int res = 0; + + if (IS_ERR_OR_NULL(geneve)) + return -EOPNOTSUPP; + + mutex_lock(&geneve->sync_lock); + + if (geneve->refcount) { + if (geneve->opt_class == opt->opt_class && + geneve->opt_type == opt->type) { + /* We already have TLV options obj allocated */ + geneve->refcount++; + } else { + /* TLV options obj allocated, but its params + * do not match the new request. + * We support only one such object. + */ + mlx5_core_warn(geneve->mdev, + "Won't create Geneve TLV opt object with class:type:len = 0x%x:0x%x:%d (another class:type already exists)\n", + be16_to_cpu(opt->opt_class), + opt->type, + opt->length); + res = -EOPNOTSUPP; + goto unlock; + } + } else { + /* We don't have any TLV options obj allocated */ + + res = mlx5_geneve_tlv_option_create(geneve->mdev, + opt->opt_class, + opt->type, + opt->length); + if (res < 0) { + mlx5_core_warn(geneve->mdev, + "Failed creating Geneve TLV opt object class:type:len = 0x%x:0x%x:%d (err=%d)\n", + be16_to_cpu(opt->opt_class), + opt->type, opt->length, res); + goto unlock; + } + geneve->opt_class = opt->opt_class; + geneve->opt_type = opt->type; + geneve->obj_id = res; + geneve->refcount++; + res = 0; + } + +unlock: + mutex_unlock(&geneve->sync_lock); + return res; +} + +void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve) +{ + if (IS_ERR_OR_NULL(geneve)) + return; + + mutex_lock(&geneve->sync_lock); + if (--geneve->refcount == 0) { + /* We've just removed the last user of Geneve option. + * Now delete the object in FW. 
+ */ + mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id); + + geneve->opt_class = 0; + geneve->opt_type = 0; + geneve->obj_id = 0; + } + mutex_unlock(&geneve->sync_lock); +} + +struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev) +{ + struct mlx5_geneve *geneve = + kzalloc(sizeof(*geneve), GFP_KERNEL); + + if (!geneve) + return ERR_PTR(-ENOMEM); + geneve->mdev = mdev; + mutex_init(&geneve->sync_lock); + + return geneve; +} + +void mlx5_geneve_destroy(struct mlx5_geneve *geneve) +{ + if (IS_ERR_OR_NULL(geneve)) + return; + + /* Lockless since we are unloading */ + if (geneve->refcount) + mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id); + + kfree(geneve); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h new file mode 100644 index 000000000..adee0cbba --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_GENEVE_H__ +#define __MLX5_GENEVE_H__ + +#include <net/geneve.h> +#include <linux/mlx5/driver.h> + +struct mlx5_geneve; + +#ifdef CONFIG_MLX5_ESWITCH + +struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev); +void mlx5_geneve_destroy(struct mlx5_geneve *geneve); + +int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt); +void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline struct mlx5_geneve +*mlx5_geneve_create(struct mlx5_core_dev *mdev) { return NULL; } +static inline void +mlx5_geneve_destroy(struct mlx5_geneve *geneve) {} +static inline int +mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt) { return 0; } +static inline void +mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve) {} + +#endif /* CONFIG_MLX5_ESWITCH */ + +#endif /* __MLX5_GENEVE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c new file mode 100644 index 000000000..96ffc0a0e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
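[Editorial note, not part of the patch: a minimal sketch of the GENEVE TLV option API above. The geneve_opt would normally come from a parsed TC tunnel key; example_geneve_opt() is a placeholder wrapper.]

/* Illustrative only: register the single GENEVE TLV option the device can
 * match on, then drop the reference when the offloaded rule goes away.
 */
#include "lib/geneve.h"

static int example_geneve_opt(struct mlx5_geneve *geneve,
			      struct geneve_opt *opt)
{
	int err;

	/* Refcounted: succeeds if no option object exists yet, or if one
	 * with the same class/type is already programmed; otherwise
	 * -EOPNOTSUPP, since only one such object is supported.
	 */
	err = mlx5_geneve_tlv_option_add(geneve, opt);
	if (err)
		return err;

	/* ... install flow rules matching on the option data ... */

	mlx5_geneve_tlv_option_del(geneve);
	return 0;
}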
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/driver.h> +#include <linux/etherdevice.h> +#include <linux/idr.h> +#include "mlx5_core.h" +#include "lib/mlx5.h" + +void mlx5_init_reserved_gids(struct mlx5_core_dev *dev) +{ + unsigned int tblsz = MLX5_CAP_ROCE(dev, roce_address_table_size); + + ida_init(&dev->roce.reserved_gids.ida); + dev->roce.reserved_gids.start = tblsz; + dev->roce.reserved_gids.count = 0; +} + +void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev) +{ + WARN_ON(!ida_is_empty(&dev->roce.reserved_gids.ida)); + dev->roce.reserved_gids.start = 0; + dev->roce.reserved_gids.count = 0; + ida_destroy(&dev->roce.reserved_gids.ida); +} + +int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count) +{ + if (dev->roce.reserved_gids.start < count) { + mlx5_core_warn(dev, "GID table exhausted attempting to reserve %d more GIDs\n", + count); + return -ENOMEM; + } + if (dev->roce.reserved_gids.count + count > MLX5_MAX_RESERVED_GIDS) { + mlx5_core_warn(dev, "Unable to reserve %d more GIDs\n", count); + return -ENOMEM; + } + + dev->roce.reserved_gids.start -= count; + dev->roce.reserved_gids.count += count; + mlx5_core_dbg(dev, "Reserved %u GIDs starting at %u\n", + dev->roce.reserved_gids.count, + dev->roce.reserved_gids.start); + return 0; +} + +void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count) +{ + WARN(count > dev->roce.reserved_gids.count, "Unreserving %u GIDs when only %u reserved", + count, dev->roce.reserved_gids.count); + + dev->roce.reserved_gids.start += count; + dev->roce.reserved_gids.count -= count; + mlx5_core_dbg(dev, "%u GIDs starting at %u left reserved\n", + dev->roce.reserved_gids.count, + dev->roce.reserved_gids.start); +} + +int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index) +{ + int end = dev->roce.reserved_gids.start + + dev->roce.reserved_gids.count - 1; + int index = 0; + + index = ida_alloc_range(&dev->roce.reserved_gids.ida, + dev->roce.reserved_gids.start, end, + GFP_KERNEL); + if (index < 0) + return index; + + mlx5_core_dbg(dev, "Allocating reserved GID %u\n", index); + *gid_index = index; + return 0; +} + +void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index) +{ + mlx5_core_dbg(dev, "Freeing reserved GID %u\n", gid_index); + ida_free(&dev->roce.reserved_gids.ida, gid_index); +} + +unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev) +{ + return dev->roce.reserved_gids.count; +} +EXPORT_SYMBOL_GPL(mlx5_core_reserved_gids_count); + +int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, + u8 roce_version, u8 roce_l3_type, const u8 *gid, + const u8 *mac, bool vlan, u16 vlan_id, u8 port_num) +{ +#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v) + u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {}; + void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address); + char *addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, in_addr, + source_l3_address); + void *addr_mac = MLX5_ADDR_OF(roce_addr_layout, in_addr, + source_mac_47_32); + int gidsz = MLX5_FLD_SZ_BYTES(roce_addr_layout, source_l3_address); + + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return -EINVAL; + + if (gid) { + if (vlan) { + MLX5_SET_RA(in_addr, vlan_valid, 1); + MLX5_SET_RA(in_addr, 
vlan_id, vlan_id); + } + + ether_addr_copy(addr_mac, mac); + memcpy(addr_l3_addr, gid, gidsz); + } + MLX5_SET_RA(in_addr, roce_version, roce_version); + MLX5_SET_RA(in_addr, roce_l3_type, roce_l3_type); + + if (MLX5_CAP_GEN(dev, num_vhca_ports) > 0) + MLX5_SET(set_roce_address_in, in, vhca_port_num, port_num); + + MLX5_SET(set_roce_address_in, in, roce_address_index, index); + MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS); + return mlx5_cmd_exec_in(dev, set_roce_address, in); +} +EXPORT_SYMBOL(mlx5_core_roce_gid_set); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c new file mode 100644 index 000000000..583dc7e2a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2018 Mellanox Technologies + +#include <linux/hyperv.h> +#include "mlx5_core.h" +#include "lib/hv.h" + +static int mlx5_hv_config_common(struct mlx5_core_dev *dev, void *buf, int len, + int offset, bool read) +{ + int rc = -EOPNOTSUPP; + int bytes_returned; + int block_id; + + if (offset % HV_CONFIG_BLOCK_SIZE_MAX || len != HV_CONFIG_BLOCK_SIZE_MAX) + return -EINVAL; + + block_id = offset / HV_CONFIG_BLOCK_SIZE_MAX; + + rc = read ? + hyperv_read_cfg_blk(dev->pdev, buf, + HV_CONFIG_BLOCK_SIZE_MAX, block_id, + &bytes_returned) : + hyperv_write_cfg_blk(dev->pdev, buf, + HV_CONFIG_BLOCK_SIZE_MAX, block_id); + + /* Make sure len bytes were read successfully */ + if (read && !rc && len != bytes_returned) + rc = -EIO; + + if (rc) { + mlx5_core_err(dev, "Failed to %s hv config, err = %d, len = %d, offset = %d\n", + read ? "read" : "write", rc, len, + offset); + return rc; + } + + return 0; +} + +int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len, + int offset) +{ + return mlx5_hv_config_common(dev, buf, len, offset, true); +} + +int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len, + int offset) +{ + return mlx5_hv_config_common(dev, buf, len, offset, false); +} + +int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context, + void (*block_invalidate)(void *context, + u64 block_mask)) +{ + return hyperv_reg_block_invalidate(dev->pdev, context, + block_invalidate); +} + +void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev) +{ + hyperv_reg_block_invalidate(dev->pdev, NULL, NULL); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h new file mode 100644 index 000000000..f9a45573f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
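[Editorial note, not part of the patch: a sketch of the reserved-GID flow in gid.c above. example_reserved_gid() and the count of 2 are placeholders; real users reserve GIDs for internal QPs and program them with mlx5_core_roce_gid_set().]

/* Illustrative only: carve GIDs off the top of the RoCE GID table,
 * allocate one index from the reserved range, then release everything.
 */
#include "lib/mlx5.h"

static int example_reserved_gid(struct mlx5_core_dev *dev)
{
	int gid_index;
	int err;

	err = mlx5_core_reserve_gids(dev, 2);
	if (err)
		return err;

	err = mlx5_core_reserved_gid_alloc(dev, &gid_index);
	if (err)
		goto out_unreserve;

	/* ... program the GID at gid_index via mlx5_core_roce_gid_set()
	 * and use it, then free it again ...
	 */

	mlx5_core_reserved_gid_free(dev, gid_index);
out_unreserve:
	mlx5_core_unreserve_gids(dev, 2);
	return err;
}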
*/ + +#ifndef __LIB_HV_H__ +#define __LIB_HV_H__ + +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) + +#include <linux/hyperv.h> +#include <linux/mlx5/driver.h> + +int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len, + int offset); +int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len, + int offset); +int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context, + void (*block_invalidate)(void *context, + u64 block_mask)); +void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev); +#endif + +#endif /* __LIB_HV_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c new file mode 100644 index 000000000..4047629a8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2018 Mellanox Technologies + +#include <linux/hyperv.h> +#include "mlx5_core.h" +#include "lib/hv.h" +#include "lib/hv_vhca.h" + +struct mlx5_hv_vhca { + struct mlx5_core_dev *dev; + struct workqueue_struct *work_queue; + struct mlx5_hv_vhca_agent *agents[MLX5_HV_VHCA_AGENT_MAX]; + struct mutex agents_lock; /* Protect agents array */ +}; + +struct mlx5_hv_vhca_work { + struct work_struct invalidate_work; + struct mlx5_hv_vhca *hv_vhca; + u64 block_mask; +}; + +struct mlx5_hv_vhca_data_block { + u16 sequence; + u16 offset; + u8 reserved[4]; + u64 data[15]; +}; + +struct mlx5_hv_vhca_agent { + enum mlx5_hv_vhca_agent_type type; + struct mlx5_hv_vhca *hv_vhca; + void *priv; + u16 seq; + void (*control)(struct mlx5_hv_vhca_agent *agent, + struct mlx5_hv_vhca_control_block *block); + void (*invalidate)(struct mlx5_hv_vhca_agent *agent, + u64 block_mask); + void (*cleanup)(struct mlx5_hv_vhca_agent *agent); +}; + +struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev) +{ + struct mlx5_hv_vhca *hv_vhca = NULL; + + hv_vhca = kzalloc(sizeof(*hv_vhca), GFP_KERNEL); + if (!hv_vhca) + return ERR_PTR(-ENOMEM); + + hv_vhca->work_queue = create_singlethread_workqueue("mlx5_hv_vhca"); + if (!hv_vhca->work_queue) { + kfree(hv_vhca); + return ERR_PTR(-ENOMEM); + } + + hv_vhca->dev = dev; + mutex_init(&hv_vhca->agents_lock); + + return hv_vhca; +} + +void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca) +{ + if (IS_ERR_OR_NULL(hv_vhca)) + return; + + destroy_workqueue(hv_vhca->work_queue); + kfree(hv_vhca); +} + +static void mlx5_hv_vhca_invalidate_work(struct work_struct *work) +{ + struct mlx5_hv_vhca_work *hwork; + struct mlx5_hv_vhca *hv_vhca; + int i; + + hwork = container_of(work, struct mlx5_hv_vhca_work, invalidate_work); + hv_vhca = hwork->hv_vhca; + + mutex_lock(&hv_vhca->agents_lock); + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) { + struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i]; + + if (!agent || !agent->invalidate) + continue; + + if (!(BIT(agent->type) & hwork->block_mask)) + continue; + + agent->invalidate(agent, hwork->block_mask); + } + mutex_unlock(&hv_vhca->agents_lock); + + kfree(hwork); +} + +void mlx5_hv_vhca_invalidate(void *context, u64 block_mask) +{ + struct mlx5_hv_vhca *hv_vhca = (struct mlx5_hv_vhca *)context; + struct mlx5_hv_vhca_work *work; + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return; + + INIT_WORK(&work->invalidate_work, mlx5_hv_vhca_invalidate_work); + work->hv_vhca = hv_vhca; + work->block_mask = block_mask; + + queue_work(hv_vhca->work_queue, &work->invalidate_work); +} + +#define AGENT_MASK(type) (type ? 
BIT(type - 1) : 0 /* control */) + +static void mlx5_hv_vhca_agents_control(struct mlx5_hv_vhca *hv_vhca, + struct mlx5_hv_vhca_control_block *block) +{ + int i; + + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) { + struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i]; + + if (!agent || !agent->control) + continue; + + if (!(AGENT_MASK(agent->type) & block->control)) + continue; + + agent->control(agent, block); + } +} + +static void mlx5_hv_vhca_capabilities(struct mlx5_hv_vhca *hv_vhca, + u32 *capabilities) +{ + int i; + + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) { + struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i]; + + if (agent) + *capabilities |= AGENT_MASK(agent->type); + } +} + +static void +mlx5_hv_vhca_control_agent_invalidate(struct mlx5_hv_vhca_agent *agent, + u64 block_mask) +{ + struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca; + struct mlx5_core_dev *dev = hv_vhca->dev; + struct mlx5_hv_vhca_control_block *block; + u32 capabilities = 0; + int err; + + block = kzalloc(sizeof(*block), GFP_KERNEL); + if (!block) + return; + + err = mlx5_hv_read_config(dev, block, sizeof(*block), 0); + if (err) + goto free_block; + + mlx5_hv_vhca_capabilities(hv_vhca, &capabilities); + + /* In case no capabilities, send empty block in return */ + if (!capabilities) { + memset(block, 0, sizeof(*block)); + goto write; + } + + if (block->capabilities != capabilities) + block->capabilities = capabilities; + + if (block->control & ~capabilities) + goto free_block; + + mlx5_hv_vhca_agents_control(hv_vhca, block); + block->command_ack = block->command; + +write: + mlx5_hv_write_config(dev, block, sizeof(*block), 0); + +free_block: + kfree(block); +} + +static struct mlx5_hv_vhca_agent * +mlx5_hv_vhca_control_agent_create(struct mlx5_hv_vhca *hv_vhca) +{ + return mlx5_hv_vhca_agent_create(hv_vhca, MLX5_HV_VHCA_AGENT_CONTROL, + NULL, + mlx5_hv_vhca_control_agent_invalidate, + NULL, NULL); +} + +static void mlx5_hv_vhca_control_agent_destroy(struct mlx5_hv_vhca_agent *agent) +{ + mlx5_hv_vhca_agent_destroy(agent); +} + +int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca) +{ + struct mlx5_hv_vhca_agent *agent; + int err; + + if (IS_ERR_OR_NULL(hv_vhca)) + return IS_ERR_OR_NULL(hv_vhca); + + err = mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca, + mlx5_hv_vhca_invalidate); + if (err) + return err; + + agent = mlx5_hv_vhca_control_agent_create(hv_vhca); + if (IS_ERR_OR_NULL(agent)) { + mlx5_hv_unregister_invalidate(hv_vhca->dev); + return IS_ERR_OR_NULL(agent); + } + + hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL] = agent; + + return 0; +} + +void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca) +{ + struct mlx5_hv_vhca_agent *agent; + int i; + + if (IS_ERR_OR_NULL(hv_vhca)) + return; + + agent = hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL]; + if (agent) + mlx5_hv_vhca_control_agent_destroy(agent); + + mutex_lock(&hv_vhca->agents_lock); + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) + WARN_ON(hv_vhca->agents[i]); + + mutex_unlock(&hv_vhca->agents_lock); + + mlx5_hv_unregister_invalidate(hv_vhca->dev); +} + +static void mlx5_hv_vhca_agents_update(struct mlx5_hv_vhca *hv_vhca) +{ + mlx5_hv_vhca_invalidate(hv_vhca, BIT(MLX5_HV_VHCA_AGENT_CONTROL)); +} + +struct mlx5_hv_vhca_agent * +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca, + enum mlx5_hv_vhca_agent_type type, + void (*control)(struct mlx5_hv_vhca_agent*, + struct mlx5_hv_vhca_control_block *block), + void (*invalidate)(struct mlx5_hv_vhca_agent*, + u64 block_mask), + void (*cleaup)(struct mlx5_hv_vhca_agent *agent), + void *priv) +{ + 
struct mlx5_hv_vhca_agent *agent; + + if (IS_ERR_OR_NULL(hv_vhca)) + return ERR_PTR(-ENOMEM); + + if (type >= MLX5_HV_VHCA_AGENT_MAX) + return ERR_PTR(-EINVAL); + + mutex_lock(&hv_vhca->agents_lock); + if (hv_vhca->agents[type]) { + mutex_unlock(&hv_vhca->agents_lock); + return ERR_PTR(-EINVAL); + } + mutex_unlock(&hv_vhca->agents_lock); + + agent = kzalloc(sizeof(*agent), GFP_KERNEL); + if (!agent) + return ERR_PTR(-ENOMEM); + + agent->type = type; + agent->hv_vhca = hv_vhca; + agent->priv = priv; + agent->control = control; + agent->invalidate = invalidate; + agent->cleanup = cleaup; + + mutex_lock(&hv_vhca->agents_lock); + hv_vhca->agents[type] = agent; + mutex_unlock(&hv_vhca->agents_lock); + + mlx5_hv_vhca_agents_update(hv_vhca); + + return agent; +} + +void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent) +{ + struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca; + + mutex_lock(&hv_vhca->agents_lock); + + if (WARN_ON(agent != hv_vhca->agents[agent->type])) { + mutex_unlock(&hv_vhca->agents_lock); + return; + } + + hv_vhca->agents[agent->type] = NULL; + mutex_unlock(&hv_vhca->agents_lock); + + if (agent->cleanup) + agent->cleanup(agent); + + kfree(agent); + + mlx5_hv_vhca_agents_update(hv_vhca); +} + +static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent, + struct mlx5_hv_vhca_data_block *data_block, + void *src, int len, int *offset) +{ + int bytes = min_t(int, (int)sizeof(data_block->data), len); + + data_block->sequence = agent->seq; + data_block->offset = (*offset)++; + memcpy(data_block->data, src, bytes); + + return bytes; +} + +static void mlx5_hv_vhca_agent_seq_update(struct mlx5_hv_vhca_agent *agent) +{ + agent->seq++; +} + +int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent, + void *buf, int len) +{ + int offset = agent->type * HV_CONFIG_BLOCK_SIZE_MAX; + int block_offset = 0; + int total = 0; + int err; + + while (len) { + struct mlx5_hv_vhca_data_block data_block = {0}; + int bytes; + + bytes = mlx5_hv_vhca_data_block_prepare(agent, &data_block, + buf + total, + len, &block_offset); + if (!bytes) + return -ENOMEM; + + err = mlx5_hv_write_config(agent->hv_vhca->dev, &data_block, + sizeof(data_block), offset); + if (err) + return err; + + total += bytes; + len -= bytes; + } + + mlx5_hv_vhca_agent_seq_update(agent); + + return 0; +} + +void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent) +{ + return agent->priv; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h new file mode 100644 index 000000000..f240ffe51 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
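[Editorial note, not part of the patch: a sketch of registering a non-control agent and pushing a payload through its config block. The callback body, payload layout and example_agent() wrapper are placeholders; only the create/write/destroy calls mirror the API above.]

/* Illustrative only: stats-style agent with an invalidate callback. */
#include <linux/err.h>
#include "lib/hv_vhca.h"

static void example_invalidate(struct mlx5_hv_vhca_agent *agent,
			       u64 block_mask)
{
	/* Host touched our block(s); refresh or re-publish state here. */
}

static int example_agent(struct mlx5_hv_vhca *hv_vhca)
{
	struct mlx5_hv_vhca_agent *agent;
	u64 payload[15] = {};	/* one data block worth of payload */
	int err;

	agent = mlx5_hv_vhca_agent_create(hv_vhca, MLX5_HV_VHCA_AGENT_STATS,
					  NULL,			/* control */
					  example_invalidate,	/* invalidate */
					  NULL,			/* cleanup */
					  NULL);		/* priv */
	if (IS_ERR_OR_NULL(agent))
		return agent ? PTR_ERR(agent) : 0;

	err = mlx5_hv_vhca_agent_write(agent, payload, sizeof(payload));

	mlx5_hv_vhca_agent_destroy(agent);
	return err;
}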
*/ + +#ifndef __LIB_HV_VHCA_H__ +#define __LIB_HV_VHCA_H__ + +#include "en.h" +#include "lib/hv.h" + +struct mlx5_hv_vhca_agent; +struct mlx5_hv_vhca; +struct mlx5_hv_vhca_control_block; + +enum mlx5_hv_vhca_agent_type { + MLX5_HV_VHCA_AGENT_CONTROL = 0, + MLX5_HV_VHCA_AGENT_STATS = 1, + MLX5_HV_VHCA_AGENT_MAX = 32, +}; + +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) + +struct mlx5_hv_vhca_control_block { + u32 capabilities; + u32 control; + u16 command; + u16 command_ack; + u16 version; + u16 rings; + u32 reserved1[28]; +}; + +struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev); +void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca); +int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca); +void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca); +void mlx5_hv_vhca_invalidate(void *context, u64 block_mask); + +struct mlx5_hv_vhca_agent * +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca, + enum mlx5_hv_vhca_agent_type type, + void (*control)(struct mlx5_hv_vhca_agent*, + struct mlx5_hv_vhca_control_block *block), + void (*invalidate)(struct mlx5_hv_vhca_agent*, + u64 block_mask), + void (*cleanup)(struct mlx5_hv_vhca_agent *agent), + void *context); + +void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent); +int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent, + void *buf, int len); +void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent); + +#else + +static inline struct mlx5_hv_vhca * +mlx5_hv_vhca_create(struct mlx5_core_dev *dev) +{ + return NULL; +} + +static inline void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca) +{ +} + +static inline int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca) +{ + return 0; +} + +static inline void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca) +{ +} + +static inline void mlx5_hv_vhca_invalidate(void *context, + u64 block_mask) +{ +} + +static inline struct mlx5_hv_vhca_agent * +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca, + enum mlx5_hv_vhca_agent_type type, + void (*control)(struct mlx5_hv_vhca_agent*, + struct mlx5_hv_vhca_control_block *block), + void (*invalidate)(struct mlx5_hv_vhca_agent*, + u64 block_mask), + void (*cleanup)(struct mlx5_hv_vhca_agent *agent), + void *context) +{ + return NULL; +} + +static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent) +{ +} +#endif + +#endif /* __LIB_HV_VHCA_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h new file mode 100644 index 000000000..032adb21a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
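[Editorial note, not part of the patch: a sizing observation inferred from hv.c and hv_vhca.c above. mlx5_hv_write_config() only accepts writes of exactly HV_CONFIG_BLOCK_SIZE_MAX bytes at block-aligned offsets, and mlx5_hv_vhca_agent_write() sends one struct mlx5_hv_vhca_data_block (2 + 2 + 4 + 15*8 = 128 bytes) per write, so each block carries at most 120 bytes of agent payload. The helper below is a sketch; the 120-byte figure is derived from the struct layout, not a named constant in this patch.]

/* Illustrative only: config-block writes needed for a payload of len bytes. */
#include <linux/kernel.h>

static inline int example_hv_vhca_blocks_needed(int len)
{
	return DIV_ROUND_UP(len, 120);
}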
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __LIB_MLX5_H__ +#define __LIB_MLX5_H__ + +#include "mlx5_core.h" + +void mlx5_init_reserved_gids(struct mlx5_core_dev *dev); +void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev); +int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count); +void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count); +int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index); +void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index); +int mlx5_crdump_enable(struct mlx5_core_dev *dev); +void mlx5_crdump_disable(struct mlx5_core_dev *dev); +int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data); + +/* TODO move to lib/events.h */ + +#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF +#define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF + +enum port_module_event_status_type { + MLX5_MODULE_STATUS_PLUGGED = 0x1, + MLX5_MODULE_STATUS_UNPLUGGED = 0x2, + MLX5_MODULE_STATUS_ERROR = 0x3, + MLX5_MODULE_STATUS_DISABLED = 0x4, + MLX5_MODULE_STATUS_NUM, +}; + +enum port_module_event_error_type { + MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED = 0x0, + MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX = 0x1, + MLX5_MODULE_EVENT_ERROR_BUS_STUCK = 0x2, + MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT = 0x3, + MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST = 0x4, + MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER = 0x5, + MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6, + MLX5_MODULE_EVENT_ERROR_BAD_CABLE = 0x7, + MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED = 0xc, + MLX5_MODULE_EVENT_ERROR_NUM, +}; + +struct mlx5_pme_stats { + u64 status_counters[MLX5_MODULE_STATUS_NUM]; + u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM]; +}; + +void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats); +int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data); + +/* Crypto */ +enum { + MLX5_ACCEL_OBJ_TLS_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_TLS, + MLX5_ACCEL_OBJ_IPSEC_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_IPSEC, + MLX5_ACCEL_OBJ_MACSEC_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_MACSEC, +}; + +int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, + void *key, u32 sz_bytes, + u32 key_type, u32 *p_key_id); +void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id); + +static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev) +{ + return devlink_net(priv_to_devlink(dev)); +} + +static inline void mlx5_uplink_netdev_set(struct mlx5_core_dev *mdev, struct net_device *netdev) +{ + mdev->mlx5e_res.uplink_netdev = netdev; +} + +static inline struct net_device *mlx5_uplink_netdev_get(struct mlx5_core_dev *mdev) +{ + return mdev->mlx5e_res.uplink_netdev; +} +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c new file mode 100644 index 000000000..8ff16318e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2017, Mellanox 
Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/mpfs.h> +#include <linux/mlx5/eswitch.h> +#include "mlx5_core.h" +#include "lib/mpfs.h" + +/* HW L2 Table (MPFS) management */ +static int set_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index, u8 *mac) +{ + u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {}; + u8 *in_mac_addr; + + MLX5_SET(set_l2_table_entry_in, in, opcode, MLX5_CMD_OP_SET_L2_TABLE_ENTRY); + MLX5_SET(set_l2_table_entry_in, in, table_index, index); + + in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address); + ether_addr_copy(&in_mac_addr[2], mac); + + return mlx5_cmd_exec_in(dev, set_l2_table_entry, in); +} + +static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index) +{ + u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {}; + + MLX5_SET(delete_l2_table_entry_in, in, opcode, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); + MLX5_SET(delete_l2_table_entry_in, in, table_index, index); + return mlx5_cmd_exec_in(dev, delete_l2_table_entry, in); +} + +/* UC L2 table hash node */ +struct l2table_node { + struct l2addr_node node; + u32 index; /* index in HW l2 table */ + int ref_count; +}; + +struct mlx5_mpfs { + struct hlist_head hash[MLX5_L2_ADDR_HASH_SIZE]; + struct mutex lock; /* Synchronize l2 table access */ + u32 size; + unsigned long *bitmap; +}; + +static int alloc_l2table_index(struct mlx5_mpfs *l2table, u32 *ix) +{ + int err = 0; + + *ix = find_first_zero_bit(l2table->bitmap, l2table->size); + if (*ix >= l2table->size) + err = -ENOSPC; + else + __set_bit(*ix, l2table->bitmap); + + return err; +} + +static void free_l2table_index(struct mlx5_mpfs *l2table, u32 ix) +{ + __clear_bit(ix, l2table->bitmap); +} + +int mlx5_mpfs_init(struct mlx5_core_dev *dev) +{ + int l2table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); + struct mlx5_mpfs *mpfs; + + if (!MLX5_ESWITCH_MANAGER(dev)) + return 0; + + mpfs = kzalloc(sizeof(*mpfs), GFP_KERNEL); + if (!mpfs) + return -ENOMEM; + + mutex_init(&mpfs->lock); + mpfs->size = l2table_size; + mpfs->bitmap = bitmap_zalloc(l2table_size, GFP_KERNEL); + if (!mpfs->bitmap) { + kfree(mpfs); + return -ENOMEM; + } 
+ + dev->priv.mpfs = mpfs; + return 0; +} + +void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + + if (!mpfs) + return; + + WARN_ON(!hlist_empty(mpfs->hash)); + bitmap_free(mpfs->bitmap); + kfree(mpfs); +} + +int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) +{ + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + struct l2table_node *l2addr; + int err = 0; + u32 index; + + if (!mpfs) + return 0; + + mutex_lock(&mpfs->lock); + + l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node); + if (l2addr) { + l2addr->ref_count++; + goto out; + } + + err = alloc_l2table_index(mpfs, &index); + if (err) + goto out; + + l2addr = l2addr_hash_add(mpfs->hash, mac, struct l2table_node, GFP_KERNEL); + if (!l2addr) { + err = -ENOMEM; + goto hash_add_err; + } + + err = set_l2table_entry_cmd(dev, index, mac); + if (err) + goto set_table_entry_err; + + l2addr->index = index; + l2addr->ref_count = 1; + + mlx5_core_dbg(dev, "MPFS mac added %pM, index (%d)\n", mac, index); + goto out; + +set_table_entry_err: + l2addr_hash_del(l2addr); +hash_add_err: + free_l2table_index(mpfs, index); +out: + mutex_unlock(&mpfs->lock); + return err; +} +EXPORT_SYMBOL(mlx5_mpfs_add_mac); + +int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) +{ + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + struct l2table_node *l2addr; + int err = 0; + u32 index; + + if (!mpfs) + return 0; + + mutex_lock(&mpfs->lock); + + l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node); + if (!l2addr) { + err = -ENOENT; + goto unlock; + } + + if (--l2addr->ref_count > 0) + goto unlock; + + index = l2addr->index; + del_l2table_entry_cmd(dev, index); + l2addr_hash_del(l2addr); + free_l2table_index(mpfs, index); + mlx5_core_dbg(dev, "MPFS mac deleted %pM, index (%d)\n", mac, index); +unlock: + mutex_unlock(&mpfs->lock); + return err; +} +EXPORT_SYMBOL(mlx5_mpfs_del_mac); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h new file mode 100644 index 000000000..4a293542a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
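[Editorial note, not part of the patch: a sketch of the reference-counted MPFS (HW L2 table) API above. The MAC value and example_mpfs() wrapper are placeholders; real callers such as the uplink netdev UC address sync pass addresses from the stack.]

/* Illustrative only: add/del of a unicast MAC in the HW L2 (MPFS) table. */
#include <linux/if_ether.h>
#include <linux/mlx5/mpfs.h>

static int example_mpfs(struct mlx5_core_dev *dev)
{
	u8 mac[ETH_ALEN] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
	int err;

	err = mlx5_mpfs_add_mac(dev, mac);	/* ref == 1, entry programmed */
	if (err)
		return err;

	err = mlx5_mpfs_add_mac(dev, mac);	/* same MAC: only refcount++ */
	if (err)
		goto out_del;

	mlx5_mpfs_del_mac(dev, mac);		/* refcount-- */
out_del:
	mlx5_mpfs_del_mac(dev, mac);		/* last ref: entry removed */
	return err;
}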
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_MPFS_H__ +#define __MLX5_MPFS_H__ + +#include <linux/if_ether.h> +#include <linux/mlx5/device.h> + +/* L2 -mac address based- hash helpers */ +#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE)) +#define MLX5_L2_ADDR_HASH(addr) (addr[5]) + +struct l2addr_node { + struct hlist_node hlist; + u8 addr[ETH_ALEN]; +}; + +#define for_each_l2hash_node(hn, tmp, hash, i) \ + for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \ + hlist_for_each_entry_safe(hn, tmp, &(hash)[i], hlist) + +#define l2addr_hash_find(hash, mac, type) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + bool found = false; \ + type *ptr = NULL; \ + \ + hlist_for_each_entry(ptr, &(hash)[ix], node.hlist) \ + if (ether_addr_equal(ptr->node.addr, mac)) {\ + found = true; \ + break; \ + } \ + if (!found) \ + ptr = NULL; \ + ptr; \ +}) + +#define l2addr_hash_add(hash, mac, type, gfp) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + type *ptr = NULL; \ + \ + ptr = kzalloc(sizeof(type), gfp); \ + if (ptr) { \ + ether_addr_copy(ptr->node.addr, mac); \ + hlist_add_head(&ptr->node.hlist, &(hash)[ix]);\ + } \ + ptr; \ +}) + +#define l2addr_hash_del(ptr) ({ \ + hlist_del(&(ptr)->node.hlist); \ + kfree(ptr); \ +}) + +#ifdef CONFIG_MLX5_MPFS +int mlx5_mpfs_init(struct mlx5_core_dev *dev); +void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev); +#else /* #ifndef CONFIG_MLX5_MPFS */ +static inline int mlx5_mpfs_init(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) {} +#endif + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c new file mode 100644 index 000000000..6b774e0c2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include <linux/pci.h> +#include "mlx5_core.h" +#include "pci_vsc.h" + +#define MLX5_EXTRACT_C(source, offset, size) \ + ((((u32)(source)) >> (offset)) & MLX5_ONES32(size)) +#define MLX5_EXTRACT(src, start, len) \ + (((len) == 32) ? (src) : MLX5_EXTRACT_C(src, start, len)) +#define MLX5_ONES32(size) \ + ((size) ? (0xffffffff >> (32 - (size))) : 0) +#define MLX5_MASK32(offset, size) \ + (MLX5_ONES32(size) << (offset)) +#define MLX5_MERGE_C(rsrc1, rsrc2, start, len) \ + ((((rsrc2) << (start)) & (MLX5_MASK32((start), (len)))) | \ + ((rsrc1) & (~MLX5_MASK32((start), (len))))) +#define MLX5_MERGE(rsrc1, rsrc2, start, len) \ + (((len) == 32) ? 
(rsrc2) : MLX5_MERGE_C(rsrc1, rsrc2, start, len)) +#define vsc_read(dev, offset, val) \ + pci_read_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val)) +#define vsc_write(dev, offset, val) \ + pci_write_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val)) +#define VSC_MAX_RETRIES 2048 + +enum { + VSC_CTRL_OFFSET = 0x4, + VSC_COUNTER_OFFSET = 0x8, + VSC_SEMAPHORE_OFFSET = 0xc, + VSC_ADDR_OFFSET = 0x10, + VSC_DATA_OFFSET = 0x14, + + VSC_FLAG_BIT_OFFS = 31, + VSC_FLAG_BIT_LEN = 1, + + VSC_SYND_BIT_OFFS = 30, + VSC_SYND_BIT_LEN = 1, + + VSC_ADDR_BIT_OFFS = 0, + VSC_ADDR_BIT_LEN = 30, + + VSC_SPACE_BIT_OFFS = 0, + VSC_SPACE_BIT_LEN = 16, + + VSC_SIZE_VLD_BIT_OFFS = 28, + VSC_SIZE_VLD_BIT_LEN = 1, + + VSC_STATUS_BIT_OFFS = 29, + VSC_STATUS_BIT_LEN = 3, +}; + +void mlx5_pci_vsc_init(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_pf(dev)) + return; + + dev->vsc_addr = pci_find_capability(dev->pdev, + PCI_CAP_ID_VNDR); + if (!dev->vsc_addr) + mlx5_core_warn(dev, "Failed to get valid vendor specific ID\n"); +} + +int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev) +{ + u32 counter = 0; + int retries = 0; + u32 lock_val; + int ret; + + pci_cfg_access_lock(dev->pdev); + do { + if (retries > VSC_MAX_RETRIES) { + ret = -EBUSY; + goto pci_unlock; + } + + /* Check if semaphore is already locked */ + ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val); + if (ret) + goto pci_unlock; + + if (lock_val) { + retries++; + usleep_range(1000, 2000); + continue; + } + + /* Read and write counter value, if written value is + * the same, semaphore was acquired successfully. + */ + ret = vsc_read(dev, VSC_COUNTER_OFFSET, &counter); + if (ret) + goto pci_unlock; + + ret = vsc_write(dev, VSC_SEMAPHORE_OFFSET, counter); + if (ret) + goto pci_unlock; + + ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val); + if (ret) + goto pci_unlock; + + retries++; + } while (counter != lock_val); + + return 0; + +pci_unlock: + pci_cfg_access_unlock(dev->pdev); + return ret; +} + +int mlx5_vsc_gw_unlock(struct mlx5_core_dev *dev) +{ + int ret; + + ret = vsc_write(dev, VSC_SEMAPHORE_OFFSET, MLX5_VSC_UNLOCK); + pci_cfg_access_unlock(dev->pdev); + return ret; +} + +int mlx5_vsc_gw_set_space(struct mlx5_core_dev *dev, u16 space, + u32 *ret_space_size) +{ + int ret; + u32 val = 0; + + if (!mlx5_vsc_accessible(dev)) + return -EINVAL; + + if (ret_space_size) + *ret_space_size = 0; + + /* Get a unique val */ + ret = vsc_read(dev, VSC_CTRL_OFFSET, &val); + if (ret) + goto out; + + /* Try to modify the lock */ + val = MLX5_MERGE(val, space, VSC_SPACE_BIT_OFFS, VSC_SPACE_BIT_LEN); + ret = vsc_write(dev, VSC_CTRL_OFFSET, val); + if (ret) + goto out; + + /* Verify lock was modified */ + ret = vsc_read(dev, VSC_CTRL_OFFSET, &val); + if (ret) + goto out; + + if (MLX5_EXTRACT(val, VSC_STATUS_BIT_OFFS, VSC_STATUS_BIT_LEN) == 0) + return -EINVAL; + + /* Get space max address if indicated by size valid bit */ + if (ret_space_size && + MLX5_EXTRACT(val, VSC_SIZE_VLD_BIT_OFFS, VSC_SIZE_VLD_BIT_LEN)) { + ret = vsc_read(dev, VSC_ADDR_OFFSET, &val); + if (ret) { + mlx5_core_warn(dev, "Failed to get max space size\n"); + goto out; + } + *ret_space_size = MLX5_EXTRACT(val, VSC_ADDR_BIT_OFFS, + VSC_ADDR_BIT_LEN); + } + return 0; + +out: + return ret; +} + +static int mlx5_vsc_wait_on_flag(struct mlx5_core_dev *dev, u8 expected_val) +{ + int retries = 0; + u32 flag; + int ret; + + do { + if (retries > VSC_MAX_RETRIES) + return -EBUSY; + + ret = vsc_read(dev, VSC_ADDR_OFFSET, &flag); + if (ret) + return ret; + flag = MLX5_EXTRACT(flag, 
VSC_FLAG_BIT_OFFS, VSC_FLAG_BIT_LEN); + retries++; + + if ((retries & 0xf) == 0) + usleep_range(1000, 2000); + + } while (flag != expected_val); + + return 0; +} + +static int mlx5_vsc_gw_write(struct mlx5_core_dev *dev, unsigned int address, + u32 data) +{ + int ret; + + if (MLX5_EXTRACT(address, VSC_SYND_BIT_OFFS, + VSC_FLAG_BIT_LEN + VSC_SYND_BIT_LEN)) + return -EINVAL; + + /* Set flag to 0x1 */ + address = MLX5_MERGE(address, 1, VSC_FLAG_BIT_OFFS, 1); + ret = vsc_write(dev, VSC_DATA_OFFSET, data); + if (ret) + goto out; + + ret = vsc_write(dev, VSC_ADDR_OFFSET, address); + if (ret) + goto out; + + /* Wait for the flag to be cleared */ + ret = mlx5_vsc_wait_on_flag(dev, 0); + +out: + return ret; +} + +static int mlx5_vsc_gw_read(struct mlx5_core_dev *dev, unsigned int address, + u32 *data) +{ + int ret; + + if (MLX5_EXTRACT(address, VSC_SYND_BIT_OFFS, + VSC_FLAG_BIT_LEN + VSC_SYND_BIT_LEN)) + return -EINVAL; + + ret = vsc_write(dev, VSC_ADDR_OFFSET, address); + if (ret) + goto out; + + ret = mlx5_vsc_wait_on_flag(dev, 1); + if (ret) + goto out; + + ret = vsc_read(dev, VSC_DATA_OFFSET, data); +out: + return ret; +} + +static int mlx5_vsc_gw_read_fast(struct mlx5_core_dev *dev, + unsigned int read_addr, + unsigned int *next_read_addr, + u32 *data) +{ + int ret; + + ret = mlx5_vsc_gw_read(dev, read_addr, data); + if (ret) + goto out; + + ret = vsc_read(dev, VSC_ADDR_OFFSET, next_read_addr); + if (ret) + goto out; + + *next_read_addr = MLX5_EXTRACT(*next_read_addr, VSC_ADDR_BIT_OFFS, + VSC_ADDR_BIT_LEN); + + if (*next_read_addr <= read_addr) + ret = -EINVAL; +out: + return ret; +} + +int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data, + int length) +{ + unsigned int next_read_addr = 0; + unsigned int read_addr = 0; + + while (read_addr < length) { + if (mlx5_vsc_gw_read_fast(dev, read_addr, &next_read_addr, + &data[(read_addr >> 2)])) + return read_addr; + + read_addr = next_read_addr; + } + return length; +} + +int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space, + enum mlx5_vsc_state state) +{ + u32 data, id = 0; + int ret; + + ret = mlx5_vsc_gw_set_space(dev, MLX5_SEMAPHORE_SPACE_DOMAIN, NULL); + if (ret) { + mlx5_core_warn(dev, "Failed to set gw space %d\n", ret); + return ret; + } + + if (state == MLX5_VSC_LOCK) { + /* Get a unique ID based on the counter */ + ret = vsc_read(dev, VSC_COUNTER_OFFSET, &id); + if (ret) + return ret; + } + + /* Try to modify lock */ + ret = mlx5_vsc_gw_write(dev, space, id); + if (ret) + return ret; + + /* Verify lock was modified */ + ret = mlx5_vsc_gw_read(dev, space, &data); + if (ret) + return -EINVAL; + + if (data != id) + return -EBUSY; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h new file mode 100644 index 000000000..64272a6d7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies */ + +#ifndef __MLX5_PCI_VSC_H__ +#define __MLX5_PCI_VSC_H__ + +enum mlx5_vsc_state { + MLX5_VSC_UNLOCK, + MLX5_VSC_LOCK, +}; + +enum { + MLX5_VSC_SPACE_SCAN_CRSPACE = 0x7, +}; + +void mlx5_pci_vsc_init(struct mlx5_core_dev *dev); +int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev); +int mlx5_vsc_gw_unlock(struct mlx5_core_dev *dev); +int mlx5_vsc_gw_set_space(struct mlx5_core_dev *dev, u16 space, + u32 *ret_space_size); +int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data, + int length); + +static 
inline bool mlx5_vsc_accessible(struct mlx5_core_dev *dev) +{ + return !!dev->vsc_addr; +} + +int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space, + enum mlx5_vsc_state state); + +#endif /* __MLX5_PCI_VSC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c new file mode 100644 index 000000000..4571c56ec --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/port.h> +#include "mlx5_core.h" +#include "lib/port_tun.h" + +struct mlx5_port_tun_entropy_flags { + bool force_supported, force_enabled; + bool calc_supported, calc_enabled; + bool gre_calc_supported, gre_calc_enabled; +}; + +static void mlx5_query_port_tun_entropy(struct mlx5_core_dev *mdev, + struct mlx5_port_tun_entropy_flags *entropy_flags) +{ + u32 out[MLX5_ST_SZ_DW(pcmr_reg)]; + /* Default values for FW which do not support MLX5_REG_PCMR */ + entropy_flags->force_supported = false; + entropy_flags->calc_supported = false; + entropy_flags->gre_calc_supported = false; + entropy_flags->force_enabled = false; + entropy_flags->calc_enabled = true; + entropy_flags->gre_calc_enabled = true; + + if (!MLX5_CAP_GEN(mdev, ports_check)) + return; + + if (mlx5_query_ports_check(mdev, out, sizeof(out))) + return; + + entropy_flags->force_supported = !!(MLX5_GET(pcmr_reg, out, entropy_force_cap)); + entropy_flags->calc_supported = !!(MLX5_GET(pcmr_reg, out, entropy_calc_cap)); + entropy_flags->gre_calc_supported = !!(MLX5_GET(pcmr_reg, out, entropy_gre_calc_cap)); + entropy_flags->force_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_force)); + entropy_flags->calc_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_calc)); + entropy_flags->gre_calc_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_gre_calc)); +} + +static int mlx5_set_port_tun_entropy_calc(struct mlx5_core_dev *mdev, u8 enable, + u8 force) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0}; + int err; + + err = mlx5_query_ports_check(mdev, in, sizeof(in)); + if (err) + return err; + MLX5_SET(pcmr_reg, in, local_port, 1); + MLX5_SET(pcmr_reg, in, entropy_force, force); + MLX5_SET(pcmr_reg, in, entropy_calc, enable); + return mlx5_set_ports_check(mdev, in, sizeof(in)); +} + +static int mlx5_set_port_gre_tun_entropy_calc(struct mlx5_core_dev *mdev, + u8 enable, u8 force) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0}; + int err; + + err = mlx5_query_ports_check(mdev, in, sizeof(in)); + if (err) + return err; + MLX5_SET(pcmr_reg, in, local_port, 1); + MLX5_SET(pcmr_reg, in, entropy_force, force); + MLX5_SET(pcmr_reg, in, entropy_gre_calc, enable); + return mlx5_set_ports_check(mdev, in, sizeof(in)); +} + +void mlx5_init_port_tun_entropy(struct mlx5_tun_entropy *tun_entropy, + struct mlx5_core_dev *mdev) +{ + struct mlx5_port_tun_entropy_flags entropy_flags; + + tun_entropy->mdev = mdev; + mutex_init(&tun_entropy->lock); + mlx5_query_port_tun_entropy(mdev, &entropy_flags); + tun_entropy->num_enabling_entries = 0; + tun_entropy->num_disabling_entries = 0; + tun_entropy->enabled = entropy_flags.calc_supported ? 
+ entropy_flags.calc_enabled : true; +} + +static int mlx5_set_entropy(struct mlx5_tun_entropy *tun_entropy, + int reformat_type, bool enable) +{ + struct mlx5_port_tun_entropy_flags entropy_flags; + int err; + + mlx5_query_port_tun_entropy(tun_entropy->mdev, &entropy_flags); + /* Tunnel entropy calculation may be controlled either on port basis + * for all tunneling protocols or specifically for GRE protocol. + * Prioritize GRE protocol control (if capable) over global port + * configuration. + */ + if (entropy_flags.gre_calc_supported && + reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE) { + if (!entropy_flags.force_supported) + return 0; + err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev, + enable, !enable); + if (err) + return err; + } else if (entropy_flags.calc_supported) { + /* Other applications may change the global FW entropy + * calculations settings. Check that the current entropy value + * is the negative of the updated value. + */ + if (entropy_flags.force_enabled && + enable == entropy_flags.calc_enabled) { + mlx5_core_warn(tun_entropy->mdev, + "Unexpected entropy calc setting - expected %d", + !entropy_flags.calc_enabled); + return -EOPNOTSUPP; + } + /* GRE requires disabling entropy calculation. if there are + * enabling entries (i.e VXLAN) we cannot turn it off for them, + * thus fail. + */ + if (tun_entropy->num_enabling_entries) + return -EOPNOTSUPP; + err = mlx5_set_port_tun_entropy_calc(tun_entropy->mdev, enable, + entropy_flags.force_supported); + if (err) + return err; + tun_entropy->enabled = enable; + /* if we turn on the entropy we don't need to force it anymore */ + if (entropy_flags.force_supported && enable) { + err = mlx5_set_port_tun_entropy_calc(tun_entropy->mdev, 1, 0); + if (err) + return err; + } + } + + return 0; +} + +/* the function manages the refcount for enabling/disabling tunnel types. + * the return value indicates if the inc is successful or not, depending on + * entropy capabilities and configuration. + */ +int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy, + int reformat_type) +{ + int err = -EOPNOTSUPP; + + mutex_lock(&tun_entropy->lock); + if ((reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN || + reformat_type == MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL) && + tun_entropy->enabled) { + /* in case entropy calculation is enabled for all tunneling + * types, it is ok for VXLAN, so approve. + * otherwise keep the error default. + */ + tun_entropy->num_enabling_entries++; + err = 0; + } else if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE) { + /* turn off the entropy only for the first GRE rule. + * for the next rules the entropy was already disabled + * successfully. 
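		 * The matching decrement path, mlx5_tun_entropy_refcount_dec()
		 * below, re-enables entropy calculation only once the last
		 * disabling GRE entry has been removed.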
+ */ + if (tun_entropy->num_disabling_entries == 0) + err = mlx5_set_entropy(tun_entropy, reformat_type, 0); + else + err = 0; + if (!err) + tun_entropy->num_disabling_entries++; + } + mutex_unlock(&tun_entropy->lock); + + return err; +} + +void mlx5_tun_entropy_refcount_dec(struct mlx5_tun_entropy *tun_entropy, + int reformat_type) +{ + mutex_lock(&tun_entropy->lock); + if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN) + tun_entropy->num_enabling_entries--; + else if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE && + --tun_entropy->num_disabling_entries == 0) + mlx5_set_entropy(tun_entropy, reformat_type, 1); + mutex_unlock(&tun_entropy->lock); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h new file mode 100644 index 000000000..54c42a887 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_PORT_TUN_H__ +#define __MLX5_PORT_TUN_H__ + +#include <linux/mlx5/driver.h> + +struct mlx5_tun_entropy { + struct mlx5_core_dev *mdev; + u32 num_enabling_entries; + u32 num_disabling_entries; + u8 enabled; + struct mutex lock; /* lock the entropy fields */ +}; + +void mlx5_init_port_tun_entropy(struct mlx5_tun_entropy *tun_entropy, + struct mlx5_core_dev *mdev); +int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy, + int reformat_type); +void mlx5_tun_entropy_refcount_dec(struct mlx5_tun_entropy *tun_entropy, + int reformat_type); + +#endif /* __MLX5_PORT_TUN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h new file mode 100644 index 000000000..84e568386 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies Ltd */ + +#ifndef __LIB_MLX5_SF_H__ +#define __LIB_MLX5_SF_H__ + +#include <linux/mlx5/driver.h> + +static inline u16 mlx5_sf_start_function_id(const struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN(dev, sf_base_id); +} + +#ifdef CONFIG_MLX5_SF + +static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN(dev, sf); +} + +static inline u16 mlx5_sf_max_functions(const struct mlx5_core_dev *dev) +{ + if (!mlx5_sf_supported(dev)) + return 0; + if (MLX5_CAP_GEN(dev, max_num_sf)) + return MLX5_CAP_GEN(dev, max_num_sf); + else + return 1 << MLX5_CAP_GEN(dev, log_max_sf); +} + +#else + +static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev) +{ + return false; +} + +static inline u16 mlx5_sf_max_functions(const struct mlx5_core_dev *dev) +{ + return 0; +} + +#endif + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c new file mode 100644 index 000000000..9b8c051cc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. 
*/ + +#include <linux/kernel.h> +#include <linux/mlx5/driver.h> + +#include "smfs.h" + +struct mlx5dr_matcher * +mlx5_smfs_matcher_create(struct mlx5dr_table *table, u32 priority, struct mlx5_flow_spec *spec) +{ + struct mlx5dr_match_parameters matcher_mask = {}; + + matcher_mask.match_buf = (u64 *)&spec->match_criteria; + matcher_mask.match_sz = DR_SZ_MATCH_PARAM; + + return mlx5dr_matcher_create(table, priority, spec->match_criteria_enable, &matcher_mask); +} + +void +mlx5_smfs_matcher_destroy(struct mlx5dr_matcher *matcher) +{ + mlx5dr_matcher_destroy(matcher); +} + +struct mlx5dr_table * +mlx5_smfs_table_get_from_fs_ft(struct mlx5_flow_table *ft) +{ + return mlx5dr_table_get_from_fs_ft(ft); +} + +struct mlx5dr_action * +mlx5_smfs_action_create_dest_table(struct mlx5dr_table *table) +{ + return mlx5dr_action_create_dest_table(table); +} + +struct mlx5dr_action * +mlx5_smfs_action_create_flow_counter(u32 counter_id) +{ + return mlx5dr_action_create_flow_counter(counter_id); +} + +void +mlx5_smfs_action_destroy(struct mlx5dr_action *action) +{ + mlx5dr_action_destroy(action); +} + +struct mlx5dr_rule * +mlx5_smfs_rule_create(struct mlx5dr_matcher *matcher, struct mlx5_flow_spec *spec, + size_t num_actions, struct mlx5dr_action *actions[], + u32 flow_source) +{ + struct mlx5dr_match_parameters value = {}; + + value.match_buf = (u64 *)spec->match_value; + value.match_sz = DR_SZ_MATCH_PARAM; + + return mlx5dr_rule_create(matcher, &value, num_actions, actions, flow_source); +} + +void +mlx5_smfs_rule_destroy(struct mlx5dr_rule *rule) +{ + mlx5dr_rule_destroy(rule); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h new file mode 100644 index 000000000..452d0df33 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */ + +#ifndef __MLX5_LIB_SMFS_H__ +#define __MLX5_LIB_SMFS_H__ + +#include "steering/mlx5dr.h" +#include "steering/dr_types.h" + +struct mlx5dr_matcher * +mlx5_smfs_matcher_create(struct mlx5dr_table *table, u32 priority, struct mlx5_flow_spec *spec); + +void +mlx5_smfs_matcher_destroy(struct mlx5dr_matcher *matcher); + +struct mlx5dr_table * +mlx5_smfs_table_get_from_fs_ft(struct mlx5_flow_table *ft); + +struct mlx5dr_action * +mlx5_smfs_action_create_dest_table(struct mlx5dr_table *table); + +struct mlx5dr_action * +mlx5_smfs_action_create_flow_counter(u32 counter_id); + +void +mlx5_smfs_action_destroy(struct mlx5dr_action *action); + +struct mlx5dr_rule * +mlx5_smfs_rule_create(struct mlx5dr_matcher *matcher, struct mlx5_flow_spec *spec, + size_t num_actions, struct mlx5dr_action *actions[], + u32 flow_source); + +void +mlx5_smfs_rule_destroy(struct mlx5dr_rule *rule); + +#endif /* __MLX5_LIB_SMFS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c new file mode 100644 index 000000000..696e45e2b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#include <linux/mlx5/driver.h> +#include "lib/tout.h" + +struct mlx5_timeouts { + u64 to[MAX_TIMEOUT_TYPES]; +}; + +static const u32 tout_def_sw_val[MAX_TIMEOUT_TYPES] = { + [MLX5_TO_FW_PRE_INIT_TIMEOUT_MS] = 120000, + [MLX5_TO_FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS] = 7200000, + [MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS] = 20000, + [MLX5_TO_FW_PRE_INIT_WAIT_MS] = 2, + [MLX5_TO_FW_INIT_MS] = 2000, + [MLX5_TO_CMD_MS] = 60000, + [MLX5_TO_PCI_TOGGLE_MS] = 2000, + [MLX5_TO_HEALTH_POLL_INTERVAL_MS] = 2000, + [MLX5_TO_FULL_CRDUMP_MS] = 60000, + [MLX5_TO_FW_RESET_MS] = 60000, + [MLX5_TO_FLUSH_ON_ERROR_MS] = 2000, + [MLX5_TO_PCI_SYNC_UPDATE_MS] = 5000, + [MLX5_TO_TEARDOWN_MS] = 3000, + [MLX5_TO_FSM_REACTIVATE_MS] = 5000, + [MLX5_TO_RECLAIM_PAGES_MS] = 5000, + [MLX5_TO_RECLAIM_VFS_PAGES_MS] = 120000 +}; + +static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_types type) +{ + dev->timeouts->to[type] = val; +} + +int mlx5_tout_init(struct mlx5_core_dev *dev) +{ + int i; + + dev->timeouts = kmalloc(sizeof(*dev->timeouts), GFP_KERNEL); + if (!dev->timeouts) + return -ENOMEM; + + for (i = 0; i < MAX_TIMEOUT_TYPES; i++) + tout_set(dev, tout_def_sw_val[i], i); + + return 0; +} + +void mlx5_tout_cleanup(struct mlx5_core_dev *dev) +{ + kfree(dev->timeouts); +} + +/* Time register consists of two fields to_multiplier(time out multiplier) + * and to_value(time out value). to_value is the quantity of the time units and + * to_multiplier is the type and should be one off these four values. + * 0x0: millisecond + * 0x1: seconds + * 0x2: minutes + * 0x3: hours + * this function converts the time stored in the two register fields into + * millisecond. + */ +static u64 tout_convert_reg_field_to_ms(u32 to_mul, u32 to_val) +{ + u64 msec = to_val; + + to_mul &= 0x3; + /* convert hours/minutes/seconds to miliseconds */ + if (to_mul) + msec *= 1000 * int_pow(60, to_mul - 1); + + return msec; +} + +static u64 tout_convert_iseg_to_ms(u32 iseg_to) +{ + return tout_convert_reg_field_to_ms(iseg_to >> 29, iseg_to & 0xfffff); +} + +static bool tout_is_supported(struct mlx5_core_dev *dev) +{ + return !!ioread32be(&dev->iseg->cmd_q_init_to); +} + +void mlx5_tout_query_iseg(struct mlx5_core_dev *dev) +{ + u32 to; + + if (!tout_is_supported(dev)) + return; + + to = ioread32be(&dev->iseg->cmd_q_init_to); + tout_set(dev, tout_convert_iseg_to_ms(to), MLX5_TO_FW_INIT_MS); + + to = ioread32be(&dev->iseg->cmd_exec_to); + tout_set(dev, tout_convert_iseg_to_ms(to), MLX5_TO_CMD_MS); +} + +u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type) +{ + return dev->timeouts->to[type]; +} + +#define MLX5_TIMEOUT_QUERY(fld, reg_out) \ + ({ \ + struct mlx5_ifc_default_timeout_bits *time_field; \ + u32 to_multi, to_value; \ + u64 to_val_ms; \ + \ + time_field = MLX5_ADDR_OF(dtor_reg, reg_out, fld); \ + to_multi = MLX5_GET(default_timeout, time_field, to_multiplier); \ + to_value = MLX5_GET(default_timeout, time_field, to_value); \ + to_val_ms = tout_convert_reg_field_to_ms(to_multi, to_value); \ + to_val_ms; \ + }) + +#define MLX5_TIMEOUT_FILL(fld, reg_out, dev, to_type, to_extra) \ + ({ \ + u64 fw_to = MLX5_TIMEOUT_QUERY(fld, reg_out); \ + tout_set(dev, fw_to + (to_extra), to_type); \ + fw_to; \ + }) + +static int tout_query_dtor(struct mlx5_core_dev *dev) +{ + u64 pcie_toggle_to_val, tear_down_to_val; + u32 out[MLX5_ST_SZ_DW(dtor_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(dtor_reg)] = {}; + int err; + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_DTOR, 0, 0); + if (err) + 
return err; + + pcie_toggle_to_val = MLX5_TIMEOUT_FILL(pcie_toggle_to, out, dev, MLX5_TO_PCI_TOGGLE_MS, 0); + MLX5_TIMEOUT_FILL(fw_reset_to, out, dev, MLX5_TO_FW_RESET_MS, pcie_toggle_to_val); + + tear_down_to_val = MLX5_TIMEOUT_FILL(tear_down_to, out, dev, MLX5_TO_TEARDOWN_MS, 0); + MLX5_TIMEOUT_FILL(pci_sync_update_to, out, dev, MLX5_TO_PCI_SYNC_UPDATE_MS, + tear_down_to_val); + + MLX5_TIMEOUT_FILL(health_poll_to, out, dev, MLX5_TO_HEALTH_POLL_INTERVAL_MS, 0); + MLX5_TIMEOUT_FILL(full_crdump_to, out, dev, MLX5_TO_FULL_CRDUMP_MS, 0); + MLX5_TIMEOUT_FILL(flush_on_err_to, out, dev, MLX5_TO_FLUSH_ON_ERROR_MS, 0); + MLX5_TIMEOUT_FILL(fsm_reactivate_to, out, dev, MLX5_TO_FSM_REACTIVATE_MS, 0); + MLX5_TIMEOUT_FILL(reclaim_pages_to, out, dev, MLX5_TO_RECLAIM_PAGES_MS, 0); + MLX5_TIMEOUT_FILL(reclaim_vfs_pages_to, out, dev, MLX5_TO_RECLAIM_VFS_PAGES_MS, 0); + + return 0; +} + +int mlx5_tout_query_dtor(struct mlx5_core_dev *dev) +{ + if (tout_is_supported(dev)) + return tout_query_dtor(dev); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h new file mode 100644 index 000000000..bc9e9aeda --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef MLX5_TIMEOUTS_H +#define MLX5_TIMEOUTS_H + +enum mlx5_timeouts_types { + /* pre init timeouts (not read from FW) */ + MLX5_TO_FW_PRE_INIT_TIMEOUT_MS, + MLX5_TO_FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS, + MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS, + MLX5_TO_FW_PRE_INIT_WAIT_MS, + + /* init segment timeouts */ + MLX5_TO_FW_INIT_MS, + MLX5_TO_CMD_MS, + + /* DTOR timeouts */ + MLX5_TO_PCI_TOGGLE_MS, + MLX5_TO_HEALTH_POLL_INTERVAL_MS, + MLX5_TO_FULL_CRDUMP_MS, + MLX5_TO_FW_RESET_MS, + MLX5_TO_FLUSH_ON_ERROR_MS, + MLX5_TO_PCI_SYNC_UPDATE_MS, + MLX5_TO_TEARDOWN_MS, + MLX5_TO_FSM_REACTIVATE_MS, + MLX5_TO_RECLAIM_PAGES_MS, + MLX5_TO_RECLAIM_VFS_PAGES_MS, + + MAX_TIMEOUT_TYPES +}; + +struct mlx5_core_dev; +int mlx5_tout_init(struct mlx5_core_dev *dev); +void mlx5_tout_cleanup(struct mlx5_core_dev *dev); +void mlx5_tout_query_iseg(struct mlx5_core_dev *dev); +int mlx5_tout_query_dtor(struct mlx5_core_dev *dev); +u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type); + +#define mlx5_tout_ms(dev, type) _mlx5_tout_ms(dev, MLX5_TO_##type##_MS) + +# endif /* MLX5_TIMEOUTS_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c new file mode 100644 index 000000000..d55e15c1f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/refcount.h> +#include <linux/mlx5/driver.h> +#include <net/vxlan.h> +#include "mlx5_core.h" +#include "vxlan.h" + +struct mlx5_vxlan { + struct mlx5_core_dev *mdev; + /* max_num_ports is usually 4, 16 buckets is more than enough */ + DECLARE_HASHTABLE(htable, 4); + struct mutex sync_lock; /* sync add/del port HW operations */ +}; + +struct mlx5_vxlan_port { + struct hlist_node hlist; + u16 udp_port; +}; + +static int mlx5_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port) +{ + u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)] = {}; + + MLX5_SET(add_vxlan_udp_dport_in, in, opcode, + MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT); + MLX5_SET(add_vxlan_udp_dport_in, in, vxlan_udp_port, port); + return mlx5_cmd_exec_in(mdev, add_vxlan_udp_dport, in); +} + +static int mlx5_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port) +{ + u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)] = {}; + + MLX5_SET(delete_vxlan_udp_dport_in, in, opcode, + MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT); + MLX5_SET(delete_vxlan_udp_dport_in, in, vxlan_udp_port, port); + return mlx5_cmd_exec_in(mdev, delete_vxlan_udp_dport, in); +} + +bool mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port) +{ + struct mlx5_vxlan_port *vxlanp; + bool found = false; + + if (!mlx5_vxlan_allowed(vxlan)) + return NULL; + + rcu_read_lock(); + hash_for_each_possible_rcu(vxlan->htable, vxlanp, hlist, port) + if (vxlanp->udp_port == port) { + found = true; + break; + } + rcu_read_unlock(); + + return found; +} + +static struct mlx5_vxlan_port *vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port) +{ + struct mlx5_vxlan_port *vxlanp; + + hash_for_each_possible(vxlan->htable, vxlanp, hlist, port) + if (vxlanp->udp_port == port) + return vxlanp; + return NULL; +} + +int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port) +{ + struct mlx5_vxlan_port *vxlanp; + int ret; + + vxlanp = kzalloc(sizeof(*vxlanp), GFP_KERNEL); + if (!vxlanp) + return -ENOMEM; + vxlanp->udp_port = port; + + ret = mlx5_vxlan_core_add_port_cmd(vxlan->mdev, port); + if (ret) { + kfree(vxlanp); + return ret; + } + + mutex_lock(&vxlan->sync_lock); + hash_add_rcu(vxlan->htable, &vxlanp->hlist, port); + mutex_unlock(&vxlan->sync_lock); + + return 0; +} + +int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port) +{ + struct mlx5_vxlan_port *vxlanp; + int ret = 0; + + mutex_lock(&vxlan->sync_lock); + + vxlanp = vxlan_lookup_port(vxlan, port); + if (WARN_ON(!vxlanp)) { + ret = -ENOENT; + goto out_unlock; + } + + hash_del_rcu(&vxlanp->hlist); + synchronize_rcu(); + mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port); + kfree(vxlanp); + +out_unlock: + mutex_unlock(&vxlan->sync_lock); + return ret; +} + +struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev) +{ + struct mlx5_vxlan *vxlan; + + if 
(!MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) || !mlx5_core_is_pf(mdev)) + return ERR_PTR(-ENOTSUPP); + + vxlan = kzalloc(sizeof(*vxlan), GFP_KERNEL); + if (!vxlan) + return ERR_PTR(-ENOMEM); + + vxlan->mdev = mdev; + mutex_init(&vxlan->sync_lock); + hash_init(vxlan->htable); + + /* Hardware adds 4789 (IANA_VXLAN_UDP_PORT) by default */ + mlx5_vxlan_add_port(vxlan, IANA_VXLAN_UDP_PORT); + + return vxlan; +} + +void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan) +{ + if (!mlx5_vxlan_allowed(vxlan)) + return; + + mlx5_vxlan_del_port(vxlan, IANA_VXLAN_UDP_PORT); + WARN_ON(!hash_empty(vxlan->htable)); + + kfree(vxlan); +} + +void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan) +{ + struct mlx5_vxlan_port *vxlanp; + struct hlist_node *tmp; + int bkt; + + if (!mlx5_vxlan_allowed(vxlan)) + return; + + hash_for_each_safe(vxlan->htable, bkt, tmp, vxlanp, hlist) { + /* Don't delete default UDP port added by the HW. + * Remove only user configured ports + */ + if (vxlanp->udp_port == IANA_VXLAN_UDP_PORT) + continue; + mlx5_vxlan_del_port(vxlan, vxlanp->udp_port); + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h new file mode 100644 index 000000000..34ef662da --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __MLX5_VXLAN_H__ +#define __MLX5_VXLAN_H__ + +#include <linux/mlx5/driver.h> + +struct mlx5_vxlan; +struct mlx5_vxlan_port; + +static inline u8 mlx5_vxlan_max_udp_ports(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_ETH(mdev, max_vxlan_udp_ports) ?: 4; +} + +static inline bool mlx5_vxlan_allowed(struct mlx5_vxlan *vxlan) +{ + /* not allowed reason is encoded in vxlan pointer as error, + * on mlx5_vxlan_create + */ + return !IS_ERR_OR_NULL(vxlan); +} + +#if IS_ENABLED(CONFIG_VXLAN) +struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev); +void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan); +int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port); +int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port); +bool mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port); +void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan); +#else +static inline struct mlx5_vxlan* +mlx5_vxlan_create(struct mlx5_core_dev *mdev) { return ERR_PTR(-EOPNOTSUPP); } +static inline void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan) { return; } +static inline int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port) { return -EOPNOTSUPP; } +static inline int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port) { return -EOPNOTSUPP; } +static inline bool mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port) { return false; } +static inline void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan) { return; } +#endif + +#endif /* __MLX5_VXLAN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c new file mode 100644 index 000000000..6ab0642e9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -0,0 +1,2130 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/highmem.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <linux/io-mapping.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cq.h> +#include <linux/mlx5/qp.h> +#include <linux/debugfs.h> +#include <linux/kmod.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/vport.h> +#ifdef CONFIG_RFS_ACCEL +#include <linux/cpu_rmap.h> +#endif +#include <linux/version.h> +#include <net/devlink.h> +#include "mlx5_core.h" +#include "lib/eq.h" +#include "fs_core.h" +#include "lib/mpfs.h" +#include "eswitch.h" +#include "devlink.h" +#include "fw_reset.h" +#include "lib/mlx5.h" +#include "lib/tout.h" +#include "fpga/core.h" +#include "en_accel/ipsec.h" +#include "lib/clock.h" +#include "lib/vxlan.h" +#include "lib/geneve.h" +#include "lib/devcom.h" +#include "lib/pci_vsc.h" +#include "diag/fw_tracer.h" +#include "ecpf.h" +#include "lib/hv_vhca.h" +#include "diag/rsc_dump.h" +#include "sf/vhca_event.h" +#include "sf/dev/dev.h" +#include "sf/sf.h" +#include "mlx5_irq.h" + +MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); +MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); +MODULE_LICENSE("Dual BSD/GPL"); + +unsigned int mlx5_core_debug_mask; +module_param_named(debug_mask, mlx5_core_debug_mask, uint, 0644); +MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0"); + +static unsigned int prof_sel = MLX5_DEFAULT_PROF; +module_param_named(prof_sel, prof_sel, uint, 0444); +MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); + +static u32 sw_owner_id[4]; +#define MAX_SW_VHCA_ID (BIT(__mlx5_bit_sz(cmd_hca_cap_2, sw_vhca_id)) - 1) +static DEFINE_IDA(sw_vhca_ida); + +enum { + MLX5_ATOMIC_REQ_MODE_BE = 0x0, + MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1, +}; + +#define LOG_MAX_SUPPORTED_QPS 0xff + +static struct mlx5_profile profile[] = { + [0] = { + .mask = 0, + }, + [1] = { + .mask = MLX5_PROF_MASK_QP_SIZE, + .log_max_qp = 12, + }, + [2] = { + .mask = MLX5_PROF_MASK_QP_SIZE | + MLX5_PROF_MASK_MR_CACHE, + .log_max_qp = LOG_MAX_SUPPORTED_QPS, + .mr_cache[0] = { + .size = 500, + .limit = 250 + }, + .mr_cache[1] = { + .size = 500, + .limit = 250 + }, + .mr_cache[2] = { + .size = 500, + .limit = 250 + }, + .mr_cache[3] = { + .size = 500, + .limit = 250 + }, + .mr_cache[4] = { + .size = 500, + .limit = 250 + }, + .mr_cache[5] = { + .size = 500, + .limit = 250 + }, + .mr_cache[6] = { + .size = 500, + .limit = 250 + }, + .mr_cache[7] = { + .size = 500, + .limit = 250 + }, + .mr_cache[8] = { + .size = 500, + .limit = 250 + }, + .mr_cache[9] = { + .size = 500, + .limit = 250 + }, + .mr_cache[10] = { + .size = 500, + .limit = 250 + }, + .mr_cache[11] = { + .size = 500, + .limit = 250 + }, + .mr_cache[12] = { + .size = 64, + .limit = 32 + }, + .mr_cache[13] = { + .size = 32, + .limit = 16 + }, + .mr_cache[14] = { + .size = 16, + .limit = 8 + }, + .mr_cache[15] = { + .size = 8, + .limit = 4 + }, + }, +}; + +static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, + u32 warn_time_mili) +{ + unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili); + unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili); + u32 fw_initializing; + int err = 0; + + do { + fw_initializing = ioread32be(&dev->iseg->initializing); + if (!(fw_initializing >> 31)) + break; + if (time_after(jiffies, end) || + 
test_and_clear_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) { + err = -EBUSY; + break; + } + if (warn_time_mili && time_after(jiffies, warn)) { + mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds (0x%x)\n", + jiffies_to_msecs(end - warn) / 1000, fw_initializing); + warn = jiffies + msecs_to_jiffies(warn_time_mili); + } + msleep(mlx5_tout_ms(dev, FW_PRE_INIT_WAIT)); + } while (true); + + return err; +} + +static void mlx5_set_driver_version(struct mlx5_core_dev *dev) +{ + int driver_ver_sz = MLX5_FLD_SZ_BYTES(set_driver_version_in, + driver_version); + u8 in[MLX5_ST_SZ_BYTES(set_driver_version_in)] = {}; + int remaining_size = driver_ver_sz; + char *string; + + if (!MLX5_CAP_GEN(dev, driver_version)) + return; + + string = MLX5_ADDR_OF(set_driver_version_in, in, driver_version); + + strncpy(string, "Linux", remaining_size); + + remaining_size = max_t(int, 0, driver_ver_sz - strlen(string)); + strncat(string, ",", remaining_size); + + remaining_size = max_t(int, 0, driver_ver_sz - strlen(string)); + strncat(string, KBUILD_MODNAME, remaining_size); + + remaining_size = max_t(int, 0, driver_ver_sz - strlen(string)); + strncat(string, ",", remaining_size); + + remaining_size = max_t(int, 0, driver_ver_sz - strlen(string)); + + snprintf(string + strlen(string), remaining_size, "%u.%u.%u", + LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL, + LINUX_VERSION_SUBLEVEL); + + /*Send the command*/ + MLX5_SET(set_driver_version_in, in, opcode, + MLX5_CMD_OP_SET_DRIVER_VERSION); + + mlx5_cmd_exec_in(dev, set_driver_version, in); +} + +static int set_dma_caps(struct pci_dev *pdev) +{ + int err; + + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n"); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n"); + return err; + } + } + + dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024); + return err; +} + +static int mlx5_pci_enable_device(struct mlx5_core_dev *dev) +{ + struct pci_dev *pdev = dev->pdev; + int err = 0; + + mutex_lock(&dev->pci_status_mutex); + if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) { + err = pci_enable_device(pdev); + if (!err) + dev->pci_status = MLX5_PCI_STATUS_ENABLED; + } + mutex_unlock(&dev->pci_status_mutex); + + return err; +} + +static void mlx5_pci_disable_device(struct mlx5_core_dev *dev) +{ + struct pci_dev *pdev = dev->pdev; + + mutex_lock(&dev->pci_status_mutex); + if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) { + pci_disable_device(pdev); + dev->pci_status = MLX5_PCI_STATUS_DISABLED; + } + mutex_unlock(&dev->pci_status_mutex); +} + +static int request_bar(struct pci_dev *pdev) +{ + int err = 0; + + if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { + dev_err(&pdev->dev, "Missing registers BAR, aborting\n"); + return -ENODEV; + } + + err = pci_request_regions(pdev, KBUILD_MODNAME); + if (err) + dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n"); + + return err; +} + +static void release_bar(struct pci_dev *pdev) +{ + pci_release_regions(pdev); +} + +struct mlx5_reg_host_endianness { + u8 he; + u8 rsvd[15]; +}; + +static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size) +{ + switch (size) { + case 128: + return 0; + case 256: + return 1; + case 512: + return 2; + case 1024: + return 3; + case 2048: + return 4; + case 4096: + return 5; + default: + mlx5_core_warn(dev, "invalid pkey table size %d\n", size); + return 0; + } +} + +static int 
mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, + enum mlx5_cap_type cap_type, + enum mlx5_cap_mode cap_mode) +{ + u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)]; + int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *out, *hca_caps; + u16 opmod = (cap_type << 1) | (cap_mode & 0x01); + int err; + + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); + MLX5_SET(query_hca_cap_in, in, op_mod, opmod); + err = mlx5_cmd_exec_inout(dev, query_hca_cap, in, out); + if (err) { + mlx5_core_warn(dev, + "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n", + cap_type, cap_mode, err); + goto query_ex; + } + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability); + + switch (cap_mode) { + case HCA_CAP_OPMOD_GET_MAX: + memcpy(dev->caps.hca[cap_type]->max, hca_caps, + MLX5_UN_SZ_BYTES(hca_cap_union)); + break; + case HCA_CAP_OPMOD_GET_CUR: + memcpy(dev->caps.hca[cap_type]->cur, hca_caps, + MLX5_UN_SZ_BYTES(hca_cap_union)); + break; + default: + mlx5_core_warn(dev, + "Tried to query dev cap type(%x) with wrong opmode(%x)\n", + cap_type, cap_mode); + err = -EINVAL; + break; + } +query_ex: + kfree(out); + return err; +} + +int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type) +{ + int ret; + + ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR); + if (ret) + return ret; + return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX); +} + +static int set_caps(struct mlx5_core_dev *dev, void *in, int opmod) +{ + MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP); + MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1); + return mlx5_cmd_exec_in(dev, set_hca_cap, in); +} + +static int handle_hca_cap_atomic(struct mlx5_core_dev *dev, void *set_ctx) +{ + void *set_hca_cap; + int req_endianness; + int err; + + if (!MLX5_CAP_GEN(dev, atomic)) + return 0; + + err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC); + if (err) + return err; + + req_endianness = + MLX5_CAP_ATOMIC(dev, + supported_atomic_req_8B_endianness_mode_1); + + if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS) + return 0; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); + + /* Set requestor to host endianness */ + MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianness_mode, + MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS); + + return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC); +} + +static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx) +{ + void *set_hca_cap; + bool do_set = false; + int err; + + if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) || + !MLX5_CAP_GEN(dev, pg)) + return 0; + + err = mlx5_core_get_caps(dev, MLX5_CAP_ODP); + if (err) + return err; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); + memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ODP]->cur, + MLX5_ST_SZ_BYTES(odp_cap)); + +#define ODP_CAP_SET_MAX(dev, field) \ + do { \ + u32 _res = MLX5_CAP_ODP_MAX(dev, field); \ + if (_res) { \ + do_set = true; \ + MLX5_SET(odp_cap, set_hca_cap, field, _res); \ + } \ + } while (0) + + ODP_CAP_SET_MAX(dev, ud_odp_caps.srq_receive); + ODP_CAP_SET_MAX(dev, rc_odp_caps.srq_receive); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.srq_receive); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.send); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.receive); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.write); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.read); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.atomic); + ODP_CAP_SET_MAX(dev, dc_odp_caps.srq_receive); + 
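	/* The DC ODP capabilities are mirrored the same way: each max
	 * capability advertised by the device is copied into the set_hca_cap
	 * context, and if any bit was set (do_set), SET_HCA_CAP with the ODP
	 * op_mod is issued below.
	 */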
ODP_CAP_SET_MAX(dev, dc_odp_caps.send); + ODP_CAP_SET_MAX(dev, dc_odp_caps.receive); + ODP_CAP_SET_MAX(dev, dc_odp_caps.write); + ODP_CAP_SET_MAX(dev, dc_odp_caps.read); + ODP_CAP_SET_MAX(dev, dc_odp_caps.atomic); + + if (!do_set) + return 0; + + return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP); +} + +static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + &val); + if (!err) + return val.vu32; + mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err); + return err; +} + +bool mlx5_is_roce_on(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, + &val); + + if (!err) + return val.vbool; + + mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err); + return MLX5_CAP_GEN(dev, roce); +} +EXPORT_SYMBOL(mlx5_is_roce_on); + +static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx) +{ + void *set_hca_cap; + int err; + + if (!MLX5_CAP_GEN_MAX(dev, hca_cap_2)) + return 0; + + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL_2); + if (err) + return err; + + if (!MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) || + !(dev->priv.sw_vhca_id > 0)) + return 0; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, + capability); + memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL_2]->cur, + MLX5_ST_SZ_BYTES(cmd_hca_cap_2)); + MLX5_SET(cmd_hca_cap_2, set_hca_cap, sw_vhca_id_valid, 1); + + return set_caps(dev, set_ctx, MLX5_CAP_GENERAL_2); +} + +static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) +{ + struct mlx5_profile *prof = &dev->profile; + void *set_hca_cap; + int max_uc_list; + int err; + + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); + if (err) + return err; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, + capability); + memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL]->cur, + MLX5_ST_SZ_BYTES(cmd_hca_cap)); + + mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n", + mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)), + 128); + /* we limit the size of the pkey table to 128 entries for now */ + MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size, + to_fw_pkey_sz(dev, 128)); + + /* Check log_max_qp from HCA caps to set in current profile */ + if (prof->log_max_qp == LOG_MAX_SUPPORTED_QPS) { + prof->log_max_qp = min_t(u8, 18, MLX5_CAP_GEN_MAX(dev, log_max_qp)); + } else if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) { + mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", + prof->log_max_qp, + MLX5_CAP_GEN_MAX(dev, log_max_qp)); + prof->log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); + } + if (prof->mask & MLX5_PROF_MASK_QP_SIZE) + MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp, + prof->log_max_qp); + + /* disable cmdif checksum */ + MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0); + + /* Enable 4K UAR only when HCA supports it and page size is bigger + * than 4K. + */ + if (MLX5_CAP_GEN_MAX(dev, uar_4k) && PAGE_SIZE > 4096) + MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1); + + MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12); + + if (MLX5_CAP_GEN_MAX(dev, cache_line_128byte)) + MLX5_SET(cmd_hca_cap, + set_hca_cap, + cache_line_128byte, + cache_line_size() >= 128 ? 
1 : 0); + + if (MLX5_CAP_GEN_MAX(dev, dct)) + MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1); + + if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_event)) + MLX5_SET(cmd_hca_cap, set_hca_cap, pci_sync_for_fw_update_event, 1); + + if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports)) + MLX5_SET(cmd_hca_cap, + set_hca_cap, + num_vhca_ports, + MLX5_CAP_GEN_MAX(dev, num_vhca_ports)); + + if (MLX5_CAP_GEN_MAX(dev, release_all_pages)) + MLX5_SET(cmd_hca_cap, set_hca_cap, release_all_pages, 1); + + if (MLX5_CAP_GEN_MAX(dev, mkey_by_name)) + MLX5_SET(cmd_hca_cap, set_hca_cap, mkey_by_name, 1); + + mlx5_vhca_state_cap_handle(dev, set_hca_cap); + + if (MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)) + MLX5_SET(cmd_hca_cap, set_hca_cap, num_total_dynamic_vf_msix, + MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)); + + if (MLX5_CAP_GEN(dev, roce_rw_supported) && MLX5_CAP_GEN_MAX(dev, roce)) + MLX5_SET(cmd_hca_cap, set_hca_cap, roce, + mlx5_is_roce_on(dev)); + + max_uc_list = max_uc_list_get_devlink_param(dev); + if (max_uc_list > 0) + MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_current_uc_list, + ilog2(max_uc_list)); + + return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); +} + +/* Cached MLX5_CAP_GEN(dev, roce) can be out of sync this early in the + * boot process. + * In case RoCE cap is writable in FW and user/devlink requested to change the + * cap, we are yet to query the final state of the above cap. + * Hence, the need for this function. + * + * Returns + * True: + * 1) RoCE cap is read only in FW and already disabled + * OR: + * 2) RoCE cap is writable in FW and user/devlink requested it off. + * + * In any other case, return False. + */ +static bool is_roce_fw_disabled(struct mlx5_core_dev *dev) +{ + return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_on(dev)) || + (!MLX5_CAP_GEN(dev, roce_rw_supported) && !MLX5_CAP_GEN(dev, roce)); +} + +static int handle_hca_cap_roce(struct mlx5_core_dev *dev, void *set_ctx) +{ + void *set_hca_cap; + int err; + + if (is_roce_fw_disabled(dev)) + return 0; + + err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE); + if (err) + return err; + + if (MLX5_CAP_ROCE(dev, sw_r_roce_src_udp_port) || + !MLX5_CAP_ROCE_MAX(dev, sw_r_roce_src_udp_port)) + return 0; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); + memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ROCE]->cur, + MLX5_ST_SZ_BYTES(roce_cap)); + MLX5_SET(roce_cap, set_hca_cap, sw_r_roce_src_udp_port, 1); + + err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ROCE); + return err; +} + +static int handle_hca_cap_port_selection(struct mlx5_core_dev *dev, + void *set_ctx) +{ + void *set_hca_cap; + int err; + + if (!MLX5_CAP_GEN(dev, port_selection_cap)) + return 0; + + err = mlx5_core_get_caps(dev, MLX5_CAP_PORT_SELECTION); + if (err) + return err; + + if (MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass) || + !MLX5_CAP_PORT_SELECTION_MAX(dev, port_select_flow_table_bypass)) + return 0; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); + memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_PORT_SELECTION]->cur, + MLX5_ST_SZ_BYTES(port_selection_cap)); + MLX5_SET(port_selection_cap, set_hca_cap, port_select_flow_table_bypass, 1); + + err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MODE_PORT_SELECTION); + + return err; +} + +static int set_hca_cap(struct mlx5_core_dev *dev) +{ + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + void *set_ctx; + int err; + + set_ctx = kzalloc(set_sz, GFP_KERNEL); + if (!set_ctx) + return -ENOMEM; + + err = handle_hca_cap(dev, set_ctx); + if 
(err) { + mlx5_core_err(dev, "handle_hca_cap failed\n"); + goto out; + } + + memset(set_ctx, 0, set_sz); + err = handle_hca_cap_atomic(dev, set_ctx); + if (err) { + mlx5_core_err(dev, "handle_hca_cap_atomic failed\n"); + goto out; + } + + memset(set_ctx, 0, set_sz); + err = handle_hca_cap_odp(dev, set_ctx); + if (err) { + mlx5_core_err(dev, "handle_hca_cap_odp failed\n"); + goto out; + } + + memset(set_ctx, 0, set_sz); + err = handle_hca_cap_roce(dev, set_ctx); + if (err) { + mlx5_core_err(dev, "handle_hca_cap_roce failed\n"); + goto out; + } + + memset(set_ctx, 0, set_sz); + err = handle_hca_cap_2(dev, set_ctx); + if (err) { + mlx5_core_err(dev, "handle_hca_cap_2 failed\n"); + goto out; + } + + memset(set_ctx, 0, set_sz); + err = handle_hca_cap_port_selection(dev, set_ctx); + if (err) { + mlx5_core_err(dev, "handle_hca_cap_port_selection failed\n"); + goto out; + } + +out: + kfree(set_ctx); + return err; +} + +static int set_hca_ctrl(struct mlx5_core_dev *dev) +{ + struct mlx5_reg_host_endianness he_in; + struct mlx5_reg_host_endianness he_out; + int err; + + if (!mlx5_core_is_pf(dev)) + return 0; + + memset(&he_in, 0, sizeof(he_in)); + he_in.he = MLX5_SET_HOST_ENDIANNESS; + err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in), + &he_out, sizeof(he_out), + MLX5_REG_HOST_ENDIANNESS, 0, 1); + return err; +} + +static int mlx5_core_set_hca_defaults(struct mlx5_core_dev *dev) +{ + int ret = 0; + + /* Disable local_lb by default */ + if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) + ret = mlx5_nic_vport_update_local_lb(dev, false); + + return ret; +} + +int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id) +{ + u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {}; + + MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); + MLX5_SET(enable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, + dev->caps.embedded_cpu); + return mlx5_cmd_exec_in(dev, enable_hca, in); +} + +int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) +{ + u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {}; + + MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); + MLX5_SET(disable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, + dev->caps.embedded_cpu); + return mlx5_cmd_exec_in(dev, disable_hca, in); +} + +static int mlx5_core_set_issi(struct mlx5_core_dev *dev) +{ + u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {}; + u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {}; + u32 sup_issi; + int err; + + MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI); + err = mlx5_cmd_exec_inout(dev, query_issi, query_in, query_out); + if (err) { + u32 syndrome = MLX5_GET(query_issi_out, query_out, syndrome); + u8 status = MLX5_GET(query_issi_out, query_out, status); + + if (!status || syndrome == MLX5_DRIVER_SYND) { + mlx5_core_err(dev, "Failed to query ISSI err(%d) status(%d) synd(%d)\n", + err, status, syndrome); + return err; + } + + mlx5_core_warn(dev, "Query ISSI is not supported by FW, ISSI is 0\n"); + dev->issi = 0; + return 0; + } + + sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0); + + if (sup_issi & (1 << 1)) { + u32 set_in[MLX5_ST_SZ_DW(set_issi_in)] = {}; + + MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI); + MLX5_SET(set_issi_in, set_in, current_issi, 1); + err = mlx5_cmd_exec_in(dev, set_issi, set_in); + if (err) { + mlx5_core_err(dev, "Failed to set ISSI to 1 err(%d)\n", + err); + return err; + } + + dev->issi = 1; + + return 0; + } else if (sup_issi & (1 << 0) || 
!sup_issi) { + return 0; + } + + return -EOPNOTSUPP; +} + +static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, + const struct pci_device_id *id) +{ + int err = 0; + + mutex_init(&dev->pci_status_mutex); + pci_set_drvdata(dev->pdev, dev); + + dev->bar_addr = pci_resource_start(pdev, 0); + + err = mlx5_pci_enable_device(dev); + if (err) { + mlx5_core_err(dev, "Cannot enable PCI device, aborting\n"); + return err; + } + + err = request_bar(pdev); + if (err) { + mlx5_core_err(dev, "error requesting BARs, aborting\n"); + goto err_disable; + } + + pci_set_master(pdev); + + err = set_dma_caps(pdev); + if (err) { + mlx5_core_err(dev, "Failed setting DMA capabilities mask, aborting\n"); + goto err_clr_master; + } + + if (pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32) && + pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64) && + pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128)) + mlx5_core_dbg(dev, "Enabling pci atomics failed\n"); + + dev->iseg_base = dev->bar_addr; + dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg)); + if (!dev->iseg) { + err = -ENOMEM; + mlx5_core_err(dev, "Failed mapping initialization segment, aborting\n"); + goto err_clr_master; + } + + mlx5_pci_vsc_init(dev); + return 0; + +err_clr_master: + pci_clear_master(dev->pdev); + release_bar(dev->pdev); +err_disable: + mlx5_pci_disable_device(dev); + return err; +} + +static void mlx5_pci_close(struct mlx5_core_dev *dev) +{ + /* health work might still be active, and it needs pci bar in + * order to know the NIC state. Therefore, drain the health WQ + * before removing the pci bars + */ + mlx5_drain_health_wq(dev); + iounmap(dev->iseg); + pci_clear_master(dev->pdev); + release_bar(dev->pdev); + mlx5_pci_disable_device(dev); +} + +static int mlx5_init_once(struct mlx5_core_dev *dev) +{ + int err; + + dev->priv.devcom = mlx5_devcom_register_device(dev); + if (IS_ERR(dev->priv.devcom)) + mlx5_core_err(dev, "failed to register with devcom (0x%p)\n", + dev->priv.devcom); + + err = mlx5_query_board_id(dev); + if (err) { + mlx5_core_err(dev, "query board id failed\n"); + goto err_devcom; + } + + err = mlx5_irq_table_init(dev); + if (err) { + mlx5_core_err(dev, "failed to initialize irq table\n"); + goto err_devcom; + } + + err = mlx5_eq_table_init(dev); + if (err) { + mlx5_core_err(dev, "failed to initialize eq\n"); + goto err_irq_cleanup; + } + + err = mlx5_events_init(dev); + if (err) { + mlx5_core_err(dev, "failed to initialize events\n"); + goto err_eq_cleanup; + } + + err = mlx5_fw_reset_init(dev); + if (err) { + mlx5_core_err(dev, "failed to initialize fw reset events\n"); + goto err_events_cleanup; + } + + mlx5_cq_debugfs_init(dev); + + mlx5_init_reserved_gids(dev); + + mlx5_init_clock(dev); + + dev->vxlan = mlx5_vxlan_create(dev); + dev->geneve = mlx5_geneve_create(dev); + + err = mlx5_init_rl_table(dev); + if (err) { + mlx5_core_err(dev, "Failed to init rate limiting\n"); + goto err_tables_cleanup; + } + + err = mlx5_mpfs_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init l2 table %d\n", err); + goto err_rl_cleanup; + } + + err = mlx5_sriov_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init sriov %d\n", err); + goto err_mpfs_cleanup; + } + + err = mlx5_eswitch_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init eswitch %d\n", err); + goto err_sriov_cleanup; + } + + err = mlx5_fpga_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init fpga device %d\n", err); + goto err_eswitch_cleanup; + } + + err = 
mlx5_vhca_event_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init vhca event notifier %d\n", err); + goto err_fpga_cleanup; + } + + err = mlx5_sf_hw_table_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init SF HW table %d\n", err); + goto err_sf_hw_table_cleanup; + } + + err = mlx5_sf_table_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init SF table %d\n", err); + goto err_sf_table_cleanup; + } + + err = mlx5_fs_core_alloc(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc flow steering\n"); + goto err_fs; + } + + dev->dm = mlx5_dm_create(dev); + if (IS_ERR(dev->dm)) + mlx5_core_warn(dev, "Failed to init device memory %ld\n", PTR_ERR(dev->dm)); + + dev->tracer = mlx5_fw_tracer_create(dev); + dev->hv_vhca = mlx5_hv_vhca_create(dev); + dev->rsc_dump = mlx5_rsc_dump_create(dev); + + return 0; + +err_fs: + mlx5_sf_table_cleanup(dev); +err_sf_table_cleanup: + mlx5_sf_hw_table_cleanup(dev); +err_sf_hw_table_cleanup: + mlx5_vhca_event_cleanup(dev); +err_fpga_cleanup: + mlx5_fpga_cleanup(dev); +err_eswitch_cleanup: + mlx5_eswitch_cleanup(dev->priv.eswitch); +err_sriov_cleanup: + mlx5_sriov_cleanup(dev); +err_mpfs_cleanup: + mlx5_mpfs_cleanup(dev); +err_rl_cleanup: + mlx5_cleanup_rl_table(dev); +err_tables_cleanup: + mlx5_geneve_destroy(dev->geneve); + mlx5_vxlan_destroy(dev->vxlan); + mlx5_cleanup_clock(dev); + mlx5_cleanup_reserved_gids(dev); + mlx5_cq_debugfs_cleanup(dev); + mlx5_fw_reset_cleanup(dev); +err_events_cleanup: + mlx5_events_cleanup(dev); +err_eq_cleanup: + mlx5_eq_table_cleanup(dev); +err_irq_cleanup: + mlx5_irq_table_cleanup(dev); +err_devcom: + mlx5_devcom_unregister_device(dev->priv.devcom); + + return err; +} + +static void mlx5_cleanup_once(struct mlx5_core_dev *dev) +{ + mlx5_rsc_dump_destroy(dev); + mlx5_hv_vhca_destroy(dev->hv_vhca); + mlx5_fw_tracer_destroy(dev->tracer); + mlx5_dm_cleanup(dev); + mlx5_fs_core_free(dev); + mlx5_sf_table_cleanup(dev); + mlx5_sf_hw_table_cleanup(dev); + mlx5_vhca_event_cleanup(dev); + mlx5_fpga_cleanup(dev); + mlx5_eswitch_cleanup(dev->priv.eswitch); + mlx5_sriov_cleanup(dev); + mlx5_mpfs_cleanup(dev); + mlx5_cleanup_rl_table(dev); + mlx5_geneve_destroy(dev->geneve); + mlx5_vxlan_destroy(dev->vxlan); + mlx5_cleanup_clock(dev); + mlx5_cleanup_reserved_gids(dev); + mlx5_cq_debugfs_cleanup(dev); + mlx5_fw_reset_cleanup(dev); + mlx5_events_cleanup(dev); + mlx5_eq_table_cleanup(dev); + mlx5_irq_table_cleanup(dev); + mlx5_devcom_unregister_device(dev->priv.devcom); +} + +static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout) +{ + int err; + + mlx5_core_info(dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev), + fw_rev_min(dev), fw_rev_sub(dev)); + + /* Only PFs hold the relevant PCIe information for this query */ + if (mlx5_core_is_pf(dev)) + pcie_print_link_status(dev->pdev); + + /* wait for firmware to accept initialization segments configurations + */ + err = wait_fw_init(dev, timeout, + mlx5_tout_ms(dev, FW_PRE_INIT_WARN_MESSAGE_INTERVAL)); + if (err) { + mlx5_core_err(dev, "Firmware over %llu MS in pre-initializing state, aborting\n", + timeout); + return err; + } + + err = mlx5_cmd_init(dev); + if (err) { + mlx5_core_err(dev, "Failed initializing command interface, aborting\n"); + return err; + } + + mlx5_tout_query_iseg(dev); + + err = wait_fw_init(dev, mlx5_tout_ms(dev, FW_INIT), 0); + if (err) { + mlx5_core_err(dev, "Firmware over %llu MS in initializing state, aborting\n", + mlx5_tout_ms(dev, FW_INIT)); + goto err_cmd_cleanup; + } + + dev->caps.embedded_cpu = 
mlx5_read_embedded_cpu(dev); + mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP); + + mlx5_start_health_poll(dev); + + err = mlx5_core_enable_hca(dev, 0); + if (err) { + mlx5_core_err(dev, "enable hca failed\n"); + goto stop_health_poll; + } + + err = mlx5_core_set_issi(dev); + if (err) { + mlx5_core_err(dev, "failed to set issi\n"); + goto err_disable_hca; + } + + err = mlx5_satisfy_startup_pages(dev, 1); + if (err) { + mlx5_core_err(dev, "failed to allocate boot pages\n"); + goto err_disable_hca; + } + + err = mlx5_tout_query_dtor(dev); + if (err) { + mlx5_core_err(dev, "failed to read dtor\n"); + goto reclaim_boot_pages; + } + + err = set_hca_ctrl(dev); + if (err) { + mlx5_core_err(dev, "set_hca_ctrl failed\n"); + goto reclaim_boot_pages; + } + + err = set_hca_cap(dev); + if (err) { + mlx5_core_err(dev, "set_hca_cap failed\n"); + goto reclaim_boot_pages; + } + + err = mlx5_satisfy_startup_pages(dev, 0); + if (err) { + mlx5_core_err(dev, "failed to allocate init pages\n"); + goto reclaim_boot_pages; + } + + err = mlx5_cmd_init_hca(dev, sw_owner_id); + if (err) { + mlx5_core_err(dev, "init hca failed\n"); + goto reclaim_boot_pages; + } + + mlx5_set_driver_version(dev); + + err = mlx5_query_hca_caps(dev); + if (err) { + mlx5_core_err(dev, "query hca failed\n"); + goto reclaim_boot_pages; + } + mlx5_start_health_fw_log_up(dev); + + return 0; + +reclaim_boot_pages: + mlx5_reclaim_startup_pages(dev); +err_disable_hca: + mlx5_core_disable_hca(dev, 0); +stop_health_poll: + mlx5_stop_health_poll(dev, boot); +err_cmd_cleanup: + mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); + mlx5_cmd_cleanup(dev); + + return err; +} + +static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) +{ + int err; + + err = mlx5_cmd_teardown_hca(dev); + if (err) { + mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n"); + return err; + } + mlx5_reclaim_startup_pages(dev); + mlx5_core_disable_hca(dev, 0); + mlx5_stop_health_poll(dev, boot); + mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); + mlx5_cmd_cleanup(dev); + + return 0; +} + +static int mlx5_load(struct mlx5_core_dev *dev) +{ + int err; + + dev->priv.uar = mlx5_get_uars_page(dev); + if (IS_ERR(dev->priv.uar)) { + mlx5_core_err(dev, "Failed allocating uar, aborting\n"); + err = PTR_ERR(dev->priv.uar); + return err; + } + + mlx5_events_start(dev); + mlx5_pagealloc_start(dev); + + err = mlx5_irq_table_create(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc IRQs\n"); + goto err_irq_table; + } + + err = mlx5_eq_table_create(dev); + if (err) { + mlx5_core_err(dev, "Failed to create EQs\n"); + goto err_eq_table; + } + + err = mlx5_fw_tracer_init(dev->tracer); + if (err) { + mlx5_core_err(dev, "Failed to init FW tracer %d\n", err); + mlx5_fw_tracer_destroy(dev->tracer); + dev->tracer = NULL; + } + + mlx5_fw_reset_events_start(dev); + mlx5_hv_vhca_init(dev->hv_vhca); + + err = mlx5_rsc_dump_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init Resource dump %d\n", err); + mlx5_rsc_dump_destroy(dev); + dev->rsc_dump = NULL; + } + + err = mlx5_fpga_device_start(dev); + if (err) { + mlx5_core_err(dev, "fpga device start failed %d\n", err); + goto err_fpga_start; + } + + err = mlx5_fs_core_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init flow steering\n"); + goto err_fs; + } + + err = mlx5_core_set_hca_defaults(dev); + if (err) { + mlx5_core_err(dev, "Failed to set hca defaults\n"); + goto err_set_hca; + } + + mlx5_vhca_event_start(dev); + + err = mlx5_sf_hw_table_create(dev); + if (err) { + mlx5_core_err(dev, "sf table create failed 
%d\n", err); + goto err_vhca; + } + + err = mlx5_ec_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init embedded CPU\n"); + goto err_ec; + } + + mlx5_lag_add_mdev(dev); + err = mlx5_sriov_attach(dev); + if (err) { + mlx5_core_err(dev, "sriov init failed %d\n", err); + goto err_sriov; + } + + mlx5_sf_dev_table_create(dev); + + return 0; + +err_sriov: + mlx5_lag_remove_mdev(dev); + mlx5_ec_cleanup(dev); +err_ec: + mlx5_sf_hw_table_destroy(dev); +err_vhca: + mlx5_vhca_event_stop(dev); +err_set_hca: + mlx5_fs_core_cleanup(dev); +err_fs: + mlx5_fpga_device_stop(dev); +err_fpga_start: + mlx5_rsc_dump_cleanup(dev); + mlx5_hv_vhca_cleanup(dev->hv_vhca); + mlx5_fw_reset_events_stop(dev); + mlx5_fw_tracer_cleanup(dev->tracer); + mlx5_eq_table_destroy(dev); +err_eq_table: + mlx5_irq_table_destroy(dev); +err_irq_table: + mlx5_pagealloc_stop(dev); + mlx5_events_stop(dev); + mlx5_put_uars_page(dev, dev->priv.uar); + return err; +} + +static void mlx5_unload(struct mlx5_core_dev *dev) +{ + mlx5_sf_dev_table_destroy(dev); + mlx5_eswitch_disable(dev->priv.eswitch); + mlx5_sriov_detach(dev); + mlx5_lag_remove_mdev(dev); + mlx5_ec_cleanup(dev); + mlx5_sf_hw_table_destroy(dev); + mlx5_vhca_event_stop(dev); + mlx5_fs_core_cleanup(dev); + mlx5_fpga_device_stop(dev); + mlx5_rsc_dump_cleanup(dev); + mlx5_hv_vhca_cleanup(dev->hv_vhca); + mlx5_fw_reset_events_stop(dev); + mlx5_fw_tracer_cleanup(dev->tracer); + mlx5_eq_table_destroy(dev); + mlx5_irq_table_destroy(dev); + mlx5_pagealloc_stop(dev); + mlx5_events_stop(dev); + mlx5_put_uars_page(dev, dev->priv.uar); +} + +int mlx5_init_one(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + int err = 0; + + devl_lock(devlink); + mutex_lock(&dev->intf_state_mutex); + dev->state = MLX5_DEVICE_STATE_UP; + + err = mlx5_function_setup(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); + if (err) + goto err_function; + + err = mlx5_init_once(dev); + if (err) { + mlx5_core_err(dev, "sw objs init failed\n"); + goto function_teardown; + } + + err = mlx5_load(dev); + if (err) + goto err_load; + + set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + + err = mlx5_devlink_register(priv_to_devlink(dev)); + if (err) + goto err_devlink_reg; + + err = mlx5_register_device(dev); + if (err) + goto err_register; + + mutex_unlock(&dev->intf_state_mutex); + devl_unlock(devlink); + return 0; + +err_register: + mlx5_devlink_unregister(priv_to_devlink(dev)); +err_devlink_reg: + clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + mlx5_unload(dev); +err_load: + mlx5_cleanup_once(dev); +function_teardown: + mlx5_function_teardown(dev, true); +err_function: + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + mutex_unlock(&dev->intf_state_mutex); + devl_unlock(devlink); + return err; +} + +void mlx5_uninit_one(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + + devl_lock(devlink); + mutex_lock(&dev->intf_state_mutex); + + mlx5_unregister_device(dev); + mlx5_devlink_unregister(priv_to_devlink(dev)); + + if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { + mlx5_core_warn(dev, "%s: interface is down, NOP\n", + __func__); + mlx5_cleanup_once(dev); + goto out; + } + + clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + mlx5_unload(dev); + mlx5_cleanup_once(dev); + mlx5_function_teardown(dev, true); +out: + mutex_unlock(&dev->intf_state_mutex); + devl_unlock(devlink); +} + +int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery) +{ + int err = 0; + u64 timeout; + + 
devl_assert_locked(priv_to_devlink(dev)); + mutex_lock(&dev->intf_state_mutex); + if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { + mlx5_core_warn(dev, "interface is up, NOP\n"); + goto out; + } + /* remove any previous indication of internal error */ + dev->state = MLX5_DEVICE_STATE_UP; + + if (recovery) + timeout = mlx5_tout_ms(dev, FW_PRE_INIT_ON_RECOVERY_TIMEOUT); + else + timeout = mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT); + err = mlx5_function_setup(dev, false, timeout); + if (err) + goto err_function; + + err = mlx5_load(dev); + if (err) + goto err_load; + + set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + + err = mlx5_attach_device(dev); + if (err) + goto err_attach; + + mutex_unlock(&dev->intf_state_mutex); + return 0; + +err_attach: + clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + mlx5_unload(dev); +err_load: + mlx5_function_teardown(dev, false); +err_function: + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; +out: + mutex_unlock(&dev->intf_state_mutex); + return err; +} + +int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery) +{ + struct devlink *devlink = priv_to_devlink(dev); + int ret; + + devl_lock(devlink); + ret = mlx5_load_one_devl_locked(dev, recovery); + devl_unlock(devlink); + return ret; +} + +void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev, bool suspend) +{ + devl_assert_locked(priv_to_devlink(dev)); + mutex_lock(&dev->intf_state_mutex); + + mlx5_detach_device(dev, suspend); + + if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { + mlx5_core_warn(dev, "%s: interface is down, NOP\n", + __func__); + goto out; + } + + clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + mlx5_unload(dev); + mlx5_function_teardown(dev, false); +out: + mutex_unlock(&dev->intf_state_mutex); +} + +void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend) +{ + struct devlink *devlink = priv_to_devlink(dev); + + devl_lock(devlink); + mlx5_unload_one_devl_locked(dev, suspend); + devl_unlock(devlink); +} + +static const int types[] = { + MLX5_CAP_GENERAL, + MLX5_CAP_GENERAL_2, + MLX5_CAP_ETHERNET_OFFLOADS, + MLX5_CAP_IPOIB_ENHANCED_OFFLOADS, + MLX5_CAP_ODP, + MLX5_CAP_ATOMIC, + MLX5_CAP_ROCE, + MLX5_CAP_IPOIB_OFFLOADS, + MLX5_CAP_FLOW_TABLE, + MLX5_CAP_ESWITCH_FLOW_TABLE, + MLX5_CAP_ESWITCH, + MLX5_CAP_VECTOR_CALC, + MLX5_CAP_QOS, + MLX5_CAP_DEBUG, + MLX5_CAP_DEV_MEM, + MLX5_CAP_DEV_EVENT, + MLX5_CAP_TLS, + MLX5_CAP_VDPA_EMULATION, + MLX5_CAP_IPSEC, + MLX5_CAP_PORT_SELECTION, + MLX5_CAP_DEV_SHAMPO, + MLX5_CAP_MACSEC, + MLX5_CAP_ADV_VIRTUALIZATION, +}; + +static void mlx5_hca_caps_free(struct mlx5_core_dev *dev) +{ + int type; + int i; + + for (i = 0; i < ARRAY_SIZE(types); i++) { + type = types[i]; + kfree(dev->caps.hca[type]); + } +} + +static int mlx5_hca_caps_alloc(struct mlx5_core_dev *dev) +{ + struct mlx5_hca_cap *cap; + int type; + int i; + + for (i = 0; i < ARRAY_SIZE(types); i++) { + cap = kzalloc(sizeof(*cap), GFP_KERNEL); + if (!cap) + goto err; + type = types[i]; + dev->caps.hca[type] = cap; + } + + return 0; + +err: + mlx5_hca_caps_free(dev); + return -ENOMEM; +} + +int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) +{ + struct mlx5_priv *priv = &dev->priv; + int err; + + memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile)); + INIT_LIST_HEAD(&priv->ctx_list); + spin_lock_init(&priv->ctx_lock); + lockdep_register_key(&dev->lock_key); + mutex_init(&dev->intf_state_mutex); + lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key); + + mutex_init(&priv->bfregs.reg_head.lock); + 
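/* Bookkeeping for blue-flame register (bfreg) allocations: one list (and
+ * lock) for regular UAR mappings and one for write-combining mappings.
+ */
+ 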
mutex_init(&priv->bfregs.wc_head.lock); + INIT_LIST_HEAD(&priv->bfregs.reg_head.list); + INIT_LIST_HEAD(&priv->bfregs.wc_head.list); + + mutex_init(&priv->alloc_mutex); + mutex_init(&priv->pgdir_mutex); + INIT_LIST_HEAD(&priv->pgdir_list); + + priv->numa_node = dev_to_node(mlx5_core_dma_dev(dev)); + priv->dbg.dbg_root = debugfs_create_dir(dev_name(dev->device), + mlx5_debugfs_root); + INIT_LIST_HEAD(&priv->traps); + + err = mlx5_tout_init(dev); + if (err) { + mlx5_core_err(dev, "Failed initializing timeouts, aborting\n"); + goto err_timeout_init; + } + + err = mlx5_health_init(dev); + if (err) + goto err_health_init; + + err = mlx5_pagealloc_init(dev); + if (err) + goto err_pagealloc_init; + + err = mlx5_adev_init(dev); + if (err) + goto err_adev_init; + + err = mlx5_hca_caps_alloc(dev); + if (err) + goto err_hca_caps; + + /* The conjunction of sw_vhca_id with sw_owner_id will be a global + * unique id per function which uses mlx5_core. + * Those values are supplied to FW as part of the init HCA command to + * be used by both driver and FW when it's applicable. + */ + dev->priv.sw_vhca_id = ida_alloc_range(&sw_vhca_ida, 1, + MAX_SW_VHCA_ID, + GFP_KERNEL); + if (dev->priv.sw_vhca_id < 0) + mlx5_core_err(dev, "failed to allocate sw_vhca_id, err=%d\n", + dev->priv.sw_vhca_id); + + return 0; + +err_hca_caps: + mlx5_adev_cleanup(dev); +err_adev_init: + mlx5_pagealloc_cleanup(dev); +err_pagealloc_init: + mlx5_health_cleanup(dev); +err_health_init: + mlx5_tout_cleanup(dev); +err_timeout_init: + debugfs_remove(dev->priv.dbg.dbg_root); + mutex_destroy(&priv->pgdir_mutex); + mutex_destroy(&priv->alloc_mutex); + mutex_destroy(&priv->bfregs.wc_head.lock); + mutex_destroy(&priv->bfregs.reg_head.lock); + mutex_destroy(&dev->intf_state_mutex); + lockdep_unregister_key(&dev->lock_key); + return err; +} + +void mlx5_mdev_uninit(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + + if (priv->sw_vhca_id > 0) + ida_free(&sw_vhca_ida, dev->priv.sw_vhca_id); + + mlx5_hca_caps_free(dev); + mlx5_adev_cleanup(dev); + mlx5_pagealloc_cleanup(dev); + mlx5_health_cleanup(dev); + mlx5_tout_cleanup(dev); + debugfs_remove_recursive(dev->priv.dbg.dbg_root); + mutex_destroy(&priv->pgdir_mutex); + mutex_destroy(&priv->alloc_mutex); + mutex_destroy(&priv->bfregs.wc_head.lock); + mutex_destroy(&priv->bfregs.reg_head.lock); + mutex_destroy(&dev->intf_state_mutex); + lockdep_unregister_key(&dev->lock_key); +} + +static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct mlx5_core_dev *dev; + struct devlink *devlink; + int err; + + devlink = mlx5_devlink_alloc(&pdev->dev); + if (!devlink) { + dev_err(&pdev->dev, "devlink alloc failed\n"); + return -ENOMEM; + } + + dev = devlink_priv(devlink); + dev->device = &pdev->dev; + dev->pdev = pdev; + + dev->coredev_type = id->driver_data & MLX5_PCI_DEV_IS_VF ? 
+ MLX5_COREDEV_VF : MLX5_COREDEV_PF; + + dev->priv.adev_idx = mlx5_adev_idx_alloc(); + if (dev->priv.adev_idx < 0) { + err = dev->priv.adev_idx; + goto adev_init_err; + } + + err = mlx5_mdev_init(dev, prof_sel); + if (err) + goto mdev_init_err; + + err = mlx5_pci_init(dev, pdev, id); + if (err) { + mlx5_core_err(dev, "mlx5_pci_init failed with error code %d\n", + err); + goto pci_init_err; + } + + err = mlx5_init_one(dev); + if (err) { + mlx5_core_err(dev, "mlx5_init_one failed with error code %d\n", + err); + goto err_init_one; + } + + err = mlx5_crdump_enable(dev); + if (err) + dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err); + + pci_save_state(pdev); + devlink_register(devlink); + return 0; + +err_init_one: + mlx5_pci_close(dev); +pci_init_err: + mlx5_mdev_uninit(dev); +mdev_init_err: + mlx5_adev_idx_free(dev->priv.adev_idx); +adev_init_err: + mlx5_devlink_free(devlink); + + return err; +} + +static void remove_one(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct devlink *devlink = priv_to_devlink(dev); + + set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); + /* mlx5_drain_fw_reset() and mlx5_drain_health_wq() are using + * devlink notify APIs. + * Hence, we must drain them before unregistering the devlink. + */ + mlx5_drain_fw_reset(dev); + mlx5_drain_health_wq(dev); + devlink_unregister(devlink); + mlx5_sriov_disable(pdev); + mlx5_crdump_disable(dev); + mlx5_uninit_one(dev); + mlx5_pci_close(dev); + mlx5_mdev_uninit(dev); + mlx5_adev_idx_free(dev->priv.adev_idx); + mlx5_devlink_free(devlink); +} + +#define mlx5_pci_trace(dev, fmt, ...) ({ \ + struct mlx5_core_dev *__dev = (dev); \ + mlx5_core_info(__dev, "%s Device state = %d health sensors: %d pci_status: %d. " fmt, \ + __func__, __dev->state, mlx5_health_check_fatal_sensors(__dev), \ + __dev->pci_status, ##__VA_ARGS__); \ +}) + +static const char *result2str(enum pci_ers_result result) +{ + return result == PCI_ERS_RESULT_NEED_RESET ? "need reset" : + result == PCI_ERS_RESULT_DISCONNECT ? "disconnect" : + result == PCI_ERS_RESULT_RECOVERED ? "recovered" : + "unknown"; +} + +static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + enum pci_ers_result res; + + mlx5_pci_trace(dev, "Enter, pci channel state = %d\n", state); + + mlx5_enter_error_state(dev, false); + mlx5_error_sw_reset(dev); + mlx5_unload_one(dev, false); + mlx5_drain_health_wq(dev); + mlx5_pci_disable_device(dev); + + res = state == pci_channel_io_perm_failure ? + PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; + + mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, result = %d, %s\n", + __func__, dev->state, dev->pci_status, res, result2str(res)); + return res; +} + +/* wait for the device to show vital signs by waiting + * for the health counter to start counting. 
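+ * The health counter is read via the health buffer mapping; readings of
+ * 0 or 0xffffffff are ignored as not yet valid, and the wait succeeds
+ * only once a valid reading is seen changing between polls.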
+ */ +static int wait_vital(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_health *health = &dev->priv.health; + const int niter = 100; + u32 last_count = 0; + u32 count; + int i; + + for (i = 0; i < niter; i++) { + count = ioread32be(health->health_counter); + if (count && count != 0xffffffff) { + if (last_count && last_count != count) { + mlx5_core_info(dev, + "wait vital counter value 0x%x after %d iterations\n", + count, i); + return 0; + } + last_count = count; + } + msleep(50); + } + + return -ETIMEDOUT; +} + +static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) +{ + enum pci_ers_result res = PCI_ERS_RESULT_DISCONNECT; + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int err; + + mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Enter\n", + __func__, dev->state, dev->pci_status); + + err = mlx5_pci_enable_device(dev); + if (err) { + mlx5_core_err(dev, "%s: mlx5_pci_enable_device failed with error code: %d\n", + __func__, err); + goto out; + } + + pci_set_master(pdev); + pci_restore_state(pdev); + pci_save_state(pdev); + + err = wait_vital(pdev); + if (err) { + mlx5_core_err(dev, "%s: wait vital failed with error code: %d\n", + __func__, err); + goto out; + } + + res = PCI_ERS_RESULT_RECOVERED; +out: + mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, err = %d, result = %d, %s\n", + __func__, dev->state, dev->pci_status, err, res, result2str(res)); + return res; +} + +static void mlx5_pci_resume(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int err; + + mlx5_pci_trace(dev, "Enter, loading driver..\n"); + + err = mlx5_load_one(dev, false); + + if (!err) + devlink_health_reporter_state_update(dev->priv.health.fw_fatal_reporter, + DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); + + mlx5_pci_trace(dev, "Done, err = %d, device %s\n", err, + !err ? "recovered" : "Failed"); +} + +static const struct pci_error_handlers mlx5_err_handler = { + .error_detected = mlx5_pci_err_detected, + .slot_reset = mlx5_pci_slot_reset, + .resume = mlx5_pci_resume +}; + +static int mlx5_try_fast_unload(struct mlx5_core_dev *dev) +{ + bool fast_teardown = false, force_teardown = false; + int ret = 1; + + fast_teardown = MLX5_CAP_GEN(dev, fast_teardown); + force_teardown = MLX5_CAP_GEN(dev, force_teardown); + + mlx5_core_dbg(dev, "force teardown firmware support=%d\n", force_teardown); + mlx5_core_dbg(dev, "fast teardown firmware support=%d\n", fast_teardown); + + if (!fast_teardown && !force_teardown) + return -EOPNOTSUPP; + + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + mlx5_core_dbg(dev, "Device in internal error state, giving up\n"); + return -EAGAIN; + } + + /* Panic tear down fw command will stop the PCI bus communication + * with the HCA, so the health poll is no longer needed. + */ + mlx5_drain_health_wq(dev); + mlx5_stop_health_poll(dev, false); + + ret = mlx5_cmd_fast_teardown_hca(dev); + if (!ret) + goto succeed; + + ret = mlx5_cmd_force_teardown_hca(dev); + if (!ret) + goto succeed; + + mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret); + mlx5_start_health_poll(dev); + return ret; + +succeed: + mlx5_enter_error_state(dev, true); + + /* Some platforms requiring freeing the IRQ's in the shutdown + * flow. If they aren't freed they can't be allocated after + * kexec. There is no need to cleanup the mlx5_core software + * contexts. 
+ */ + mlx5_core_eq_free_irqs(dev); + + return 0; +} + +static void shutdown(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int err; + + mlx5_core_info(dev, "Shutdown was called\n"); + set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); + err = mlx5_try_fast_unload(dev); + if (err) + mlx5_unload_one(dev, false); + mlx5_pci_disable_device(dev); +} + +static int mlx5_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + + mlx5_unload_one(dev, true); + + return 0; +} + +static int mlx5_resume(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + + return mlx5_load_one(dev, false); +} + +static const struct pci_device_id mlx5_core_pci_table[] = { + { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTIB) }, + { PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF}, /* Connect-IB VF */ + { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4) }, + { PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4 VF */ + { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX) }, + { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */ + { PCI_VDEVICE(MELLANOX, 0x1017) }, /* ConnectX-5, PCIe 3.0 */ + { PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 VF */ + { PCI_VDEVICE(MELLANOX, 0x1019) }, /* ConnectX-5 Ex */ + { PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 Ex VF */ + { PCI_VDEVICE(MELLANOX, 0x101b) }, /* ConnectX-6 */ + { PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF}, /* ConnectX-6 VF */ + { PCI_VDEVICE(MELLANOX, 0x101d) }, /* ConnectX-6 Dx */ + { PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */ + { PCI_VDEVICE(MELLANOX, 0x101f) }, /* ConnectX-6 LX */ + { PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */ + { PCI_VDEVICE(MELLANOX, 0x1023) }, /* ConnectX-8 */ + { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */ + { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */ + { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */ + { PCI_VDEVICE(MELLANOX, 0xa2dc) }, /* BlueField-3 integrated ConnectX-7 network controller */ + { PCI_VDEVICE(MELLANOX, 0xa2df) }, /* BlueField-4 integrated ConnectX-8 network controller */ + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table); + +void mlx5_disable_device(struct mlx5_core_dev *dev) +{ + mlx5_error_sw_reset(dev); + mlx5_unload_one_devl_locked(dev, false); +} + +int mlx5_recover_device(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_sf(dev)) { + mlx5_pci_disable_device(dev); + if (mlx5_pci_slot_reset(dev->pdev) != PCI_ERS_RESULT_RECOVERED) + return -EIO; + } + + return mlx5_load_one_devl_locked(dev, true); +} + +static struct pci_driver mlx5_core_driver = { + .name = KBUILD_MODNAME, + .id_table = mlx5_core_pci_table, + .probe = probe_one, + .remove = remove_one, + .suspend = mlx5_suspend, + .resume = mlx5_resume, + .shutdown = shutdown, + .err_handler = &mlx5_err_handler, + .sriov_configure = mlx5_core_sriov_configure, + .sriov_get_vf_total_msix = mlx5_sriov_get_vf_total_msix, + .sriov_set_msix_vec_count = mlx5_core_sriov_set_msix_vec_count, +}; + +/** + * mlx5_vf_get_core_dev - Get the mlx5 core device from a given VF PCI device if + * mlx5_core is its driver. + * @pdev: The associated PCI device. + * + * Upon return the interface state lock stay held to let caller uses it safely. 
+ * Caller must ensure to use the returned mlx5 device for a narrow window + * and put it back with mlx5_vf_put_core_dev() immediately once usage was over. + * + * Return: Pointer to the associated mlx5_core_dev or NULL. + */ +struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev) +{ + struct mlx5_core_dev *mdev; + + mdev = pci_iov_get_pf_drvdata(pdev, &mlx5_core_driver); + if (IS_ERR(mdev)) + return NULL; + + mutex_lock(&mdev->intf_state_mutex); + if (!test_bit(MLX5_INTERFACE_STATE_UP, &mdev->intf_state)) { + mutex_unlock(&mdev->intf_state_mutex); + return NULL; + } + + return mdev; +} +EXPORT_SYMBOL(mlx5_vf_get_core_dev); + +/** + * mlx5_vf_put_core_dev - Put the mlx5 core device back. + * @mdev: The mlx5 core device. + * + * Upon return the interface state lock is unlocked and caller should not + * access the mdev any more. + */ +void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev) +{ + mutex_unlock(&mdev->intf_state_mutex); +} +EXPORT_SYMBOL(mlx5_vf_put_core_dev); + +static void mlx5_core_verify_params(void) +{ + if (prof_sel >= ARRAY_SIZE(profile)) { + pr_warn("mlx5_core: WARNING: Invalid module parameter prof_sel %d, valid range 0-%zu, changing back to default(%d)\n", + prof_sel, + ARRAY_SIZE(profile) - 1, + MLX5_DEFAULT_PROF); + prof_sel = MLX5_DEFAULT_PROF; + } +} + +static int __init mlx5_init(void) +{ + int err; + + WARN_ONCE(strcmp(MLX5_ADEV_NAME, KBUILD_MODNAME), + "mlx5_core name not in sync with kernel module name"); + + get_random_bytes(&sw_owner_id, sizeof(sw_owner_id)); + + mlx5_core_verify_params(); + mlx5_register_debugfs(); + + err = mlx5e_init(); + if (err) + goto err_debug; + + err = mlx5_sf_driver_register(); + if (err) + goto err_sf; + + err = pci_register_driver(&mlx5_core_driver); + if (err) + goto err_pci; + + return 0; + +err_pci: + mlx5_sf_driver_unregister(); +err_sf: + mlx5e_cleanup(); +err_debug: + mlx5_unregister_debugfs(); + return err; +} + +static void __exit mlx5_cleanup(void) +{ + pci_unregister_driver(&mlx5_core_driver); + mlx5_sf_driver_unregister(); + mlx5e_cleanup(); + mlx5_unregister_debugfs(); +} + +module_init(mlx5_init); +module_exit(mlx5_cleanup); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c new file mode 100644 index 000000000..495cca58d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/mlx5/driver.h> +#include <rdma/ib_verbs.h> +#include "mlx5_core.h" + +int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn) +{ + u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {}; + void *gid; + + MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG); + MLX5_SET(attach_to_mcg_in, in, qpn, qpn); + gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid); + memcpy(gid, mgid, sizeof(*mgid)); + return mlx5_cmd_exec_in(dev, attach_to_mcg, in); +} +EXPORT_SYMBOL(mlx5_core_attach_mcg); + +int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn) +{ + u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {}; + void *gid; + + MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG); + MLX5_SET(detach_from_mcg_in, in, qpn, qpn); + gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid); + memcpy(gid, mgid, sizeof(*mgid)); + return mlx5_cmd_exec_in(dev, detach_from_mcg, in); +} +EXPORT_SYMBOL(mlx5_core_detach_mcg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h new file mode 100644 index 000000000..0b560e97a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_CORE_H__ +#define __MLX5_CORE_H__ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/if_link.h> +#include <linux/firmware.h> +#include <linux/mlx5/cq.h> +#include <linux/mlx5/fs.h> +#include <linux/mlx5/driver.h> + +extern uint mlx5_core_debug_mask; + +#define mlx5_core_dbg(__dev, format, ...) \ + dev_dbg((__dev)->device, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +#define mlx5_core_dbg_once(__dev, format, ...) 
\ + dev_dbg_once((__dev)->device, \ + "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +#define mlx5_core_dbg_mask(__dev, mask, format, ...) \ +do { \ + if ((mask) & mlx5_core_debug_mask) \ + mlx5_core_dbg(__dev, format, ##__VA_ARGS__); \ +} while (0) + +#define mlx5_core_err(__dev, format, ...) \ + dev_err((__dev)->device, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +#define mlx5_core_err_rl(__dev, format, ...) \ + dev_err_ratelimited((__dev)->device, \ + "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +#define mlx5_core_warn(__dev, format, ...) \ + dev_warn((__dev)->device, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +#define mlx5_core_warn_once(__dev, format, ...) \ + dev_warn_once((__dev)->device, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +#define mlx5_core_warn_rl(__dev, format, ...) \ + dev_warn_ratelimited((__dev)->device, \ + "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +#define mlx5_core_info(__dev, format, ...) \ + dev_info((__dev)->device, format, ##__VA_ARGS__) + +#define mlx5_core_info_rl(__dev, format, ...) \ + dev_info_ratelimited((__dev)->device, \ + "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +static inline void mlx5_printk(struct mlx5_core_dev *dev, int level, const char *format, ...) +{ + struct device *device = dev->device; + struct va_format vaf; + va_list args; + + if (WARN_ONCE(level < LOGLEVEL_EMERG || level > LOGLEVEL_DEBUG, + "Level %d is out of range, set to default level\n", level)) + level = LOGLEVEL_DEFAULT; + + va_start(args, format); + vaf.fmt = format; + vaf.va = &args; + + dev_printk_emit(level, device, "%s %s: %pV", dev_driver_string(device), dev_name(device), + &vaf); + va_end(args); +} + +#define mlx5_log(__dev, level, format, ...) 
\ + mlx5_printk(__dev, level, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +static inline struct device *mlx5_core_dma_dev(struct mlx5_core_dev *dev) +{ + return &dev->pdev->dev; +} + +enum { + MLX5_CMD_DATA, /* print command payload only */ + MLX5_CMD_TIME, /* print command execution time */ +}; + +enum { + MLX5_DRIVER_STATUS_ABORTED = 0xfe, + MLX5_DRIVER_SYND = 0xbadd00de, +}; + +enum mlx5_semaphore_space_address { + MLX5_SEMAPHORE_SPACE_DOMAIN = 0xA, + MLX5_SEMAPHORE_SW_RESET = 0x20, +}; + +#define MLX5_DEFAULT_PROF 2 + +static inline int mlx5_flexible_inlen(struct mlx5_core_dev *dev, size_t fixed, + size_t item_size, size_t num_items, + const char *func, int line) +{ + int inlen; + + if (fixed > INT_MAX || item_size > INT_MAX || num_items > INT_MAX) { + mlx5_core_err(dev, "%s: %s:%d: input values too big: %zu + %zu * %zu\n", + __func__, func, line, fixed, item_size, num_items); + return -ENOMEM; + } + + if (check_mul_overflow((int)item_size, (int)num_items, &inlen)) { + mlx5_core_err(dev, "%s: %s:%d: multiplication overflow: %zu + %zu * %zu\n", + __func__, func, line, fixed, item_size, num_items); + return -ENOMEM; + } + + if (check_add_overflow((int)fixed, inlen, &inlen)) { + mlx5_core_err(dev, "%s: %s:%d: addition overflow: %zu + %zu * %zu\n", + __func__, func, line, fixed, item_size, num_items); + return -ENOMEM; + } + + return inlen; +} + +#define MLX5_FLEXIBLE_INLEN(dev, fixed, item_size, num_items) \ + mlx5_flexible_inlen(dev, fixed, item_size, num_items, __func__, __LINE__) + +int mlx5_query_hca_caps(struct mlx5_core_dev *dev); +int mlx5_query_board_id(struct mlx5_core_dev *dev); +int mlx5_cmd_init(struct mlx5_core_dev *dev); +void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); +void mlx5_cmd_set_state(struct mlx5_core_dev *dev, + enum mlx5_cmdif_state cmdif_state); +int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id); +int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); +int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev); +int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev); +void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); +void mlx5_error_sw_reset(struct mlx5_core_dev *dev); +u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev); +int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev); +void mlx5_disable_device(struct mlx5_core_dev *dev); +int mlx5_recover_device(struct mlx5_core_dev *dev); +int mlx5_sriov_init(struct mlx5_core_dev *dev); +void mlx5_sriov_cleanup(struct mlx5_core_dev *dev); +int mlx5_sriov_attach(struct mlx5_core_dev *dev); +void mlx5_sriov_detach(struct mlx5_core_dev *dev); +int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); +void mlx5_sriov_disable(struct pci_dev *pdev); +int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count); +int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + void *context, u32 *element_id); +int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + void *context, u32 element_id, + u32 modify_bitmask); +int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + u32 element_id); +int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages); + +void mlx5_cmd_flush(struct mlx5_core_dev *dev); +void mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_cq_debugfs_cleanup(struct 
mlx5_core_dev *dev); + +int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group, + u8 access_reg_group); +int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcap, u8 feature_group, + u8 access_reg_group); +int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, + u8 feature_group, u8 access_reg_group); + +void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, struct net_device *netdev); +void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, struct net_device *netdev); +void mlx5_lag_add_mdev(struct mlx5_core_dev *dev); +void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev); +void mlx5_lag_disable_change(struct mlx5_core_dev *dev); +void mlx5_lag_enable_change(struct mlx5_core_dev *dev); + +int mlx5_events_init(struct mlx5_core_dev *dev); +void mlx5_events_cleanup(struct mlx5_core_dev *dev); +void mlx5_events_start(struct mlx5_core_dev *dev); +void mlx5_events_stop(struct mlx5_core_dev *dev); + +int mlx5_adev_idx_alloc(void); +void mlx5_adev_idx_free(int idx); +void mlx5_adev_cleanup(struct mlx5_core_dev *dev); +int mlx5_adev_init(struct mlx5_core_dev *dev); + +int mlx5_attach_device(struct mlx5_core_dev *dev); +void mlx5_detach_device(struct mlx5_core_dev *dev, bool suspend); +int mlx5_register_device(struct mlx5_core_dev *dev); +void mlx5_unregister_device(struct mlx5_core_dev *dev); +struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev); +void mlx5_dev_list_lock(void); +void mlx5_dev_list_unlock(void); +int mlx5_dev_list_trylock(void); + +int mlx5_query_mtpps(struct mlx5_core_dev *dev, u32 *mtpps, u32 mtpps_size); +int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size); +int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode); +int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode); + +struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev); +void mlx5_dm_cleanup(struct mlx5_core_dev *dev); + +#define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) && \ + MLX5_CAP_GEN((mdev), pps_modify) && \ + MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \ + MLX5_CAP_MCAM_FEATURE((mdev), mtpps_enh_out_per_adj)) + +int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw, + struct netlink_ext_ack *extack); +int mlx5_fw_version_query(struct mlx5_core_dev *dev, + u32 *running_ver, u32 *stored_ver); + +#ifdef CONFIG_MLX5_CORE_EN +int mlx5e_init(void); +void mlx5e_cleanup(void); +#else +static inline int mlx5e_init(void){ return 0; } +static inline void mlx5e_cleanup(void){} +#endif + +static inline bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) +{ + return pci_num_vf(dev->pdev) ? 
true : false; +} + +int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev); +static inline int mlx5_rescan_drivers(struct mlx5_core_dev *dev) +{ + int ret; + + mlx5_dev_list_lock(); + ret = mlx5_rescan_drivers_locked(dev); + mlx5_dev_list_unlock(); + return ret; +} + +void mlx5_lag_update(struct mlx5_core_dev *dev); + +enum { + MLX5_NIC_IFC_FULL = 0, + MLX5_NIC_IFC_DISABLED = 1, + MLX5_NIC_IFC_NO_DRAM_NIC = 2, + MLX5_NIC_IFC_SW_RESET = 7 +}; + +u8 mlx5_get_nic_state(struct mlx5_core_dev *dev); +void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state); + +static inline bool mlx5_core_is_sf(const struct mlx5_core_dev *dev) +{ + return dev->coredev_type == MLX5_COREDEV_SF; +} + +int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx); +void mlx5_mdev_uninit(struct mlx5_core_dev *dev); +int mlx5_init_one(struct mlx5_core_dev *dev); +void mlx5_uninit_one(struct mlx5_core_dev *dev); +void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend); +void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev, bool suspend); +int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery); +int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery); + +int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out); + +void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work); +static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + + return MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); +} + +bool mlx5_eth_supported(struct mlx5_core_dev *dev); +bool mlx5_rdma_supported(struct mlx5_core_dev *dev); +bool mlx5_vnet_supported(struct mlx5_core_dev *dev); +bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev); + +#endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h new file mode 100644 index 000000000..2e728e4e8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. 
*/ + +#ifndef __MLX5_IRQ_H__ +#define __MLX5_IRQ_H__ + +#include <linux/mlx5/driver.h> + +#define MLX5_COMP_EQS_PER_SF 8 + +struct mlx5_irq; + +int mlx5_irq_table_init(struct mlx5_core_dev *dev); +void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); +int mlx5_irq_table_create(struct mlx5_core_dev *dev); +void mlx5_irq_table_destroy(struct mlx5_core_dev *dev); +void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev); +int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table); +int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table); +struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev); + +int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn, + int msix_vec_count); +int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs); + +struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev); +void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq); +struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx, + struct cpumask *affinity); +int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs, + struct mlx5_irq **irqs); +void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs); +int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb); +int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb); +struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq); +int mlx5_irq_get_index(struct mlx5_irq *irq); + +struct mlx5_irq_pool; +#ifdef CONFIG_MLX5_SF +int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs, + struct mlx5_irq **irqs); +struct mlx5_irq *mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, + const struct cpumask *req_mask); +void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs, + int num_irqs); +#else +static inline int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs, + struct mlx5_irq **irqs) +{ + return -EOPNOTSUPP; +} + +static inline struct mlx5_irq * +mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, + struct mlx5_irq **irqs, int num_irqs) {} +#endif +#endif /* __MLX5_IRQ_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c new file mode 100644 index 000000000..9d735c343 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" + +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, u32 *mkey, u32 *in, + int inlen) +{ + u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {}; + u32 mkey_index; + int err; + + MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); + + err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout)); + if (err) + return err; + + mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); + *mkey = MLX5_GET(create_mkey_in, in, memory_key_mkey_entry.mkey_7_0) | + mlx5_idx_to_mkey(mkey_index); + + mlx5_core_dbg(dev, "out 0x%x, mkey 0x%x\n", mkey_index, *mkey); + return 0; +} +EXPORT_SYMBOL(mlx5_core_create_mkey); + +int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, u32 mkey) +{ + u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {}; + + MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY); + MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey)); + return mlx5_cmd_exec_in(dev, destroy_mkey, in); +} +EXPORT_SYMBOL(mlx5_core_destroy_mkey); + +int mlx5_core_query_mkey(struct mlx5_core_dev *dev, u32 mkey, u32 *out, + int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_mkey_in)] = {}; + + memset(out, 0, outlen); + MLX5_SET(query_mkey_in, in, opcode, MLX5_CMD_OP_QUERY_MKEY); + MLX5_SET(query_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} +EXPORT_SYMBOL(mlx5_core_query_mkey); + +static inline u32 mlx5_get_psv(u32 *out, int psv_index) +{ + switch (psv_index) { + case 1: return MLX5_GET(create_psv_out, out, psv1_index); + case 2: return MLX5_GET(create_psv_out, out, psv2_index); + case 3: return MLX5_GET(create_psv_out, out, psv3_index); + default: return MLX5_GET(create_psv_out, out, psv0_index); + } +} + +int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, + int npsvs, u32 *sig_index) +{ + u32 out[MLX5_ST_SZ_DW(create_psv_out)] = {}; + u32 in[MLX5_ST_SZ_DW(create_psv_in)] = {}; + int i, err; + + if (npsvs > MLX5_MAX_PSVS) + return -EINVAL; + + MLX5_SET(create_psv_in, in, opcode, MLX5_CMD_OP_CREATE_PSV); + MLX5_SET(create_psv_in, in, pd, pdn); + MLX5_SET(create_psv_in, in, num_psv, npsvs); + + err = mlx5_cmd_exec_inout(dev, create_psv, in, out); + if (err) + return err; + + for (i = 0; i < npsvs; i++) + sig_index[i] = mlx5_get_psv(out, i); + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_psv); + +int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num) +{ + u32 in[MLX5_ST_SZ_DW(destroy_psv_in)] = {}; + + MLX5_SET(destroy_psv_in, in, opcode, MLX5_CMD_OP_DESTROY_PSV); + MLX5_SET(destroy_psv_in, in, psvn, psv_num); + return mlx5_cmd_exec_in(dev, destroy_psv, in); +} +EXPORT_SYMBOL(mlx5_core_destroy_psv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c new file mode 100644 index 000000000..95dc67fb3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -0,0 +1,796 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/highmem.h> +#include <linux/kernel.h> +#include <linux/delay.h> +#include <linux/mlx5/driver.h> +#include <linux/xarray.h> +#include "mlx5_core.h" +#include "lib/eq.h" +#include "lib/tout.h" + +enum { + MLX5_PAGES_CANT_GIVE = 0, + MLX5_PAGES_GIVE = 1, + MLX5_PAGES_TAKE = 2 +}; + +struct mlx5_pages_req { + struct mlx5_core_dev *dev; + u16 func_id; + u8 ec_function; + s32 npages; + struct work_struct work; + u8 release_all; +}; + +struct fw_page { + struct rb_node rb_node; + u64 addr; + struct page *page; + u32 function; + unsigned long bitmask; + struct list_head list; + unsigned int free_count; +}; + +enum { + MLX5_MAX_RECLAIM_TIME_MILI = 5000, + MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE, +}; + +static u32 get_function(u16 func_id, bool ec_function) +{ + return (u32)func_id | (ec_function << 16); +} + +static u16 func_id_to_type(struct mlx5_core_dev *dev, u16 func_id, bool ec_function) +{ + if (!func_id) + return mlx5_core_is_ecpf(dev) && !ec_function ? MLX5_HOST_PF : MLX5_PF; + + return func_id <= mlx5_core_max_vfs(dev) ? 
MLX5_VF : MLX5_SF; +} + +static u32 mlx5_get_ec_function(u32 function) +{ + return function >> 16; +} + +static u32 mlx5_get_func_id(u32 function) +{ + return function & 0xffff; +} + +static struct rb_root *page_root_per_function(struct mlx5_core_dev *dev, u32 function) +{ + struct rb_root *root; + int err; + + root = xa_load(&dev->priv.page_root_xa, function); + if (root) + return root; + + root = kzalloc(sizeof(*root), GFP_KERNEL); + if (!root) + return ERR_PTR(-ENOMEM); + + err = xa_insert(&dev->priv.page_root_xa, function, root, GFP_KERNEL); + if (err) { + kfree(root); + return ERR_PTR(err); + } + + *root = RB_ROOT; + + return root; +} + +static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u32 function) +{ + struct rb_node *parent = NULL; + struct rb_root *root; + struct rb_node **new; + struct fw_page *nfp; + struct fw_page *tfp; + int i; + + root = page_root_per_function(dev, function); + if (IS_ERR(root)) + return PTR_ERR(root); + + new = &root->rb_node; + + while (*new) { + parent = *new; + tfp = rb_entry(parent, struct fw_page, rb_node); + if (tfp->addr < addr) + new = &parent->rb_left; + else if (tfp->addr > addr) + new = &parent->rb_right; + else + return -EEXIST; + } + + nfp = kzalloc(sizeof(*nfp), GFP_KERNEL); + if (!nfp) + return -ENOMEM; + + nfp->addr = addr; + nfp->page = page; + nfp->function = function; + nfp->free_count = MLX5_NUM_4K_IN_PAGE; + for (i = 0; i < MLX5_NUM_4K_IN_PAGE; i++) + set_bit(i, &nfp->bitmask); + + rb_link_node(&nfp->rb_node, parent, new); + rb_insert_color(&nfp->rb_node, root); + list_add(&nfp->list, &dev->priv.free_list); + + return 0; +} + +static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr, + u32 function) +{ + struct fw_page *result = NULL; + struct rb_root *root; + struct rb_node *tmp; + struct fw_page *tfp; + + root = xa_load(&dev->priv.page_root_xa, function); + if (WARN_ON_ONCE(!root)) + return NULL; + + tmp = root->rb_node; + + while (tmp) { + tfp = rb_entry(tmp, struct fw_page, rb_node); + if (tfp->addr < addr) { + tmp = tmp->rb_left; + } else if (tfp->addr > addr) { + tmp = tmp->rb_right; + } else { + result = tfp; + break; + } + } + + return result; +} + +static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, + s32 *npages, int boot) +{ + u32 out[MLX5_ST_SZ_DW(query_pages_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_pages_in)] = {}; + int err; + + MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES); + MLX5_SET(query_pages_in, in, op_mod, boot ? 
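/*
 * Illustrative sketch (editorial, not part of the patch): the page tracking
 * above keys each rb-tree root by a packed u32 built from the function id
 * (low 16 bits) and the embedded-CPU flag (bit 16).  The values below are
 * invented examples:
 *
 *   get_function(5, false)        == 0x00005
 *   get_function(5, true)         == 0x10005
 *   mlx5_get_func_id(0x10005)     == 5
 *   mlx5_get_ec_function(0x10005) == 1
 *
 * xa_load(&dev->priv.page_root_xa, get_function(func_id, ec_function)) then
 * yields the rb-tree of firmware pages owned by exactly that function.
 */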
+ MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES : + MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES); + MLX5_SET(query_pages_in, in, embedded_cpu_function, mlx5_core_is_ecpf(dev)); + + err = mlx5_cmd_exec_inout(dev, query_pages, in, out); + if (err) + return err; + + *npages = MLX5_GET(query_pages_out, out, num_pages); + *func_id = MLX5_GET(query_pages_out, out, function_id); + + return err; +} + +static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u32 function) +{ + struct fw_page *fp = NULL; + struct fw_page *iter; + unsigned n; + + list_for_each_entry(iter, &dev->priv.free_list, list) { + if (iter->function != function) + continue; + fp = iter; + } + + if (list_empty(&dev->priv.free_list) || !fp) + return -ENOMEM; + + n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask)); + if (n >= MLX5_NUM_4K_IN_PAGE) { + mlx5_core_warn(dev, "alloc 4k bug: fw page = 0x%llx, n = %u, bitmask: %lu, max num of 4K pages: %d\n", + fp->addr, n, fp->bitmask, MLX5_NUM_4K_IN_PAGE); + return -ENOENT; + } + clear_bit(n, &fp->bitmask); + fp->free_count--; + if (!fp->free_count) + list_del(&fp->list); + + *addr = fp->addr + n * MLX5_ADAPTER_PAGE_SIZE; + + return 0; +} + +#define MLX5_U64_4K_PAGE_MASK ((~(u64)0U) << PAGE_SHIFT) + +static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp, + bool in_free_list) +{ + struct rb_root *root; + + root = xa_load(&dev->priv.page_root_xa, fwp->function); + if (WARN_ON_ONCE(!root)) + return; + + rb_erase(&fwp->rb_node, root); + if (in_free_list) + list_del(&fwp->list); + dma_unmap_page(mlx5_core_dma_dev(dev), fwp->addr & MLX5_U64_4K_PAGE_MASK, + PAGE_SIZE, DMA_BIDIRECTIONAL); + __free_page(fwp->page); + kfree(fwp); +} + +static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 function) +{ + struct fw_page *fwp; + int n; + + fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK, function); + if (!fwp) { + mlx5_core_warn_rl(dev, "page not found\n"); + return; + } + n = (addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT; + fwp->free_count++; + set_bit(n, &fwp->bitmask); + if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) + free_fwp(dev, fwp, fwp->free_count != 1); + else if (fwp->free_count == 1) + list_add(&fwp->list, &dev->priv.free_list); +} + +static int alloc_system_page(struct mlx5_core_dev *dev, u32 function) +{ + struct device *device = mlx5_core_dma_dev(dev); + int nid = dev_to_node(device); + struct page *page; + u64 zero_addr = 1; + u64 addr; + int err; + + page = alloc_pages_node(nid, GFP_HIGHUSER, 0); + if (!page) { + mlx5_core_warn(dev, "failed to allocate page\n"); + return -ENOMEM; + } +map: + addr = dma_map_page(device, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(device, addr)) { + mlx5_core_warn(dev, "failed dma mapping page\n"); + err = -ENOMEM; + goto err_mapping; + } + + /* Firmware doesn't support page with physical address 0 */ + if (addr == 0) { + zero_addr = addr; + goto map; + } + + err = insert_page(dev, addr, page, function); + if (err) { + mlx5_core_err(dev, "failed to track allocated page\n"); + dma_unmap_page(device, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + } + +err_mapping: + if (err) + __free_page(page); + + if (zero_addr == 0) + dma_unmap_page(device, zero_addr, PAGE_SIZE, + DMA_BIDIRECTIONAL); + + return err; +} + +static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id, + bool ec_function) +{ + u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {}; + int err; + + MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); + MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE); + MLX5_SET(manage_pages_in, 
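/*
 * Worked example (editorial, assuming a 64KiB PAGE_SIZE kernel): one system
 * page then carries MLX5_NUM_4K_IN_PAGE == 16 firmware pages.  If the mapped
 * page starts at DMA address 0x10000, alloc_4k() hands out
 *
 *   addr = 0x10000 + n * MLX5_ADAPTER_PAGE_SIZE        (n = 0..15)
 *
 * and free_4k() recovers everything from the address alone:
 *
 *   fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK, function);
 *   n   = (addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
 *
 * so e.g. addr 0x13000 maps back to the page at 0x10000 with bit n == 3.
 */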
in, function_id, func_id); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); + + err = mlx5_cmd_exec_in(dev, manage_pages, in); + if (err) + mlx5_core_warn(dev, "page notify failed func_id(%d) err(%d)\n", + func_id, err); +} + +static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, + int event, bool ec_function) +{ + u32 function = get_function(func_id, ec_function); + u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; + int inlen = MLX5_ST_SZ_BYTES(manage_pages_in); + int notify_fail = event; + u16 func_type; + u64 addr; + int err; + u32 *in; + int i; + + inlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_in, pas[0]); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + mlx5_core_warn(dev, "vzalloc failed %d\n", inlen); + goto out_free; + } + + for (i = 0; i < npages; i++) { +retry: + err = alloc_4k(dev, &addr, function); + if (err) { + if (err == -ENOMEM) + err = alloc_system_page(dev, function); + if (err) { + dev->priv.fw_pages_alloc_failed += (npages - i); + goto out_4k; + } + + goto retry; + } + MLX5_ARRAY_SET64(manage_pages_in, in, pas, i, addr); + } + + MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); + MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE); + MLX5_SET(manage_pages_in, in, function_id, func_id); + MLX5_SET(manage_pages_in, in, input_num_entries, npages); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); + + err = mlx5_cmd_do(dev, in, inlen, out, sizeof(out)); + if (err == -EREMOTEIO) { + notify_fail = 0; + /* if triggered by FW and failed by FW ignore */ + if (event) { + err = 0; + goto out_dropped; + } + } + err = mlx5_cmd_check(dev, err, in, out); + if (err) { + mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", + func_id, npages, err); + goto out_dropped; + } + + func_type = func_id_to_type(dev, func_id, ec_function); + dev->priv.page_counters[func_type] += npages; + dev->priv.fw_pages += npages; + + mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x, err %d\n", + npages, ec_function, func_id, err); + + kvfree(in); + return 0; + +out_dropped: + dev->priv.give_pages_dropped += npages; +out_4k: + for (i--; i >= 0; i--) + free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]), function); +out_free: + kvfree(in); + if (notify_fail) + page_notify_fail(dev, func_id, ec_function); + return err; +} + +static void release_all_pages(struct mlx5_core_dev *dev, u16 func_id, + bool ec_function) +{ + u32 function = get_function(func_id, ec_function); + struct rb_root *root; + struct rb_node *p; + int npages = 0; + u16 func_type; + + root = xa_load(&dev->priv.page_root_xa, function); + if (WARN_ON_ONCE(!root)) + return; + + p = rb_first(root); + while (p) { + struct fw_page *fwp = rb_entry(p, struct fw_page, rb_node); + + p = rb_next(p); + npages += (MLX5_NUM_4K_IN_PAGE - fwp->free_count); + free_fwp(dev, fwp, fwp->free_count); + } + + func_type = func_id_to_type(dev, func_id, ec_function); + dev->priv.page_counters[func_type] -= npages; + dev->priv.fw_pages -= npages; + + mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x\n", + npages, ec_function, func_id); +} + +static u32 fwp_fill_manage_pages_out(struct fw_page *fwp, u32 *out, u32 index, + u32 npages) +{ + u32 pages_set = 0; + unsigned int n; + + for_each_clear_bit(n, &fwp->bitmask, MLX5_NUM_4K_IN_PAGE) { + MLX5_ARRAY_SET64(manage_pages_out, out, pas, index + pages_set, + fwp->addr + (n * MLX5_ADAPTER_PAGE_SIZE)); + pages_set++; + + if (!--npages) + break; + } + + return pages_set; +} + +static int 
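/*
 * Editorial flow sketch for give_pages() above: the MANAGE_PAGES(GIVE) input
 * is filled with npages adapter-page addresses, each taken from the free
 * list via alloc_4k(); when the free list runs dry (-ENOMEM) a fresh system
 * page is mapped with alloc_system_page() and the 4K allocation is retried.
 * If the command itself fails, every address already placed in pas[] is
 * handed back through free_4k(), and, for event-triggered requests, the
 * firmware is additionally told MLX5_PAGES_CANT_GIVE unless the failure was
 * reported by the firmware itself (-EREMOTEIO).
 */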
reclaim_pages_cmd(struct mlx5_core_dev *dev, + u32 *in, int in_size, u32 *out, int out_size) +{ + struct rb_root *root; + struct fw_page *fwp; + struct rb_node *p; + bool ec_function; + u32 func_id; + u32 npages; + u32 i = 0; + + if (!mlx5_cmd_is_down(dev)) + return mlx5_cmd_do(dev, in, in_size, out, out_size); + + /* No hard feelings, we want our pages back! */ + npages = MLX5_GET(manage_pages_in, in, input_num_entries); + func_id = MLX5_GET(manage_pages_in, in, function_id); + ec_function = MLX5_GET(manage_pages_in, in, embedded_cpu_function); + + root = xa_load(&dev->priv.page_root_xa, get_function(func_id, ec_function)); + if (WARN_ON_ONCE(!root)) + return -EEXIST; + + p = rb_first(root); + while (p && i < npages) { + fwp = rb_entry(p, struct fw_page, rb_node); + p = rb_next(p); + + i += fwp_fill_manage_pages_out(fwp, out, i, npages - i); + } + + MLX5_SET(manage_pages_out, out, output_num_entries, i); + return 0; +} + +static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, + int *nclaimed, bool event, bool ec_function) +{ + u32 function = get_function(func_id, ec_function); + int outlen = MLX5_ST_SZ_BYTES(manage_pages_out); + u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {}; + int num_claimed; + u16 func_type; + u32 *out; + int err; + int i; + + if (nclaimed) + *nclaimed = 0; + + outlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]); + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); + MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE); + MLX5_SET(manage_pages_in, in, function_id, func_id); + MLX5_SET(manage_pages_in, in, input_num_entries, npages); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); + + mlx5_core_dbg(dev, "func 0x%x, npages %d, outlen %d\n", + func_id, npages, outlen); + err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen); + if (err) { + npages = MLX5_GET(manage_pages_in, in, input_num_entries); + dev->priv.reclaim_pages_discard += npages; + } + /* if triggered by FW event and failed by FW then ignore */ + if (event && err == -EREMOTEIO) { + err = 0; + goto out_free; + } + + err = mlx5_cmd_check(dev, err, in, out); + if (err) { + mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err); + goto out_free; + } + + num_claimed = MLX5_GET(manage_pages_out, out, output_num_entries); + if (num_claimed > npages) { + mlx5_core_warn(dev, "fw returned %d, driver asked %d => corruption\n", + num_claimed, npages); + err = -EINVAL; + goto out_free; + } + + for (i = 0; i < num_claimed; i++) + free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]), function); + + if (nclaimed) + *nclaimed = num_claimed; + + func_type = func_id_to_type(dev, func_id, ec_function); + dev->priv.page_counters[func_type] -= num_claimed; + dev->priv.fw_pages -= num_claimed; + +out_free: + kvfree(out); + return err; +} + +static void pages_work_handler(struct work_struct *work) +{ + struct mlx5_pages_req *req = container_of(work, struct mlx5_pages_req, work); + struct mlx5_core_dev *dev = req->dev; + int err = 0; + + if (req->release_all) + release_all_pages(dev, req->func_id, req->ec_function); + else if (req->npages < 0) + err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL, + true, req->ec_function); + else if (req->npages > 0) + err = give_pages(dev, req->func_id, req->npages, 1, req->ec_function); + + if (err) + mlx5_core_warn(dev, "%s fail %d\n", + req->npages < 0 ? 
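/*
 * Editorial sketch of the dispatch done by pages_work_handler() above (the
 * numbers are invented): a page request for num_pages == 512 ends up in
 * give_pages(dev, func_id, 512, 1, ec), num_pages == -512 ends up in
 * reclaim_pages(dev, func_id, 512, NULL, true, ec), and a request carrying
 * the RELEASE_ALL_PAGES_MASK bit skips the firmware round-trip entirely and
 * just drops the locally tracked pages via release_all_pages().
 */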
"reclaim" : "give", err); + + kfree(req); +} + +enum { + EC_FUNCTION_MASK = 0x8000, + RELEASE_ALL_PAGES_MASK = 0x4000, +}; + +static int req_pages_handler(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_pages_req *req; + struct mlx5_core_dev *dev; + struct mlx5_priv *priv; + struct mlx5_eqe *eqe; + bool ec_function; + bool release_all; + u16 func_id; + s32 npages; + + priv = mlx5_nb_cof(nb, struct mlx5_priv, pg_nb); + dev = container_of(priv, struct mlx5_core_dev, priv); + eqe = data; + + func_id = be16_to_cpu(eqe->data.req_pages.func_id); + npages = be32_to_cpu(eqe->data.req_pages.num_pages); + ec_function = be16_to_cpu(eqe->data.req_pages.ec_function) & EC_FUNCTION_MASK; + release_all = be16_to_cpu(eqe->data.req_pages.ec_function) & + RELEASE_ALL_PAGES_MASK; + mlx5_core_dbg(dev, "page request for func 0x%x, npages %d, release_all %d\n", + func_id, npages, release_all); + req = kzalloc(sizeof(*req), GFP_ATOMIC); + if (!req) { + mlx5_core_warn(dev, "failed to allocate pages request\n"); + return NOTIFY_DONE; + } + + req->dev = dev; + req->func_id = func_id; + req->npages = npages; + req->ec_function = ec_function; + req->release_all = release_all; + INIT_WORK(&req->work, pages_work_handler); + queue_work(dev->priv.pg_wq, &req->work); + return NOTIFY_OK; +} + +int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot) +{ + u16 func_id; + s32 npages; + int err; + + err = mlx5_cmd_query_pages(dev, &func_id, &npages, boot); + if (err) + return err; + + mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n", + npages, boot ? "boot" : "init", func_id); + + return give_pages(dev, func_id, npages, 0, mlx5_core_is_ecpf(dev)); +} + +enum { + MLX5_BLKS_FOR_RECLAIM_PAGES = 12 +}; + +static int optimal_reclaimed_pages(void) +{ + struct mlx5_cmd_prot_block *block; + struct mlx5_cmd_layout *lay; + int ret; + + ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) - + MLX5_ST_SZ_BYTES(manage_pages_out)) / + MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]); + + return ret; +} + +static int mlx5_reclaim_root_pages(struct mlx5_core_dev *dev, + struct rb_root *root, u32 function) +{ + u64 recl_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_PAGES)); + unsigned long end = jiffies + recl_pages_to_jiffies; + + while (!RB_EMPTY_ROOT(root)) { + u32 ec_function = mlx5_get_ec_function(function); + u32 function_id = mlx5_get_func_id(function); + int nclaimed; + int err; + + err = reclaim_pages(dev, function_id, optimal_reclaimed_pages(), + &nclaimed, false, ec_function); + if (err) { + mlx5_core_warn(dev, "reclaim_pages err (%d) func_id=0x%x ec_func=0x%x\n", + err, function_id, ec_function); + return err; + } + + if (nclaimed) + end = jiffies + recl_pages_to_jiffies; + + if (time_after(jiffies, end)) { + mlx5_core_warn(dev, "FW did not return all pages. 
giving up...\n"); + break; + } + } + + return 0; +} + +int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) +{ + struct rb_root *root; + unsigned long id; + void *entry; + + xa_for_each(&dev->priv.page_root_xa, id, entry) { + root = entry; + mlx5_reclaim_root_pages(dev, root, id); + xa_erase(&dev->priv.page_root_xa, id); + kfree(root); + } + + WARN_ON(!xa_empty(&dev->priv.page_root_xa)); + + WARN(dev->priv.fw_pages, + "FW pages counter is %d after reclaiming all pages\n", + dev->priv.fw_pages); + WARN(dev->priv.page_counters[MLX5_VF], + "VFs FW pages counter is %d after reclaiming all pages\n", + dev->priv.page_counters[MLX5_VF]); + WARN(dev->priv.page_counters[MLX5_HOST_PF], + "External host PF FW pages counter is %d after reclaiming all pages\n", + dev->priv.page_counters[MLX5_HOST_PF]); + + return 0; +} + +int mlx5_pagealloc_init(struct mlx5_core_dev *dev) +{ + INIT_LIST_HEAD(&dev->priv.free_list); + dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator"); + if (!dev->priv.pg_wq) + return -ENOMEM; + + xa_init(&dev->priv.page_root_xa); + mlx5_pages_debugfs_init(dev); + + return 0; +} + +void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev) +{ + mlx5_pages_debugfs_cleanup(dev); + xa_destroy(&dev->priv.page_root_xa); + destroy_workqueue(dev->priv.pg_wq); +} + +void mlx5_pagealloc_start(struct mlx5_core_dev *dev) +{ + MLX5_NB_INIT(&dev->priv.pg_nb, req_pages_handler, PAGE_REQUEST); + mlx5_eq_notifier_register(dev, &dev->priv.pg_nb); +} + +void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) +{ + mlx5_eq_notifier_unregister(dev, &dev->priv.pg_nb); + flush_workqueue(dev->priv.pg_wq); +} + +int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages) +{ + u64 recl_vf_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_VFS_PAGES)); + unsigned long end = jiffies + recl_vf_pages_to_jiffies; + int prev_pages = *pages; + + /* In case of internal error we will free the pages manually later */ + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + mlx5_core_warn(dev, "Skipping wait for vf pages stage"); + return 0; + } + + mlx5_core_dbg(dev, "Waiting for %d pages\n", prev_pages); + while (*pages) { + if (time_after(jiffies, end)) { + mlx5_core_warn(dev, "aborting while there are %d pending pages\n", *pages); + return -ETIMEDOUT; + } + if (*pages < prev_pages) { + end = jiffies + recl_vf_pages_to_jiffies; + prev_pages = *pages; + } + msleep(50); + } + + mlx5_core_dbg(dev, "All pages received\n"); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c new file mode 100644 index 000000000..a6d3fc96e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -0,0 +1,733 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include <linux/interrupt.h> +#include <linux/notifier.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#include "mlx5_irq.h" +#include "pci_irq.h" +#include "lib/sf.h" +#ifdef CONFIG_RFS_ACCEL +#include <linux/cpu_rmap.h> +#endif + +#define MLX5_SFS_PER_CTRL_IRQ 64 +#define MLX5_IRQ_CTRL_SF_MAX 8 +/* min num of vectors for SFs to be enabled */ +#define MLX5_IRQ_VEC_COMP_BASE_SF 2 + +#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8) +#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX) +#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1) +#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4) + +struct mlx5_irq { + struct atomic_notifier_head nh; + cpumask_var_t mask; + char name[MLX5_MAX_IRQ_FORMATTED_NAME]; + struct mlx5_irq_pool *pool; + int refcount; + u32 index; + int irqn; +}; + +struct mlx5_irq_table { + struct mlx5_irq_pool *pf_pool; + struct mlx5_irq_pool *sf_ctrl_pool; + struct mlx5_irq_pool *sf_comp_pool; +}; + +/** + * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors + * to be ssigned to each VF. + * @dev: PF to work on + * @num_vfs: Number of enabled VFs + */ +int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs) +{ + int num_vf_msix, min_msix, max_msix; + + num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); + if (!num_vf_msix) + return 0; + + min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size); + max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size); + + /* Limit maximum number of MSI-X vectors so the default configuration + * has some available in the pool. This will allow the user to increase + * the number of vectors in a VF without having to first size-down other + * VFs. + */ + return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix); +} + +/** + * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF + * @dev: PF to work on + * @function_id: Internal PCI VF function IDd + * @msix_vec_count: Number of MSI-X vectors to set + */ +int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, + int msix_vec_count) +{ + int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + void *hca_cap = NULL, *query_cap = NULL, *cap; + int num_vf_msix, min_msix, max_msix; + int ret; + + num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); + if (!num_vf_msix) + return 0; + + if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev)) + return -EOPNOTSUPP; + + min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size); + max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size); + + if (msix_vec_count < min_msix) + return -EINVAL; + + if (msix_vec_count > max_msix) + return -EOVERFLOW; + + query_cap = kvzalloc(query_sz, GFP_KERNEL); + hca_cap = kvzalloc(set_sz, GFP_KERNEL); + if (!hca_cap || !query_cap) { + ret = -ENOMEM; + goto out; + } + + ret = mlx5_vport_get_other_func_cap(dev, function_id, query_cap); + if (ret) + goto out; + + cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability); + memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability), + MLX5_UN_SZ_BYTES(hca_cap_union)); + MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count); + + MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP); + MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1); + MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id); + + MLX5_SET(set_hca_cap_in, hca_cap, op_mod, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1); + ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap); +out: + kvfree(hca_cap); + kvfree(query_cap); + 
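/*
 * Worked example for mlx5_get_default_msix_vec_count() above (editorial; the
 * capability values are invented): with num_total_dynamic_vf_msix == 1024,
 * min_dynamic_vf_msix_table_size == 2, max_dynamic_vf_msix_table_size == 64
 * and num_vfs == 8,
 *
 *   max(min(1024 / 8, 64 / 2), 2) == max(min(128, 32), 2) == 32
 *
 * so each VF starts with 32 vectors, and capping at half of the per-VF
 * maximum keeps spare vectors in the pool for later
 * mlx5_set_msix_vec_count() grows of individual VFs.
 */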
return ret; +} + +/* mlx5_system_free_irq - Free an IRQ + * @irq: IRQ to free + * + * Free the IRQ and other resources such as rmap from the system. + * BUT doesn't free or remove reference from mlx5. + * This function is very important for the shutdown flow, where we need to + * cleanup system resoruces but keep mlx5 objects alive, + * see mlx5_irq_table_free_irqs(). + */ +static void mlx5_system_free_irq(struct mlx5_irq *irq) +{ + /* free_irq requires that affinity_hint and rmap will be cleared + * before calling it. This is why there is asymmetry with set_rmap + * which should be called after alloc_irq but before request_irq. + */ + irq_update_affinity_hint(irq->irqn, NULL); + free_irq(irq->irqn, &irq->nh); +} + +static void irq_release(struct mlx5_irq *irq) +{ + struct mlx5_irq_pool *pool = irq->pool; + + xa_erase(&pool->irqs, irq->index); + mlx5_system_free_irq(irq); + free_cpumask_var(irq->mask); + kfree(irq); +} + +int mlx5_irq_put(struct mlx5_irq *irq) +{ + struct mlx5_irq_pool *pool = irq->pool; + int ret = 0; + + mutex_lock(&pool->lock); + irq->refcount--; + if (!irq->refcount) { + irq_release(irq); + ret = 1; + } + mutex_unlock(&pool->lock); + return ret; +} + +int mlx5_irq_read_locked(struct mlx5_irq *irq) +{ + lockdep_assert_held(&irq->pool->lock); + return irq->refcount; +} + +int mlx5_irq_get_locked(struct mlx5_irq *irq) +{ + lockdep_assert_held(&irq->pool->lock); + if (WARN_ON_ONCE(!irq->refcount)) + return 0; + irq->refcount++; + return 1; +} + +static int irq_get(struct mlx5_irq *irq) +{ + int err; + + mutex_lock(&irq->pool->lock); + err = mlx5_irq_get_locked(irq); + mutex_unlock(&irq->pool->lock); + return err; +} + +static irqreturn_t irq_int_handler(int irq, void *nh) +{ + atomic_notifier_call_chain(nh, 0, NULL); + return IRQ_HANDLED; +} + +static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx) +{ + snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx); +} + +static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx) +{ + if (!pool->xa_num_irqs.max) { + /* in case we only have a single irq for the device */ + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx); + return; + } + + if (vecidx == pool->xa_num_irqs.max) { + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx); + return; + } + + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx); +} + +struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, + const struct cpumask *affinity) +{ + struct mlx5_core_dev *dev = pool->dev; + char name[MLX5_MAX_IRQ_NAME]; + struct mlx5_irq *irq; + int err; + + irq = kzalloc(sizeof(*irq), GFP_KERNEL); + if (!irq) + return ERR_PTR(-ENOMEM); + irq->irqn = pci_irq_vector(dev->pdev, i); + if (!mlx5_irq_pool_is_sf_pool(pool)) + irq_set_name(pool, name, i); + else + irq_sf_set_name(pool, name, i); + ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh); + snprintf(irq->name, MLX5_MAX_IRQ_FORMATTED_NAME, + MLX5_IRQ_NAME_FORMAT_STR, name, pci_name(dev->pdev)); + err = request_irq(irq->irqn, irq_int_handler, 0, irq->name, + &irq->nh); + if (err) { + mlx5_core_err(dev, "Failed to request irq. 
err = %d\n", err); + goto err_req_irq; + } + if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) { + mlx5_core_warn(dev, "zalloc_cpumask_var failed\n"); + err = -ENOMEM; + goto err_cpumask; + } + if (affinity) { + cpumask_copy(irq->mask, affinity); + irq_set_affinity_and_hint(irq->irqn, irq->mask); + } + irq->pool = pool; + irq->refcount = 1; + irq->index = i; + err = xa_err(xa_store(&pool->irqs, irq->index, irq, GFP_KERNEL)); + if (err) { + mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n", + irq->index, err); + goto err_xa; + } + return irq; +err_xa: + irq_update_affinity_hint(irq->irqn, NULL); + free_cpumask_var(irq->mask); +err_cpumask: + free_irq(irq->irqn, &irq->nh); +err_req_irq: + kfree(irq); + return ERR_PTR(err); +} + +int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb) +{ + int ret; + + ret = irq_get(irq); + if (!ret) + /* Something very bad happens here, we are enabling EQ + * on non-existing IRQ. + */ + return -ENOENT; + ret = atomic_notifier_chain_register(&irq->nh, nb); + if (ret) + mlx5_irq_put(irq); + return ret; +} + +int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb) +{ + int err = 0; + + err = atomic_notifier_chain_unregister(&irq->nh, nb); + mlx5_irq_put(irq); + return err; +} + +struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq) +{ + return irq->mask; +} + +int mlx5_irq_get_index(struct mlx5_irq *irq) +{ + return irq->index; +} + +/* irq_pool API */ + +/* requesting an irq from a given pool according to given index */ +static struct mlx5_irq * +irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx, + struct cpumask *affinity) +{ + struct mlx5_irq *irq; + + mutex_lock(&pool->lock); + irq = xa_load(&pool->irqs, vecidx); + if (irq) { + mlx5_irq_get_locked(irq); + goto unlock; + } + irq = mlx5_irq_alloc(pool, vecidx, affinity); +unlock: + mutex_unlock(&pool->lock); + return irq; +} + +static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table) +{ + return irq_table->sf_ctrl_pool; +} + +static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table) +{ + return irq_table->sf_comp_pool; +} + +struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev); + struct mlx5_irq_pool *pool = NULL; + + if (mlx5_core_is_sf(dev)) + pool = sf_irq_pool_get(irq_table); + + /* In some configs, there won't be a pool of SFs IRQs. Hence, returning + * the PF IRQs pool in case the SF pool doesn't exist. + */ + return pool ? pool : irq_table->pf_pool; +} + +static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev); + struct mlx5_irq_pool *pool = NULL; + + if (mlx5_core_is_sf(dev)) + pool = sf_ctrl_irq_pool_get(irq_table); + + /* In some configs, there won't be a pool of SFs IRQs. Hence, returning + * the PF IRQs pool in case the SF pool doesn't exist. + */ + return pool ? pool : irq_table->pf_pool; +} + +/** + * mlx5_irqs_release - release one or more IRQs back to the system. + * @irqs: IRQs to be released. + * @nirqs: number of IRQs to be released. + */ +static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs) +{ + int i; + + for (i = 0; i < nirqs; i++) { + synchronize_irq(irqs[i]->irqn); + mlx5_irq_put(irqs[i]); + } +} + +/** + * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system. + * @ctrl_irq: ctrl IRQ to be released. 
+ */ +void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq) +{ + mlx5_irqs_release(&ctrl_irq, 1); +} + +/** + * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device. + * @dev: mlx5 device that requesting the IRQ. + * + * This function returns a pointer to IRQ, or ERR_PTR in case of error. + */ +struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev); + cpumask_var_t req_mask; + struct mlx5_irq *irq; + + if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) + return ERR_PTR(-ENOMEM); + cpumask_copy(req_mask, cpu_online_mask); + if (!mlx5_irq_pool_is_sf_pool(pool)) { + /* In case we are allocating a control IRQ for PF/VF */ + if (!pool->xa_num_irqs.max) { + cpumask_clear(req_mask); + /* In case we only have a single IRQ for PF/VF */ + cpumask_set_cpu(cpumask_first(cpu_online_mask), req_mask); + } + /* Allocate the IRQ in the last index of the pool */ + irq = irq_pool_request_vector(pool, pool->xa_num_irqs.max, req_mask); + } else { + irq = mlx5_irq_affinity_request(pool, req_mask); + } + + free_cpumask_var(req_mask); + return irq; +} + +/** + * mlx5_irq_request - request an IRQ for mlx5 PF/VF device. + * @dev: mlx5 device that requesting the IRQ. + * @vecidx: vector index of the IRQ. This argument is ignore if affinity is + * provided. + * @affinity: cpumask requested for this IRQ. + * + * This function returns a pointer to IRQ, or ERR_PTR in case of error. + */ +struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx, + struct cpumask *affinity) +{ + struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev); + struct mlx5_irq_pool *pool; + struct mlx5_irq *irq; + + pool = irq_table->pf_pool; + irq = irq_pool_request_vector(pool, vecidx, affinity); + if (IS_ERR(irq)) + return irq; + mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n", + irq->irqn, cpumask_pr_args(affinity), + irq->refcount / MLX5_EQ_REFS_PER_IRQ); + return irq; +} + +/** + * mlx5_irqs_release_vectors - release one or more IRQs back to the system. + * @irqs: IRQs to be released. + * @nirqs: number of IRQs to be released. + */ +void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs) +{ + mlx5_irqs_release(irqs, nirqs); +} + +/** + * mlx5_irqs_request_vectors - request one or more IRQs for mlx5 device. + * @dev: mlx5 device that is requesting the IRQs. + * @cpus: CPUs array for binding the IRQs + * @nirqs: number of IRQs to request. + * @irqs: an output array of IRQs pointers. + * + * Each IRQ is bound to at most 1 CPU. + * This function is requests nirqs IRQs, starting from @vecidx. + * + * This function returns the number of IRQs requested, (which might be smaller than + * @nirqs), if successful, or a negative error code in case of an error. + */ +int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs, + struct mlx5_irq **irqs) +{ + cpumask_var_t req_mask; + struct mlx5_irq *irq; + int i; + + if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) + return -ENOMEM; + for (i = 0; i < nirqs; i++) { + cpumask_set_cpu(cpus[i], req_mask); + irq = mlx5_irq_request(dev, i, req_mask); + if (IS_ERR(irq)) + break; + cpumask_clear(req_mask); + irqs[i] = irq; + } + + free_cpumask_var(req_mask); + return i ? 
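/*
 * Minimal usage sketch (editorial, not part of the driver): request a small
 * batch of IRQs pinned to the first CPUs and hand them back.  The wrapper
 * name, the fixed CPU list and the omitted notifier wiring are invented for
 * illustration only.
 */
static int example_grab_two_irqs(struct mlx5_core_dev *dev)
{
        struct mlx5_irq *irqs[2];
        u16 cpus[2] = { 0, 1 };         /* vector 0 on CPU0, vector 1 on CPU1 */
        int got;

        got = mlx5_irqs_request_vectors(dev, cpus, 2, irqs);
        if (got < 0)
                return got;             /* not even one vector was available */

        /* ... attach EQs with mlx5_irq_attach_nb() and use the vectors ... */

        mlx5_irqs_release_vectors(irqs, got);
        return 0;
}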
i : PTR_ERR(irq); +} + +static struct mlx5_irq_pool * +irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name, + u32 min_threshold, u32 max_threshold) +{ + struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL); + + if (!pool) + return ERR_PTR(-ENOMEM); + pool->dev = dev; + mutex_init(&pool->lock); + xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC); + pool->xa_num_irqs.min = start; + pool->xa_num_irqs.max = start + size - 1; + if (name) + snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS, + "%s", name); + pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ; + pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ; + mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d", + name, size, start); + return pool; +} + +static void irq_pool_free(struct mlx5_irq_pool *pool) +{ + struct mlx5_irq *irq; + unsigned long index; + + /* There are cases in which we are destrying the irq_table before + * freeing all the IRQs, fast teardown for example. Hence, free the irqs + * which might not have been freed. + */ + xa_for_each(&pool->irqs, index, irq) + irq_release(irq); + xa_destroy(&pool->irqs); + mutex_destroy(&pool->lock); + kfree(pool->irqs_per_cpu); + kvfree(pool); +} + +static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + int num_sf_ctrl_by_msix; + int num_sf_ctrl_by_sfs; + int num_sf_ctrl; + int err; + + /* init pf_pool */ + table->pf_pool = irq_pool_alloc(dev, 0, pf_vec, NULL, + MLX5_EQ_SHARE_IRQ_MIN_COMP, + MLX5_EQ_SHARE_IRQ_MAX_COMP); + if (IS_ERR(table->pf_pool)) + return PTR_ERR(table->pf_pool); + if (!mlx5_sf_max_functions(dev)) + return 0; + if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) { + mlx5_core_dbg(dev, "Not enught IRQs for SFs. 
SF may run at lower performance\n"); + return 0; + } + + /* init sf_ctrl_pool */ + num_sf_ctrl_by_msix = DIV_ROUND_UP(sf_vec, MLX5_COMP_EQS_PER_SF); + num_sf_ctrl_by_sfs = DIV_ROUND_UP(mlx5_sf_max_functions(dev), + MLX5_SFS_PER_CTRL_IRQ); + num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs); + num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl); + table->sf_ctrl_pool = irq_pool_alloc(dev, pf_vec, num_sf_ctrl, + "mlx5_sf_ctrl", + MLX5_EQ_SHARE_IRQ_MIN_CTRL, + MLX5_EQ_SHARE_IRQ_MAX_CTRL); + if (IS_ERR(table->sf_ctrl_pool)) { + err = PTR_ERR(table->sf_ctrl_pool); + goto err_pf; + } + /* init sf_comp_pool */ + table->sf_comp_pool = irq_pool_alloc(dev, pf_vec + num_sf_ctrl, + sf_vec - num_sf_ctrl, "mlx5_sf_comp", + MLX5_EQ_SHARE_IRQ_MIN_COMP, + MLX5_EQ_SHARE_IRQ_MAX_COMP); + if (IS_ERR(table->sf_comp_pool)) { + err = PTR_ERR(table->sf_comp_pool); + goto err_sf_ctrl; + } + + table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL); + if (!table->sf_comp_pool->irqs_per_cpu) { + err = -ENOMEM; + goto err_irqs_per_cpu; + } + + return 0; + +err_irqs_per_cpu: + irq_pool_free(table->sf_comp_pool); +err_sf_ctrl: + irq_pool_free(table->sf_ctrl_pool); +err_pf: + irq_pool_free(table->pf_pool); + return err; +} + +static void irq_pools_destroy(struct mlx5_irq_table *table) +{ + if (table->sf_ctrl_pool) { + irq_pool_free(table->sf_comp_pool); + irq_pool_free(table->sf_ctrl_pool); + } + irq_pool_free(table->pf_pool); +} + +static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool) +{ + struct mlx5_irq *irq; + unsigned long index; + + xa_for_each(&pool->irqs, index, irq) + mlx5_system_free_irq(irq); +} + +static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table) +{ + if (table->sf_ctrl_pool) { + mlx5_irq_pool_free_irqs(table->sf_comp_pool); + mlx5_irq_pool_free_irqs(table->sf_ctrl_pool); + } + mlx5_irq_pool_free_irqs(table->pf_pool); +} + +/* irq_table API */ + +int mlx5_irq_table_init(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *irq_table; + + if (mlx5_core_is_sf(dev)) + return 0; + + irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL, + dev->priv.numa_node); + if (!irq_table) + return -ENOMEM; + + dev->priv.irq_table = irq_table; + return 0; +} + +void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev) +{ + if (mlx5_core_is_sf(dev)) + return; + + kvfree(dev->priv.irq_table); +} + +int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table) +{ + if (!table->pf_pool->xa_num_irqs.max) + return 1; + return table->pf_pool->xa_num_irqs.max - table->pf_pool->xa_num_irqs.min; +} + +int mlx5_irq_table_create(struct mlx5_core_dev *dev) +{ + int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? 
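/*
 * Worked example for the pool split in irq_pools_init() above (editorial,
 * numbers invented): with pf_vec == 9 and sf_vec == 24 vectors left over for
 * subfunctions,
 *
 *   num_sf_ctrl = min3(DIV_ROUND_UP(24, MLX5_COMP_EQS_PER_SF),
 *                      DIV_ROUND_UP(mlx5_sf_max_functions(dev),
 *                                   MLX5_SFS_PER_CTRL_IRQ),
 *                      MLX5_IRQ_CTRL_SF_MAX);
 *
 * The PF pool then owns vectors 0..8, the SF control pool starts at vector 9
 * with num_sf_ctrl entries, and the remaining 24 - num_sf_ctrl vectors form
 * the shared SF completion pool.
 */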
+ MLX5_CAP_GEN(dev, max_num_eqs) : + 1 << MLX5_CAP_GEN(dev, log_max_eq); + int total_vec; + int pf_vec; + int err; + + if (mlx5_core_is_sf(dev)) + return 0; + + pf_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1; + pf_vec = min_t(int, pf_vec, num_eqs); + + total_vec = pf_vec; + if (mlx5_sf_max_functions(dev)) + total_vec += MLX5_IRQ_CTRL_SF_MAX + + MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev); + + total_vec = pci_alloc_irq_vectors(dev->pdev, 1, total_vec, PCI_IRQ_MSIX); + if (total_vec < 0) + return total_vec; + pf_vec = min(pf_vec, total_vec); + + err = irq_pools_init(dev, total_vec - pf_vec, pf_vec); + if (err) + pci_free_irq_vectors(dev->pdev); + + return err; +} + +void mlx5_irq_table_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + + if (mlx5_core_is_sf(dev)) + return; + + /* There are cases where IRQs still will be in used when we reaching + * to here. Hence, making sure all the irqs are released. + */ + irq_pools_destroy(table); + pci_free_irq_vectors(dev->pdev); +} + +void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + + if (mlx5_core_is_sf(dev)) + return; + + mlx5_irq_pools_free_irqs(table); + pci_free_irq_vectors(dev->pdev); +} + +int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table) +{ + if (table->sf_comp_pool) + return min_t(int, num_online_cpus(), + table->sf_comp_pool->xa_num_irqs.max - + table->sf_comp_pool->xa_num_irqs.min + 1); + else + return mlx5_irq_table_get_num_comp(table); +} + +struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_MLX5_SF + if (mlx5_core_is_sf(dev)) + return dev->priv.parent_mdev->priv.irq_table; +#endif + return dev->priv.irq_table; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h new file mode 100644 index 000000000..404717930 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#ifndef __PCI_IRQ_H__ +#define __PCI_IRQ_H__ + +#include <linux/mlx5/driver.h> + +#define MLX5_MAX_IRQ_NAME (32) +#define MLX5_IRQ_NAME_FORMAT_STR ("%s@pci:%s") +#define MLX5_MAX_IRQ_FORMATTED_NAME \ + (MLX5_MAX_IRQ_NAME + sizeof(MLX5_IRQ_NAME_FORMAT_STR)) +/* max irq_index is 2047, so four chars */ +#define MLX5_MAX_IRQ_IDX_CHARS (4) +#define MLX5_EQ_REFS_PER_IRQ (2) + +struct mlx5_irq; + +struct mlx5_irq_pool { + char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS]; + struct xa_limit xa_num_irqs; + struct mutex lock; /* sync IRQs creations */ + struct xarray irqs; + u32 max_threshold; + u32 min_threshold; + u16 *irqs_per_cpu; + struct mlx5_core_dev *dev; +}; + +struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev); +static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool) +{ + return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf")); +} + +struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, + const struct cpumask *affinity); +int mlx5_irq_get_locked(struct mlx5_irq *irq); +int mlx5_irq_read_locked(struct mlx5_irq *irq); +int mlx5_irq_put(struct mlx5_irq *irq); + +#endif /* __PCI_IRQ_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c b/drivers/net/ethernet/mellanox/mlx5/core/pd.c new file mode 100644 index 000000000..ee5ffdeb9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/kernel.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" + +int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn) +{ + u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {}; + int err; + + MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD); + err = mlx5_cmd_exec_inout(dev, alloc_pd, in, out); + if (!err) + *pdn = MLX5_GET(alloc_pd_out, out, pd); + return err; +} +EXPORT_SYMBOL(mlx5_core_alloc_pd); + +int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {}; + + MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD); + MLX5_SET(dealloc_pd_in, in, pd, pdn); + return mlx5_cmd_exec_in(dev, dealloc_pd, in); +} +EXPORT_SYMBOL(mlx5_core_dealloc_pd); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c new file mode 100644 index 000000000..a1548e6bf --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -0,0 +1,1056 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/mlx5/port.h> +#include "mlx5_core.h" + +/* calling with verbose false will not print error to log */ +int mlx5_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, + void *data_out, int size_out, u16 reg_id, int arg, + int write, bool verbose) +{ + int outlen = MLX5_ST_SZ_BYTES(access_register_out) + size_out; + int inlen = MLX5_ST_SZ_BYTES(access_register_in) + size_in; + int err = -ENOMEM; + u32 *out = NULL; + u32 *in = NULL; + void *data; + + in = kvzalloc(inlen, GFP_KERNEL); + out = kvzalloc(outlen, GFP_KERNEL); + if (!in || !out) + goto out; + + data = MLX5_ADDR_OF(access_register_in, in, register_data); + memcpy(data, data_in, size_in); + + MLX5_SET(access_register_in, in, opcode, MLX5_CMD_OP_ACCESS_REG); + MLX5_SET(access_register_in, in, op_mod, !write); + MLX5_SET(access_register_in, in, argument, arg); + MLX5_SET(access_register_in, in, register_id, reg_id); + + err = mlx5_cmd_do(dev, in, inlen, out, outlen); + if (verbose) + err = mlx5_cmd_check(dev, err, in, out); + if (err) + goto out; + + data = MLX5_ADDR_OF(access_register_out, out, register_data); + memcpy(data_out, data, size_out); + +out: + kvfree(out); + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_access_reg); + +int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, + int size_in, void *data_out, int size_out, + u16 reg_id, int arg, int write) +{ + return mlx5_access_reg(dev, data_in, size_in, data_out, size_out, + reg_id, arg, write, true); +} +EXPORT_SYMBOL_GPL(mlx5_core_access_reg); + +int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group, + u8 access_reg_group) +{ + u32 in[MLX5_ST_SZ_DW(pcam_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(pcam_reg); + + MLX5_SET(pcam_reg, in, feature_group, feature_group); + MLX5_SET(pcam_reg, in, access_reg_group, access_reg_group); + + return mlx5_core_access_reg(dev, in, sz, pcam, sz, MLX5_REG_PCAM, 0, 0); +} + +int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcam, u8 feature_group, + u8 access_reg_group) +{ + u32 in[MLX5_ST_SZ_DW(mcam_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(mcam_reg); + + MLX5_SET(mcam_reg, in, feature_group, feature_group); + MLX5_SET(mcam_reg, in, access_reg_group, access_reg_group); + + return mlx5_core_access_reg(dev, in, sz, mcam, sz, MLX5_REG_MCAM, 0, 0); +} + +int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, + u8 feature_group, u8 access_reg_group) +{ + u32 in[MLX5_ST_SZ_DW(qcam_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(qcam_reg); + + MLX5_SET(qcam_reg, in, feature_group, feature_group); + MLX5_SET(qcam_reg, in, access_reg_group, access_reg_group); + + return mlx5_core_access_reg(mdev, in, sz, qcam, sz, MLX5_REG_QCAM, 0, 0); +} + +struct mlx5_reg_pcap { + u8 rsvd0; + u8 port_num; + u8 rsvd1[2]; + __be32 caps_127_96; + __be32 caps_95_64; + __be32 caps_63_32; + __be32 caps_31_0; +}; + +int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps) +{ + struct mlx5_reg_pcap in; + struct mlx5_reg_pcap out; + + memset(&in, 0, sizeof(in)); + in.caps_127_96 = cpu_to_be32(caps); + in.port_num = port_num; + + return mlx5_core_access_reg(dev, &in, sizeof(in), &out, + sizeof(out), MLX5_REG_PCAP, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_caps); + +int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, + int ptys_size, int proto_mask, u8 local_port) +{ + u32 in[MLX5_ST_SZ_DW(ptys_reg)] = {0}; + + MLX5_SET(ptys_reg, in, local_port, local_port); + MLX5_SET(ptys_reg, in, proto_mask, proto_mask); + return mlx5_core_access_reg(dev, in, sizeof(in), ptys, + 
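/*
 * Editorial note on the access-register convention used throughout this
 * file: each PRM register helper builds the register layout in `in' and then
 * calls
 *
 *   mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
 *                        MLX5_REG_<NAME>, 0, write);
 *
 * with write == 0 for a query and write == 1 for a set; op_mod is simply
 * !write, so the single ACCESS_REG command serves both directions.
 */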
ptys_size, MLX5_REG_PTYS, 0, 0); +} +EXPORT_SYMBOL_GPL(mlx5_query_port_ptys); + +int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration) +{ + u32 in[MLX5_ST_SZ_DW(mlcr_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(mlcr_reg)]; + + MLX5_SET(mlcr_reg, in, local_port, 1); + MLX5_SET(mlcr_reg, in, beacon_duration, beacon_duration); + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MLCR, 0, 1); +} + +int mlx5_query_ib_port_oper(struct mlx5_core_dev *dev, u16 *link_width_oper, + u16 *proto_oper, u8 local_port) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB, + local_port); + if (err) + return err; + + *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper); + *proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper); + + return 0; +} +EXPORT_SYMBOL(mlx5_query_ib_port_oper); + +/* This function should be used after setting a port register only */ +void mlx5_toggle_port_link(struct mlx5_core_dev *dev) +{ + enum mlx5_port_status ps; + + mlx5_query_port_admin_status(dev, &ps); + mlx5_set_port_admin_status(dev, MLX5_PORT_DOWN); + if (ps == MLX5_PORT_UP) + mlx5_set_port_admin_status(dev, MLX5_PORT_UP); +} +EXPORT_SYMBOL_GPL(mlx5_toggle_port_link); + +int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, + enum mlx5_port_status status) +{ + u32 in[MLX5_ST_SZ_DW(paos_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(paos_reg)]; + + MLX5_SET(paos_reg, in, local_port, 1); + MLX5_SET(paos_reg, in, admin_status, status); + MLX5_SET(paos_reg, in, ase, 1); + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PAOS, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_admin_status); + +int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, + enum mlx5_port_status *status) +{ + u32 in[MLX5_ST_SZ_DW(paos_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(paos_reg)]; + int err; + + MLX5_SET(paos_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PAOS, 0, 0); + if (err) + return err; + *status = MLX5_GET(paos_reg, out, admin_status); + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_admin_status); + +static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, u16 *admin_mtu, + u16 *max_mtu, u16 *oper_mtu, u8 port) +{ + u32 in[MLX5_ST_SZ_DW(pmtu_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; + + MLX5_SET(pmtu_reg, in, local_port, port); + mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PMTU, 0, 0); + + if (max_mtu) + *max_mtu = MLX5_GET(pmtu_reg, out, max_mtu); + if (oper_mtu) + *oper_mtu = MLX5_GET(pmtu_reg, out, oper_mtu); + if (admin_mtu) + *admin_mtu = MLX5_GET(pmtu_reg, out, admin_mtu); +} + +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port) +{ + u32 in[MLX5_ST_SZ_DW(pmtu_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; + + MLX5_SET(pmtu_reg, in, admin_mtu, mtu); + MLX5_SET(pmtu_reg, in, local_port, port); + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PMTU, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_mtu); + +void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, + u8 port) +{ + mlx5_query_port_mtu(dev, NULL, max_mtu, NULL, port); +} +EXPORT_SYMBOL_GPL(mlx5_query_port_max_mtu); + +void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu, + u8 port) +{ + mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu, port); +} +EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu); + +static int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num) +{ + u32 
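/*
 * Usage sketch (editorial): clamp a requested MTU to the port maximum before
 * programming it through PMTU.  The wrapper name and the hard-coded port 1
 * are illustrative only.
 */
static int example_set_port1_mtu(struct mlx5_core_dev *dev, u16 mtu)
{
        u16 max_mtu;

        mlx5_query_port_max_mtu(dev, &max_mtu, 1);
        if (mtu > max_mtu)
                mtu = max_mtu;

        return mlx5_set_port_mtu(dev, mtu, 1);
}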
in[MLX5_ST_SZ_DW(pmlp_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(pmlp_reg)]; + int err; + + MLX5_SET(pmlp_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_PMLP, 0, 0); + if (err) + return err; + + *module_num = MLX5_GET(lane_2_module_mapping, + MLX5_ADDR_OF(pmlp_reg, out, lane0_module_mapping), + module); + + return 0; +} + +static int mlx5_query_module_id(struct mlx5_core_dev *dev, int module_num, + u8 *module_id) +{ + u32 in[MLX5_ST_SZ_DW(mcia_reg)] = {}; + u32 out[MLX5_ST_SZ_DW(mcia_reg)]; + int err, status; + u8 *ptr; + + MLX5_SET(mcia_reg, in, i2c_device_address, MLX5_I2C_ADDR_LOW); + MLX5_SET(mcia_reg, in, module, module_num); + MLX5_SET(mcia_reg, in, device_address, 0); + MLX5_SET(mcia_reg, in, page_number, 0); + MLX5_SET(mcia_reg, in, size, 1); + MLX5_SET(mcia_reg, in, l, 0); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCIA, 0, 0); + if (err) + return err; + + status = MLX5_GET(mcia_reg, out, status); + if (status) { + mlx5_core_err(dev, "query_mcia_reg failed: status: 0x%x\n", + status); + return -EIO; + } + ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0); + + *module_id = ptr[0]; + + return 0; +} + +static int mlx5_qsfp_eeprom_page(u16 offset) +{ + if (offset < MLX5_EEPROM_PAGE_LENGTH) + /* Addresses between 0-255 - page 00 */ + return 0; + + /* Addresses between 256 - 639 belongs to pages 01, 02 and 03 + * For example, offset = 400 belongs to page 02: + * 1 + ((400 - 256)/128) = 2 + */ + return 1 + ((offset - MLX5_EEPROM_PAGE_LENGTH) / + MLX5_EEPROM_HIGH_PAGE_LENGTH); +} + +static int mlx5_qsfp_eeprom_high_page_offset(int page_num) +{ + if (!page_num) /* Page 0 always start from low page */ + return 0; + + /* High page */ + return page_num * MLX5_EEPROM_HIGH_PAGE_LENGTH; +} + +static void mlx5_qsfp_eeprom_params_set(u16 *i2c_addr, int *page_num, u16 *offset) +{ + *i2c_addr = MLX5_I2C_ADDR_LOW; + *page_num = mlx5_qsfp_eeprom_page(*offset); + *offset -= mlx5_qsfp_eeprom_high_page_offset(*page_num); +} + +static void mlx5_sfp_eeprom_params_set(u16 *i2c_addr, int *page_num, u16 *offset) +{ + *i2c_addr = MLX5_I2C_ADDR_LOW; + *page_num = 0; + + if (*offset < MLX5_EEPROM_PAGE_LENGTH) + return; + + *i2c_addr = MLX5_I2C_ADDR_HIGH; + *offset -= MLX5_EEPROM_PAGE_LENGTH; +} + +static int mlx5_mcia_max_bytes(struct mlx5_core_dev *dev) +{ + /* mcia supports either 12 dwords or 32 dwords */ + return (MLX5_CAP_MCAM_FEATURE(dev, mcia_32dwords) ? 
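/*
 * Additional worked example for the QSFP paging helpers above (editorial): a
 * flat offset of 300 bytes selects upper page 1 + (300 - 256) / 128 == 1,
 * and the device address sent in MCIA becomes 300 - 1 * 128 == 172, while
 * any offset below 256 stays on page 0 at the low I2C address.
 */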
32 : 12) * sizeof(u32); +} + +static int mlx5_query_mcia(struct mlx5_core_dev *dev, + struct mlx5_module_eeprom_query_params *params, u8 *data) +{ + u32 in[MLX5_ST_SZ_DW(mcia_reg)] = {}; + u32 out[MLX5_ST_SZ_DW(mcia_reg)]; + int status, err; + void *ptr; + u16 size; + + size = min_t(int, params->size, mlx5_mcia_max_bytes(dev)); + + MLX5_SET(mcia_reg, in, l, 0); + MLX5_SET(mcia_reg, in, size, size); + MLX5_SET(mcia_reg, in, module, params->module_number); + MLX5_SET(mcia_reg, in, device_address, params->offset); + MLX5_SET(mcia_reg, in, page_number, params->page); + MLX5_SET(mcia_reg, in, i2c_device_address, params->i2c_address); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCIA, 0, 0); + if (err) + return err; + + status = MLX5_GET(mcia_reg, out, status); + if (status) { + mlx5_core_err(dev, "query_mcia_reg failed: status: 0x%x\n", + status); + return -EIO; + } + + ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0); + memcpy(data, ptr, size); + + return size; +} + +int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, + u16 offset, u16 size, u8 *data) +{ + struct mlx5_module_eeprom_query_params query = {0}; + u8 module_id; + int err; + + err = mlx5_query_module_num(dev, &query.module_number); + if (err) + return err; + + err = mlx5_query_module_id(dev, query.module_number, &module_id); + if (err) + return err; + + switch (module_id) { + case MLX5_MODULE_ID_SFP: + mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &offset); + break; + case MLX5_MODULE_ID_QSFP: + case MLX5_MODULE_ID_QSFP_PLUS: + case MLX5_MODULE_ID_QSFP28: + mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &offset); + break; + default: + mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id); + return -EINVAL; + } + + if (offset + size > MLX5_EEPROM_PAGE_LENGTH) + /* Cross pages read, read until offset 256 in low page */ + size = MLX5_EEPROM_PAGE_LENGTH - offset; + + query.size = size; + query.offset = offset; + + return mlx5_query_mcia(dev, &query, data); +} +EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom); + +int mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev, + struct mlx5_module_eeprom_query_params *params, + u8 *data) +{ + int err; + + err = mlx5_query_module_num(dev, ¶ms->module_number); + if (err) + return err; + + if (params->i2c_address != MLX5_I2C_ADDR_HIGH && + params->i2c_address != MLX5_I2C_ADDR_LOW) { + mlx5_core_err(dev, "I2C address not recognized: 0x%x\n", params->i2c_address); + return -EINVAL; + } + + return mlx5_query_mcia(dev, params, data); +} +EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom_by_page); + +static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc, + int pvlc_size, u8 local_port) +{ + u32 in[MLX5_ST_SZ_DW(pvlc_reg)] = {0}; + + MLX5_SET(pvlc_reg, in, local_port, local_port); + return mlx5_core_access_reg(dev, in, sizeof(in), pvlc, + pvlc_size, MLX5_REG_PVLC, 0, 0); +} + +int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, + u8 *vl_hw_cap, u8 local_port) +{ + u32 out[MLX5_ST_SZ_DW(pvlc_reg)]; + int err; + + err = mlx5_query_port_pvlc(dev, out, sizeof(out), local_port); + if (err) + return err; + + *vl_hw_cap = MLX5_GET(pvlc_reg, out, vl_hw_cap); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_vl_hw_cap); + +static int mlx5_query_pfcc_reg(struct mlx5_core_dev *dev, u32 *out, + u32 out_size) +{ + u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; + + MLX5_SET(pfcc_reg, in, local_port, 1); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + out_size, MLX5_REG_PFCC, 0, 0); +} + +int mlx5_set_port_pause(struct 
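/*
 * Usage sketch for mlx5_query_module_eeprom() above (editorial): the buffer
 * size, the offset and the debug print are invented; note the helper returns
 * the number of bytes actually read, which may be less than requested when
 * the read would cross an EEPROM page boundary.
 */
static int example_dump_module_id(struct mlx5_core_dev *dev)
{
        u8 buf[16];
        int n;

        n = mlx5_query_module_eeprom(dev, 0, sizeof(buf), buf);
        if (n < 0)
                return n;

        mlx5_core_dbg(dev, "module id 0x%x, read %d bytes\n", buf[0], n);
        return 0;
}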
mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause) +{ + u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + + MLX5_SET(pfcc_reg, in, local_port, 1); + MLX5_SET(pfcc_reg, in, pptx, tx_pause); + MLX5_SET(pfcc_reg, in, pprx, rx_pause); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PFCC, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_pause); + +int mlx5_query_port_pause(struct mlx5_core_dev *dev, + u32 *rx_pause, u32 *tx_pause) +{ + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + int err; + + err = mlx5_query_pfcc_reg(dev, out, sizeof(out)); + if (err) + return err; + + if (rx_pause) + *rx_pause = MLX5_GET(pfcc_reg, out, pprx); + + if (tx_pause) + *tx_pause = MLX5_GET(pfcc_reg, out, pptx); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_pause); + +int mlx5_set_port_stall_watermark(struct mlx5_core_dev *dev, + u16 stall_critical_watermark, + u16 stall_minor_watermark) +{ + u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + + MLX5_SET(pfcc_reg, in, local_port, 1); + MLX5_SET(pfcc_reg, in, pptx_mask_n, 1); + MLX5_SET(pfcc_reg, in, pprx_mask_n, 1); + MLX5_SET(pfcc_reg, in, ppan_mask_n, 1); + MLX5_SET(pfcc_reg, in, critical_stall_mask, 1); + MLX5_SET(pfcc_reg, in, minor_stall_mask, 1); + MLX5_SET(pfcc_reg, in, device_stall_critical_watermark, + stall_critical_watermark); + MLX5_SET(pfcc_reg, in, device_stall_minor_watermark, stall_minor_watermark); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PFCC, 0, 1); +} + +int mlx5_query_port_stall_watermark(struct mlx5_core_dev *dev, + u16 *stall_critical_watermark, + u16 *stall_minor_watermark) +{ + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + int err; + + err = mlx5_query_pfcc_reg(dev, out, sizeof(out)); + if (err) + return err; + + if (stall_critical_watermark) + *stall_critical_watermark = MLX5_GET(pfcc_reg, out, + device_stall_critical_watermark); + + if (stall_minor_watermark) + *stall_minor_watermark = MLX5_GET(pfcc_reg, out, + device_stall_minor_watermark); + + return 0; +} + +int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx) +{ + u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + + MLX5_SET(pfcc_reg, in, local_port, 1); + MLX5_SET(pfcc_reg, in, pfctx, pfc_en_tx); + MLX5_SET(pfcc_reg, in, pfcrx, pfc_en_rx); + MLX5_SET_TO_ONES(pfcc_reg, in, prio_mask_tx); + MLX5_SET_TO_ONES(pfcc_reg, in, prio_mask_rx); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PFCC, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_pfc); + +int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx) +{ + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + int err; + + err = mlx5_query_pfcc_reg(dev, out, sizeof(out)); + if (err) + return err; + + if (pfc_en_tx) + *pfc_en_tx = MLX5_GET(pfcc_reg, out, pfctx); + + if (pfc_en_rx) + *pfc_en_rx = MLX5_GET(pfcc_reg, out, pfcrx); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_pfc); + +int mlx5_max_tc(struct mlx5_core_dev *mdev) +{ + u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? 
: 8; + + return num_tc - 1; +} + +int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(dcbx_param)] = {0}; + + MLX5_SET(dcbx_param, in, port_number, 1); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(in), MLX5_REG_DCBX_PARAM, 0, 0); +} + +int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in) +{ + u32 out[MLX5_ST_SZ_DW(dcbx_param)]; + + MLX5_SET(dcbx_param, in, port_number, 1); + + return mlx5_core_access_reg(mdev, in, sizeof(out), out, + sizeof(out), MLX5_REG_DCBX_PARAM, 0, 1); +} + +int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc) +{ + u32 in[MLX5_ST_SZ_DW(qtct_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(qtct_reg)]; + int err; + int i; + + for (i = 0; i < 8; i++) { + if (prio_tc[i] > mlx5_max_tc(mdev)) + return -EINVAL; + + MLX5_SET(qtct_reg, in, prio, i); + MLX5_SET(qtct_reg, in, tclass, prio_tc[i]); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_QTCT, 0, 1); + if (err) + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_set_port_prio_tc); + +int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev, + u8 prio, u8 *tc) +{ + u32 in[MLX5_ST_SZ_DW(qtct_reg)]; + u32 out[MLX5_ST_SZ_DW(qtct_reg)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(qtct_reg, in, port_number, 1); + MLX5_SET(qtct_reg, in, prio, prio); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_QTCT, 0, 0); + if (!err) + *tc = MLX5_GET(qtct_reg, out, tclass); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_prio_tc); + +static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(qetc_reg)]; + + if (!MLX5_CAP_GEN(mdev, ets)) + return -EOPNOTSUPP; + + return mlx5_core_access_reg(mdev, in, inlen, out, sizeof(out), + MLX5_REG_QETCR, 0, 1); +} + +static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out, + int outlen) +{ + u32 in[MLX5_ST_SZ_DW(qetc_reg)]; + + if (!MLX5_CAP_GEN(mdev, ets)) + return -EOPNOTSUPP; + + memset(in, 0, sizeof(in)); + return mlx5_core_access_reg(mdev, in, sizeof(in), out, outlen, + MLX5_REG_QETCR, 0, 0); +} + +int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group) +{ + u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0}; + int i; + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + MLX5_SET(qetc_reg, in, tc_configuration[i].g, 1); + MLX5_SET(qetc_reg, in, tc_configuration[i].group, tc_group[i]); + } + + return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in)); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_tc_group); + +int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev, + u8 tc, u8 *tc_group) +{ + u32 out[MLX5_ST_SZ_DW(qetc_reg)]; + void *ets_tcn_conf; + int err; + + err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out)); + if (err) + return err; + + ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out, + tc_configuration[tc]); + + *tc_group = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf, + group); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_tc_group); + +int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw) +{ + u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0}; + int i; + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + MLX5_SET(qetc_reg, in, tc_configuration[i].b, 1); + MLX5_SET(qetc_reg, in, tc_configuration[i].bw_allocation, tc_bw[i]); + } + + return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in)); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_tc_bw_alloc); + +int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev, + u8 tc, u8 *bw_pct) +{ + u32 
out[MLX5_ST_SZ_DW(qetc_reg)]; + void *ets_tcn_conf; + int err; + + err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out)); + if (err) + return err; + + ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out, + tc_configuration[tc]); + + *bw_pct = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf, + bw_allocation); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_tc_bw_alloc); + +int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev, + u8 *max_bw_value, + u8 *max_bw_units) +{ + u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0}; + void *ets_tcn_conf; + int i; + + MLX5_SET(qetc_reg, in, port_number, 1); + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, in, tc_configuration[i]); + + MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, r, 1); + MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, max_bw_units, + max_bw_units[i]); + MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, max_bw_value, + max_bw_value[i]); + } + + return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in)); +} +EXPORT_SYMBOL_GPL(mlx5_modify_port_ets_rate_limit); + +int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev, + u8 *max_bw_value, + u8 *max_bw_units) +{ + u32 out[MLX5_ST_SZ_DW(qetc_reg)]; + void *ets_tcn_conf; + int err; + int i; + + err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out)); + if (err) + return err; + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out, tc_configuration[i]); + + max_bw_value[i] = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf, + max_bw_value); + max_bw_units[i] = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf, + max_bw_units); + } + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_ets_rate_limit); + +int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode) +{ + u32 in[MLX5_ST_SZ_DW(set_wol_rol_in)] = {}; + + MLX5_SET(set_wol_rol_in, in, opcode, MLX5_CMD_OP_SET_WOL_ROL); + MLX5_SET(set_wol_rol_in, in, wol_mode_valid, 1); + MLX5_SET(set_wol_rol_in, in, wol_mode, wol_mode); + return mlx5_cmd_exec_in(mdev, set_wol_rol, in); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_wol); + +int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode) +{ + u32 out[MLX5_ST_SZ_DW(query_wol_rol_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_wol_rol_in)] = {}; + int err; + + MLX5_SET(query_wol_rol_in, in, opcode, MLX5_CMD_OP_QUERY_WOL_ROL); + err = mlx5_cmd_exec_inout(mdev, query_wol_rol, in, out); + if (!err) + *wol_mode = MLX5_GET(query_wol_rol_out, out, wol_mode); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_wol); + +int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0}; + + MLX5_SET(pcmr_reg, in, local_port, 1); + return mlx5_core_access_reg(mdev, in, sizeof(in), out, + outlen, MLX5_REG_PCMR, 0, 0); +} + +int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(pcmr_reg)]; + + return mlx5_core_access_reg(mdev, in, inlen, out, + sizeof(out), MLX5_REG_PCMR, 0, 1); +} + +int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0}; + int err; + + err = mlx5_query_ports_check(mdev, in, sizeof(in)); + if (err) + return err; + MLX5_SET(pcmr_reg, in, local_port, 1); + MLX5_SET(pcmr_reg, in, fcs_chk, enable); + return mlx5_set_ports_check(mdev, in, sizeof(in)); +} + +void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, + bool *enabled) +{ + u32 out[MLX5_ST_SZ_DW(pcmr_reg)]; + /* Default values for FW which do not support MLX5_REG_PCMR */ + *supported = false; + *enabled = true; + + if (!MLX5_CAP_GEN(mdev, 
ports_check)) + return; + + if (mlx5_query_ports_check(mdev, out, sizeof(out))) + return; + + *supported = !!(MLX5_GET(pcmr_reg, out, fcs_cap)); + *enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk)); +} + +int mlx5_query_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size) +{ + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + + return mlx5_core_access_reg(mdev, in, sizeof(in), mtpps, + mtpps_size, MLX5_REG_MTPPS, 0, 0); +} + +int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size) +{ + u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + + return mlx5_core_access_reg(mdev, mtpps, mtpps_size, out, + sizeof(out), MLX5_REG_MTPPS, 0, 1); +} + +int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode) +{ + u32 out[MLX5_ST_SZ_DW(mtppse_reg)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtppse_reg)] = {0}; + int err = 0; + + MLX5_SET(mtppse_reg, in, pin, pin); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MTPPSE, 0, 0); + if (err) + return err; + + *arm = MLX5_GET(mtppse_reg, in, event_arm); + *mode = MLX5_GET(mtppse_reg, in, event_generation_mode); + + return err; +} + +int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode) +{ + u32 out[MLX5_ST_SZ_DW(mtppse_reg)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtppse_reg)] = {0}; + + MLX5_SET(mtppse_reg, in, pin, pin); + MLX5_SET(mtppse_reg, in, event_arm, arm); + MLX5_SET(mtppse_reg, in, event_generation_mode, mode); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MTPPSE, 0, 1); +} + +int mlx5_set_trust_state(struct mlx5_core_dev *mdev, u8 trust_state) +{ + u32 out[MLX5_ST_SZ_DW(qpts_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(qpts_reg)] = {}; + int err; + + MLX5_SET(qpts_reg, in, local_port, 1); + MLX5_SET(qpts_reg, in, trust_state, trust_state); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_QPTS, 0, 1); + return err; +} + +int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state) +{ + u32 out[MLX5_ST_SZ_DW(qpts_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(qpts_reg)] = {}; + int err; + + MLX5_SET(qpts_reg, in, local_port, 1); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_QPTS, 0, 0); + if (!err) + *trust_state = MLX5_GET(qpts_reg, out, trust_state); + + return err; +} + +int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio) +{ + int sz = MLX5_ST_SZ_BYTES(qpdpm_reg); + void *qpdpm_dscp; + void *out; + void *in; + int err; + + in = kzalloc(sz, GFP_KERNEL); + out = kzalloc(sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(qpdpm_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 0); + if (err) + goto out; + + memcpy(in, out, sz); + MLX5_SET(qpdpm_reg, in, local_port, 1); + + /* Update the corresponding dscp entry */ + qpdpm_dscp = MLX5_ADDR_OF(qpdpm_reg, in, dscp[dscp]); + MLX5_SET16(qpdpm_dscp_reg, qpdpm_dscp, prio, prio); + MLX5_SET16(qpdpm_dscp_reg, qpdpm_dscp, e, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 1); + +out: + kfree(in); + kfree(out); + return err; +} + +/* dscp2prio[i]: priority that dscp i mapped to */ +#define MLX5E_SUPPORTED_DSCP 64 +int mlx5_query_dscp2prio(struct mlx5_core_dev *mdev, u8 *dscp2prio) +{ + int sz = MLX5_ST_SZ_BYTES(qpdpm_reg); + void *qpdpm_dscp; + void *out; + void *in; + int err; + int i; + + in = kzalloc(sz, GFP_KERNEL); + out = kzalloc(sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(qpdpm_reg, in, 
local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 0); + if (err) + goto out; + + for (i = 0; i < (MLX5E_SUPPORTED_DSCP); i++) { + qpdpm_dscp = MLX5_ADDR_OF(qpdpm_reg, out, dscp[i]); + dscp2prio[i] = MLX5_GET16(qpdpm_dscp_reg, qpdpm_dscp, prio); + } + +out: + kfree(in); + kfree(out); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/qos.c new file mode 100644 index 000000000..8bce730b5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#include "qos.h" + +#define MLX5_QOS_DEFAULT_DWRR_UID 0 + +bool mlx5_qos_is_supported(struct mlx5_core_dev *mdev) +{ + if (!MLX5_CAP_GEN(mdev, qos)) + return false; + if (!MLX5_CAP_QOS(mdev, nic_sq_scheduling)) + return false; + if (!MLX5_CAP_QOS(mdev, nic_bw_share)) + return false; + if (!MLX5_CAP_QOS(mdev, nic_rate_limit)) + return false; + return true; +} + +int mlx5_qos_max_leaf_nodes(struct mlx5_core_dev *mdev) +{ + return 1 << MLX5_CAP_QOS(mdev, log_max_qos_nic_queue_group); +} + +int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id); + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw); + + return mlx5_create_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, + sched_ctx, id); +} + +int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + void *attr; + + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id); + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw); + + attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); + MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR); + + return mlx5_create_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, + sched_ctx, id); +} + +int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id) +{ + return mlx5_qos_create_inner_node(mdev, MLX5_QOS_DEFAULT_DWRR_UID, 0, 0, id); +} + +int mlx5_qos_update_node(struct mlx5_core_dev *mdev, + u32 bw_share, u32 max_avg_bw, u32 id) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + u32 bitmask = 0; + + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw); + + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + + return mlx5_modify_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, + sched_ctx, id, bitmask); +} + +int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id) +{ + return mlx5_destroy_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, id); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/qos.h new file mode 100644 index 
000000000..624ce822b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_QOS_H +#define __MLX5_QOS_H + +#include "mlx5_core.h" + +#define MLX5_DEBUG_QOS_MASK BIT(4) + +#define qos_err(mdev, fmt, ...) \ + mlx5_core_err(mdev, "QoS: " fmt, ##__VA_ARGS__) +#define qos_warn(mdev, fmt, ...) \ + mlx5_core_warn(mdev, "QoS: " fmt, ##__VA_ARGS__) +#define qos_dbg(mdev, fmt, ...) \ + mlx5_core_dbg_mask(mdev, MLX5_DEBUG_QOS_MASK, "QoS: " fmt, ##__VA_ARGS__) + +bool mlx5_qos_is_supported(struct mlx5_core_dev *mdev); +int mlx5_qos_max_leaf_nodes(struct mlx5_core_dev *mdev); + +int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id); +int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id); +int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id); +int mlx5_qos_update_node(struct mlx5_core_dev *mdev, u32 bw_share, + u32 max_avg_bw, u32 id); +int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c new file mode 100644 index 000000000..540cf05f6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include <linux/mlx5/vport.h> +#include <rdma/ib_verbs.h> +#include <net/addrconf.h> + +#include "lib/mlx5.h" +#include "eswitch.h" +#include "fs_core.h" +#include "rdma.h" + +static void mlx5_rdma_disable_roce_steering(struct mlx5_core_dev *dev) +{ + struct mlx5_core_roce *roce = &dev->priv.roce; + + mlx5_del_flow_rules(roce->allow_rule); + mlx5_destroy_flow_group(roce->fg); + mlx5_destroy_flow_table(roce->ft); +} + +static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_roce *roce = &dev->priv.roce; + struct mlx5_flow_handle *flow_rule = NULL; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns = NULL; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + void *match_criteria; + u32 *flow_group_in; + void *misc; + int err; + + if (!(MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) && + MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain))) + return -EOPNOTSUPP; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + kvfree(flow_group_in); + return -ENOMEM; + } + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL); + if (!ns) { + mlx5_core_err(dev, "Failed to get RDMA RX namespace"); + err = -EOPNOTSUPP; + goto free; + } + + ft_attr.max_fte = 1; + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + mlx5_core_err(dev, "Failed to create RDMA RX flow table"); + err = PTR_ERR(ft); + goto free; + } + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_port); + + fg = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(fg)) { + 
err = PTR_ERR(fg); + mlx5_core_err(dev, "Failed to create RDMA RX flow group err(%d)\n", err); + goto destroy_flow_table; + } + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, + dev->priv.eswitch->manager_vport); + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, NULL, 0); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + mlx5_core_err(dev, "Failed to add RoCE allow rule, err=%d\n", + err); + goto destroy_flow_group; + } + + kvfree(spec); + kvfree(flow_group_in); + roce->ft = ft; + roce->fg = fg; + roce->allow_rule = flow_rule; + + return 0; + +destroy_flow_group: + mlx5_destroy_flow_group(fg); +destroy_flow_table: + mlx5_destroy_flow_table(ft); +free: + kvfree(spec); + kvfree(flow_group_in); + return err; +} + +static void mlx5_rdma_del_roce_addr(struct mlx5_core_dev *dev) +{ + mlx5_core_roce_gid_set(dev, 0, 0, 0, + NULL, NULL, false, 0, 1); +} + +static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid *gid) +{ + u8 hw_id[ETH_ALEN]; + + mlx5_query_mac_address(dev, hw_id); + gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + addrconf_addr_eui48(&gid->raw[8], hw_id); +} + +static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev) +{ + union ib_gid gid; + u8 mac[ETH_ALEN]; + + mlx5_rdma_make_default_gid(dev, &gid); + return mlx5_core_roce_gid_set(dev, 0, + MLX5_ROCE_VERSION_1, + 0, gid.raw, mac, + false, 0, 1); +} + +void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) +{ + struct mlx5_core_roce *roce = &dev->priv.roce; + + if (!roce->ft) + return; + + mlx5_rdma_disable_roce_steering(dev); + mlx5_rdma_del_roce_addr(dev); + mlx5_nic_vport_disable_roce(dev); +} + +void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) +{ + int err; + + if (!MLX5_CAP_GEN(dev, roce)) + return; + + err = mlx5_nic_vport_enable_roce(dev); + if (err) { + mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err); + return; + } + + err = mlx5_rdma_add_roce_addr(dev); + if (err) { + mlx5_core_err(dev, "Failed to add RoCE address: %d\n", err); + goto disable_roce; + } + + err = mlx5_rdma_enable_roce_steering(dev); + if (err) { + mlx5_core_err(dev, "Failed to enable RoCE steering: %d\n", err); + goto del_roce_addr; + } + + return; + +del_roce_addr: + mlx5_rdma_del_roce_addr(dev); +disable_roce: + mlx5_nic_vport_disable_roce(dev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h new file mode 100644 index 000000000..750cff2a7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_RDMA_H__ +#define __MLX5_RDMA_H__ + +#include "mlx5_core.h" + +#ifdef CONFIG_MLX5_ESWITCH + +void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev); +void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) {} +static inline void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) {} + +#endif /* CONFIG_MLX5_ESWITCH */ +#endif /* __MLX5_RDMA_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c new file mode 100644 index 000000000..9f8b4005f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -0,0 +1,398 @@ +/* + * Copyright (c) 2013-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/kernel.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" + +/* Scheduling element fw management */ +int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + void *ctx, u32 *element_id) +{ + u32 out[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {}; + u32 in[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {}; + void *schedc; + int err; + + schedc = MLX5_ADDR_OF(create_scheduling_element_in, in, + scheduling_context); + MLX5_SET(create_scheduling_element_in, in, opcode, + MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT); + MLX5_SET(create_scheduling_element_in, in, scheduling_hierarchy, + hierarchy); + memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context)); + + err = mlx5_cmd_exec_inout(dev, create_scheduling_element, in, out); + if (err) + return err; + + *element_id = MLX5_GET(create_scheduling_element_out, out, + scheduling_element_id); + return 0; +} + +int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + void *ctx, u32 element_id, + u32 modify_bitmask) +{ + u32 in[MLX5_ST_SZ_DW(modify_scheduling_element_in)] = {}; + void *schedc; + + schedc = MLX5_ADDR_OF(modify_scheduling_element_in, in, + scheduling_context); + MLX5_SET(modify_scheduling_element_in, in, opcode, + MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT); + MLX5_SET(modify_scheduling_element_in, in, scheduling_element_id, + element_id); + MLX5_SET(modify_scheduling_element_in, in, modify_bitmask, + modify_bitmask); + MLX5_SET(modify_scheduling_element_in, in, scheduling_hierarchy, + hierarchy); + memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context)); + + return mlx5_cmd_exec_in(dev, modify_scheduling_element, in); +} + +int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + u32 element_id) +{ + u32 in[MLX5_ST_SZ_DW(destroy_scheduling_element_in)] = {}; + + MLX5_SET(destroy_scheduling_element_in, in, opcode, + MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT); + MLX5_SET(destroy_scheduling_element_in, in, scheduling_element_id, + element_id); + MLX5_SET(destroy_scheduling_element_in, in, scheduling_hierarchy, + hierarchy); + + return mlx5_cmd_exec_in(dev, destroy_scheduling_element, in); +} + +static bool mlx5_rl_are_equal_raw(struct mlx5_rl_entry *entry, void *rl_in, + u16 uid) +{ + return (!memcmp(entry->rl_raw, rl_in, sizeof(entry->rl_raw)) && + entry->uid == uid); +} + +/* Finds an entry where we can register the given rate + * If the rate already exists, return the entry where it is registered, + * otherwise return the first available entry. 
+ * If the table is full, return NULL + */ +static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table, + void *rl_in, u16 uid, bool dedicated) +{ + struct mlx5_rl_entry *ret_entry = NULL; + bool empty_found = false; + int i; + + lockdep_assert_held(&table->rl_lock); + WARN_ON(!table->rl_entry); + + for (i = 0; i < table->max_size; i++) { + if (dedicated) { + if (!table->rl_entry[i].refcount) + return &table->rl_entry[i]; + continue; + } + + if (table->rl_entry[i].refcount) { + if (table->rl_entry[i].dedicated) + continue; + if (mlx5_rl_are_equal_raw(&table->rl_entry[i], rl_in, + uid)) + return &table->rl_entry[i]; + } else if (!empty_found) { + empty_found = true; + ret_entry = &table->rl_entry[i]; + } + } + + return ret_entry; +} + +static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev, + struct mlx5_rl_entry *entry, bool set) +{ + u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {}; + void *pp_context; + + pp_context = MLX5_ADDR_OF(set_pp_rate_limit_in, in, ctx); + MLX5_SET(set_pp_rate_limit_in, in, opcode, + MLX5_CMD_OP_SET_PP_RATE_LIMIT); + MLX5_SET(set_pp_rate_limit_in, in, uid, entry->uid); + MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, entry->index); + if (set) + memcpy(pp_context, entry->rl_raw, sizeof(entry->rl_raw)); + return mlx5_cmd_exec_in(dev, set_pp_rate_limit, in); +} + +bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + + return (rate <= table->max_rate && rate >= table->min_rate); +} +EXPORT_SYMBOL(mlx5_rl_is_in_range); + +bool mlx5_rl_are_equal(struct mlx5_rate_limit *rl_0, + struct mlx5_rate_limit *rl_1) +{ + return ((rl_0->rate == rl_1->rate) && + (rl_0->max_burst_sz == rl_1->max_burst_sz) && + (rl_0->typical_pkt_sz == rl_1->typical_pkt_sz)); +} +EXPORT_SYMBOL(mlx5_rl_are_equal); + +static int mlx5_rl_table_get(struct mlx5_rl_table *table) +{ + int i; + + lockdep_assert_held(&table->rl_lock); + + if (table->rl_entry) { + table->refcount++; + return 0; + } + + table->rl_entry = kcalloc(table->max_size, sizeof(struct mlx5_rl_entry), + GFP_KERNEL); + if (!table->rl_entry) + return -ENOMEM; + + /* The index represents the index in HW rate limit table + * Index 0 is reserved for unlimited rate + */ + for (i = 0; i < table->max_size; i++) + table->rl_entry[i].index = i + 1; + + table->refcount++; + return 0; +} + +static void mlx5_rl_table_put(struct mlx5_rl_table *table) +{ + lockdep_assert_held(&table->rl_lock); + if (--table->refcount) + return; + + kfree(table->rl_entry); + table->rl_entry = NULL; +} + +static void mlx5_rl_table_free(struct mlx5_core_dev *dev, struct mlx5_rl_table *table) +{ + int i; + + if (!table->rl_entry) + return; + + /* Clear all configured rates */ + for (i = 0; i < table->max_size; i++) + if (table->rl_entry[i].refcount) + mlx5_set_pp_rate_limit_cmd(dev, &table->rl_entry[i], false); + kfree(table->rl_entry); +} + +static void mlx5_rl_entry_get(struct mlx5_rl_entry *entry) +{ + entry->refcount++; +} + +static void +mlx5_rl_entry_put(struct mlx5_core_dev *dev, struct mlx5_rl_entry *entry) +{ + entry->refcount--; + if (!entry->refcount) + mlx5_set_pp_rate_limit_cmd(dev, entry, false); +} + +int mlx5_rl_add_rate_raw(struct mlx5_core_dev *dev, void *rl_in, u16 uid, + bool dedicated_entry, u16 *index) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + struct mlx5_rl_entry *entry; + u32 rate; + int err; + + if (!table->max_size) + return -EOPNOTSUPP; + + rate = MLX5_GET(set_pp_rate_limit_context, rl_in, rate_limit); + if (!rate || 
!mlx5_rl_is_in_range(dev, rate)) { + mlx5_core_err(dev, "Invalid rate: %u, should be %u to %u\n", + rate, table->min_rate, table->max_rate); + return -EINVAL; + } + + mutex_lock(&table->rl_lock); + err = mlx5_rl_table_get(table); + if (err) + goto out; + + entry = find_rl_entry(table, rl_in, uid, dedicated_entry); + if (!entry) { + mlx5_core_err(dev, "Max number of %u rates reached\n", + table->max_size); + err = -ENOSPC; + goto rl_err; + } + if (!entry->refcount) { + /* new rate limit */ + memcpy(entry->rl_raw, rl_in, sizeof(entry->rl_raw)); + entry->uid = uid; + err = mlx5_set_pp_rate_limit_cmd(dev, entry, true); + if (err) { + mlx5_core_err( + dev, + "Failed configuring rate limit(err %d): rate %u, max_burst_sz %u, typical_pkt_sz %u\n", + err, rate, + MLX5_GET(set_pp_rate_limit_context, rl_in, + burst_upper_bound), + MLX5_GET(set_pp_rate_limit_context, rl_in, + typical_packet_size)); + goto rl_err; + } + + entry->dedicated = dedicated_entry; + } + mlx5_rl_entry_get(entry); + *index = entry->index; + mutex_unlock(&table->rl_lock); + return 0; + +rl_err: + mlx5_rl_table_put(table); +out: + mutex_unlock(&table->rl_lock); + return err; +} +EXPORT_SYMBOL(mlx5_rl_add_rate_raw); + +void mlx5_rl_remove_rate_raw(struct mlx5_core_dev *dev, u16 index) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + struct mlx5_rl_entry *entry; + + mutex_lock(&table->rl_lock); + entry = &table->rl_entry[index - 1]; + mlx5_rl_entry_put(dev, entry); + mlx5_rl_table_put(table); + mutex_unlock(&table->rl_lock); +} +EXPORT_SYMBOL(mlx5_rl_remove_rate_raw); + +int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index, + struct mlx5_rate_limit *rl) +{ + u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)] = {}; + + MLX5_SET(set_pp_rate_limit_context, rl_raw, rate_limit, rl->rate); + MLX5_SET(set_pp_rate_limit_context, rl_raw, burst_upper_bound, + rl->max_burst_sz); + MLX5_SET(set_pp_rate_limit_context, rl_raw, typical_packet_size, + rl->typical_pkt_sz); + + return mlx5_rl_add_rate_raw(dev, rl_raw, + MLX5_CAP_QOS(dev, packet_pacing_uid) ? + MLX5_SHARED_RESOURCE_UID : 0, + false, index); +} +EXPORT_SYMBOL(mlx5_rl_add_rate); + +void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl) +{ + u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)] = {}; + struct mlx5_rl_table *table = &dev->priv.rl_table; + struct mlx5_rl_entry *entry = NULL; + + /* 0 is a reserved value for unlimited rate */ + if (rl->rate == 0) + return; + + MLX5_SET(set_pp_rate_limit_context, rl_raw, rate_limit, rl->rate); + MLX5_SET(set_pp_rate_limit_context, rl_raw, burst_upper_bound, + rl->max_burst_sz); + MLX5_SET(set_pp_rate_limit_context, rl_raw, typical_packet_size, + rl->typical_pkt_sz); + + mutex_lock(&table->rl_lock); + entry = find_rl_entry(table, rl_raw, + MLX5_CAP_QOS(dev, packet_pacing_uid) ? 
+ MLX5_SHARED_RESOURCE_UID : 0, false); + if (!entry || !entry->refcount) { + mlx5_core_warn(dev, "Rate %u, max_burst_sz %u typical_pkt_sz %u are not configured\n", + rl->rate, rl->max_burst_sz, rl->typical_pkt_sz); + goto out; + } + mlx5_rl_entry_put(dev, entry); + mlx5_rl_table_put(table); +out: + mutex_unlock(&table->rl_lock); +} +EXPORT_SYMBOL(mlx5_rl_remove_rate); + +int mlx5_init_rl_table(struct mlx5_core_dev *dev) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, packet_pacing)) { + table->max_size = 0; + return 0; + } + + mutex_init(&table->rl_lock); + + /* First entry is reserved for unlimited rate */ + table->max_size = MLX5_CAP_QOS(dev, packet_pacing_rate_table_size) - 1; + table->max_rate = MLX5_CAP_QOS(dev, packet_pacing_max_rate); + table->min_rate = MLX5_CAP_QOS(dev, packet_pacing_min_rate); + + mlx5_core_info(dev, "Rate limit: %u rates are supported, range: %uMbps to %uMbps\n", + table->max_size, + table->min_rate >> 10, + table->max_rate >> 10); + + return 0; +} + +void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, packet_pacing)) + return; + + mlx5_rl_table_free(dev, table); + mutex_destroy(&table->rl_lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c new file mode 100644 index 000000000..a8d75c2f0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#include <linux/mlx5/driver.h> +#include "priv.h" + +int mlx5_cmd_alloc_sf(struct mlx5_core_dev *dev, u16 function_id) +{ + u32 out[MLX5_ST_SZ_DW(alloc_sf_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_sf_in)] = {}; + + MLX5_SET(alloc_sf_in, in, opcode, MLX5_CMD_OP_ALLOC_SF); + MLX5_SET(alloc_sf_in, in, function_id, function_id); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_cmd_dealloc_sf(struct mlx5_core_dev *dev, u16 function_id) +{ + u32 out[MLX5_ST_SZ_DW(dealloc_sf_out)] = {}; + u32 in[MLX5_ST_SZ_DW(dealloc_sf_in)] = {}; + + MLX5_SET(dealloc_sf_in, in, opcode, MLX5_CMD_OP_DEALLOC_SF); + MLX5_SET(dealloc_sf_in, in, function_id, function_id); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_cmd_sf_enable_hca(struct mlx5_core_dev *dev, u16 func_id) +{ + u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {}; + + MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); + MLX5_SET(enable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); +} + +int mlx5_cmd_sf_disable_hca(struct mlx5_core_dev *dev, u16 func_id) +{ + u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {}; + + MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); + MLX5_SET(disable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c new file mode 100644 index 000000000..8e2abbab0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c @@ -0,0 +1,380 @@ +// SPDX-License-Identifier: GPL-2.0 OR 
Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/device.h> +#include "mlx5_core.h" +#include "dev.h" +#include "sf/vhca_event.h" +#include "sf/sf.h" +#include "sf/mlx5_ifc_vhca_event.h" +#include "ecpf.h" +#define CREATE_TRACE_POINTS +#include "diag/dev_tracepoint.h" + +struct mlx5_sf_dev_table { + struct xarray devices; + unsigned int max_sfs; + phys_addr_t base_address; + u64 sf_bar_length; + struct notifier_block nb; + struct mutex table_lock; /* Serializes sf life cycle and vhca state change handler */ + struct workqueue_struct *active_wq; + struct work_struct work; + u8 stop_active_wq:1; + struct mlx5_core_dev *dev; +}; + +static bool mlx5_sf_dev_supported(const struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN(dev, sf) && mlx5_vhca_event_supported(dev); +} + +bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev) +{ + struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; + + return table && !xa_empty(&table->devices); +} + +static ssize_t sfnum_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct auxiliary_device *adev = container_of(dev, struct auxiliary_device, dev); + struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); + + return sysfs_emit(buf, "%u\n", sf_dev->sfnum); +} +static DEVICE_ATTR_RO(sfnum); + +static struct attribute *sf_device_attrs[] = { + &dev_attr_sfnum.attr, + NULL, +}; + +static const struct attribute_group sf_attr_group = { + .attrs = sf_device_attrs, +}; + +static const struct attribute_group *sf_attr_groups[2] = { + &sf_attr_group, + NULL +}; + +static void mlx5_sf_dev_release(struct device *device) +{ + struct auxiliary_device *adev = container_of(device, struct auxiliary_device, dev); + struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); + + mlx5_adev_idx_free(adev->id); + kfree(sf_dev); +} + +static void mlx5_sf_dev_remove(struct mlx5_core_dev *dev, struct mlx5_sf_dev *sf_dev) +{ + int id; + + id = sf_dev->adev.id; + trace_mlx5_sf_dev_del(dev, sf_dev, id); + + auxiliary_device_delete(&sf_dev->adev); + auxiliary_device_uninit(&sf_dev->adev); +} + +static void mlx5_sf_dev_add(struct mlx5_core_dev *dev, u16 sf_index, u16 fn_id, u32 sfnum) +{ + struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; + struct mlx5_sf_dev *sf_dev; + struct pci_dev *pdev; + int err; + int id; + + id = mlx5_adev_idx_alloc(); + if (id < 0) { + err = id; + goto add_err; + } + + sf_dev = kzalloc(sizeof(*sf_dev), GFP_KERNEL); + if (!sf_dev) { + mlx5_adev_idx_free(id); + err = -ENOMEM; + goto add_err; + } + pdev = dev->pdev; + sf_dev->adev.id = id; + sf_dev->adev.name = MLX5_SF_DEV_ID_NAME; + sf_dev->adev.dev.release = mlx5_sf_dev_release; + sf_dev->adev.dev.parent = &pdev->dev; + sf_dev->adev.dev.groups = sf_attr_groups; + sf_dev->sfnum = sfnum; + sf_dev->parent_mdev = dev; + sf_dev->fn_id = fn_id; + + if (!table->max_sfs) { + mlx5_adev_idx_free(id); + kfree(sf_dev); + err = -EOPNOTSUPP; + goto add_err; + } + sf_dev->bar_base_addr = table->base_address + (sf_index * table->sf_bar_length); + + trace_mlx5_sf_dev_add(dev, sf_dev, id); + + err = auxiliary_device_init(&sf_dev->adev); + if (err) { + mlx5_adev_idx_free(id); + kfree(sf_dev); + goto add_err; + } + + err = auxiliary_device_add(&sf_dev->adev); + if (err) { + put_device(&sf_dev->adev.dev); + goto add_err; + } + + err = xa_insert(&table->devices, sf_index, sf_dev, GFP_KERNEL); + if (err) + goto xa_err; + return; + +xa_err: + mlx5_sf_dev_remove(dev, sf_dev); +add_err: + 
mlx5_core_err(dev, "SF DEV: fail device add for index=%d sfnum=%d err=%d\n", + sf_index, sfnum, err); +} + +static void mlx5_sf_dev_del(struct mlx5_core_dev *dev, struct mlx5_sf_dev *sf_dev, u16 sf_index) +{ + struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; + + xa_erase(&table->devices, sf_index); + mlx5_sf_dev_remove(dev, sf_dev); +} + +static int +mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_code, void *data) +{ + struct mlx5_sf_dev_table *table = container_of(nb, struct mlx5_sf_dev_table, nb); + const struct mlx5_vhca_state_event *event = data; + struct mlx5_sf_dev *sf_dev; + u16 max_functions; + u16 sf_index; + u16 base_id; + + max_functions = mlx5_sf_max_functions(table->dev); + if (!max_functions) + return 0; + + base_id = MLX5_CAP_GEN(table->dev, sf_base_id); + if (event->function_id < base_id || event->function_id >= (base_id + max_functions)) + return 0; + + sf_index = event->function_id - base_id; + mutex_lock(&table->table_lock); + sf_dev = xa_load(&table->devices, sf_index); + switch (event->new_vhca_state) { + case MLX5_VHCA_STATE_INVALID: + case MLX5_VHCA_STATE_ALLOCATED: + if (sf_dev) + mlx5_sf_dev_del(table->dev, sf_dev, sf_index); + break; + case MLX5_VHCA_STATE_TEARDOWN_REQUEST: + if (sf_dev) + mlx5_sf_dev_del(table->dev, sf_dev, sf_index); + else + mlx5_core_err(table->dev, + "SF DEV: teardown state for invalid dev index=%d fn_id=0x%x\n", + sf_index, event->sw_function_id); + break; + case MLX5_VHCA_STATE_ACTIVE: + if (!sf_dev) + mlx5_sf_dev_add(table->dev, sf_index, event->function_id, + event->sw_function_id); + break; + default: + break; + } + mutex_unlock(&table->table_lock); + return 0; +} + +static int mlx5_sf_dev_vhca_arm_all(struct mlx5_sf_dev_table *table) +{ + struct mlx5_core_dev *dev = table->dev; + u16 max_functions; + u16 function_id; + int err = 0; + int i; + + max_functions = mlx5_sf_max_functions(dev); + function_id = MLX5_CAP_GEN(dev, sf_base_id); + /* Arm the vhca context as the vhca event notifier */ + for (i = 0; i < max_functions; i++) { + err = mlx5_vhca_event_arm(dev, function_id); + if (err) + return err; + + function_id++; + } + return 0; +} + +static void mlx5_sf_dev_add_active_work(struct work_struct *work) +{ + struct mlx5_sf_dev_table *table = container_of(work, struct mlx5_sf_dev_table, work); + u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {}; + struct mlx5_core_dev *dev = table->dev; + u16 max_functions; + u16 function_id; + u16 sw_func_id; + int err = 0; + u8 state; + int i; + + max_functions = mlx5_sf_max_functions(dev); + function_id = MLX5_CAP_GEN(dev, sf_base_id); + for (i = 0; i < max_functions; i++, function_id++) { + if (table->stop_active_wq) + return; + err = mlx5_cmd_query_vhca_state(dev, function_id, out, sizeof(out)); + if (err) + /* A failure of specific vhca doesn't mean others will + * fail as well. + */ + continue; + state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state); + if (state != MLX5_VHCA_STATE_ACTIVE) + continue; + + sw_func_id = MLX5_GET(query_vhca_state_out, out, vhca_state_context.sw_function_id); + mutex_lock(&table->table_lock); + /* Don't probe device which is already probe */ + if (!xa_load(&table->devices, i)) + mlx5_sf_dev_add(dev, i, function_id, sw_func_id); + /* There is a race where SF got inactive after the query + * above. e.g.: the query returns that the state of the + * SF is active, and after that the eswitch manager set it to + * inactive. 
+ * This case cannot be managed in SW, since the probing of the + * SF is on one system, and the inactivation is on a different + * system. + * If the inactive is done after the SF perform init_hca(), + * the SF will fully probe and then removed. If it was + * done before init_hca(), the SF probe will fail. + */ + mutex_unlock(&table->table_lock); + } +} + +/* In case SFs are generated externally, probe active SFs */ +static int mlx5_sf_dev_queue_active_work(struct mlx5_sf_dev_table *table) +{ + if (MLX5_CAP_GEN(table->dev, eswitch_manager)) + return 0; /* the table is local */ + + /* Use a workqueue to probe active SFs, which are in large + * quantity and may take up to minutes to probe. + */ + table->active_wq = create_singlethread_workqueue("mlx5_active_sf"); + if (!table->active_wq) + return -ENOMEM; + INIT_WORK(&table->work, &mlx5_sf_dev_add_active_work); + queue_work(table->active_wq, &table->work); + return 0; +} + +static void mlx5_sf_dev_destroy_active_work(struct mlx5_sf_dev_table *table) +{ + if (table->active_wq) { + table->stop_active_wq = true; + destroy_workqueue(table->active_wq); + } +} + +void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_dev_table *table; + unsigned int max_sfs; + int err; + + if (!mlx5_sf_dev_supported(dev) || !mlx5_vhca_event_supported(dev)) + return; + + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (!table) { + err = -ENOMEM; + goto table_err; + } + + table->nb.notifier_call = mlx5_sf_dev_state_change_handler; + table->dev = dev; + if (MLX5_CAP_GEN(dev, max_num_sf)) + max_sfs = MLX5_CAP_GEN(dev, max_num_sf); + else + max_sfs = 1 << MLX5_CAP_GEN(dev, log_max_sf); + table->sf_bar_length = 1 << (MLX5_CAP_GEN(dev, log_min_sf_size) + 12); + table->base_address = pci_resource_start(dev->pdev, 2); + table->max_sfs = max_sfs; + xa_init(&table->devices); + mutex_init(&table->table_lock); + dev->priv.sf_dev_table = table; + + err = mlx5_vhca_event_notifier_register(dev, &table->nb); + if (err) + goto vhca_err; + + err = mlx5_sf_dev_queue_active_work(table); + if (err) + goto add_active_err; + + err = mlx5_sf_dev_vhca_arm_all(table); + if (err) + goto arm_err; + mlx5_core_dbg(dev, "SF DEV: max sf devices=%d\n", max_sfs); + return; + +arm_err: + mlx5_sf_dev_destroy_active_work(table); +add_active_err: + mlx5_vhca_event_notifier_unregister(dev, &table->nb); +vhca_err: + table->max_sfs = 0; + kfree(table); + dev->priv.sf_dev_table = NULL; +table_err: + mlx5_core_err(dev, "SF DEV table create err = %d\n", err); +} + +static void mlx5_sf_dev_destroy_all(struct mlx5_sf_dev_table *table) +{ + struct mlx5_sf_dev *sf_dev; + unsigned long index; + + xa_for_each(&table->devices, index, sf_dev) { + xa_erase(&table->devices, index); + mlx5_sf_dev_remove(table->dev, sf_dev); + } +} + +void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; + + if (!table) + return; + + mlx5_sf_dev_destroy_active_work(table); + mlx5_vhca_event_notifier_unregister(dev, &table->nb); + mutex_destroy(&table->table_lock); + + /* Now that event handler is not running, it is safe to destroy + * the sf device without race. 
+ */ + mlx5_sf_dev_destroy_all(table); + + WARN_ON(!xa_empty(&table->devices)); + kfree(table); + dev->priv.sf_dev_table = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h new file mode 100644 index 000000000..2a66a427e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#ifndef __MLX5_SF_DEV_H__ +#define __MLX5_SF_DEV_H__ + +#ifdef CONFIG_MLX5_SF + +#include <linux/auxiliary_bus.h> + +#define MLX5_SF_DEV_ID_NAME "sf" + +struct mlx5_sf_dev { + struct auxiliary_device adev; + struct mlx5_core_dev *parent_mdev; + struct mlx5_core_dev *mdev; + phys_addr_t bar_base_addr; + u32 sfnum; + u16 fn_id; +}; + +void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev); +void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev); + +int mlx5_sf_driver_register(void); +void mlx5_sf_driver_unregister(void); + +bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev); + +#else + +static inline void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) +{ +} + +static inline void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev) +{ +} + +static inline int mlx5_sf_driver_register(void) +{ + return 0; +} + +static inline void mlx5_sf_driver_unregister(void) +{ +} + +static inline bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev) +{ + return false; +} + +#endif + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h new file mode 100644 index 000000000..7f7c9af5d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_SF_DEV_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_SF_DEV_TP_ + +#include <linux/tracepoint.h> +#include <linux/mlx5/driver.h> +#include "../../dev/dev.h" + +DECLARE_EVENT_CLASS(mlx5_sf_dev_template, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_sf_dev *sfdev, + int aux_id), + TP_ARGS(dev, sfdev, aux_id), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(const struct mlx5_sf_dev*, sfdev) + __field(int, aux_id) + __field(u16, hw_fn_id) + __field(u32, sfnum) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->sfdev = sfdev; + __entry->aux_id = aux_id; + __entry->hw_fn_id = sfdev->fn_id; + __entry->sfnum = sfdev->sfnum; + ), + TP_printk("(%s) sfdev=%pK aux_id=%d hw_id=0x%x sfnum=%u\n", + __get_str(devname), __entry->sfdev, + __entry->aux_id, __entry->hw_fn_id, + __entry->sfnum) +); + +DEFINE_EVENT(mlx5_sf_dev_template, mlx5_sf_dev_add, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_sf_dev *sfdev, + int aux_id), + TP_ARGS(dev, sfdev, aux_id) + ); + +DEFINE_EVENT(mlx5_sf_dev_template, mlx5_sf_dev_del, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_sf_dev *sfdev, + int aux_id), + TP_ARGS(dev, sfdev, aux_id) + ); + +#endif /* _MLX5_SF_DEV_TP_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH sf/dev/diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE dev_tracepoint +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c new file mode 100644 index 000000000..2424cdf9c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/device.h> +#include "mlx5_core.h" +#include "dev.h" +#include "devlink.h" + +static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id) +{ + struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); + struct mlx5_core_dev *mdev; + struct devlink *devlink; + int err; + + devlink = mlx5_devlink_alloc(&adev->dev); + if (!devlink) + return -ENOMEM; + + mdev = devlink_priv(devlink); + mdev->device = &adev->dev; + mdev->pdev = sf_dev->parent_mdev->pdev; + mdev->bar_addr = sf_dev->bar_base_addr; + mdev->iseg_base = sf_dev->bar_base_addr; + mdev->coredev_type = MLX5_COREDEV_SF; + mdev->priv.parent_mdev = sf_dev->parent_mdev; + mdev->priv.adev_idx = adev->id; + sf_dev->mdev = mdev; + + err = mlx5_mdev_init(mdev, MLX5_DEFAULT_PROF); + if (err) { + mlx5_core_warn(mdev, "mlx5_mdev_init on err=%d\n", err); + goto mdev_err; + } + + mdev->iseg = ioremap(mdev->iseg_base, sizeof(*mdev->iseg)); + if (!mdev->iseg) { + mlx5_core_warn(mdev, "remap error\n"); + err = -ENOMEM; + goto remap_err; + } + + err = mlx5_init_one(mdev); + if (err) { + mlx5_core_warn(mdev, "mlx5_init_one err=%d\n", err); + goto init_one_err; + } + devlink_register(devlink); + return 0; + +init_one_err: + iounmap(mdev->iseg); +remap_err: + mlx5_mdev_uninit(mdev); +mdev_err: + mlx5_devlink_free(devlink); + return err; +} + +static void mlx5_sf_dev_remove(struct auxiliary_device *adev) +{ + struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); + struct devlink *devlink = priv_to_devlink(sf_dev->mdev); + + 
mlx5_drain_health_wq(sf_dev->mdev); + devlink_unregister(devlink); + mlx5_uninit_one(sf_dev->mdev); + iounmap(sf_dev->mdev->iseg); + mlx5_mdev_uninit(sf_dev->mdev); + mlx5_devlink_free(devlink); +} + +static void mlx5_sf_dev_shutdown(struct auxiliary_device *adev) +{ + struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); + + mlx5_unload_one(sf_dev->mdev, false); +} + +static const struct auxiliary_device_id mlx5_sf_dev_id_table[] = { + { .name = MLX5_ADEV_NAME "." MLX5_SF_DEV_ID_NAME, }, + { }, +}; + +MODULE_DEVICE_TABLE(auxiliary, mlx5_sf_dev_id_table); + +static struct auxiliary_driver mlx5_sf_driver = { + .name = MLX5_SF_DEV_ID_NAME, + .probe = mlx5_sf_dev_probe, + .remove = mlx5_sf_dev_remove, + .shutdown = mlx5_sf_dev_shutdown, + .id_table = mlx5_sf_dev_id_table, +}; + +int mlx5_sf_driver_register(void) +{ + return auxiliary_driver_register(&mlx5_sf_driver); +} + +void mlx5_sf_driver_unregister(void) +{ + auxiliary_driver_unregister(&mlx5_sf_driver); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c new file mode 100644 index 000000000..7d955a4d9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -0,0 +1,571 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#include <linux/mlx5/driver.h> +#include "eswitch.h" +#include "priv.h" +#include "sf/dev/dev.h" +#include "mlx5_ifc_vhca_event.h" +#include "vhca_event.h" +#include "ecpf.h" +#define CREATE_TRACE_POINTS +#include "diag/sf_tracepoint.h" + +struct mlx5_sf { + struct devlink_port dl_port; + unsigned int port_index; + u32 controller; + u16 id; + u16 hw_fn_id; + u16 hw_state; +}; + +struct mlx5_sf_table { + struct mlx5_core_dev *dev; /* To refer from notifier context. */ + struct xarray port_indices; /* port index based lookup. */ + refcount_t refcount; + struct completion disable_complete; + struct mutex sf_state_lock; /* Serializes sf state among user cmds & vhca event handler. 
*/ + struct notifier_block esw_nb; + struct notifier_block vhca_nb; + u8 ecpu: 1; +}; + +static struct mlx5_sf * +mlx5_sf_lookup_by_index(struct mlx5_sf_table *table, unsigned int port_index) +{ + return xa_load(&table->port_indices, port_index); +} + +static struct mlx5_sf * +mlx5_sf_lookup_by_function_id(struct mlx5_sf_table *table, unsigned int fn_id) +{ + unsigned long index; + struct mlx5_sf *sf; + + xa_for_each(&table->port_indices, index, sf) { + if (sf->hw_fn_id == fn_id) + return sf; + } + return NULL; +} + +static int mlx5_sf_id_insert(struct mlx5_sf_table *table, struct mlx5_sf *sf) +{ + return xa_insert(&table->port_indices, sf->port_index, sf, GFP_KERNEL); +} + +static void mlx5_sf_id_erase(struct mlx5_sf_table *table, struct mlx5_sf *sf) +{ + xa_erase(&table->port_indices, sf->port_index); +} + +static struct mlx5_sf * +mlx5_sf_alloc(struct mlx5_sf_table *table, struct mlx5_eswitch *esw, + u32 controller, u32 sfnum, struct netlink_ext_ack *extack) +{ + unsigned int dl_port_index; + struct mlx5_sf *sf; + u16 hw_fn_id; + int id_err; + int err; + + if (!mlx5_esw_offloads_controller_valid(esw, controller)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid controller number"); + return ERR_PTR(-EINVAL); + } + + id_err = mlx5_sf_hw_table_sf_alloc(table->dev, controller, sfnum); + if (id_err < 0) { + err = id_err; + goto id_err; + } + + sf = kzalloc(sizeof(*sf), GFP_KERNEL); + if (!sf) { + err = -ENOMEM; + goto alloc_err; + } + sf->id = id_err; + hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, controller, sf->id); + dl_port_index = mlx5_esw_vport_to_devlink_port_index(table->dev, hw_fn_id); + sf->port_index = dl_port_index; + sf->hw_fn_id = hw_fn_id; + sf->hw_state = MLX5_VHCA_STATE_ALLOCATED; + sf->controller = controller; + + err = mlx5_sf_id_insert(table, sf); + if (err) + goto insert_err; + + return sf; + +insert_err: + kfree(sf); +alloc_err: + mlx5_sf_hw_table_sf_free(table->dev, controller, id_err); +id_err: + if (err == -EEXIST) + NL_SET_ERR_MSG_MOD(extack, "SF already exist. Choose different sfnum"); + return ERR_PTR(err); +} + +static void mlx5_sf_free(struct mlx5_sf_table *table, struct mlx5_sf *sf) +{ + mlx5_sf_id_erase(table, sf); + mlx5_sf_hw_table_sf_free(table->dev, sf->controller, sf->id); + trace_mlx5_sf_free(table->dev, sf->port_index, sf->controller, sf->hw_fn_id); + kfree(sf); +} + +static struct mlx5_sf_table *mlx5_sf_table_try_get(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_table *table = dev->priv.sf_table; + + if (!table) + return NULL; + + return refcount_inc_not_zero(&table->refcount) ? 
table : NULL; +} + +static void mlx5_sf_table_put(struct mlx5_sf_table *table) +{ + if (refcount_dec_and_test(&table->refcount)) + complete(&table->disable_complete); +} + +static enum devlink_port_fn_state mlx5_sf_to_devlink_state(u8 hw_state) +{ + switch (hw_state) { + case MLX5_VHCA_STATE_ACTIVE: + case MLX5_VHCA_STATE_IN_USE: + return DEVLINK_PORT_FN_STATE_ACTIVE; + case MLX5_VHCA_STATE_INVALID: + case MLX5_VHCA_STATE_ALLOCATED: + case MLX5_VHCA_STATE_TEARDOWN_REQUEST: + default: + return DEVLINK_PORT_FN_STATE_INACTIVE; + } +} + +static enum devlink_port_fn_opstate mlx5_sf_to_devlink_opstate(u8 hw_state) +{ + switch (hw_state) { + case MLX5_VHCA_STATE_IN_USE: + case MLX5_VHCA_STATE_TEARDOWN_REQUEST: + return DEVLINK_PORT_FN_OPSTATE_ATTACHED; + case MLX5_VHCA_STATE_INVALID: + case MLX5_VHCA_STATE_ALLOCATED: + case MLX5_VHCA_STATE_ACTIVE: + default: + return DEVLINK_PORT_FN_OPSTATE_DETACHED; + } +} + +static bool mlx5_sf_is_active(const struct mlx5_sf *sf) +{ + return sf->hw_state == MLX5_VHCA_STATE_ACTIVE || sf->hw_state == MLX5_VHCA_STATE_IN_USE; +} + +int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port, + enum devlink_port_fn_state *state, + enum devlink_port_fn_opstate *opstate, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink); + struct mlx5_sf_table *table; + struct mlx5_sf *sf; + int err = 0; + + table = mlx5_sf_table_try_get(dev); + if (!table) + return -EOPNOTSUPP; + + sf = mlx5_sf_lookup_by_index(table, dl_port->index); + if (!sf) { + err = -EOPNOTSUPP; + goto sf_err; + } + mutex_lock(&table->sf_state_lock); + *state = mlx5_sf_to_devlink_state(sf->hw_state); + *opstate = mlx5_sf_to_devlink_opstate(sf->hw_state); + mutex_unlock(&table->sf_state_lock); +sf_err: + mlx5_sf_table_put(table); + return err; +} + +static int mlx5_sf_activate(struct mlx5_core_dev *dev, struct mlx5_sf *sf, + struct netlink_ext_ack *extack) +{ + int err; + + if (mlx5_sf_is_active(sf)) + return 0; + if (sf->hw_state != MLX5_VHCA_STATE_ALLOCATED) { + NL_SET_ERR_MSG_MOD(extack, "SF is inactivated but it is still attached"); + return -EBUSY; + } + + err = mlx5_cmd_sf_enable_hca(dev, sf->hw_fn_id); + if (err) + return err; + + sf->hw_state = MLX5_VHCA_STATE_ACTIVE; + trace_mlx5_sf_activate(dev, sf->port_index, sf->controller, sf->hw_fn_id); + return 0; +} + +static int mlx5_sf_deactivate(struct mlx5_core_dev *dev, struct mlx5_sf *sf) +{ + int err; + + if (!mlx5_sf_is_active(sf)) + return 0; + + err = mlx5_cmd_sf_disable_hca(dev, sf->hw_fn_id); + if (err) + return err; + + sf->hw_state = MLX5_VHCA_STATE_TEARDOWN_REQUEST; + trace_mlx5_sf_deactivate(dev, sf->port_index, sf->controller, sf->hw_fn_id); + return 0; +} + +static int mlx5_sf_state_set(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, + struct mlx5_sf *sf, + enum devlink_port_fn_state state, + struct netlink_ext_ack *extack) +{ + int err = 0; + + mutex_lock(&table->sf_state_lock); + if (state == mlx5_sf_to_devlink_state(sf->hw_state)) + goto out; + if (state == DEVLINK_PORT_FN_STATE_ACTIVE) + err = mlx5_sf_activate(dev, sf, extack); + else if (state == DEVLINK_PORT_FN_STATE_INACTIVE) + err = mlx5_sf_deactivate(dev, sf); + else + err = -EINVAL; +out: + mutex_unlock(&table->sf_state_lock); + return err; +} + +int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port, + enum devlink_port_fn_state state, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink); + struct mlx5_sf_table *table; + struct mlx5_sf *sf; + int err; + + table = 
mlx5_sf_table_try_get(dev); + if (!table) { + NL_SET_ERR_MSG_MOD(extack, + "Port state set is only supported in eswitch switchdev mode or SF ports are disabled."); + return -EOPNOTSUPP; + } + sf = mlx5_sf_lookup_by_index(table, dl_port->index); + if (!sf) { + err = -ENODEV; + goto out; + } + + err = mlx5_sf_state_set(dev, table, sf, state, extack); +out: + mlx5_sf_table_put(table); + return err; +} +
+static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, + const struct devlink_port_new_attrs *new_attr, + struct netlink_ext_ack *extack, + unsigned int *new_port_index) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5_sf *sf; + int err; + + sf = mlx5_sf_alloc(table, esw, new_attr->controller, new_attr->sfnum, extack); + if (IS_ERR(sf)) + return PTR_ERR(sf); + + err = mlx5_esw_offloads_sf_vport_enable(esw, &sf->dl_port, sf->hw_fn_id, + new_attr->controller, new_attr->sfnum); + if (err) + goto esw_err; + *new_port_index = sf->port_index; + trace_mlx5_sf_add(dev, sf->port_index, sf->controller, sf->hw_fn_id, new_attr->sfnum); + return 0; + +esw_err: + mlx5_sf_free(table, sf); + return err; +} +
+static int +mlx5_sf_new_check_attr(struct mlx5_core_dev *dev, const struct devlink_port_new_attrs *new_attr, + struct netlink_ext_ack *extack) +{ + if (new_attr->flavour != DEVLINK_PORT_FLAVOUR_PCI_SF) { + NL_SET_ERR_MSG_MOD(extack, "Driver supports only SF port addition"); + return -EOPNOTSUPP; + } + if (new_attr->port_index_valid) { + NL_SET_ERR_MSG_MOD(extack, + "Driver does not support user defined port index assignment"); + return -EOPNOTSUPP; + } + if (!new_attr->sfnum_valid) { + NL_SET_ERR_MSG_MOD(extack, + "User must provide unique sfnum. Driver does not support auto assignment"); + return -EOPNOTSUPP; + } + if (new_attr->controller_valid && new_attr->controller && + !mlx5_core_is_ecpf_esw_manager(dev)) { + NL_SET_ERR_MSG_MOD(extack, "External controller is unsupported"); + return -EOPNOTSUPP; + } + if (new_attr->pfnum != mlx5_get_dev_index(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid pfnum supplied"); + return -EOPNOTSUPP; + } + return 0; +} +
+int mlx5_devlink_sf_port_new(struct devlink *devlink, + const struct devlink_port_new_attrs *new_attr, + struct netlink_ext_ack *extack, + unsigned int *new_port_index) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_sf_table *table; + int err; + + err = mlx5_sf_new_check_attr(dev, new_attr, extack); + if (err) + return err; + + table = mlx5_sf_table_try_get(dev); + if (!table) { + NL_SET_ERR_MSG_MOD(extack, + "Port add is only supported in eswitch switchdev mode or SF ports are disabled."); + return -EOPNOTSUPP; + } + err = mlx5_sf_add(dev, table, new_attr, extack, new_port_index); + mlx5_sf_table_put(table); + return err; +} +
+static void mlx5_sf_dealloc(struct mlx5_sf_table *table, struct mlx5_sf *sf) +{ + if (sf->hw_state == MLX5_VHCA_STATE_ALLOCATED) { + mlx5_sf_free(table, sf); + } else if (mlx5_sf_is_active(sf)) { + /* Even if it is active, it is treated as in_use because, by the time + * it is disabled here, it may be getting used. So it is safe to + * always look for the event to ensure that it is recycled only after + * firmware gives confirmation that it is detached by the driver.
+ */ + mlx5_cmd_sf_disable_hca(table->dev, sf->hw_fn_id); + mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->controller, sf->id); + kfree(sf); + } else { + mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->controller, sf->id); + kfree(sf); + } +} + +int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5_sf_table *table; + struct mlx5_sf *sf; + int err = 0; + + table = mlx5_sf_table_try_get(dev); + if (!table) { + NL_SET_ERR_MSG_MOD(extack, + "Port del is only supported in eswitch switchdev mode or SF ports are disabled."); + return -EOPNOTSUPP; + } + sf = mlx5_sf_lookup_by_index(table, port_index); + if (!sf) { + err = -ENODEV; + goto sf_err; + } + + mlx5_esw_offloads_sf_vport_disable(esw, sf->hw_fn_id); + mlx5_sf_id_erase(table, sf); + + mutex_lock(&table->sf_state_lock); + mlx5_sf_dealloc(table, sf); + mutex_unlock(&table->sf_state_lock); +sf_err: + mlx5_sf_table_put(table); + return err; +} + +static bool mlx5_sf_state_update_check(const struct mlx5_sf *sf, u8 new_state) +{ + if (sf->hw_state == MLX5_VHCA_STATE_ACTIVE && new_state == MLX5_VHCA_STATE_IN_USE) + return true; + + if (sf->hw_state == MLX5_VHCA_STATE_IN_USE && new_state == MLX5_VHCA_STATE_ACTIVE) + return true; + + if (sf->hw_state == MLX5_VHCA_STATE_TEARDOWN_REQUEST && + new_state == MLX5_VHCA_STATE_ALLOCATED) + return true; + + return false; +} + +static int mlx5_sf_vhca_event(struct notifier_block *nb, unsigned long opcode, void *data) +{ + struct mlx5_sf_table *table = container_of(nb, struct mlx5_sf_table, vhca_nb); + const struct mlx5_vhca_state_event *event = data; + bool update = false; + struct mlx5_sf *sf; + + table = mlx5_sf_table_try_get(table->dev); + if (!table) + return 0; + + mutex_lock(&table->sf_state_lock); + sf = mlx5_sf_lookup_by_function_id(table, event->function_id); + if (!sf) + goto sf_err; + + /* When driver is attached or detached to a function, an event + * notifies such state change. + */ + update = mlx5_sf_state_update_check(sf, event->new_vhca_state); + if (update) + sf->hw_state = event->new_vhca_state; + trace_mlx5_sf_update_state(table->dev, sf->port_index, sf->controller, + sf->hw_fn_id, sf->hw_state); +sf_err: + mutex_unlock(&table->sf_state_lock); + mlx5_sf_table_put(table); + return 0; +} + +static void mlx5_sf_table_enable(struct mlx5_sf_table *table) +{ + init_completion(&table->disable_complete); + refcount_set(&table->refcount, 1); +} + +static void mlx5_sf_deactivate_all(struct mlx5_sf_table *table) +{ + struct mlx5_eswitch *esw = table->dev->priv.eswitch; + unsigned long index; + struct mlx5_sf *sf; + + /* At this point, no new user commands can start and no vhca event can + * arrive. It is safe to destroy all user created SFs. + */ + xa_for_each(&table->port_indices, index, sf) { + mlx5_esw_offloads_sf_vport_disable(esw, sf->hw_fn_id); + mlx5_sf_id_erase(table, sf); + mlx5_sf_dealloc(table, sf); + } +} + +static void mlx5_sf_table_disable(struct mlx5_sf_table *table) +{ + if (!refcount_read(&table->refcount)) + return; + + /* Balances with refcount_set; drop the reference so that new user cmd cannot start + * and new vhca event handler cannot run. 
+ */ + mlx5_sf_table_put(table); + wait_for_completion(&table->disable_complete); + + mlx5_sf_deactivate_all(table); +} + +static int mlx5_sf_esw_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5_sf_table *table = container_of(nb, struct mlx5_sf_table, esw_nb); + const struct mlx5_esw_event_info *mode = data; + + switch (mode->new_mode) { + case MLX5_ESWITCH_OFFLOADS: + mlx5_sf_table_enable(table); + break; + case MLX5_ESWITCH_LEGACY: + mlx5_sf_table_disable(table); + break; + default: + break; + } + + return 0; +} + +static bool mlx5_sf_table_supported(const struct mlx5_core_dev *dev) +{ + return dev->priv.eswitch && MLX5_ESWITCH_MANAGER(dev) && + mlx5_sf_hw_table_supported(dev); +} + +int mlx5_sf_table_init(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_table *table; + int err; + + if (!mlx5_sf_table_supported(dev) || !mlx5_vhca_event_supported(dev)) + return 0; + + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (!table) + return -ENOMEM; + + mutex_init(&table->sf_state_lock); + table->dev = dev; + xa_init(&table->port_indices); + dev->priv.sf_table = table; + refcount_set(&table->refcount, 0); + table->esw_nb.notifier_call = mlx5_sf_esw_event; + err = mlx5_esw_event_notifier_register(dev->priv.eswitch, &table->esw_nb); + if (err) + goto reg_err; + + table->vhca_nb.notifier_call = mlx5_sf_vhca_event; + err = mlx5_vhca_event_notifier_register(table->dev, &table->vhca_nb); + if (err) + goto vhca_err; + + return 0; + +vhca_err: + mlx5_esw_event_notifier_unregister(dev->priv.eswitch, &table->esw_nb); +reg_err: + mutex_destroy(&table->sf_state_lock); + kfree(table); + dev->priv.sf_table = NULL; + return err; +} + +void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_table *table = dev->priv.sf_table; + + if (!table) + return; + + mlx5_vhca_event_notifier_unregister(table->dev, &table->vhca_nb); + mlx5_esw_event_notifier_unregister(dev->priv.eswitch, &table->esw_nb); + WARN_ON(refcount_read(&table->refcount)); + mutex_destroy(&table->sf_state_lock); + WARN_ON(!xa_empty(&table->port_indices)); + kfree(table); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/sf_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/sf_tracepoint.h new file mode 100644 index 000000000..8bf1cd909 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/sf_tracepoint.h @@ -0,0 +1,173 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_SF_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_SF_TP_ + +#include <linux/tracepoint.h> +#include <linux/mlx5/driver.h> +#include "sf/vhca_event.h" + +TRACE_EVENT(mlx5_sf_add, + TP_PROTO(const struct mlx5_core_dev *dev, + unsigned int port_index, + u32 controller, + u16 hw_fn_id, + u32 sfnum), + TP_ARGS(dev, port_index, controller, hw_fn_id, sfnum), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(unsigned int, port_index) + __field(u32, controller) + __field(u16, hw_fn_id) + __field(u32, sfnum) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->port_index = port_index; + __entry->controller = controller; + __entry->hw_fn_id = hw_fn_id; + __entry->sfnum = sfnum; + ), + TP_printk("(%s) port_index=%u controller=%u hw_id=0x%x sfnum=%u\n", + __get_str(devname), __entry->port_index, __entry->controller, + __entry->hw_fn_id, __entry->sfnum) +); + +TRACE_EVENT(mlx5_sf_free, + TP_PROTO(const struct mlx5_core_dev *dev, + unsigned int port_index, + u32 controller, + u16 hw_fn_id), + TP_ARGS(dev, port_index, controller, hw_fn_id), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(unsigned int, port_index) + __field(u32, controller) + __field(u16, hw_fn_id) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->port_index = port_index; + __entry->controller = controller; + __entry->hw_fn_id = hw_fn_id; + ), + TP_printk("(%s) port_index=%u controller=%u hw_id=0x%x\n", + __get_str(devname), __entry->port_index, __entry->controller, + __entry->hw_fn_id) +); + +TRACE_EVENT(mlx5_sf_hwc_alloc, + TP_PROTO(const struct mlx5_core_dev *dev, + u32 controller, + u16 hw_fn_id, + u32 sfnum), + TP_ARGS(dev, controller, hw_fn_id, sfnum), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(u32, controller) + __field(u16, hw_fn_id) + __field(u32, sfnum) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->controller = controller; + __entry->hw_fn_id = hw_fn_id; + __entry->sfnum = sfnum; + ), + TP_printk("(%s) controller=%u hw_id=0x%x sfnum=%u\n", + __get_str(devname), __entry->controller, __entry->hw_fn_id, + __entry->sfnum) +); + +TRACE_EVENT(mlx5_sf_hwc_free, + TP_PROTO(const struct mlx5_core_dev *dev, + u16 hw_fn_id), + TP_ARGS(dev, hw_fn_id), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(u16, hw_fn_id) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->hw_fn_id = hw_fn_id; + ), + TP_printk("(%s) hw_id=0x%x\n", __get_str(devname), __entry->hw_fn_id) +); + +TRACE_EVENT(mlx5_sf_hwc_deferred_free, + TP_PROTO(const struct mlx5_core_dev *dev, + u16 hw_fn_id), + TP_ARGS(dev, hw_fn_id), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(u16, hw_fn_id) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->hw_fn_id = hw_fn_id; + ), + TP_printk("(%s) hw_id=0x%x\n", __get_str(devname), __entry->hw_fn_id) +); + +DECLARE_EVENT_CLASS(mlx5_sf_state_template, + TP_PROTO(const struct mlx5_core_dev *dev, + u32 port_index, + u32 controller, + u16 hw_fn_id), + TP_ARGS(dev, port_index, controller, hw_fn_id), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(unsigned int, port_index) + __field(u32, controller) + __field(u16, hw_fn_id)), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->port_index = port_index; + __entry->controller = controller; + __entry->hw_fn_id = hw_fn_id; + ), 
+ TP_printk("(%s) port_index=%u controller=%u hw_id=0x%x\n", + __get_str(devname), __entry->port_index, __entry->controller, + __entry->hw_fn_id) +); + +DEFINE_EVENT(mlx5_sf_state_template, mlx5_sf_activate, + TP_PROTO(const struct mlx5_core_dev *dev, + u32 port_index, + u32 controller, + u16 hw_fn_id), + TP_ARGS(dev, port_index, controller, hw_fn_id) + ); + +DEFINE_EVENT(mlx5_sf_state_template, mlx5_sf_deactivate, + TP_PROTO(const struct mlx5_core_dev *dev, + u32 port_index, + u32 controller, + u16 hw_fn_id), + TP_ARGS(dev, port_index, controller, hw_fn_id) + ); + +TRACE_EVENT(mlx5_sf_update_state, + TP_PROTO(const struct mlx5_core_dev *dev, + unsigned int port_index, + u32 controller, + u16 hw_fn_id, + u8 state), + TP_ARGS(dev, port_index, controller, hw_fn_id, state), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(unsigned int, port_index) + __field(u32, controller) + __field(u16, hw_fn_id) + __field(u8, state) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->port_index = port_index; + __entry->controller = controller; + __entry->hw_fn_id = hw_fn_id; + __entry->state = state; + ), + TP_printk("(%s) port_index=%u controller=%u hw_id=0x%x state=%u\n", + __get_str(devname), __entry->port_index, __entry->controller, + __entry->hw_fn_id, __entry->state) +); + +#endif /* _MLX5_SF_TP_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH sf/diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE sf_tracepoint +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/vhca_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/vhca_tracepoint.h new file mode 100644 index 000000000..fd814a190 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/vhca_tracepoint.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_SF_VHCA_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_SF_VHCA_TP_ + +#include <linux/tracepoint.h> +#include <linux/mlx5/driver.h> +#include "sf/vhca_event.h" + +TRACE_EVENT(mlx5_sf_vhca_event, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_vhca_state_event *event), + TP_ARGS(dev, event), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(u16, hw_fn_id) + __field(u32, sfnum) + __field(u8, vhca_state) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->hw_fn_id = event->function_id; + __entry->sfnum = event->sw_function_id; + __entry->vhca_state = event->new_vhca_state; + ), + TP_printk("(%s) hw_id=0x%x sfnum=%u vhca_state=%d\n", + __get_str(devname), __entry->hw_fn_id, + __entry->sfnum, __entry->vhca_state) +); + +#endif /* _MLX5_SF_VHCA_TP_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH sf/diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE vhca_tracepoint +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c new file mode 100644 index 000000000..17aa34898 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c @@ -0,0 +1,364 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd */ +#include <linux/mlx5/driver.h> +#include "vhca_event.h" +#include "priv.h" +#include "sf.h" +#include "mlx5_ifc_vhca_event.h" +#include "ecpf.h" +#include "mlx5_core.h" +#include "eswitch.h" +#include "diag/sf_tracepoint.h" + +struct mlx5_sf_hw { + u32 usr_sfnum; + u8 allocated: 1; + u8 pending_delete: 1; +}; + +struct mlx5_sf_hwc_table { + struct mlx5_sf_hw *sfs; + int max_fn; + u16 start_fn_id; +}; + +enum mlx5_sf_hwc_index { + MLX5_SF_HWC_LOCAL, + MLX5_SF_HWC_EXTERNAL, + MLX5_SF_HWC_MAX, +}; + +struct mlx5_sf_hw_table { + struct mlx5_core_dev *dev; + struct mutex table_lock; /* Serializes sf deletion and vhca state change handler. 
*/ + struct notifier_block vhca_nb; + struct mlx5_sf_hwc_table hwc[MLX5_SF_HWC_MAX]; +}; + +static struct mlx5_sf_hwc_table * +mlx5_sf_controller_to_hwc(struct mlx5_core_dev *dev, u32 controller) +{ + int idx = !!controller; + + return &dev->priv.sf_hw_table->hwc[idx]; +} + +u16 mlx5_sf_sw_to_hw_id(struct mlx5_core_dev *dev, u32 controller, u16 sw_id) +{ + struct mlx5_sf_hwc_table *hwc; + + hwc = mlx5_sf_controller_to_hwc(dev, controller); + return hwc->start_fn_id + sw_id; +} + +static u16 mlx5_sf_hw_to_sw_id(struct mlx5_sf_hwc_table *hwc, u16 hw_id) +{ + return hw_id - hwc->start_fn_id; +} + +static struct mlx5_sf_hwc_table * +mlx5_sf_table_fn_to_hwc(struct mlx5_sf_hw_table *table, u16 fn_id) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(table->hwc); i++) { + if (table->hwc[i].max_fn && + fn_id >= table->hwc[i].start_fn_id && + fn_id < (table->hwc[i].start_fn_id + table->hwc[i].max_fn)) + return &table->hwc[i]; + } + return NULL; +} + +static int mlx5_sf_hw_table_id_alloc(struct mlx5_sf_hw_table *table, u32 controller, + u32 usr_sfnum) +{ + struct mlx5_sf_hwc_table *hwc; + int free_idx = -1; + int i; + + hwc = mlx5_sf_controller_to_hwc(table->dev, controller); + if (!hwc->sfs) + return -ENOSPC; + + for (i = 0; i < hwc->max_fn; i++) { + if (!hwc->sfs[i].allocated && free_idx == -1) { + free_idx = i; + continue; + } + + if (hwc->sfs[i].allocated && hwc->sfs[i].usr_sfnum == usr_sfnum) + return -EEXIST; + } + + if (free_idx == -1) + return -ENOSPC; + + hwc->sfs[free_idx].usr_sfnum = usr_sfnum; + hwc->sfs[free_idx].allocated = true; + return free_idx; +} + +static void mlx5_sf_hw_table_id_free(struct mlx5_sf_hw_table *table, u32 controller, int id) +{ + struct mlx5_sf_hwc_table *hwc; + + hwc = mlx5_sf_controller_to_hwc(table->dev, controller); + hwc->sfs[id].allocated = false; + hwc->sfs[id].pending_delete = false; +} + +int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 controller, u32 usr_sfnum) +{ + struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; + u16 hw_fn_id; + int sw_id; + int err; + + if (!table) + return -EOPNOTSUPP; + + mutex_lock(&table->table_lock); + sw_id = mlx5_sf_hw_table_id_alloc(table, controller, usr_sfnum); + if (sw_id < 0) { + err = sw_id; + goto exist_err; + } + + hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, sw_id); + err = mlx5_cmd_alloc_sf(dev, hw_fn_id); + if (err) + goto err; + + err = mlx5_modify_vhca_sw_id(dev, hw_fn_id, usr_sfnum); + if (err) + goto vhca_err; + + if (controller) { + /* If this SF is for external controller, SF manager + * needs to arm firmware to receive the events. 
+ */ + err = mlx5_vhca_event_arm(dev, hw_fn_id); + if (err) + goto vhca_err; + } + + trace_mlx5_sf_hwc_alloc(dev, controller, hw_fn_id, usr_sfnum); + mutex_unlock(&table->table_lock); + return sw_id; + +vhca_err: + mlx5_cmd_dealloc_sf(dev, hw_fn_id); +err: + mlx5_sf_hw_table_id_free(table, controller, sw_id); +exist_err: + mutex_unlock(&table->table_lock); + return err; +} + +void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u32 controller, u16 id) +{ + struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; + u16 hw_fn_id; + + mutex_lock(&table->table_lock); + hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, id); + mlx5_cmd_dealloc_sf(dev, hw_fn_id); + mlx5_sf_hw_table_id_free(table, controller, id); + mutex_unlock(&table->table_lock); +} + +static void mlx5_sf_hw_table_hwc_sf_free(struct mlx5_core_dev *dev, + struct mlx5_sf_hwc_table *hwc, int idx) +{ + mlx5_cmd_dealloc_sf(dev, hwc->start_fn_id + idx); + hwc->sfs[idx].allocated = false; + hwc->sfs[idx].pending_delete = false; + trace_mlx5_sf_hwc_free(dev, hwc->start_fn_id + idx); +} + +void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u32 controller, u16 id) +{ + struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; + u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {}; + struct mlx5_sf_hwc_table *hwc; + u16 hw_fn_id; + u8 state; + int err; + + hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, id); + hwc = mlx5_sf_controller_to_hwc(dev, controller); + mutex_lock(&table->table_lock); + err = mlx5_cmd_query_vhca_state(dev, hw_fn_id, out, sizeof(out)); + if (err) + goto err; + state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state); + if (state == MLX5_VHCA_STATE_ALLOCATED) { + mlx5_cmd_dealloc_sf(dev, hw_fn_id); + hwc->sfs[id].allocated = false; + } else { + hwc->sfs[id].pending_delete = true; + trace_mlx5_sf_hwc_deferred_free(dev, hw_fn_id); + } +err: + mutex_unlock(&table->table_lock); +} + +static void mlx5_sf_hw_table_hwc_dealloc_all(struct mlx5_core_dev *dev, + struct mlx5_sf_hwc_table *hwc) +{ + int i; + + for (i = 0; i < hwc->max_fn; i++) { + if (hwc->sfs[i].allocated) + mlx5_sf_hw_table_hwc_sf_free(dev, hwc, i); + } +} + +static void mlx5_sf_hw_table_dealloc_all(struct mlx5_sf_hw_table *table) +{ + mlx5_sf_hw_table_hwc_dealloc_all(table->dev, &table->hwc[MLX5_SF_HWC_EXTERNAL]); + mlx5_sf_hw_table_hwc_dealloc_all(table->dev, &table->hwc[MLX5_SF_HWC_LOCAL]); +} + +static int mlx5_sf_hw_table_hwc_init(struct mlx5_sf_hwc_table *hwc, u16 max_fn, u16 base_id) +{ + struct mlx5_sf_hw *sfs; + + if (!max_fn) + return 0; + + sfs = kcalloc(max_fn, sizeof(*sfs), GFP_KERNEL); + if (!sfs) + return -ENOMEM; + + hwc->sfs = sfs; + hwc->max_fn = max_fn; + hwc->start_fn_id = base_id; + return 0; +} + +static void mlx5_sf_hw_table_hwc_cleanup(struct mlx5_sf_hwc_table *hwc) +{ + kfree(hwc->sfs); +} + +int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_hw_table *table; + u16 max_ext_fn = 0; + u16 ext_base_id = 0; + u16 max_fn = 0; + u16 base_id; + int err; + + if (!mlx5_vhca_event_supported(dev)) + return 0; + + if (mlx5_sf_supported(dev)) + max_fn = mlx5_sf_max_functions(dev); + + err = mlx5_esw_sf_max_hpf_functions(dev, &max_ext_fn, &ext_base_id); + if (err) + return err; + + if (!max_fn && !max_ext_fn) + return 0; + + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (!table) + return -ENOMEM; + + mutex_init(&table->table_lock); + table->dev = dev; + dev->priv.sf_hw_table = table; + + base_id = mlx5_sf_start_function_id(dev); + err = mlx5_sf_hw_table_hwc_init(&table->hwc[MLX5_SF_HWC_LOCAL], 
max_fn, base_id); + if (err) + goto table_err; + + err = mlx5_sf_hw_table_hwc_init(&table->hwc[MLX5_SF_HWC_EXTERNAL], + max_ext_fn, ext_base_id); + if (err) + goto ext_err; + + mlx5_core_dbg(dev, "SF HW table: max sfs = %d, ext sfs = %d\n", max_fn, max_ext_fn); + return 0; + +ext_err: + mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_LOCAL]); +table_err: + mutex_destroy(&table->table_lock); + kfree(table); + return err; +} + +void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; + + if (!table) + return; + + mutex_destroy(&table->table_lock); + mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_EXTERNAL]); + mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_LOCAL]); + kfree(table); +} + +static int mlx5_sf_hw_vhca_event(struct notifier_block *nb, unsigned long opcode, void *data) +{ + struct mlx5_sf_hw_table *table = container_of(nb, struct mlx5_sf_hw_table, vhca_nb); + const struct mlx5_vhca_state_event *event = data; + struct mlx5_sf_hwc_table *hwc; + struct mlx5_sf_hw *sf_hw; + u16 sw_id; + + if (event->new_vhca_state != MLX5_VHCA_STATE_ALLOCATED) + return 0; + + hwc = mlx5_sf_table_fn_to_hwc(table, event->function_id); + if (!hwc) + return 0; + + sw_id = mlx5_sf_hw_to_sw_id(hwc, event->function_id); + sf_hw = &hwc->sfs[sw_id]; + + mutex_lock(&table->table_lock); + /* SF driver notified through firmware that SF is finally detached. + * Hence recycle the sf hardware id for reuse. + */ + if (sf_hw->allocated && sf_hw->pending_delete) + mlx5_sf_hw_table_hwc_sf_free(table->dev, hwc, sw_id); + mutex_unlock(&table->table_lock); + return 0; +} + +int mlx5_sf_hw_table_create(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; + + if (!table) + return 0; + + table->vhca_nb.notifier_call = mlx5_sf_hw_vhca_event; + return mlx5_vhca_event_notifier_register(dev, &table->vhca_nb); +} + +void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; + + if (!table) + return; + + mlx5_vhca_event_notifier_unregister(dev, &table->vhca_nb); + /* Dealloc SFs whose firmware event has been missed. 
*/ + mlx5_sf_hw_table_dealloc_all(table); +} + +bool mlx5_sf_hw_table_supported(const struct mlx5_core_dev *dev) +{ + return !!dev->priv.sf_hw_table; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h new file mode 100644 index 000000000..4fc870140 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#ifndef __MLX5_IFC_VHCA_EVENT_H__ +#define __MLX5_IFC_VHCA_EVENT_H__ + +enum mlx5_ifc_vhca_state { + MLX5_VHCA_STATE_INVALID = 0x0, + MLX5_VHCA_STATE_ALLOCATED = 0x1, + MLX5_VHCA_STATE_ACTIVE = 0x2, + MLX5_VHCA_STATE_IN_USE = 0x3, + MLX5_VHCA_STATE_TEARDOWN_REQUEST = 0x4, +}; + +struct mlx5_ifc_vhca_state_context_bits { + u8 arm_change_event[0x1]; + u8 reserved_at_1[0xb]; + u8 vhca_state[0x4]; + u8 reserved_at_10[0x10]; + + u8 sw_function_id[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_query_vhca_state_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_vhca_state_context_bits vhca_state_context; +}; + +struct mlx5_ifc_query_vhca_state_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; + u8 function_id[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_vhca_state_field_select_bits { + u8 reserved_at_0[0x1e]; + u8 sw_function_id[0x1]; + u8 arm_change_event[0x1]; +}; + +struct mlx5_ifc_modify_vhca_state_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_modify_vhca_state_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; + u8 function_id[0x10]; + + struct mlx5_ifc_vhca_state_field_select_bits vhca_state_field_select; + + struct mlx5_ifc_vhca_state_context_bits vhca_state_context; +}; + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h new file mode 100644 index 000000000..7114f3fc3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#ifndef __MLX5_SF_PRIV_H__ +#define __MLX5_SF_PRIV_H__ + +#include <linux/mlx5/driver.h> + +int mlx5_cmd_alloc_sf(struct mlx5_core_dev *dev, u16 function_id); +int mlx5_cmd_dealloc_sf(struct mlx5_core_dev *dev, u16 function_id); + +int mlx5_cmd_sf_enable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_cmd_sf_disable_hca(struct mlx5_core_dev *dev, u16 func_id); + +u16 mlx5_sf_sw_to_hw_id(struct mlx5_core_dev *dev, u32 controller, u16 sw_id); + +int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 controller, u32 usr_sfnum); +void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u32 controller, u16 id); +void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u32 controller, u16 id); +bool mlx5_sf_hw_table_supported(const struct mlx5_core_dev *dev); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h new file mode 100644 index 000000000..3a480e06e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h @@ -0,0 +1,65 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#ifndef __MLX5_SF_H__ +#define __MLX5_SF_H__ + +#include <linux/mlx5/driver.h> +#include "lib/sf.h" + +#ifdef CONFIG_MLX5_SF_MANAGER + +int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev); +void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev); + +int mlx5_sf_hw_table_create(struct mlx5_core_dev *dev); +void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev); + +int mlx5_sf_table_init(struct mlx5_core_dev *dev); +void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev); + +int mlx5_devlink_sf_port_new(struct devlink *devlink, + const struct devlink_port_new_attrs *add_attr, + struct netlink_ext_ack *extack, + unsigned int *new_port_index); +int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index, + struct netlink_ext_ack *extack); +int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port, + enum devlink_port_fn_state *state, + enum devlink_port_fn_opstate *opstate, + struct netlink_ext_ack *extack); +int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port, + enum devlink_port_fn_state state, + struct netlink_ext_ack *extack); +#else + +static inline int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev) +{ + return 0; +} + +static inline void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev) +{ +} + +static inline int mlx5_sf_hw_table_create(struct mlx5_core_dev *dev) +{ + return 0; +} + +static inline void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev) +{ +} + +static inline int mlx5_sf_table_init(struct mlx5_core_dev *dev) +{ + return 0; +} + +static inline void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev) +{ +} + +#endif + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c new file mode 100644 index 000000000..d908fba96 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#include <linux/mlx5/driver.h> +#include "mlx5_ifc_vhca_event.h" +#include "mlx5_core.h" +#include "vhca_event.h" +#include "ecpf.h" +#define CREATE_TRACE_POINTS +#include "diag/vhca_tracepoint.h" + +struct mlx5_vhca_state_notifier { + struct mlx5_core_dev *dev; + struct mlx5_nb nb; + struct blocking_notifier_head n_head; +}; + +struct mlx5_vhca_event_work { + struct work_struct work; + struct mlx5_vhca_state_notifier *notifier; + struct mlx5_vhca_state_event event; +}; + +int mlx5_cmd_query_vhca_state(struct mlx5_core_dev *dev, u16 function_id, u32 *out, u32 outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_vhca_state_in)] = {}; + + MLX5_SET(query_vhca_state_in, in, opcode, MLX5_CMD_OP_QUERY_VHCA_STATE); + MLX5_SET(query_vhca_state_in, in, function_id, function_id); + MLX5_SET(query_vhca_state_in, in, embedded_cpu_function, 0); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +static int mlx5_cmd_modify_vhca_state(struct mlx5_core_dev *dev, u16 function_id, + u32 *in, u32 inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_vhca_state_out)] = {}; + + MLX5_SET(modify_vhca_state_in, in, opcode, MLX5_CMD_OP_MODIFY_VHCA_STATE); + MLX5_SET(modify_vhca_state_in, in, function_id, function_id); + MLX5_SET(modify_vhca_state_in, in, embedded_cpu_function, 0); + + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +} + +int mlx5_modify_vhca_sw_id(struct mlx5_core_dev *dev, u16 function_id, u32 sw_fn_id) +{ + u32 
out[MLX5_ST_SZ_DW(modify_vhca_state_out)] = {}; + u32 in[MLX5_ST_SZ_DW(modify_vhca_state_in)] = {}; + + MLX5_SET(modify_vhca_state_in, in, opcode, MLX5_CMD_OP_MODIFY_VHCA_STATE); + MLX5_SET(modify_vhca_state_in, in, function_id, function_id); + MLX5_SET(modify_vhca_state_in, in, embedded_cpu_function, 0); + MLX5_SET(modify_vhca_state_in, in, vhca_state_field_select.sw_function_id, 1); + MLX5_SET(modify_vhca_state_in, in, vhca_state_context.sw_function_id, sw_fn_id); + + return mlx5_cmd_exec_inout(dev, modify_vhca_state, in, out); +} +
+int mlx5_vhca_event_arm(struct mlx5_core_dev *dev, u16 function_id) +{ + u32 in[MLX5_ST_SZ_DW(modify_vhca_state_in)] = {}; + + MLX5_SET(modify_vhca_state_in, in, vhca_state_context.arm_change_event, 1); + MLX5_SET(modify_vhca_state_in, in, vhca_state_field_select.arm_change_event, 1); + + return mlx5_cmd_modify_vhca_state(dev, function_id, in, sizeof(in)); +} +
+static void +mlx5_vhca_event_notify(struct mlx5_core_dev *dev, struct mlx5_vhca_state_event *event) +{ + u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {}; + int err; + + err = mlx5_cmd_query_vhca_state(dev, event->function_id, out, sizeof(out)); + if (err) + return; + + event->sw_function_id = MLX5_GET(query_vhca_state_out, out, + vhca_state_context.sw_function_id); + event->new_vhca_state = MLX5_GET(query_vhca_state_out, out, + vhca_state_context.vhca_state); + + mlx5_vhca_event_arm(dev, event->function_id); + trace_mlx5_sf_vhca_event(dev, event); + + blocking_notifier_call_chain(&dev->priv.vhca_state_notifier->n_head, 0, event); +} +
+static void mlx5_vhca_state_work_handler(struct work_struct *_work) +{ + struct mlx5_vhca_event_work *work = container_of(_work, struct mlx5_vhca_event_work, work); + struct mlx5_vhca_state_notifier *notifier = work->notifier; + struct mlx5_core_dev *dev = notifier->dev; + + mlx5_vhca_event_notify(dev, &work->event); + kfree(work); +} +
+static int +mlx5_vhca_state_change_notifier(struct notifier_block *nb, unsigned long type, void *data) +{ + struct mlx5_vhca_state_notifier *notifier = + mlx5_nb_cof(nb, struct mlx5_vhca_state_notifier, nb); + struct mlx5_vhca_event_work *work; + struct mlx5_eqe *eqe = data; + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return NOTIFY_DONE; + INIT_WORK(&work->work, &mlx5_vhca_state_work_handler); + work->notifier = notifier; + work->event.function_id = be16_to_cpu(eqe->data.vhca_state.function_id); + mlx5_events_work_enqueue(notifier->dev, &work->work); + return NOTIFY_OK; +} +
+void mlx5_vhca_state_cap_handle(struct mlx5_core_dev *dev, void *set_hca_cap) +{ + if (!mlx5_vhca_event_supported(dev)) + return; + + MLX5_SET(cmd_hca_cap, set_hca_cap, vhca_state, 1); + MLX5_SET(cmd_hca_cap, set_hca_cap, event_on_vhca_state_allocated, 1); + MLX5_SET(cmd_hca_cap, set_hca_cap, event_on_vhca_state_active, 1); + MLX5_SET(cmd_hca_cap, set_hca_cap, event_on_vhca_state_in_use, 1); + MLX5_SET(cmd_hca_cap, set_hca_cap, event_on_vhca_state_teardown_request, 1); +} +
+int mlx5_vhca_event_init(struct mlx5_core_dev *dev) +{ + struct mlx5_vhca_state_notifier *notifier; + + if (!mlx5_vhca_event_supported(dev)) + return 0; + + notifier = kzalloc(sizeof(*notifier), GFP_KERNEL); + if (!notifier) + return -ENOMEM; + + dev->priv.vhca_state_notifier = notifier; + notifier->dev = dev; + BLOCKING_INIT_NOTIFIER_HEAD(&notifier->n_head); + MLX5_NB_INIT(&notifier->nb, mlx5_vhca_state_change_notifier, VHCA_STATE_CHANGE); + return 0; +} +
+void mlx5_vhca_event_cleanup(struct mlx5_core_dev *dev) +{ + if (!mlx5_vhca_event_supported(dev)) + return; + +
kfree(dev->priv.vhca_state_notifier); + dev->priv.vhca_state_notifier = NULL; +} +
+void mlx5_vhca_event_start(struct mlx5_core_dev *dev) +{ + struct mlx5_vhca_state_notifier *notifier; + + if (!dev->priv.vhca_state_notifier) + return; + + notifier = dev->priv.vhca_state_notifier; + mlx5_eq_notifier_register(dev, &notifier->nb); +} +
+void mlx5_vhca_event_stop(struct mlx5_core_dev *dev) +{ + struct mlx5_vhca_state_notifier *notifier; + + if (!dev->priv.vhca_state_notifier) + return; + + notifier = dev->priv.vhca_state_notifier; + mlx5_eq_notifier_unregister(dev, &notifier->nb); +} +
+int mlx5_vhca_event_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + if (!dev->priv.vhca_state_notifier) + return -EOPNOTSUPP; + return blocking_notifier_chain_register(&dev->priv.vhca_state_notifier->n_head, nb); +} +
+void mlx5_vhca_event_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&dev->priv.vhca_state_notifier->n_head, nb); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h new file mode 100644 index 000000000..013cdfe90 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd */ +
+#ifndef __MLX5_VHCA_EVENT_H__ +#define __MLX5_VHCA_EVENT_H__ +
+#ifdef CONFIG_MLX5_SF +
+struct mlx5_vhca_state_event { + u16 function_id; + u16 sw_function_id; + u8 new_vhca_state; +}; +
+static inline bool mlx5_vhca_event_supported(const struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN_MAX(dev, vhca_state); +} +
+void mlx5_vhca_state_cap_handle(struct mlx5_core_dev *dev, void *set_hca_cap); +int mlx5_vhca_event_init(struct mlx5_core_dev *dev); +void mlx5_vhca_event_cleanup(struct mlx5_core_dev *dev); +void mlx5_vhca_event_start(struct mlx5_core_dev *dev); +void mlx5_vhca_event_stop(struct mlx5_core_dev *dev); +int mlx5_vhca_event_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb); +void mlx5_vhca_event_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb); +int mlx5_modify_vhca_sw_id(struct mlx5_core_dev *dev, u16 function_id, u32 sw_fn_id); +int mlx5_vhca_event_arm(struct mlx5_core_dev *dev, u16 function_id); +int mlx5_cmd_query_vhca_state(struct mlx5_core_dev *dev, u16 function_id, + u32 *out, u32 outlen); +#else +
+static inline void mlx5_vhca_state_cap_handle(struct mlx5_core_dev *dev, void *set_hca_cap) +{ +} +
+static inline int mlx5_vhca_event_init(struct mlx5_core_dev *dev) +{ + return 0; +} +
+static inline void mlx5_vhca_event_cleanup(struct mlx5_core_dev *dev) +{ +} +
+static inline void mlx5_vhca_event_start(struct mlx5_core_dev *dev) +{ +} +
+static inline void mlx5_vhca_event_stop(struct mlx5_core_dev *dev) +{ +} +
+#endif +
+#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c new file mode 100644 index 000000000..5f2195e65 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -0,0 +1,354 @@ +/* + * Copyright (c) 2014, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/pci.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/vport.h> +#include "mlx5_core.h" +#include "mlx5_irq.h" +#include "eswitch.h" + +static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + struct mlx5_hca_vport_context *in; + int err = 0; + + /* Restore sriov guid and policy settings */ + if (sriov->vfs_ctx[vf].node_guid || + sriov->vfs_ctx[vf].port_guid || + sriov->vfs_ctx[vf].policy != MLX5_POLICY_INVALID) { + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + in->node_guid = sriov->vfs_ctx[vf].node_guid; + in->port_guid = sriov->vfs_ctx[vf].port_guid; + in->policy = sriov->vfs_ctx[vf].policy; + in->field_select = + !!(in->port_guid) * MLX5_HCA_VPORT_SEL_PORT_GUID | + !!(in->node_guid) * MLX5_HCA_VPORT_SEL_NODE_GUID | + !!(in->policy) * MLX5_HCA_VPORT_SEL_STATE_POLICY; + + err = mlx5_core_modify_hca_vport_context(dev, 1, 1, vf + 1, in); + if (err) + mlx5_core_warn(dev, "modify vport context failed, unable to restore VF %d settings\n", vf); + + kfree(in); + } + + return err; +} + +static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err, vf, num_msix_count; + + if (!MLX5_ESWITCH_MANAGER(dev)) + goto enable_vfs_hca; + + err = mlx5_eswitch_enable(dev->priv.eswitch, num_vfs); + if (err) { + mlx5_core_warn(dev, + "failed to enable eswitch SRIOV (%d)\n", err); + return err; + } + +enable_vfs_hca: + num_msix_count = mlx5_get_default_msix_vec_count(dev, num_vfs); + for (vf = 0; vf < num_vfs; vf++) { + /* Notify the VF before its enablement to let it set + * some stuff. 
+ */ + blocking_notifier_call_chain(&sriov->vfs_ctx[vf].notifier, + MLX5_PF_NOTIFY_ENABLE_VF, dev); + err = mlx5_core_enable_hca(dev, vf + 1); + if (err) { + mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err); + continue; + } + + err = mlx5_set_msix_vec_count(dev, vf + 1, num_msix_count); + if (err) { + mlx5_core_warn(dev, + "failed to set MSI-X vector counts VF %d, err %d\n", + vf, err); + continue; + } + + sriov->vfs_ctx[vf].enabled = 1; + if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) { + err = sriov_restore_guids(dev, vf); + if (err) { + mlx5_core_warn(dev, + "failed to restore VF %d settings, err %d\n", + vf, err); + continue; + } + } + mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf); + } + + return 0; +} + +static void +mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + int vf; + + for (vf = num_vfs - 1; vf >= 0; vf--) { + if (!sriov->vfs_ctx[vf].enabled) + continue; + /* Notify the VF before its disablement to let it clean + * some resources. + */ + blocking_notifier_call_chain(&sriov->vfs_ctx[vf].notifier, + MLX5_PF_NOTIFY_DISABLE_VF, dev); + err = mlx5_core_disable_hca(dev, vf + 1); + if (err) { + mlx5_core_warn(dev, "failed to disable VF %d\n", vf); + continue; + } + sriov->vfs_ctx[vf].enabled = 0; + } + + mlx5_eswitch_disable_sriov(dev->priv.eswitch, clear_vf); + + /* For ECPFs, skip waiting for host VF pages until ECPF is destroyed */ + if (mlx5_core_is_ecpf(dev)) + return; + + if (mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_VF])) + mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); +} + +static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct devlink *devlink = priv_to_devlink(dev); + int err; + + devl_lock(devlink); + err = mlx5_device_enable_sriov(dev, num_vfs); + devl_unlock(devlink); + if (err) { + mlx5_core_warn(dev, "mlx5_device_enable_sriov failed : %d\n", err); + return err; + } + + err = pci_enable_sriov(pdev, num_vfs); + if (err) { + mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err); + mlx5_device_disable_sriov(dev, num_vfs, true); + } + return err; +} + +void mlx5_sriov_disable(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct devlink *devlink = priv_to_devlink(dev); + int num_vfs = pci_num_vf(dev->pdev); + + pci_disable_sriov(pdev); + devl_lock(devlink); + mlx5_device_disable_sriov(dev, num_vfs, true); + devl_unlock(devlink); +} + +int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err = 0; + + mlx5_core_dbg(dev, "requested num_vfs %d\n", num_vfs); + + if (num_vfs) + err = mlx5_sriov_enable(pdev, num_vfs); + else + mlx5_sriov_disable(pdev); + + if (!err) + sriov->num_vfs = num_vfs; + return err ? 
err : num_vfs; +} + +int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count) +{ + struct pci_dev *pf = pci_physfn(vf); + struct mlx5_core_sriov *sriov; + struct mlx5_core_dev *dev; + int num_vf_msix, id; + + dev = pci_get_drvdata(pf); + num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); + if (!num_vf_msix) + return -EOPNOTSUPP; + + if (!msix_vec_count) + msix_vec_count = + mlx5_get_default_msix_vec_count(dev, pci_num_vf(pf)); + + sriov = &dev->priv.sriov; + id = pci_iov_vf_id(vf); + if (id < 0 || !sriov->vfs_ctx[id].enabled) + return -EINVAL; + + return mlx5_set_msix_vec_count(dev, id + 1, msix_vec_count); +} + +int mlx5_sriov_attach(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_pf(dev) || !pci_num_vf(dev->pdev)) + return 0; + + /* If sriov VFs exist in PCI level, enable them in device level */ + return mlx5_device_enable_sriov(dev, pci_num_vf(dev->pdev)); +} + +void mlx5_sriov_detach(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_pf(dev)) + return; + + mlx5_device_disable_sriov(dev, pci_num_vf(dev->pdev), false); +} + +static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev) +{ + u16 host_total_vfs; + const u32 *out; + + if (mlx5_core_is_ecpf_esw_manager(dev)) { + out = mlx5_esw_query_functions(dev); + + /* Old FW doesn't support getting total_vfs from esw func + * but supports getting it from pci_sriov. + */ + if (IS_ERR(out)) + goto done; + host_total_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_total_vfs); + kvfree(out); + return host_total_vfs; + } + +done: + return pci_sriov_get_totalvfs(dev->pdev); +} + +int mlx5_sriov_init(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + struct pci_dev *pdev = dev->pdev; + int total_vfs, i; + + if (!mlx5_core_is_pf(dev)) + return 0; + + total_vfs = pci_sriov_get_totalvfs(pdev); + sriov->max_vfs = mlx5_get_max_vfs(dev); + sriov->num_vfs = pci_num_vf(pdev); + sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); + if (!sriov->vfs_ctx) + return -ENOMEM; + + for (i = 0; i < total_vfs; i++) + BLOCKING_INIT_NOTIFIER_HEAD(&sriov->vfs_ctx[i].notifier); + + return 0; +} + +void mlx5_sriov_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + + if (!mlx5_core_is_pf(dev)) + return; + + kfree(sriov->vfs_ctx); +} + +/** + * mlx5_sriov_blocking_notifier_unregister - Unregister a VF from + * a notification block chain. + * + * @mdev: The mlx5 core device. + * @vf_id: The VF id. + * @nb: The notifier block to be unregistered. + */ +void mlx5_sriov_blocking_notifier_unregister(struct mlx5_core_dev *mdev, + int vf_id, + struct notifier_block *nb) +{ + struct mlx5_vf_context *vfs_ctx; + struct mlx5_core_sriov *sriov; + + sriov = &mdev->priv.sriov; + if (WARN_ON(vf_id < 0 || vf_id >= sriov->num_vfs)) + return; + + vfs_ctx = &sriov->vfs_ctx[vf_id]; + blocking_notifier_chain_unregister(&vfs_ctx->notifier, nb); +} +EXPORT_SYMBOL(mlx5_sriov_blocking_notifier_unregister); + +/** + * mlx5_sriov_blocking_notifier_register - Register a VF notification + * block chain. + * + * @mdev: The mlx5 core device. + * @vf_id: The VF id. + * @nb: The notifier block to be called upon the VF events. + * + * Returns 0 on success or an error code. 
+ */ +int mlx5_sriov_blocking_notifier_register(struct mlx5_core_dev *mdev, + int vf_id, + struct notifier_block *nb) +{ + struct mlx5_vf_context *vfs_ctx; + struct mlx5_core_sriov *sriov; + + sriov = &mdev->priv.sriov; + if (vf_id < 0 || vf_id >= sriov->num_vfs) + return -EINVAL; + + vfs_ctx = &sriov->vfs_ctx[vf_id]; + return blocking_notifier_chain_register(&vfs_ctx->notifier, nb); +} +EXPORT_SYMBOL(mlx5_sriov_blocking_notifier_register); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile new file mode 100644 index 000000000..c78512eed --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c new file mode 100644 index 000000000..bf7517725 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -0,0 +1,2003 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "dr_types.h" +#include "dr_ste.h" + +enum dr_action_domain { + DR_ACTION_DOMAIN_NIC_INGRESS, + DR_ACTION_DOMAIN_NIC_EGRESS, + DR_ACTION_DOMAIN_FDB_INGRESS, + DR_ACTION_DOMAIN_FDB_EGRESS, + DR_ACTION_DOMAIN_MAX, +}; + +enum dr_action_valid_state { + DR_ACTION_STATE_ERR, + DR_ACTION_STATE_NO_ACTION, + DR_ACTION_STATE_ENCAP, + DR_ACTION_STATE_DECAP, + DR_ACTION_STATE_MODIFY_HDR, + DR_ACTION_STATE_POP_VLAN, + DR_ACTION_STATE_PUSH_VLAN, + DR_ACTION_STATE_NON_TERM, + DR_ACTION_STATE_TERM, + DR_ACTION_STATE_ASO, + DR_ACTION_STATE_MAX, +}; + +static const char * const action_type_to_str[] = { + [DR_ACTION_TYP_TNL_L2_TO_L2] = "DR_ACTION_TYP_TNL_L2_TO_L2", + [DR_ACTION_TYP_L2_TO_TNL_L2] = "DR_ACTION_TYP_L2_TO_TNL_L2", + [DR_ACTION_TYP_TNL_L3_TO_L2] = "DR_ACTION_TYP_TNL_L3_TO_L2", + [DR_ACTION_TYP_L2_TO_TNL_L3] = "DR_ACTION_TYP_L2_TO_TNL_L3", + [DR_ACTION_TYP_DROP] = "DR_ACTION_TYP_DROP", + [DR_ACTION_TYP_QP] = "DR_ACTION_TYP_QP", + [DR_ACTION_TYP_FT] = "DR_ACTION_TYP_FT", + [DR_ACTION_TYP_CTR] = "DR_ACTION_TYP_CTR", + [DR_ACTION_TYP_TAG] = "DR_ACTION_TYP_TAG", + [DR_ACTION_TYP_MODIFY_HDR] = "DR_ACTION_TYP_MODIFY_HDR", + [DR_ACTION_TYP_VPORT] = "DR_ACTION_TYP_VPORT", + [DR_ACTION_TYP_POP_VLAN] = "DR_ACTION_TYP_POP_VLAN", + [DR_ACTION_TYP_PUSH_VLAN] = "DR_ACTION_TYP_PUSH_VLAN", + [DR_ACTION_TYP_SAMPLER] = "DR_ACTION_TYP_SAMPLER", + [DR_ACTION_TYP_INSERT_HDR] = "DR_ACTION_TYP_INSERT_HDR", + [DR_ACTION_TYP_REMOVE_HDR] = "DR_ACTION_TYP_REMOVE_HDR", + [DR_ACTION_TYP_ASO_FLOW_METER] = "DR_ACTION_TYP_ASO_FLOW_METER", + [DR_ACTION_TYP_MAX] = "DR_ACTION_UNKNOWN", +}; + +static const char *dr_action_id_to_str(enum mlx5dr_action_type action_id) +{ + if (action_id > DR_ACTION_TYP_MAX) + action_id = DR_ACTION_TYP_MAX; + return action_type_to_str[action_id]; +} + +static const enum dr_action_valid_state +next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] = { + [DR_ACTION_DOMAIN_NIC_INGRESS] = { + [DR_ACTION_STATE_NO_ACTION] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_TNL_L3_TO_L2] = 
DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_DECAP] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_ENCAP] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_MODIFY_HDR] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_POP_VLAN] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_PUSH_VLAN] = { + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_NON_TERM] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + 
[DR_ACTION_TYP_TAG] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_ASO] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_TERM] = { + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, + }, + }, + [DR_ACTION_DOMAIN_NIC_EGRESS] = { + [DR_ACTION_STATE_NO_ACTION] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_DECAP] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_ENCAP] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_MODIFY_HDR] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_POP_VLAN] = { + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_PUSH_VLAN] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN, + 
[DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_NON_TERM] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_ASO] = { + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO, + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_TERM] = { + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, + }, + }, + [DR_ACTION_DOMAIN_FDB_INGRESS] = { + [DR_ACTION_STATE_NO_ACTION] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_DECAP] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_ENCAP] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_MODIFY_HDR] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] 
= DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_POP_VLAN] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_PUSH_VLAN] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_NON_TERM] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_ASO] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_TERM] = { + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, + }, + }, + [DR_ACTION_DOMAIN_FDB_EGRESS] = { + [DR_ACTION_STATE_NO_ACTION] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_DECAP] = { + [DR_ACTION_TYP_DROP] = 
DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_ENCAP] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_MODIFY_HDR] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_POP_VLAN] = { + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_PUSH_VLAN] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_NON_TERM] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_ASO] = { + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_TERM] = { + 
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, + }, + }, +}; + +static int +dr_action_reformat_to_action_type(enum mlx5dr_action_reformat_type reformat_type, + enum mlx5dr_action_type *action_type) +{ + switch (reformat_type) { + case DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2: + *action_type = DR_ACTION_TYP_TNL_L2_TO_L2; + break; + case DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2: + *action_type = DR_ACTION_TYP_L2_TO_TNL_L2; + break; + case DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2: + *action_type = DR_ACTION_TYP_TNL_L3_TO_L2; + break; + case DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3: + *action_type = DR_ACTION_TYP_L2_TO_TNL_L3; + break; + case DR_ACTION_REFORMAT_TYP_INSERT_HDR: + *action_type = DR_ACTION_TYP_INSERT_HDR; + break; + case DR_ACTION_REFORMAT_TYP_REMOVE_HDR: + *action_type = DR_ACTION_TYP_REMOVE_HDR; + break; + default: + return -EINVAL; + } + + return 0; +} + +/* Apply the actions on the rule STE array starting from the last_ste. + * Actions might require more than one STE, new_num_stes will return + * the new size of the STEs array, rule with actions. + */ +static void dr_actions_apply(struct mlx5dr_domain *dmn, + enum mlx5dr_domain_nic_type nic_type, + u8 *action_type_set, + u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, + u32 *new_num_stes) +{ + struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx; + u32 added_stes = 0; + + if (nic_type == DR_DOMAIN_NIC_TYPE_RX) + mlx5dr_ste_set_actions_rx(ste_ctx, dmn, action_type_set, + last_ste, attr, &added_stes); + else + mlx5dr_ste_set_actions_tx(ste_ctx, dmn, action_type_set, + last_ste, attr, &added_stes); + + *new_num_stes += added_stes; +} + +static enum dr_action_domain +dr_action_get_action_domain(enum mlx5dr_domain_type domain, + enum mlx5dr_domain_nic_type nic_type) +{ + switch (domain) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + return DR_ACTION_DOMAIN_NIC_INGRESS; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + return DR_ACTION_DOMAIN_NIC_EGRESS; + case MLX5DR_DOMAIN_TYPE_FDB: + if (nic_type == DR_DOMAIN_NIC_TYPE_RX) + return DR_ACTION_DOMAIN_FDB_INGRESS; + return DR_ACTION_DOMAIN_FDB_EGRESS; + default: + WARN_ON(true); + return DR_ACTION_DOMAIN_MAX; + } +} + +static +int dr_action_validate_and_get_next_state(enum dr_action_domain action_domain, + u32 action_type, + u32 *state) +{ + u32 cur_state = *state; + + /* Check action state machine is valid */ + *state = next_action_state[action_domain][cur_state][action_type]; + + if (*state == DR_ACTION_STATE_ERR) + return -EOPNOTSUPP; + + return 0; +} + +static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn, + struct mlx5dr_action *dest_action, + u64 *final_icm_addr) +{ + int ret; + + switch (dest_action->action_type) { + case DR_ACTION_TYP_FT: + /* Allow destination flow table only if table is a terminating + * table, since there is an *assumption* that in such case FW + * will recalculate the CS. + */ + if (dest_action->dest_tbl->is_fw_tbl) { + *final_icm_addr = dest_action->dest_tbl->fw_tbl.rx_icm_addr; + } else { + mlx5dr_dbg(dmn, + "Destination FT should be terminating when modify TTL is used\n"); + return -EINVAL; + } + break; + + case DR_ACTION_TYP_VPORT: + /* If destination is vport we will get the FW flow table + * that recalculates the CS and forwards to the vport. 
+ */ + ret = mlx5dr_domain_get_recalc_cs_ft_addr(dest_action->vport->dmn, + dest_action->vport->caps->num, + final_icm_addr); + if (ret) { + mlx5dr_err(dmn, "Failed to get FW cs recalc flow table\n"); + return ret; + } + break; + + default: + break; + } + + return 0; +} + +static void dr_action_modify_ttl_adjust(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_actions_attr *attr, + bool rx_rule, + bool *recalc_cs_required) +{ + *recalc_cs_required = false; + + /* if device supports csum recalculation - no adjustment needed */ + if (mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps)) + return; + + /* no adjustment needed on TX rules */ + if (!rx_rule) + return; + + if (!MLX5_CAP_ESW_FLOWTABLE(dmn->mdev, fdb_ipv4_ttl_modify)) { + /* Ignore the modify TTL action. + * It is always kept as last HW action. + */ + attr->modify_actions--; + return; + } + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB) + /* Due to a HW bug on some devices, modifying TTL on RX flows + * will cause an incorrect checksum calculation. In such cases + * we will use a FW table to recalculate the checksum. + */ + *recalc_cs_required = true; +} + +static void dr_action_print_sequence(struct mlx5dr_domain *dmn, + struct mlx5dr_action *actions[], + int last_idx) +{ + int i; + + for (i = 0; i <= last_idx; i++) + mlx5dr_err(dmn, "< %s (%d) > ", + dr_action_id_to_str(actions[i]->action_type), + actions[i]->action_type); +} + +#define WITH_VLAN_NUM_HW_ACTIONS 6 + +int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_action *actions[], + u32 num_actions, + u8 *ste_arr, + u32 *new_hw_ste_arr_sz) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn; + bool rx_rule = nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + u8 action_type_set[DR_ACTION_TYP_MAX] = {}; + struct mlx5dr_ste_actions_attr attr = {}; + struct mlx5dr_action *dest_action = NULL; + u32 state = DR_ACTION_STATE_NO_ACTION; + enum dr_action_domain action_domain; + bool recalc_cs_required = false; + u8 *last_ste; + int i, ret; + + attr.gvmi = dmn->info.caps.gvmi; + attr.hit_gvmi = dmn->info.caps.gvmi; + attr.final_icm_addr = nic_dmn->default_icm_addr; + action_domain = dr_action_get_action_domain(dmn->type, nic_dmn->type); + + for (i = 0; i < num_actions; i++) { + struct mlx5dr_action_dest_tbl *dest_tbl; + struct mlx5dr_icm_chunk *chunk; + struct mlx5dr_action *action; + int max_actions_type = 1; + u32 action_type; + + action = actions[i]; + action_type = action->action_type; + + switch (action_type) { + case DR_ACTION_TYP_DROP: + attr.final_icm_addr = nic_dmn->drop_icm_addr; + attr.hit_gvmi = nic_dmn->drop_icm_addr >> 48; + break; + case DR_ACTION_TYP_FT: + dest_action = action; + dest_tbl = action->dest_tbl; + if (!dest_tbl->is_fw_tbl) { + if (dest_tbl->tbl->dmn != dmn) { + mlx5dr_err(dmn, + "Destination table belongs to a different domain\n"); + return -EINVAL; + } + if (dest_tbl->tbl->level <= matcher->tbl->level) { + mlx5_core_dbg_once(dmn->mdev, + "Connecting table to a lower/same level destination table\n"); + mlx5dr_dbg(dmn, + "Connecting table at level %d to a destination table at level %d\n", + matcher->tbl->level, + dest_tbl->tbl->level); + } + chunk = rx_rule ? 
dest_tbl->tbl->rx.s_anchor->chunk : + dest_tbl->tbl->tx.s_anchor->chunk; + attr.final_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(chunk); + } else { + struct mlx5dr_cmd_query_flow_table_details output; + int ret; + + /* get the relevant addresses */ + if (!action->dest_tbl->fw_tbl.rx_icm_addr) { + ret = mlx5dr_cmd_query_flow_table(dmn->mdev, + dest_tbl->fw_tbl.type, + dest_tbl->fw_tbl.id, + &output); + if (!ret) { + dest_tbl->fw_tbl.tx_icm_addr = + output.sw_owner_icm_root_1; + dest_tbl->fw_tbl.rx_icm_addr = + output.sw_owner_icm_root_0; + } else { + mlx5dr_err(dmn, + "Failed mlx5_cmd_query_flow_table ret: %d\n", + ret); + return ret; + } + } + attr.final_icm_addr = rx_rule ? + dest_tbl->fw_tbl.rx_icm_addr : + dest_tbl->fw_tbl.tx_icm_addr; + } + break; + case DR_ACTION_TYP_QP: + mlx5dr_info(dmn, "Domain doesn't support QP\n"); + return -EOPNOTSUPP; + case DR_ACTION_TYP_CTR: + attr.ctr_id = action->ctr->ctr_id + + action->ctr->offset; + break; + case DR_ACTION_TYP_TAG: + attr.flow_tag = action->flow_tag->flow_tag; + break; + case DR_ACTION_TYP_TNL_L2_TO_L2: + break; + case DR_ACTION_TYP_TNL_L3_TO_L2: + attr.decap_index = action->rewrite->index; + attr.decap_actions = action->rewrite->num_of_actions; + attr.decap_with_vlan = + attr.decap_actions == WITH_VLAN_NUM_HW_ACTIONS; + break; + case DR_ACTION_TYP_MODIFY_HDR: + attr.modify_index = action->rewrite->index; + attr.modify_actions = action->rewrite->num_of_actions; + if (action->rewrite->modify_ttl) + dr_action_modify_ttl_adjust(dmn, &attr, rx_rule, + &recalc_cs_required); + break; + case DR_ACTION_TYP_L2_TO_TNL_L2: + case DR_ACTION_TYP_L2_TO_TNL_L3: + if (rx_rule && + !(dmn->ste_ctx->actions_caps & DR_STE_CTX_ACTION_CAP_RX_ENCAP)) { + mlx5dr_info(dmn, "Device doesn't support Encap on RX\n"); + return -EOPNOTSUPP; + } + attr.reformat.size = action->reformat->size; + attr.reformat.id = action->reformat->id; + break; + case DR_ACTION_TYP_SAMPLER: + attr.final_icm_addr = rx_rule ? action->sampler->rx_icm_addr : + action->sampler->tx_icm_addr; + break; + case DR_ACTION_TYP_VPORT: + if (unlikely(rx_rule && action->vport->caps->num == MLX5_VPORT_UPLINK)) { + /* can't go to uplink on RX rule - dropping instead */ + attr.final_icm_addr = nic_dmn->drop_icm_addr; + attr.hit_gvmi = nic_dmn->drop_icm_addr >> 48; + } else { + attr.hit_gvmi = action->vport->caps->vhca_gvmi; + dest_action = action; + attr.final_icm_addr = rx_rule ? 
+ action->vport->caps->icm_address_rx : + action->vport->caps->icm_address_tx; + } + break; + case DR_ACTION_TYP_POP_VLAN: + if (!rx_rule && !(dmn->ste_ctx->actions_caps & + DR_STE_CTX_ACTION_CAP_TX_POP)) { + mlx5dr_dbg(dmn, "Device doesn't support POP VLAN action on TX\n"); + return -EOPNOTSUPP; + } + + max_actions_type = MLX5DR_MAX_VLANS; + attr.vlans.count++; + break; + case DR_ACTION_TYP_PUSH_VLAN: + if (rx_rule && !(dmn->ste_ctx->actions_caps & + DR_STE_CTX_ACTION_CAP_RX_PUSH)) { + mlx5dr_dbg(dmn, "Device doesn't support PUSH VLAN action on RX\n"); + return -EOPNOTSUPP; + } + + max_actions_type = MLX5DR_MAX_VLANS; + if (attr.vlans.count == MLX5DR_MAX_VLANS) { + mlx5dr_dbg(dmn, "Max VLAN push/pop count exceeded\n"); + return -EINVAL; + } + + attr.vlans.headers[attr.vlans.count++] = action->push_vlan->vlan_hdr; + break; + case DR_ACTION_TYP_INSERT_HDR: + case DR_ACTION_TYP_REMOVE_HDR: + attr.reformat.size = action->reformat->size; + attr.reformat.id = action->reformat->id; + attr.reformat.param_0 = action->reformat->param_0; + attr.reformat.param_1 = action->reformat->param_1; + break; + case DR_ACTION_TYP_ASO_FLOW_METER: + attr.aso_flow_meter.obj_id = action->aso->obj_id; + attr.aso_flow_meter.offset = action->aso->offset; + attr.aso_flow_meter.dest_reg_id = action->aso->dest_reg_id; + attr.aso_flow_meter.init_color = action->aso->init_color; + break; + default: + mlx5dr_err(dmn, "Unsupported action type %d\n", action_type); + return -EINVAL; + } + + /* Check action duplication */ + if (++action_type_set[action_type] > max_actions_type) { + mlx5dr_err(dmn, "Action type %d supports only max %d time(s)\n", + action_type, max_actions_type); + return -EINVAL; + } + + /* Check action state machine is valid */ + if (dr_action_validate_and_get_next_state(action_domain, + action_type, + &state)) { + mlx5dr_err(dmn, "Invalid action (gvmi: %d, is_rx: %d) sequence provided:", + attr.gvmi, rx_rule); + dr_action_print_sequence(dmn, actions, i); + return -EOPNOTSUPP; + } + } + + *new_hw_ste_arr_sz = nic_matcher->num_of_builders; + last_ste = ste_arr + DR_STE_SIZE * (nic_matcher->num_of_builders - 1); + + if (recalc_cs_required && dest_action) { + ret = dr_action_handle_cs_recalc(dmn, dest_action, &attr.final_icm_addr); + if (ret) { + mlx5dr_err(dmn, + "Failed to handle checksum recalculation err %d\n", + ret); + return ret; + } + } + + dr_actions_apply(dmn, + nic_dmn->type, + action_type_set, + last_ste, + &attr, + new_hw_ste_arr_sz); + + return 0; +} + +static unsigned int action_size[DR_ACTION_TYP_MAX] = { + [DR_ACTION_TYP_TNL_L2_TO_L2] = sizeof(struct mlx5dr_action_reformat), + [DR_ACTION_TYP_L2_TO_TNL_L2] = sizeof(struct mlx5dr_action_reformat), + [DR_ACTION_TYP_TNL_L3_TO_L2] = sizeof(struct mlx5dr_action_rewrite), + [DR_ACTION_TYP_L2_TO_TNL_L3] = sizeof(struct mlx5dr_action_reformat), + [DR_ACTION_TYP_FT] = sizeof(struct mlx5dr_action_dest_tbl), + [DR_ACTION_TYP_CTR] = sizeof(struct mlx5dr_action_ctr), + [DR_ACTION_TYP_TAG] = sizeof(struct mlx5dr_action_flow_tag), + [DR_ACTION_TYP_MODIFY_HDR] = sizeof(struct mlx5dr_action_rewrite), + [DR_ACTION_TYP_VPORT] = sizeof(struct mlx5dr_action_vport), + [DR_ACTION_TYP_PUSH_VLAN] = sizeof(struct mlx5dr_action_push_vlan), + [DR_ACTION_TYP_INSERT_HDR] = sizeof(struct mlx5dr_action_reformat), + [DR_ACTION_TYP_REMOVE_HDR] = sizeof(struct mlx5dr_action_reformat), + [DR_ACTION_TYP_SAMPLER] = sizeof(struct mlx5dr_action_sampler), + [DR_ACTION_TYP_ASO_FLOW_METER] = sizeof(struct mlx5dr_action_aso_flow_meter), +}; + +static struct mlx5dr_action * 
+dr_action_create_generic(enum mlx5dr_action_type action_type) +{ + struct mlx5dr_action *action; + int extra_size; + + if (action_type < DR_ACTION_TYP_MAX) + extra_size = action_size[action_type]; + else + return NULL; + + action = kzalloc(sizeof(*action) + extra_size, GFP_KERNEL); + if (!action) + return NULL; + + action->action_type = action_type; + refcount_set(&action->refcount, 1); + action->data = action + 1; + + return action; +} + +struct mlx5dr_action *mlx5dr_action_create_drop(void) +{ + return dr_action_create_generic(DR_ACTION_TYP_DROP); +} + +struct mlx5dr_action * +mlx5dr_action_create_dest_table_num(struct mlx5dr_domain *dmn, u32 table_num) +{ + struct mlx5dr_action *action; + + action = dr_action_create_generic(DR_ACTION_TYP_FT); + if (!action) + return NULL; + + action->dest_tbl->is_fw_tbl = true; + action->dest_tbl->fw_tbl.dmn = dmn; + action->dest_tbl->fw_tbl.id = table_num; + action->dest_tbl->fw_tbl.type = FS_FT_FDB; + refcount_inc(&dmn->refcount); + + return action; +} + +struct mlx5dr_action * +mlx5dr_action_create_dest_table(struct mlx5dr_table *tbl) +{ + struct mlx5dr_action *action; + + refcount_inc(&tbl->refcount); + + action = dr_action_create_generic(DR_ACTION_TYP_FT); + if (!action) + goto dec_ref; + + action->dest_tbl->tbl = tbl; + + return action; + +dec_ref: + refcount_dec(&tbl->refcount); + return NULL; +} + +struct mlx5dr_action * +mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_action_dest *dests, + u32 num_of_dests, + bool ignore_flow_level, + u32 flow_source) +{ + struct mlx5dr_cmd_flow_destination_hw_info *hw_dests; + struct mlx5dr_action **ref_actions; + struct mlx5dr_action *action; + bool reformat_req = false; + u32 num_of_ref = 0; + u32 ref_act_cnt; + int ret; + int i; + + if (dmn->type != MLX5DR_DOMAIN_TYPE_FDB) { + mlx5dr_err(dmn, "Multiple destination support is for FDB only\n"); + return NULL; + } + + hw_dests = kcalloc(num_of_dests, sizeof(*hw_dests), GFP_KERNEL); + if (!hw_dests) + return NULL; + + if (unlikely(check_mul_overflow(num_of_dests, 2u, &ref_act_cnt))) + goto free_hw_dests; + + ref_actions = kcalloc(ref_act_cnt, sizeof(*ref_actions), GFP_KERNEL); + if (!ref_actions) + goto free_hw_dests; + + for (i = 0; i < num_of_dests; i++) { + struct mlx5dr_action *reformat_action = dests[i].reformat; + struct mlx5dr_action *dest_action = dests[i].dest; + + ref_actions[num_of_ref++] = dest_action; + + switch (dest_action->action_type) { + case DR_ACTION_TYP_VPORT: + hw_dests[i].vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; + hw_dests[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + hw_dests[i].vport.num = dest_action->vport->caps->num; + hw_dests[i].vport.vhca_id = dest_action->vport->caps->vhca_gvmi; + if (reformat_action) { + reformat_req = true; + hw_dests[i].vport.reformat_id = + reformat_action->reformat->id; + ref_actions[num_of_ref++] = reformat_action; + hw_dests[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; + } + break; + + case DR_ACTION_TYP_FT: + hw_dests[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + if (dest_action->dest_tbl->is_fw_tbl) + hw_dests[i].ft_id = dest_action->dest_tbl->fw_tbl.id; + else + hw_dests[i].ft_id = dest_action->dest_tbl->tbl->table_id; + break; + + default: + mlx5dr_dbg(dmn, "Invalid multiple destinations action\n"); + goto free_ref_actions; + } + } + + action = dr_action_create_generic(DR_ACTION_TYP_FT); + if (!action) + goto free_ref_actions; + + ret = mlx5dr_fw_create_md_tbl(dmn, + hw_dests, + num_of_dests, + reformat_req, + &action->dest_tbl->fw_tbl.id, + 
&action->dest_tbl->fw_tbl.group_id, + ignore_flow_level, + flow_source); + if (ret) + goto free_action; + + refcount_inc(&dmn->refcount); + + for (i = 0; i < num_of_ref; i++) + refcount_inc(&ref_actions[i]->refcount); + + action->dest_tbl->is_fw_tbl = true; + action->dest_tbl->fw_tbl.dmn = dmn; + action->dest_tbl->fw_tbl.type = FS_FT_FDB; + action->dest_tbl->fw_tbl.ref_actions = ref_actions; + action->dest_tbl->fw_tbl.num_of_ref_actions = num_of_ref; + + kfree(hw_dests); + + return action; + +free_action: + kfree(action); +free_ref_actions: + kfree(ref_actions); +free_hw_dests: + kfree(hw_dests); + return NULL; +} + +struct mlx5dr_action * +mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *dmn, + struct mlx5_flow_table *ft) +{ + struct mlx5dr_action *action; + + action = dr_action_create_generic(DR_ACTION_TYP_FT); + if (!action) + return NULL; + + action->dest_tbl->is_fw_tbl = 1; + action->dest_tbl->fw_tbl.type = ft->type; + action->dest_tbl->fw_tbl.id = ft->id; + action->dest_tbl->fw_tbl.dmn = dmn; + + refcount_inc(&dmn->refcount); + + return action; +} + +struct mlx5dr_action * +mlx5dr_action_create_flow_counter(u32 counter_id) +{ + struct mlx5dr_action *action; + + action = dr_action_create_generic(DR_ACTION_TYP_CTR); + if (!action) + return NULL; + + action->ctr->ctr_id = counter_id; + + return action; +} + +struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value) +{ + struct mlx5dr_action *action; + + action = dr_action_create_generic(DR_ACTION_TYP_TAG); + if (!action) + return NULL; + + action->flow_tag->flow_tag = tag_value & 0xffffff; + + return action; +} + +struct mlx5dr_action * +mlx5dr_action_create_flow_sampler(struct mlx5dr_domain *dmn, u32 sampler_id) +{ + struct mlx5dr_action *action; + u64 icm_rx, icm_tx; + int ret; + + ret = mlx5dr_cmd_query_flow_sampler(dmn->mdev, sampler_id, + &icm_rx, &icm_tx); + if (ret) + return NULL; + + action = dr_action_create_generic(DR_ACTION_TYP_SAMPLER); + if (!action) + return NULL; + + action->sampler->dmn = dmn; + action->sampler->sampler_id = sampler_id; + action->sampler->rx_icm_addr = icm_rx; + action->sampler->tx_icm_addr = icm_tx; + + refcount_inc(&dmn->refcount); + return action; +} + +static int +dr_action_verify_reformat_params(enum mlx5dr_action_type reformat_type, + struct mlx5dr_domain *dmn, + u8 reformat_param_0, + u8 reformat_param_1, + size_t data_sz, + void *data) +{ + if (reformat_type == DR_ACTION_TYP_INSERT_HDR) { + if ((!data && data_sz) || (data && !data_sz) || + MLX5_CAP_GEN_2(dmn->mdev, max_reformat_insert_size) < data_sz || + MLX5_CAP_GEN_2(dmn->mdev, max_reformat_insert_offset) < reformat_param_1) { + mlx5dr_dbg(dmn, "Invalid reformat parameters for INSERT_HDR\n"); + goto out_err; + } + } else if (reformat_type == DR_ACTION_TYP_REMOVE_HDR) { + if (data || + MLX5_CAP_GEN_2(dmn->mdev, max_reformat_remove_size) < data_sz || + MLX5_CAP_GEN_2(dmn->mdev, max_reformat_remove_offset) < reformat_param_1) { + mlx5dr_dbg(dmn, "Invalid reformat parameters for REMOVE_HDR\n"); + goto out_err; + } + } else if (reformat_param_0 || reformat_param_1 || + reformat_type > DR_ACTION_TYP_REMOVE_HDR) { + mlx5dr_dbg(dmn, "Invalid reformat parameters\n"); + goto out_err; + } + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB) + return 0; + + if (dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) { + if (reformat_type != DR_ACTION_TYP_TNL_L2_TO_L2 && + reformat_type != DR_ACTION_TYP_TNL_L3_TO_L2) { + mlx5dr_dbg(dmn, "Action reformat type not support on RX domain\n"); + goto out_err; + } + } else if (dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) { + 
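+		/* Mirror of the RX-domain check above: a TX-only (egress) NIC
+		 * domain may only encapsulate (L2_TO_TNL_L2/L3); FDB domains
+		 * returned early above and accept both encap and decap reformats.
+		 */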
if (reformat_type != DR_ACTION_TYP_L2_TO_TNL_L2 && + reformat_type != DR_ACTION_TYP_L2_TO_TNL_L3) { + mlx5dr_dbg(dmn, "Action reformat type not support on TX domain\n"); + goto out_err; + } + } + + return 0; + +out_err: + return -EINVAL; +} + +#define ACTION_CACHE_LINE_SIZE 64 + +static int +dr_action_create_reformat_action(struct mlx5dr_domain *dmn, + u8 reformat_param_0, u8 reformat_param_1, + size_t data_sz, void *data, + struct mlx5dr_action *action) +{ + u32 reformat_id; + int ret; + + switch (action->action_type) { + case DR_ACTION_TYP_L2_TO_TNL_L2: + case DR_ACTION_TYP_L2_TO_TNL_L3: + { + enum mlx5_reformat_ctx_type rt; + + if (action->action_type == DR_ACTION_TYP_L2_TO_TNL_L2) + rt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL; + else + rt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL; + + ret = mlx5dr_cmd_create_reformat_ctx(dmn->mdev, rt, 0, 0, + data_sz, data, + &reformat_id); + if (ret) + return ret; + + action->reformat->id = reformat_id; + action->reformat->size = data_sz; + return 0; + } + case DR_ACTION_TYP_TNL_L2_TO_L2: + { + return 0; + } + case DR_ACTION_TYP_TNL_L3_TO_L2: + { + u8 *hw_actions; + int ret; + + hw_actions = kzalloc(ACTION_CACHE_LINE_SIZE, GFP_KERNEL); + if (!hw_actions) + return -ENOMEM; + + ret = mlx5dr_ste_set_action_decap_l3_list(dmn->ste_ctx, + data, data_sz, + hw_actions, + ACTION_CACHE_LINE_SIZE, + &action->rewrite->num_of_actions); + if (ret) { + mlx5dr_dbg(dmn, "Failed creating decap l3 action list\n"); + kfree(hw_actions); + return ret; + } + + action->rewrite->chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool, + DR_CHUNK_SIZE_8); + if (!action->rewrite->chunk) { + mlx5dr_dbg(dmn, "Failed allocating modify header chunk\n"); + kfree(hw_actions); + return -ENOMEM; + } + + action->rewrite->data = (void *)hw_actions; + action->rewrite->index = (mlx5dr_icm_pool_get_chunk_icm_addr + (action->rewrite->chunk) - + dmn->info.caps.hdr_modify_icm_addr) / + ACTION_CACHE_LINE_SIZE; + + ret = mlx5dr_send_postsend_action(dmn, action); + if (ret) { + mlx5dr_dbg(dmn, "Writing decap l3 actions to ICM failed\n"); + mlx5dr_icm_free_chunk(action->rewrite->chunk); + kfree(hw_actions); + return ret; + } + return 0; + } + case DR_ACTION_TYP_INSERT_HDR: + ret = mlx5dr_cmd_create_reformat_ctx(dmn->mdev, + MLX5_REFORMAT_TYPE_INSERT_HDR, + reformat_param_0, + reformat_param_1, + data_sz, data, + &reformat_id); + if (ret) + return ret; + + action->reformat->id = reformat_id; + action->reformat->size = data_sz; + action->reformat->param_0 = reformat_param_0; + action->reformat->param_1 = reformat_param_1; + return 0; + case DR_ACTION_TYP_REMOVE_HDR: + action->reformat->id = 0; + action->reformat->size = data_sz; + action->reformat->param_0 = reformat_param_0; + action->reformat->param_1 = reformat_param_1; + return 0; + default: + mlx5dr_info(dmn, "Reformat type is not supported %d\n", action->action_type); + return -EINVAL; + } +} + +#define CVLAN_ETHERTYPE 0x8100 +#define SVLAN_ETHERTYPE 0x88a8 + +struct mlx5dr_action *mlx5dr_action_create_pop_vlan(void) +{ + return dr_action_create_generic(DR_ACTION_TYP_POP_VLAN); +} + +struct mlx5dr_action *mlx5dr_action_create_push_vlan(struct mlx5dr_domain *dmn, + __be32 vlan_hdr) +{ + u32 vlan_hdr_h = ntohl(vlan_hdr); + u16 ethertype = vlan_hdr_h >> 16; + struct mlx5dr_action *action; + + if (ethertype != SVLAN_ETHERTYPE && ethertype != CVLAN_ETHERTYPE) { + mlx5dr_dbg(dmn, "Invalid vlan ethertype\n"); + return NULL; + } + + action = dr_action_create_generic(DR_ACTION_TYP_PUSH_VLAN); + if (!action) + return NULL; + + action->push_vlan->vlan_hdr = 
vlan_hdr_h; + return action; +} + +struct mlx5dr_action * +mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn, + enum mlx5dr_action_reformat_type reformat_type, + u8 reformat_param_0, + u8 reformat_param_1, + size_t data_sz, + void *data) +{ + enum mlx5dr_action_type action_type; + struct mlx5dr_action *action; + int ret; + + refcount_inc(&dmn->refcount); + + /* General checks */ + ret = dr_action_reformat_to_action_type(reformat_type, &action_type); + if (ret) { + mlx5dr_dbg(dmn, "Invalid reformat_type provided\n"); + goto dec_ref; + } + + ret = dr_action_verify_reformat_params(action_type, dmn, + reformat_param_0, reformat_param_1, + data_sz, data); + if (ret) + goto dec_ref; + + action = dr_action_create_generic(action_type); + if (!action) + goto dec_ref; + + action->reformat->dmn = dmn; + + ret = dr_action_create_reformat_action(dmn, + reformat_param_0, + reformat_param_1, + data_sz, + data, + action); + if (ret) { + mlx5dr_dbg(dmn, "Failed creating reformat action %d\n", ret); + goto free_action; + } + + return action; + +free_action: + kfree(action); +dec_ref: + refcount_dec(&dmn->refcount); + return NULL; +} + +static int +dr_action_modify_sw_to_hw_add(struct mlx5dr_domain *dmn, + __be64 *sw_action, + __be64 *hw_action, + const struct mlx5dr_ste_action_modify_field **ret_hw_info) +{ + const struct mlx5dr_ste_action_modify_field *hw_action_info; + u8 max_length; + u16 sw_field; + u32 data; + + /* Get SW modify action data */ + sw_field = MLX5_GET(set_action_in, sw_action, field); + data = MLX5_GET(set_action_in, sw_action, data); + + /* Convert SW data to HW modify action format */ + hw_action_info = mlx5dr_ste_conv_modify_hdr_sw_field(dmn->ste_ctx, sw_field); + if (!hw_action_info) { + mlx5dr_dbg(dmn, "Modify add action invalid field given\n"); + return -EINVAL; + } + + max_length = hw_action_info->end - hw_action_info->start + 1; + + mlx5dr_ste_set_action_add(dmn->ste_ctx, + hw_action, + hw_action_info->hw_field, + hw_action_info->start, + max_length, + data); + + *ret_hw_info = hw_action_info; + + return 0; +} + +static int +dr_action_modify_sw_to_hw_set(struct mlx5dr_domain *dmn, + __be64 *sw_action, + __be64 *hw_action, + const struct mlx5dr_ste_action_modify_field **ret_hw_info) +{ + const struct mlx5dr_ste_action_modify_field *hw_action_info; + u8 offset, length, max_length; + u16 sw_field; + u32 data; + + /* Get SW modify action data */ + length = MLX5_GET(set_action_in, sw_action, length); + offset = MLX5_GET(set_action_in, sw_action, offset); + sw_field = MLX5_GET(set_action_in, sw_action, field); + data = MLX5_GET(set_action_in, sw_action, data); + + /* Convert SW data to HW modify action format */ + hw_action_info = mlx5dr_ste_conv_modify_hdr_sw_field(dmn->ste_ctx, sw_field); + if (!hw_action_info) { + mlx5dr_dbg(dmn, "Modify set action invalid field given\n"); + return -EINVAL; + } + + /* PRM defines that length zero specific length of 32bits */ + length = length ? 
length : 32; + + max_length = hw_action_info->end - hw_action_info->start + 1; + + if (length + offset > max_length) { + mlx5dr_dbg(dmn, "Modify action length + offset exceeds limit\n"); + return -EINVAL; + } + + mlx5dr_ste_set_action_set(dmn->ste_ctx, + hw_action, + hw_action_info->hw_field, + hw_action_info->start + offset, + length, + data); + + *ret_hw_info = hw_action_info; + + return 0; +} + +static int +dr_action_modify_sw_to_hw_copy(struct mlx5dr_domain *dmn, + __be64 *sw_action, + __be64 *hw_action, + const struct mlx5dr_ste_action_modify_field **ret_dst_hw_info, + const struct mlx5dr_ste_action_modify_field **ret_src_hw_info) +{ + u8 src_offset, dst_offset, src_max_length, dst_max_length, length; + const struct mlx5dr_ste_action_modify_field *hw_dst_action_info; + const struct mlx5dr_ste_action_modify_field *hw_src_action_info; + u16 src_field, dst_field; + + /* Get SW modify action data */ + src_field = MLX5_GET(copy_action_in, sw_action, src_field); + dst_field = MLX5_GET(copy_action_in, sw_action, dst_field); + src_offset = MLX5_GET(copy_action_in, sw_action, src_offset); + dst_offset = MLX5_GET(copy_action_in, sw_action, dst_offset); + length = MLX5_GET(copy_action_in, sw_action, length); + + /* Convert SW data to HW modify action format */ + hw_src_action_info = mlx5dr_ste_conv_modify_hdr_sw_field(dmn->ste_ctx, src_field); + hw_dst_action_info = mlx5dr_ste_conv_modify_hdr_sw_field(dmn->ste_ctx, dst_field); + if (!hw_src_action_info || !hw_dst_action_info) { + mlx5dr_dbg(dmn, "Modify copy action invalid field given\n"); + return -EINVAL; + } + + /* PRM defines that length zero specific length of 32bits */ + length = length ? length : 32; + + src_max_length = hw_src_action_info->end - + hw_src_action_info->start + 1; + dst_max_length = hw_dst_action_info->end - + hw_dst_action_info->start + 1; + + if (length + src_offset > src_max_length || + length + dst_offset > dst_max_length) { + mlx5dr_dbg(dmn, "Modify action length + offset exceeds limit\n"); + return -EINVAL; + } + + mlx5dr_ste_set_action_copy(dmn->ste_ctx, + hw_action, + hw_dst_action_info->hw_field, + hw_dst_action_info->start + dst_offset, + length, + hw_src_action_info->hw_field, + hw_src_action_info->start + src_offset); + + *ret_dst_hw_info = hw_dst_action_info; + *ret_src_hw_info = hw_src_action_info; + + return 0; +} + +static int +dr_action_modify_sw_to_hw(struct mlx5dr_domain *dmn, + __be64 *sw_action, + __be64 *hw_action, + const struct mlx5dr_ste_action_modify_field **ret_dst_hw_info, + const struct mlx5dr_ste_action_modify_field **ret_src_hw_info) +{ + u8 action; + int ret; + + *hw_action = 0; + *ret_src_hw_info = NULL; + + /* Get SW modify action type */ + action = MLX5_GET(set_action_in, sw_action, action_type); + + switch (action) { + case MLX5_ACTION_TYPE_SET: + ret = dr_action_modify_sw_to_hw_set(dmn, sw_action, + hw_action, + ret_dst_hw_info); + break; + + case MLX5_ACTION_TYPE_ADD: + ret = dr_action_modify_sw_to_hw_add(dmn, sw_action, + hw_action, + ret_dst_hw_info); + break; + + case MLX5_ACTION_TYPE_COPY: + ret = dr_action_modify_sw_to_hw_copy(dmn, sw_action, + hw_action, + ret_dst_hw_info, + ret_src_hw_info); + break; + + default: + mlx5dr_info(dmn, "Unsupported action_type for modify action\n"); + ret = -EOPNOTSUPP; + } + + return ret; +} + +static int +dr_action_modify_check_set_field_limitation(struct mlx5dr_action *action, + const __be64 *sw_action) +{ + u16 sw_field = MLX5_GET(set_action_in, sw_action, field); + struct mlx5dr_domain *dmn = action->rewrite->dmn; + + if (sw_field == 
MLX5_ACTION_IN_FIELD_METADATA_REG_A) { + action->rewrite->allow_rx = 0; + if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_TX) { + mlx5dr_dbg(dmn, "Unsupported field %d for RX/FDB set action\n", + sw_field); + return -EINVAL; + } + } else if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_B) { + action->rewrite->allow_tx = 0; + if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_RX) { + mlx5dr_dbg(dmn, "Unsupported field %d for TX/FDB set action\n", + sw_field); + return -EINVAL; + } + } + + if (!action->rewrite->allow_rx && !action->rewrite->allow_tx) { + mlx5dr_dbg(dmn, "Modify SET actions not supported on both RX and TX\n"); + return -EINVAL; + } + + return 0; +} + +static int +dr_action_modify_check_add_field_limitation(struct mlx5dr_action *action, + const __be64 *sw_action) +{ + u16 sw_field = MLX5_GET(set_action_in, sw_action, field); + struct mlx5dr_domain *dmn = action->rewrite->dmn; + + if (sw_field != MLX5_ACTION_IN_FIELD_OUT_IP_TTL && + sw_field != MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT && + sw_field != MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM && + sw_field != MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM) { + mlx5dr_dbg(dmn, "Unsupported field %d for add action\n", + sw_field); + return -EINVAL; + } + + return 0; +} + +static int +dr_action_modify_check_copy_field_limitation(struct mlx5dr_action *action, + const __be64 *sw_action) +{ + struct mlx5dr_domain *dmn = action->rewrite->dmn; + u16 sw_fields[2]; + int i; + + sw_fields[0] = MLX5_GET(copy_action_in, sw_action, src_field); + sw_fields[1] = MLX5_GET(copy_action_in, sw_action, dst_field); + + for (i = 0; i < 2; i++) { + if (sw_fields[i] == MLX5_ACTION_IN_FIELD_METADATA_REG_A) { + action->rewrite->allow_rx = 0; + if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_TX) { + mlx5dr_dbg(dmn, "Unsupported field %d for RX/FDB set action\n", + sw_fields[i]); + return -EINVAL; + } + } else if (sw_fields[i] == MLX5_ACTION_IN_FIELD_METADATA_REG_B) { + action->rewrite->allow_tx = 0; + if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_RX) { + mlx5dr_dbg(dmn, "Unsupported field %d for TX/FDB set action\n", + sw_fields[i]); + return -EINVAL; + } + } + } + + if (!action->rewrite->allow_rx && !action->rewrite->allow_tx) { + mlx5dr_dbg(dmn, "Modify copy actions not supported on both RX and TX\n"); + return -EINVAL; + } + + return 0; +} + +static int +dr_action_modify_check_field_limitation(struct mlx5dr_action *action, + const __be64 *sw_action) +{ + struct mlx5dr_domain *dmn = action->rewrite->dmn; + u8 action_type; + int ret; + + action_type = MLX5_GET(set_action_in, sw_action, action_type); + + switch (action_type) { + case MLX5_ACTION_TYPE_SET: + ret = dr_action_modify_check_set_field_limitation(action, + sw_action); + break; + + case MLX5_ACTION_TYPE_ADD: + ret = dr_action_modify_check_add_field_limitation(action, + sw_action); + break; + + case MLX5_ACTION_TYPE_COPY: + ret = dr_action_modify_check_copy_field_limitation(action, + sw_action); + break; + + default: + mlx5dr_info(dmn, "Unsupported action %d modify action\n", + action_type); + ret = -EOPNOTSUPP; + } + + return ret; +} + +static bool +dr_action_modify_check_is_ttl_modify(const void *sw_action) +{ + u16 sw_field = MLX5_GET(set_action_in, sw_action, field); + + return sw_field == MLX5_ACTION_IN_FIELD_OUT_IP_TTL; +} + +static int dr_actions_convert_modify_header(struct mlx5dr_action *action, + u32 max_hw_actions, + u32 num_sw_actions, + __be64 sw_actions[], + __be64 hw_actions[], + u32 *num_hw_actions, + bool *modify_ttl) +{ + const struct mlx5dr_ste_action_modify_field *hw_dst_action_info; + const struct 
mlx5dr_ste_action_modify_field *hw_src_action_info; + struct mlx5dr_domain *dmn = action->rewrite->dmn; + __be64 *modify_ttl_sw_action = NULL; + int ret, i, hw_idx = 0; + __be64 *sw_action; + __be64 hw_action; + u16 hw_field = 0; + u32 l3_type = 0; + u32 l4_type = 0; + + *modify_ttl = false; + + action->rewrite->allow_rx = 1; + action->rewrite->allow_tx = 1; + + for (i = 0; i < num_sw_actions || modify_ttl_sw_action; i++) { + /* modify TTL is handled separately, as a last action */ + if (i == num_sw_actions) { + sw_action = modify_ttl_sw_action; + modify_ttl_sw_action = NULL; + } else { + sw_action = &sw_actions[i]; + } + + ret = dr_action_modify_check_field_limitation(action, + sw_action); + if (ret) + return ret; + + if (!(*modify_ttl) && + dr_action_modify_check_is_ttl_modify(sw_action)) { + modify_ttl_sw_action = sw_action; + *modify_ttl = true; + continue; + } + + /* Convert SW action to HW action */ + ret = dr_action_modify_sw_to_hw(dmn, + sw_action, + &hw_action, + &hw_dst_action_info, + &hw_src_action_info); + if (ret) + return ret; + + /* Due to a HW limitation we cannot modify 2 different L3 types */ + if (l3_type && hw_dst_action_info->l3_type && + hw_dst_action_info->l3_type != l3_type) { + mlx5dr_dbg(dmn, "Action list can't support two different L3 types\n"); + return -EINVAL; + } + if (hw_dst_action_info->l3_type) + l3_type = hw_dst_action_info->l3_type; + + /* Due to a HW limitation we cannot modify two different L4 types */ + if (l4_type && hw_dst_action_info->l4_type && + hw_dst_action_info->l4_type != l4_type) { + mlx5dr_dbg(dmn, "Action list can't support two different L4 types\n"); + return -EINVAL; + } + if (hw_dst_action_info->l4_type) + l4_type = hw_dst_action_info->l4_type; + + /* HW reads and executes two actions at once; this means we + * need to create a gap if two actions access the same field + */ + if ((hw_idx % 2) && (hw_field == hw_dst_action_info->hw_field || + (hw_src_action_info && + hw_field == hw_src_action_info->hw_field))) { + /* Check if after gap insertion the total number of HW + * modify actions doesn't exceed the limit + */ + hw_idx++; + if (hw_idx >= max_hw_actions) { + mlx5dr_dbg(dmn, "Modify header action number exceeds HW limit\n"); + return -EINVAL; + } + } + hw_field = hw_dst_action_info->hw_field; + + hw_actions[hw_idx] = hw_action; + hw_idx++; + } + + /* if the resulting HW actions list is empty, add NOP action */ + if (!hw_idx) + hw_idx++; + + *num_hw_actions = hw_idx; + + return 0; +} + +static int dr_action_create_modify_action(struct mlx5dr_domain *dmn, + size_t actions_sz, + __be64 actions[], + struct mlx5dr_action *action) +{ + struct mlx5dr_icm_chunk *chunk; + u32 max_hw_actions; + u32 num_hw_actions; + u32 num_sw_actions; + __be64 *hw_actions; + bool modify_ttl; + int ret; + + num_sw_actions = actions_sz / DR_MODIFY_ACTION_SIZE; + max_hw_actions = mlx5dr_icm_pool_chunk_size_to_entries(DR_CHUNK_SIZE_16); + + if (num_sw_actions > max_hw_actions) { + mlx5dr_dbg(dmn, "Max number of actions %d exceeds limit %d\n", + num_sw_actions, max_hw_actions); + return -EINVAL; + } + + chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool, DR_CHUNK_SIZE_16); + if (!chunk) + return -ENOMEM; + + hw_actions = kcalloc(1, max_hw_actions * DR_MODIFY_ACTION_SIZE, GFP_KERNEL); + if (!hw_actions) { + ret = -ENOMEM; + goto free_chunk; + } + + ret = dr_actions_convert_modify_header(action, + max_hw_actions, + num_sw_actions, + actions, + hw_actions, + &num_hw_actions, + &modify_ttl); + if (ret) + goto free_hw_actions; + + action->rewrite->chunk = chunk; +
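+	/* Bookkeeping for the modify-header action: 'index' is the chunk's
+	 * offset from the modify-header ICM base in ACTION_CACHE_LINE_SIZE
+	 * (64B) units, which is what the STE will reference; for example, a
+	 * chunk at hdr_modify_icm_addr + 3 * 64 gets index 3. The converted
+	 * HW actions are kept in ->data so that mlx5dr_send_postsend_action()
+	 * below can write them into that chunk.
+	 */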
action->rewrite->modify_ttl = modify_ttl; + action->rewrite->data = (u8 *)hw_actions; + action->rewrite->num_of_actions = num_hw_actions; + action->rewrite->index = (mlx5dr_icm_pool_get_chunk_icm_addr(chunk) - + dmn->info.caps.hdr_modify_icm_addr) / + ACTION_CACHE_LINE_SIZE; + + ret = mlx5dr_send_postsend_action(dmn, action); + if (ret) + goto free_hw_actions; + + return 0; + +free_hw_actions: + kfree(hw_actions); +free_chunk: + mlx5dr_icm_free_chunk(chunk); + return ret; +} + +struct mlx5dr_action * +mlx5dr_action_create_modify_header(struct mlx5dr_domain *dmn, + u32 flags, + size_t actions_sz, + __be64 actions[]) +{ + struct mlx5dr_action *action; + int ret = 0; + + refcount_inc(&dmn->refcount); + + if (actions_sz % DR_MODIFY_ACTION_SIZE) { + mlx5dr_dbg(dmn, "Invalid modify actions size provided\n"); + goto dec_ref; + } + + action = dr_action_create_generic(DR_ACTION_TYP_MODIFY_HDR); + if (!action) + goto dec_ref; + + action->rewrite->dmn = dmn; + + ret = dr_action_create_modify_action(dmn, + actions_sz, + actions, + action); + if (ret) { + mlx5dr_dbg(dmn, "Failed creating modify header action %d\n", ret); + goto free_action; + } + + return action; + +free_action: + kfree(action); +dec_ref: + refcount_dec(&dmn->refcount); + return NULL; +} + +struct mlx5dr_action * +mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn, + u16 vport, u8 vhca_id_valid, + u16 vhca_id) +{ + struct mlx5dr_cmd_vport_cap *vport_cap; + struct mlx5dr_domain *vport_dmn; + struct mlx5dr_action *action; + u8 peer_vport; + + peer_vport = vhca_id_valid && (vhca_id != dmn->info.caps.gvmi); + vport_dmn = peer_vport ? dmn->peer_dmn : dmn; + if (!vport_dmn) { + mlx5dr_dbg(dmn, "No peer vport domain for given vhca_id\n"); + return NULL; + } + + if (vport_dmn->type != MLX5DR_DOMAIN_TYPE_FDB) { + mlx5dr_dbg(dmn, "Domain doesn't support vport actions\n"); + return NULL; + } + + vport_cap = mlx5dr_domain_get_vport_cap(vport_dmn, vport); + if (!vport_cap) { + mlx5dr_err(dmn, + "Failed to get vport 0x%x caps - vport is disabled or invalid\n", + vport); + return NULL; + } + + action = dr_action_create_generic(DR_ACTION_TYP_VPORT); + if (!action) + return NULL; + + action->vport->dmn = vport_dmn; + action->vport->caps = vport_cap; + + return action; +} + +struct mlx5dr_action * +mlx5dr_action_create_aso(struct mlx5dr_domain *dmn, u32 obj_id, + u8 dest_reg_id, u8 aso_type, + u8 init_color, u8 meter_id) +{ + struct mlx5dr_action *action; + + if (aso_type != MLX5_EXE_ASO_FLOW_METER) + return NULL; + + if (init_color > MLX5_FLOW_METER_COLOR_UNDEFINED) + return NULL; + + action = dr_action_create_generic(DR_ACTION_TYP_ASO_FLOW_METER); + if (!action) + return NULL; + + action->aso->obj_id = obj_id; + action->aso->offset = meter_id; + action->aso->dest_reg_id = dest_reg_id; + action->aso->init_color = init_color; + action->aso->dmn = dmn; + + refcount_inc(&dmn->refcount); + + return action; +} + +int mlx5dr_action_destroy(struct mlx5dr_action *action) +{ + if (WARN_ON_ONCE(refcount_read(&action->refcount) > 1)) + return -EBUSY; + + switch (action->action_type) { + case DR_ACTION_TYP_FT: + if (action->dest_tbl->is_fw_tbl) + refcount_dec(&action->dest_tbl->fw_tbl.dmn->refcount); + else + refcount_dec(&action->dest_tbl->tbl->refcount); + + if (action->dest_tbl->is_fw_tbl && + action->dest_tbl->fw_tbl.num_of_ref_actions) { + struct mlx5dr_action **ref_actions; + int i; + + ref_actions = action->dest_tbl->fw_tbl.ref_actions; + for (i = 0; i < action->dest_tbl->fw_tbl.num_of_ref_actions; i++) + refcount_dec(&ref_actions[i]->refcount); + + 
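+			/* These references were taken in mlx5dr_action_create_mult_dest_tbl();
+			 * once they are dropped, the multi-destination FW table and
+			 * group that were created for this action are destroyed below.
+			 */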
kfree(ref_actions); + + mlx5dr_fw_destroy_md_tbl(action->dest_tbl->fw_tbl.dmn, + action->dest_tbl->fw_tbl.id, + action->dest_tbl->fw_tbl.group_id); + } + break; + case DR_ACTION_TYP_TNL_L2_TO_L2: + case DR_ACTION_TYP_REMOVE_HDR: + refcount_dec(&action->reformat->dmn->refcount); + break; + case DR_ACTION_TYP_TNL_L3_TO_L2: + mlx5dr_icm_free_chunk(action->rewrite->chunk); + refcount_dec(&action->rewrite->dmn->refcount); + break; + case DR_ACTION_TYP_L2_TO_TNL_L2: + case DR_ACTION_TYP_L2_TO_TNL_L3: + case DR_ACTION_TYP_INSERT_HDR: + mlx5dr_cmd_destroy_reformat_ctx((action->reformat->dmn)->mdev, + action->reformat->id); + refcount_dec(&action->reformat->dmn->refcount); + break; + case DR_ACTION_TYP_MODIFY_HDR: + mlx5dr_icm_free_chunk(action->rewrite->chunk); + kfree(action->rewrite->data); + refcount_dec(&action->rewrite->dmn->refcount); + break; + case DR_ACTION_TYP_SAMPLER: + refcount_dec(&action->sampler->dmn->refcount); + break; + case DR_ACTION_TYP_ASO_FLOW_METER: + refcount_dec(&action->aso->dmn->refcount); + break; + default: + break; + } + + kfree(action); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_buddy.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_buddy.c new file mode 100644 index 000000000..7df11a019 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_buddy.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 - 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2006 - 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. + */ + +#include "dr_types.h" + +int mlx5dr_buddy_init(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int max_order) +{ + int i; + + buddy->max_order = max_order; + + INIT_LIST_HEAD(&buddy->list_node); + INIT_LIST_HEAD(&buddy->used_list); + INIT_LIST_HEAD(&buddy->hot_list); + + buddy->bitmap = kcalloc(buddy->max_order + 1, + sizeof(*buddy->bitmap), + GFP_KERNEL); + buddy->num_free = kcalloc(buddy->max_order + 1, + sizeof(*buddy->num_free), + GFP_KERNEL); + + if (!buddy->bitmap || !buddy->num_free) + goto err_free_all; + + /* Allocating max_order bitmaps, one for each order */ + + for (i = 0; i <= buddy->max_order; ++i) { + unsigned int size = 1 << (buddy->max_order - i); + + buddy->bitmap[i] = bitmap_zalloc(size, GFP_KERNEL); + if (!buddy->bitmap[i]) + goto err_out_free_each_bit_per_order; + } + + /* In the beginning, we have only one order that is available for + * use (the biggest one), so mark the first bit in both bitmaps. 
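+ *
+ * For example, with max_order = 2 the allocator starts with a single free
+ * segment at order 2 and none at orders 1 and 0. An order-0 allocation
+ * then splits that segment on the way down, leaving one free buddy marked
+ * at order 1 and one at order 0, and returns segment 0.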
+ */ + + bitmap_set(buddy->bitmap[buddy->max_order], 0, 1); + + buddy->num_free[buddy->max_order] = 1; + + return 0; + +err_out_free_each_bit_per_order: + for (i = 0; i <= buddy->max_order; ++i) + bitmap_free(buddy->bitmap[i]); + +err_free_all: + kfree(buddy->num_free); + kfree(buddy->bitmap); + return -ENOMEM; +} + +void mlx5dr_buddy_cleanup(struct mlx5dr_icm_buddy_mem *buddy) +{ + int i; + + list_del(&buddy->list_node); + + for (i = 0; i <= buddy->max_order; ++i) + bitmap_free(buddy->bitmap[i]); + + kfree(buddy->num_free); + kfree(buddy->bitmap); +} + +static int dr_buddy_find_free_seg(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int start_order, + unsigned int *segment, + unsigned int *order) +{ + unsigned int seg, order_iter, m; + + for (order_iter = start_order; + order_iter <= buddy->max_order; ++order_iter) { + if (!buddy->num_free[order_iter]) + continue; + + m = 1 << (buddy->max_order - order_iter); + seg = find_first_bit(buddy->bitmap[order_iter], m); + + if (WARN(seg >= m, + "ICM Buddy: failed finding free mem for order %d\n", + order_iter)) + return -ENOMEM; + + break; + } + + if (order_iter > buddy->max_order) + return -ENOMEM; + + *segment = seg; + *order = order_iter; + return 0; +} + +/** + * mlx5dr_buddy_alloc_mem() - Update second level bitmap. + * @buddy: Buddy to update. + * @order: Order of the buddy to update. + * @segment: Segment number. + * + * This function finds the first area of the ICM memory managed by this buddy. + * It uses the data structures of the buddy system in order to find the first + * area of free place, starting from the current order till the maximum order + * in the system. + * + * Return: 0 when segment is set, non-zero error status otherwise. + * + * The function returns the location (segment) in the whole buddy ICM memory + * area - the index of the memory segment that is available for use. + */ +int mlx5dr_buddy_alloc_mem(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int order, + unsigned int *segment) +{ + unsigned int seg, order_iter; + int err; + + err = dr_buddy_find_free_seg(buddy, order, &seg, &order_iter); + if (err) + return err; + + bitmap_clear(buddy->bitmap[order_iter], seg, 1); + --buddy->num_free[order_iter]; + + /* If we found free memory in some order that is bigger than the + * required order, we need to split every order between the required + * order and the order that we found into two parts, and mark accordingly. + */ + while (order_iter > order) { + --order_iter; + seg <<= 1; + bitmap_set(buddy->bitmap[order_iter], seg ^ 1, 1); + ++buddy->num_free[order_iter]; + } + + seg <<= order; + *segment = seg; + + return 0; +} + +void mlx5dr_buddy_free_mem(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int seg, unsigned int order) +{ + seg >>= order; + + /* Whenever a segment is free, + * the mem is added to the buddy that gave it. + */ + while (test_bit(seg ^ 1, buddy->bitmap[order])) { + bitmap_clear(buddy->bitmap[order], seg ^ 1, 1); + --buddy->num_free[order]; + seg >>= 1; + ++order; + } + bitmap_set(buddy->bitmap[order], seg, 1); + + ++buddy->num_free[order]; +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c new file mode 100644 index 000000000..d7b1a230b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -0,0 +1,824 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include "dr_types.h" + +int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev, + bool other_vport, + u16 vport_number, + u64 *icm_address_rx, + u64 *icm_address_tx) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {}; + int err; + + MLX5_SET(query_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); + MLX5_SET(query_esw_vport_context_in, in, other_vport, other_vport); + MLX5_SET(query_esw_vport_context_in, in, vport_number, vport_number); + + err = mlx5_cmd_exec_inout(mdev, query_esw_vport_context, in, out); + if (err) + return err; + + *icm_address_rx = + MLX5_GET64(query_esw_vport_context_out, out, + esw_vport_context.sw_steering_vport_icm_address_rx); + *icm_address_tx = + MLX5_GET64(query_esw_vport_context_out, out, + esw_vport_context.sw_steering_vport_icm_address_tx); + return 0; +} + +int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_vport, + u16 vport_number, u16 *gvmi) +{ + u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; + int out_size; + void *out; + int err; + + out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); + out = kzalloc(out_size, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); + MLX5_SET(query_hca_cap_in, in, other_function, other_vport); + MLX5_SET(query_hca_cap_in, in, function_id, vport_number); + MLX5_SET(query_hca_cap_in, in, op_mod, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 | + HCA_CAP_OPMOD_GET_CUR); + + err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out); + if (err) { + kfree(out); + return err; + } + + *gvmi = MLX5_GET(query_hca_cap_out, out, capability.cmd_hca_cap.vhca_id); + + kfree(out); + return 0; +} + +int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev, + struct mlx5dr_esw_caps *caps) +{ + caps->drop_icm_address_rx = + MLX5_CAP64_ESW_FLOWTABLE(mdev, + sw_steering_fdb_action_drop_icm_address_rx); + caps->drop_icm_address_tx = + MLX5_CAP64_ESW_FLOWTABLE(mdev, + sw_steering_fdb_action_drop_icm_address_tx); + caps->uplink_icm_address_rx = + MLX5_CAP64_ESW_FLOWTABLE(mdev, + sw_steering_uplink_icm_address_rx); + caps->uplink_icm_address_tx = + MLX5_CAP64_ESW_FLOWTABLE(mdev, + sw_steering_uplink_icm_address_tx); + caps->sw_owner_v2 = MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, sw_owner_v2); + if (!caps->sw_owner_v2) + caps->sw_owner = MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, sw_owner); + + return 0; +} + +static int dr_cmd_query_nic_vport_roce_en(struct mlx5_core_dev *mdev, + u16 vport, bool *roce_en) +{ + u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {}; + int err; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(query_nic_vport_context_in, in, other_vport, !!vport); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + *roce_en = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.roce_en); + return 0; +} + +int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, + struct mlx5dr_cmd_caps *caps) +{ + bool roce_en; + int err; + + caps->prio_tag_required = MLX5_CAP_GEN(mdev, prio_tag_required); + caps->eswitch_manager = MLX5_CAP_GEN(mdev, eswitch_manager); + caps->gvmi = MLX5_CAP_GEN(mdev, vhca_id); + caps->flex_protocols = MLX5_CAP_GEN(mdev, flex_parser_protocols); + caps->sw_format_ver = MLX5_CAP_GEN(mdev, steering_format_version); + 
caps->roce_caps.fl_rc_qp_when_roce_disabled = + MLX5_CAP_GEN(mdev, fl_rc_qp_when_roce_disabled); + + if (MLX5_CAP_GEN(mdev, roce)) { + err = dr_cmd_query_nic_vport_roce_en(mdev, 0, &roce_en); + if (err) + return err; + + caps->roce_caps.roce_en = roce_en; + caps->roce_caps.fl_rc_qp_when_roce_disabled |= + MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_disabled); + caps->roce_caps.fl_rc_qp_when_roce_enabled = + MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_enabled); + } + + caps->isolate_vl_tc = MLX5_CAP_GEN(mdev, isolate_vl_tc_new); + + /* geneve_tlv_option_0_exist is the indication of + * STE support for lookup type flex_parser_ok + */ + caps->flex_parser_ok_bits_supp = + MLX5_CAP_FLOWTABLE(mdev, + flow_table_properties_nic_receive.ft_field_support.geneve_tlv_option_0_exist); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED) { + caps->flex_parser_id_icmp_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw0); + caps->flex_parser_id_icmp_dw1 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw1); + } + + if (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED) { + caps->flex_parser_id_icmpv6_dw0 = + MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw0); + caps->flex_parser_id_icmpv6_dw1 = + MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw1); + } + + if (caps->flex_protocols & MLX5_FLEX_PARSER_GENEVE_TLV_OPTION_0_ENABLED) + caps->flex_parser_id_geneve_tlv_option_0 = + MLX5_CAP_GEN(mdev, flex_parser_id_geneve_tlv_option_0); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED) + caps->flex_parser_id_mpls_over_gre = + MLX5_CAP_GEN(mdev, flex_parser_id_outer_first_mpls_over_gre); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED) + caps->flex_parser_id_mpls_over_udp = + MLX5_CAP_GEN(mdev, flex_parser_id_outer_first_mpls_over_udp_label); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_0_ENABLED) + caps->flex_parser_id_gtpu_dw_0 = + MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_dw_0); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_TEID_ENABLED) + caps->flex_parser_id_gtpu_teid = + MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_teid); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_2_ENABLED) + caps->flex_parser_id_gtpu_dw_2 = + MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_dw_2); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_FIRST_EXT_DW_0_ENABLED) + caps->flex_parser_id_gtpu_first_ext_dw_0 = + MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_first_ext_dw_0); + + caps->nic_rx_drop_address = + MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_rx_action_drop_icm_address); + caps->nic_tx_drop_address = + MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_tx_action_drop_icm_address); + caps->nic_tx_allow_address = + MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_tx_action_allow_icm_address); + + caps->rx_sw_owner_v2 = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, sw_owner_v2); + caps->tx_sw_owner_v2 = MLX5_CAP_FLOWTABLE_NIC_TX(mdev, sw_owner_v2); + + if (!caps->rx_sw_owner_v2) + caps->rx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, sw_owner); + if (!caps->tx_sw_owner_v2) + caps->tx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_TX(mdev, sw_owner); + + caps->max_ft_level = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_ft_level); + + caps->log_icm_size = MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size); + caps->hdr_modify_icm_addr = + MLX5_CAP64_DEV_MEM(mdev, header_modify_sw_icm_start_address); + + caps->roce_min_src_udp = MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port); + + caps->is_ecpf = mlx5_core_is_ecpf_esw_manager(mdev); + + return 0; +} + +int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev, + enum fs_flow_table_type type, + u32 
table_id, + struct mlx5dr_cmd_query_flow_table_details *output) +{ + u32 out[MLX5_ST_SZ_DW(query_flow_table_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_flow_table_in)] = {}; + int err; + + MLX5_SET(query_flow_table_in, in, opcode, + MLX5_CMD_OP_QUERY_FLOW_TABLE); + + MLX5_SET(query_flow_table_in, in, table_type, type); + MLX5_SET(query_flow_table_in, in, table_id, table_id); + + err = mlx5_cmd_exec_inout(dev, query_flow_table, in, out); + if (err) + return err; + + output->status = MLX5_GET(query_flow_table_out, out, status); + output->level = MLX5_GET(query_flow_table_out, out, flow_table_context.level); + + output->sw_owner_icm_root_1 = MLX5_GET64(query_flow_table_out, out, + flow_table_context.sw_owner_icm_root_1); + output->sw_owner_icm_root_0 = MLX5_GET64(query_flow_table_out, out, + flow_table_context.sw_owner_icm_root_0); + + return 0; +} + +int mlx5dr_cmd_query_flow_sampler(struct mlx5_core_dev *dev, + u32 sampler_id, + u64 *rx_icm_addr, + u64 *tx_icm_addr) +{ + u32 out[MLX5_ST_SZ_DW(query_sampler_obj_out)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + void *attr; + int ret; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_QUERY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_SAMPLER); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sampler_id); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) + return ret; + + attr = MLX5_ADDR_OF(query_sampler_obj_out, out, sampler_object); + + *rx_icm_addr = MLX5_GET64(sampler_obj, attr, + sw_steering_icm_address_rx); + *tx_icm_addr = MLX5_GET64(sampler_obj, attr, + sw_steering_icm_address_tx); + + return 0; +} + +int mlx5dr_cmd_sync_steering(struct mlx5_core_dev *mdev) +{ + u32 in[MLX5_ST_SZ_DW(sync_steering_in)] = {}; + + MLX5_SET(sync_steering_in, in, opcode, MLX5_CMD_OP_SYNC_STEERING); + + return mlx5_cmd_exec_in(mdev, sync_steering, in); +} + +int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 group_id, + u32 modify_header_id, + u16 vport) +{ + u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {}; + void *in_flow_context; + unsigned int inlen; + void *in_dests; + u32 *in; + int err; + + inlen = MLX5_ST_SZ_BYTES(set_fte_in) + + 1 * MLX5_ST_SZ_BYTES(dest_format_struct); /* One destination only */ + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); + MLX5_SET(set_fte_in, in, table_type, table_type); + MLX5_SET(set_fte_in, in, table_id, table_id); + + in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); + MLX5_SET(flow_context, in_flow_context, group_id, group_id); + MLX5_SET(flow_context, in_flow_context, modify_header_id, modify_header_id); + MLX5_SET(flow_context, in_flow_context, destination_list_size, 1); + MLX5_SET(flow_context, in_flow_context, action, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR); + + in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); + MLX5_SET(dest_format_struct, in_dests, destination_type, + MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT); + MLX5_SET(dest_format_struct, in_dests, destination_id, vport); + + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + kvfree(in); + + return err; +} + +int mlx5dr_cmd_del_flow_table_entry(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id) +{ + u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {}; + + MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); + MLX5_SET(delete_fte_in, in, 
table_type, table_type); + MLX5_SET(delete_fte_in, in, table_id, table_id); + + return mlx5_cmd_exec_in(mdev, delete_fte, in); +} + +int mlx5dr_cmd_alloc_modify_header(struct mlx5_core_dev *mdev, + u32 table_type, + u8 num_of_actions, + u64 *actions, + u32 *modify_header_id) +{ + u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)] = {}; + void *p_actions; + u32 inlen; + u32 *in; + int err; + + inlen = MLX5_ST_SZ_BYTES(alloc_modify_header_context_in) + + num_of_actions * sizeof(u64); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(alloc_modify_header_context_in, in, opcode, + MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(alloc_modify_header_context_in, in, table_type, table_type); + MLX5_SET(alloc_modify_header_context_in, in, num_of_actions, num_of_actions); + p_actions = MLX5_ADDR_OF(alloc_modify_header_context_in, in, actions); + memcpy(p_actions, actions, num_of_actions * sizeof(u64)); + + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + if (err) + goto out; + + *modify_header_id = MLX5_GET(alloc_modify_header_context_out, out, + modify_header_id); +out: + kvfree(in); + return err; +} + +int mlx5dr_cmd_dealloc_modify_header(struct mlx5_core_dev *mdev, + u32 modify_header_id) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)] = {}; + + MLX5_SET(dealloc_modify_header_context_in, in, opcode, + MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id, + modify_header_id); + + return mlx5_cmd_exec_in(mdev, dealloc_modify_header_context, in); +} + +int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 *group_id) +{ + u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {}; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + u32 *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_GROUP); + MLX5_SET(create_flow_group_in, in, table_type, table_type); + MLX5_SET(create_flow_group_in, in, table_id, table_id); + + err = mlx5_cmd_exec_inout(mdev, create_flow_group, in, out); + if (err) + goto out; + + *group_id = MLX5_GET(create_flow_group_out, out, group_id); + +out: + kvfree(in); + return err; +} + +int mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 group_id) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {}; + + MLX5_SET(destroy_flow_group_in, in, opcode, + MLX5_CMD_OP_DESTROY_FLOW_GROUP); + MLX5_SET(destroy_flow_group_in, in, table_type, table_type); + MLX5_SET(destroy_flow_group_in, in, table_id, table_id); + MLX5_SET(destroy_flow_group_in, in, group_id, group_id); + + return mlx5_cmd_exec_in(mdev, destroy_flow_group, in); +} + +int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev, + struct mlx5dr_cmd_create_flow_table_attr *attr, + u64 *fdb_rx_icm_addr, + u32 *table_id) +{ + u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {}; + u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {}; + void *ft_mdev; + int err; + + MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE); + MLX5_SET(create_flow_table_in, in, table_type, attr->table_type); + MLX5_SET(create_flow_table_in, in, uid, attr->uid); + + ft_mdev = MLX5_ADDR_OF(create_flow_table_in, in, flow_table_context); + MLX5_SET(flow_table_context, ft_mdev, termination_table, attr->term_tbl); + MLX5_SET(flow_table_context, ft_mdev, sw_owner, attr->sw_owner); + MLX5_SET(flow_table_context, ft_mdev, level, 
attr->level); + + if (attr->sw_owner) { + /* icm_addr_0 used for FDB RX / NIC TX / NIC_RX + * icm_addr_1 used for FDB TX + */ + if (attr->table_type == MLX5_FLOW_TABLE_TYPE_NIC_RX) { + MLX5_SET64(flow_table_context, ft_mdev, + sw_owner_icm_root_0, attr->icm_addr_rx); + } else if (attr->table_type == MLX5_FLOW_TABLE_TYPE_NIC_TX) { + MLX5_SET64(flow_table_context, ft_mdev, + sw_owner_icm_root_0, attr->icm_addr_tx); + } else if (attr->table_type == MLX5_FLOW_TABLE_TYPE_FDB) { + MLX5_SET64(flow_table_context, ft_mdev, + sw_owner_icm_root_0, attr->icm_addr_rx); + MLX5_SET64(flow_table_context, ft_mdev, + sw_owner_icm_root_1, attr->icm_addr_tx); + } + } + + MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en, + attr->decap_en); + MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en, + attr->reformat_en); + + err = mlx5_cmd_exec_inout(mdev, create_flow_table, in, out); + if (err) + return err; + + *table_id = MLX5_GET(create_flow_table_out, out, table_id); + if (!attr->sw_owner && attr->table_type == MLX5_FLOW_TABLE_TYPE_FDB && + fdb_rx_icm_addr) + *fdb_rx_icm_addr = + (u64)MLX5_GET(create_flow_table_out, out, icm_address_31_0) | + (u64)MLX5_GET(create_flow_table_out, out, icm_address_39_32) << 32 | + (u64)MLX5_GET(create_flow_table_out, out, icm_address_63_40) << 40; + + return 0; +} + +int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev, + u32 table_id, + u32 table_type) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {}; + + MLX5_SET(destroy_flow_table_in, in, opcode, + MLX5_CMD_OP_DESTROY_FLOW_TABLE); + MLX5_SET(destroy_flow_table_in, in, table_type, table_type); + MLX5_SET(destroy_flow_table_in, in, table_id, table_id); + + return mlx5_cmd_exec_in(mdev, destroy_flow_table, in); +} + +int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev, + enum mlx5_reformat_ctx_type rt, + u8 reformat_param_0, + u8 reformat_param_1, + size_t reformat_size, + void *reformat_data, + u32 *reformat_id) +{ + u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)] = {}; + size_t inlen, cmd_data_sz, cmd_total_sz; + void *prctx; + void *pdata; + void *in; + int err; + + cmd_total_sz = MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in); + cmd_data_sz = MLX5_FLD_SZ_BYTES(alloc_packet_reformat_context_in, + packet_reformat_context.reformat_data); + inlen = ALIGN(cmd_total_sz + reformat_size - cmd_data_sz, 4); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(alloc_packet_reformat_context_in, in, opcode, + MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT); + + prctx = MLX5_ADDR_OF(alloc_packet_reformat_context_in, in, packet_reformat_context); + pdata = MLX5_ADDR_OF(packet_reformat_context_in, prctx, reformat_data); + + MLX5_SET(packet_reformat_context_in, prctx, reformat_type, rt); + MLX5_SET(packet_reformat_context_in, prctx, reformat_param_0, reformat_param_0); + MLX5_SET(packet_reformat_context_in, prctx, reformat_param_1, reformat_param_1); + MLX5_SET(packet_reformat_context_in, prctx, reformat_data_size, reformat_size); + if (reformat_data && reformat_size) + memcpy(pdata, reformat_data, reformat_size); + + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + if (err) + goto err_free_in; + + *reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id); + +err_free_in: + kvfree(in); + return err; +} + +void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev, + u32 reformat_id) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)] = {}; + + MLX5_SET(dealloc_packet_reformat_context_in, in, opcode, + 
MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id, + reformat_id); + + mlx5_cmd_exec_in(mdev, dealloc_packet_reformat_context, in); +} + +int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num, + u16 index, struct mlx5dr_cmd_gid_attr *attr) +{ + u32 out[MLX5_ST_SZ_DW(query_roce_address_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_roce_address_in)] = {}; + int err; + + MLX5_SET(query_roce_address_in, in, opcode, + MLX5_CMD_OP_QUERY_ROCE_ADDRESS); + + MLX5_SET(query_roce_address_in, in, roce_address_index, index); + MLX5_SET(query_roce_address_in, in, vhca_port_num, vhca_port_num); + + err = mlx5_cmd_exec_inout(mdev, query_roce_address, in, out); + if (err) + return err; + + memcpy(&attr->gid, + MLX5_ADDR_OF(query_roce_address_out, + out, roce_address.source_l3_address), + sizeof(attr->gid)); + memcpy(attr->mac, + MLX5_ADDR_OF(query_roce_address_out, out, + roce_address.source_mac_47_32), + sizeof(attr->mac)); + + if (MLX5_GET(query_roce_address_out, out, + roce_address.roce_version) == MLX5_ROCE_VERSION_2) + attr->roce_ver = MLX5_ROCE_VERSION_2; + else + attr->roce_ver = MLX5_ROCE_VERSION_1; + + return 0; +} + +static int mlx5dr_cmd_set_extended_dest(struct mlx5_core_dev *dev, + struct mlx5dr_cmd_fte_info *fte, + bool *extended_dest) +{ + int fw_log_max_fdb_encap_uplink = MLX5_CAP_ESW(dev, log_max_fdb_encap_uplink); + int num_fwd_destinations = 0; + int num_encap = 0; + int i; + + *extended_dest = false; + if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return 0; + for (i = 0; i < fte->dests_size; i++) { + if (fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_COUNTER || + fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_NONE) + continue; + if ((fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_VPORT || + fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) && + fte->dest_arr[i].vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) + num_encap++; + num_fwd_destinations++; + } + + if (num_fwd_destinations > 1 && num_encap > 0) + *extended_dest = true; + + if (*extended_dest && !fw_log_max_fdb_encap_uplink) { + mlx5_core_warn(dev, "FW does not support extended destination"); + return -EOPNOTSUPP; + } + if (num_encap > (1 << fw_log_max_fdb_encap_uplink)) { + mlx5_core_warn(dev, "FW does not support more than %d encaps", + 1 << fw_log_max_fdb_encap_uplink); + return -EOPNOTSUPP; + } + + return 0; +} + +int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev, + int opmod, int modify_mask, + struct mlx5dr_cmd_ft_info *ft, + u32 group_id, + struct mlx5dr_cmd_fte_info *fte) +{ + u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {}; + void *in_flow_context, *vlan; + bool extended_dest = false; + void *in_match_value; + unsigned int inlen; + int dst_cnt_size; + void *in_dests; + u32 *in; + int err; + int i; + + if (mlx5dr_cmd_set_extended_dest(dev, fte, &extended_dest)) + return -EOPNOTSUPP; + + if (!extended_dest) + dst_cnt_size = MLX5_ST_SZ_BYTES(dest_format_struct); + else + dst_cnt_size = MLX5_ST_SZ_BYTES(extended_dest_format); + + inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * dst_cnt_size; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); + MLX5_SET(set_fte_in, in, op_mod, opmod); + MLX5_SET(set_fte_in, in, modify_enable_mask, modify_mask); + MLX5_SET(set_fte_in, in, table_type, ft->type); + MLX5_SET(set_fte_in, in, table_id, ft->id); + MLX5_SET(set_fte_in, in, flow_index, fte->index); + MLX5_SET(set_fte_in, in, 
ignore_flow_level, fte->ignore_flow_level); + if (ft->vport) { + MLX5_SET(set_fte_in, in, vport_number, ft->vport); + MLX5_SET(set_fte_in, in, other_vport, 1); + } + + in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); + MLX5_SET(flow_context, in_flow_context, group_id, group_id); + + MLX5_SET(flow_context, in_flow_context, flow_tag, + fte->flow_context.flow_tag); + MLX5_SET(flow_context, in_flow_context, flow_source, + fte->flow_context.flow_source); + + MLX5_SET(flow_context, in_flow_context, extended_destination, + extended_dest); + if (extended_dest) { + u32 action; + + action = fte->action.action & + ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + MLX5_SET(flow_context, in_flow_context, action, action); + } else { + MLX5_SET(flow_context, in_flow_context, action, + fte->action.action); + if (fte->action.pkt_reformat) + MLX5_SET(flow_context, in_flow_context, packet_reformat_id, + fte->action.pkt_reformat->id); + } + if (fte->action.modify_hdr) + MLX5_SET(flow_context, in_flow_context, modify_header_id, + fte->action.modify_hdr->id); + + vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan); + + MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[0].ethtype); + MLX5_SET(vlan, vlan, vid, fte->action.vlan[0].vid); + MLX5_SET(vlan, vlan, prio, fte->action.vlan[0].prio); + + vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan_2); + + MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[1].ethtype); + MLX5_SET(vlan, vlan, vid, fte->action.vlan[1].vid); + MLX5_SET(vlan, vlan, prio, fte->action.vlan[1].prio); + + in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, + match_value); + memcpy(in_match_value, fte->val, sizeof(u32) * MLX5_ST_SZ_DW_MATCH_PARAM); + + in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + int list_size = 0; + + for (i = 0; i < fte->dests_size; i++) { + enum mlx5_flow_destination_type type = fte->dest_arr[i].type; + enum mlx5_ifc_flow_destination_type ifc_type; + unsigned int id; + + if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + switch (type) { + case MLX5_FLOW_DESTINATION_TYPE_NONE: + continue; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: + id = fte->dest_arr[i].ft_num; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE; + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + id = fte->dest_arr[i].ft_id; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE; + + break; + case MLX5_FLOW_DESTINATION_TYPE_UPLINK: + case MLX5_FLOW_DESTINATION_TYPE_VPORT: + if (type == MLX5_FLOW_DESTINATION_TYPE_VPORT) { + id = fte->dest_arr[i].vport.num; + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id_valid, + !!(fte->dest_arr[i].vport.flags & + MLX5_FLOW_DEST_VPORT_VHCA_ID)); + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT; + } else { + id = 0; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_UPLINK; + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id_valid, 1); + } + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id, + fte->dest_arr[i].vport.vhca_id); + if (extended_dest && (fte->dest_arr[i].vport.flags & + MLX5_FLOW_DEST_VPORT_REFORMAT_ID)) { + MLX5_SET(dest_format_struct, in_dests, + packet_reformat, + !!(fte->dest_arr[i].vport.flags & + MLX5_FLOW_DEST_VPORT_REFORMAT_ID)); + MLX5_SET(extended_dest_format, in_dests, + packet_reformat_id, + fte->dest_arr[i].vport.reformat_id); + } + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER: + id = fte->dest_arr[i].sampler_id; + ifc_type 
= MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_SAMPLER; + break; + default: + id = fte->dest_arr[i].tir_num; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TIR; + } + + MLX5_SET(dest_format_struct, in_dests, destination_type, + ifc_type); + MLX5_SET(dest_format_struct, in_dests, destination_id, id); + in_dests += dst_cnt_size; + list_size++; + } + + MLX5_SET(flow_context, in_flow_context, destination_list_size, + list_size); + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev, + log_max_flow_counter, + ft->type)); + int list_size = 0; + + for (i = 0; i < fte->dests_size; i++) { + if (fte->dest_arr[i].type != + MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + MLX5_SET(flow_counter_list, in_dests, flow_counter_id, + fte->dest_arr[i].counter_id); + in_dests += dst_cnt_size; + list_size++; + } + if (list_size > max_list_size) { + err = -EINVAL; + goto err_out; + } + + MLX5_SET(flow_context, in_flow_context, flow_counter_list_size, + list_size); + } + + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +err_out: + kvfree(in); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c new file mode 100644 index 000000000..7adcf0eec --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c @@ -0,0 +1,657 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <linux/debugfs.h> +#include <linux/kernel.h> +#include <linux/seq_file.h> +#include "dr_types.h" + +#define DR_DBG_PTR_TO_ID(p) ((u64)(uintptr_t)(p) & 0xFFFFFFFFULL) + +enum dr_dump_rec_type { + DR_DUMP_REC_TYPE_DOMAIN = 3000, + DR_DUMP_REC_TYPE_DOMAIN_INFO_FLEX_PARSER = 3001, + DR_DUMP_REC_TYPE_DOMAIN_INFO_DEV_ATTR = 3002, + DR_DUMP_REC_TYPE_DOMAIN_INFO_VPORT = 3003, + DR_DUMP_REC_TYPE_DOMAIN_INFO_CAPS = 3004, + DR_DUMP_REC_TYPE_DOMAIN_SEND_RING = 3005, + + DR_DUMP_REC_TYPE_TABLE = 3100, + DR_DUMP_REC_TYPE_TABLE_RX = 3101, + DR_DUMP_REC_TYPE_TABLE_TX = 3102, + + DR_DUMP_REC_TYPE_MATCHER = 3200, + DR_DUMP_REC_TYPE_MATCHER_MASK_DEPRECATED = 3201, + DR_DUMP_REC_TYPE_MATCHER_RX = 3202, + DR_DUMP_REC_TYPE_MATCHER_TX = 3203, + DR_DUMP_REC_TYPE_MATCHER_BUILDER = 3204, + DR_DUMP_REC_TYPE_MATCHER_MASK = 3205, + + DR_DUMP_REC_TYPE_RULE = 3300, + DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V0 = 3301, + DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V0 = 3302, + DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V1 = 3303, + DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V1 = 3304, + + DR_DUMP_REC_TYPE_ACTION_ENCAP_L2 = 3400, + DR_DUMP_REC_TYPE_ACTION_ENCAP_L3 = 3401, + DR_DUMP_REC_TYPE_ACTION_MODIFY_HDR = 3402, + DR_DUMP_REC_TYPE_ACTION_DROP = 3403, + DR_DUMP_REC_TYPE_ACTION_QP = 3404, + DR_DUMP_REC_TYPE_ACTION_FT = 3405, + DR_DUMP_REC_TYPE_ACTION_CTR = 3406, + DR_DUMP_REC_TYPE_ACTION_TAG = 3407, + DR_DUMP_REC_TYPE_ACTION_VPORT = 3408, + DR_DUMP_REC_TYPE_ACTION_DECAP_L2 = 3409, + DR_DUMP_REC_TYPE_ACTION_DECAP_L3 = 3410, + DR_DUMP_REC_TYPE_ACTION_DEVX_TIR = 3411, + DR_DUMP_REC_TYPE_ACTION_PUSH_VLAN = 3412, + DR_DUMP_REC_TYPE_ACTION_POP_VLAN = 3413, + DR_DUMP_REC_TYPE_ACTION_SAMPLER = 3415, + DR_DUMP_REC_TYPE_ACTION_INSERT_HDR = 3420, + DR_DUMP_REC_TYPE_ACTION_REMOVE_HDR = 3421 +}; + +void mlx5dr_dbg_tbl_add(struct mlx5dr_table *tbl) +{ + mutex_lock(&tbl->dmn->dump_info.dbg_mutex); + list_add_tail(&tbl->dbg_node, &tbl->dmn->dbg_tbl_list); + mutex_unlock(&tbl->dmn->dump_info.dbg_mutex); +} + +void mlx5dr_dbg_tbl_del(struct mlx5dr_table *tbl) +{ + 
mutex_lock(&tbl->dmn->dump_info.dbg_mutex); + list_del(&tbl->dbg_node); + mutex_unlock(&tbl->dmn->dump_info.dbg_mutex); +} + +void mlx5dr_dbg_rule_add(struct mlx5dr_rule *rule) +{ + struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn; + + mutex_lock(&dmn->dump_info.dbg_mutex); + list_add_tail(&rule->dbg_node, &rule->matcher->dbg_rule_list); + mutex_unlock(&dmn->dump_info.dbg_mutex); +} + +void mlx5dr_dbg_rule_del(struct mlx5dr_rule *rule) +{ + struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn; + + mutex_lock(&dmn->dump_info.dbg_mutex); + list_del(&rule->dbg_node); + mutex_unlock(&dmn->dump_info.dbg_mutex); +} + +static u64 dr_dump_icm_to_idx(u64 icm_addr) +{ + return (icm_addr >> 6) & 0xffffffff; +} + +#define DR_HEX_SIZE 256 + +static void +dr_dump_hex_print(char hex[DR_HEX_SIZE], char *src, u32 size) +{ + if (WARN_ON_ONCE(DR_HEX_SIZE < 2 * size + 1)) + size = DR_HEX_SIZE / 2 - 1; /* truncate */ + + bin2hex(hex, src, size); + hex[2 * size] = 0; /* NULL-terminate */ +} + +static int +dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id, + struct mlx5dr_rule_action_member *action_mem) +{ + struct mlx5dr_action *action = action_mem->action; + const u64 action_id = DR_DBG_PTR_TO_ID(action); + + switch (action->action_type) { + case DR_ACTION_TYP_DROP: + seq_printf(file, "%d,0x%llx,0x%llx\n", + DR_DUMP_REC_TYPE_ACTION_DROP, action_id, rule_id); + break; + case DR_ACTION_TYP_FT: + if (action->dest_tbl->is_fw_tbl) + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_FT, action_id, + rule_id, action->dest_tbl->fw_tbl.id, + -1); + else + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%llx\n", + DR_DUMP_REC_TYPE_ACTION_FT, action_id, + rule_id, action->dest_tbl->tbl->table_id, + DR_DBG_PTR_TO_ID(action->dest_tbl->tbl)); + + break; + case DR_ACTION_TYP_CTR: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_CTR, action_id, rule_id, + action->ctr->ctr_id + action->ctr->offset); + break; + case DR_ACTION_TYP_TAG: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_TAG, action_id, rule_id, + action->flow_tag->flow_tag); + break; + case DR_ACTION_TYP_MODIFY_HDR: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_MODIFY_HDR, action_id, + rule_id, action->rewrite->index); + break; + case DR_ACTION_TYP_VPORT: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_VPORT, action_id, rule_id, + action->vport->caps->num); + break; + case DR_ACTION_TYP_TNL_L2_TO_L2: + seq_printf(file, "%d,0x%llx,0x%llx\n", + DR_DUMP_REC_TYPE_ACTION_DECAP_L2, action_id, + rule_id); + break; + case DR_ACTION_TYP_TNL_L3_TO_L2: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_DECAP_L3, action_id, + rule_id, action->rewrite->index); + break; + case DR_ACTION_TYP_L2_TO_TNL_L2: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_ENCAP_L2, action_id, + rule_id, action->reformat->id); + break; + case DR_ACTION_TYP_L2_TO_TNL_L3: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_ENCAP_L3, action_id, + rule_id, action->reformat->id); + break; + case DR_ACTION_TYP_POP_VLAN: + seq_printf(file, "%d,0x%llx,0x%llx\n", + DR_DUMP_REC_TYPE_ACTION_POP_VLAN, action_id, + rule_id); + break; + case DR_ACTION_TYP_PUSH_VLAN: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_PUSH_VLAN, action_id, + rule_id, action->push_vlan->vlan_hdr); + break; + case DR_ACTION_TYP_INSERT_HDR: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x\n", + 
DR_DUMP_REC_TYPE_ACTION_INSERT_HDR, action_id, + rule_id, action->reformat->id, + action->reformat->param_0, + action->reformat->param_1); + break; + case DR_ACTION_TYP_REMOVE_HDR: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_REMOVE_HDR, action_id, + rule_id, action->reformat->id, + action->reformat->param_0, + action->reformat->param_1); + break; + case DR_ACTION_TYP_SAMPLER: + seq_printf(file, + "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x,0x%llx,0x%llx\n", + DR_DUMP_REC_TYPE_ACTION_SAMPLER, action_id, rule_id, + 0, 0, action->sampler->sampler_id, + action->sampler->rx_icm_addr, + action->sampler->tx_icm_addr); + break; + default: + return 0; + } + + return 0; +} + +static int +dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste, + bool is_rx, const u64 rule_id, u8 format_ver) +{ + char hw_ste_dump[DR_HEX_SIZE]; + u32 mem_rec_type; + + if (format_ver == MLX5_STEERING_FORMAT_CONNECTX_5) { + mem_rec_type = is_rx ? DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V0 : + DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V0; + } else { + mem_rec_type = is_rx ? DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V1 : + DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V1; + } + + dr_dump_hex_print(hw_ste_dump, (char *)mlx5dr_ste_get_hw_ste(ste), + DR_STE_SIZE_REDUCED); + + seq_printf(file, "%d,0x%llx,0x%llx,%s\n", mem_rec_type, + dr_dump_icm_to_idx(mlx5dr_ste_get_icm_addr(ste)), rule_id, + hw_ste_dump); + + return 0; +} + +static int +dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx, + bool is_rx, const u64 rule_id, u8 format_ver) +{ + struct mlx5dr_ste *ste_arr[DR_RULE_MAX_STES + DR_ACTION_MAX_STES]; + struct mlx5dr_ste *curr_ste = rule_rx_tx->last_rule_ste; + int ret, i; + + if (mlx5dr_rule_get_reverse_rule_members(ste_arr, curr_ste, &i)) + return 0; + + while (i--) { + ret = dr_dump_rule_mem(file, ste_arr[i], is_rx, rule_id, + format_ver); + if (ret < 0) + return ret; + } + + return 0; +} + +static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule) +{ + struct mlx5dr_rule_action_member *action_mem; + const u64 rule_id = DR_DBG_PTR_TO_ID(rule); + struct mlx5dr_rule_rx_tx *rx = &rule->rx; + struct mlx5dr_rule_rx_tx *tx = &rule->tx; + u8 format_ver; + int ret; + + format_ver = rule->matcher->tbl->dmn->info.caps.sw_format_ver; + + seq_printf(file, "%d,0x%llx,0x%llx\n", DR_DUMP_REC_TYPE_RULE, rule_id, + DR_DBG_PTR_TO_ID(rule->matcher)); + + if (rx->nic_matcher) { + ret = dr_dump_rule_rx_tx(file, rx, true, rule_id, format_ver); + if (ret < 0) + return ret; + } + + if (tx->nic_matcher) { + ret = dr_dump_rule_rx_tx(file, tx, false, rule_id, format_ver); + if (ret < 0) + return ret; + } + + list_for_each_entry(action_mem, &rule->rule_actions_list, list) { + ret = dr_dump_rule_action_mem(file, rule_id, action_mem); + if (ret < 0) + return ret; + } + + return 0; +} + +static int +dr_dump_matcher_mask(struct seq_file *file, struct mlx5dr_match_param *mask, + u8 criteria, const u64 matcher_id) +{ + char dump[DR_HEX_SIZE]; + + seq_printf(file, "%d,0x%llx,", DR_DUMP_REC_TYPE_MATCHER_MASK, + matcher_id); + + if (criteria & DR_MATCHER_CRITERIA_OUTER) { + dr_dump_hex_print(dump, (char *)&mask->outer, sizeof(mask->outer)); + seq_printf(file, "%s,", dump); + } else { + seq_puts(file, ","); + } + + if (criteria & DR_MATCHER_CRITERIA_INNER) { + dr_dump_hex_print(dump, (char *)&mask->inner, sizeof(mask->inner)); + seq_printf(file, "%s,", dump); + } else { + seq_puts(file, ","); + } + + if (criteria & DR_MATCHER_CRITERIA_MISC) { + dr_dump_hex_print(dump, (char *)&mask->misc, sizeof(mask->misc)); + 
seq_printf(file, "%s,", dump); + } else { + seq_puts(file, ","); + } + + if (criteria & DR_MATCHER_CRITERIA_MISC2) { + dr_dump_hex_print(dump, (char *)&mask->misc2, sizeof(mask->misc2)); + seq_printf(file, "%s,", dump); + } else { + seq_puts(file, ","); + } + + if (criteria & DR_MATCHER_CRITERIA_MISC3) { + dr_dump_hex_print(dump, (char *)&mask->misc3, sizeof(mask->misc3)); + seq_printf(file, "%s\n", dump); + } else { + seq_puts(file, ",\n"); + } + + return 0; +} + +static int +dr_dump_matcher_builder(struct seq_file *file, struct mlx5dr_ste_build *builder, + u32 index, bool is_rx, const u64 matcher_id) +{ + seq_printf(file, "%d,0x%llx,%d,%d,0x%x\n", + DR_DUMP_REC_TYPE_MATCHER_BUILDER, matcher_id, index, is_rx, + builder->lu_type); + + return 0; +} + +static int +dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx, + struct mlx5dr_matcher_rx_tx *matcher_rx_tx, + const u64 matcher_id) +{ + enum dr_dump_rec_type rec_type; + u64 s_icm_addr, e_icm_addr; + int i, ret; + + rec_type = is_rx ? DR_DUMP_REC_TYPE_MATCHER_RX : + DR_DUMP_REC_TYPE_MATCHER_TX; + + s_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(matcher_rx_tx->s_htbl->chunk); + e_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(matcher_rx_tx->e_anchor->chunk); + seq_printf(file, "%d,0x%llx,0x%llx,%d,0x%llx,0x%llx\n", + rec_type, DR_DBG_PTR_TO_ID(matcher_rx_tx), + matcher_id, matcher_rx_tx->num_of_builders, + dr_dump_icm_to_idx(s_icm_addr), + dr_dump_icm_to_idx(e_icm_addr)); + + for (i = 0; i < matcher_rx_tx->num_of_builders; i++) { + ret = dr_dump_matcher_builder(file, + &matcher_rx_tx->ste_builder[i], + i, is_rx, matcher_id); + if (ret < 0) + return ret; + } + + return 0; +} + +static int +dr_dump_matcher(struct seq_file *file, struct mlx5dr_matcher *matcher) +{ + struct mlx5dr_matcher_rx_tx *rx = &matcher->rx; + struct mlx5dr_matcher_rx_tx *tx = &matcher->tx; + u64 matcher_id; + int ret; + + matcher_id = DR_DBG_PTR_TO_ID(matcher); + + seq_printf(file, "%d,0x%llx,0x%llx,%d\n", DR_DUMP_REC_TYPE_MATCHER, + matcher_id, DR_DBG_PTR_TO_ID(matcher->tbl), matcher->prio); + + ret = dr_dump_matcher_mask(file, &matcher->mask, + matcher->match_criteria, matcher_id); + if (ret < 0) + return ret; + + if (rx->nic_tbl) { + ret = dr_dump_matcher_rx_tx(file, true, rx, matcher_id); + if (ret < 0) + return ret; + } + + if (tx->nic_tbl) { + ret = dr_dump_matcher_rx_tx(file, false, tx, matcher_id); + if (ret < 0) + return ret; + } + + return 0; +} + +static int +dr_dump_matcher_all(struct seq_file *file, struct mlx5dr_matcher *matcher) +{ + struct mlx5dr_rule *rule; + int ret; + + ret = dr_dump_matcher(file, matcher); + if (ret < 0) + return ret; + + list_for_each_entry(rule, &matcher->dbg_rule_list, dbg_node) { + ret = dr_dump_rule(file, rule); + if (ret < 0) + return ret; + } + + return 0; +} + +static int +dr_dump_table_rx_tx(struct seq_file *file, bool is_rx, + struct mlx5dr_table_rx_tx *table_rx_tx, + const u64 table_id) +{ + enum dr_dump_rec_type rec_type; + u64 s_icm_addr; + + rec_type = is_rx ? 
DR_DUMP_REC_TYPE_TABLE_RX : + DR_DUMP_REC_TYPE_TABLE_TX; + + s_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(table_rx_tx->s_anchor->chunk); + seq_printf(file, "%d,0x%llx,0x%llx\n", rec_type, table_id, + dr_dump_icm_to_idx(s_icm_addr)); + + return 0; +} + +static int dr_dump_table(struct seq_file *file, struct mlx5dr_table *table) +{ + struct mlx5dr_table_rx_tx *rx = &table->rx; + struct mlx5dr_table_rx_tx *tx = &table->tx; + int ret; + + seq_printf(file, "%d,0x%llx,0x%llx,%d,%d\n", DR_DUMP_REC_TYPE_TABLE, + DR_DBG_PTR_TO_ID(table), DR_DBG_PTR_TO_ID(table->dmn), + table->table_type, table->level); + + if (rx->nic_dmn) { + ret = dr_dump_table_rx_tx(file, true, rx, + DR_DBG_PTR_TO_ID(table)); + if (ret < 0) + return ret; + } + + if (tx->nic_dmn) { + ret = dr_dump_table_rx_tx(file, false, tx, + DR_DBG_PTR_TO_ID(table)); + if (ret < 0) + return ret; + } + return 0; +} + +static int dr_dump_table_all(struct seq_file *file, struct mlx5dr_table *tbl) +{ + struct mlx5dr_matcher *matcher; + int ret; + + ret = dr_dump_table(file, tbl); + if (ret < 0) + return ret; + + list_for_each_entry(matcher, &tbl->matcher_list, list_node) { + ret = dr_dump_matcher_all(file, matcher); + if (ret < 0) + return ret; + } + return 0; +} + +static int +dr_dump_send_ring(struct seq_file *file, struct mlx5dr_send_ring *ring, + const u64 domain_id) +{ + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x\n", + DR_DUMP_REC_TYPE_DOMAIN_SEND_RING, DR_DBG_PTR_TO_ID(ring), + domain_id, ring->cq->mcq.cqn, ring->qp->qpn); + return 0; +} + +static int +dr_dump_domain_info_flex_parser(struct seq_file *file, + const char *flex_parser_name, + const u8 flex_parser_value, + const u64 domain_id) +{ + seq_printf(file, "%d,0x%llx,%s,0x%x\n", + DR_DUMP_REC_TYPE_DOMAIN_INFO_FLEX_PARSER, domain_id, + flex_parser_name, flex_parser_value); + return 0; +} + +static int +dr_dump_domain_info_caps(struct seq_file *file, struct mlx5dr_cmd_caps *caps, + const u64 domain_id) +{ + struct mlx5dr_cmd_vport_cap *vport_caps; + unsigned long i, vports_num; + + xa_for_each(&caps->vports.vports_caps_xa, vports_num, vport_caps) + ; /* count the number of vports in xarray */ + + seq_printf(file, "%d,0x%llx,0x%x,0x%llx,0x%llx,0x%x,%lu,%d\n", + DR_DUMP_REC_TYPE_DOMAIN_INFO_CAPS, domain_id, caps->gvmi, + caps->nic_rx_drop_address, caps->nic_tx_drop_address, + caps->flex_protocols, vports_num, caps->eswitch_manager); + + xa_for_each(&caps->vports.vports_caps_xa, i, vport_caps) { + vport_caps = xa_load(&caps->vports.vports_caps_xa, i); + + seq_printf(file, "%d,0x%llx,%lu,0x%x,0x%llx,0x%llx\n", + DR_DUMP_REC_TYPE_DOMAIN_INFO_VPORT, domain_id, i, + vport_caps->vport_gvmi, vport_caps->icm_address_rx, + vport_caps->icm_address_tx); + } + return 0; +} + +static int +dr_dump_domain_info(struct seq_file *file, struct mlx5dr_domain_info *info, + const u64 domain_id) +{ + int ret; + + ret = dr_dump_domain_info_caps(file, &info->caps, domain_id); + if (ret < 0) + return ret; + + ret = dr_dump_domain_info_flex_parser(file, "icmp_dw0", + info->caps.flex_parser_id_icmp_dw0, + domain_id); + if (ret < 0) + return ret; + + ret = dr_dump_domain_info_flex_parser(file, "icmp_dw1", + info->caps.flex_parser_id_icmp_dw1, + domain_id); + if (ret < 0) + return ret; + + ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw0", + info->caps.flex_parser_id_icmpv6_dw0, + domain_id); + if (ret < 0) + return ret; + + ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw1", + info->caps.flex_parser_id_icmpv6_dw1, + domain_id); + if (ret < 0) + return ret; + + return 0; +} + +static int 
+dr_dump_domain(struct seq_file *file, struct mlx5dr_domain *dmn) +{ + u64 domain_id = DR_DBG_PTR_TO_ID(dmn); + int ret; + + seq_printf(file, "%d,0x%llx,%d,0%x,%d,%s\n", DR_DUMP_REC_TYPE_DOMAIN, + domain_id, dmn->type, dmn->info.caps.gvmi, + dmn->info.supp_sw_steering, pci_name(dmn->mdev->pdev)); + + ret = dr_dump_domain_info(file, &dmn->info, domain_id); + if (ret < 0) + return ret; + + if (dmn->info.supp_sw_steering) { + ret = dr_dump_send_ring(file, dmn->send_ring, domain_id); + if (ret < 0) + return ret; + } + + return 0; +} + +static int dr_dump_domain_all(struct seq_file *file, struct mlx5dr_domain *dmn) +{ + struct mlx5dr_table *tbl; + int ret; + + mutex_lock(&dmn->dump_info.dbg_mutex); + mlx5dr_domain_lock(dmn); + + ret = dr_dump_domain(file, dmn); + if (ret < 0) + goto unlock_mutex; + + list_for_each_entry(tbl, &dmn->dbg_tbl_list, dbg_node) { + ret = dr_dump_table_all(file, tbl); + if (ret < 0) + break; + } + +unlock_mutex: + mlx5dr_domain_unlock(dmn); + mutex_unlock(&dmn->dump_info.dbg_mutex); + return ret; +} + +static int dr_dump_show(struct seq_file *file, void *priv) +{ + return dr_dump_domain_all(file, file->private); +} +DEFINE_SHOW_ATTRIBUTE(dr_dump); + +void mlx5dr_dbg_init_dump(struct mlx5dr_domain *dmn) +{ + struct mlx5_core_dev *dev = dmn->mdev; + char file_name[128]; + + if (dmn->type != MLX5DR_DOMAIN_TYPE_FDB) { + mlx5_core_warn(dev, + "Steering dump is not supported for NIC RX/TX domains\n"); + return; + } + + dmn->dump_info.steering_debugfs = + debugfs_create_dir("steering", mlx5_debugfs_get_dev_root(dev)); + dmn->dump_info.fdb_debugfs = + debugfs_create_dir("fdb", dmn->dump_info.steering_debugfs); + + sprintf(file_name, "dmn_%p", dmn); + debugfs_create_file(file_name, 0444, dmn->dump_info.fdb_debugfs, + dmn, &dr_dump_fops); + + INIT_LIST_HEAD(&dmn->dbg_tbl_list); + mutex_init(&dmn->dump_info.dbg_mutex); +} + +void mlx5dr_dbg_uninit_dump(struct mlx5dr_domain *dmn) +{ + debugfs_remove_recursive(dmn->dump_info.steering_debugfs); + mutex_destroy(&dmn->dump_info.dbg_mutex); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h new file mode 100644 index 000000000..def6cf853 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +struct mlx5dr_dbg_dump_info { + struct mutex dbg_mutex; /* protect dbg lists */ + struct dentry *steering_debugfs; + struct dentry *fdb_debugfs; +}; + +void mlx5dr_dbg_init_dump(struct mlx5dr_domain *dmn); +void mlx5dr_dbg_uninit_dump(struct mlx5dr_domain *dmn); +void mlx5dr_dbg_tbl_add(struct mlx5dr_table *tbl); +void mlx5dr_dbg_tbl_del(struct mlx5dr_table *tbl); +void mlx5dr_dbg_rule_add(struct mlx5dr_rule *rule); +void mlx5dr_dbg_rule_del(struct mlx5dr_rule *rule); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c new file mode 100644 index 000000000..fc6ae49b5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -0,0 +1,463 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include <linux/mlx5/eswitch.h> +#include <linux/err.h> +#include "dr_types.h" + +#define DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, dmn_type) \ + ((dmn)->info.caps.dmn_type##_sw_owner || \ + ((dmn)->info.caps.dmn_type##_sw_owner_v2 && \ + (dmn)->info.caps.sw_format_ver <= MLX5_STEERING_FORMAT_CONNECTX_7)) + +static void dr_domain_init_csum_recalc_fts(struct mlx5dr_domain *dmn) +{ + /* Per vport cached FW FT for checksum recalculation, this + * recalculation is needed due to a HW bug in STEv0. + */ + xa_init(&dmn->csum_fts_xa); +} + +static void dr_domain_uninit_csum_recalc_fts(struct mlx5dr_domain *dmn) +{ + struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft; + unsigned long i; + + xa_for_each(&dmn->csum_fts_xa, i, recalc_cs_ft) { + if (recalc_cs_ft) + mlx5dr_fw_destroy_recalc_cs_ft(dmn, recalc_cs_ft); + } + + xa_destroy(&dmn->csum_fts_xa); +} + +int mlx5dr_domain_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn, + u16 vport_num, + u64 *rx_icm_addr) +{ + struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft; + int ret; + + recalc_cs_ft = xa_load(&dmn->csum_fts_xa, vport_num); + if (!recalc_cs_ft) { + /* Table hasn't been created yet */ + recalc_cs_ft = mlx5dr_fw_create_recalc_cs_ft(dmn, vport_num); + if (!recalc_cs_ft) + return -EINVAL; + + ret = xa_err(xa_store(&dmn->csum_fts_xa, vport_num, + recalc_cs_ft, GFP_KERNEL)); + if (ret) + return ret; + } + + *rx_icm_addr = recalc_cs_ft->rx_icm_addr; + + return 0; +} + +static int dr_domain_init_resources(struct mlx5dr_domain *dmn) +{ + int ret; + + dmn->ste_ctx = mlx5dr_ste_get_ctx(dmn->info.caps.sw_format_ver); + if (!dmn->ste_ctx) { + mlx5dr_err(dmn, "SW Steering on this device is unsupported\n"); + return -EOPNOTSUPP; + } + + ret = mlx5_core_alloc_pd(dmn->mdev, &dmn->pdn); + if (ret) { + mlx5dr_err(dmn, "Couldn't allocate PD, ret: %d", ret); + return ret; + } + + dmn->uar = mlx5_get_uars_page(dmn->mdev); + if (IS_ERR(dmn->uar)) { + mlx5dr_err(dmn, "Couldn't allocate UAR\n"); + ret = PTR_ERR(dmn->uar); + goto clean_pd; + } + + dmn->ste_icm_pool = mlx5dr_icm_pool_create(dmn, DR_ICM_TYPE_STE); + if (!dmn->ste_icm_pool) { + mlx5dr_err(dmn, "Couldn't get icm memory\n"); + ret = -ENOMEM; + goto clean_uar; + } + + dmn->action_icm_pool = mlx5dr_icm_pool_create(dmn, DR_ICM_TYPE_MODIFY_ACTION); + if (!dmn->action_icm_pool) { + mlx5dr_err(dmn, "Couldn't get action icm memory\n"); + ret = -ENOMEM; + goto free_ste_icm_pool; + } + + ret = mlx5dr_send_ring_alloc(dmn); + if (ret) { + mlx5dr_err(dmn, "Couldn't create send-ring\n"); + goto free_action_icm_pool; + } + + return 0; + +free_action_icm_pool: + mlx5dr_icm_pool_destroy(dmn->action_icm_pool); +free_ste_icm_pool: + mlx5dr_icm_pool_destroy(dmn->ste_icm_pool); +clean_uar: + mlx5_put_uars_page(dmn->mdev, dmn->uar); +clean_pd: + mlx5_core_dealloc_pd(dmn->mdev, dmn->pdn); + + return ret; +} + +static void dr_domain_uninit_resources(struct mlx5dr_domain *dmn) +{ + mlx5dr_send_ring_free(dmn, dmn->send_ring); + mlx5dr_icm_pool_destroy(dmn->action_icm_pool); + mlx5dr_icm_pool_destroy(dmn->ste_icm_pool); + mlx5_put_uars_page(dmn->mdev, dmn->uar); + mlx5_core_dealloc_pd(dmn->mdev, dmn->pdn); +} + +static void dr_domain_fill_uplink_caps(struct mlx5dr_domain *dmn, + struct mlx5dr_cmd_vport_cap *uplink_vport) +{ + struct mlx5dr_esw_caps *esw_caps = &dmn->info.caps.esw_caps; + + uplink_vport->num = MLX5_VPORT_UPLINK; + uplink_vport->icm_address_rx = esw_caps->uplink_icm_address_rx; + uplink_vport->icm_address_tx = esw_caps->uplink_icm_address_tx; + uplink_vport->vport_gvmi = 0; + uplink_vport->vhca_gvmi = dmn->info.caps.gvmi; +} + 
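/* A minimal sketch of the lazy per-vport caching pattern that
 * mlx5dr_domain_get_vport_cap() and dr_domain_add_vport_cap() implement
 * further below: look the entry up in an xarray, query and insert it on a
 * miss, and treat xa_insert() returning -EBUSY as "another thread cached it
 * first, reload".  The names my_vport_entry/my_vport_cache_get and the
 * query callback are hypothetical and not part of the driver; the sketch
 * assumes the usual <linux/xarray.h>, <linux/slab.h> and <linux/err.h>
 * helpers, which this file already pulls in via dr_types.h.
 */
struct my_vport_entry {
	u16 vport;
	u64 icm_addr_rx;
	u64 icm_addr_tx;
};

static struct my_vport_entry *
my_vport_cache_get(struct xarray *cache, u16 vport,
		   int (*query)(u16 vport, struct my_vport_entry *entry))
{
	struct my_vport_entry *entry;
	int ret;

reload:
	/* Fast path: this vport was already queried and cached. */
	entry = xa_load(cache, vport);
	if (entry)
		return entry;

	entry = kvzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return ERR_PTR(-ENOMEM);

	/* Slow path: fill the entry from FW/HW via the caller's callback. */
	ret = query(vport, entry);
	if (ret) {
		kvfree(entry);
		return ERR_PTR(ret);
	}

	ret = xa_insert(cache, vport, entry, GFP_KERNEL);
	if (ret == -EBUSY) {
		/* Another thread cached this vport first - use its entry. */
		kvfree(entry);
		goto reload;
	}
	if (ret) {
		kvfree(entry);
		return ERR_PTR(ret);
	}

	return entry;
}
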
+static int dr_domain_query_vport(struct mlx5dr_domain *dmn, + u16 vport_number, + bool other_vport, + struct mlx5dr_cmd_vport_cap *vport_caps) +{ + int ret; + + ret = mlx5dr_cmd_query_esw_vport_context(dmn->mdev, + other_vport, + vport_number, + &vport_caps->icm_address_rx, + &vport_caps->icm_address_tx); + if (ret) + return ret; + + ret = mlx5dr_cmd_query_gvmi(dmn->mdev, + other_vport, + vport_number, + &vport_caps->vport_gvmi); + if (ret) + return ret; + + vport_caps->num = vport_number; + vport_caps->vhca_gvmi = dmn->info.caps.gvmi; + + return 0; +} + +static int dr_domain_query_esw_mngr(struct mlx5dr_domain *dmn) +{ + return dr_domain_query_vport(dmn, 0, false, + &dmn->info.caps.vports.esw_manager_caps); +} + +static void dr_domain_query_uplink(struct mlx5dr_domain *dmn) +{ + dr_domain_fill_uplink_caps(dmn, &dmn->info.caps.vports.uplink_caps); +} + +static struct mlx5dr_cmd_vport_cap * +dr_domain_add_vport_cap(struct mlx5dr_domain *dmn, u16 vport) +{ + struct mlx5dr_cmd_caps *caps = &dmn->info.caps; + struct mlx5dr_cmd_vport_cap *vport_caps; + int ret; + + vport_caps = kvzalloc(sizeof(*vport_caps), GFP_KERNEL); + if (!vport_caps) + return NULL; + + ret = dr_domain_query_vport(dmn, vport, true, vport_caps); + if (ret) { + kvfree(vport_caps); + return NULL; + } + + ret = xa_insert(&caps->vports.vports_caps_xa, vport, + vport_caps, GFP_KERNEL); + if (ret) { + mlx5dr_dbg(dmn, "Couldn't insert new vport into xarray (%d)\n", ret); + kvfree(vport_caps); + return ERR_PTR(ret); + } + + return vport_caps; +} + +static bool dr_domain_is_esw_mgr_vport(struct mlx5dr_domain *dmn, u16 vport) +{ + struct mlx5dr_cmd_caps *caps = &dmn->info.caps; + + return (caps->is_ecpf && vport == MLX5_VPORT_ECPF) || + (!caps->is_ecpf && vport == 0); +} + +struct mlx5dr_cmd_vport_cap * +mlx5dr_domain_get_vport_cap(struct mlx5dr_domain *dmn, u16 vport) +{ + struct mlx5dr_cmd_caps *caps = &dmn->info.caps; + struct mlx5dr_cmd_vport_cap *vport_caps; + + if (dr_domain_is_esw_mgr_vport(dmn, vport)) + return &caps->vports.esw_manager_caps; + + if (vport == MLX5_VPORT_UPLINK) + return &caps->vports.uplink_caps; + +vport_load: + vport_caps = xa_load(&caps->vports.vports_caps_xa, vport); + if (vport_caps) + return vport_caps; + + vport_caps = dr_domain_add_vport_cap(dmn, vport); + if (PTR_ERR(vport_caps) == -EBUSY) + /* caps were already stored by another thread */ + goto vport_load; + + return vport_caps; +} + +static void dr_domain_clear_vports(struct mlx5dr_domain *dmn) +{ + struct mlx5dr_cmd_vport_cap *vport_caps; + unsigned long i; + + xa_for_each(&dmn->info.caps.vports.vports_caps_xa, i, vport_caps) { + vport_caps = xa_erase(&dmn->info.caps.vports.vports_caps_xa, i); + kvfree(vport_caps); + } +} + +static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev, + struct mlx5dr_domain *dmn) +{ + int ret; + + if (!dmn->info.caps.eswitch_manager) + return -EOPNOTSUPP; + + ret = mlx5dr_cmd_query_esw_caps(mdev, &dmn->info.caps.esw_caps); + if (ret) + return ret; + + dmn->info.caps.fdb_sw_owner = dmn->info.caps.esw_caps.sw_owner; + dmn->info.caps.fdb_sw_owner_v2 = dmn->info.caps.esw_caps.sw_owner_v2; + dmn->info.caps.esw_rx_drop_address = dmn->info.caps.esw_caps.drop_icm_address_rx; + dmn->info.caps.esw_tx_drop_address = dmn->info.caps.esw_caps.drop_icm_address_tx; + + xa_init(&dmn->info.caps.vports.vports_caps_xa); + + /* Query eswitch manager and uplink vports only. Rest of the + * vports (vport 0, VFs and SFs) will be queried dynamically. 
+ */ + + ret = dr_domain_query_esw_mngr(dmn); + if (ret) { + mlx5dr_err(dmn, "Failed to query eswitch manager vport caps (err: %d)", ret); + goto free_vports_caps_xa; + } + + dr_domain_query_uplink(dmn); + + return 0; + +free_vports_caps_xa: + xa_destroy(&dmn->info.caps.vports.vports_caps_xa); + + return ret; +} + +static int dr_domain_caps_init(struct mlx5_core_dev *mdev, + struct mlx5dr_domain *dmn) +{ + struct mlx5dr_cmd_vport_cap *vport_cap; + int ret; + + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) { + mlx5dr_err(dmn, "Failed to allocate domain, bad link type\n"); + return -EOPNOTSUPP; + } + + ret = mlx5dr_cmd_query_device(mdev, &dmn->info.caps); + if (ret) + return ret; + + ret = dr_domain_query_fdb_caps(mdev, dmn); + if (ret) + return ret; + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + if (!DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, rx)) + return -ENOTSUPP; + + dmn->info.supp_sw_steering = true; + dmn->info.rx.type = DR_DOMAIN_NIC_TYPE_RX; + dmn->info.rx.default_icm_addr = dmn->info.caps.nic_rx_drop_address; + dmn->info.rx.drop_icm_addr = dmn->info.caps.nic_rx_drop_address; + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + if (!DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, tx)) + return -ENOTSUPP; + + dmn->info.supp_sw_steering = true; + dmn->info.tx.type = DR_DOMAIN_NIC_TYPE_TX; + dmn->info.tx.default_icm_addr = dmn->info.caps.nic_tx_allow_address; + dmn->info.tx.drop_icm_addr = dmn->info.caps.nic_tx_drop_address; + break; + case MLX5DR_DOMAIN_TYPE_FDB: + if (!dmn->info.caps.eswitch_manager) + return -ENOTSUPP; + + if (!DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, fdb)) + return -ENOTSUPP; + + dmn->info.rx.type = DR_DOMAIN_NIC_TYPE_RX; + dmn->info.tx.type = DR_DOMAIN_NIC_TYPE_TX; + vport_cap = &dmn->info.caps.vports.esw_manager_caps; + + dmn->info.supp_sw_steering = true; + dmn->info.tx.default_icm_addr = vport_cap->icm_address_tx; + dmn->info.rx.default_icm_addr = vport_cap->icm_address_rx; + dmn->info.rx.drop_icm_addr = dmn->info.caps.esw_rx_drop_address; + dmn->info.tx.drop_icm_addr = dmn->info.caps.esw_tx_drop_address; + break; + default: + mlx5dr_err(dmn, "Invalid domain\n"); + ret = -EINVAL; + break; + } + + return ret; +} + +static void dr_domain_caps_uninit(struct mlx5dr_domain *dmn) +{ + dr_domain_clear_vports(dmn); + xa_destroy(&dmn->info.caps.vports.vports_caps_xa); +} + +struct mlx5dr_domain * +mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) +{ + struct mlx5dr_domain *dmn; + int ret; + + if (type > MLX5DR_DOMAIN_TYPE_FDB) + return NULL; + + dmn = kzalloc(sizeof(*dmn), GFP_KERNEL); + if (!dmn) + return NULL; + + dmn->mdev = mdev; + dmn->type = type; + refcount_set(&dmn->refcount, 1); + mutex_init(&dmn->info.rx.mutex); + mutex_init(&dmn->info.tx.mutex); + + if (dr_domain_caps_init(mdev, dmn)) { + mlx5dr_err(dmn, "Failed init domain, no caps\n"); + goto free_domain; + } + + dmn->info.max_log_action_icm_sz = DR_CHUNK_SIZE_4K; + dmn->info.max_log_sw_icm_sz = min_t(u32, DR_CHUNK_SIZE_1024K, + dmn->info.caps.log_icm_size); + + if (!dmn->info.supp_sw_steering) { + mlx5dr_err(dmn, "SW steering is not supported\n"); + goto uninit_caps; + } + + /* Allocate resources */ + ret = dr_domain_init_resources(dmn); + if (ret) { + mlx5dr_err(dmn, "Failed init domain resources\n"); + goto uninit_caps; + } + + dr_domain_init_csum_recalc_fts(dmn); + mlx5dr_dbg_init_dump(dmn); + return dmn; + +uninit_caps: + dr_domain_caps_uninit(dmn); +free_domain: + kfree(dmn); + return NULL; +} + +/* Assure synchronization of the device steering tables with updates made by 
SW + * insertion. + */ +int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags) +{ + int ret = 0; + + if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_SW) { + mlx5dr_domain_lock(dmn); + ret = mlx5dr_send_ring_force_drain(dmn); + mlx5dr_domain_unlock(dmn); + if (ret) { + mlx5dr_err(dmn, "Force drain failed flags: %d, ret: %d\n", + flags, ret); + return ret; + } + } + + if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_HW) + ret = mlx5dr_cmd_sync_steering(dmn->mdev); + + return ret; +} + +int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn) +{ + if (WARN_ON_ONCE(refcount_read(&dmn->refcount) > 1)) + return -EBUSY; + + /* make sure resources are not used by the hardware */ + mlx5dr_cmd_sync_steering(dmn->mdev); + mlx5dr_dbg_uninit_dump(dmn); + dr_domain_uninit_csum_recalc_fts(dmn); + dr_domain_uninit_resources(dmn); + dr_domain_caps_uninit(dmn); + mutex_destroy(&dmn->info.tx.mutex); + mutex_destroy(&dmn->info.rx.mutex); + kfree(dmn); + return 0; +} + +void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, + struct mlx5dr_domain *peer_dmn) +{ + mlx5dr_domain_lock(dmn); + + if (dmn->peer_dmn) + refcount_dec(&dmn->peer_dmn->refcount); + + dmn->peer_dmn = peer_dmn; + + if (dmn->peer_dmn) + refcount_inc(&dmn->peer_dmn->refcount); + + mlx5dr_domain_unlock(dmn); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c new file mode 100644 index 000000000..f05ef0cd5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <linux/types.h> +#include "dr_types.h" + +struct mlx5dr_fw_recalc_cs_ft * +mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u16 vport_num) +{ + struct mlx5dr_cmd_create_flow_table_attr ft_attr = {}; + struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft; + u32 table_id, group_id, modify_hdr_id; + u64 rx_icm_addr, modify_ttl_action; + int ret; + + recalc_cs_ft = kzalloc(sizeof(*recalc_cs_ft), GFP_KERNEL); + if (!recalc_cs_ft) + return NULL; + + ft_attr.table_type = MLX5_FLOW_TABLE_TYPE_FDB; + ft_attr.level = dmn->info.caps.max_ft_level - 1; + ft_attr.term_tbl = true; + + ret = mlx5dr_cmd_create_flow_table(dmn->mdev, + &ft_attr, + &rx_icm_addr, + &table_id); + if (ret) { + mlx5dr_err(dmn, "Failed creating TTL W/A FW flow table %d\n", ret); + goto free_ttl_tbl; + } + + ret = mlx5dr_cmd_create_empty_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + table_id, &group_id); + if (ret) { + mlx5dr_err(dmn, "Failed creating TTL W/A FW flow group %d\n", ret); + goto destroy_flow_table; + } + + /* Modify TTL action by adding zero to trigger CS recalculation */ + modify_ttl_action = 0; + MLX5_SET(set_action_in, &modify_ttl_action, action_type, MLX5_ACTION_TYPE_ADD); + MLX5_SET(set_action_in, &modify_ttl_action, field, MLX5_ACTION_IN_FIELD_OUT_IP_TTL); + + ret = mlx5dr_cmd_alloc_modify_header(dmn->mdev, MLX5_FLOW_TABLE_TYPE_FDB, 1, + &modify_ttl_action, + &modify_hdr_id); + if (ret) { + mlx5dr_err(dmn, "Failed modify header TTL %d\n", ret); + goto destroy_flow_group; + } + + ret = mlx5dr_cmd_set_fte_modify_and_vport(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + table_id, group_id, modify_hdr_id, + vport_num); + if (ret) { + mlx5dr_err(dmn, "Failed setting TTL W/A flow table entry %d\n", ret); + goto dealloc_modify_header; + } + + recalc_cs_ft->modify_hdr_id = modify_hdr_id; + recalc_cs_ft->rx_icm_addr = rx_icm_addr; + recalc_cs_ft->table_id = table_id; + recalc_cs_ft->group_id = group_id; + + 
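+	/* Everything needed for the checksum-recalculation workaround is now
+	 * in place: the termination flow table, its flow group, the "add 0
+	 * to TTL" modify header and the FTE forwarding to the vport. The
+	 * caller owns recalc_cs_ft and releases it through
+	 * mlx5dr_fw_destroy_recalc_cs_ft(), which tears the objects down in
+	 * reverse order of creation.
+	 */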
return recalc_cs_ft; + +dealloc_modify_header: + mlx5dr_cmd_dealloc_modify_header(dmn->mdev, modify_hdr_id); +destroy_flow_group: + mlx5dr_cmd_destroy_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + table_id, group_id); +destroy_flow_table: + mlx5dr_cmd_destroy_flow_table(dmn->mdev, table_id, MLX5_FLOW_TABLE_TYPE_FDB); +free_ttl_tbl: + kfree(recalc_cs_ft); + return NULL; +} + +void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn, + struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft) +{ + mlx5dr_cmd_del_flow_table_entry(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + recalc_cs_ft->table_id); + mlx5dr_cmd_dealloc_modify_header(dmn->mdev, recalc_cs_ft->modify_hdr_id); + mlx5dr_cmd_destroy_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + recalc_cs_ft->table_id, + recalc_cs_ft->group_id); + mlx5dr_cmd_destroy_flow_table(dmn->mdev, + recalc_cs_ft->table_id, + MLX5_FLOW_TABLE_TYPE_FDB); + + kfree(recalc_cs_ft); +} + +int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_cmd_flow_destination_hw_info *dest, + int num_dest, + bool reformat_req, + u32 *tbl_id, + u32 *group_id, + bool ignore_flow_level, + u32 flow_source) +{ + struct mlx5dr_cmd_create_flow_table_attr ft_attr = {}; + struct mlx5dr_cmd_fte_info fte_info = {}; + u32 val[MLX5_ST_SZ_DW_MATCH_PARAM] = {}; + struct mlx5dr_cmd_ft_info ft_info = {}; + int ret; + + ft_attr.table_type = MLX5_FLOW_TABLE_TYPE_FDB; + ft_attr.level = min_t(int, dmn->info.caps.max_ft_level - 2, + MLX5_FT_MAX_MULTIPATH_LEVEL); + ft_attr.reformat_en = reformat_req; + ft_attr.decap_en = reformat_req; + + ret = mlx5dr_cmd_create_flow_table(dmn->mdev, &ft_attr, NULL, tbl_id); + if (ret) { + mlx5dr_err(dmn, "Failed creating multi dest FW flow table %d\n", ret); + return ret; + } + + ret = mlx5dr_cmd_create_empty_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + *tbl_id, group_id); + if (ret) { + mlx5dr_err(dmn, "Failed creating multi dest FW flow group %d\n", ret); + goto free_flow_table; + } + + ft_info.id = *tbl_id; + ft_info.type = FS_FT_FDB; + fte_info.action.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + fte_info.dests_size = num_dest; + fte_info.val = val; + fte_info.dest_arr = dest; + fte_info.ignore_flow_level = ignore_flow_level; + fte_info.flow_context.flow_source = flow_source; + + ret = mlx5dr_cmd_set_fte(dmn->mdev, 0, 0, &ft_info, *group_id, &fte_info); + if (ret) { + mlx5dr_err(dmn, "Failed setting fte into table %d\n", ret); + goto free_flow_group; + } + + return 0; + +free_flow_group: + mlx5dr_cmd_destroy_flow_group(dmn->mdev, MLX5_FLOW_TABLE_TYPE_FDB, + *tbl_id, *group_id); +free_flow_table: + mlx5dr_cmd_destroy_flow_table(dmn->mdev, *tbl_id, + MLX5_FLOW_TABLE_TYPE_FDB); + return ret; +} + +void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, + u32 tbl_id, u32 group_id) +{ + mlx5dr_cmd_del_flow_table_entry(dmn->mdev, FS_FT_FDB, tbl_id); + mlx5dr_cmd_destroy_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + tbl_id, group_id); + mlx5dr_cmd_destroy_flow_table(dmn->mdev, tbl_id, + MLX5_FLOW_TABLE_TYPE_FDB); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c new file mode 100644 index 000000000..4ca67fa24 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c @@ -0,0 +1,520 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include "dr_types.h" + +#define DR_ICM_MODIFY_HDR_ALIGN_BASE 64 + +struct mlx5dr_icm_pool { + enum mlx5dr_icm_type icm_type; + enum mlx5dr_icm_chunk_size max_log_chunk_sz; + struct mlx5dr_domain *dmn; + /* memory management */ + struct mutex mutex; /* protect the ICM pool and ICM buddy */ + struct list_head buddy_mem_list; + u64 hot_memory_size; +}; + +struct mlx5dr_icm_dm { + u32 obj_id; + enum mlx5_sw_icm_type type; + phys_addr_t addr; + size_t length; +}; + +struct mlx5dr_icm_mr { + u32 mkey; + struct mlx5dr_icm_dm dm; + struct mlx5dr_domain *dmn; + size_t length; + u64 icm_start_addr; +}; + +static int dr_icm_create_dm_mkey(struct mlx5_core_dev *mdev, + u32 pd, u64 length, u64 start_addr, int mode, + u32 *mkey) +{ + u32 inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {}; + void *mkc; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + MLX5_SET(mkc, mkc, access_mode_1_0, mode); + MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + if (mode == MLX5_MKC_ACCESS_MODE_SW_ICM) { + MLX5_SET(mkc, mkc, rw, 1); + MLX5_SET(mkc, mkc, rr, 1); + } + + MLX5_SET64(mkc, mkc, len, length); + MLX5_SET(mkc, mkc, pd, pd); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET64(mkc, mkc, start_addr, start_addr); + + return mlx5_core_create_mkey(mdev, mkey, in, inlen); +} + +u64 mlx5dr_icm_pool_get_chunk_mr_addr(struct mlx5dr_icm_chunk *chunk) +{ + u32 offset = mlx5dr_icm_pool_dm_type_to_entry_size(chunk->buddy_mem->pool->icm_type); + + return (u64)offset * chunk->seg; +} + +u32 mlx5dr_icm_pool_get_chunk_rkey(struct mlx5dr_icm_chunk *chunk) +{ + return chunk->buddy_mem->icm_mr->mkey; +} + +u64 mlx5dr_icm_pool_get_chunk_icm_addr(struct mlx5dr_icm_chunk *chunk) +{ + u32 size = mlx5dr_icm_pool_dm_type_to_entry_size(chunk->buddy_mem->pool->icm_type); + + return (u64)chunk->buddy_mem->icm_mr->icm_start_addr + size * chunk->seg; +} + +u32 mlx5dr_icm_pool_get_chunk_byte_size(struct mlx5dr_icm_chunk *chunk) +{ + return mlx5dr_icm_pool_chunk_size_to_byte(chunk->size, + chunk->buddy_mem->pool->icm_type); +} + +u32 mlx5dr_icm_pool_get_chunk_num_of_entries(struct mlx5dr_icm_chunk *chunk) +{ + return mlx5dr_icm_pool_chunk_size_to_entries(chunk->size); +} + +static struct mlx5dr_icm_mr * +dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool) +{ + struct mlx5_core_dev *mdev = pool->dmn->mdev; + enum mlx5_sw_icm_type dm_type; + struct mlx5dr_icm_mr *icm_mr; + size_t log_align_base; + int err; + + icm_mr = kvzalloc(sizeof(*icm_mr), GFP_KERNEL); + if (!icm_mr) + return NULL; + + icm_mr->dmn = pool->dmn; + + icm_mr->dm.length = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, + pool->icm_type); + + if (pool->icm_type == DR_ICM_TYPE_STE) { + dm_type = MLX5_SW_ICM_TYPE_STEERING; + log_align_base = ilog2(icm_mr->dm.length); + } else { + dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY; + /* Align base is 64B */ + log_align_base = ilog2(DR_ICM_MODIFY_HDR_ALIGN_BASE); + } + icm_mr->dm.type = dm_type; + + err = mlx5_dm_sw_icm_alloc(mdev, icm_mr->dm.type, icm_mr->dm.length, + log_align_base, 0, &icm_mr->dm.addr, + &icm_mr->dm.obj_id); + if (err) { + mlx5dr_err(pool->dmn, "Failed to allocate SW ICM memory, err (%d)\n", err); + goto free_icm_mr; + } + + /* Register device memory */ + err = dr_icm_create_dm_mkey(mdev, pool->dmn->pdn, + icm_mr->dm.length, + icm_mr->dm.addr, + MLX5_MKC_ACCESS_MODE_SW_ICM, + &icm_mr->mkey); + if (err) { + mlx5dr_err(pool->dmn, "Failed to create SW ICM MKEY, err (%d)\n", err); + goto 
free_dm; + } + + icm_mr->icm_start_addr = icm_mr->dm.addr; + + if (icm_mr->icm_start_addr & (BIT(log_align_base) - 1)) { + mlx5dr_err(pool->dmn, "Failed to get Aligned ICM mem (asked: %zu)\n", + log_align_base); + goto free_mkey; + } + + return icm_mr; + +free_mkey: + mlx5_core_destroy_mkey(mdev, icm_mr->mkey); +free_dm: + mlx5_dm_sw_icm_dealloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0, + icm_mr->dm.addr, icm_mr->dm.obj_id); +free_icm_mr: + kvfree(icm_mr); + return NULL; +} + +static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr) +{ + struct mlx5_core_dev *mdev = icm_mr->dmn->mdev; + struct mlx5dr_icm_dm *dm = &icm_mr->dm; + + mlx5_core_destroy_mkey(mdev, icm_mr->mkey); + mlx5_dm_sw_icm_dealloc(mdev, dm->type, dm->length, 0, + dm->addr, dm->obj_id); + kvfree(icm_mr); +} + +static int dr_icm_buddy_get_ste_size(struct mlx5dr_icm_buddy_mem *buddy) +{ + /* We support only one type of STE size, both for ConnectX-5 and later + * devices. Once the support for match STE which has a larger tag is + * added (32B instead of 16B), the STE size for devices later than + * ConnectX-5 needs to account for that. + */ + return DR_STE_SIZE_REDUCED; +} + +static void dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk, int offset) +{ + struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem; + int index = offset / DR_STE_SIZE; + + chunk->ste_arr = &buddy->ste_arr[index]; + chunk->miss_list = &buddy->miss_list[index]; + chunk->hw_ste_arr = buddy->hw_ste_arr + + index * dr_icm_buddy_get_ste_size(buddy); +} + +static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk) +{ + int num_of_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(chunk); + struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem; + + memset(chunk->hw_ste_arr, 0, + num_of_entries * dr_icm_buddy_get_ste_size(buddy)); + memset(chunk->ste_arr, 0, + num_of_entries * sizeof(chunk->ste_arr[0])); +} + +static enum mlx5dr_icm_type +get_chunk_icm_type(struct mlx5dr_icm_chunk *chunk) +{ + return chunk->buddy_mem->pool->icm_type; +} + +static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk, + struct mlx5dr_icm_buddy_mem *buddy) +{ + enum mlx5dr_icm_type icm_type = get_chunk_icm_type(chunk); + + buddy->used_memory -= mlx5dr_icm_pool_get_chunk_byte_size(chunk); + list_del(&chunk->chunk_list); + + if (icm_type == DR_ICM_TYPE_STE) + dr_icm_chunk_ste_cleanup(chunk); + + kvfree(chunk); +} + +static int dr_icm_buddy_init_ste_cache(struct mlx5dr_icm_buddy_mem *buddy) +{ + int num_of_entries = + mlx5dr_icm_pool_chunk_size_to_entries(buddy->pool->max_log_chunk_sz); + + buddy->ste_arr = kvcalloc(num_of_entries, + sizeof(struct mlx5dr_ste), GFP_KERNEL); + if (!buddy->ste_arr) + return -ENOMEM; + + /* Preallocate full STE size on non-ConnectX-5 devices since + * we need to support both full and reduced with the same cache. 
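+	 * The cache covers the whole buddy: one struct mlx5dr_ste, one
+	 * hw_ste shadow of dr_icm_buddy_get_ste_size() bytes and one miss
+	 * list head per entry, so dr_icm_chunk_ste_init() only needs to
+	 * index into these arrays instead of allocating per chunk.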
+ */ + buddy->hw_ste_arr = kvcalloc(num_of_entries, + dr_icm_buddy_get_ste_size(buddy), GFP_KERNEL); + if (!buddy->hw_ste_arr) + goto free_ste_arr; + + buddy->miss_list = kvmalloc(num_of_entries * sizeof(struct list_head), GFP_KERNEL); + if (!buddy->miss_list) + goto free_hw_ste_arr; + + return 0; + +free_hw_ste_arr: + kvfree(buddy->hw_ste_arr); +free_ste_arr: + kvfree(buddy->ste_arr); + return -ENOMEM; +} + +static void dr_icm_buddy_cleanup_ste_cache(struct mlx5dr_icm_buddy_mem *buddy) +{ + kvfree(buddy->ste_arr); + kvfree(buddy->hw_ste_arr); + kvfree(buddy->miss_list); +} + +static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool) +{ + struct mlx5dr_icm_buddy_mem *buddy; + struct mlx5dr_icm_mr *icm_mr; + + icm_mr = dr_icm_pool_mr_create(pool); + if (!icm_mr) + return -ENOMEM; + + buddy = kvzalloc(sizeof(*buddy), GFP_KERNEL); + if (!buddy) + goto free_mr; + + if (mlx5dr_buddy_init(buddy, pool->max_log_chunk_sz)) + goto err_free_buddy; + + buddy->icm_mr = icm_mr; + buddy->pool = pool; + + if (pool->icm_type == DR_ICM_TYPE_STE) { + /* Reduce allocations by preallocating and reusing the STE structures */ + if (dr_icm_buddy_init_ste_cache(buddy)) + goto err_cleanup_buddy; + } + + /* add it to the -start- of the list in order to search in it first */ + list_add(&buddy->list_node, &pool->buddy_mem_list); + + return 0; + +err_cleanup_buddy: + mlx5dr_buddy_cleanup(buddy); +err_free_buddy: + kvfree(buddy); +free_mr: + dr_icm_pool_mr_destroy(icm_mr); + return -ENOMEM; +} + +static void dr_icm_buddy_destroy(struct mlx5dr_icm_buddy_mem *buddy) +{ + struct mlx5dr_icm_chunk *chunk, *next; + + list_for_each_entry_safe(chunk, next, &buddy->hot_list, chunk_list) + dr_icm_chunk_destroy(chunk, buddy); + + list_for_each_entry_safe(chunk, next, &buddy->used_list, chunk_list) + dr_icm_chunk_destroy(chunk, buddy); + + dr_icm_pool_mr_destroy(buddy->icm_mr); + + mlx5dr_buddy_cleanup(buddy); + + if (buddy->pool->icm_type == DR_ICM_TYPE_STE) + dr_icm_buddy_cleanup_ste_cache(buddy); + + kvfree(buddy); +} + +static struct mlx5dr_icm_chunk * +dr_icm_chunk_create(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size, + struct mlx5dr_icm_buddy_mem *buddy_mem_pool, + unsigned int seg) +{ + struct mlx5dr_icm_chunk *chunk; + int offset; + + chunk = kvzalloc(sizeof(*chunk), GFP_KERNEL); + if (!chunk) + return NULL; + + offset = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type) * seg; + + chunk->seg = seg; + chunk->size = chunk_size; + chunk->buddy_mem = buddy_mem_pool; + + if (pool->icm_type == DR_ICM_TYPE_STE) + dr_icm_chunk_ste_init(chunk, offset); + + buddy_mem_pool->used_memory += mlx5dr_icm_pool_get_chunk_byte_size(chunk); + INIT_LIST_HEAD(&chunk->chunk_list); + + /* chunk now is part of the used_list */ + list_add_tail(&chunk->chunk_list, &buddy_mem_pool->used_list); + + return chunk; +} + +static bool dr_icm_pool_is_sync_required(struct mlx5dr_icm_pool *pool) +{ + int allow_hot_size; + + /* sync when hot memory reaches half of the pool size */ + allow_hot_size = + mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, + pool->icm_type) / 2; + + return pool->hot_memory_size > allow_hot_size; +} + +static int dr_icm_pool_sync_all_buddy_pools(struct mlx5dr_icm_pool *pool) +{ + struct mlx5dr_icm_buddy_mem *buddy, *tmp_buddy; + u32 num_entries; + int err; + + err = mlx5dr_cmd_sync_steering(pool->dmn->mdev); + if (err) { + mlx5dr_err(pool->dmn, "Failed to sync to HW (err: %d)\n", err); + return err; + } + + list_for_each_entry_safe(buddy, tmp_buddy, &pool->buddy_mem_list, list_node) { + struct 
mlx5dr_icm_chunk *chunk, *tmp_chunk; + + list_for_each_entry_safe(chunk, tmp_chunk, &buddy->hot_list, chunk_list) { + num_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(chunk); + mlx5dr_buddy_free_mem(buddy, chunk->seg, ilog2(num_entries)); + pool->hot_memory_size -= mlx5dr_icm_pool_get_chunk_byte_size(chunk); + dr_icm_chunk_destroy(chunk, buddy); + } + + if (!buddy->used_memory && pool->icm_type == DR_ICM_TYPE_STE) + dr_icm_buddy_destroy(buddy); + } + + return 0; +} + +static int dr_icm_handle_buddies_get_mem(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size, + struct mlx5dr_icm_buddy_mem **buddy, + unsigned int *seg) +{ + struct mlx5dr_icm_buddy_mem *buddy_mem_pool; + bool new_mem = false; + int err; + +alloc_buddy_mem: + /* find the next free place from the buddy list */ + list_for_each_entry(buddy_mem_pool, &pool->buddy_mem_list, list_node) { + err = mlx5dr_buddy_alloc_mem(buddy_mem_pool, + chunk_size, seg); + if (!err) + goto found; + + if (WARN_ON(new_mem)) { + /* We have new memory pool, first in the list */ + mlx5dr_err(pool->dmn, + "No memory for order: %d\n", + chunk_size); + goto out; + } + } + + /* no more available allocators in that pool, create new */ + err = dr_icm_buddy_create(pool); + if (err) { + mlx5dr_err(pool->dmn, + "Failed creating buddy for order %d\n", + chunk_size); + goto out; + } + + /* mark we have new memory, first in list */ + new_mem = true; + goto alloc_buddy_mem; + +found: + *buddy = buddy_mem_pool; +out: + return err; +} + +/* Allocate an ICM chunk, each chunk holds a piece of ICM memory and + * also memory used for HW STE management for optimizations. + */ +struct mlx5dr_icm_chunk * +mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size) +{ + struct mlx5dr_icm_chunk *chunk = NULL; + struct mlx5dr_icm_buddy_mem *buddy; + unsigned int seg; + int ret; + + if (chunk_size > pool->max_log_chunk_sz) + return NULL; + + mutex_lock(&pool->mutex); + /* find mem, get back the relevant buddy pool and seg in that mem */ + ret = dr_icm_handle_buddies_get_mem(pool, chunk_size, &buddy, &seg); + if (ret) + goto out; + + chunk = dr_icm_chunk_create(pool, chunk_size, buddy, seg); + if (!chunk) + goto out_err; + + goto out; + +out_err: + mlx5dr_buddy_free_mem(buddy, seg, chunk_size); +out: + mutex_unlock(&pool->mutex); + return chunk; +} + +void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk) +{ + struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem; + struct mlx5dr_icm_pool *pool = buddy->pool; + + /* move the memory to the waiting list AKA "hot" */ + mutex_lock(&pool->mutex); + list_move_tail(&chunk->chunk_list, &buddy->hot_list); + pool->hot_memory_size += mlx5dr_icm_pool_get_chunk_byte_size(chunk); + + /* Check if we have chunks that are waiting for sync-ste */ + if (dr_icm_pool_is_sync_required(pool)) + dr_icm_pool_sync_all_buddy_pools(pool); + + mutex_unlock(&pool->mutex); +} + +struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn, + enum mlx5dr_icm_type icm_type) +{ + enum mlx5dr_icm_chunk_size max_log_chunk_sz; + struct mlx5dr_icm_pool *pool; + + if (icm_type == DR_ICM_TYPE_STE) + max_log_chunk_sz = dmn->info.max_log_sw_icm_sz; + else + max_log_chunk_sz = dmn->info.max_log_action_icm_sz; + + pool = kvzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return NULL; + + pool->dmn = dmn; + pool->icm_type = icm_type; + pool->max_log_chunk_sz = max_log_chunk_sz; + + INIT_LIST_HEAD(&pool->buddy_mem_list); + + mutex_init(&pool->mutex); + + return pool; +} + +void 
mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool) +{ + struct mlx5dr_icm_buddy_mem *buddy, *tmp_buddy; + + list_for_each_entry_safe(buddy, tmp_buddy, &pool->buddy_mem_list, list_node) + dr_icm_buddy_destroy(buddy); + + mutex_destroy(&pool->mutex); + kvfree(pool); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c new file mode 100644 index 000000000..0726848eb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -0,0 +1,1108 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "dr_types.h" + +static bool dr_mask_is_smac_set(struct mlx5dr_match_spec *spec) +{ + return (spec->smac_47_16 || spec->smac_15_0); +} + +static bool dr_mask_is_dmac_set(struct mlx5dr_match_spec *spec) +{ + return (spec->dmac_47_16 || spec->dmac_15_0); +} + +static bool dr_mask_is_l3_base_set(struct mlx5dr_match_spec *spec) +{ + return (spec->ip_protocol || spec->frag || spec->tcp_flags || + spec->ip_ecn || spec->ip_dscp); +} + +static bool dr_mask_is_tcp_udp_base_set(struct mlx5dr_match_spec *spec) +{ + return (spec->tcp_sport || spec->tcp_dport || + spec->udp_sport || spec->udp_dport); +} + +static bool dr_mask_is_ipv4_set(struct mlx5dr_match_spec *spec) +{ + return (spec->dst_ip_31_0 || spec->src_ip_31_0); +} + +static bool dr_mask_is_ipv4_5_tuple_set(struct mlx5dr_match_spec *spec) +{ + return (dr_mask_is_l3_base_set(spec) || + dr_mask_is_tcp_udp_base_set(spec) || + dr_mask_is_ipv4_set(spec)); +} + +static bool dr_mask_is_eth_l2_tnl_set(struct mlx5dr_match_misc *misc) +{ + return misc->vxlan_vni; +} + +static bool dr_mask_is_ttl_set(struct mlx5dr_match_spec *spec) +{ + return spec->ttl_hoplimit; +} + +static bool dr_mask_is_ipv4_ihl_set(struct mlx5dr_match_spec *spec) +{ + return spec->ipv4_ihl; +} + +#define DR_MASK_IS_L2_DST(_spec, _misc, _inner_outer) (_spec.first_vid || \ + (_spec).first_cfi || (_spec).first_prio || (_spec).cvlan_tag || \ + (_spec).svlan_tag || (_spec).dmac_47_16 || (_spec).dmac_15_0 || \ + (_spec).ethertype || (_spec).ip_version || \ + (_misc)._inner_outer##_second_vid || \ + (_misc)._inner_outer##_second_cfi || \ + (_misc)._inner_outer##_second_prio || \ + (_misc)._inner_outer##_second_cvlan_tag || \ + (_misc)._inner_outer##_second_svlan_tag) + +#define DR_MASK_IS_ETH_L4_SET(_spec, _misc, _inner_outer) ( \ + dr_mask_is_l3_base_set(&(_spec)) || \ + dr_mask_is_tcp_udp_base_set(&(_spec)) || \ + dr_mask_is_ttl_set(&(_spec)) || \ + (_misc)._inner_outer##_ipv6_flow_label) + +#define DR_MASK_IS_ETH_L4_MISC_SET(_misc3, _inner_outer) ( \ + (_misc3)._inner_outer##_tcp_seq_num || \ + (_misc3)._inner_outer##_tcp_ack_num) + +#define DR_MASK_IS_FIRST_MPLS_SET(_misc2, _inner_outer) ( \ + (_misc2)._inner_outer##_first_mpls_label || \ + (_misc2)._inner_outer##_first_mpls_exp || \ + (_misc2)._inner_outer##_first_mpls_s_bos || \ + (_misc2)._inner_outer##_first_mpls_ttl) + +static bool dr_mask_is_tnl_gre_set(struct mlx5dr_match_misc *misc) +{ + return (misc->gre_key_h || misc->gre_key_l || + misc->gre_protocol || misc->gre_c_present || + misc->gre_k_present || misc->gre_s_present); +} + +#define DR_MASK_IS_OUTER_MPLS_OVER_GRE_SET(_misc) (\ + (_misc)->outer_first_mpls_over_gre_label || \ + (_misc)->outer_first_mpls_over_gre_exp || \ + (_misc)->outer_first_mpls_over_gre_s_bos || \ + (_misc)->outer_first_mpls_over_gre_ttl) + +#define DR_MASK_IS_OUTER_MPLS_OVER_UDP_SET(_misc) (\ + (_misc)->outer_first_mpls_over_udp_label || \ + 
(_misc)->outer_first_mpls_over_udp_exp || \ + (_misc)->outer_first_mpls_over_udp_s_bos || \ + (_misc)->outer_first_mpls_over_udp_ttl) + +static bool +dr_mask_is_vxlan_gpe_set(struct mlx5dr_match_misc3 *misc3) +{ + return (misc3->outer_vxlan_gpe_vni || + misc3->outer_vxlan_gpe_next_protocol || + misc3->outer_vxlan_gpe_flags); +} + +static bool +dr_matcher_supp_vxlan_gpe(struct mlx5dr_cmd_caps *caps) +{ + return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) || + (caps->flex_protocols & MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED); +} + +static bool +dr_mask_is_tnl_vxlan_gpe(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return dr_mask_is_vxlan_gpe_set(&mask->misc3) && + dr_matcher_supp_vxlan_gpe(&dmn->info.caps); +} + +static bool dr_mask_is_tnl_geneve_set(struct mlx5dr_match_misc *misc) +{ + return misc->geneve_vni || + misc->geneve_oam || + misc->geneve_protocol_type || + misc->geneve_opt_len; +} + +static bool dr_mask_is_tnl_geneve_tlv_opt(struct mlx5dr_match_misc3 *misc3) +{ + return misc3->geneve_tlv_option_0_data; +} + +static bool +dr_matcher_supp_flex_parser_ok(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_parser_ok_bits_supp; +} + +static bool dr_mask_is_tnl_geneve_tlv_opt_exist_set(struct mlx5dr_match_misc *misc, + struct mlx5dr_domain *dmn) +{ + return dr_matcher_supp_flex_parser_ok(&dmn->info.caps) && + misc->geneve_tlv_option_0_exist; +} + +static bool +dr_matcher_supp_tnl_geneve(struct mlx5dr_cmd_caps *caps) +{ + return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) || + (caps->flex_protocols & MLX5_FLEX_PARSER_GENEVE_ENABLED); +} + +static bool +dr_mask_is_tnl_geneve(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return dr_mask_is_tnl_geneve_set(&mask->misc) && + dr_matcher_supp_tnl_geneve(&dmn->info.caps); +} + +static bool dr_mask_is_tnl_gtpu_set(struct mlx5dr_match_misc3 *misc3) +{ + return misc3->gtpu_msg_flags || misc3->gtpu_msg_type || misc3->gtpu_teid; +} + +static bool dr_matcher_supp_tnl_gtpu(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_ENABLED; +} + +static bool dr_mask_is_tnl_gtpu(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return dr_mask_is_tnl_gtpu_set(&mask->misc3) && + dr_matcher_supp_tnl_gtpu(&dmn->info.caps); +} + +static int dr_matcher_supp_tnl_gtpu_dw_0(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_0_ENABLED; +} + +static bool dr_mask_is_tnl_gtpu_dw_0(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return mask->misc3.gtpu_dw_0 && + dr_matcher_supp_tnl_gtpu_dw_0(&dmn->info.caps); +} + +static int dr_matcher_supp_tnl_gtpu_teid(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_TEID_ENABLED; +} + +static bool dr_mask_is_tnl_gtpu_teid(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return mask->misc3.gtpu_teid && + dr_matcher_supp_tnl_gtpu_teid(&dmn->info.caps); +} + +static int dr_matcher_supp_tnl_gtpu_dw_2(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_2_ENABLED; +} + +static bool dr_mask_is_tnl_gtpu_dw_2(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return mask->misc3.gtpu_dw_2 && + dr_matcher_supp_tnl_gtpu_dw_2(&dmn->info.caps); +} + +static int dr_matcher_supp_tnl_gtpu_first_ext(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_FIRST_EXT_DW_0_ENABLED; +} + +static bool dr_mask_is_tnl_gtpu_first_ext(struct 
mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return mask->misc3.gtpu_first_ext_dw_0 && + dr_matcher_supp_tnl_gtpu_first_ext(&dmn->info.caps); +} + +static bool dr_mask_is_tnl_gtpu_flex_parser_0(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + struct mlx5dr_cmd_caps *caps = &dmn->info.caps; + + return (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_dw_0) && + dr_mask_is_tnl_gtpu_dw_0(mask, dmn)) || + (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_teid) && + dr_mask_is_tnl_gtpu_teid(mask, dmn)) || + (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_dw_2) && + dr_mask_is_tnl_gtpu_dw_2(mask, dmn)) || + (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_first_ext_dw_0) && + dr_mask_is_tnl_gtpu_first_ext(mask, dmn)); +} + +static bool dr_mask_is_tnl_gtpu_flex_parser_1(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + struct mlx5dr_cmd_caps *caps = &dmn->info.caps; + + return (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_dw_0) && + dr_mask_is_tnl_gtpu_dw_0(mask, dmn)) || + (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_teid) && + dr_mask_is_tnl_gtpu_teid(mask, dmn)) || + (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_dw_2) && + dr_mask_is_tnl_gtpu_dw_2(mask, dmn)) || + (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_first_ext_dw_0) && + dr_mask_is_tnl_gtpu_first_ext(mask, dmn)); +} + +static bool dr_mask_is_tnl_gtpu_any(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return dr_mask_is_tnl_gtpu_flex_parser_0(mask, dmn) || + dr_mask_is_tnl_gtpu_flex_parser_1(mask, dmn) || + dr_mask_is_tnl_gtpu(mask, dmn); +} + +static int dr_matcher_supp_icmp_v4(struct mlx5dr_cmd_caps *caps) +{ + return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) || + (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED); +} + +static int dr_matcher_supp_icmp_v6(struct mlx5dr_cmd_caps *caps) +{ + return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) || + (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED); +} + +static bool dr_mask_is_icmpv6_set(struct mlx5dr_match_misc3 *misc3) +{ + return (misc3->icmpv6_type || misc3->icmpv6_code || + misc3->icmpv6_header_data); +} + +static bool dr_mask_is_icmp(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + if (DR_MASK_IS_ICMPV4_SET(&mask->misc3)) + return dr_matcher_supp_icmp_v4(&dmn->info.caps); + else if (dr_mask_is_icmpv6_set(&mask->misc3)) + return dr_matcher_supp_icmp_v6(&dmn->info.caps); + + return false; +} + +static bool dr_mask_is_wqe_metadata_set(struct mlx5dr_match_misc2 *misc2) +{ + return misc2->metadata_reg_a; +} + +static bool dr_mask_is_reg_c_0_3_set(struct mlx5dr_match_misc2 *misc2) +{ + return (misc2->metadata_reg_c_0 || misc2->metadata_reg_c_1 || + misc2->metadata_reg_c_2 || misc2->metadata_reg_c_3); +} + +static bool dr_mask_is_reg_c_4_7_set(struct mlx5dr_match_misc2 *misc2) +{ + return (misc2->metadata_reg_c_4 || misc2->metadata_reg_c_5 || + misc2->metadata_reg_c_6 || misc2->metadata_reg_c_7); +} + +static bool dr_mask_is_gvmi_or_qpn_set(struct mlx5dr_match_misc *misc) +{ + return (misc->source_sqn || misc->source_port); +} + +static bool dr_mask_is_flex_parser_id_0_3_set(u32 flex_parser_id, + u32 flex_parser_value) +{ + if (flex_parser_id) + return flex_parser_id <= DR_STE_MAX_FLEX_0_ID; + + /* Using flex_parser 0 means that id is zero, thus value must be set. 
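+	 * A non-zero id is simply range-checked against DR_STE_MAX_FLEX_0_ID,
+	 * but id 0 is ambiguous (parser 0 vs. field not used at all), so the
+	 * sample value must be non-zero for the mask to count as parser 0.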
*/ + return flex_parser_value; +} + +static bool dr_mask_is_flex_parser_0_3_set(struct mlx5dr_match_misc4 *misc4) +{ + return (dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_0, + misc4->prog_sample_field_value_0) || + dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_1, + misc4->prog_sample_field_value_1) || + dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_2, + misc4->prog_sample_field_value_2) || + dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_3, + misc4->prog_sample_field_value_3)); +} + +static bool dr_mask_is_flex_parser_id_4_7_set(u32 flex_parser_id) +{ + return flex_parser_id > DR_STE_MAX_FLEX_0_ID && + flex_parser_id <= DR_STE_MAX_FLEX_1_ID; +} + +static bool dr_mask_is_flex_parser_4_7_set(struct mlx5dr_match_misc4 *misc4) +{ + return (dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_0) || + dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_1) || + dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_2) || + dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_3)); +} + +static int dr_matcher_supp_tnl_mpls_over_gre(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED; +} + +static bool dr_mask_is_tnl_mpls_over_gre(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return DR_MASK_IS_OUTER_MPLS_OVER_GRE_SET(&mask->misc2) && + dr_matcher_supp_tnl_mpls_over_gre(&dmn->info.caps); +} + +static int dr_matcher_supp_tnl_mpls_over_udp(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED; +} + +static bool dr_mask_is_tnl_mpls_over_udp(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return DR_MASK_IS_OUTER_MPLS_OVER_UDP_SET(&mask->misc2) && + dr_matcher_supp_tnl_mpls_over_udp(&dmn->info.caps); +} + +static bool dr_mask_is_tnl_header_0_1_set(struct mlx5dr_match_misc5 *misc5) +{ + return misc5->tunnel_header_0 || misc5->tunnel_header_1; +} + +int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + enum mlx5dr_ipv outer_ipv, + enum mlx5dr_ipv inner_ipv) +{ + nic_matcher->ste_builder = + nic_matcher->ste_builder_arr[outer_ipv][inner_ipv]; + nic_matcher->num_of_builders = + nic_matcher->num_of_builders_arr[outer_ipv][inner_ipv]; + + if (!nic_matcher->num_of_builders) { + mlx5dr_dbg(matcher->tbl->dmn, + "Rule not supported on this matcher due to IP related fields\n"); + return -EINVAL; + } + + return 0; +} + +static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + enum mlx5dr_ipv outer_ipv, + enum mlx5dr_ipv inner_ipv) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx; + struct mlx5dr_match_param mask = {}; + bool allow_empty_match = false; + struct mlx5dr_ste_build *sb; + bool inner, rx; + int idx = 0; + int ret, i; + + sb = nic_matcher->ste_builder_arr[outer_ipv][inner_ipv]; + rx = nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX; + + /* Create a temporary mask to track and clear used mask fields */ + if (matcher->match_criteria & DR_MATCHER_CRITERIA_OUTER) + mask.outer = matcher->mask.outer; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC) + mask.misc = matcher->mask.misc; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_INNER) + mask.inner = matcher->mask.inner; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC2) 
+ mask.misc2 = matcher->mask.misc2; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC3) + mask.misc3 = matcher->mask.misc3; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC4) + mask.misc4 = matcher->mask.misc4; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC5) + mask.misc5 = matcher->mask.misc5; + + ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria, + &matcher->mask, NULL); + if (ret) + return ret; + + /* Optimize RX pipe by reducing source port match, since + * the FDB RX part is connected only to the wire. + */ + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB && + rx && mask.misc.source_port) { + mask.misc.source_port = 0; + mask.misc.source_eswitch_owner_vhca_id = 0; + allow_empty_match = true; + } + + /* Outer */ + if (matcher->match_criteria & (DR_MATCHER_CRITERIA_OUTER | + DR_MATCHER_CRITERIA_MISC | + DR_MATCHER_CRITERIA_MISC2 | + DR_MATCHER_CRITERIA_MISC3 | + DR_MATCHER_CRITERIA_MISC5)) { + inner = false; + + if (dr_mask_is_wqe_metadata_set(&mask.misc2)) + mlx5dr_ste_build_general_purpose(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (dr_mask_is_reg_c_0_3_set(&mask.misc2)) + mlx5dr_ste_build_register_0(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (dr_mask_is_reg_c_4_7_set(&mask.misc2)) + mlx5dr_ste_build_register_1(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (dr_mask_is_gvmi_or_qpn_set(&mask.misc) && + (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || + dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX)) { + mlx5dr_ste_build_src_gvmi_qpn(ste_ctx, &sb[idx++], + &mask, dmn, inner, rx); + } + + if (dr_mask_is_smac_set(&mask.outer) && + dr_mask_is_dmac_set(&mask.outer)) { + mlx5dr_ste_build_eth_l2_src_dst(ste_ctx, &sb[idx++], + &mask, inner, rx); + } + + if (dr_mask_is_smac_set(&mask.outer)) + mlx5dr_ste_build_eth_l2_src(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (DR_MASK_IS_L2_DST(mask.outer, mask.misc, outer)) + mlx5dr_ste_build_eth_l2_dst(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (outer_ipv == DR_RULE_IPV6) { + if (DR_MASK_IS_DST_IP_SET(&mask.outer)) + mlx5dr_ste_build_eth_l3_ipv6_dst(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (DR_MASK_IS_SRC_IP_SET(&mask.outer)) + mlx5dr_ste_build_eth_l3_ipv6_src(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (DR_MASK_IS_ETH_L4_SET(mask.outer, mask.misc, outer)) + mlx5dr_ste_build_eth_ipv6_l3_l4(ste_ctx, &sb[idx++], + &mask, inner, rx); + } else { + if (dr_mask_is_ipv4_5_tuple_set(&mask.outer)) + mlx5dr_ste_build_eth_l3_ipv4_5_tuple(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (dr_mask_is_ttl_set(&mask.outer) || + dr_mask_is_ipv4_ihl_set(&mask.outer)) + mlx5dr_ste_build_eth_l3_ipv4_misc(ste_ctx, &sb[idx++], + &mask, inner, rx); + } + + if (dr_mask_is_tnl_vxlan_gpe(&mask, dmn)) + mlx5dr_ste_build_tnl_vxlan_gpe(ste_ctx, &sb[idx++], + &mask, inner, rx); + else if (dr_mask_is_tnl_geneve(&mask, dmn)) { + mlx5dr_ste_build_tnl_geneve(ste_ctx, &sb[idx++], + &mask, inner, rx); + if (dr_mask_is_tnl_geneve_tlv_opt(&mask.misc3)) + mlx5dr_ste_build_tnl_geneve_tlv_opt(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + if (dr_mask_is_tnl_geneve_tlv_opt_exist_set(&mask.misc, dmn)) + mlx5dr_ste_build_tnl_geneve_tlv_opt_exist(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + } else if (dr_mask_is_tnl_gtpu_any(&mask, dmn)) { + if (dr_mask_is_tnl_gtpu_flex_parser_0(&mask, dmn)) + mlx5dr_ste_build_tnl_gtpu_flex_parser_0(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + + if (dr_mask_is_tnl_gtpu_flex_parser_1(&mask, dmn)) + mlx5dr_ste_build_tnl_gtpu_flex_parser_1(ste_ctx, &sb[idx++], 
+ &mask, &dmn->info.caps, + inner, rx); + + if (dr_mask_is_tnl_gtpu(&mask, dmn)) + mlx5dr_ste_build_tnl_gtpu(ste_ctx, &sb[idx++], + &mask, inner, rx); + } else if (dr_mask_is_tnl_header_0_1_set(&mask.misc5)) { + mlx5dr_ste_build_tnl_header_0_1(ste_ctx, &sb[idx++], + &mask, inner, rx); + } + + if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, outer)) + mlx5dr_ste_build_eth_l4_misc(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (DR_MASK_IS_FIRST_MPLS_SET(mask.misc2, outer)) + mlx5dr_ste_build_mpls(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (dr_mask_is_tnl_mpls_over_gre(&mask, dmn)) + mlx5dr_ste_build_tnl_mpls_over_gre(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + else if (dr_mask_is_tnl_mpls_over_udp(&mask, dmn)) + mlx5dr_ste_build_tnl_mpls_over_udp(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + + if (dr_mask_is_icmp(&mask, dmn)) + mlx5dr_ste_build_icmp(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + + if (dr_mask_is_tnl_gre_set(&mask.misc)) + mlx5dr_ste_build_tnl_gre(ste_ctx, &sb[idx++], + &mask, inner, rx); + } + + /* Inner */ + if (matcher->match_criteria & (DR_MATCHER_CRITERIA_INNER | + DR_MATCHER_CRITERIA_MISC | + DR_MATCHER_CRITERIA_MISC2 | + DR_MATCHER_CRITERIA_MISC3)) { + inner = true; + + if (dr_mask_is_eth_l2_tnl_set(&mask.misc)) + mlx5dr_ste_build_eth_l2_tnl(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (dr_mask_is_smac_set(&mask.inner) && + dr_mask_is_dmac_set(&mask.inner)) { + mlx5dr_ste_build_eth_l2_src_dst(ste_ctx, &sb[idx++], + &mask, inner, rx); + } + + if (dr_mask_is_smac_set(&mask.inner)) + mlx5dr_ste_build_eth_l2_src(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (DR_MASK_IS_L2_DST(mask.inner, mask.misc, inner)) + mlx5dr_ste_build_eth_l2_dst(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (inner_ipv == DR_RULE_IPV6) { + if (DR_MASK_IS_DST_IP_SET(&mask.inner)) + mlx5dr_ste_build_eth_l3_ipv6_dst(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (DR_MASK_IS_SRC_IP_SET(&mask.inner)) + mlx5dr_ste_build_eth_l3_ipv6_src(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (DR_MASK_IS_ETH_L4_SET(mask.inner, mask.misc, inner)) + mlx5dr_ste_build_eth_ipv6_l3_l4(ste_ctx, &sb[idx++], + &mask, inner, rx); + } else { + if (dr_mask_is_ipv4_5_tuple_set(&mask.inner)) + mlx5dr_ste_build_eth_l3_ipv4_5_tuple(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (dr_mask_is_ttl_set(&mask.inner) || + dr_mask_is_ipv4_ihl_set(&mask.inner)) + mlx5dr_ste_build_eth_l3_ipv4_misc(ste_ctx, &sb[idx++], + &mask, inner, rx); + } + + if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, inner)) + mlx5dr_ste_build_eth_l4_misc(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (DR_MASK_IS_FIRST_MPLS_SET(mask.misc2, inner)) + mlx5dr_ste_build_mpls(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (dr_mask_is_tnl_mpls_over_gre(&mask, dmn)) + mlx5dr_ste_build_tnl_mpls_over_gre(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + else if (dr_mask_is_tnl_mpls_over_udp(&mask, dmn)) + mlx5dr_ste_build_tnl_mpls_over_udp(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + } + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC4) { + if (dr_mask_is_flex_parser_0_3_set(&mask.misc4)) + mlx5dr_ste_build_flex_parser_0(ste_ctx, &sb[idx++], + &mask, false, rx); + + if (dr_mask_is_flex_parser_4_7_set(&mask.misc4)) + mlx5dr_ste_build_flex_parser_1(ste_ctx, &sb[idx++], + &mask, false, rx); + } + + /* Empty matcher, takes all */ + if ((!idx && allow_empty_match) || + matcher->match_criteria == DR_MATCHER_CRITERIA_EMPTY) + mlx5dr_ste_build_empty_always_hit(&sb[idx++], 
rx); + + if (idx == 0) { + mlx5dr_err(dmn, "Cannot generate any valid rules from mask\n"); + return -EINVAL; + } + + /* Check that all mask fields were consumed */ + for (i = 0; i < sizeof(struct mlx5dr_match_param); i++) { + if (((u8 *)&mask)[i] != 0) { + mlx5dr_dbg(dmn, "Mask contains unsupported parameters\n"); + return -EOPNOTSUPP; + } + } + + nic_matcher->ste_builder = sb; + nic_matcher->num_of_builders_arr[outer_ipv][inner_ipv] = idx; + + return 0; +} + +static int dr_nic_matcher_connect(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *curr_nic_matcher, + struct mlx5dr_matcher_rx_tx *next_nic_matcher, + struct mlx5dr_matcher_rx_tx *prev_nic_matcher) +{ + struct mlx5dr_table_rx_tx *nic_tbl = curr_nic_matcher->nic_tbl; + struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_ste_htbl *prev_htbl; + int ret; + + /* Connect end anchor hash table to next_htbl or to the default address */ + if (next_nic_matcher) { + info.type = CONNECT_HIT; + info.hit_next_htbl = next_nic_matcher->s_htbl; + } else { + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_tbl->default_icm_addr; + } + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, + curr_nic_matcher->e_anchor, + &info, info.type == CONNECT_HIT); + if (ret) + return ret; + + /* Connect start hash table to end anchor */ + info.type = CONNECT_MISS; + info.miss_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(curr_nic_matcher->e_anchor->chunk); + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, + curr_nic_matcher->s_htbl, + &info, false); + if (ret) + return ret; + + /* Connect previous hash table to matcher start hash table */ + if (prev_nic_matcher) + prev_htbl = prev_nic_matcher->e_anchor; + else + prev_htbl = nic_tbl->s_anchor; + + info.type = CONNECT_HIT; + info.hit_next_htbl = curr_nic_matcher->s_htbl; + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, prev_htbl, + &info, true); + if (ret) + return ret; + + /* Update the pointing ste and next hash table */ + curr_nic_matcher->s_htbl->pointing_ste = prev_htbl->chunk->ste_arr; + prev_htbl->chunk->ste_arr[0].next_htbl = curr_nic_matcher->s_htbl; + + if (next_nic_matcher) { + next_nic_matcher->s_htbl->pointing_ste = + curr_nic_matcher->e_anchor->chunk->ste_arr; + curr_nic_matcher->e_anchor->chunk->ste_arr[0].next_htbl = + next_nic_matcher->s_htbl; + } + + return 0; +} + +int mlx5dr_matcher_add_to_tbl_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + struct mlx5dr_matcher_rx_tx *next_nic_matcher, *prev_nic_matcher, *tmp_nic_matcher; + struct mlx5dr_table_rx_tx *nic_tbl = nic_matcher->nic_tbl; + bool first = true; + int ret; + + /* If the nic matcher is already on its parent nic table list, + * then it is already connected to the chain of nic matchers. 
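+	 * Otherwise it is linked in ascending priority order: the walk below
+	 * stops at the first matcher whose prio is >= the new one, so e.g.
+	 * with existing prios {1, 3} a new prio-2 matcher lands between
+	 * them, and dr_nic_matcher_connect() then splices its start htbl and
+	 * end anchor into the STE chain at that same position.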
+ */ + if (!list_empty(&nic_matcher->list_node)) + return 0; + + next_nic_matcher = NULL; + list_for_each_entry(tmp_nic_matcher, &nic_tbl->nic_matcher_list, list_node) { + if (tmp_nic_matcher->prio >= nic_matcher->prio) { + next_nic_matcher = tmp_nic_matcher; + break; + } + first = false; + } + + prev_nic_matcher = NULL; + if (next_nic_matcher && !first) + prev_nic_matcher = list_prev_entry(next_nic_matcher, list_node); + else if (!first) + prev_nic_matcher = list_last_entry(&nic_tbl->nic_matcher_list, + struct mlx5dr_matcher_rx_tx, + list_node); + + ret = dr_nic_matcher_connect(dmn, nic_matcher, + next_nic_matcher, prev_nic_matcher); + if (ret) + return ret; + + if (prev_nic_matcher) + list_add(&nic_matcher->list_node, &prev_nic_matcher->list_node); + else if (next_nic_matcher) + list_add_tail(&nic_matcher->list_node, &next_nic_matcher->list_node); + else + list_add(&nic_matcher->list_node, &nic_matcher->nic_tbl->nic_matcher_list); + + return ret; +} + +static void dr_matcher_uninit_nic(struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + mlx5dr_htbl_put(nic_matcher->s_htbl); + mlx5dr_htbl_put(nic_matcher->e_anchor); +} + +static void dr_matcher_uninit_fdb(struct mlx5dr_matcher *matcher) +{ + dr_matcher_uninit_nic(&matcher->rx); + dr_matcher_uninit_nic(&matcher->tx); +} + +static void dr_matcher_uninit(struct mlx5dr_matcher *matcher) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + dr_matcher_uninit_nic(&matcher->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + dr_matcher_uninit_nic(&matcher->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + dr_matcher_uninit_fdb(matcher); + break; + default: + WARN_ON(true); + break; + } +} + +static int dr_matcher_set_all_ste_builders(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + + dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV4, DR_RULE_IPV4); + dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV4, DR_RULE_IPV6); + dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV6, DR_RULE_IPV4); + dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV6, DR_RULE_IPV6); + + if (!nic_matcher->ste_builder) { + mlx5dr_err(dmn, "Cannot generate IPv4 or IPv6 rules with given mask\n"); + return -EINVAL; + } + + return 0; +} + +static int dr_matcher_init_nic(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + int ret; + + nic_matcher->prio = matcher->prio; + INIT_LIST_HEAD(&nic_matcher->list_node); + + ret = dr_matcher_set_all_ste_builders(matcher, nic_matcher); + if (ret) + return ret; + + nic_matcher->e_anchor = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + DR_CHUNK_SIZE_1, + MLX5DR_STE_LU_TYPE_DONT_CARE, + 0); + if (!nic_matcher->e_anchor) + return -ENOMEM; + + nic_matcher->s_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + DR_CHUNK_SIZE_1, + nic_matcher->ste_builder[0].lu_type, + nic_matcher->ste_builder[0].byte_mask); + if (!nic_matcher->s_htbl) { + ret = -ENOMEM; + goto free_e_htbl; + } + + /* make sure the tables exist while empty */ + mlx5dr_htbl_get(nic_matcher->s_htbl); + mlx5dr_htbl_get(nic_matcher->e_anchor); + + return 0; + +free_e_htbl: + mlx5dr_ste_htbl_free(nic_matcher->e_anchor); + return ret; +} + +static int dr_matcher_init_fdb(struct mlx5dr_matcher *matcher) +{ + int ret; + + ret = dr_matcher_init_nic(matcher, &matcher->rx); + if (ret) + return ret; + + ret = dr_matcher_init_nic(matcher, 
&matcher->tx); + if (ret) + goto uninit_nic_rx; + + return 0; + +uninit_nic_rx: + dr_matcher_uninit_nic(&matcher->rx); + return ret; +} + +static int dr_matcher_copy_param(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *mask) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_match_parameters consumed_mask; + int i, ret = 0; + + if (matcher->match_criteria >= DR_MATCHER_CRITERIA_MAX) { + mlx5dr_err(dmn, "Invalid match criteria attribute\n"); + return -EINVAL; + } + + if (mask) { + if (mask->match_sz > DR_SZ_MATCH_PARAM) { + mlx5dr_err(dmn, "Invalid match size attribute\n"); + return -EINVAL; + } + + consumed_mask.match_buf = kzalloc(mask->match_sz, GFP_KERNEL); + if (!consumed_mask.match_buf) + return -ENOMEM; + + consumed_mask.match_sz = mask->match_sz; + memcpy(consumed_mask.match_buf, mask->match_buf, mask->match_sz); + mlx5dr_ste_copy_param(matcher->match_criteria, + &matcher->mask, &consumed_mask, true); + + /* Check that all mask data was consumed */ + for (i = 0; i < consumed_mask.match_sz; i++) { + if (!((u8 *)consumed_mask.match_buf)[i]) + continue; + + mlx5dr_dbg(dmn, + "Match param mask contains unsupported parameters\n"); + ret = -EOPNOTSUPP; + break; + } + + kfree(consumed_mask.match_buf); + } + + return ret; +} + +static int dr_matcher_init(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *mask) +{ + struct mlx5dr_table *tbl = matcher->tbl; + struct mlx5dr_domain *dmn = tbl->dmn; + int ret; + + ret = dr_matcher_copy_param(matcher, mask); + if (ret) + return ret; + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + matcher->rx.nic_tbl = &tbl->rx; + ret = dr_matcher_init_nic(matcher, &matcher->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + matcher->tx.nic_tbl = &tbl->tx; + ret = dr_matcher_init_nic(matcher, &matcher->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + matcher->rx.nic_tbl = &tbl->rx; + matcher->tx.nic_tbl = &tbl->tx; + ret = dr_matcher_init_fdb(matcher); + break; + default: + WARN_ON(true); + ret = -EINVAL; + } + + return ret; +} + +static void dr_matcher_add_to_dbg_list(struct mlx5dr_matcher *matcher) +{ + mutex_lock(&matcher->tbl->dmn->dump_info.dbg_mutex); + list_add(&matcher->list_node, &matcher->tbl->matcher_list); + mutex_unlock(&matcher->tbl->dmn->dump_info.dbg_mutex); +} + +static void dr_matcher_remove_from_dbg_list(struct mlx5dr_matcher *matcher) +{ + mutex_lock(&matcher->tbl->dmn->dump_info.dbg_mutex); + list_del(&matcher->list_node); + mutex_unlock(&matcher->tbl->dmn->dump_info.dbg_mutex); +} + +struct mlx5dr_matcher * +mlx5dr_matcher_create(struct mlx5dr_table *tbl, + u32 priority, + u8 match_criteria_enable, + struct mlx5dr_match_parameters *mask) +{ + struct mlx5dr_matcher *matcher; + int ret; + + refcount_inc(&tbl->refcount); + + matcher = kzalloc(sizeof(*matcher), GFP_KERNEL); + if (!matcher) + goto dec_ref; + + matcher->tbl = tbl; + matcher->prio = priority; + matcher->match_criteria = match_criteria_enable; + refcount_set(&matcher->refcount, 1); + INIT_LIST_HEAD(&matcher->list_node); + INIT_LIST_HEAD(&matcher->dbg_rule_list); + + mlx5dr_domain_lock(tbl->dmn); + + ret = dr_matcher_init(matcher, mask); + if (ret) + goto free_matcher; + + dr_matcher_add_to_dbg_list(matcher); + + mlx5dr_domain_unlock(tbl->dmn); + + return matcher; + +free_matcher: + mlx5dr_domain_unlock(tbl->dmn); + kfree(matcher); +dec_ref: + refcount_dec(&tbl->refcount); + return NULL; +} + +static int dr_matcher_disconnect_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_table_rx_tx *nic_tbl, + struct mlx5dr_matcher_rx_tx 
*next_nic_matcher, + struct mlx5dr_matcher_rx_tx *prev_nic_matcher) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_ste_htbl *prev_anchor; + + if (prev_nic_matcher) + prev_anchor = prev_nic_matcher->e_anchor; + else + prev_anchor = nic_tbl->s_anchor; + + /* Connect previous anchor hash table to next matcher or to the default address */ + if (next_nic_matcher) { + info.type = CONNECT_HIT; + info.hit_next_htbl = next_nic_matcher->s_htbl; + next_nic_matcher->s_htbl->pointing_ste = prev_anchor->chunk->ste_arr; + prev_anchor->chunk->ste_arr[0].next_htbl = next_nic_matcher->s_htbl; + } else { + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_tbl->default_icm_addr; + prev_anchor->chunk->ste_arr[0].next_htbl = NULL; + } + + return mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, prev_anchor, + &info, true); +} + +int mlx5dr_matcher_remove_from_tbl_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + struct mlx5dr_matcher_rx_tx *prev_nic_matcher, *next_nic_matcher; + struct mlx5dr_table_rx_tx *nic_tbl = nic_matcher->nic_tbl; + int ret; + + /* If the nic matcher is not on its parent nic table list, + * then it is detached - no need to disconnect it. + */ + if (list_empty(&nic_matcher->list_node)) + return 0; + + if (list_is_last(&nic_matcher->list_node, &nic_tbl->nic_matcher_list)) + next_nic_matcher = NULL; + else + next_nic_matcher = list_next_entry(nic_matcher, list_node); + + if (nic_matcher->list_node.prev == &nic_tbl->nic_matcher_list) + prev_nic_matcher = NULL; + else + prev_nic_matcher = list_prev_entry(nic_matcher, list_node); + + ret = dr_matcher_disconnect_nic(dmn, nic_tbl, next_nic_matcher, prev_nic_matcher); + if (ret) + return ret; + + list_del_init(&nic_matcher->list_node); + return 0; +} + +int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher) +{ + struct mlx5dr_table *tbl = matcher->tbl; + + if (WARN_ON_ONCE(refcount_read(&matcher->refcount) > 1)) + return -EBUSY; + + mlx5dr_domain_lock(tbl->dmn); + + dr_matcher_remove_from_dbg_list(matcher); + dr_matcher_uninit(matcher); + refcount_dec(&matcher->tbl->refcount); + + mlx5dr_domain_unlock(tbl->dmn); + kfree(matcher); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c new file mode 100644 index 000000000..91ff19f67 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -0,0 +1,1334 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include "dr_types.h" + +#define DR_RULE_MAX_STE_CHAIN (DR_RULE_MAX_STES + DR_ACTION_MAX_STES) + +static int dr_rule_append_to_miss_list(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste *new_last_ste, + struct list_head *miss_list, + struct list_head *send_list) +{ + struct mlx5dr_ste_send_info *ste_info_last; + struct mlx5dr_ste *last_ste; + + /* The new entry will be inserted after the last */ + last_ste = list_last_entry(miss_list, struct mlx5dr_ste, miss_list_node); + WARN_ON(!last_ste); + + ste_info_last = kzalloc(sizeof(*ste_info_last), GFP_KERNEL); + if (!ste_info_last) + return -ENOMEM; + + mlx5dr_ste_set_miss_addr(ste_ctx, mlx5dr_ste_get_hw_ste(last_ste), + mlx5dr_ste_get_icm_addr(new_last_ste)); + list_add_tail(&new_last_ste->miss_list_node, miss_list); + + mlx5dr_send_fill_and_append_ste_send_info(last_ste, DR_STE_SIZE_CTRL, + 0, mlx5dr_ste_get_hw_ste(last_ste), + ste_info_last, send_list, true); + + return 0; +} + +static struct mlx5dr_ste * +dr_rule_create_collision_htbl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + u8 *hw_ste) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx; + struct mlx5dr_ste_htbl *new_htbl; + struct mlx5dr_ste *ste; + u64 icm_addr; + + /* Create new table for miss entry */ + new_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + DR_CHUNK_SIZE_1, + MLX5DR_STE_LU_TYPE_DONT_CARE, + 0); + if (!new_htbl) { + mlx5dr_dbg(dmn, "Failed allocating collision table\n"); + return NULL; + } + + /* One and only entry, never grows */ + ste = new_htbl->chunk->ste_arr; + icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); + mlx5dr_ste_set_miss_addr(ste_ctx, hw_ste, icm_addr); + mlx5dr_htbl_get(new_htbl); + + return ste; +} + +static struct mlx5dr_ste * +dr_rule_create_collision_entry(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + u8 *hw_ste, + struct mlx5dr_ste *orig_ste) +{ + struct mlx5dr_ste *ste; + + ste = dr_rule_create_collision_htbl(matcher, nic_matcher, hw_ste); + if (!ste) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed creating collision entry\n"); + return NULL; + } + + ste->ste_chain_location = orig_ste->ste_chain_location; + ste->htbl->pointing_ste = orig_ste->htbl->pointing_ste; + + /* In collision entry, all members share the same miss_list_head */ + ste->htbl->chunk->miss_list = mlx5dr_ste_get_miss_list(orig_ste); + + /* Next table */ + if (mlx5dr_ste_create_next_htbl(matcher, nic_matcher, ste, hw_ste, + DR_CHUNK_SIZE_1)) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed allocating table\n"); + goto free_tbl; + } + + return ste; + +free_tbl: + mlx5dr_ste_free(ste, matcher, nic_matcher); + return NULL; +} + +static int +dr_rule_handle_one_ste_in_update_list(struct mlx5dr_ste_send_info *ste_info, + struct mlx5dr_domain *dmn) +{ + int ret; + + list_del(&ste_info->send_list); + + /* Copy data to ste, only reduced size or control, the last 16B (mask) + * is already written to the hw. 
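+	 * Only the CPU-side shadow of the STE is refreshed here; the
+	 * on-device ICM copy is updated by the mlx5dr_send_postsend_ste()
+	 * call below, which posts ste_info->size bytes at ste_info->offset.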
+ */ + if (ste_info->size == DR_STE_SIZE_CTRL) + memcpy(mlx5dr_ste_get_hw_ste(ste_info->ste), + ste_info->data, DR_STE_SIZE_CTRL); + else + memcpy(mlx5dr_ste_get_hw_ste(ste_info->ste), + ste_info->data, DR_STE_SIZE_REDUCED); + + ret = mlx5dr_send_postsend_ste(dmn, ste_info->ste, ste_info->data, + ste_info->size, ste_info->offset); + if (ret) + goto out; + +out: + kfree(ste_info); + return ret; +} + +static int dr_rule_send_update_list(struct list_head *send_ste_list, + struct mlx5dr_domain *dmn, + bool is_reverse) +{ + struct mlx5dr_ste_send_info *ste_info, *tmp_ste_info; + int ret; + + if (is_reverse) { + list_for_each_entry_safe_reverse(ste_info, tmp_ste_info, + send_ste_list, send_list) { + ret = dr_rule_handle_one_ste_in_update_list(ste_info, + dmn); + if (ret) + return ret; + } + } else { + list_for_each_entry_safe(ste_info, tmp_ste_info, + send_ste_list, send_list) { + ret = dr_rule_handle_one_ste_in_update_list(ste_info, + dmn); + if (ret) + return ret; + } + } + + return 0; +} + +static struct mlx5dr_ste * +dr_rule_find_ste_in_miss_list(struct list_head *miss_list, u8 *hw_ste) +{ + struct mlx5dr_ste *ste; + + if (list_empty(miss_list)) + return NULL; + + /* Check if hw_ste is present in the list */ + list_for_each_entry(ste, miss_list, miss_list_node) { + if (mlx5dr_ste_equal_tag(mlx5dr_ste_get_hw_ste(ste), hw_ste)) + return ste; + } + + return NULL; +} + +static struct mlx5dr_ste * +dr_rule_rehash_handle_collision(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct list_head *update_list, + struct mlx5dr_ste *col_ste, + u8 *hw_ste) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste *new_ste; + int ret; + + new_ste = dr_rule_create_collision_htbl(matcher, nic_matcher, hw_ste); + if (!new_ste) + return NULL; + + /* Update collision pointing STE */ + new_ste->htbl->pointing_ste = col_ste->htbl->pointing_ste; + + /* In collision entry, all members share the same miss_list_head */ + new_ste->htbl->chunk->miss_list = mlx5dr_ste_get_miss_list(col_ste); + + /* Update the previous from the list */ + ret = dr_rule_append_to_miss_list(dmn->ste_ctx, new_ste, + mlx5dr_ste_get_miss_list(col_ste), + update_list); + if (ret) { + mlx5dr_dbg(dmn, "Failed update dup entry\n"); + goto err_exit; + } + + return new_ste; + +err_exit: + mlx5dr_ste_free(new_ste, matcher, nic_matcher); + return NULL; +} + +static void dr_rule_rehash_copy_ste_ctrl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *cur_ste, + struct mlx5dr_ste *new_ste) +{ + new_ste->next_htbl = cur_ste->next_htbl; + new_ste->ste_chain_location = cur_ste->ste_chain_location; + + if (new_ste->next_htbl) + new_ste->next_htbl->pointing_ste = new_ste; + + /* We need to copy the refcount since this ste + * may have been traversed several times + */ + new_ste->refcount = cur_ste->refcount; + + /* Link old STEs rule to the new ste */ + mlx5dr_rule_set_last_member(cur_ste->rule_rx_tx, new_ste, false); +} + +static struct mlx5dr_ste * +dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *cur_ste, + struct mlx5dr_ste_htbl *new_htbl, + struct list_head *update_list) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_send_info *ste_info; + bool use_update_list = false; + u8 hw_ste[DR_STE_SIZE] = {}; + struct mlx5dr_ste *new_ste; + u64 icm_addr; + int new_idx; + u8 sb_idx; + + /* Copy STE mask from the matcher */ + sb_idx = cur_ste->ste_chain_location - 1; + 
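+ /* Rebuild the full hw_ste before re-hashing: the mask comes from the
+  * matcher's builder at this chain location, the control and tag are
+  * copied from the current (reduced) STE, and the miss address is
+  * pointed back at the matcher's end anchor before the new slot in the
+  * enlarged table is calculated.
+  */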
mlx5dr_ste_set_bit_mask(hw_ste, nic_matcher->ste_builder[sb_idx].bit_mask); + + /* Copy STE control and tag */ + icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); + memcpy(hw_ste, mlx5dr_ste_get_hw_ste(cur_ste), DR_STE_SIZE_REDUCED); + mlx5dr_ste_set_miss_addr(dmn->ste_ctx, hw_ste, icm_addr); + + new_idx = mlx5dr_ste_calc_hash_index(hw_ste, new_htbl); + new_ste = &new_htbl->chunk->ste_arr[new_idx]; + + if (mlx5dr_ste_is_not_used(new_ste)) { + mlx5dr_htbl_get(new_htbl); + list_add_tail(&new_ste->miss_list_node, + mlx5dr_ste_get_miss_list(new_ste)); + } else { + new_ste = dr_rule_rehash_handle_collision(matcher, + nic_matcher, + update_list, + new_ste, + hw_ste); + if (!new_ste) { + mlx5dr_dbg(dmn, "Failed adding collision entry, index: %d\n", + new_idx); + return NULL; + } + new_htbl->ctrl.num_of_collisions++; + use_update_list = true; + } + + memcpy(mlx5dr_ste_get_hw_ste(new_ste), hw_ste, DR_STE_SIZE_REDUCED); + + new_htbl->ctrl.num_of_valid_entries++; + + if (use_update_list) { + ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL); + if (!ste_info) + goto err_exit; + + mlx5dr_send_fill_and_append_ste_send_info(new_ste, DR_STE_SIZE, 0, + hw_ste, ste_info, + update_list, true); + } + + dr_rule_rehash_copy_ste_ctrl(matcher, nic_matcher, cur_ste, new_ste); + + return new_ste; + +err_exit: + mlx5dr_ste_free(new_ste, matcher, nic_matcher); + return NULL; +} + +static int dr_rule_rehash_copy_miss_list(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct list_head *cur_miss_list, + struct mlx5dr_ste_htbl *new_htbl, + struct list_head *update_list) +{ + struct mlx5dr_ste *tmp_ste, *cur_ste, *new_ste; + + if (list_empty(cur_miss_list)) + return 0; + + list_for_each_entry_safe(cur_ste, tmp_ste, cur_miss_list, miss_list_node) { + new_ste = dr_rule_rehash_copy_ste(matcher, + nic_matcher, + cur_ste, + new_htbl, + update_list); + if (!new_ste) + goto err_insert; + + list_del(&cur_ste->miss_list_node); + mlx5dr_htbl_put(cur_ste->htbl); + } + return 0; + +err_insert: + mlx5dr_err(matcher->tbl->dmn, "Fatal error during resize\n"); + WARN_ON(true); + return -EINVAL; +} + +static int dr_rule_rehash_copy_htbl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste_htbl *cur_htbl, + struct mlx5dr_ste_htbl *new_htbl, + struct list_head *update_list) +{ + struct mlx5dr_ste *cur_ste; + int cur_entries; + int err = 0; + int i; + + cur_entries = mlx5dr_icm_pool_chunk_size_to_entries(cur_htbl->chunk->size); + + if (cur_entries < 1) { + mlx5dr_dbg(matcher->tbl->dmn, "Invalid number of entries\n"); + return -EINVAL; + } + + for (i = 0; i < cur_entries; i++) { + cur_ste = &cur_htbl->chunk->ste_arr[i]; + if (mlx5dr_ste_is_not_used(cur_ste)) /* Empty, nothing to copy */ + continue; + + err = dr_rule_rehash_copy_miss_list(matcher, + nic_matcher, + mlx5dr_ste_get_miss_list(cur_ste), + new_htbl, + update_list); + if (err) + goto clean_copy; + } + +clean_copy: + return err; +} + +static struct mlx5dr_ste_htbl * +dr_rule_rehash_htbl(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct mlx5dr_ste_htbl *cur_htbl, + u8 ste_location, + struct list_head *update_list, + enum mlx5dr_icm_chunk_size new_size) +{ + struct mlx5dr_ste_send_info *del_ste_info, *tmp_ste_info; + struct mlx5dr_matcher *matcher = rule->matcher; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_matcher_rx_tx *nic_matcher; + struct mlx5dr_ste_send_info *ste_info; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_domain_rx_tx 
*nic_dmn; + u8 formatted_ste[DR_STE_SIZE] = {}; + LIST_HEAD(rehash_table_send_list); + struct mlx5dr_ste *ste_to_update; + struct mlx5dr_ste_htbl *new_htbl; + int err; + + nic_matcher = nic_rule->nic_matcher; + nic_dmn = nic_matcher->nic_tbl->nic_dmn; + + ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL); + if (!ste_info) + return NULL; + + new_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + new_size, + cur_htbl->lu_type, + cur_htbl->byte_mask); + if (!new_htbl) { + mlx5dr_err(dmn, "Failed to allocate new hash table\n"); + goto free_ste_info; + } + + /* Write new table to HW */ + info.type = CONNECT_MISS; + info.miss_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); + mlx5dr_ste_set_formatted_ste(dmn->ste_ctx, + dmn->info.caps.gvmi, + nic_dmn->type, + new_htbl, + formatted_ste, + &info); + + new_htbl->pointing_ste = cur_htbl->pointing_ste; + new_htbl->pointing_ste->next_htbl = new_htbl; + err = dr_rule_rehash_copy_htbl(matcher, + nic_matcher, + cur_htbl, + new_htbl, + &rehash_table_send_list); + if (err) + goto free_new_htbl; + + if (mlx5dr_send_postsend_htbl(dmn, new_htbl, formatted_ste, + nic_matcher->ste_builder[ste_location - 1].bit_mask)) { + mlx5dr_err(dmn, "Failed writing table to HW\n"); + goto free_new_htbl; + } + + /* Writing to the hw is done in regular order of rehash_table_send_list, + * in order to have the origin data written before the miss address of + * collision entries, if exists. + */ + if (dr_rule_send_update_list(&rehash_table_send_list, dmn, false)) { + mlx5dr_err(dmn, "Failed updating table to HW\n"); + goto free_ste_list; + } + + /* Connect previous hash table to current */ + if (ste_location == 1) { + /* The previous table is an anchor, anchors size is always one STE */ + struct mlx5dr_ste_htbl *prev_htbl = cur_htbl->pointing_ste->htbl; + + /* On matcher s_anchor we keep an extra refcount */ + mlx5dr_htbl_get(new_htbl); + mlx5dr_htbl_put(cur_htbl); + + nic_matcher->s_htbl = new_htbl; + + /* It is safe to operate dr_ste_set_hit_addr on the hw_ste here + * (48B len) which works only on first 32B + */ + mlx5dr_ste_set_hit_addr(dmn->ste_ctx, + prev_htbl->chunk->hw_ste_arr, + mlx5dr_icm_pool_get_chunk_icm_addr(new_htbl->chunk), + mlx5dr_icm_pool_get_chunk_num_of_entries(new_htbl->chunk)); + + ste_to_update = &prev_htbl->chunk->ste_arr[0]; + } else { + mlx5dr_ste_set_hit_addr_by_next_htbl(dmn->ste_ctx, + mlx5dr_ste_get_hw_ste(cur_htbl->pointing_ste), + new_htbl); + ste_to_update = cur_htbl->pointing_ste; + } + + mlx5dr_send_fill_and_append_ste_send_info(ste_to_update, DR_STE_SIZE_CTRL, + 0, mlx5dr_ste_get_hw_ste(ste_to_update), + ste_info, update_list, false); + + return new_htbl; + +free_ste_list: + /* Clean all ste_info's from the new table */ + list_for_each_entry_safe(del_ste_info, tmp_ste_info, + &rehash_table_send_list, send_list) { + list_del(&del_ste_info->send_list); + kfree(del_ste_info); + } + +free_new_htbl: + mlx5dr_ste_htbl_free(new_htbl); +free_ste_info: + kfree(ste_info); + mlx5dr_info(dmn, "Failed creating rehash table\n"); + return NULL; +} + +static struct mlx5dr_ste_htbl *dr_rule_rehash(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct mlx5dr_ste_htbl *cur_htbl, + u8 ste_location, + struct list_head *update_list) +{ + struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn; + enum mlx5dr_icm_chunk_size new_size; + + new_size = mlx5dr_icm_next_higher_chunk(cur_htbl->chunk->size); + new_size = min_t(u32, new_size, dmn->info.max_log_sw_icm_sz); + + if (new_size == cur_htbl->chunk->size) + return NULL; /* 
Skip rehash, we already at the max size */ + + return dr_rule_rehash_htbl(rule, nic_rule, cur_htbl, ste_location, + update_list, new_size); +} + +static struct mlx5dr_ste * +dr_rule_handle_collision(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *ste, + u8 *hw_ste, + struct list_head *miss_list, + struct list_head *send_list) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx; + struct mlx5dr_ste_send_info *ste_info; + struct mlx5dr_ste *new_ste; + + ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL); + if (!ste_info) + return NULL; + + new_ste = dr_rule_create_collision_entry(matcher, nic_matcher, hw_ste, ste); + if (!new_ste) + goto free_send_info; + + if (dr_rule_append_to_miss_list(ste_ctx, new_ste, + miss_list, send_list)) { + mlx5dr_dbg(dmn, "Failed to update prev miss_list\n"); + goto err_exit; + } + + mlx5dr_send_fill_and_append_ste_send_info(new_ste, DR_STE_SIZE, 0, hw_ste, + ste_info, send_list, false); + + ste->htbl->ctrl.num_of_collisions++; + ste->htbl->ctrl.num_of_valid_entries++; + + return new_ste; + +err_exit: + mlx5dr_ste_free(new_ste, matcher, nic_matcher); +free_send_info: + kfree(ste_info); + return NULL; +} + +static void dr_rule_remove_action_members(struct mlx5dr_rule *rule) +{ + struct mlx5dr_rule_action_member *action_mem; + struct mlx5dr_rule_action_member *tmp; + + list_for_each_entry_safe(action_mem, tmp, &rule->rule_actions_list, list) { + list_del(&action_mem->list); + refcount_dec(&action_mem->action->refcount); + kvfree(action_mem); + } +} + +static int dr_rule_add_action_members(struct mlx5dr_rule *rule, + size_t num_actions, + struct mlx5dr_action *actions[]) +{ + struct mlx5dr_rule_action_member *action_mem; + int i; + + for (i = 0; i < num_actions; i++) { + action_mem = kvzalloc(sizeof(*action_mem), GFP_KERNEL); + if (!action_mem) + goto free_action_members; + + action_mem->action = actions[i]; + INIT_LIST_HEAD(&action_mem->list); + list_add_tail(&action_mem->list, &rule->rule_actions_list); + refcount_inc(&action_mem->action->refcount); + } + + return 0; + +free_action_members: + dr_rule_remove_action_members(rule); + return -ENOMEM; +} + +void mlx5dr_rule_set_last_member(struct mlx5dr_rule_rx_tx *nic_rule, + struct mlx5dr_ste *ste, + bool force) +{ + /* Update rule member is usually done for the last STE or during rule + * creation to recover from mid-creation failure (for this peruse the + * force flag is used) + */ + if (ste->next_htbl && !force) + return; + + /* Update is required since each rule keeps track of its last STE */ + ste->rule_rx_tx = nic_rule; + nic_rule->last_rule_ste = ste; +} + +static struct mlx5dr_ste *dr_rule_get_pointed_ste(struct mlx5dr_ste *curr_ste) +{ + struct mlx5dr_ste *first_ste; + + first_ste = list_first_entry(mlx5dr_ste_get_miss_list(curr_ste), + struct mlx5dr_ste, miss_list_node); + + return first_ste->htbl->pointing_ste; +} + +int mlx5dr_rule_get_reverse_rule_members(struct mlx5dr_ste **ste_arr, + struct mlx5dr_ste *curr_ste, + int *num_of_stes) +{ + bool first = false; + + *num_of_stes = 0; + + if (!curr_ste) + return -ENOENT; + + /* Iterate from last to first */ + while (!first) { + first = curr_ste->ste_chain_location == 1; + ste_arr[*num_of_stes] = curr_ste; + *num_of_stes += 1; + curr_ste = dr_rule_get_pointed_ste(curr_ste); + } + + return 0; +} + +static void dr_rule_clean_rule_members(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule) +{ + struct mlx5dr_ste *ste_arr[DR_RULE_MAX_STES + 
DR_ACTION_MAX_STES]; + struct mlx5dr_ste *curr_ste = nic_rule->last_rule_ste; + int i; + + if (mlx5dr_rule_get_reverse_rule_members(ste_arr, curr_ste, &i)) + return; + + while (i--) + mlx5dr_ste_put(ste_arr[i], rule->matcher, nic_rule->nic_matcher); +} + +static u16 dr_get_bits_per_mask(u16 byte_mask) +{ + u16 bits = 0; + + while (byte_mask) { + byte_mask = byte_mask & (byte_mask - 1); + bits++; + } + + return bits; +} + +static bool dr_rule_need_enlarge_hash(struct mlx5dr_ste_htbl *htbl, + struct mlx5dr_domain *dmn, + struct mlx5dr_domain_rx_tx *nic_dmn) +{ + struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl; + int threshold; + + if (dmn->info.max_log_sw_icm_sz <= htbl->chunk->size) + return false; + + if (!mlx5dr_ste_htbl_may_grow(htbl)) + return false; + + if (dr_get_bits_per_mask(htbl->byte_mask) * BITS_PER_BYTE <= htbl->chunk->size) + return false; + + threshold = mlx5dr_ste_htbl_increase_threshold(htbl); + if (ctrl->num_of_collisions >= threshold && + (ctrl->num_of_valid_entries - ctrl->num_of_collisions) >= threshold) + return true; + + return false; +} + +static int dr_rule_handle_action_stes(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct list_head *send_ste_list, + struct mlx5dr_ste *last_ste, + u8 *hw_ste_arr, + u32 new_hw_ste_arr_sz) +{ + struct mlx5dr_matcher_rx_tx *nic_matcher = nic_rule->nic_matcher; + struct mlx5dr_ste_send_info *ste_info_arr[DR_ACTION_MAX_STES]; + u8 num_of_builders = nic_matcher->num_of_builders; + struct mlx5dr_matcher *matcher = rule->matcher; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + u8 *curr_hw_ste, *prev_hw_ste; + struct mlx5dr_ste *action_ste; + int i, k; + + /* Two cases: + * 1. num_of_builders is equal to new_hw_ste_arr_sz, the action in the ste + * 2. num_of_builders is less then new_hw_ste_arr_sz, new ste was added + * to support the action. + */ + + for (i = num_of_builders, k = 0; i < new_hw_ste_arr_sz; i++, k++) { + curr_hw_ste = hw_ste_arr + i * DR_STE_SIZE; + prev_hw_ste = (i == 0) ? 
curr_hw_ste : hw_ste_arr + ((i - 1) * DR_STE_SIZE); + action_ste = dr_rule_create_collision_htbl(matcher, + nic_matcher, + curr_hw_ste); + if (!action_ste) + return -ENOMEM; + + mlx5dr_ste_get(action_ste); + + action_ste->htbl->pointing_ste = last_ste; + last_ste->next_htbl = action_ste->htbl; + last_ste = action_ste; + + /* While free ste we go over the miss list, so add this ste to the list */ + list_add_tail(&action_ste->miss_list_node, + mlx5dr_ste_get_miss_list(action_ste)); + + ste_info_arr[k] = kzalloc(sizeof(*ste_info_arr[k]), + GFP_KERNEL); + if (!ste_info_arr[k]) + goto err_exit; + + /* Point current ste to the new action */ + mlx5dr_ste_set_hit_addr_by_next_htbl(dmn->ste_ctx, + prev_hw_ste, + action_ste->htbl); + + mlx5dr_rule_set_last_member(nic_rule, action_ste, true); + + mlx5dr_send_fill_and_append_ste_send_info(action_ste, DR_STE_SIZE, 0, + curr_hw_ste, + ste_info_arr[k], + send_ste_list, false); + } + + last_ste->next_htbl = NULL; + + return 0; + +err_exit: + mlx5dr_ste_put(action_ste, matcher, nic_matcher); + return -ENOMEM; +} + +static int dr_rule_handle_empty_entry(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste_htbl *cur_htbl, + struct mlx5dr_ste *ste, + u8 ste_location, + u8 *hw_ste, + struct list_head *miss_list, + struct list_head *send_list) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_send_info *ste_info; + u64 icm_addr; + + /* Take ref on table, only on first time this ste is used */ + mlx5dr_htbl_get(cur_htbl); + + /* new entry -> new branch */ + list_add_tail(&ste->miss_list_node, miss_list); + + icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); + mlx5dr_ste_set_miss_addr(dmn->ste_ctx, hw_ste, icm_addr); + + ste->ste_chain_location = ste_location; + + ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL); + if (!ste_info) + goto clean_ste_setting; + + if (mlx5dr_ste_create_next_htbl(matcher, + nic_matcher, + ste, + hw_ste, + DR_CHUNK_SIZE_1)) { + mlx5dr_dbg(dmn, "Failed allocating table\n"); + goto clean_ste_info; + } + + cur_htbl->ctrl.num_of_valid_entries++; + + mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE, 0, hw_ste, + ste_info, send_list, false); + + return 0; + +clean_ste_info: + kfree(ste_info); +clean_ste_setting: + list_del_init(&ste->miss_list_node); + mlx5dr_htbl_put(cur_htbl); + + return -ENOMEM; +} + +static struct mlx5dr_ste * +dr_rule_handle_ste_branch(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct list_head *send_ste_list, + struct mlx5dr_ste_htbl *cur_htbl, + u8 *hw_ste, + u8 ste_location, + struct mlx5dr_ste_htbl **put_htbl) +{ + struct mlx5dr_matcher *matcher = rule->matcher; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_matcher_rx_tx *nic_matcher; + struct mlx5dr_domain_rx_tx *nic_dmn; + struct mlx5dr_ste_htbl *new_htbl; + struct mlx5dr_ste *matched_ste; + struct list_head *miss_list; + bool skip_rehash = false; + struct mlx5dr_ste *ste; + int index; + + nic_matcher = nic_rule->nic_matcher; + nic_dmn = nic_matcher->nic_tbl->nic_dmn; + +again: + index = mlx5dr_ste_calc_hash_index(hw_ste, cur_htbl); + miss_list = &cur_htbl->chunk->miss_list[index]; + ste = &cur_htbl->chunk->ste_arr[index]; + + if (mlx5dr_ste_is_not_used(ste)) { + if (dr_rule_handle_empty_entry(matcher, nic_matcher, cur_htbl, + ste, ste_location, + hw_ste, miss_list, + send_ste_list)) + return NULL; + } else { + /* Hash table index in use, check if this ste is in the miss list */ + matched_ste = 
dr_rule_find_ste_in_miss_list(miss_list, hw_ste); + if (matched_ste) { + /* If it is last STE in the chain, and has the same tag + * it means that all the previous stes are the same, + * if so, this rule is duplicated. + */ + if (!mlx5dr_ste_is_last_in_rule(nic_matcher, ste_location)) + return matched_ste; + + mlx5dr_dbg(dmn, "Duplicate rule inserted\n"); + } + + if (!skip_rehash && dr_rule_need_enlarge_hash(cur_htbl, dmn, nic_dmn)) { + /* Hash table index in use, try to resize of the hash */ + skip_rehash = true; + + /* Hold the table till we update. + * Release in dr_rule_create_rule() + */ + *put_htbl = cur_htbl; + mlx5dr_htbl_get(cur_htbl); + + new_htbl = dr_rule_rehash(rule, nic_rule, cur_htbl, + ste_location, send_ste_list); + if (!new_htbl) { + mlx5dr_err(dmn, "Failed creating rehash table, htbl-log_size: %d\n", + cur_htbl->chunk->size); + mlx5dr_htbl_put(cur_htbl); + } else { + cur_htbl = new_htbl; + } + goto again; + } else { + /* Hash table index in use, add another collision (miss) */ + ste = dr_rule_handle_collision(matcher, + nic_matcher, + ste, + hw_ste, + miss_list, + send_ste_list); + if (!ste) { + mlx5dr_dbg(dmn, "failed adding collision entry, index: %d\n", + index); + return NULL; + } + } + } + return ste; +} + +static bool dr_rule_cmp_value_to_mask(u8 *mask, u8 *value, + u32 s_idx, u32 e_idx) +{ + u32 i; + + for (i = s_idx; i < e_idx; i++) { + if (value[i] & ~mask[i]) { + pr_info("Rule parameters contains a value not specified by mask\n"); + return false; + } + } + return true; +} + +static bool dr_rule_verify(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + struct mlx5dr_match_param *param) +{ + u8 match_criteria = matcher->match_criteria; + size_t value_size = value->match_sz; + u8 *mask_p = (u8 *)&matcher->mask; + u8 *param_p = (u8 *)param; + u32 s_idx, e_idx; + + if (!value_size || + (value_size > DR_SZ_MATCH_PARAM || (value_size % sizeof(u32)))) { + mlx5dr_err(matcher->tbl->dmn, "Rule parameters length is incorrect\n"); + return false; + } + + mlx5dr_ste_copy_param(matcher->match_criteria, param, value, false); + + if (match_criteria & DR_MATCHER_CRITERIA_OUTER) { + s_idx = offsetof(struct mlx5dr_match_param, outer); + e_idx = min(s_idx + sizeof(param->outer), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_err(matcher->tbl->dmn, "Rule outer parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_MISC) { + s_idx = offsetof(struct mlx5dr_match_param, misc); + e_idx = min(s_idx + sizeof(param->misc), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_err(matcher->tbl->dmn, "Rule misc parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_INNER) { + s_idx = offsetof(struct mlx5dr_match_param, inner); + e_idx = min(s_idx + sizeof(param->inner), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_err(matcher->tbl->dmn, "Rule inner parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_MISC2) { + s_idx = offsetof(struct mlx5dr_match_param, misc2); + e_idx = min(s_idx + sizeof(param->misc2), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_err(matcher->tbl->dmn, "Rule misc2 parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & 
DR_MATCHER_CRITERIA_MISC3) { + s_idx = offsetof(struct mlx5dr_match_param, misc3); + e_idx = min(s_idx + sizeof(param->misc3), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_err(matcher->tbl->dmn, "Rule misc3 parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_MISC4) { + s_idx = offsetof(struct mlx5dr_match_param, misc4); + e_idx = min(s_idx + sizeof(param->misc4), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_err(matcher->tbl->dmn, + "Rule misc4 parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_MISC5) { + s_idx = offsetof(struct mlx5dr_match_param, misc5); + e_idx = min(s_idx + sizeof(param->misc5), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_err(matcher->tbl->dmn, "Rule misc5 parameters contains a value not specified by mask\n"); + return false; + } + } + return true; +} + +static int dr_rule_destroy_rule_nic(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule) +{ + /* Check if this nic rule was actually created, or was it skipped + * and only the other type of the RX/TX nic rule was created. + */ + if (!nic_rule->last_rule_ste) + return 0; + + mlx5dr_domain_nic_lock(nic_rule->nic_matcher->nic_tbl->nic_dmn); + dr_rule_clean_rule_members(rule, nic_rule); + + nic_rule->nic_matcher->rules--; + if (!nic_rule->nic_matcher->rules) + mlx5dr_matcher_remove_from_tbl_nic(rule->matcher->tbl->dmn, + nic_rule->nic_matcher); + + mlx5dr_domain_nic_unlock(nic_rule->nic_matcher->nic_tbl->nic_dmn); + + return 0; +} + +static int dr_rule_destroy_rule_fdb(struct mlx5dr_rule *rule) +{ + dr_rule_destroy_rule_nic(rule, &rule->rx); + dr_rule_destroy_rule_nic(rule, &rule->tx); + return 0; +} + +static int dr_rule_destroy_rule(struct mlx5dr_rule *rule) +{ + struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn; + + mlx5dr_dbg_rule_del(rule); + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + dr_rule_destroy_rule_nic(rule, &rule->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + dr_rule_destroy_rule_nic(rule, &rule->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + dr_rule_destroy_rule_fdb(rule); + break; + default: + return -EINVAL; + } + + dr_rule_remove_action_members(rule); + kfree(rule); + return 0; +} + +static enum mlx5dr_ipv dr_rule_get_ipv(struct mlx5dr_match_spec *spec) +{ + if (spec->ip_version == 6 || spec->ethertype == ETH_P_IPV6) + return DR_RULE_IPV6; + + return DR_RULE_IPV4; +} + +static bool dr_rule_skip(enum mlx5dr_domain_type domain, + enum mlx5dr_domain_nic_type nic_type, + struct mlx5dr_match_param *mask, + struct mlx5dr_match_param *value, + u32 flow_source) +{ + bool rx = nic_type == DR_DOMAIN_NIC_TYPE_RX; + + if (domain != MLX5DR_DOMAIN_TYPE_FDB) + return false; + + if (mask->misc.source_port) { + if (rx && value->misc.source_port != MLX5_VPORT_UPLINK) + return true; + + if (!rx && value->misc.source_port == MLX5_VPORT_UPLINK) + return true; + } + + if (rx && flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT) + return true; + + if (!rx && flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK) + return true; + + return false; +} + +static int +dr_rule_create_rule_nic(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct mlx5dr_match_param *param, + size_t num_actions, + struct mlx5dr_action *actions[]) +{ + struct mlx5dr_ste_send_info *ste_info, *tmp_ste_info; + struct 
mlx5dr_matcher *matcher = rule->matcher; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_matcher_rx_tx *nic_matcher; + struct mlx5dr_domain_rx_tx *nic_dmn; + struct mlx5dr_ste_htbl *htbl = NULL; + struct mlx5dr_ste_htbl *cur_htbl; + struct mlx5dr_ste *ste = NULL; + LIST_HEAD(send_ste_list); + u8 *hw_ste_arr = NULL; + u32 new_hw_ste_arr_sz; + int ret, i; + + nic_matcher = nic_rule->nic_matcher; + nic_dmn = nic_matcher->nic_tbl->nic_dmn; + + if (dr_rule_skip(dmn->type, nic_dmn->type, &matcher->mask, param, + rule->flow_source)) + return 0; + + hw_ste_arr = kzalloc(DR_RULE_MAX_STE_CHAIN * DR_STE_SIZE, GFP_KERNEL); + if (!hw_ste_arr) + return -ENOMEM; + + mlx5dr_domain_nic_lock(nic_dmn); + + ret = mlx5dr_matcher_add_to_tbl_nic(dmn, nic_matcher); + if (ret) + goto free_hw_ste; + + ret = mlx5dr_matcher_select_builders(matcher, + nic_matcher, + dr_rule_get_ipv(¶m->outer), + dr_rule_get_ipv(¶m->inner)); + if (ret) + goto remove_from_nic_tbl; + + /* Set the tag values inside the ste array */ + ret = mlx5dr_ste_build_ste_arr(matcher, nic_matcher, param, hw_ste_arr); + if (ret) + goto remove_from_nic_tbl; + + /* Set the actions values/addresses inside the ste array */ + ret = mlx5dr_actions_build_ste_arr(matcher, nic_matcher, actions, + num_actions, hw_ste_arr, + &new_hw_ste_arr_sz); + if (ret) + goto remove_from_nic_tbl; + + cur_htbl = nic_matcher->s_htbl; + + /* Go over the array of STEs, and build dr_ste accordingly. + * The loop is over only the builders which are equal or less to the + * number of stes, in case we have actions that lives in other stes. + */ + for (i = 0; i < nic_matcher->num_of_builders; i++) { + /* Calculate CRC and keep new ste entry */ + u8 *cur_hw_ste_ent = hw_ste_arr + (i * DR_STE_SIZE); + + ste = dr_rule_handle_ste_branch(rule, + nic_rule, + &send_ste_list, + cur_htbl, + cur_hw_ste_ent, + i + 1, + &htbl); + if (!ste) { + mlx5dr_err(dmn, "Failed creating next branch\n"); + ret = -ENOENT; + goto free_rule; + } + + cur_htbl = ste->next_htbl; + + mlx5dr_ste_get(ste); + mlx5dr_rule_set_last_member(nic_rule, ste, true); + } + + /* Connect actions */ + ret = dr_rule_handle_action_stes(rule, nic_rule, &send_ste_list, + ste, hw_ste_arr, new_hw_ste_arr_sz); + if (ret) { + mlx5dr_dbg(dmn, "Failed apply actions\n"); + goto free_rule; + } + ret = dr_rule_send_update_list(&send_ste_list, dmn, true); + if (ret) { + mlx5dr_err(dmn, "Failed sending ste!\n"); + goto free_rule; + } + + if (htbl) + mlx5dr_htbl_put(htbl); + + nic_matcher->rules++; + + mlx5dr_domain_nic_unlock(nic_dmn); + + kfree(hw_ste_arr); + + return 0; + +free_rule: + dr_rule_clean_rule_members(rule, nic_rule); + /* Clean all ste_info's */ + list_for_each_entry_safe(ste_info, tmp_ste_info, &send_ste_list, send_list) { + list_del(&ste_info->send_list); + kfree(ste_info); + } + +remove_from_nic_tbl: + if (!nic_matcher->rules) + mlx5dr_matcher_remove_from_tbl_nic(dmn, nic_matcher); + +free_hw_ste: + mlx5dr_domain_nic_unlock(nic_dmn); + kfree(hw_ste_arr); + return ret; +} + +static int +dr_rule_create_rule_fdb(struct mlx5dr_rule *rule, + struct mlx5dr_match_param *param, + size_t num_actions, + struct mlx5dr_action *actions[]) +{ + struct mlx5dr_match_param copy_param = {}; + int ret; + + /* Copy match_param since they will be consumed during the first + * nic_rule insertion. 
+ */ + memcpy(©_param, param, sizeof(struct mlx5dr_match_param)); + + ret = dr_rule_create_rule_nic(rule, &rule->rx, param, + num_actions, actions); + if (ret) + return ret; + + ret = dr_rule_create_rule_nic(rule, &rule->tx, ©_param, + num_actions, actions); + if (ret) + goto destroy_rule_nic_rx; + + return 0; + +destroy_rule_nic_rx: + dr_rule_destroy_rule_nic(rule, &rule->rx); + return ret; +} + +static struct mlx5dr_rule * +dr_rule_create_rule(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + size_t num_actions, + struct mlx5dr_action *actions[], + u32 flow_source) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_match_param param = {}; + struct mlx5dr_rule *rule; + int ret; + + if (!dr_rule_verify(matcher, value, ¶m)) + return NULL; + + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) + return NULL; + + rule->matcher = matcher; + rule->flow_source = flow_source; + INIT_LIST_HEAD(&rule->rule_actions_list); + + ret = dr_rule_add_action_members(rule, num_actions, actions); + if (ret) + goto free_rule; + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + rule->rx.nic_matcher = &matcher->rx; + ret = dr_rule_create_rule_nic(rule, &rule->rx, ¶m, + num_actions, actions); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + rule->tx.nic_matcher = &matcher->tx; + ret = dr_rule_create_rule_nic(rule, &rule->tx, ¶m, + num_actions, actions); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + rule->rx.nic_matcher = &matcher->rx; + rule->tx.nic_matcher = &matcher->tx; + ret = dr_rule_create_rule_fdb(rule, ¶m, + num_actions, actions); + break; + default: + ret = -EINVAL; + break; + } + + if (ret) + goto remove_action_members; + + INIT_LIST_HEAD(&rule->dbg_node); + mlx5dr_dbg_rule_add(rule); + return rule; + +remove_action_members: + dr_rule_remove_action_members(rule); +free_rule: + kfree(rule); + mlx5dr_err(dmn, "Failed creating rule\n"); + return NULL; +} + +struct mlx5dr_rule *mlx5dr_rule_create(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + size_t num_actions, + struct mlx5dr_action *actions[], + u32 flow_source) +{ + struct mlx5dr_rule *rule; + + refcount_inc(&matcher->refcount); + + rule = dr_rule_create_rule(matcher, value, num_actions, actions, flow_source); + if (!rule) + refcount_dec(&matcher->refcount); + + return rule; +} + +int mlx5dr_rule_destroy(struct mlx5dr_rule *rule) +{ + struct mlx5dr_matcher *matcher = rule->matcher; + int ret; + + ret = dr_rule_destroy_rule(rule); + if (!ret) + refcount_dec(&matcher->refcount); + + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c new file mode 100644 index 000000000..ef19a66f5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -0,0 +1,1056 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include <linux/smp.h> +#include "dr_types.h" + +#define QUEUE_SIZE 128 +#define SIGNAL_PER_DIV_QUEUE 16 +#define TH_NUMS_TO_DRAIN 2 + +enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 }; + +struct dr_data_seg { + u64 addr; + u32 length; + u32 lkey; + unsigned int send_flags; +}; + +struct postsend_info { + struct dr_data_seg write; + struct dr_data_seg read; + u64 remote_addr; + u32 rkey; +}; + +struct dr_qp_rtr_attr { + struct mlx5dr_cmd_gid_attr dgid_attr; + enum ib_mtu mtu; + u32 qp_num; + u16 port_num; + u8 min_rnr_timer; + u8 sgid_index; + u16 udp_src_port; + u8 fl:1; +}; + +struct dr_qp_rts_attr { + u8 timeout; + u8 retry_cnt; + u8 rnr_retry; +}; + +struct dr_qp_init_attr { + u32 cqn; + u32 pdn; + u32 max_send_wr; + struct mlx5_uars_page *uar; + u8 isolate_vl_tc:1; +}; + +static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64) +{ + unsigned int idx; + u8 opcode; + + opcode = get_cqe_opcode(cqe64); + if (opcode == MLX5_CQE_REQ_ERR) { + idx = be16_to_cpu(cqe64->wqe_counter) & + (dr_cq->qp->sq.wqe_cnt - 1); + dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1; + } else if (opcode == MLX5_CQE_RESP_ERR) { + ++dr_cq->qp->sq.cc; + } else { + idx = be16_to_cpu(cqe64->wqe_counter) & + (dr_cq->qp->sq.wqe_cnt - 1); + dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1; + + return CQ_OK; + } + + return CQ_POLL_ERR; +} + +static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq) +{ + struct mlx5_cqe64 *cqe64; + int err; + + cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq); + if (!cqe64) + return CQ_EMPTY; + + mlx5_cqwq_pop(&dr_cq->wq); + err = dr_parse_cqe(dr_cq, cqe64); + mlx5_cqwq_update_db_record(&dr_cq->wq); + + return err; +} + +static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne) +{ + int npolled; + int err = 0; + + for (npolled = 0; npolled < ne; ++npolled) { + err = dr_cq_poll_one(dr_cq); + if (err != CQ_OK) + break; + } + + return err == CQ_POLL_ERR ? 
err : npolled; +} + +static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, + struct dr_qp_init_attr *attr) +{ + u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; + u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {}; + struct mlx5_wq_param wqp; + struct mlx5dr_qp *dr_qp; + int inlen; + void *qpc; + void *in; + int err; + + dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL); + if (!dr_qp) + return NULL; + + wqp.buf_numa_node = mdev->priv.numa_node; + wqp.db_numa_node = mdev->priv.numa_node; + + dr_qp->rq.pc = 0; + dr_qp->rq.cc = 0; + dr_qp->rq.wqe_cnt = 4; + dr_qp->sq.pc = 0; + dr_qp->sq.cc = 0; + dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr); + + MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); + MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt)); + MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt)); + err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq, + &dr_qp->wq_ctrl); + if (err) { + mlx5_core_warn(mdev, "Can't create QP WQ\n"); + goto err_wq; + } + + dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt, + sizeof(dr_qp->sq.wqe_head[0]), + GFP_KERNEL); + + if (!dr_qp->sq.wqe_head) { + mlx5_core_warn(mdev, "Can't allocate wqe head\n"); + goto err_wqe_head; + } + + inlen = MLX5_ST_SZ_BYTES(create_qp_in) + + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * + dr_qp->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_in; + } + + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, isolate_vl_tc, attr->isolate_vl_tc); + MLX5_SET(qpc, qpc, pd, attr->pdn); + MLX5_SET(qpc, qpc, uar_page, attr->uar->index); + MLX5_SET(qpc, qpc, log_page_size, + dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, fre, 1); + MLX5_SET(qpc, qpc, rlky, 1); + MLX5_SET(qpc, qpc, cqn_snd, attr->cqn); + MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn); + MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); + MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt)); + MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); + MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt)); + MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev)); + MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma); + if (MLX5_CAP_GEN(mdev, cqe_version) == 1) + MLX5_SET(qpc, qpc, user_index, 0xFFFFFF); + mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(create_qp_in, + in, pas)); + + MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + dr_qp->qpn = MLX5_GET(create_qp_out, out, qpn); + kvfree(in); + if (err) + goto err_in; + dr_qp->uar = attr->uar; + + return dr_qp; + +err_in: + kfree(dr_qp->sq.wqe_head); +err_wqe_head: + mlx5_wq_destroy(&dr_qp->wq_ctrl); +err_wq: + kfree(dr_qp); + return NULL; +} + +static void dr_destroy_qp(struct mlx5_core_dev *mdev, + struct mlx5dr_qp *dr_qp) +{ + u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {}; + + MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, in, qpn, dr_qp->qpn); + mlx5_cmd_exec_in(mdev, destroy_qp, in); + + kfree(dr_qp->sq.wqe_head); + mlx5_wq_destroy(&dr_qp->wq_ctrl); + kfree(dr_qp); +} + +static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl) +{ + dma_wmb(); + *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xffff); + + /* After wmb() the hw aware of new work */ + wmb(); + + mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET); +} + +static void 
dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr, + u32 rkey, struct dr_data_seg *data_seg, + u32 opcode, bool notify_hw) +{ + struct mlx5_wqe_raddr_seg *wq_raddr; + struct mlx5_wqe_ctrl_seg *wq_ctrl; + struct mlx5_wqe_data_seg *wq_dseg; + unsigned int size; + unsigned int idx; + + size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 + + sizeof(*wq_raddr) / 16; + + idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1); + + wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx); + wq_ctrl->imm = 0; + wq_ctrl->fm_ce_se = (data_seg->send_flags) ? + MLX5_WQE_CTRL_CQ_UPDATE : 0; + wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) | + opcode); + wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8); + wq_raddr = (void *)(wq_ctrl + 1); + wq_raddr->raddr = cpu_to_be64(remote_addr); + wq_raddr->rkey = cpu_to_be32(rkey); + wq_raddr->reserved = 0; + + wq_dseg = (void *)(wq_raddr + 1); + wq_dseg->byte_count = cpu_to_be32(data_seg->length); + wq_dseg->lkey = cpu_to_be32(data_seg->lkey); + wq_dseg->addr = cpu_to_be64(data_seg->addr); + + dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++; + + if (notify_hw) + dr_cmd_notify_hw(dr_qp, wq_ctrl); +} + +static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info) +{ + dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, + &send_info->write, MLX5_OPCODE_RDMA_WRITE, false); + dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, + &send_info->read, MLX5_OPCODE_RDMA_READ, true); +} + +/** + * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent + * with send_list parameters: + * + * @ste: The data that attached to this specific ste + * @size: of data to write + * @offset: of the data from start of the hw_ste entry + * @data: data + * @ste_info: ste to be sent with send_list + * @send_list: to append into it + * @copy_data: if true indicates that the data should be kept because + * it's not backuped any where (like in re-hash). + * if false, it lets the data to be updated after + * it was added to the list. + */ +void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size, + u16 offset, u8 *data, + struct mlx5dr_ste_send_info *ste_info, + struct list_head *send_list, + bool copy_data) +{ + ste_info->size = size; + ste_info->ste = ste; + ste_info->offset = offset; + + if (copy_data) { + memcpy(ste_info->data_cont, data, size); + ste_info->data = ste_info->data_cont; + } else { + ste_info->data = data; + } + + list_add_tail(&ste_info->send_list, send_list); +} + +/* The function tries to consume one wc each time, unless the queue is full, in + * that case, which means that the hw is behind the sw in a full queue len + * the function will drain the cq till it empty. 
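+ * Each polled completion accounts for a batch of signal_th WQEs; in the
+ * drain case polling continues until no WQEs are left pending.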
+ */ +static int dr_handle_pending_wc(struct mlx5dr_domain *dmn, + struct mlx5dr_send_ring *send_ring) +{ + bool is_drain = false; + int ne; + + if (send_ring->pending_wqe < send_ring->signal_th) + return 0; + + /* Queue is full start drain it */ + if (send_ring->pending_wqe >= + dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN) + is_drain = true; + + do { + ne = dr_poll_cq(send_ring->cq, 1); + if (unlikely(ne < 0)) { + mlx5_core_warn_once(dmn->mdev, "SMFS QPN 0x%x is disabled/limited", + send_ring->qp->qpn); + send_ring->err_state = true; + return ne; + } else if (ne == 1) { + send_ring->pending_wqe -= send_ring->signal_th; + } + } while (is_drain && send_ring->pending_wqe); + + return 0; +} + +static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring, + struct postsend_info *send_info) +{ + send_ring->pending_wqe++; + + if (send_ring->pending_wqe % send_ring->signal_th == 0) + send_info->write.send_flags |= IB_SEND_SIGNALED; + + send_ring->pending_wqe++; + send_info->read.length = send_info->write.length; + /* Read into the same write area */ + send_info->read.addr = (uintptr_t)send_info->write.addr; + send_info->read.lkey = send_ring->mr->mkey; + + if (send_ring->pending_wqe % send_ring->signal_th == 0) + send_info->read.send_flags = IB_SEND_SIGNALED; + else + send_info->read.send_flags = 0; +} + +static int dr_postsend_icm_data(struct mlx5dr_domain *dmn, + struct postsend_info *send_info) +{ + struct mlx5dr_send_ring *send_ring = dmn->send_ring; + u32 buff_offset; + int ret; + + if (unlikely(dmn->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR || + send_ring->err_state)) { + mlx5_core_dbg_once(dmn->mdev, + "Skipping post send: QP err state: %d, device state: %d\n", + send_ring->err_state, dmn->mdev->state); + return 0; + } + + spin_lock(&send_ring->lock); + + ret = dr_handle_pending_wc(dmn, send_ring); + if (ret) + goto out_unlock; + + if (send_info->write.length > dmn->info.max_inline_size) { + buff_offset = (send_ring->tx_head & + (dmn->send_ring->signal_th - 1)) * + send_ring->max_post_send_size; + /* Copy to ring mr */ + memcpy(send_ring->buf + buff_offset, + (void *)(uintptr_t)send_info->write.addr, + send_info->write.length); + send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset; + send_info->write.lkey = send_ring->mr->mkey; + } + + send_ring->tx_head++; + dr_fill_data_segs(send_ring, send_info); + dr_post_send(send_ring->qp, send_info); + +out_unlock: + spin_unlock(&send_ring->lock); + return ret; +} + +static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 **data, + u32 *byte_size, + int *iterations, + int *num_stes) +{ + u32 chunk_byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk); + int alloc_size; + + if (chunk_byte_size > dmn->send_ring->max_post_send_size) { + *iterations = chunk_byte_size / dmn->send_ring->max_post_send_size; + *byte_size = dmn->send_ring->max_post_send_size; + alloc_size = *byte_size; + *num_stes = *byte_size / DR_STE_SIZE; + } else { + *iterations = 1; + *num_stes = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk); + alloc_size = *num_stes * DR_STE_SIZE; + } + + *data = kvzalloc(alloc_size, GFP_KERNEL); + if (!*data) + return -ENOMEM; + + return 0; +} + +/** + * mlx5dr_send_postsend_ste: write size bytes into offset from the hw cm. + * + * @dmn: Domain + * @ste: The ste struct that contains the data (at + * least part of it) + * @data: The real data to send size data + * @size: for writing. 
+ * @offset: The offset from the icm mapped data to + * start write to this for write only part of the + * buffer. + * + * Return: 0 on success. + */ +int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste, + u8 *data, u16 size, u16 offset) +{ + struct postsend_info send_info = {}; + + mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, data, size); + + send_info.write.addr = (uintptr_t)data; + send_info.write.length = size; + send_info.write.lkey = 0; + send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset; + send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(ste->htbl->chunk); + + return dr_postsend_icm_data(dmn, &send_info); +} + +int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *formatted_ste, u8 *mask) +{ + u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk); + int num_stes_per_iter; + int iterations; + u8 *data; + int ret; + int i; + int j; + + ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size, + &iterations, &num_stes_per_iter); + if (ret) + return ret; + + mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, formatted_ste, DR_STE_SIZE); + + /* Send the data iteration times */ + for (i = 0; i < iterations; i++) { + u32 ste_index = i * (byte_size / DR_STE_SIZE); + struct postsend_info send_info = {}; + + /* Copy all ste's on the data buffer + * need to add the bit_mask + */ + for (j = 0; j < num_stes_per_iter; j++) { + struct mlx5dr_ste *ste = &htbl->chunk->ste_arr[ste_index + j]; + u32 ste_off = j * DR_STE_SIZE; + + if (mlx5dr_ste_is_not_used(ste)) { + memcpy(data + ste_off, + formatted_ste, DR_STE_SIZE); + } else { + /* Copy data */ + memcpy(data + ste_off, + htbl->chunk->hw_ste_arr + + DR_STE_SIZE_REDUCED * (ste_index + j), + DR_STE_SIZE_REDUCED); + /* Copy bit_mask */ + memcpy(data + ste_off + DR_STE_SIZE_REDUCED, + mask, DR_STE_SIZE_MASK); + /* Only when we have mask we need to re-arrange the STE */ + mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, + data + (j * DR_STE_SIZE), + DR_STE_SIZE); + } + } + + send_info.write.addr = (uintptr_t)data; + send_info.write.length = byte_size; + send_info.write.lkey = 0; + send_info.remote_addr = + mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index); + send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk); + + ret = dr_postsend_icm_data(dmn, &send_info); + if (ret) + goto out_free; + } + +out_free: + kvfree(data); + return ret; +} + +/* Initialize htble with default STEs */ +int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *ste_init_data, + bool update_hw_ste) +{ + u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk); + int iterations; + int num_stes; + u8 *copy_dst; + u8 *data; + int ret; + int i; + + ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size, + &iterations, &num_stes); + if (ret) + return ret; + + if (update_hw_ste) { + /* Copy the reduced STE to hash table ste_arr */ + for (i = 0; i < num_stes; i++) { + copy_dst = htbl->chunk->hw_ste_arr + i * DR_STE_SIZE_REDUCED; + memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED); + } + } + + mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, ste_init_data, DR_STE_SIZE); + + /* Copy the same STE on the data buffer */ + for (i = 0; i < num_stes; i++) { + copy_dst = data + i * DR_STE_SIZE; + memcpy(copy_dst, ste_init_data, DR_STE_SIZE); + } + + /* Send the data iteration times */ + for (i = 0; i < iterations; i++) { + u8 ste_index = i * (byte_size / DR_STE_SIZE); + struct postsend_info send_info = {}; + + send_info.write.addr = 
(uintptr_t)data; + send_info.write.length = byte_size; + send_info.write.lkey = 0; + send_info.remote_addr = + mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index); + send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk); + + ret = dr_postsend_icm_data(dmn, &send_info); + if (ret) + goto out_free; + } + +out_free: + kvfree(data); + return ret; +} + +int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, + struct mlx5dr_action *action) +{ + struct postsend_info send_info = {}; + int ret; + + send_info.write.addr = (uintptr_t)action->rewrite->data; + send_info.write.length = action->rewrite->num_of_actions * + DR_MODIFY_ACTION_SIZE; + send_info.write.lkey = 0; + send_info.remote_addr = + mlx5dr_icm_pool_get_chunk_mr_addr(action->rewrite->chunk); + send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(action->rewrite->chunk); + + ret = dr_postsend_icm_data(dmn, &send_info); + + return ret; +} + +static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev, + struct mlx5dr_qp *dr_qp, + int port) +{ + u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {}; + void *qpc; + + qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc); + + MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port); + MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED); + MLX5_SET(qpc, qpc, rre, 1); + MLX5_SET(qpc, qpc, rwe, 1); + + MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP); + MLX5_SET(rst2init_qp_in, in, qpn, dr_qp->qpn); + + return mlx5_cmd_exec_in(mdev, rst2init_qp, in); +} + +static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev, + struct mlx5dr_qp *dr_qp, + struct dr_qp_rts_attr *attr) +{ + u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {}; + void *qpc; + + qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc); + + MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn); + + MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt); + MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry); + MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */ + + MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP); + MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn); + + return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in); +} + +static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev, + struct mlx5dr_qp *dr_qp, + struct dr_qp_rtr_attr *attr) +{ + u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {}; + void *qpc; + + qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc); + + MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn); + + MLX5_SET(qpc, qpc, mtu, attr->mtu); + MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1); + MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num); + memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32), + attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac)); + memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip), + attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid)); + MLX5_SET(qpc, qpc, primary_address_path.src_addr_index, + attr->sgid_index); + + if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2) + MLX5_SET(qpc, qpc, primary_address_path.udp_sport, + attr->udp_src_port); + + MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num); + MLX5_SET(qpc, qpc, primary_address_path.fl, attr->fl); + MLX5_SET(qpc, qpc, min_rnr_nak, 1); + + MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP); + MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn); + + return mlx5_cmd_exec_in(mdev, init2rtr_qp, in); +} + +static bool dr_send_allow_fl(struct mlx5dr_cmd_caps *caps) +{ + /* Check whether RC RoCE QP creation with force loopback is allowed. 
+ * There are two separate capability bits for this: + * - force loopback when RoCE is enabled + * - force loopback when RoCE is disabled + */ + return ((caps->roce_caps.roce_en && + caps->roce_caps.fl_rc_qp_when_roce_enabled) || + (!caps->roce_caps.roce_en && + caps->roce_caps.fl_rc_qp_when_roce_disabled)); +} + +static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn) +{ + struct mlx5dr_qp *dr_qp = dmn->send_ring->qp; + struct dr_qp_rts_attr rts_attr = {}; + struct dr_qp_rtr_attr rtr_attr = {}; + enum ib_mtu mtu = IB_MTU_1024; + u16 gid_index = 0; + int port = 1; + int ret; + + /* Init */ + ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port); + if (ret) { + mlx5dr_err(dmn, "Failed modify QP rst2init\n"); + return ret; + } + + /* RTR */ + rtr_attr.mtu = mtu; + rtr_attr.qp_num = dr_qp->qpn; + rtr_attr.min_rnr_timer = 12; + rtr_attr.port_num = port; + rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp; + + /* If QP creation with force loopback is allowed, then there + * is no need for GID index when creating the QP. + * Otherwise we query GID attributes and use GID index. + */ + rtr_attr.fl = dr_send_allow_fl(&dmn->info.caps); + if (!rtr_attr.fl) { + ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, + &rtr_attr.dgid_attr); + if (ret) + return ret; + + rtr_attr.sgid_index = gid_index; + } + + ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr); + if (ret) { + mlx5dr_err(dmn, "Failed modify QP init2rtr\n"); + return ret; + } + + /* RTS */ + rts_attr.timeout = 14; + rts_attr.retry_cnt = 7; + rts_attr.rnr_retry = 7; + + ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr); + if (ret) { + mlx5dr_err(dmn, "Failed modify QP rtr2rts\n"); + return ret; + } + + return 0; +} + +static void dr_cq_complete(struct mlx5_core_cq *mcq, + struct mlx5_eqe *eqe) +{ + pr_err("CQ completion CQ: #%u\n", mcq->cqn); +} + +static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, + struct mlx5_uars_page *uar, + size_t ncqe) +{ + u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {}; + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; + struct mlx5_wq_param wqp; + struct mlx5_cqe64 *cqe; + struct mlx5dr_cq *cq; + int inlen, err, eqn; + void *cqc, *in; + __be64 *pas; + int vector; + u32 i; + + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) + return NULL; + + ncqe = roundup_pow_of_two(ncqe); + MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe)); + + wqp.buf_numa_node = mdev->priv.numa_node; + wqp.db_numa_node = mdev->priv.numa_node; + + err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq, + &cq->wq_ctrl); + if (err) + goto out; + + for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { + cqe = mlx5_cqwq_get_wqe(&cq->wq, i); + cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK; + } + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + sizeof(u64) * cq->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + goto err_cqwq; + + vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev); + err = mlx5_vector2eqn(mdev, vector, &eqn); + if (err) { + kvfree(in); + goto err_cqwq; + } + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe)); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); + MLX5_SET(cqc, cqc, uar_page, uar->index); + MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); + + pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); + mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas); + + cq->mcq.comp = dr_cq_complete; + + err = mlx5_core_create_cq(mdev, 
&cq->mcq, in, inlen, out, sizeof(out)); + kvfree(in); + + if (err) + goto err_cqwq; + + cq->mcq.cqe_sz = 64; + cq->mcq.set_ci_db = cq->wq_ctrl.db.db; + cq->mcq.arm_db = cq->wq_ctrl.db.db + 1; + *cq->mcq.set_ci_db = 0; + + /* set no-zero value, in order to avoid the HW to run db-recovery on + * CQ that used in polling mode. + */ + *cq->mcq.arm_db = cpu_to_be32(2 << 28); + + cq->mcq.vector = 0; + cq->mcq.uar = uar; + + return cq; + +err_cqwq: + mlx5_wq_destroy(&cq->wq_ctrl); +out: + kfree(cq); + return NULL; +} + +static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq) +{ + mlx5_core_destroy_cq(mdev, &cq->mcq); + mlx5_wq_destroy(&cq->wq_ctrl); + kfree(cq); +} + +static int dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey) +{ + u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {}; + void *mkc; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, a, 1); + MLX5_SET(mkc, mkc, rw, 1); + MLX5_SET(mkc, mkc, rr, 1); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET(mkc, mkc, pd, pdn); + MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + + return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in)); +} + +static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev, + u32 pdn, void *buf, size_t size) +{ + struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL); + struct device *dma_device; + dma_addr_t dma_addr; + int err; + + if (!mr) + return NULL; + + dma_device = mlx5_core_dma_dev(mdev); + dma_addr = dma_map_single(dma_device, buf, size, + DMA_BIDIRECTIONAL); + err = dma_mapping_error(dma_device, dma_addr); + if (err) { + mlx5_core_warn(mdev, "Can't dma buf\n"); + kfree(mr); + return NULL; + } + + err = dr_create_mkey(mdev, pdn, &mr->mkey); + if (err) { + mlx5_core_warn(mdev, "Can't create mkey\n"); + dma_unmap_single(dma_device, dma_addr, size, + DMA_BIDIRECTIONAL); + kfree(mr); + return NULL; + } + + mr->dma_addr = dma_addr; + mr->size = size; + mr->addr = buf; + + return mr; +} + +static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr) +{ + mlx5_core_destroy_mkey(mdev, mr->mkey); + dma_unmap_single(mlx5_core_dma_dev(mdev), mr->dma_addr, mr->size, + DMA_BIDIRECTIONAL); + kfree(mr); +} + +int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn) +{ + struct dr_qp_init_attr init_attr = {}; + int cq_size; + int size; + int ret; + + dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL); + if (!dmn->send_ring) + return -ENOMEM; + + cq_size = QUEUE_SIZE + 1; + dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size); + if (!dmn->send_ring->cq) { + mlx5dr_err(dmn, "Failed creating CQ\n"); + ret = -ENOMEM; + goto free_send_ring; + } + + init_attr.cqn = dmn->send_ring->cq->mcq.cqn; + init_attr.pdn = dmn->pdn; + init_attr.uar = dmn->uar; + init_attr.max_send_wr = QUEUE_SIZE; + + /* Isolated VL is applicable only if force loopback is supported */ + if (dr_send_allow_fl(&dmn->info.caps)) + init_attr.isolate_vl_tc = dmn->info.caps.isolate_vl_tc; + + spin_lock_init(&dmn->send_ring->lock); + + dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr); + if (!dmn->send_ring->qp) { + mlx5dr_err(dmn, "Failed creating QP\n"); + ret = -ENOMEM; + goto clean_cq; + } + + dmn->send_ring->cq->qp = dmn->send_ring->qp; + + dmn->info.max_send_wr = QUEUE_SIZE; + dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data, + DR_STE_SIZE); + + dmn->send_ring->signal_th = dmn->info.max_send_wr / + SIGNAL_PER_DIV_QUEUE; + + /* 
Prepare qp to be used */ + ret = dr_prepare_qp_to_rts(dmn); + if (ret) + goto clean_qp; + + dmn->send_ring->max_post_send_size = + mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K, + DR_ICM_TYPE_STE); + + /* Allocating the max size as a buffer for writing */ + size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size; + dmn->send_ring->buf = kzalloc(size, GFP_KERNEL); + if (!dmn->send_ring->buf) { + ret = -ENOMEM; + goto clean_qp; + } + + dmn->send_ring->buf_size = size; + + dmn->send_ring->mr = dr_reg_mr(dmn->mdev, + dmn->pdn, dmn->send_ring->buf, size); + if (!dmn->send_ring->mr) { + ret = -ENOMEM; + goto free_mem; + } + + dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev, + dmn->pdn, dmn->send_ring->sync_buff, + MIN_READ_SYNC); + if (!dmn->send_ring->sync_mr) { + ret = -ENOMEM; + goto clean_mr; + } + + return 0; + +clean_mr: + dr_dereg_mr(dmn->mdev, dmn->send_ring->mr); +free_mem: + kfree(dmn->send_ring->buf); +clean_qp: + dr_destroy_qp(dmn->mdev, dmn->send_ring->qp); +clean_cq: + dr_destroy_cq(dmn->mdev, dmn->send_ring->cq); +free_send_ring: + kfree(dmn->send_ring); + + return ret; +} + +void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn, + struct mlx5dr_send_ring *send_ring) +{ + dr_destroy_qp(dmn->mdev, send_ring->qp); + dr_destroy_cq(dmn->mdev, send_ring->cq); + dr_dereg_mr(dmn->mdev, send_ring->sync_mr); + dr_dereg_mr(dmn->mdev, send_ring->mr); + kfree(send_ring->buf); + kfree(send_ring); +} + +int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn) +{ + struct mlx5dr_send_ring *send_ring = dmn->send_ring; + struct postsend_info send_info = {}; + u8 data[DR_STE_SIZE]; + int num_of_sends_req; + int ret; + int i; + + /* Sending this amount of requests makes sure we will get drain */ + num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2; + + /* Send fake requests forcing the last to be signaled */ + send_info.write.addr = (uintptr_t)data; + send_info.write.length = DR_STE_SIZE; + send_info.write.lkey = 0; + /* Using the sync_mr in order to write/read */ + send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr; + send_info.rkey = send_ring->sync_mr->mkey; + + for (i = 0; i < num_of_sends_req; i++) { + ret = dr_postsend_icm_data(dmn, &send_info); + if (ret) + return ret; + } + + spin_lock(&send_ring->lock); + ret = dr_handle_pending_wc(dmn, send_ring); + spin_unlock(&send_ring->lock); + + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c new file mode 100644 index 000000000..7815a629d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -0,0 +1,1390 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include <linux/types.h> +#include <linux/crc32.h> +#include "dr_ste.h" + +struct dr_hw_ste_format { + u8 ctrl[DR_STE_SIZE_CTRL]; + u8 tag[DR_STE_SIZE_TAG]; + u8 mask[DR_STE_SIZE_MASK]; +}; + +static u32 dr_ste_crc32_calc(const void *input_data, size_t length) +{ + u32 crc = crc32(0, input_data, length); + + return (__force u32)((crc >> 24) & 0xff) | ((crc << 8) & 0xff0000) | + ((crc >> 8) & 0xff00) | ((crc << 24) & 0xff000000); +} + +bool mlx5dr_ste_supp_ttl_cs_recalc(struct mlx5dr_cmd_caps *caps) +{ + return caps->sw_format_ver > MLX5_STEERING_FORMAT_CONNECTX_5; +} + +u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl) +{ + u32 num_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk); + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + u8 masked[DR_STE_SIZE_TAG] = {}; + u32 crc32, index; + u16 bit; + int i; + + /* Don't calculate CRC if the result is predicted */ + if (num_entries == 1 || htbl->byte_mask == 0) + return 0; + + /* Mask tag using byte mask, bit per byte */ + bit = 1 << (DR_STE_SIZE_TAG - 1); + for (i = 0; i < DR_STE_SIZE_TAG; i++) { + if (htbl->byte_mask & bit) + masked[i] = hw_ste->tag[i]; + + bit = bit >> 1; + } + + crc32 = dr_ste_crc32_calc(masked, DR_STE_SIZE_TAG); + index = crc32 & (num_entries - 1); + + return index; +} + +u16 mlx5dr_ste_conv_bit_to_byte_mask(u8 *bit_mask) +{ + u16 byte_mask = 0; + int i; + + for (i = 0; i < DR_STE_SIZE_MASK; i++) { + byte_mask = byte_mask << 1; + if (bit_mask[i] == 0xff) + byte_mask |= 1; + } + return byte_mask; +} + +static u8 *dr_ste_get_tag(u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + + return hw_ste->tag; +} + +void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + + memcpy(hw_ste->mask, bit_mask, DR_STE_SIZE_MASK); +} + +static void dr_ste_set_always_hit(struct dr_hw_ste_format *hw_ste) +{ + memset(&hw_ste->tag, 0, sizeof(hw_ste->tag)); + memset(&hw_ste->mask, 0, sizeof(hw_ste->mask)); +} + +static void dr_ste_set_always_miss(struct dr_hw_ste_format *hw_ste) +{ + hw_ste->tag[0] = 0xdc; + hw_ste->mask[0] = 0; +} + +void mlx5dr_ste_set_miss_addr(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste_p, u64 miss_addr) +{ + ste_ctx->set_miss_addr(hw_ste_p, miss_addr); +} + +static void dr_ste_always_miss_addr(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste, u64 miss_addr) +{ + ste_ctx->set_next_lu_type(hw_ste, MLX5DR_STE_LU_TYPE_DONT_CARE); + ste_ctx->set_miss_addr(hw_ste, miss_addr); + dr_ste_set_always_miss((struct dr_hw_ste_format *)hw_ste); +} + +void mlx5dr_ste_set_hit_addr(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste, u64 icm_addr, u32 ht_size) +{ + ste_ctx->set_hit_addr(hw_ste, icm_addr, ht_size); +} + +u64 mlx5dr_ste_get_icm_addr(struct mlx5dr_ste *ste) +{ + u64 base_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(ste->htbl->chunk); + u32 index = ste - ste->htbl->chunk->ste_arr; + + return base_icm_addr + DR_STE_SIZE * index; +} + +u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste) +{ + u32 index = ste - ste->htbl->chunk->ste_arr; + + return mlx5dr_icm_pool_get_chunk_mr_addr(ste->htbl->chunk) + DR_STE_SIZE * index; +} + +u8 *mlx5dr_ste_get_hw_ste(struct mlx5dr_ste *ste) +{ + u64 index = ste - ste->htbl->chunk->ste_arr; + + return ste->htbl->chunk->hw_ste_arr + DR_STE_SIZE_REDUCED * index; +} + +struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste) +{ + u32 index = ste - ste->htbl->chunk->ste_arr; + + return 
&ste->htbl->chunk->miss_list[index]; +} + +static void dr_ste_always_hit_htbl(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste, + struct mlx5dr_ste_htbl *next_htbl) +{ + struct mlx5dr_icm_chunk *chunk = next_htbl->chunk; + + ste_ctx->set_byte_mask(hw_ste, next_htbl->byte_mask); + ste_ctx->set_next_lu_type(hw_ste, next_htbl->lu_type); + ste_ctx->set_hit_addr(hw_ste, mlx5dr_icm_pool_get_chunk_icm_addr(chunk), + mlx5dr_icm_pool_get_chunk_num_of_entries(chunk)); + + dr_ste_set_always_hit((struct dr_hw_ste_format *)hw_ste); +} + +bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher, + u8 ste_location) +{ + return ste_location == nic_matcher->num_of_builders; +} + +/* Replace relevant fields, except of: + * htbl - keep the origin htbl + * miss_list + list - already took the src from the list. + * icm_addr/mr_addr - depends on the hosting table. + * + * Before: + * | a | -> | b | -> | c | -> + * + * After: + * | a | -> | c | -> + * While the data that was in b copied to a. + */ +static void dr_ste_replace(struct mlx5dr_ste *dst, struct mlx5dr_ste *src) +{ + memcpy(mlx5dr_ste_get_hw_ste(dst), mlx5dr_ste_get_hw_ste(src), + DR_STE_SIZE_REDUCED); + dst->next_htbl = src->next_htbl; + if (dst->next_htbl) + dst->next_htbl->pointing_ste = dst; + + dst->refcount = src->refcount; +} + +/* Free ste which is the head and the only one in miss_list */ +static void +dr_ste_remove_head_ste(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste *ste, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste_send_info *ste_info_head, + struct list_head *send_ste_list, + struct mlx5dr_ste_htbl *stats_tbl) +{ + u8 tmp_data_ste[DR_STE_SIZE] = {}; + u64 miss_addr; + + miss_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); + + /* Use temp ste because dr_ste_always_miss_addr + * touches bit_mask area which doesn't exist at ste->hw_ste. + * Need to use a full-sized (DR_STE_SIZE) hw_ste. + */ + memcpy(tmp_data_ste, mlx5dr_ste_get_hw_ste(ste), DR_STE_SIZE_REDUCED); + dr_ste_always_miss_addr(ste_ctx, tmp_data_ste, miss_addr); + memcpy(mlx5dr_ste_get_hw_ste(ste), tmp_data_ste, DR_STE_SIZE_REDUCED); + + list_del_init(&ste->miss_list_node); + + /* Write full STE size in order to have "always_miss" */ + mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE, + 0, tmp_data_ste, + ste_info_head, + send_ste_list, + true /* Copy data */); + + stats_tbl->ctrl.num_of_valid_entries--; +} + +/* Free ste which is the head but NOT the only one in miss_list: + * |_ste_| --> |_next_ste_| -->|__| -->|__| -->/0 + */ +static void +dr_ste_replace_head_ste(struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *ste, + struct mlx5dr_ste *next_ste, + struct mlx5dr_ste_send_info *ste_info_head, + struct list_head *send_ste_list, + struct mlx5dr_ste_htbl *stats_tbl) + +{ + struct mlx5dr_ste_htbl *next_miss_htbl; + u8 hw_ste[DR_STE_SIZE] = {}; + int sb_idx; + + next_miss_htbl = next_ste->htbl; + + /* Remove from the miss_list the next_ste before copy */ + list_del_init(&next_ste->miss_list_node); + + /* Move data from next into ste */ + dr_ste_replace(ste, next_ste); + + /* Update the rule on STE change */ + mlx5dr_rule_set_last_member(next_ste->rule_rx_tx, ste, false); + + /* Copy all 64 hw_ste bytes */ + memcpy(hw_ste, mlx5dr_ste_get_hw_ste(ste), DR_STE_SIZE_REDUCED); + sb_idx = ste->ste_chain_location - 1; + mlx5dr_ste_set_bit_mask(hw_ste, + nic_matcher->ste_builder[sb_idx].bit_mask); + + /* Del the htbl that contains the next_ste. + * The origin htbl stay with the same number of entries. 
+ */ + mlx5dr_htbl_put(next_miss_htbl); + + mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE, + 0, hw_ste, + ste_info_head, + send_ste_list, + true /* Copy data */); + + stats_tbl->ctrl.num_of_collisions--; + stats_tbl->ctrl.num_of_valid_entries--; +} + +/* Free ste that is located in the middle of the miss list: + * |__| -->|_prev_ste_|->|_ste_|-->|_next_ste_| + */ +static void dr_ste_remove_middle_ste(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste *ste, + struct mlx5dr_ste_send_info *ste_info, + struct list_head *send_ste_list, + struct mlx5dr_ste_htbl *stats_tbl) +{ + struct mlx5dr_ste *prev_ste; + u64 miss_addr; + + prev_ste = list_prev_entry(ste, miss_list_node); + if (WARN_ON(!prev_ste)) + return; + + miss_addr = ste_ctx->get_miss_addr(mlx5dr_ste_get_hw_ste(ste)); + ste_ctx->set_miss_addr(mlx5dr_ste_get_hw_ste(prev_ste), miss_addr); + + mlx5dr_send_fill_and_append_ste_send_info(prev_ste, DR_STE_SIZE_CTRL, 0, + mlx5dr_ste_get_hw_ste(prev_ste), + ste_info, send_ste_list, + true /* Copy data*/); + + list_del_init(&ste->miss_list_node); + + stats_tbl->ctrl.num_of_valid_entries--; + stats_tbl->ctrl.num_of_collisions--; +} + +void mlx5dr_ste_free(struct mlx5dr_ste *ste, + struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + struct mlx5dr_ste_send_info *cur_ste_info, *tmp_ste_info; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx; + struct mlx5dr_ste_send_info ste_info_head; + struct mlx5dr_ste *next_ste, *first_ste; + bool put_on_origin_table = true; + struct mlx5dr_ste_htbl *stats_tbl; + LIST_HEAD(send_ste_list); + + first_ste = list_first_entry(mlx5dr_ste_get_miss_list(ste), + struct mlx5dr_ste, miss_list_node); + stats_tbl = first_ste->htbl; + + /* Two options: + * 1. ste is head: + * a. head ste is the only ste in the miss list + * b. head ste is not the only ste in the miss-list + * 2. 
ste is not head + */ + if (first_ste == ste) { /* Ste is the head */ + struct mlx5dr_ste *last_ste; + + last_ste = list_last_entry(mlx5dr_ste_get_miss_list(ste), + struct mlx5dr_ste, miss_list_node); + if (last_ste == first_ste) + next_ste = NULL; + else + next_ste = list_next_entry(ste, miss_list_node); + + if (!next_ste) { + /* One and only entry in the list */ + dr_ste_remove_head_ste(ste_ctx, ste, + nic_matcher, + &ste_info_head, + &send_ste_list, + stats_tbl); + } else { + /* First but not only entry in the list */ + dr_ste_replace_head_ste(nic_matcher, ste, + next_ste, &ste_info_head, + &send_ste_list, stats_tbl); + put_on_origin_table = false; + } + } else { /* Ste in the middle of the list */ + dr_ste_remove_middle_ste(ste_ctx, ste, + &ste_info_head, &send_ste_list, + stats_tbl); + } + + /* Update HW */ + list_for_each_entry_safe(cur_ste_info, tmp_ste_info, + &send_ste_list, send_list) { + list_del(&cur_ste_info->send_list); + mlx5dr_send_postsend_ste(dmn, cur_ste_info->ste, + cur_ste_info->data, cur_ste_info->size, + cur_ste_info->offset); + } + + if (put_on_origin_table) + mlx5dr_htbl_put(ste->htbl); +} + +bool mlx5dr_ste_equal_tag(void *src, void *dst) +{ + struct dr_hw_ste_format *s_hw_ste = (struct dr_hw_ste_format *)src; + struct dr_hw_ste_format *d_hw_ste = (struct dr_hw_ste_format *)dst; + + return !memcmp(s_hw_ste->tag, d_hw_ste->tag, DR_STE_SIZE_TAG); +} + +void mlx5dr_ste_set_hit_addr_by_next_htbl(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste, + struct mlx5dr_ste_htbl *next_htbl) +{ + u64 icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(next_htbl->chunk); + u32 num_entries = + mlx5dr_icm_pool_get_chunk_num_of_entries(next_htbl->chunk); + + ste_ctx->set_hit_addr(hw_ste, icm_addr, num_entries); +} + +void mlx5dr_ste_prepare_for_postsend(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste_p, u32 ste_size) +{ + if (ste_ctx->prepare_for_postsend) + ste_ctx->prepare_for_postsend(hw_ste_p, ste_size); +} + +/* Init one ste as a pattern for ste data array */ +void mlx5dr_ste_set_formatted_ste(struct mlx5dr_ste_ctx *ste_ctx, + u16 gvmi, + enum mlx5dr_domain_nic_type nic_type, + struct mlx5dr_ste_htbl *htbl, + u8 *formatted_ste, + struct mlx5dr_htbl_connect_info *connect_info) +{ + bool is_rx = nic_type == DR_DOMAIN_NIC_TYPE_RX; + u8 tmp_hw_ste[DR_STE_SIZE] = {0}; + + ste_ctx->ste_init(formatted_ste, htbl->lu_type, is_rx, gvmi); + + /* Use temp ste because dr_ste_always_miss_addr/hit_htbl + * touches bit_mask area which doesn't exist at ste->hw_ste. + * Need to use a full-sized (DR_STE_SIZE) hw_ste. 
+ */ + memcpy(tmp_hw_ste, formatted_ste, DR_STE_SIZE_REDUCED); + if (connect_info->type == CONNECT_HIT) + dr_ste_always_hit_htbl(ste_ctx, tmp_hw_ste, + connect_info->hit_next_htbl); + else + dr_ste_always_miss_addr(ste_ctx, tmp_hw_ste, + connect_info->miss_icm_addr); + memcpy(formatted_ste, tmp_hw_ste, DR_STE_SIZE_REDUCED); +} + +int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn, + struct mlx5dr_domain_rx_tx *nic_dmn, + struct mlx5dr_ste_htbl *htbl, + struct mlx5dr_htbl_connect_info *connect_info, + bool update_hw_ste) +{ + u8 formatted_ste[DR_STE_SIZE] = {}; + + mlx5dr_ste_set_formatted_ste(dmn->ste_ctx, + dmn->info.caps.gvmi, + nic_dmn->type, + htbl, + formatted_ste, + connect_info); + + return mlx5dr_send_postsend_formatted_htbl(dmn, htbl, formatted_ste, update_hw_ste); +} + +int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *ste, + u8 *cur_hw_ste, + enum mlx5dr_icm_chunk_size log_table_size) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_ste_htbl *next_htbl; + + if (!mlx5dr_ste_is_last_in_rule(nic_matcher, ste->ste_chain_location)) { + u16 next_lu_type; + u16 byte_mask; + + next_lu_type = ste_ctx->get_next_lu_type(cur_hw_ste); + byte_mask = ste_ctx->get_byte_mask(cur_hw_ste); + + next_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + log_table_size, + next_lu_type, + byte_mask); + if (!next_htbl) { + mlx5dr_dbg(dmn, "Failed allocating table\n"); + return -ENOMEM; + } + + /* Write new table to HW */ + info.type = CONNECT_MISS; + info.miss_icm_addr = + mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); + if (mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, next_htbl, + &info, false)) { + mlx5dr_info(dmn, "Failed writing table to HW\n"); + goto free_table; + } + + mlx5dr_ste_set_hit_addr_by_next_htbl(ste_ctx, + cur_hw_ste, next_htbl); + ste->next_htbl = next_htbl; + next_htbl->pointing_ste = ste; + } + + return 0; + +free_table: + mlx5dr_ste_htbl_free(next_htbl); + return -ENOENT; +} + +struct mlx5dr_ste_htbl *mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size, + u16 lu_type, u16 byte_mask) +{ + struct mlx5dr_icm_chunk *chunk; + struct mlx5dr_ste_htbl *htbl; + u32 num_entries; + int i; + + htbl = kzalloc(sizeof(*htbl), GFP_KERNEL); + if (!htbl) + return NULL; + + chunk = mlx5dr_icm_alloc_chunk(pool, chunk_size); + if (!chunk) + goto out_free_htbl; + + htbl->chunk = chunk; + htbl->lu_type = lu_type; + htbl->byte_mask = byte_mask; + htbl->refcount = 0; + num_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(chunk); + + for (i = 0; i < num_entries; i++) { + struct mlx5dr_ste *ste = &chunk->ste_arr[i]; + + ste->htbl = htbl; + ste->refcount = 0; + INIT_LIST_HEAD(&ste->miss_list_node); + INIT_LIST_HEAD(&chunk->miss_list[i]); + } + + return htbl; + +out_free_htbl: + kfree(htbl); + return NULL; +} + +int mlx5dr_ste_htbl_free(struct mlx5dr_ste_htbl *htbl) +{ + if (htbl->refcount) + return -EBUSY; + + mlx5dr_icm_free_chunk(htbl->chunk); + kfree(htbl); + return 0; +} + +void mlx5dr_ste_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_domain *dmn, + u8 *action_type_set, + u8 *hw_ste_arr, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes) +{ + ste_ctx->set_actions_tx(dmn, action_type_set, ste_ctx->actions_caps, + hw_ste_arr, attr, added_stes); +} + +void 
mlx5dr_ste_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_domain *dmn, + u8 *action_type_set, + u8 *hw_ste_arr, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes) +{ + ste_ctx->set_actions_rx(dmn, action_type_set, ste_ctx->actions_caps, + hw_ste_arr, attr, added_stes); +} + +const struct mlx5dr_ste_action_modify_field * +mlx5dr_ste_conv_modify_hdr_sw_field(struct mlx5dr_ste_ctx *ste_ctx, u16 sw_field) +{ + const struct mlx5dr_ste_action_modify_field *hw_field; + + if (sw_field >= ste_ctx->modify_field_arr_sz) + return NULL; + + hw_field = &ste_ctx->modify_field_arr[sw_field]; + if (!hw_field->end && !hw_field->start) + return NULL; + + return hw_field; +} + +void mlx5dr_ste_set_action_set(struct mlx5dr_ste_ctx *ste_ctx, + __be64 *hw_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data) +{ + ste_ctx->set_action_set((u8 *)hw_action, + hw_field, shifter, length, data); +} + +void mlx5dr_ste_set_action_add(struct mlx5dr_ste_ctx *ste_ctx, + __be64 *hw_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data) +{ + ste_ctx->set_action_add((u8 *)hw_action, + hw_field, shifter, length, data); +} + +void mlx5dr_ste_set_action_copy(struct mlx5dr_ste_ctx *ste_ctx, + __be64 *hw_action, + u8 dst_hw_field, + u8 dst_shifter, + u8 dst_len, + u8 src_hw_field, + u8 src_shifter) +{ + ste_ctx->set_action_copy((u8 *)hw_action, + dst_hw_field, dst_shifter, dst_len, + src_hw_field, src_shifter); +} + +int mlx5dr_ste_set_action_decap_l3_list(struct mlx5dr_ste_ctx *ste_ctx, + void *data, u32 data_sz, + u8 *hw_action, u32 hw_action_sz, + u16 *used_hw_action_num) +{ + /* Only Ethernet frame is supported, with VLAN (18) or without (14) */ + if (data_sz != HDR_LEN_L2 && data_sz != HDR_LEN_L2_W_VLAN) + return -EINVAL; + + return ste_ctx->set_action_decap_l3_list(data, data_sz, + hw_action, hw_action_sz, + used_hw_action_num); +} + +static int dr_ste_build_pre_check_spec(struct mlx5dr_domain *dmn, + struct mlx5dr_match_spec *spec) +{ + if (spec->ip_version) { + if (spec->ip_version != 0xf) { + mlx5dr_err(dmn, + "Partial ip_version mask with src/dst IP is not supported\n"); + return -EINVAL; + } + } else if (spec->ethertype != 0xffff && + (DR_MASK_IS_SRC_IP_SET(spec) || DR_MASK_IS_DST_IP_SET(spec))) { + mlx5dr_err(dmn, + "Partial/no ethertype mask with src/dst IP is not supported\n"); + return -EINVAL; + } + + return 0; +} + +int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn, + u8 match_criteria, + struct mlx5dr_match_param *mask, + struct mlx5dr_match_param *value) +{ + if (value) + return 0; + + if (match_criteria & DR_MATCHER_CRITERIA_MISC) { + if (mask->misc.source_port && mask->misc.source_port != 0xffff) { + mlx5dr_err(dmn, + "Partial mask source_port is not supported\n"); + return -EINVAL; + } + if (mask->misc.source_eswitch_owner_vhca_id && + mask->misc.source_eswitch_owner_vhca_id != 0xffff) { + mlx5dr_err(dmn, + "Partial mask source_eswitch_owner_vhca_id is not supported\n"); + return -EINVAL; + } + } + + if ((match_criteria & DR_MATCHER_CRITERIA_OUTER) && + dr_ste_build_pre_check_spec(dmn, &mask->outer)) + return -EINVAL; + + if ((match_criteria & DR_MATCHER_CRITERIA_INNER) && + dr_ste_build_pre_check_spec(dmn, &mask->inner)) + return -EINVAL; + + return 0; +} + +int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_match_param *value, + u8 *ste_arr) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn; + bool is_rx = nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX; + struct mlx5dr_domain 
*dmn = matcher->tbl->dmn; + struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx; + struct mlx5dr_ste_build *sb; + int ret, i; + + ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria, + &matcher->mask, value); + if (ret) + return ret; + + sb = nic_matcher->ste_builder; + for (i = 0; i < nic_matcher->num_of_builders; i++) { + ste_ctx->ste_init(ste_arr, + sb->lu_type, + is_rx, + dmn->info.caps.gvmi); + + mlx5dr_ste_set_bit_mask(ste_arr, sb->bit_mask); + + ret = sb->ste_build_tag_func(value, sb, dr_ste_get_tag(ste_arr)); + if (ret) + return ret; + + /* Connect the STEs */ + if (i < (nic_matcher->num_of_builders - 1)) { + /* Need the next builder for these fields, + * not relevant for the last ste in the chain. + */ + sb++; + ste_ctx->set_next_lu_type(ste_arr, sb->lu_type); + ste_ctx->set_byte_mask(ste_arr, sb->byte_mask); + } + ste_arr += DR_STE_SIZE; + } + return 0; +} + +#define IFC_GET_CLR(typ, p, fld, clear) ({ \ + void *__p = (p); \ + u32 __t = MLX5_GET(typ, __p, fld); \ + if (clear) \ + MLX5_SET(typ, __p, fld, 0); \ + __t; \ +}) + +#define memcpy_and_clear(to, from, len, clear) ({ \ + void *__to = (to), *__from = (from); \ + size_t __len = (len); \ + memcpy(__to, __from, __len); \ + if (clear) \ + memset(__from, 0, __len); \ +}) + +static void dr_ste_copy_mask_misc(char *mask, struct mlx5dr_match_misc *spec, bool clr) +{ + spec->gre_c_present = IFC_GET_CLR(fte_match_set_misc, mask, gre_c_present, clr); + spec->gre_k_present = IFC_GET_CLR(fte_match_set_misc, mask, gre_k_present, clr); + spec->gre_s_present = IFC_GET_CLR(fte_match_set_misc, mask, gre_s_present, clr); + spec->source_vhca_port = IFC_GET_CLR(fte_match_set_misc, mask, source_vhca_port, clr); + spec->source_sqn = IFC_GET_CLR(fte_match_set_misc, mask, source_sqn, clr); + + spec->source_port = IFC_GET_CLR(fte_match_set_misc, mask, source_port, clr); + spec->source_eswitch_owner_vhca_id = + IFC_GET_CLR(fte_match_set_misc, mask, source_eswitch_owner_vhca_id, clr); + + spec->outer_second_prio = IFC_GET_CLR(fte_match_set_misc, mask, outer_second_prio, clr); + spec->outer_second_cfi = IFC_GET_CLR(fte_match_set_misc, mask, outer_second_cfi, clr); + spec->outer_second_vid = IFC_GET_CLR(fte_match_set_misc, mask, outer_second_vid, clr); + spec->inner_second_prio = IFC_GET_CLR(fte_match_set_misc, mask, inner_second_prio, clr); + spec->inner_second_cfi = IFC_GET_CLR(fte_match_set_misc, mask, inner_second_cfi, clr); + spec->inner_second_vid = IFC_GET_CLR(fte_match_set_misc, mask, inner_second_vid, clr); + + spec->outer_second_cvlan_tag = + IFC_GET_CLR(fte_match_set_misc, mask, outer_second_cvlan_tag, clr); + spec->inner_second_cvlan_tag = + IFC_GET_CLR(fte_match_set_misc, mask, inner_second_cvlan_tag, clr); + spec->outer_second_svlan_tag = + IFC_GET_CLR(fte_match_set_misc, mask, outer_second_svlan_tag, clr); + spec->inner_second_svlan_tag = + IFC_GET_CLR(fte_match_set_misc, mask, inner_second_svlan_tag, clr); + spec->gre_protocol = IFC_GET_CLR(fte_match_set_misc, mask, gre_protocol, clr); + + spec->gre_key_h = IFC_GET_CLR(fte_match_set_misc, mask, gre_key.nvgre.hi, clr); + spec->gre_key_l = IFC_GET_CLR(fte_match_set_misc, mask, gre_key.nvgre.lo, clr); + + spec->vxlan_vni = IFC_GET_CLR(fte_match_set_misc, mask, vxlan_vni, clr); + + spec->geneve_vni = IFC_GET_CLR(fte_match_set_misc, mask, geneve_vni, clr); + spec->geneve_tlv_option_0_exist = + IFC_GET_CLR(fte_match_set_misc, mask, geneve_tlv_option_0_exist, clr); + spec->geneve_oam = IFC_GET_CLR(fte_match_set_misc, mask, geneve_oam, clr); + + spec->outer_ipv6_flow_label = + 
IFC_GET_CLR(fte_match_set_misc, mask, outer_ipv6_flow_label, clr); + + spec->inner_ipv6_flow_label = + IFC_GET_CLR(fte_match_set_misc, mask, inner_ipv6_flow_label, clr); + + spec->geneve_opt_len = IFC_GET_CLR(fte_match_set_misc, mask, geneve_opt_len, clr); + spec->geneve_protocol_type = + IFC_GET_CLR(fte_match_set_misc, mask, geneve_protocol_type, clr); + + spec->bth_dst_qp = IFC_GET_CLR(fte_match_set_misc, mask, bth_dst_qp, clr); +} + +static void dr_ste_copy_mask_spec(char *mask, struct mlx5dr_match_spec *spec, bool clr) +{ + __be32 raw_ip[4]; + + spec->smac_47_16 = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, smac_47_16, clr); + + spec->smac_15_0 = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, smac_15_0, clr); + spec->ethertype = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ethertype, clr); + + spec->dmac_47_16 = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, dmac_47_16, clr); + + spec->dmac_15_0 = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, dmac_15_0, clr); + spec->first_prio = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, first_prio, clr); + spec->first_cfi = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, first_cfi, clr); + spec->first_vid = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, first_vid, clr); + + spec->ip_protocol = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ip_protocol, clr); + spec->ip_dscp = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ip_dscp, clr); + spec->ip_ecn = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ip_ecn, clr); + spec->cvlan_tag = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, cvlan_tag, clr); + spec->svlan_tag = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, svlan_tag, clr); + spec->frag = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, frag, clr); + spec->ip_version = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ip_version, clr); + spec->tcp_flags = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, tcp_flags, clr); + spec->tcp_sport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, tcp_sport, clr); + spec->tcp_dport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, tcp_dport, clr); + + spec->ipv4_ihl = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ipv4_ihl, clr); + spec->ttl_hoplimit = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ttl_hoplimit, clr); + + spec->udp_sport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, udp_sport, clr); + spec->udp_dport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, udp_dport, clr); + + memcpy_and_clear(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(raw_ip), clr); + + spec->src_ip_127_96 = be32_to_cpu(raw_ip[0]); + spec->src_ip_95_64 = be32_to_cpu(raw_ip[1]); + spec->src_ip_63_32 = be32_to_cpu(raw_ip[2]); + spec->src_ip_31_0 = be32_to_cpu(raw_ip[3]); + + memcpy_and_clear(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(raw_ip), clr); + + spec->dst_ip_127_96 = be32_to_cpu(raw_ip[0]); + spec->dst_ip_95_64 = be32_to_cpu(raw_ip[1]); + spec->dst_ip_63_32 = be32_to_cpu(raw_ip[2]); + spec->dst_ip_31_0 = be32_to_cpu(raw_ip[3]); +} + +static void dr_ste_copy_mask_misc2(char *mask, struct mlx5dr_match_misc2 *spec, bool clr) +{ + spec->outer_first_mpls_label = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls.mpls_label, clr); + spec->outer_first_mpls_exp = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls.mpls_exp, clr); + spec->outer_first_mpls_s_bos = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls.mpls_s_bos, clr); + spec->outer_first_mpls_ttl = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls.mpls_ttl, clr); + spec->inner_first_mpls_label = + IFC_GET_CLR(fte_match_set_misc2, mask, 
inner_first_mpls.mpls_label, clr); + spec->inner_first_mpls_exp = + IFC_GET_CLR(fte_match_set_misc2, mask, inner_first_mpls.mpls_exp, clr); + spec->inner_first_mpls_s_bos = + IFC_GET_CLR(fte_match_set_misc2, mask, inner_first_mpls.mpls_s_bos, clr); + spec->inner_first_mpls_ttl = + IFC_GET_CLR(fte_match_set_misc2, mask, inner_first_mpls.mpls_ttl, clr); + spec->outer_first_mpls_over_gre_label = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_label, clr); + spec->outer_first_mpls_over_gre_exp = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_exp, clr); + spec->outer_first_mpls_over_gre_s_bos = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_s_bos, clr); + spec->outer_first_mpls_over_gre_ttl = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_ttl, clr); + spec->outer_first_mpls_over_udp_label = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_label, clr); + spec->outer_first_mpls_over_udp_exp = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_exp, clr); + spec->outer_first_mpls_over_udp_s_bos = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_s_bos, clr); + spec->outer_first_mpls_over_udp_ttl = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_ttl, clr); + spec->metadata_reg_c_7 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_7, clr); + spec->metadata_reg_c_6 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_6, clr); + spec->metadata_reg_c_5 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_5, clr); + spec->metadata_reg_c_4 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_4, clr); + spec->metadata_reg_c_3 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_3, clr); + spec->metadata_reg_c_2 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_2, clr); + spec->metadata_reg_c_1 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_1, clr); + spec->metadata_reg_c_0 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_0, clr); + spec->metadata_reg_a = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_a, clr); +} + +static void dr_ste_copy_mask_misc3(char *mask, struct mlx5dr_match_misc3 *spec, bool clr) +{ + spec->inner_tcp_seq_num = IFC_GET_CLR(fte_match_set_misc3, mask, inner_tcp_seq_num, clr); + spec->outer_tcp_seq_num = IFC_GET_CLR(fte_match_set_misc3, mask, outer_tcp_seq_num, clr); + spec->inner_tcp_ack_num = IFC_GET_CLR(fte_match_set_misc3, mask, inner_tcp_ack_num, clr); + spec->outer_tcp_ack_num = IFC_GET_CLR(fte_match_set_misc3, mask, outer_tcp_ack_num, clr); + spec->outer_vxlan_gpe_vni = + IFC_GET_CLR(fte_match_set_misc3, mask, outer_vxlan_gpe_vni, clr); + spec->outer_vxlan_gpe_next_protocol = + IFC_GET_CLR(fte_match_set_misc3, mask, outer_vxlan_gpe_next_protocol, clr); + spec->outer_vxlan_gpe_flags = + IFC_GET_CLR(fte_match_set_misc3, mask, outer_vxlan_gpe_flags, clr); + spec->icmpv4_header_data = IFC_GET_CLR(fte_match_set_misc3, mask, icmp_header_data, clr); + spec->icmpv6_header_data = + IFC_GET_CLR(fte_match_set_misc3, mask, icmpv6_header_data, clr); + spec->icmpv4_type = IFC_GET_CLR(fte_match_set_misc3, mask, icmp_type, clr); + spec->icmpv4_code = IFC_GET_CLR(fte_match_set_misc3, mask, icmp_code, clr); + spec->icmpv6_type = IFC_GET_CLR(fte_match_set_misc3, mask, icmpv6_type, clr); + spec->icmpv6_code = IFC_GET_CLR(fte_match_set_misc3, mask, icmpv6_code, clr); + spec->geneve_tlv_option_0_data = + IFC_GET_CLR(fte_match_set_misc3, mask, geneve_tlv_option_0_data, 
clr); + spec->gtpu_teid = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_teid, clr); + spec->gtpu_msg_flags = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_msg_flags, clr); + spec->gtpu_msg_type = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_msg_type, clr); + spec->gtpu_dw_0 = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_dw_0, clr); + spec->gtpu_dw_2 = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_dw_2, clr); + spec->gtpu_first_ext_dw_0 = + IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_first_ext_dw_0, clr); +} + +static void dr_ste_copy_mask_misc4(char *mask, struct mlx5dr_match_misc4 *spec, bool clr) +{ + spec->prog_sample_field_id_0 = + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_id_0, clr); + spec->prog_sample_field_value_0 = + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_0, clr); + spec->prog_sample_field_id_1 = + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_id_1, clr); + spec->prog_sample_field_value_1 = + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_1, clr); + spec->prog_sample_field_id_2 = + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_id_2, clr); + spec->prog_sample_field_value_2 = + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_2, clr); + spec->prog_sample_field_id_3 = + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_id_3, clr); + spec->prog_sample_field_value_3 = + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_3, clr); +} + +static void dr_ste_copy_mask_misc5(char *mask, struct mlx5dr_match_misc5 *spec, bool clr) +{ + spec->macsec_tag_0 = + IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_0, clr); + spec->macsec_tag_1 = + IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_1, clr); + spec->macsec_tag_2 = + IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_2, clr); + spec->macsec_tag_3 = + IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_3, clr); + spec->tunnel_header_0 = + IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_0, clr); + spec->tunnel_header_1 = + IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_1, clr); + spec->tunnel_header_2 = + IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_2, clr); + spec->tunnel_header_3 = + IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_3, clr); +} + +void mlx5dr_ste_copy_param(u8 match_criteria, + struct mlx5dr_match_param *set_param, + struct mlx5dr_match_parameters *mask, + bool clr) +{ + u8 tail_param[MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)] = {}; + u8 *data = (u8 *)mask->match_buf; + size_t param_location; + void *buff; + + if (match_criteria & DR_MATCHER_CRITERIA_OUTER) { + if (mask->match_sz < sizeof(struct mlx5dr_match_spec)) { + memcpy(tail_param, data, mask->match_sz); + buff = tail_param; + } else { + buff = mask->match_buf; + } + dr_ste_copy_mask_spec(buff, &set_param->outer, clr); + } + param_location = sizeof(struct mlx5dr_match_spec); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc(buff, &set_param->misc, clr); + } + param_location += sizeof(struct mlx5dr_match_misc); + + if (match_criteria & DR_MATCHER_CRITERIA_INNER) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_spec)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + 
dr_ste_copy_mask_spec(buff, &set_param->inner, clr); + } + param_location += sizeof(struct mlx5dr_match_spec); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC2) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc2)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc2(buff, &set_param->misc2, clr); + } + + param_location += sizeof(struct mlx5dr_match_misc2); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC3) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc3)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc3(buff, &set_param->misc3, clr); + } + + param_location += sizeof(struct mlx5dr_match_misc3); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC4) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc4)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc4(buff, &set_param->misc4, clr); + } + + param_location += sizeof(struct mlx5dr_match_misc4); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC5) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc5)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc5(buff, &set_param->misc5, clr); + } +} + +void mlx5dr_ste_build_eth_l2_src_dst(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_eth_l2_src_dst_init(sb, mask); +} + +void mlx5dr_ste_build_eth_l3_ipv6_dst(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_eth_l3_ipv6_dst_init(sb, mask); +} + +void mlx5dr_ste_build_eth_l3_ipv6_src(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_eth_l3_ipv6_src_init(sb, mask); +} + +void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_eth_l3_ipv4_5_tuple_init(sb, mask); +} + +void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_eth_l2_src_init(sb, mask); +} + +void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_eth_l2_dst_init(sb, mask); +} + +void mlx5dr_ste_build_eth_l2_tnl(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_eth_l2_tnl_init(sb, mask); +} + +void mlx5dr_ste_build_eth_l3_ipv4_misc(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool 
rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_eth_l3_ipv4_misc_init(sb, mask); +} + +void mlx5dr_ste_build_eth_ipv6_l3_l4(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_eth_ipv6_l3_l4_init(sb, mask); +} + +static int dr_ste_build_empty_always_hit_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + return 0; +} + +void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx) +{ + sb->rx = rx; + sb->lu_type = MLX5DR_STE_LU_TYPE_DONT_CARE; + sb->byte_mask = 0; + sb->ste_build_tag_func = &dr_ste_build_empty_always_hit_tag; +} + +void mlx5dr_ste_build_mpls(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_mpls_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_gre(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_tnl_gre_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_mpls_over_gre(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + sb->caps = caps; + return ste_ctx->build_tnl_mpls_over_gre_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_mpls_over_udp(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + sb->caps = caps; + return ste_ctx->build_tnl_mpls_over_udp_init(sb, mask); +} + +void mlx5dr_ste_build_icmp(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + sb->caps = caps; + ste_ctx->build_icmp_init(sb, mask); +} + +void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_general_purpose_init(sb, mask); +} + +void mlx5dr_ste_build_eth_l4_misc(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_eth_l4_misc_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_vxlan_gpe(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_tnl_vxlan_gpe_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_geneve(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_tnl_geneve_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_geneve_tlv_opt(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + sb->rx = rx; + sb->caps = caps; + sb->inner = inner; + ste_ctx->build_tnl_geneve_tlv_opt_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_geneve_tlv_opt_exist(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + 
struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + if (!ste_ctx->build_tnl_geneve_tlv_opt_exist_init) + return; + + sb->rx = rx; + sb->caps = caps; + sb->inner = inner; + ste_ctx->build_tnl_geneve_tlv_opt_exist_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_gtpu(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_tnl_gtpu_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_gtpu_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + sb->rx = rx; + sb->caps = caps; + sb->inner = inner; + ste_ctx->build_tnl_gtpu_flex_parser_0_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_gtpu_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + sb->rx = rx; + sb->caps = caps; + sb->inner = inner; + ste_ctx->build_tnl_gtpu_flex_parser_1_init(sb, mask); +} + +void mlx5dr_ste_build_register_0(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_register_0_init(sb, mask); +} + +void mlx5dr_ste_build_register_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_register_1_init(sb, mask); +} + +void mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn, + bool inner, bool rx) +{ + /* Set vhca_id_valid before we reset source_eswitch_owner_vhca_id */ + sb->vhca_id_valid = mask->misc.source_eswitch_owner_vhca_id; + + sb->rx = rx; + sb->dmn = dmn; + sb->inner = inner; + ste_ctx->build_src_gvmi_qpn_init(sb, mask); +} + +void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_flex_parser_0_init(sb, mask); +} + +void mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_flex_parser_1_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_header_0_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_tnl_header_0_1_init(sb, mask); +} + +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx(u8 version) +{ + if (version == MLX5_STEERING_FORMAT_CONNECTX_5) + return mlx5dr_ste_get_ctx_v0(); + else if (version == MLX5_STEERING_FORMAT_CONNECTX_6DX) + return mlx5dr_ste_get_ctx_v1(); + else if (version == MLX5_STEERING_FORMAT_CONNECTX_7) + return mlx5dr_ste_get_ctx_v2(); + + return NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h new file mode 100644 index 000000000..17513baff --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h @@ -0,0 +1,206 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. 
*/ + +#ifndef _DR_STE_ +#define _DR_STE_ + +#include "dr_types.h" + +#define STE_IPV4 0x1 +#define STE_IPV6 0x2 +#define STE_TCP 0x1 +#define STE_UDP 0x2 +#define STE_SPI 0x3 +#define IP_VERSION_IPV4 0x4 +#define IP_VERSION_IPV6 0x6 +#define STE_SVLAN 0x1 +#define STE_CVLAN 0x2 +#define HDR_LEN_L2_MACS 0xC +#define HDR_LEN_L2_VLAN 0x4 +#define HDR_LEN_L2_ETHER 0x2 +#define HDR_LEN_L2 (HDR_LEN_L2_MACS + HDR_LEN_L2_ETHER) +#define HDR_LEN_L2_W_VLAN (HDR_LEN_L2 + HDR_LEN_L2_VLAN) + +/* Set to STE a specific value using DR_STE_SET */ +#define DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, value) do { \ + if ((spec)->s_fname) { \ + MLX5_SET(ste_##lookup_type, tag, t_fname, value); \ + (spec)->s_fname = 0; \ + } \ +} while (0) + +/* Set to STE spec->s_fname to tag->t_fname set spec->s_fname as used */ +#define DR_STE_SET_TAG(lookup_type, tag, t_fname, spec, s_fname) \ + DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, spec->s_fname) + +/* Set to STE -1 to tag->t_fname and set spec->s_fname as used */ +#define DR_STE_SET_ONES(lookup_type, tag, t_fname, spec, s_fname) \ + DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, -1) + +#define DR_STE_SET_TCP_FLAGS(lookup_type, tag, spec) do { \ + MLX5_SET(ste_##lookup_type, tag, tcp_ns, !!((spec)->tcp_flags & (1 << 8))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_cwr, !!((spec)->tcp_flags & (1 << 7))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_ece, !!((spec)->tcp_flags & (1 << 6))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_urg, !!((spec)->tcp_flags & (1 << 5))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_ack, !!((spec)->tcp_flags & (1 << 4))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_psh, !!((spec)->tcp_flags & (1 << 3))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_rst, !!((spec)->tcp_flags & (1 << 2))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_syn, !!((spec)->tcp_flags & (1 << 1))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_fin, !!((spec)->tcp_flags & (1 << 0))); \ +} while (0) + +#define DR_STE_SET_MPLS(lookup_type, mask, in_out, tag) do { \ + struct mlx5dr_match_misc2 *_mask = mask; \ + u8 *_tag = tag; \ + DR_STE_SET_TAG(lookup_type, _tag, mpls0_label, _mask, \ + in_out##_first_mpls_label);\ + DR_STE_SET_TAG(lookup_type, _tag, mpls0_s_bos, _mask, \ + in_out##_first_mpls_s_bos); \ + DR_STE_SET_TAG(lookup_type, _tag, mpls0_exp, _mask, \ + in_out##_first_mpls_exp); \ + DR_STE_SET_TAG(lookup_type, _tag, mpls0_ttl, _mask, \ + in_out##_first_mpls_ttl); \ +} while (0) + +#define DR_STE_SET_FLEX_PARSER_FIELD(tag, fname, caps, spec) do { \ + u8 parser_id = (caps)->flex_parser_id_##fname; \ + u8 *parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); \ + *(__be32 *)parser_ptr = cpu_to_be32((spec)->fname);\ + (spec)->fname = 0;\ +} while (0) + +#define DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(_misc) (\ + (_misc)->outer_first_mpls_over_gre_label || \ + (_misc)->outer_first_mpls_over_gre_exp || \ + (_misc)->outer_first_mpls_over_gre_s_bos || \ + (_misc)->outer_first_mpls_over_gre_ttl) + +#define DR_STE_IS_OUTER_MPLS_OVER_UDP_SET(_misc) (\ + (_misc)->outer_first_mpls_over_udp_label || \ + (_misc)->outer_first_mpls_over_udp_exp || \ + (_misc)->outer_first_mpls_over_udp_s_bos || \ + (_misc)->outer_first_mpls_over_udp_ttl) + +enum dr_ste_action_modify_type_l3 { + DR_STE_ACTION_MDFY_TYPE_L3_NONE = 0x0, + DR_STE_ACTION_MDFY_TYPE_L3_IPV4 = 0x1, + DR_STE_ACTION_MDFY_TYPE_L3_IPV6 = 0x2, +}; + +enum dr_ste_action_modify_type_l4 { + DR_STE_ACTION_MDFY_TYPE_L4_NONE = 0x0, + DR_STE_ACTION_MDFY_TYPE_L4_TCP = 0x1, + DR_STE_ACTION_MDFY_TYPE_L4_UDP = 0x2, +}; + 
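
[Editor's aside, not part of the patch.] The DR_STE_SET_FLEX_PARSER_FIELD macro above decides where in the 16-byte STE tag each 32-bit flex-parser sample is written; the offset math itself is the dr_ste_calc_flex_parser_offset() helper defined just below. The following is a minimal standalone C sketch (toy buffer and names, not driver code) of that mapping: it applies the same 4 * (3 - (parser_id % 4)) formula and prints the resulting byte offsets, showing that four parser ids pack into one tag in reverse dword order and that ids 4-7 wrap back onto the same offsets (hence the separate flex_parser_0/flex_parser_1 builders seen elsewhere in this file).

/*
 * Illustrative sketch only: mirrors the offset formula used by
 * dr_ste_calc_flex_parser_offset(); all names and values are toys.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TOY_STE_SIZE_TAG 16

static uint8_t *toy_flex_parser_offset(uint8_t *tag, uint8_t parser_id)
{
	/* Same formula as dr_ste_calc_flex_parser_offset() */
	return tag + 4 * (3 - (parser_id % 4));
}

int main(void)
{
	uint8_t tag[TOY_STE_SIZE_TAG];
	uint8_t id;

	memset(tag, 0, sizeof(tag));

	/* ids 0..3 land at byte offsets 12, 8, 4, 0; ids 4..7 wrap again */
	for (id = 0; id < 8; id++) {
		uint8_t *p = toy_flex_parser_offset(tag, id);

		printf("parser id %u -> tag byte offset %ld\n",
		       (unsigned int)id, (long)(p - tag));
	}
	return 0;
}
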
+enum { + HDR_MPLS_OFFSET_LABEL = 12, + HDR_MPLS_OFFSET_EXP = 9, + HDR_MPLS_OFFSET_S_BOS = 8, + HDR_MPLS_OFFSET_TTL = 0, +}; + +u16 mlx5dr_ste_conv_bit_to_byte_mask(u8 *bit_mask); + +static inline u8 * +dr_ste_calc_flex_parser_offset(u8 *tag, u8 parser_id) +{ + /* Calculate tag byte offset based on flex parser id */ + return tag + 4 * (3 - (parser_id % 4)); +} + +#define DR_STE_CTX_BUILDER(fname) \ + ((*build_##fname##_init)(struct mlx5dr_ste_build *sb, \ + struct mlx5dr_match_param *mask)) + +struct mlx5dr_ste_ctx { + /* Builders */ + void DR_STE_CTX_BUILDER(eth_l2_src_dst); + void DR_STE_CTX_BUILDER(eth_l3_ipv6_src); + void DR_STE_CTX_BUILDER(eth_l3_ipv6_dst); + void DR_STE_CTX_BUILDER(eth_l3_ipv4_5_tuple); + void DR_STE_CTX_BUILDER(eth_l2_src); + void DR_STE_CTX_BUILDER(eth_l2_dst); + void DR_STE_CTX_BUILDER(eth_l2_tnl); + void DR_STE_CTX_BUILDER(eth_l3_ipv4_misc); + void DR_STE_CTX_BUILDER(eth_ipv6_l3_l4); + void DR_STE_CTX_BUILDER(mpls); + void DR_STE_CTX_BUILDER(tnl_gre); + void DR_STE_CTX_BUILDER(tnl_mpls); + void DR_STE_CTX_BUILDER(tnl_mpls_over_gre); + void DR_STE_CTX_BUILDER(tnl_mpls_over_udp); + void DR_STE_CTX_BUILDER(icmp); + void DR_STE_CTX_BUILDER(general_purpose); + void DR_STE_CTX_BUILDER(eth_l4_misc); + void DR_STE_CTX_BUILDER(tnl_vxlan_gpe); + void DR_STE_CTX_BUILDER(tnl_geneve); + void DR_STE_CTX_BUILDER(tnl_geneve_tlv_opt); + void DR_STE_CTX_BUILDER(tnl_geneve_tlv_opt_exist); + void DR_STE_CTX_BUILDER(register_0); + void DR_STE_CTX_BUILDER(register_1); + void DR_STE_CTX_BUILDER(src_gvmi_qpn); + void DR_STE_CTX_BUILDER(flex_parser_0); + void DR_STE_CTX_BUILDER(flex_parser_1); + void DR_STE_CTX_BUILDER(tnl_gtpu); + void DR_STE_CTX_BUILDER(tnl_header_0_1); + void DR_STE_CTX_BUILDER(tnl_gtpu_flex_parser_0); + void DR_STE_CTX_BUILDER(tnl_gtpu_flex_parser_1); + + /* Getters and Setters */ + void (*ste_init)(u8 *hw_ste_p, u16 lu_type, + bool is_rx, u16 gvmi); + void (*set_next_lu_type)(u8 *hw_ste_p, u16 lu_type); + u16 (*get_next_lu_type)(u8 *hw_ste_p); + void (*set_miss_addr)(u8 *hw_ste_p, u64 miss_addr); + u64 (*get_miss_addr)(u8 *hw_ste_p); + void (*set_hit_addr)(u8 *hw_ste_p, u64 icm_addr, u32 ht_size); + void (*set_byte_mask)(u8 *hw_ste_p, u16 byte_mask); + u16 (*get_byte_mask)(u8 *hw_ste_p); + + /* Actions */ + u32 actions_caps; + void (*set_actions_rx)(struct mlx5dr_domain *dmn, + u8 *action_type_set, + u32 actions_caps, + u8 *hw_ste_arr, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes); + void (*set_actions_tx)(struct mlx5dr_domain *dmn, + u8 *action_type_set, + u32 actions_caps, + u8 *hw_ste_arr, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes); + u32 modify_field_arr_sz; + const struct mlx5dr_ste_action_modify_field *modify_field_arr; + void (*set_action_set)(u8 *hw_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data); + void (*set_action_add)(u8 *hw_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data); + void (*set_action_copy)(u8 *hw_action, + u8 dst_hw_field, + u8 dst_shifter, + u8 dst_len, + u8 src_hw_field, + u8 src_shifter); + int (*set_action_decap_l3_list)(void *data, + u32 data_sz, + u8 *hw_action, + u32 hw_action_sz, + u16 *used_hw_action_num); + + /* Send */ + void (*prepare_for_postsend)(u8 *hw_ste_p, u32 ste_size); +}; + +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v0(void); +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v1(void); +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v2(void); + +#endif /* _DR_STE_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c 
b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c new file mode 100644 index 000000000..2010d4ac6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c @@ -0,0 +1,1960 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. */ + +#include <linux/types.h> +#include <linux/crc32.h> +#include "dr_ste.h" + +#define SVLAN_ETHERTYPE 0x88a8 +#define DR_STE_ENABLE_FLOW_TAG BIT(31) + +enum dr_ste_v0_entry_type { + DR_STE_TYPE_TX = 1, + DR_STE_TYPE_RX = 2, + DR_STE_TYPE_MODIFY_PKT = 6, +}; + +enum dr_ste_v0_action_tunl { + DR_STE_TUNL_ACTION_NONE = 0, + DR_STE_TUNL_ACTION_ENABLE = 1, + DR_STE_TUNL_ACTION_DECAP = 2, + DR_STE_TUNL_ACTION_L3_DECAP = 3, + DR_STE_TUNL_ACTION_POP_VLAN = 4, +}; + +enum dr_ste_v0_action_type { + DR_STE_ACTION_TYPE_PUSH_VLAN = 1, + DR_STE_ACTION_TYPE_ENCAP_L3 = 3, + DR_STE_ACTION_TYPE_ENCAP = 4, +}; + +enum dr_ste_v0_action_mdfy_op { + DR_STE_ACTION_MDFY_OP_COPY = 0x1, + DR_STE_ACTION_MDFY_OP_SET = 0x2, + DR_STE_ACTION_MDFY_OP_ADD = 0x3, +}; + +#define DR_STE_CALC_LU_TYPE(lookup_type, rx, inner) \ + ((inner) ? DR_STE_V0_LU_TYPE_##lookup_type##_I : \ + (rx) ? DR_STE_V0_LU_TYPE_##lookup_type##_D : \ + DR_STE_V0_LU_TYPE_##lookup_type##_O) + +enum { + DR_STE_V0_LU_TYPE_NOP = 0x00, + DR_STE_V0_LU_TYPE_SRC_GVMI_AND_QP = 0x05, + DR_STE_V0_LU_TYPE_ETHL2_TUNNELING_I = 0x0a, + DR_STE_V0_LU_TYPE_ETHL2_DST_O = 0x06, + DR_STE_V0_LU_TYPE_ETHL2_DST_I = 0x07, + DR_STE_V0_LU_TYPE_ETHL2_DST_D = 0x1b, + DR_STE_V0_LU_TYPE_ETHL2_SRC_O = 0x08, + DR_STE_V0_LU_TYPE_ETHL2_SRC_I = 0x09, + DR_STE_V0_LU_TYPE_ETHL2_SRC_D = 0x1c, + DR_STE_V0_LU_TYPE_ETHL2_SRC_DST_O = 0x36, + DR_STE_V0_LU_TYPE_ETHL2_SRC_DST_I = 0x37, + DR_STE_V0_LU_TYPE_ETHL2_SRC_DST_D = 0x38, + DR_STE_V0_LU_TYPE_ETHL3_IPV6_DST_O = 0x0d, + DR_STE_V0_LU_TYPE_ETHL3_IPV6_DST_I = 0x0e, + DR_STE_V0_LU_TYPE_ETHL3_IPV6_DST_D = 0x1e, + DR_STE_V0_LU_TYPE_ETHL3_IPV6_SRC_O = 0x0f, + DR_STE_V0_LU_TYPE_ETHL3_IPV6_SRC_I = 0x10, + DR_STE_V0_LU_TYPE_ETHL3_IPV6_SRC_D = 0x1f, + DR_STE_V0_LU_TYPE_ETHL3_IPV4_5_TUPLE_O = 0x11, + DR_STE_V0_LU_TYPE_ETHL3_IPV4_5_TUPLE_I = 0x12, + DR_STE_V0_LU_TYPE_ETHL3_IPV4_5_TUPLE_D = 0x20, + DR_STE_V0_LU_TYPE_ETHL3_IPV4_MISC_O = 0x29, + DR_STE_V0_LU_TYPE_ETHL3_IPV4_MISC_I = 0x2a, + DR_STE_V0_LU_TYPE_ETHL3_IPV4_MISC_D = 0x2b, + DR_STE_V0_LU_TYPE_ETHL4_O = 0x13, + DR_STE_V0_LU_TYPE_ETHL4_I = 0x14, + DR_STE_V0_LU_TYPE_ETHL4_D = 0x21, + DR_STE_V0_LU_TYPE_ETHL4_MISC_O = 0x2c, + DR_STE_V0_LU_TYPE_ETHL4_MISC_I = 0x2d, + DR_STE_V0_LU_TYPE_ETHL4_MISC_D = 0x2e, + DR_STE_V0_LU_TYPE_MPLS_FIRST_O = 0x15, + DR_STE_V0_LU_TYPE_MPLS_FIRST_I = 0x24, + DR_STE_V0_LU_TYPE_MPLS_FIRST_D = 0x25, + DR_STE_V0_LU_TYPE_GRE = 0x16, + DR_STE_V0_LU_TYPE_FLEX_PARSER_0 = 0x22, + DR_STE_V0_LU_TYPE_FLEX_PARSER_1 = 0x23, + DR_STE_V0_LU_TYPE_FLEX_PARSER_TNL_HEADER = 0x19, + DR_STE_V0_LU_TYPE_GENERAL_PURPOSE = 0x18, + DR_STE_V0_LU_TYPE_STEERING_REGISTERS_0 = 0x2f, + DR_STE_V0_LU_TYPE_STEERING_REGISTERS_1 = 0x30, + DR_STE_V0_LU_TYPE_TUNNEL_HEADER = 0x34, + DR_STE_V0_LU_TYPE_DONT_CARE = MLX5DR_STE_LU_TYPE_DONT_CARE, +}; + +enum { + DR_STE_V0_ACTION_MDFY_FLD_L2_0 = 0, + DR_STE_V0_ACTION_MDFY_FLD_L2_1 = 1, + DR_STE_V0_ACTION_MDFY_FLD_L2_2 = 2, + DR_STE_V0_ACTION_MDFY_FLD_L3_0 = 3, + DR_STE_V0_ACTION_MDFY_FLD_L3_1 = 4, + DR_STE_V0_ACTION_MDFY_FLD_L3_2 = 5, + DR_STE_V0_ACTION_MDFY_FLD_L3_3 = 6, + DR_STE_V0_ACTION_MDFY_FLD_L3_4 = 7, + DR_STE_V0_ACTION_MDFY_FLD_L4_0 = 8, + DR_STE_V0_ACTION_MDFY_FLD_L4_1 = 9, + DR_STE_V0_ACTION_MDFY_FLD_MPLS = 10, + 
DR_STE_V0_ACTION_MDFY_FLD_L2_TNL_0 = 11, + DR_STE_V0_ACTION_MDFY_FLD_REG_0 = 12, + DR_STE_V0_ACTION_MDFY_FLD_REG_1 = 13, + DR_STE_V0_ACTION_MDFY_FLD_REG_2 = 14, + DR_STE_V0_ACTION_MDFY_FLD_REG_3 = 15, + DR_STE_V0_ACTION_MDFY_FLD_L4_2 = 16, + DR_STE_V0_ACTION_MDFY_FLD_FLEX_0 = 17, + DR_STE_V0_ACTION_MDFY_FLD_FLEX_1 = 18, + DR_STE_V0_ACTION_MDFY_FLD_FLEX_2 = 19, + DR_STE_V0_ACTION_MDFY_FLD_FLEX_3 = 20, + DR_STE_V0_ACTION_MDFY_FLD_L2_TNL_1 = 21, + DR_STE_V0_ACTION_MDFY_FLD_METADATA = 22, + DR_STE_V0_ACTION_MDFY_FLD_RESERVED = 23, +}; + +static const struct mlx5dr_ste_action_modify_field dr_ste_v0_action_modify_field_arr[] = { + [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_1, .start = 16, .end = 47, + }, + [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_1, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_2, .start = 32, .end = 47, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_0, .start = 16, .end = 47, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_0, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_1, .start = 0, .end = 5, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_0, .start = 48, .end = 56, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_0, .start = 0, .end = 15, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_0, .start = 16, .end = 31, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_1, .start = 8, .end = 15, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_1, .start = 8, .end = 15, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_0, .start = 0, .end = 15, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_0, .start = 16, .end = 31, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_3, .start = 32, .end = 63, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_3, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_4, .start = 32, .end = 63, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_4, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_0, .start = 32, .end = 63, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = { + .hw_field = 
DR_STE_V0_ACTION_MDFY_FLD_L3_2, .start = 32, .end = 63, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_2, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_0, .start = 32, .end = 63, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_METADATA, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_METADATA, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_0, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_1, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_2, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_2, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_1, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_2, .start = 0, .end = 15, + }, +}; + +static void dr_ste_v0_set_entry_type(u8 *hw_ste_p, u8 entry_type) +{ + MLX5_SET(ste_general, hw_ste_p, entry_type, entry_type); +} + +static u8 dr_ste_v0_get_entry_type(u8 *hw_ste_p) +{ + return MLX5_GET(ste_general, hw_ste_p, entry_type); +} + +static void dr_ste_v0_set_miss_addr(u8 *hw_ste_p, u64 miss_addr) +{ + u64 index = miss_addr >> 6; + + /* Miss address for TX and RX STEs located in the same offsets */ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_39_32, index >> 26); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_31_6, index); +} + +static u64 dr_ste_v0_get_miss_addr(u8 *hw_ste_p) +{ + u64 index = + ((u64)MLX5_GET(ste_rx_steering_mult, hw_ste_p, miss_address_31_6) | + ((u64)MLX5_GET(ste_rx_steering_mult, hw_ste_p, miss_address_39_32)) << 26); + + return index << 6; +} + +static void dr_ste_v0_set_byte_mask(u8 *hw_ste_p, u16 byte_mask) +{ + MLX5_SET(ste_general, hw_ste_p, byte_mask, byte_mask); +} + +static u16 dr_ste_v0_get_byte_mask(u8 *hw_ste_p) +{ + return MLX5_GET(ste_general, hw_ste_p, byte_mask); +} + +static void dr_ste_v0_set_lu_type(u8 *hw_ste_p, u16 lu_type) +{ + MLX5_SET(ste_general, hw_ste_p, entry_sub_type, lu_type); +} + +static void dr_ste_v0_set_next_lu_type(u8 *hw_ste_p, u16 lu_type) +{ + MLX5_SET(ste_general, hw_ste_p, next_lu_type, lu_type); +} + +static u16 dr_ste_v0_get_next_lu_type(u8 *hw_ste_p) +{ + return MLX5_GET(ste_general, hw_ste_p, next_lu_type); +} + +static void dr_ste_v0_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi) +{ + MLX5_SET(ste_general, hw_ste_p, next_table_base_63_48, gvmi); +} + +static void dr_ste_v0_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, 
u32 ht_size) +{ + u64 index = (icm_addr >> 5) | ht_size; + + MLX5_SET(ste_general, hw_ste_p, next_table_base_39_32_size, index >> 27); + MLX5_SET(ste_general, hw_ste_p, next_table_base_31_5_size, index); +} + +static void dr_ste_v0_init_full(u8 *hw_ste_p, u16 lu_type, + enum dr_ste_v0_entry_type entry_type, u16 gvmi) +{ + dr_ste_v0_set_entry_type(hw_ste_p, entry_type); + dr_ste_v0_set_lu_type(hw_ste_p, lu_type); + dr_ste_v0_set_next_lu_type(hw_ste_p, MLX5DR_STE_LU_TYPE_DONT_CARE); + + /* Set GVMI once, this is the same for RX/TX + * bits 63_48 of next table base / miss address encode the next GVMI + */ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, gvmi, gvmi); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, next_table_base_63_48, gvmi); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_63_48, gvmi); +} + +static void dr_ste_v0_init(u8 *hw_ste_p, u16 lu_type, + bool is_rx, u16 gvmi) +{ + enum dr_ste_v0_entry_type entry_type; + + entry_type = is_rx ? DR_STE_TYPE_RX : DR_STE_TYPE_TX; + dr_ste_v0_init_full(hw_ste_p, lu_type, entry_type, gvmi); +} + +static void dr_ste_v0_rx_set_flow_tag(u8 *hw_ste_p, u32 flow_tag) +{ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, qp_list_pointer, + DR_STE_ENABLE_FLOW_TAG | flow_tag); +} + +static void dr_ste_v0_set_counter_id(u8 *hw_ste_p, u32 ctr_id) +{ + /* This can be used for both rx_steering_mult and for sx_transmit */ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, counter_trigger_15_0, ctr_id); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, counter_trigger_23_16, ctr_id >> 16); +} + +static void dr_ste_v0_set_go_back_bit(u8 *hw_ste_p) +{ + MLX5_SET(ste_sx_transmit, hw_ste_p, go_back, 1); +} + +static void dr_ste_v0_set_tx_push_vlan(u8 *hw_ste_p, u32 vlan_hdr, + bool go_back) +{ + MLX5_SET(ste_sx_transmit, hw_ste_p, action_type, + DR_STE_ACTION_TYPE_PUSH_VLAN); + MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, vlan_hdr); + /* Due to HW limitation we need to set this bit, otherwise reformat + + * push vlan will not work. + */ + if (go_back) + dr_ste_v0_set_go_back_bit(hw_ste_p); +} + +static void dr_ste_v0_set_tx_encap(void *hw_ste_p, u32 reformat_id, + int size, bool encap_l3) +{ + MLX5_SET(ste_sx_transmit, hw_ste_p, action_type, + encap_l3 ? DR_STE_ACTION_TYPE_ENCAP_L3 : DR_STE_ACTION_TYPE_ENCAP); + /* The hardware expects here size in words (2 byte) */ + MLX5_SET(ste_sx_transmit, hw_ste_p, action_description, size / 2); + MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, reformat_id); +} + +static void dr_ste_v0_set_rx_decap(u8 *hw_ste_p) +{ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action, + DR_STE_TUNL_ACTION_DECAP); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, fail_on_error, 1); +} + +static void dr_ste_v0_set_rx_pop_vlan(u8 *hw_ste_p) +{ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action, + DR_STE_TUNL_ACTION_POP_VLAN); +} + +static void dr_ste_v0_set_rx_decap_l3(u8 *hw_ste_p, bool vlan) +{ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action, + DR_STE_TUNL_ACTION_L3_DECAP); + MLX5_SET(ste_modify_packet, hw_ste_p, action_description, vlan ? 
1 : 0); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, fail_on_error, 1); +} + +static void dr_ste_v0_set_rewrite_actions(u8 *hw_ste_p, u16 num_of_actions, + u32 re_write_index) +{ + MLX5_SET(ste_modify_packet, hw_ste_p, number_of_re_write_actions, + num_of_actions); + MLX5_SET(ste_modify_packet, hw_ste_p, header_re_write_actions_pointer, + re_write_index); +} + +static void dr_ste_v0_arr_init_next(u8 **last_ste, + u32 *added_stes, + enum dr_ste_v0_entry_type entry_type, + u16 gvmi) +{ + (*added_stes)++; + *last_ste += DR_STE_SIZE; + dr_ste_v0_init_full(*last_ste, MLX5DR_STE_LU_TYPE_DONT_CARE, + entry_type, gvmi); +} + +static void +dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn, + u8 *action_type_set, + u32 actions_caps, + u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes) +{ + bool encap = action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2] || + action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]; + + /* We want to make sure the modify header comes before L2 + * encapsulation. The reason for that is that we support + * modify headers for outer headers only + */ + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] && attr->modify_actions) { + dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT); + dr_ste_v0_set_rewrite_actions(last_ste, + attr->modify_actions, + attr->modify_index); + } + + if (action_type_set[DR_ACTION_TYP_PUSH_VLAN]) { + int i; + + for (i = 0; i < attr->vlans.count; i++) { + if (i || action_type_set[DR_ACTION_TYP_MODIFY_HDR]) + dr_ste_v0_arr_init_next(&last_ste, + added_stes, + DR_STE_TYPE_TX, + attr->gvmi); + + dr_ste_v0_set_tx_push_vlan(last_ste, + attr->vlans.headers[i], + encap); + } + } + + if (encap) { + /* Modify header and encapsulation require a different STEs. + * Since modify header STE format doesn't support encapsulation + * tunneling_action. + */ + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] || + action_type_set[DR_ACTION_TYP_PUSH_VLAN]) + dr_ste_v0_arr_init_next(&last_ste, + added_stes, + DR_STE_TYPE_TX, + attr->gvmi); + + dr_ste_v0_set_tx_encap(last_ste, + attr->reformat.id, + attr->reformat.size, + action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]); + /* Whenever prio_tag_required enabled, we can be sure that the + * previous table (ACL) already push vlan to our packet, + * And due to HW limitation we need to set this bit, otherwise + * push vlan + reformat will not work. 
+ */ + if (MLX5_CAP_GEN(dmn->mdev, prio_tag_required)) + dr_ste_v0_set_go_back_bit(last_ste); + } + + if (action_type_set[DR_ACTION_TYP_CTR]) + dr_ste_v0_set_counter_id(last_ste, attr->ctr_id); + + dr_ste_v0_set_hit_gvmi(last_ste, attr->hit_gvmi); + dr_ste_v0_set_hit_addr(last_ste, attr->final_icm_addr, 1); +} + +static void +dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn, + u8 *action_type_set, + u32 actions_caps, + u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes) +{ + if (action_type_set[DR_ACTION_TYP_CTR]) + dr_ste_v0_set_counter_id(last_ste, attr->ctr_id); + + if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) { + dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT); + dr_ste_v0_set_rx_decap_l3(last_ste, attr->decap_with_vlan); + dr_ste_v0_set_rewrite_actions(last_ste, + attr->decap_actions, + attr->decap_index); + } + + if (action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2]) + dr_ste_v0_set_rx_decap(last_ste); + + if (action_type_set[DR_ACTION_TYP_POP_VLAN]) { + int i; + + for (i = 0; i < attr->vlans.count; i++) { + if (i || + action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2] || + action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) + dr_ste_v0_arr_init_next(&last_ste, + added_stes, + DR_STE_TYPE_RX, + attr->gvmi); + + dr_ste_v0_set_rx_pop_vlan(last_ste); + } + } + + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] && attr->modify_actions) { + if (dr_ste_v0_get_entry_type(last_ste) == DR_STE_TYPE_MODIFY_PKT) + dr_ste_v0_arr_init_next(&last_ste, + added_stes, + DR_STE_TYPE_MODIFY_PKT, + attr->gvmi); + else + dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT); + + dr_ste_v0_set_rewrite_actions(last_ste, + attr->modify_actions, + attr->modify_index); + } + + if (action_type_set[DR_ACTION_TYP_TAG]) { + if (dr_ste_v0_get_entry_type(last_ste) == DR_STE_TYPE_MODIFY_PKT) + dr_ste_v0_arr_init_next(&last_ste, + added_stes, + DR_STE_TYPE_RX, + attr->gvmi); + + dr_ste_v0_rx_set_flow_tag(last_ste, attr->flow_tag); + } + + dr_ste_v0_set_hit_gvmi(last_ste, attr->hit_gvmi); + dr_ste_v0_set_hit_addr(last_ste, attr->final_icm_addr, 1); +} + +static void dr_ste_v0_set_action_set(u8 *hw_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data) +{ + length = (length == 32) ? 0 : length; + MLX5_SET(dr_action_hw_set, hw_action, opcode, DR_STE_ACTION_MDFY_OP_SET); + MLX5_SET(dr_action_hw_set, hw_action, destination_field_code, hw_field); + MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter, shifter); + MLX5_SET(dr_action_hw_set, hw_action, destination_length, length); + MLX5_SET(dr_action_hw_set, hw_action, inline_data, data); +} + +static void dr_ste_v0_set_action_add(u8 *hw_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data) +{ + length = (length == 32) ? 
0 : length; + MLX5_SET(dr_action_hw_set, hw_action, opcode, DR_STE_ACTION_MDFY_OP_ADD); + MLX5_SET(dr_action_hw_set, hw_action, destination_field_code, hw_field); + MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter, shifter); + MLX5_SET(dr_action_hw_set, hw_action, destination_length, length); + MLX5_SET(dr_action_hw_set, hw_action, inline_data, data); +} + +static void dr_ste_v0_set_action_copy(u8 *hw_action, + u8 dst_hw_field, + u8 dst_shifter, + u8 dst_len, + u8 src_hw_field, + u8 src_shifter) +{ + MLX5_SET(dr_action_hw_copy, hw_action, opcode, DR_STE_ACTION_MDFY_OP_COPY); + MLX5_SET(dr_action_hw_copy, hw_action, destination_field_code, dst_hw_field); + MLX5_SET(dr_action_hw_copy, hw_action, destination_left_shifter, dst_shifter); + MLX5_SET(dr_action_hw_copy, hw_action, destination_length, dst_len); + MLX5_SET(dr_action_hw_copy, hw_action, source_field_code, src_hw_field); + MLX5_SET(dr_action_hw_copy, hw_action, source_left_shifter, src_shifter); +} + +#define DR_STE_DECAP_L3_MIN_ACTION_NUM 5 + +static int +dr_ste_v0_set_action_decap_l3_list(void *data, u32 data_sz, + u8 *hw_action, u32 hw_action_sz, + u16 *used_hw_action_num) +{ + struct mlx5_ifc_l2_hdr_bits *l2_hdr = data; + u32 hw_action_num; + int required_actions; + u32 hdr_fld_4b; + u16 hdr_fld_2b; + u16 vlan_type; + bool vlan; + + vlan = (data_sz != HDR_LEN_L2); + hw_action_num = hw_action_sz / MLX5_ST_SZ_BYTES(dr_action_hw_set); + required_actions = DR_STE_DECAP_L3_MIN_ACTION_NUM + !!vlan; + + if (hw_action_num < required_actions) + return -ENOMEM; + + /* dmac_47_16 */ + MLX5_SET(dr_action_hw_set, hw_action, + opcode, DR_STE_ACTION_MDFY_OP_SET); + MLX5_SET(dr_action_hw_set, hw_action, + destination_length, 0); + MLX5_SET(dr_action_hw_set, hw_action, + destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_0); + MLX5_SET(dr_action_hw_set, hw_action, + destination_left_shifter, 16); + hdr_fld_4b = MLX5_GET(l2_hdr, l2_hdr, dmac_47_16); + MLX5_SET(dr_action_hw_set, hw_action, + inline_data, hdr_fld_4b); + hw_action += MLX5_ST_SZ_BYTES(dr_action_hw_set); + + /* smac_47_16 */ + MLX5_SET(dr_action_hw_set, hw_action, + opcode, DR_STE_ACTION_MDFY_OP_SET); + MLX5_SET(dr_action_hw_set, hw_action, + destination_length, 0); + MLX5_SET(dr_action_hw_set, hw_action, + destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_1); + MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter, 16); + hdr_fld_4b = (MLX5_GET(l2_hdr, l2_hdr, smac_31_0) >> 16 | + MLX5_GET(l2_hdr, l2_hdr, smac_47_32) << 16); + MLX5_SET(dr_action_hw_set, hw_action, inline_data, hdr_fld_4b); + hw_action += MLX5_ST_SZ_BYTES(dr_action_hw_set); + + /* dmac_15_0 */ + MLX5_SET(dr_action_hw_set, hw_action, + opcode, DR_STE_ACTION_MDFY_OP_SET); + MLX5_SET(dr_action_hw_set, hw_action, + destination_length, 16); + MLX5_SET(dr_action_hw_set, hw_action, + destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_0); + MLX5_SET(dr_action_hw_set, hw_action, + destination_left_shifter, 0); + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, dmac_15_0); + MLX5_SET(dr_action_hw_set, hw_action, + inline_data, hdr_fld_2b); + hw_action += MLX5_ST_SZ_BYTES(dr_action_hw_set); + + /* ethertype + (optional) vlan */ + MLX5_SET(dr_action_hw_set, hw_action, + opcode, DR_STE_ACTION_MDFY_OP_SET); + MLX5_SET(dr_action_hw_set, hw_action, + destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_2); + MLX5_SET(dr_action_hw_set, hw_action, + destination_left_shifter, 32); + if (!vlan) { + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, ethertype); + MLX5_SET(dr_action_hw_set, hw_action, inline_data, hdr_fld_2b); + 
MLX5_SET(dr_action_hw_set, hw_action, destination_length, 16); + } else { + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, ethertype); + vlan_type = hdr_fld_2b == SVLAN_ETHERTYPE ? DR_STE_SVLAN : DR_STE_CVLAN; + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, vlan); + hdr_fld_4b = (vlan_type << 16) | hdr_fld_2b; + MLX5_SET(dr_action_hw_set, hw_action, inline_data, hdr_fld_4b); + MLX5_SET(dr_action_hw_set, hw_action, destination_length, 18); + } + hw_action += MLX5_ST_SZ_BYTES(dr_action_hw_set); + + /* smac_15_0 */ + MLX5_SET(dr_action_hw_set, hw_action, + opcode, DR_STE_ACTION_MDFY_OP_SET); + MLX5_SET(dr_action_hw_set, hw_action, + destination_length, 16); + MLX5_SET(dr_action_hw_set, hw_action, + destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_1); + MLX5_SET(dr_action_hw_set, hw_action, + destination_left_shifter, 0); + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, smac_31_0); + MLX5_SET(dr_action_hw_set, hw_action, inline_data, hdr_fld_2b); + hw_action += MLX5_ST_SZ_BYTES(dr_action_hw_set); + + if (vlan) { + MLX5_SET(dr_action_hw_set, hw_action, + opcode, DR_STE_ACTION_MDFY_OP_SET); + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, vlan_type); + MLX5_SET(dr_action_hw_set, hw_action, + inline_data, hdr_fld_2b); + MLX5_SET(dr_action_hw_set, hw_action, + destination_length, 16); + MLX5_SET(dr_action_hw_set, hw_action, + destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_2); + MLX5_SET(dr_action_hw_set, hw_action, + destination_left_shifter, 0); + } + + *used_hw_action_num = required_actions; + + return 0; +} + +static void +dr_ste_v0_build_eth_l2_src_dst_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_src_dst, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_TAG(eth_l2_src_dst, bit_mask, dmac_15_0, mask, dmac_15_0); + + if (mask->smac_47_16 || mask->smac_15_0) { + MLX5_SET(ste_eth_l2_src_dst, bit_mask, smac_47_32, + mask->smac_47_16 >> 16); + MLX5_SET(ste_eth_l2_src_dst, bit_mask, smac_31_0, + mask->smac_47_16 << 16 | mask->smac_15_0); + mask->smac_47_16 = 0; + mask->smac_15_0 = 0; + } + + DR_STE_SET_TAG(eth_l2_src_dst, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_TAG(eth_l2_src_dst, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_TAG(eth_l2_src_dst, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_ONES(eth_l2_src_dst, bit_mask, l3_type, mask, ip_version); + + if (mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + } else if (mask->svlan_tag) { + MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1); + mask->svlan_tag = 0; + } +} + +static int +dr_ste_v0_build_eth_l2_src_dst_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? 
&value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_15_0, spec, dmac_15_0); + + if (spec->smac_47_16 || spec->smac_15_0) { + MLX5_SET(ste_eth_l2_src_dst, tag, smac_47_32, + spec->smac_47_16 >> 16); + MLX5_SET(ste_eth_l2_src_dst, tag, smac_31_0, + spec->smac_47_16 << 16 | spec->smac_15_0); + spec->smac_47_16 = 0; + spec->smac_15_0 = 0; + } + + if (spec->ip_version) { + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_src_dst, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_src_dst, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else { + return -EINVAL; + } + } + + DR_STE_SET_TAG(eth_l2_src_dst, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_src_dst, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_src_dst, tag, first_priority, spec, first_prio); + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_src_dst, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_src_dst, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + return 0; +} + +static void +dr_ste_v0_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_eth_l2_src_dst_bit_mask(mask, sb->inner, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC_DST, sb->rx, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_eth_l2_src_dst_tag; +} + +static int +dr_ste_v0_build_eth_l3_ipv6_dst_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_127_96, spec, dst_ip_127_96); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_95_64, spec, dst_ip_95_64); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_63_32, spec, dst_ip_63_32); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_31_0, spec, dst_ip_31_0); + + return 0; +} + +static void +dr_ste_v0_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_eth_l3_ipv6_dst_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV6_DST, sb->rx, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_eth_l3_ipv6_dst_tag; +} + +static int +dr_ste_v0_build_eth_l3_ipv6_src_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? 
&value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_127_96, spec, src_ip_127_96); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_95_64, spec, src_ip_95_64); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_63_32, spec, src_ip_63_32); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_31_0, spec, src_ip_31_0); + + return 0; +} + +static void +dr_ste_v0_build_eth_l3_ipv6_src_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_eth_l3_ipv6_src_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV6_SRC, sb->rx, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_eth_l3_ipv6_src_tag; +} + +static int +dr_ste_v0_build_eth_l3_ipv4_5_tuple_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_address, spec, dst_ip_31_0); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_address, spec, src_ip_31_0); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_port, spec, tcp_dport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_port, spec, udp_dport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_port, spec, tcp_sport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_port, spec, udp_sport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, protocol, spec, ip_protocol); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, fragmented, spec, frag); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, dscp, spec, ip_dscp); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, ecn, spec, ip_ecn); + + if (spec->tcp_flags) { + DR_STE_SET_TCP_FLAGS(eth_l3_ipv4_5_tuple, tag, spec); + spec->tcp_flags = 0; + } + + return 0; +} + +static void +dr_ste_v0_build_eth_l3_ipv4_5_tuple_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_eth_l3_ipv4_5_tuple_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV4_5_TUPLE, sb->rx, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_eth_l3_ipv4_5_tuple_tag; +} + +static void +dr_ste_v0_build_eth_l2_src_or_dst_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? 
&value->inner : &value->outer; + struct mlx5dr_match_misc *misc_mask = &value->misc; + + DR_STE_SET_TAG(eth_l2_src, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_TAG(eth_l2_src, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_TAG(eth_l2_src, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_TAG(eth_l2_src, bit_mask, ip_fragmented, mask, frag); + DR_STE_SET_TAG(eth_l2_src, bit_mask, l3_ethertype, mask, ethertype); + DR_STE_SET_ONES(eth_l2_src, bit_mask, l3_type, mask, ip_version); + + if (mask->svlan_tag || mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_src, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + mask->svlan_tag = 0; + } + + if (inner) { + if (misc_mask->inner_second_cvlan_tag || + misc_mask->inner_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src, bit_mask, second_vlan_qualifier, -1); + misc_mask->inner_second_cvlan_tag = 0; + misc_mask->inner_second_svlan_tag = 0; + } + + DR_STE_SET_TAG(eth_l2_src, bit_mask, + second_vlan_id, misc_mask, inner_second_vid); + DR_STE_SET_TAG(eth_l2_src, bit_mask, + second_cfi, misc_mask, inner_second_cfi); + DR_STE_SET_TAG(eth_l2_src, bit_mask, + second_priority, misc_mask, inner_second_prio); + } else { + if (misc_mask->outer_second_cvlan_tag || + misc_mask->outer_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src, bit_mask, second_vlan_qualifier, -1); + misc_mask->outer_second_cvlan_tag = 0; + misc_mask->outer_second_svlan_tag = 0; + } + + DR_STE_SET_TAG(eth_l2_src, bit_mask, + second_vlan_id, misc_mask, outer_second_vid); + DR_STE_SET_TAG(eth_l2_src, bit_mask, + second_cfi, misc_mask, outer_second_cfi); + DR_STE_SET_TAG(eth_l2_src, bit_mask, + second_priority, misc_mask, outer_second_prio); + } +} + +static int +dr_ste_v0_build_eth_l2_src_or_dst_tag(struct mlx5dr_match_param *value, + bool inner, u8 *tag) +{ + struct mlx5dr_match_spec *spec = inner ? 
&value->inner : &value->outer; + struct mlx5dr_match_misc *misc_spec = &value->misc; + + DR_STE_SET_TAG(eth_l2_src, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_src, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_src, tag, first_priority, spec, first_prio); + DR_STE_SET_TAG(eth_l2_src, tag, ip_fragmented, spec, frag); + DR_STE_SET_TAG(eth_l2_src, tag, l3_ethertype, spec, ethertype); + + if (spec->ip_version) { + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_src, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_src, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else { + return -EINVAL; + } + } + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + + if (inner) { + if (misc_spec->inner_second_cvlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_CVLAN); + misc_spec->inner_second_cvlan_tag = 0; + } else if (misc_spec->inner_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_SVLAN); + misc_spec->inner_second_svlan_tag = 0; + } + + DR_STE_SET_TAG(eth_l2_src, tag, second_vlan_id, misc_spec, inner_second_vid); + DR_STE_SET_TAG(eth_l2_src, tag, second_cfi, misc_spec, inner_second_cfi); + DR_STE_SET_TAG(eth_l2_src, tag, second_priority, misc_spec, inner_second_prio); + } else { + if (misc_spec->outer_second_cvlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_CVLAN); + misc_spec->outer_second_cvlan_tag = 0; + } else if (misc_spec->outer_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_SVLAN); + misc_spec->outer_second_svlan_tag = 0; + } + DR_STE_SET_TAG(eth_l2_src, tag, second_vlan_id, misc_spec, outer_second_vid); + DR_STE_SET_TAG(eth_l2_src, tag, second_cfi, misc_spec, outer_second_cfi); + DR_STE_SET_TAG(eth_l2_src, tag, second_priority, misc_spec, outer_second_prio); + } + + return 0; +} + +static void +dr_ste_v0_build_eth_l2_src_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_src, bit_mask, smac_47_16, mask, smac_47_16); + DR_STE_SET_TAG(eth_l2_src, bit_mask, smac_15_0, mask, smac_15_0); + + dr_ste_v0_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask); +} + +static int +dr_ste_v0_build_eth_l2_src_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_src, tag, smac_47_16, spec, smac_47_16); + DR_STE_SET_TAG(eth_l2_src, tag, smac_15_0, spec, smac_15_0); + + return dr_ste_v0_build_eth_l2_src_or_dst_tag(value, sb->inner, tag); +} + +static void +dr_ste_v0_build_eth_l2_src_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_eth_l2_src_bit_mask(mask, sb->inner, sb->bit_mask); + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC, sb->rx, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_eth_l2_src_tag; +} + +static void +dr_ste_v0_build_eth_l2_dst_bit_mask(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = sb->inner ? 
&value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_dst, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_TAG(eth_l2_dst, bit_mask, dmac_15_0, mask, dmac_15_0); + + dr_ste_v0_build_eth_l2_src_or_dst_bit_mask(value, sb->inner, bit_mask); +} + +static int +dr_ste_v0_build_eth_l2_dst_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_dst, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_dst, tag, dmac_15_0, spec, dmac_15_0); + + return dr_ste_v0_build_eth_l2_src_or_dst_tag(value, sb->inner, tag); +} + +static void +dr_ste_v0_build_eth_l2_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_eth_l2_dst_bit_mask(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_DST, sb->rx, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_eth_l2_dst_tag; +} + +static void +dr_ste_v0_build_eth_l2_tnl_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(eth_l2_tnl, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_TAG(eth_l2_tnl, bit_mask, dmac_15_0, mask, dmac_15_0); + DR_STE_SET_TAG(eth_l2_tnl, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_TAG(eth_l2_tnl, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_TAG(eth_l2_tnl, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_TAG(eth_l2_tnl, bit_mask, ip_fragmented, mask, frag); + DR_STE_SET_TAG(eth_l2_tnl, bit_mask, l3_ethertype, mask, ethertype); + DR_STE_SET_ONES(eth_l2_tnl, bit_mask, l3_type, mask, ip_version); + + if (misc->vxlan_vni) { + MLX5_SET(ste_eth_l2_tnl, bit_mask, + l2_tunneling_network_id, (misc->vxlan_vni << 8)); + misc->vxlan_vni = 0; + } + + if (mask->svlan_tag || mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_tnl, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + mask->svlan_tag = 0; + } +} + +static int +dr_ste_v0_build_eth_l2_tnl_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? 
&value->inner : &value->outer; + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_15_0, spec, dmac_15_0); + DR_STE_SET_TAG(eth_l2_tnl, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_tnl, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_tnl, tag, ip_fragmented, spec, frag); + DR_STE_SET_TAG(eth_l2_tnl, tag, first_priority, spec, first_prio); + DR_STE_SET_TAG(eth_l2_tnl, tag, l3_ethertype, spec, ethertype); + + if (misc->vxlan_vni) { + MLX5_SET(ste_eth_l2_tnl, tag, l2_tunneling_network_id, + (misc->vxlan_vni << 8)); + misc->vxlan_vni = 0; + } + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_tnl, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_tnl, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + + if (spec->ip_version) { + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_tnl, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_tnl, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else { + return -EINVAL; + } + } + + return 0; +} + +static void +dr_ste_v0_build_eth_l2_tnl_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_eth_l2_tnl_bit_mask(mask, sb->inner, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_ETHL2_TUNNELING_I; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_eth_l2_tnl_tag; +} + +static int +dr_ste_v0_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, time_to_live, spec, ttl_hoplimit); + DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, ihl, spec, ipv4_ihl); + + return 0; +} + +static void +dr_ste_v0_build_eth_l3_ipv4_misc_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_eth_l3_ipv4_misc_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV4_MISC, sb->rx, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_eth_l3_ipv4_misc_tag; +} + +static int +dr_ste_v0_build_eth_ipv6_l3_l4_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? 
&value->inner : &value->outer; + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, tcp_dport); + DR_STE_SET_TAG(eth_l4, tag, src_port, spec, tcp_sport); + DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, udp_dport); + DR_STE_SET_TAG(eth_l4, tag, src_port, spec, udp_sport); + DR_STE_SET_TAG(eth_l4, tag, protocol, spec, ip_protocol); + DR_STE_SET_TAG(eth_l4, tag, fragmented, spec, frag); + DR_STE_SET_TAG(eth_l4, tag, dscp, spec, ip_dscp); + DR_STE_SET_TAG(eth_l4, tag, ecn, spec, ip_ecn); + DR_STE_SET_TAG(eth_l4, tag, ipv6_hop_limit, spec, ttl_hoplimit); + + if (sb->inner) + DR_STE_SET_TAG(eth_l4, tag, flow_label, misc, inner_ipv6_flow_label); + else + DR_STE_SET_TAG(eth_l4, tag, flow_label, misc, outer_ipv6_flow_label); + + if (spec->tcp_flags) { + DR_STE_SET_TCP_FLAGS(eth_l4, tag, spec); + spec->tcp_flags = 0; + } + + return 0; +} + +static void +dr_ste_v0_build_eth_ipv6_l3_l4_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_eth_ipv6_l3_l4_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL4, sb->rx, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_eth_ipv6_l3_l4_tag; +} + +static int +dr_ste_v0_build_mpls_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + + if (sb->inner) + DR_STE_SET_MPLS(mpls, misc2, inner, tag); + else + DR_STE_SET_MPLS(mpls, misc2, outer, tag); + + return 0; +} + +static void +dr_ste_v0_build_mpls_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_mpls_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_LU_TYPE(MPLS_FIRST, sb->rx, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_mpls_tag; +} + +static int +dr_ste_v0_build_tnl_gre_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(gre, tag, gre_protocol, misc, gre_protocol); + + DR_STE_SET_TAG(gre, tag, gre_k_present, misc, gre_k_present); + DR_STE_SET_TAG(gre, tag, gre_key_h, misc, gre_key_h); + DR_STE_SET_TAG(gre, tag, gre_key_l, misc, gre_key_l); + + DR_STE_SET_TAG(gre, tag, gre_c_present, misc, gre_c_present); + + DR_STE_SET_TAG(gre, tag, gre_s_present, misc, gre_s_present); + + return 0; +} + +static void +dr_ste_v0_build_tnl_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_tnl_gre_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_GRE; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_gre_tag; +} + +static int +dr_ste_v0_build_tnl_mpls_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc_2 = &value->misc2; + u32 mpls_hdr; + + if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc_2)) { + mpls_hdr = misc_2->outer_first_mpls_over_gre_label << HDR_MPLS_OFFSET_LABEL; + misc_2->outer_first_mpls_over_gre_label = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_gre_exp << HDR_MPLS_OFFSET_EXP; + misc_2->outer_first_mpls_over_gre_exp = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_gre_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc_2->outer_first_mpls_over_gre_s_bos = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_gre_ttl << HDR_MPLS_OFFSET_TTL; + misc_2->outer_first_mpls_over_gre_ttl = 0; + 
} else { + mpls_hdr = misc_2->outer_first_mpls_over_udp_label << HDR_MPLS_OFFSET_LABEL; + misc_2->outer_first_mpls_over_udp_label = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_udp_exp << HDR_MPLS_OFFSET_EXP; + misc_2->outer_first_mpls_over_udp_exp = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_udp_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc_2->outer_first_mpls_over_udp_s_bos = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_udp_ttl << HDR_MPLS_OFFSET_TTL; + misc_2->outer_first_mpls_over_udp_ttl = 0; + } + + MLX5_SET(ste_flex_parser_0, tag, flex_parser_3, mpls_hdr); + return 0; +} + +static void +dr_ste_v0_build_tnl_mpls_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_tnl_mpls_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_mpls_tag; +} + +static int +dr_ste_v0_build_tnl_mpls_over_udp_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + u8 *parser_ptr; + u8 parser_id; + u32 mpls_hdr; + + mpls_hdr = misc2->outer_first_mpls_over_udp_label << HDR_MPLS_OFFSET_LABEL; + misc2->outer_first_mpls_over_udp_label = 0; + mpls_hdr |= misc2->outer_first_mpls_over_udp_exp << HDR_MPLS_OFFSET_EXP; + misc2->outer_first_mpls_over_udp_exp = 0; + mpls_hdr |= misc2->outer_first_mpls_over_udp_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc2->outer_first_mpls_over_udp_s_bos = 0; + mpls_hdr |= misc2->outer_first_mpls_over_udp_ttl << HDR_MPLS_OFFSET_TTL; + misc2->outer_first_mpls_over_udp_ttl = 0; + + parser_id = sb->caps->flex_parser_id_mpls_over_udp; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr); + + return 0; +} + +static void +dr_ste_v0_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_tnl_mpls_over_udp_tag(mask, sb, sb->bit_mask); + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_mpls_over_udp > DR_STE_MAX_FLEX_0_ID ? 
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_mpls_over_udp_tag; +} + +static int +dr_ste_v0_build_tnl_mpls_over_gre_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + u8 *parser_ptr; + u8 parser_id; + u32 mpls_hdr; + + mpls_hdr = misc2->outer_first_mpls_over_gre_label << HDR_MPLS_OFFSET_LABEL; + misc2->outer_first_mpls_over_gre_label = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_exp << HDR_MPLS_OFFSET_EXP; + misc2->outer_first_mpls_over_gre_exp = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc2->outer_first_mpls_over_gre_s_bos = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_ttl << HDR_MPLS_OFFSET_TTL; + misc2->outer_first_mpls_over_gre_ttl = 0; + + parser_id = sb->caps->flex_parser_id_mpls_over_gre; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr); + + return 0; +} + +static void +dr_ste_v0_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_tnl_mpls_over_gre_tag(mask, sb, sb->bit_mask); + + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_mpls_over_gre > DR_STE_MAX_FLEX_0_ID ? + DR_STE_V0_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_mpls_over_gre_tag; +} + +#define ICMP_TYPE_OFFSET_FIRST_DW 24 +#define ICMP_CODE_OFFSET_FIRST_DW 16 + +static int +dr_ste_v0_build_icmp_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc_3 = &value->misc3; + u32 *icmp_header_data; + int dw0_location; + int dw1_location; + u8 *parser_ptr; + u8 *icmp_type; + u8 *icmp_code; + bool is_ipv4; + u32 icmp_hdr; + + is_ipv4 = DR_MASK_IS_ICMPV4_SET(misc_3); + if (is_ipv4) { + icmp_header_data = &misc_3->icmpv4_header_data; + icmp_type = &misc_3->icmpv4_type; + icmp_code = &misc_3->icmpv4_code; + dw0_location = sb->caps->flex_parser_id_icmp_dw0; + dw1_location = sb->caps->flex_parser_id_icmp_dw1; + } else { + icmp_header_data = &misc_3->icmpv6_header_data; + icmp_type = &misc_3->icmpv6_type; + icmp_code = &misc_3->icmpv6_code; + dw0_location = sb->caps->flex_parser_id_icmpv6_dw0; + dw1_location = sb->caps->flex_parser_id_icmpv6_dw1; + } + + parser_ptr = dr_ste_calc_flex_parser_offset(tag, dw0_location); + icmp_hdr = (*icmp_type << ICMP_TYPE_OFFSET_FIRST_DW) | + (*icmp_code << ICMP_CODE_OFFSET_FIRST_DW); + *(__be32 *)parser_ptr = cpu_to_be32(icmp_hdr); + *icmp_code = 0; + *icmp_type = 0; + + parser_ptr = dr_ste_calc_flex_parser_offset(tag, dw1_location); + *(__be32 *)parser_ptr = cpu_to_be32(*icmp_header_data); + *icmp_header_data = 0; + + return 0; +} + +static void +dr_ste_v0_build_icmp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + u8 parser_id; + bool is_ipv4; + + dr_ste_v0_build_icmp_tag(mask, sb, sb->bit_mask); + + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + is_ipv4 = DR_MASK_IS_ICMPV4_SET(&mask->misc3); + parser_id = is_ipv4 ? sb->caps->flex_parser_id_icmp_dw0 : + sb->caps->flex_parser_id_icmpv6_dw0; + sb->lu_type = parser_id > DR_STE_MAX_FLEX_0_ID ? 
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_icmp_tag; +} + +static int +dr_ste_v0_build_general_purpose_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc_2 = &value->misc2; + + DR_STE_SET_TAG(general_purpose, tag, general_purpose_lookup_field, + misc_2, metadata_reg_a); + + return 0; +} + +static void +dr_ste_v0_build_general_purpose_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_general_purpose_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_GENERAL_PURPOSE; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_general_purpose_tag; +} + +static int +dr_ste_v0_build_eth_l4_misc_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + + if (sb->inner) { + DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, inner_tcp_seq_num); + DR_STE_SET_TAG(eth_l4_misc, tag, ack_num, misc3, inner_tcp_ack_num); + } else { + DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, outer_tcp_seq_num); + DR_STE_SET_TAG(eth_l4_misc, tag, ack_num, misc3, outer_tcp_ack_num); + } + + return 0; +} + +static void +dr_ste_v0_build_eth_l4_misc_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_eth_l4_misc_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL4_MISC, sb->rx, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_eth_l4_misc_tag; +} + +static int +dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + + DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag, + outer_vxlan_gpe_flags, misc3, + outer_vxlan_gpe_flags); + DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag, + outer_vxlan_gpe_next_protocol, misc3, + outer_vxlan_gpe_next_protocol); + DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag, + outer_vxlan_gpe_vni, misc3, + outer_vxlan_gpe_vni); + + return 0; +} + +static void +dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_tag(mask, sb, sb->bit_mask); + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_TNL_HEADER; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_tag; +} + +static int +dr_ste_v0_build_flex_parser_tnl_geneve_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_protocol_type, misc, geneve_protocol_type); + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_oam, misc, geneve_oam); + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_opt_len, misc, geneve_opt_len); + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_vni, misc, geneve_vni); + + return 0; +} + +static void +dr_ste_v0_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_flex_parser_tnl_geneve_tag(mask, sb, sb->bit_mask); + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_TNL_HEADER; + sb->byte_mask = 
mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_geneve_tag; +} + +static int +dr_ste_v0_build_register_0_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + + DR_STE_SET_TAG(register_0, tag, register_0_h, misc2, metadata_reg_c_0); + DR_STE_SET_TAG(register_0, tag, register_0_l, misc2, metadata_reg_c_1); + DR_STE_SET_TAG(register_0, tag, register_1_h, misc2, metadata_reg_c_2); + DR_STE_SET_TAG(register_0, tag, register_1_l, misc2, metadata_reg_c_3); + + return 0; +} + +static void +dr_ste_v0_build_register_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_register_0_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_STEERING_REGISTERS_0; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_register_0_tag; +} + +static int +dr_ste_v0_build_register_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + + DR_STE_SET_TAG(register_1, tag, register_2_h, misc2, metadata_reg_c_4); + DR_STE_SET_TAG(register_1, tag, register_2_l, misc2, metadata_reg_c_5); + DR_STE_SET_TAG(register_1, tag, register_3_h, misc2, metadata_reg_c_6); + DR_STE_SET_TAG(register_1, tag, register_3_l, misc2, metadata_reg_c_7); + + return 0; +} + +static void +dr_ste_v0_build_register_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_register_1_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_STEERING_REGISTERS_1; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_register_1_tag; +} + +static void +dr_ste_v0_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value, + u8 *bit_mask) +{ + struct mlx5dr_match_misc *misc_mask = &value->misc; + + DR_STE_SET_ONES(src_gvmi_qp, bit_mask, source_gvmi, misc_mask, source_port); + DR_STE_SET_ONES(src_gvmi_qp, bit_mask, source_qp, misc_mask, source_sqn); + misc_mask->source_eswitch_owner_vhca_id = 0; +} + +static int +dr_ste_v0_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc *misc = &value->misc; + struct mlx5dr_cmd_vport_cap *vport_cap; + struct mlx5dr_domain *dmn = sb->dmn; + struct mlx5dr_domain *vport_dmn; + u8 *bit_mask = sb->bit_mask; + bool source_gvmi_set; + + DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn); + + if (sb->vhca_id_valid) { + /* Find port GVMI based on the eswitch_owner_vhca_id */ + if (misc->source_eswitch_owner_vhca_id == dmn->info.caps.gvmi) + vport_dmn = dmn; + else if (dmn->peer_dmn && (misc->source_eswitch_owner_vhca_id == + dmn->peer_dmn->info.caps.gvmi)) + vport_dmn = dmn->peer_dmn; + else + return -EINVAL; + + misc->source_eswitch_owner_vhca_id = 0; + } else { + vport_dmn = dmn; + } + + source_gvmi_set = MLX5_GET(ste_src_gvmi_qp, bit_mask, source_gvmi); + if (source_gvmi_set) { + vport_cap = mlx5dr_domain_get_vport_cap(vport_dmn, + misc->source_port); + if (!vport_cap) { + mlx5dr_err(dmn, "Vport 0x%x is disabled or invalid\n", + misc->source_port); + return -EINVAL; + } + + if (vport_cap->vport_gvmi) + MLX5_SET(ste_src_gvmi_qp, tag, source_gvmi, vport_cap->vport_gvmi); + + misc->source_port = 0; + } + + return 0; +} + +static void +dr_ste_v0_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb, + struct 
mlx5dr_match_param *mask) +{ + dr_ste_v0_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_SRC_GVMI_AND_QP; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_src_gvmi_qpn_tag; +} + +static void dr_ste_v0_set_flex_parser(u32 *misc4_field_id, + u32 *misc4_field_value, + bool *parser_is_used, + u8 *tag) +{ + u32 id = *misc4_field_id; + u8 *parser_ptr; + + if (id >= DR_NUM_OF_FLEX_PARSERS || parser_is_used[id]) + return; + + parser_is_used[id] = true; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, id); + + *(__be32 *)parser_ptr = cpu_to_be32(*misc4_field_value); + *misc4_field_id = 0; + *misc4_field_value = 0; +} + +static int dr_ste_v0_build_flex_parser_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc4 *misc_4_mask = &value->misc4; + bool parser_is_used[DR_NUM_OF_FLEX_PARSERS] = {}; + + dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_0, + &misc_4_mask->prog_sample_field_value_0, + parser_is_used, tag); + + dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_1, + &misc_4_mask->prog_sample_field_value_1, + parser_is_used, tag); + + dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_2, + &misc_4_mask->prog_sample_field_value_2, + parser_is_used, tag); + + dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_3, + &misc_4_mask->prog_sample_field_value_3, + parser_is_used, tag); + + return 0; +} + +static void dr_ste_v0_build_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + dr_ste_v0_build_flex_parser_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tag; +} + +static void dr_ste_v0_build_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_1; + dr_ste_v0_build_flex_parser_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tag; +} + +static int +dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + u8 parser_id = sb->caps->flex_parser_id_geneve_tlv_option_0; + u8 *parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + + MLX5_SET(ste_flex_parser_0, parser_ptr, flex_parser_3, + misc3->geneve_tlv_option_0_data); + misc3->geneve_tlv_option_0_data = 0; + + return 0; +} + +static void +dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_tag(mask, sb, sb->bit_mask); + + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_geneve_tlv_option_0 > 3 ? 
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_tag; +} + +static int dr_ste_v0_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, + gtpu_msg_flags, misc3, + gtpu_msg_flags); + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, + gtpu_msg_type, misc3, + gtpu_msg_type); + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, + gtpu_teid, misc3, + gtpu_teid); + + return 0; +} + +static void dr_ste_v0_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_flex_parser_tnl_gtpu_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_TNL_HEADER; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_gtpu_tag; +} + +static int +dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_teid)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_2)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3); + return 0; +} + +static void +dr_ste_v0_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag; +} + +static int +dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_teid)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_2)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3); + return 0; +} + +static void +dr_ste_v0_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_1; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag; +} + +static int dr_ste_v0_build_tnl_header_0_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + struct mlx5dr_match_misc5 *misc5 = &value->misc5; + + DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_0, misc5, 
tunnel_header_0); + DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_1, misc5, tunnel_header_1); + + return 0; +} + +static void dr_ste_v0_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V0_LU_TYPE_TUNNEL_HEADER; + dr_ste_v0_build_tnl_header_0_1_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_header_0_1_tag; +} + +static struct mlx5dr_ste_ctx ste_ctx_v0 = { + /* Builders */ + .build_eth_l2_src_dst_init = &dr_ste_v0_build_eth_l2_src_dst_init, + .build_eth_l3_ipv6_src_init = &dr_ste_v0_build_eth_l3_ipv6_src_init, + .build_eth_l3_ipv6_dst_init = &dr_ste_v0_build_eth_l3_ipv6_dst_init, + .build_eth_l3_ipv4_5_tuple_init = &dr_ste_v0_build_eth_l3_ipv4_5_tuple_init, + .build_eth_l2_src_init = &dr_ste_v0_build_eth_l2_src_init, + .build_eth_l2_dst_init = &dr_ste_v0_build_eth_l2_dst_init, + .build_eth_l2_tnl_init = &dr_ste_v0_build_eth_l2_tnl_init, + .build_eth_l3_ipv4_misc_init = &dr_ste_v0_build_eth_l3_ipv4_misc_init, + .build_eth_ipv6_l3_l4_init = &dr_ste_v0_build_eth_ipv6_l3_l4_init, + .build_mpls_init = &dr_ste_v0_build_mpls_init, + .build_tnl_gre_init = &dr_ste_v0_build_tnl_gre_init, + .build_tnl_mpls_init = &dr_ste_v0_build_tnl_mpls_init, + .build_tnl_mpls_over_udp_init = &dr_ste_v0_build_tnl_mpls_over_udp_init, + .build_tnl_mpls_over_gre_init = &dr_ste_v0_build_tnl_mpls_over_gre_init, + .build_icmp_init = &dr_ste_v0_build_icmp_init, + .build_general_purpose_init = &dr_ste_v0_build_general_purpose_init, + .build_eth_l4_misc_init = &dr_ste_v0_build_eth_l4_misc_init, + .build_tnl_vxlan_gpe_init = &dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_init, + .build_tnl_geneve_init = &dr_ste_v0_build_flex_parser_tnl_geneve_init, + .build_tnl_geneve_tlv_opt_init = &dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_init, + .build_register_0_init = &dr_ste_v0_build_register_0_init, + .build_register_1_init = &dr_ste_v0_build_register_1_init, + .build_src_gvmi_qpn_init = &dr_ste_v0_build_src_gvmi_qpn_init, + .build_flex_parser_0_init = &dr_ste_v0_build_flex_parser_0_init, + .build_flex_parser_1_init = &dr_ste_v0_build_flex_parser_1_init, + .build_tnl_gtpu_init = &dr_ste_v0_build_flex_parser_tnl_gtpu_init, + .build_tnl_header_0_1_init = &dr_ste_v0_build_tnl_header_0_1_init, + .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v0_build_tnl_gtpu_flex_parser_0_init, + .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v0_build_tnl_gtpu_flex_parser_1_init, + + /* Getters and Setters */ + .ste_init = &dr_ste_v0_init, + .set_next_lu_type = &dr_ste_v0_set_next_lu_type, + .get_next_lu_type = &dr_ste_v0_get_next_lu_type, + .set_miss_addr = &dr_ste_v0_set_miss_addr, + .get_miss_addr = &dr_ste_v0_get_miss_addr, + .set_hit_addr = &dr_ste_v0_set_hit_addr, + .set_byte_mask = &dr_ste_v0_set_byte_mask, + .get_byte_mask = &dr_ste_v0_get_byte_mask, + + /* Actions */ + .actions_caps = DR_STE_CTX_ACTION_CAP_NONE, + .set_actions_rx = &dr_ste_v0_set_actions_rx, + .set_actions_tx = &dr_ste_v0_set_actions_tx, + .modify_field_arr_sz = ARRAY_SIZE(dr_ste_v0_action_modify_field_arr), + .modify_field_arr = dr_ste_v0_action_modify_field_arr, + .set_action_set = &dr_ste_v0_set_action_set, + .set_action_add = &dr_ste_v0_set_action_add, + .set_action_copy = &dr_ste_v0_set_action_copy, + .set_action_decap_l3_list = &dr_ste_v0_set_action_decap_l3_list, +}; + +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v0(void) +{ + return &ste_ctx_v0; +} diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c new file mode 100644 index 000000000..ee677a5c7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@ -0,0 +1,2172 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. */ + +#include <linux/types.h> +#include "mlx5_ifc_dr_ste_v1.h" +#include "dr_ste_v1.h" + +#define DR_STE_CALC_DFNR_TYPE(lookup_type, inner) \ + ((inner) ? DR_STE_V1_LU_TYPE_##lookup_type##_I : \ + DR_STE_V1_LU_TYPE_##lookup_type##_O) + +enum dr_ste_v1_entry_format { + DR_STE_V1_TYPE_BWC_BYTE = 0x0, + DR_STE_V1_TYPE_BWC_DW = 0x1, + DR_STE_V1_TYPE_MATCH = 0x2, +}; + +/* Lookup type is built from 2B: [ Definer mode 1B ][ Definer index 1B ] */ +enum { + DR_STE_V1_LU_TYPE_NOP = 0x0000, + DR_STE_V1_LU_TYPE_ETHL2_TNL = 0x0002, + DR_STE_V1_LU_TYPE_IBL3_EXT = 0x0102, + DR_STE_V1_LU_TYPE_ETHL2_O = 0x0003, + DR_STE_V1_LU_TYPE_IBL4 = 0x0103, + DR_STE_V1_LU_TYPE_ETHL2_I = 0x0004, + DR_STE_V1_LU_TYPE_SRC_QP_GVMI = 0x0104, + DR_STE_V1_LU_TYPE_ETHL2_SRC_O = 0x0005, + DR_STE_V1_LU_TYPE_ETHL2_HEADERS_O = 0x0105, + DR_STE_V1_LU_TYPE_ETHL2_SRC_I = 0x0006, + DR_STE_V1_LU_TYPE_ETHL2_HEADERS_I = 0x0106, + DR_STE_V1_LU_TYPE_ETHL3_IPV4_5_TUPLE_O = 0x0007, + DR_STE_V1_LU_TYPE_IPV6_DES_O = 0x0107, + DR_STE_V1_LU_TYPE_ETHL3_IPV4_5_TUPLE_I = 0x0008, + DR_STE_V1_LU_TYPE_IPV6_DES_I = 0x0108, + DR_STE_V1_LU_TYPE_ETHL4_O = 0x0009, + DR_STE_V1_LU_TYPE_IPV6_SRC_O = 0x0109, + DR_STE_V1_LU_TYPE_ETHL4_I = 0x000a, + DR_STE_V1_LU_TYPE_IPV6_SRC_I = 0x010a, + DR_STE_V1_LU_TYPE_ETHL2_SRC_DST_O = 0x000b, + DR_STE_V1_LU_TYPE_MPLS_O = 0x010b, + DR_STE_V1_LU_TYPE_ETHL2_SRC_DST_I = 0x000c, + DR_STE_V1_LU_TYPE_MPLS_I = 0x010c, + DR_STE_V1_LU_TYPE_ETHL3_IPV4_MISC_O = 0x000d, + DR_STE_V1_LU_TYPE_GRE = 0x010d, + DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER = 0x000e, + DR_STE_V1_LU_TYPE_GENERAL_PURPOSE = 0x010e, + DR_STE_V1_LU_TYPE_ETHL3_IPV4_MISC_I = 0x000f, + DR_STE_V1_LU_TYPE_STEERING_REGISTERS_0 = 0x010f, + DR_STE_V1_LU_TYPE_STEERING_REGISTERS_1 = 0x0110, + DR_STE_V1_LU_TYPE_FLEX_PARSER_OK = 0x0011, + DR_STE_V1_LU_TYPE_FLEX_PARSER_0 = 0x0111, + DR_STE_V1_LU_TYPE_FLEX_PARSER_1 = 0x0112, + DR_STE_V1_LU_TYPE_ETHL4_MISC_O = 0x0113, + DR_STE_V1_LU_TYPE_ETHL4_MISC_I = 0x0114, + DR_STE_V1_LU_TYPE_INVALID = 0x00ff, + DR_STE_V1_LU_TYPE_DONT_CARE = MLX5DR_STE_LU_TYPE_DONT_CARE, +}; + +enum dr_ste_v1_header_anchors { + DR_STE_HEADER_ANCHOR_START_OUTER = 0x00, + DR_STE_HEADER_ANCHOR_1ST_VLAN = 0x02, + DR_STE_HEADER_ANCHOR_IPV6_IPV4 = 0x07, + DR_STE_HEADER_ANCHOR_INNER_MAC = 0x13, + DR_STE_HEADER_ANCHOR_INNER_IPV6_IPV4 = 0x19, +}; + +enum dr_ste_v1_action_size { + DR_STE_ACTION_SINGLE_SZ = 4, + DR_STE_ACTION_DOUBLE_SZ = 8, + DR_STE_ACTION_TRIPLE_SZ = 12, +}; + +enum dr_ste_v1_action_insert_ptr_attr { + DR_STE_V1_ACTION_INSERT_PTR_ATTR_NONE = 0, /* Regular push header (e.g. 
push vlan) */ + DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP = 1, /* Encapsulation / Tunneling */ + DR_STE_V1_ACTION_INSERT_PTR_ATTR_ESP = 2, /* IPsec */ +}; + +enum dr_ste_v1_action_id { + DR_STE_V1_ACTION_ID_NOP = 0x00, + DR_STE_V1_ACTION_ID_COPY = 0x05, + DR_STE_V1_ACTION_ID_SET = 0x06, + DR_STE_V1_ACTION_ID_ADD = 0x07, + DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE = 0x08, + DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER = 0x09, + DR_STE_V1_ACTION_ID_INSERT_INLINE = 0x0a, + DR_STE_V1_ACTION_ID_INSERT_POINTER = 0x0b, + DR_STE_V1_ACTION_ID_FLOW_TAG = 0x0c, + DR_STE_V1_ACTION_ID_QUEUE_ID_SEL = 0x0d, + DR_STE_V1_ACTION_ID_ACCELERATED_LIST = 0x0e, + DR_STE_V1_ACTION_ID_MODIFY_LIST = 0x0f, + DR_STE_V1_ACTION_ID_ASO = 0x12, + DR_STE_V1_ACTION_ID_TRAILER = 0x13, + DR_STE_V1_ACTION_ID_COUNTER_ID = 0x14, + DR_STE_V1_ACTION_ID_MAX = 0x21, + /* use for special cases */ + DR_STE_V1_ACTION_ID_SPECIAL_ENCAP_L3 = 0x22, +}; + +enum { + DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_0 = 0x00, + DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_1 = 0x01, + DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_2 = 0x02, + DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_0 = 0x08, + DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_1 = 0x09, + DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0 = 0x0e, + DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0 = 0x18, + DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_1 = 0x19, + DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_0 = 0x40, + DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_1 = 0x41, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_0 = 0x44, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_1 = 0x45, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_2 = 0x46, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_3 = 0x47, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_0 = 0x4c, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_1 = 0x4d, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_2 = 0x4e, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_3 = 0x4f, + DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_0 = 0x5e, + DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_1 = 0x5f, + DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_0 = 0x6f, + DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70, + DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b, + DR_STE_V1_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_0 = 0x8c, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_1 = 0x8d, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_0 = 0x8e, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_1 = 0x8f, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_0 = 0x90, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_1 = 0x91, +}; + +enum dr_ste_v1_aso_ctx_type { + DR_STE_V1_ASO_CTX_TYPE_POLICERS = 0x2, +}; + +static const struct mlx5dr_ste_action_modify_field dr_ste_v1_action_modify_field_arr[] = { + [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_1, .start = 16, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_1, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_1, .start = 16, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0, .start = 18, .end = 23, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_1, .start = 16, .end = 24, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0, .start = 
16, .end = 31, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_2, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_3, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_2, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_3, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_GNRL_PURPOSE, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_0, .start = 0, .end = 31, + }, + 
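/* Editorial note (illustrative, not part of the original patch): each entry
 * in this array maps an MLX5_ACTION_IN_FIELD_* id to the STEv1 modify-header
 * hardware field plus the bit range within it. A SET on OUT_SMAC_47_16, for
 * example, resolves to hw_field SRC_L2_OUT_0 covering bits 0..31 and is
 * emitted later through dr_ste_v1_set_action_set(action, hw_field, start,
 * length, data), with length presumably derived by the caller from the
 * end/start pair of the selected entry.
 */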
[MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_2, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_EMD_31_0] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_EMD_47_32] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_0, .start = 0, .end = 15, + }, +}; + +static void dr_ste_v1_set_entry_type(u8 *hw_ste_p, u8 entry_type) +{ + MLX5_SET(ste_match_bwc_v1, hw_ste_p, entry_format, entry_type); +} + +void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr) +{ + u64 index = miss_addr >> 6; + + MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_39_32, index >> 26); + MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6, index); +} + +u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p) +{ + u64 index = + ((u64)MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6) | + ((u64)MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_39_32)) << 26); + + return index << 6; +} + +void dr_ste_v1_set_byte_mask(u8 *hw_ste_p, u16 byte_mask) +{ + MLX5_SET(ste_match_bwc_v1, hw_ste_p, byte_mask, byte_mask); +} + +u16 dr_ste_v1_get_byte_mask(u8 *hw_ste_p) +{ + return MLX5_GET(ste_match_bwc_v1, hw_ste_p, byte_mask); +} + +static void dr_ste_v1_set_lu_type(u8 *hw_ste_p, u16 lu_type) +{ + MLX5_SET(ste_match_bwc_v1, hw_ste_p, entry_format, lu_type >> 8); + MLX5_SET(ste_match_bwc_v1, hw_ste_p, match_definer_ctx_idx, lu_type & 0xFF); +} + +void dr_ste_v1_set_next_lu_type(u8 *hw_ste_p, u16 lu_type) +{ + MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_entry_format, lu_type >> 8); + MLX5_SET(ste_match_bwc_v1, hw_ste_p, hash_definer_ctx_idx, lu_type & 0xFF); +} + +u16 dr_ste_v1_get_next_lu_type(u8 *hw_ste_p) +{ + u8 mode = MLX5_GET(ste_match_bwc_v1, hw_ste_p, next_entry_format); + u8 index = MLX5_GET(ste_match_bwc_v1, hw_ste_p, hash_definer_ctx_idx); + + return (mode << 8 | index); +} + +static void dr_ste_v1_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi) +{ + MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_63_48, gvmi); +} + +void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size) +{ + u64 index = (icm_addr >> 5) | ht_size; + + MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_39_32_size, index >> 27); + MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_31_5_size, index); +} + +void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, bool is_rx, u16 gvmi) +{ + dr_ste_v1_set_lu_type(hw_ste_p, lu_type); + dr_ste_v1_set_next_lu_type(hw_ste_p, MLX5DR_STE_LU_TYPE_DONT_CARE); + + MLX5_SET(ste_match_bwc_v1, hw_ste_p, gvmi, gvmi); + MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_63_48, gvmi); + MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_63_48, gvmi); +} + +void dr_ste_v1_prepare_for_postsend(u8 *hw_ste_p, u32 ste_size) +{ + u8 *tag = hw_ste_p + DR_STE_SIZE_CTRL; + u8 *mask = tag + DR_STE_SIZE_TAG; + u8 tmp_tag[DR_STE_SIZE_TAG] = {}; + + if (ste_size == DR_STE_SIZE_CTRL) + return; + + WARN_ON(ste_size != DR_STE_SIZE); + + /* Backup tag */ + memcpy(tmp_tag, tag, DR_STE_SIZE_TAG); + + /* Swap mask and tag both are the same size */ + memcpy(tag, mask, DR_STE_SIZE_MASK); + memcpy(mask, tmp_tag, DR_STE_SIZE_TAG); +} + +static void 
dr_ste_v1_set_rx_flow_tag(u8 *s_action, u32 flow_tag) +{ + MLX5_SET(ste_single_action_flow_tag_v1, s_action, action_id, + DR_STE_V1_ACTION_ID_FLOW_TAG); + MLX5_SET(ste_single_action_flow_tag_v1, s_action, flow_tag, flow_tag); +} + +static void dr_ste_v1_set_counter_id(u8 *hw_ste_p, u32 ctr_id) +{ + MLX5_SET(ste_match_bwc_v1, hw_ste_p, counter_id, ctr_id); +} + +static void dr_ste_v1_set_reparse(u8 *hw_ste_p) +{ + MLX5_SET(ste_match_bwc_v1, hw_ste_p, reparse, 1); +} + +static void dr_ste_v1_set_encap(u8 *hw_ste_p, u8 *d_action, + u32 reformat_id, int size) +{ + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, action_id, + DR_STE_V1_ACTION_ID_INSERT_POINTER); + /* The hardware expects here size in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, size, size / 2); + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, pointer, reformat_id); + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, attributes, + DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP); + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, + u32 reformat_id, + u8 anchor, u8 offset, + int size) +{ + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, + action_id, DR_STE_V1_ACTION_ID_INSERT_POINTER); + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, start_anchor, anchor); + + /* The hardware expects here size and offset in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, size, size / 2); + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, start_offset, offset / 2); + + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, pointer, reformat_id); + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, attributes, + DR_STE_V1_ACTION_INSERT_PTR_ATTR_NONE); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action, + u8 anchor, u8 offset, + int size) +{ + MLX5_SET(ste_single_action_remove_header_size_v1, s_action, + action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); + MLX5_SET(ste_single_action_remove_header_size_v1, s_action, start_anchor, anchor); + + /* The hardware expects here size and offset in words (2 byte) */ + MLX5_SET(ste_single_action_remove_header_size_v1, s_action, remove_size, size / 2); + MLX5_SET(ste_single_action_remove_header_size_v1, s_action, start_offset, offset / 2); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_push_vlan(u8 *hw_ste_p, u8 *d_action, + u32 vlan_hdr) +{ + MLX5_SET(ste_double_action_insert_with_inline_v1, d_action, + action_id, DR_STE_V1_ACTION_ID_INSERT_INLINE); + /* The hardware expects offset to vlan header in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_inline_v1, d_action, + start_offset, HDR_LEN_L2_MACS >> 1); + MLX5_SET(ste_double_action_insert_with_inline_v1, d_action, + inline_data, vlan_hdr); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_pop_vlan(u8 *hw_ste_p, u8 *s_action, u8 vlans_num) +{ + MLX5_SET(ste_single_action_remove_header_size_v1, s_action, + action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); + MLX5_SET(ste_single_action_remove_header_size_v1, s_action, + start_anchor, DR_STE_HEADER_ANCHOR_1ST_VLAN); + /* The hardware expects here size in words (2 byte) */ + MLX5_SET(ste_single_action_remove_header_size_v1, s_action, + remove_size, (HDR_LEN_L2_VLAN >> 1) * vlans_num); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_encap_l3(u8 *hw_ste_p, + u8 *frst_s_action, + u8 *scnd_d_action, + u32 reformat_id, + int size) +{ 
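	/* Editorial note: L3 encap is built from two chained actions - a single
	 * "remove header to header" action that strips the outer MACs up to the
	 * IP anchor, followed by a double "insert with pointer" action that
	 * references the pre-registered reformat header (reformat_id), with its
	 * size expressed in 2-byte words. Callers therefore budget
	 * DR_STE_ACTION_TRIPLE_SZ and pass scnd_d_action = frst_s_action +
	 * DR_STE_ACTION_SINGLE_SZ, as dr_ste_v1_set_actions_tx/_rx do below.
	 */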
+ /* Remove L2 headers */ + MLX5_SET(ste_single_action_remove_header_v1, frst_s_action, action_id, + DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER); + MLX5_SET(ste_single_action_remove_header_v1, frst_s_action, end_anchor, + DR_STE_HEADER_ANCHOR_IPV6_IPV4); + + /* Encapsulate with given reformat ID */ + MLX5_SET(ste_double_action_insert_with_ptr_v1, scnd_d_action, action_id, + DR_STE_V1_ACTION_ID_INSERT_POINTER); + /* The hardware expects here size in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_ptr_v1, scnd_d_action, size, size / 2); + MLX5_SET(ste_double_action_insert_with_ptr_v1, scnd_d_action, pointer, reformat_id); + MLX5_SET(ste_double_action_insert_with_ptr_v1, scnd_d_action, attributes, + DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_rx_decap(u8 *hw_ste_p, u8 *s_action) +{ + MLX5_SET(ste_single_action_remove_header_v1, s_action, action_id, + DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER); + MLX5_SET(ste_single_action_remove_header_v1, s_action, decap, 1); + MLX5_SET(ste_single_action_remove_header_v1, s_action, vni_to_cqe, 1); + MLX5_SET(ste_single_action_remove_header_v1, s_action, end_anchor, + DR_STE_HEADER_ANCHOR_INNER_MAC); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_rewrite_actions(u8 *hw_ste_p, + u8 *s_action, + u16 num_of_actions, + u32 re_write_index) +{ + MLX5_SET(ste_single_action_modify_list_v1, s_action, action_id, + DR_STE_V1_ACTION_ID_MODIFY_LIST); + MLX5_SET(ste_single_action_modify_list_v1, s_action, num_of_modify_actions, + num_of_actions); + MLX5_SET(ste_single_action_modify_list_v1, s_action, modify_actions_ptr, + re_write_index); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_aso_flow_meter(u8 *d_action, + u32 object_id, + u32 offset, + u8 dest_reg_id, + u8 init_color) +{ + MLX5_SET(ste_double_action_aso_v1, d_action, action_id, + DR_STE_V1_ACTION_ID_ASO); + MLX5_SET(ste_double_action_aso_v1, d_action, aso_context_number, + object_id + (offset / MLX5DR_ASO_FLOW_METER_NUM_PER_OBJ)); + /* Convert reg_c index to HW 64bit index */ + MLX5_SET(ste_double_action_aso_v1, d_action, dest_reg_id, + (dest_reg_id - 1) / 2); + MLX5_SET(ste_double_action_aso_v1, d_action, aso_context_type, + DR_STE_V1_ASO_CTX_TYPE_POLICERS); + MLX5_SET(ste_double_action_aso_v1, d_action, flow_meter.line_id, + offset % MLX5DR_ASO_FLOW_METER_NUM_PER_OBJ); + MLX5_SET(ste_double_action_aso_v1, d_action, flow_meter.initial_color, + init_color); +} + +static void dr_ste_v1_arr_init_next_match(u8 **last_ste, + u32 *added_stes, + u16 gvmi) +{ + u8 *action; + + (*added_stes)++; + *last_ste += DR_STE_SIZE; + dr_ste_v1_init(*last_ste, MLX5DR_STE_LU_TYPE_DONT_CARE, 0, gvmi); + dr_ste_v1_set_entry_type(*last_ste, DR_STE_V1_TYPE_MATCH); + + action = MLX5_ADDR_OF(ste_mask_and_match_v1, *last_ste, action); + memset(action, 0, MLX5_FLD_SZ_BYTES(ste_mask_and_match_v1, action)); +} + +void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, + u8 *action_type_set, + u32 actions_caps, + u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes) +{ + u8 *action = MLX5_ADDR_OF(ste_match_bwc_v1, last_ste, action); + u8 action_sz = DR_STE_ACTION_DOUBLE_SZ; + bool allow_modify_hdr = true; + bool allow_encap = true; + + if (action_type_set[DR_ACTION_TYP_POP_VLAN]) { + if (action_sz < DR_STE_ACTION_SINGLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, + attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, + last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + 
} + dr_ste_v1_set_pop_vlan(last_ste, action, attr->vlans.count); + action_sz -= DR_STE_ACTION_SINGLE_SZ; + action += DR_STE_ACTION_SINGLE_SZ; + + /* Check if vlan_pop and modify_hdr on same STE is supported */ + if (!(actions_caps & DR_STE_CTX_ACTION_CAP_POP_MDFY)) + allow_modify_hdr = false; + } + + if (action_type_set[DR_ACTION_TYP_CTR]) + dr_ste_v1_set_counter_id(last_ste, attr->ctr_id); + + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) { + if (!allow_modify_hdr || action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, + attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, + last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_rewrite_actions(last_ste, action, + attr->modify_actions, + attr->modify_index); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + allow_encap = false; + } + + if (action_type_set[DR_ACTION_TYP_PUSH_VLAN]) { + int i; + + for (i = 0; i < attr->vlans.count; i++) { + if (action_sz < DR_STE_ACTION_DOUBLE_SZ || !allow_encap) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + allow_encap = true; + } + dr_ste_v1_set_push_vlan(last_ste, action, + attr->vlans.headers[i]); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + } + } + + if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2]) { + if (!allow_encap || action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + allow_encap = true; + } + dr_ste_v1_set_encap(last_ste, action, + attr->reformat.id, + attr->reformat.size); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + } else if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]) { + u8 *d_action; + + if (action_sz < DR_STE_ACTION_TRIPLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + d_action = action + DR_STE_ACTION_SINGLE_SZ; + + dr_ste_v1_set_encap_l3(last_ste, + action, d_action, + attr->reformat.id, + attr->reformat.size); + action_sz -= DR_STE_ACTION_TRIPLE_SZ; + action += DR_STE_ACTION_TRIPLE_SZ; + } else if (action_type_set[DR_ACTION_TYP_INSERT_HDR]) { + if (!allow_encap || action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_insert_hdr(last_ste, action, + attr->reformat.id, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + } else if (action_type_set[DR_ACTION_TYP_REMOVE_HDR]) { + if (action_sz < DR_STE_ACTION_SINGLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_remove_hdr(last_ste, action, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); + action_sz -= DR_STE_ACTION_SINGLE_SZ; + action += DR_STE_ACTION_SINGLE_SZ; + } + + if (action_type_set[DR_ACTION_TYP_ASO_FLOW_METER]) { + if (action_sz < DR_STE_ACTION_DOUBLE_SZ) { + 
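			/* Editorial note: the initial BWC-format STE only carries
			 * DR_STE_ACTION_DOUBLE_SZ bytes of inline action space, so
			 * whenever the next action does not fit (here the 8-byte ASO
			 * double action), a fresh MATCH-format STE is chained via
			 * dr_ste_v1_arr_init_next_match(); the new STE provides
			 * DR_STE_ACTION_TRIPLE_SZ bytes and *added_stes is bumped so
			 * the caller accounts for the extra entry.
			 */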
dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_aso_flow_meter(action, + attr->aso_flow_meter.obj_id, + attr->aso_flow_meter.offset, + attr->aso_flow_meter.dest_reg_id, + attr->aso_flow_meter.init_color); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + } + + dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi); + dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1); +} + +void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, + u8 *action_type_set, + u32 actions_caps, + u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes) +{ + u8 *action = MLX5_ADDR_OF(ste_match_bwc_v1, last_ste, action); + u8 action_sz = DR_STE_ACTION_DOUBLE_SZ; + bool allow_modify_hdr = true; + bool allow_ctr = true; + + if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) { + dr_ste_v1_set_rewrite_actions(last_ste, action, + attr->decap_actions, + attr->decap_index); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + allow_modify_hdr = false; + allow_ctr = false; + } else if (action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2]) { + dr_ste_v1_set_rx_decap(last_ste, action); + action_sz -= DR_STE_ACTION_SINGLE_SZ; + action += DR_STE_ACTION_SINGLE_SZ; + allow_modify_hdr = false; + allow_ctr = false; + } + + if (action_type_set[DR_ACTION_TYP_TAG]) { + if (action_sz < DR_STE_ACTION_SINGLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + allow_modify_hdr = true; + allow_ctr = true; + } + dr_ste_v1_set_rx_flow_tag(action, attr->flow_tag); + action_sz -= DR_STE_ACTION_SINGLE_SZ; + action += DR_STE_ACTION_SINGLE_SZ; + } + + if (action_type_set[DR_ACTION_TYP_POP_VLAN]) { + if (action_sz < DR_STE_ACTION_SINGLE_SZ || + !allow_modify_hdr) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + + dr_ste_v1_set_pop_vlan(last_ste, action, attr->vlans.count); + action_sz -= DR_STE_ACTION_SINGLE_SZ; + action += DR_STE_ACTION_SINGLE_SZ; + allow_ctr = false; + + /* Check if vlan_pop and modify_hdr on same STE is supported */ + if (!(actions_caps & DR_STE_CTX_ACTION_CAP_POP_MDFY)) + allow_modify_hdr = false; + } + + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) { + /* Modify header and decapsulation must use different STEs */ + if (!allow_modify_hdr || action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + allow_modify_hdr = true; + allow_ctr = true; + } + dr_ste_v1_set_rewrite_actions(last_ste, action, + attr->modify_actions, + attr->modify_index); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + } + + if (action_type_set[DR_ACTION_TYP_PUSH_VLAN]) { + int i; + + for (i = 0; i < attr->vlans.count; i++) { + if (action_sz < DR_STE_ACTION_DOUBLE_SZ || + !allow_modify_hdr) { + dr_ste_v1_arr_init_next_match(&last_ste, + added_stes, + attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, + last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_push_vlan(last_ste, action, + attr->vlans.headers[i]); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += 
DR_STE_ACTION_DOUBLE_SZ; + } + } + + if (action_type_set[DR_ACTION_TYP_CTR]) { + /* Counter action set after decap and before insert_hdr + * to exclude decaped / encaped header respectively. + */ + if (!allow_ctr) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + allow_modify_hdr = true; + } + dr_ste_v1_set_counter_id(last_ste, attr->ctr_id); + allow_ctr = false; + } + + if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2]) { + if (action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_encap(last_ste, action, + attr->reformat.id, + attr->reformat.size); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + allow_modify_hdr = false; + } else if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]) { + u8 *d_action; + + if (action_sz < DR_STE_ACTION_TRIPLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + + d_action = action + DR_STE_ACTION_SINGLE_SZ; + + dr_ste_v1_set_encap_l3(last_ste, + action, d_action, + attr->reformat.id, + attr->reformat.size); + action_sz -= DR_STE_ACTION_TRIPLE_SZ; + allow_modify_hdr = false; + } else if (action_type_set[DR_ACTION_TYP_INSERT_HDR]) { + /* Modify header, decap, and encap must use different STEs */ + if (!allow_modify_hdr || action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_insert_hdr(last_ste, action, + attr->reformat.id, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + allow_modify_hdr = false; + } else if (action_type_set[DR_ACTION_TYP_REMOVE_HDR]) { + if (action_sz < DR_STE_ACTION_SINGLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + allow_modify_hdr = true; + allow_ctr = true; + } + dr_ste_v1_set_remove_hdr(last_ste, action, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); + action_sz -= DR_STE_ACTION_SINGLE_SZ; + action += DR_STE_ACTION_SINGLE_SZ; + } + + if (action_type_set[DR_ACTION_TYP_ASO_FLOW_METER]) { + if (action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_aso_flow_meter(action, + attr->aso_flow_meter.obj_id, + attr->aso_flow_meter.offset, + attr->aso_flow_meter.dest_reg_id, + attr->aso_flow_meter.init_color); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + } + + dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi); + dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1); +} + +void dr_ste_v1_set_action_set(u8 *d_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data) +{ + shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET; + MLX5_SET(ste_double_action_set_v1, d_action, action_id, DR_STE_V1_ACTION_ID_SET); + MLX5_SET(ste_double_action_set_v1, 
d_action, destination_dw_offset, hw_field); + MLX5_SET(ste_double_action_set_v1, d_action, destination_left_shifter, shifter); + MLX5_SET(ste_double_action_set_v1, d_action, destination_length, length); + MLX5_SET(ste_double_action_set_v1, d_action, inline_data, data); +} + +void dr_ste_v1_set_action_add(u8 *d_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data) +{ + shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET; + MLX5_SET(ste_double_action_add_v1, d_action, action_id, DR_STE_V1_ACTION_ID_ADD); + MLX5_SET(ste_double_action_add_v1, d_action, destination_dw_offset, hw_field); + MLX5_SET(ste_double_action_add_v1, d_action, destination_left_shifter, shifter); + MLX5_SET(ste_double_action_add_v1, d_action, destination_length, length); + MLX5_SET(ste_double_action_add_v1, d_action, add_value, data); +} + +void dr_ste_v1_set_action_copy(u8 *d_action, + u8 dst_hw_field, + u8 dst_shifter, + u8 dst_len, + u8 src_hw_field, + u8 src_shifter) +{ + dst_shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET; + src_shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET; + MLX5_SET(ste_double_action_copy_v1, d_action, action_id, DR_STE_V1_ACTION_ID_COPY); + MLX5_SET(ste_double_action_copy_v1, d_action, destination_dw_offset, dst_hw_field); + MLX5_SET(ste_double_action_copy_v1, d_action, destination_left_shifter, dst_shifter); + MLX5_SET(ste_double_action_copy_v1, d_action, destination_length, dst_len); + MLX5_SET(ste_double_action_copy_v1, d_action, source_dw_offset, src_hw_field); + MLX5_SET(ste_double_action_copy_v1, d_action, source_right_shifter, src_shifter); +} + +#define DR_STE_DECAP_L3_ACTION_NUM 8 +#define DR_STE_L2_HDR_MAX_SZ 20 + +int dr_ste_v1_set_action_decap_l3_list(void *data, + u32 data_sz, + u8 *hw_action, + u32 hw_action_sz, + u16 *used_hw_action_num) +{ + u8 padded_data[DR_STE_L2_HDR_MAX_SZ] = {}; + void *data_ptr = padded_data; + u16 used_actions = 0; + u32 inline_data_sz; + u32 i; + + if (hw_action_sz / DR_STE_ACTION_DOUBLE_SZ < DR_STE_DECAP_L3_ACTION_NUM) + return -EINVAL; + + inline_data_sz = + MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v1, inline_data); + + /* Add an alignment padding */ + memcpy(padded_data + data_sz % inline_data_sz, data, data_sz); + + /* Remove L2L3 outer headers */ + MLX5_SET(ste_single_action_remove_header_v1, hw_action, action_id, + DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER); + MLX5_SET(ste_single_action_remove_header_v1, hw_action, decap, 1); + MLX5_SET(ste_single_action_remove_header_v1, hw_action, vni_to_cqe, 1); + MLX5_SET(ste_single_action_remove_header_v1, hw_action, end_anchor, + DR_STE_HEADER_ANCHOR_INNER_IPV6_IPV4); + hw_action += DR_STE_ACTION_DOUBLE_SZ; + used_actions++; /* Remove and NOP are a single double action */ + + /* Point to the last dword of the header */ + data_ptr += (data_sz / inline_data_sz) * inline_data_sz; + + /* Add the new header using inline action 4Byte at a time, the header + * is added in reversed order to the beginning of the packet to avoid + * incorrect parsing by the HW. Since header is 14B or 18B an extra + * two bytes are padded and later removed. 
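	 * (Editorial illustration) For a 14 byte L2 header the loop below emits
	 * four 4-byte inline inserts taken from offsets 12, 8, 4 and 0 of the
	 * padded buffer; since each insert lands at start_offset 0, the chunk
	 * holding the two padding bytes ends up at the very front of the packet
	 * and is then stripped by the trailing remove-by-size action
	 * (remove_size = 1 word).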
+ */ + for (i = 0; i < data_sz / inline_data_sz + 1; i++) { + void *addr_inline; + + MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, action_id, + DR_STE_V1_ACTION_ID_INSERT_INLINE); + /* The hardware expects here offset to words (2 bytes) */ + MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, start_offset, 0); + + /* Copy bytes one by one to avoid endianness problem */ + addr_inline = MLX5_ADDR_OF(ste_double_action_insert_with_inline_v1, + hw_action, inline_data); + memcpy(addr_inline, data_ptr - i * inline_data_sz, inline_data_sz); + hw_action += DR_STE_ACTION_DOUBLE_SZ; + used_actions++; + } + + /* Remove first 2 extra bytes */ + MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, action_id, + DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); + MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, start_offset, 0); + /* The hardware expects here size in words (2 bytes) */ + MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, remove_size, 1); + used_actions++; + + *used_hw_action_num = used_actions; + + return 0; +} + +static void dr_ste_v1_build_eth_l2_src_dst_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, dmac_15_0, mask, dmac_15_0); + + DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, smac_47_16, mask, smac_47_16); + DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, smac_15_0, mask, smac_15_0); + + DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_ONES(eth_l2_src_dst_v1, bit_mask, l3_type, mask, ip_version); + + if (mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_src_dst_v1, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + } else if (mask->svlan_tag) { + MLX5_SET(ste_eth_l2_src_dst_v1, bit_mask, first_vlan_qualifier, -1); + mask->svlan_tag = 0; + } +} + +static int dr_ste_v1_build_eth_l2_src_dst_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? 
&value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, dmac_15_0, spec, dmac_15_0); + + DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, smac_47_16, spec, smac_47_16); + DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, smac_15_0, spec, smac_15_0); + + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_src_dst_v1, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_src_dst_v1, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else if (spec->ip_version) { + return -EINVAL; + } + + DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, first_priority, spec, first_prio); + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_src_dst_v1, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_src_dst_v1, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + return 0; +} + +void dr_ste_v1_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l2_src_dst_bit_mask(mask, sb->inner, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL2_SRC_DST, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l2_src_dst_tag; +} + +static int dr_ste_v1_build_eth_l3_ipv6_dst_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_127_96, spec, dst_ip_127_96); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_95_64, spec, dst_ip_95_64); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_63_32, spec, dst_ip_63_32); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_31_0, spec, dst_ip_31_0); + + return 0; +} + +void dr_ste_v1_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l3_ipv6_dst_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(IPV6_DES, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l3_ipv6_dst_tag; +} + +static int dr_ste_v1_build_eth_l3_ipv6_src_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_127_96, spec, src_ip_127_96); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_95_64, spec, src_ip_95_64); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_63_32, spec, src_ip_63_32); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_31_0, spec, src_ip_31_0); + + return 0; +} + +void dr_ste_v1_build_eth_l3_ipv6_src_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l3_ipv6_src_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(IPV6_SRC, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l3_ipv6_src_tag; +} + +static int dr_ste_v1_build_eth_l3_ipv4_5_tuple_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? 
&value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, destination_address, spec, dst_ip_31_0); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, source_address, spec, src_ip_31_0); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, destination_port, spec, tcp_dport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, destination_port, spec, udp_dport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, source_port, spec, tcp_sport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, source_port, spec, udp_sport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, protocol, spec, ip_protocol); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, fragmented, spec, frag); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, dscp, spec, ip_dscp); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, ecn, spec, ip_ecn); + + if (spec->tcp_flags) { + DR_STE_SET_TCP_FLAGS(eth_l3_ipv4_5_tuple_v1, tag, spec); + spec->tcp_flags = 0; + } + + return 0; +} + +void dr_ste_v1_build_eth_l3_ipv4_5_tuple_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l3_ipv4_5_tuple_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL3_IPV4_5_TUPLE, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l3_ipv4_5_tuple_tag; +} + +static void dr_ste_v1_build_eth_l2_src_or_dst_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + struct mlx5dr_match_misc *misc_mask = &value->misc; + + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, ip_fragmented, mask, frag); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, l3_ethertype, mask, ethertype); + DR_STE_SET_ONES(eth_l2_src_v1, bit_mask, l3_type, mask, ip_version); + + if (mask->svlan_tag || mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + mask->svlan_tag = 0; + } + + if (inner) { + if (misc_mask->inner_second_cvlan_tag || + misc_mask->inner_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, bit_mask, second_vlan_qualifier, -1); + misc_mask->inner_second_cvlan_tag = 0; + misc_mask->inner_second_svlan_tag = 0; + } + + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, + second_vlan_id, misc_mask, inner_second_vid); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, + second_cfi, misc_mask, inner_second_cfi); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, + second_priority, misc_mask, inner_second_prio); + } else { + if (misc_mask->outer_second_cvlan_tag || + misc_mask->outer_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, bit_mask, second_vlan_qualifier, -1); + misc_mask->outer_second_cvlan_tag = 0; + misc_mask->outer_second_svlan_tag = 0; + } + + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, + second_vlan_id, misc_mask, outer_second_vid); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, + second_cfi, misc_mask, outer_second_cfi); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, + second_priority, misc_mask, outer_second_prio); + } +} + +static int dr_ste_v1_build_eth_l2_src_or_dst_tag(struct mlx5dr_match_param *value, + bool inner, u8 *tag) +{ + struct mlx5dr_match_spec *spec = inner ? 
&value->inner : &value->outer; + struct mlx5dr_match_misc *misc_spec = &value->misc; + + DR_STE_SET_TAG(eth_l2_src_v1, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_src_v1, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_src_v1, tag, first_priority, spec, first_prio); + DR_STE_SET_TAG(eth_l2_src_v1, tag, ip_fragmented, spec, frag); + DR_STE_SET_TAG(eth_l2_src_v1, tag, l3_ethertype, spec, ethertype); + + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_src_v1, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_src_v1, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else if (spec->ip_version) { + return -EINVAL; + } + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + + if (inner) { + if (misc_spec->inner_second_cvlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, tag, second_vlan_qualifier, DR_STE_CVLAN); + misc_spec->inner_second_cvlan_tag = 0; + } else if (misc_spec->inner_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, tag, second_vlan_qualifier, DR_STE_SVLAN); + misc_spec->inner_second_svlan_tag = 0; + } + + DR_STE_SET_TAG(eth_l2_src_v1, tag, second_vlan_id, misc_spec, inner_second_vid); + DR_STE_SET_TAG(eth_l2_src_v1, tag, second_cfi, misc_spec, inner_second_cfi); + DR_STE_SET_TAG(eth_l2_src_v1, tag, second_priority, misc_spec, inner_second_prio); + } else { + if (misc_spec->outer_second_cvlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, tag, second_vlan_qualifier, DR_STE_CVLAN); + misc_spec->outer_second_cvlan_tag = 0; + } else if (misc_spec->outer_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, tag, second_vlan_qualifier, DR_STE_SVLAN); + misc_spec->outer_second_svlan_tag = 0; + } + DR_STE_SET_TAG(eth_l2_src_v1, tag, second_vlan_id, misc_spec, outer_second_vid); + DR_STE_SET_TAG(eth_l2_src_v1, tag, second_cfi, misc_spec, outer_second_cfi); + DR_STE_SET_TAG(eth_l2_src_v1, tag, second_priority, misc_spec, outer_second_prio); + } + + return 0; +} + +static void dr_ste_v1_build_eth_l2_src_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, smac_47_16, mask, smac_47_16); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, smac_15_0, mask, smac_15_0); + + dr_ste_v1_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask); +} + +static int dr_ste_v1_build_eth_l2_src_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_src_v1, tag, smac_47_16, spec, smac_47_16); + DR_STE_SET_TAG(eth_l2_src_v1, tag, smac_15_0, spec, smac_15_0); + + return dr_ste_v1_build_eth_l2_src_or_dst_tag(value, sb->inner, tag); +} + +void dr_ste_v1_build_eth_l2_src_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l2_src_bit_mask(mask, sb->inner, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL2_SRC, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l2_src_tag; +} + +static void dr_ste_v1_build_eth_l2_dst_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? 
&value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_dst_v1, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_TAG(eth_l2_dst_v1, bit_mask, dmac_15_0, mask, dmac_15_0); + + dr_ste_v1_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask); +} + +static int dr_ste_v1_build_eth_l2_dst_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_dst_v1, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_dst_v1, tag, dmac_15_0, spec, dmac_15_0); + + return dr_ste_v1_build_eth_l2_src_or_dst_tag(value, sb->inner, tag); +} + +void dr_ste_v1_build_eth_l2_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l2_dst_bit_mask(mask, sb->inner, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL2, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l2_dst_tag; +} + +static void dr_ste_v1_build_eth_l2_tnl_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, dmac_15_0, mask, dmac_15_0); + DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, ip_fragmented, mask, frag); + DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, l3_ethertype, mask, ethertype); + DR_STE_SET_ONES(eth_l2_tnl_v1, bit_mask, l3_type, mask, ip_version); + + if (misc->vxlan_vni) { + MLX5_SET(ste_eth_l2_tnl_v1, bit_mask, + l2_tunneling_network_id, (misc->vxlan_vni << 8)); + misc->vxlan_vni = 0; + } + + if (mask->svlan_tag || mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_tnl_v1, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + mask->svlan_tag = 0; + } +} + +static int dr_ste_v1_build_eth_l2_tnl_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? 
&value->inner : &value->outer; + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(eth_l2_tnl_v1, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_tnl_v1, tag, dmac_15_0, spec, dmac_15_0); + DR_STE_SET_TAG(eth_l2_tnl_v1, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_tnl_v1, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_tnl_v1, tag, ip_fragmented, spec, frag); + DR_STE_SET_TAG(eth_l2_tnl_v1, tag, first_priority, spec, first_prio); + DR_STE_SET_TAG(eth_l2_tnl_v1, tag, l3_ethertype, spec, ethertype); + + if (misc->vxlan_vni) { + MLX5_SET(ste_eth_l2_tnl_v1, tag, l2_tunneling_network_id, + (misc->vxlan_vni << 8)); + misc->vxlan_vni = 0; + } + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_tnl_v1, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_tnl_v1, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_tnl_v1, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_tnl_v1, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else if (spec->ip_version) { + return -EINVAL; + } + + return 0; +} + +void dr_ste_v1_build_eth_l2_tnl_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l2_tnl_bit_mask(mask, sb->inner, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_ETHL2_TNL; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l2_tnl_tag; +} + +static int dr_ste_v1_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l3_ipv4_misc_v1, tag, time_to_live, spec, ttl_hoplimit); + DR_STE_SET_TAG(eth_l3_ipv4_misc_v1, tag, ihl, spec, ipv4_ihl); + + return 0; +} + +void dr_ste_v1_build_eth_l3_ipv4_misc_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l3_ipv4_misc_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL3_IPV4_MISC, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l3_ipv4_misc_tag; +} + +static int dr_ste_v1_build_eth_ipv6_l3_l4_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? 
&value->inner : &value->outer; + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(eth_l4_v1, tag, dst_port, spec, tcp_dport); + DR_STE_SET_TAG(eth_l4_v1, tag, src_port, spec, tcp_sport); + DR_STE_SET_TAG(eth_l4_v1, tag, dst_port, spec, udp_dport); + DR_STE_SET_TAG(eth_l4_v1, tag, src_port, spec, udp_sport); + DR_STE_SET_TAG(eth_l4_v1, tag, protocol, spec, ip_protocol); + DR_STE_SET_TAG(eth_l4_v1, tag, fragmented, spec, frag); + DR_STE_SET_TAG(eth_l4_v1, tag, dscp, spec, ip_dscp); + DR_STE_SET_TAG(eth_l4_v1, tag, ecn, spec, ip_ecn); + DR_STE_SET_TAG(eth_l4_v1, tag, ipv6_hop_limit, spec, ttl_hoplimit); + + if (sb->inner) + DR_STE_SET_TAG(eth_l4_v1, tag, flow_label, misc, inner_ipv6_flow_label); + else + DR_STE_SET_TAG(eth_l4_v1, tag, flow_label, misc, outer_ipv6_flow_label); + + if (spec->tcp_flags) { + DR_STE_SET_TCP_FLAGS(eth_l4_v1, tag, spec); + spec->tcp_flags = 0; + } + + return 0; +} + +void dr_ste_v1_build_eth_ipv6_l3_l4_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_ipv6_l3_l4_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL4, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_ipv6_l3_l4_tag; +} + +static int dr_ste_v1_build_mpls_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + + if (sb->inner) + DR_STE_SET_MPLS(mpls_v1, misc2, inner, tag); + else + DR_STE_SET_MPLS(mpls_v1, misc2, outer, tag); + + return 0; +} + +void dr_ste_v1_build_mpls_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_mpls_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(MPLS, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_mpls_tag; +} + +static int dr_ste_v1_build_tnl_gre_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(gre_v1, tag, gre_protocol, misc, gre_protocol); + DR_STE_SET_TAG(gre_v1, tag, gre_k_present, misc, gre_k_present); + DR_STE_SET_TAG(gre_v1, tag, gre_key_h, misc, gre_key_h); + DR_STE_SET_TAG(gre_v1, tag, gre_key_l, misc, gre_key_l); + + DR_STE_SET_TAG(gre_v1, tag, gre_c_present, misc, gre_c_present); + DR_STE_SET_TAG(gre_v1, tag, gre_s_present, misc, gre_s_present); + + return 0; +} + +void dr_ste_v1_build_tnl_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_tnl_gre_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_GRE; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gre_tag; +} + +static int dr_ste_v1_build_tnl_mpls_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + + if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc2)) { + DR_STE_SET_TAG(mpls_v1, tag, mpls0_label, + misc2, outer_first_mpls_over_gre_label); + + DR_STE_SET_TAG(mpls_v1, tag, mpls0_exp, + misc2, outer_first_mpls_over_gre_exp); + + DR_STE_SET_TAG(mpls_v1, tag, mpls0_s_bos, + misc2, outer_first_mpls_over_gre_s_bos); + + DR_STE_SET_TAG(mpls_v1, tag, mpls0_ttl, + misc2, outer_first_mpls_over_gre_ttl); + } else { + DR_STE_SET_TAG(mpls_v1, tag, mpls0_label, + misc2, outer_first_mpls_over_udp_label); + + DR_STE_SET_TAG(mpls_v1, tag, mpls0_exp, + misc2, 
outer_first_mpls_over_udp_exp); + + DR_STE_SET_TAG(mpls_v1, tag, mpls0_s_bos, + misc2, outer_first_mpls_over_udp_s_bos); + + DR_STE_SET_TAG(mpls_v1, tag, mpls0_ttl, + misc2, outer_first_mpls_over_udp_ttl); + } + + return 0; +} + +void dr_ste_v1_build_tnl_mpls_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_tnl_mpls_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_MPLS_I; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_mpls_tag; +} + +static int dr_ste_v1_build_tnl_mpls_over_udp_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + u8 *parser_ptr; + u8 parser_id; + u32 mpls_hdr; + + mpls_hdr = misc2->outer_first_mpls_over_udp_label << HDR_MPLS_OFFSET_LABEL; + misc2->outer_first_mpls_over_udp_label = 0; + mpls_hdr |= misc2->outer_first_mpls_over_udp_exp << HDR_MPLS_OFFSET_EXP; + misc2->outer_first_mpls_over_udp_exp = 0; + mpls_hdr |= misc2->outer_first_mpls_over_udp_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc2->outer_first_mpls_over_udp_s_bos = 0; + mpls_hdr |= misc2->outer_first_mpls_over_udp_ttl << HDR_MPLS_OFFSET_TTL; + misc2->outer_first_mpls_over_udp_ttl = 0; + + parser_id = sb->caps->flex_parser_id_mpls_over_udp; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr); + + return 0; +} + +void dr_ste_v1_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_tnl_mpls_over_udp_tag(mask, sb, sb->bit_mask); + + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_mpls_over_udp > DR_STE_MAX_FLEX_0_ID ? + DR_STE_V1_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V1_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_mpls_over_udp_tag; +} + +static int dr_ste_v1_build_tnl_mpls_over_gre_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + u8 *parser_ptr; + u8 parser_id; + u32 mpls_hdr; + + mpls_hdr = misc2->outer_first_mpls_over_gre_label << HDR_MPLS_OFFSET_LABEL; + misc2->outer_first_mpls_over_gre_label = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_exp << HDR_MPLS_OFFSET_EXP; + misc2->outer_first_mpls_over_gre_exp = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc2->outer_first_mpls_over_gre_s_bos = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_ttl << HDR_MPLS_OFFSET_TTL; + misc2->outer_first_mpls_over_gre_ttl = 0; + + parser_id = sb->caps->flex_parser_id_mpls_over_gre; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr); + + return 0; +} + +void dr_ste_v1_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_tnl_mpls_over_gre_tag(mask, sb, sb->bit_mask); + + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_mpls_over_gre > DR_STE_MAX_FLEX_0_ID ? 
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V1_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_mpls_over_gre_tag; +} + +static int dr_ste_v1_build_icmp_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + bool is_ipv4 = DR_MASK_IS_ICMPV4_SET(misc3); + u32 *icmp_header_data; + u8 *icmp_type; + u8 *icmp_code; + + if (is_ipv4) { + icmp_header_data = &misc3->icmpv4_header_data; + icmp_type = &misc3->icmpv4_type; + icmp_code = &misc3->icmpv4_code; + } else { + icmp_header_data = &misc3->icmpv6_header_data; + icmp_type = &misc3->icmpv6_type; + icmp_code = &misc3->icmpv6_code; + } + + MLX5_SET(ste_icmp_v1, tag, icmp_header_data, *icmp_header_data); + MLX5_SET(ste_icmp_v1, tag, icmp_type, *icmp_type); + MLX5_SET(ste_icmp_v1, tag, icmp_code, *icmp_code); + + *icmp_header_data = 0; + *icmp_type = 0; + *icmp_code = 0; + + return 0; +} + +void dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_icmp_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_ETHL4_MISC_O; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_icmp_tag; +} + +static int dr_ste_v1_build_general_purpose_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + + DR_STE_SET_TAG(general_purpose, tag, general_purpose_lookup_field, + misc2, metadata_reg_a); + + return 0; +} + +void dr_ste_v1_build_general_purpose_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_general_purpose_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_GENERAL_PURPOSE; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_general_purpose_tag; +} + +static int dr_ste_v1_build_eth_l4_misc_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + + if (sb->inner) { + DR_STE_SET_TAG(eth_l4_misc_v1, tag, seq_num, misc3, inner_tcp_seq_num); + DR_STE_SET_TAG(eth_l4_misc_v1, tag, ack_num, misc3, inner_tcp_ack_num); + } else { + DR_STE_SET_TAG(eth_l4_misc_v1, tag, seq_num, misc3, outer_tcp_seq_num); + DR_STE_SET_TAG(eth_l4_misc_v1, tag, ack_num, misc3, outer_tcp_ack_num); + } + + return 0; +} + +void dr_ste_v1_build_eth_l4_misc_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l4_misc_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_ETHL4_MISC_O; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l4_misc_tag; +} + +static int +dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + + DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag, + outer_vxlan_gpe_flags, misc3, + outer_vxlan_gpe_flags); + DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag, + outer_vxlan_gpe_next_protocol, misc3, + outer_vxlan_gpe_next_protocol); + DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag, + outer_vxlan_gpe_vni, misc3, + outer_vxlan_gpe_vni); + + return 0; +} + +void dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + 
dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_tag; +} + +static int +dr_ste_v1_build_flex_parser_tnl_geneve_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_protocol_type, misc, geneve_protocol_type); + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_oam, misc, geneve_oam); + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_opt_len, misc, geneve_opt_len); + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_vni, misc, geneve_vni); + + return 0; +} + +void dr_ste_v1_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_flex_parser_tnl_geneve_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tag; +} + +static int dr_ste_v1_build_tnl_header_0_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + struct mlx5dr_match_misc5 *misc5 = &value->misc5; + + DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_0, misc5, tunnel_header_0); + DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_1, misc5, tunnel_header_1); + + return 0; +} + +void dr_ste_v1_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER; + dr_ste_v1_build_tnl_header_0_1_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_header_0_1_tag; +} + +static int dr_ste_v1_build_register_0_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + + DR_STE_SET_TAG(register_0, tag, register_0_h, misc2, metadata_reg_c_0); + DR_STE_SET_TAG(register_0, tag, register_0_l, misc2, metadata_reg_c_1); + DR_STE_SET_TAG(register_0, tag, register_1_h, misc2, metadata_reg_c_2); + DR_STE_SET_TAG(register_0, tag, register_1_l, misc2, metadata_reg_c_3); + + return 0; +} + +void dr_ste_v1_build_register_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_register_0_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_STEERING_REGISTERS_0; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_register_0_tag; +} + +static int dr_ste_v1_build_register_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + + DR_STE_SET_TAG(register_1, tag, register_2_h, misc2, metadata_reg_c_4); + DR_STE_SET_TAG(register_1, tag, register_2_l, misc2, metadata_reg_c_5); + DR_STE_SET_TAG(register_1, tag, register_3_h, misc2, metadata_reg_c_6); + DR_STE_SET_TAG(register_1, tag, register_3_l, misc2, metadata_reg_c_7); + + return 0; +} + +void dr_ste_v1_build_register_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_register_1_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_STEERING_REGISTERS_1; + sb->byte_mask = 
mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_register_1_tag; +} + +static void dr_ste_v1_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value, + u8 *bit_mask) +{ + struct mlx5dr_match_misc *misc_mask = &value->misc; + + DR_STE_SET_ONES(src_gvmi_qp_v1, bit_mask, source_gvmi, misc_mask, source_port); + DR_STE_SET_ONES(src_gvmi_qp_v1, bit_mask, source_qp, misc_mask, source_sqn); + misc_mask->source_eswitch_owner_vhca_id = 0; +} + +static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc *misc = &value->misc; + struct mlx5dr_cmd_vport_cap *vport_cap; + struct mlx5dr_domain *dmn = sb->dmn; + struct mlx5dr_domain *vport_dmn; + u8 *bit_mask = sb->bit_mask; + + DR_STE_SET_TAG(src_gvmi_qp_v1, tag, source_qp, misc, source_sqn); + + if (sb->vhca_id_valid) { + /* Find port GVMI based on the eswitch_owner_vhca_id */ + if (misc->source_eswitch_owner_vhca_id == dmn->info.caps.gvmi) + vport_dmn = dmn; + else if (dmn->peer_dmn && (misc->source_eswitch_owner_vhca_id == + dmn->peer_dmn->info.caps.gvmi)) + vport_dmn = dmn->peer_dmn; + else + return -EINVAL; + + misc->source_eswitch_owner_vhca_id = 0; + } else { + vport_dmn = dmn; + } + + if (!MLX5_GET(ste_src_gvmi_qp_v1, bit_mask, source_gvmi)) + return 0; + + vport_cap = mlx5dr_domain_get_vport_cap(vport_dmn, misc->source_port); + if (!vport_cap) { + mlx5dr_err(dmn, "Vport 0x%x is disabled or invalid\n", + misc->source_port); + return -EINVAL; + } + + if (vport_cap->vport_gvmi) + MLX5_SET(ste_src_gvmi_qp_v1, tag, source_gvmi, vport_cap->vport_gvmi); + + misc->source_port = 0; + return 0; +} + +void dr_ste_v1_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_SRC_QP_GVMI; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_src_gvmi_qpn_tag; +} + +static void dr_ste_v1_set_flex_parser(u32 *misc4_field_id, + u32 *misc4_field_value, + bool *parser_is_used, + u8 *tag) +{ + u32 id = *misc4_field_id; + u8 *parser_ptr; + + if (id >= DR_NUM_OF_FLEX_PARSERS || parser_is_used[id]) + return; + + parser_is_used[id] = true; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, id); + + *(__be32 *)parser_ptr = cpu_to_be32(*misc4_field_value); + *misc4_field_id = 0; + *misc4_field_value = 0; +} + +static int dr_ste_v1_build_felx_parser_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc4 *misc_4_mask = &value->misc4; + bool parser_is_used[DR_NUM_OF_FLEX_PARSERS] = {}; + + dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_0, + &misc_4_mask->prog_sample_field_value_0, + parser_is_used, tag); + + dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_1, + &misc_4_mask->prog_sample_field_value_1, + parser_is_used, tag); + + dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_2, + &misc_4_mask->prog_sample_field_value_2, + parser_is_used, tag); + + dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_3, + &misc_4_mask->prog_sample_field_value_3, + parser_is_used, tag); + + return 0; +} + +void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_0; + dr_ste_v1_build_felx_parser_tag(mask, sb, sb->bit_mask); + sb->byte_mask = 
mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_felx_parser_tag; +} + +void dr_ste_v1_build_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_1; + dr_ste_v1_build_felx_parser_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_felx_parser_tag; +} + +static int +dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + u8 parser_id = sb->caps->flex_parser_id_geneve_tlv_option_0; + u8 *parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + + MLX5_SET(ste_flex_parser_0, parser_ptr, flex_parser_3, + misc3->geneve_tlv_option_0_data); + misc3->geneve_tlv_option_0_data = 0; + + return 0; +} + +void +dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag(mask, sb, sb->bit_mask); + + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_geneve_tlv_option_0 > 3 ? + DR_STE_V1_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V1_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag; +} + +static int +dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + u8 parser_id = sb->caps->flex_parser_id_geneve_tlv_option_0; + struct mlx5dr_match_misc *misc = &value->misc; + + if (misc->geneve_tlv_option_0_exist) { + MLX5_SET(ste_flex_parser_ok, tag, flex_parsers_ok, 1 << parser_id); + misc->geneve_tlv_option_0_exist = 0; + } + + return 0; +} + +void +dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_OK; + dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag; +} + +static int dr_ste_v1_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, gtpu_msg_flags, misc3, gtpu_msg_flags); + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, gtpu_msg_type, misc3, gtpu_msg_type); + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, gtpu_teid, misc3, gtpu_teid); + + return 0; +} + +void dr_ste_v1_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_flex_parser_tnl_gtpu_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_gtpu_tag; +} + +static int +dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3); + if 
(dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_teid)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_2)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3); + return 0; +} + +void +dr_ste_v1_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_0; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag; +} + +static int +dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_teid)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_2)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3); + return 0; +} + +void +dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_1; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag; +} + +static struct mlx5dr_ste_ctx ste_ctx_v1 = { + /* Builders */ + .build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init, + .build_eth_l3_ipv6_src_init = &dr_ste_v1_build_eth_l3_ipv6_src_init, + .build_eth_l3_ipv6_dst_init = &dr_ste_v1_build_eth_l3_ipv6_dst_init, + .build_eth_l3_ipv4_5_tuple_init = &dr_ste_v1_build_eth_l3_ipv4_5_tuple_init, + .build_eth_l2_src_init = &dr_ste_v1_build_eth_l2_src_init, + .build_eth_l2_dst_init = &dr_ste_v1_build_eth_l2_dst_init, + .build_eth_l2_tnl_init = &dr_ste_v1_build_eth_l2_tnl_init, + .build_eth_l3_ipv4_misc_init = &dr_ste_v1_build_eth_l3_ipv4_misc_init, + .build_eth_ipv6_l3_l4_init = &dr_ste_v1_build_eth_ipv6_l3_l4_init, + .build_mpls_init = &dr_ste_v1_build_mpls_init, + .build_tnl_gre_init = &dr_ste_v1_build_tnl_gre_init, + .build_tnl_mpls_init = &dr_ste_v1_build_tnl_mpls_init, + .build_tnl_mpls_over_udp_init = &dr_ste_v1_build_tnl_mpls_over_udp_init, + .build_tnl_mpls_over_gre_init = &dr_ste_v1_build_tnl_mpls_over_gre_init, + .build_icmp_init = &dr_ste_v1_build_icmp_init, + .build_general_purpose_init = &dr_ste_v1_build_general_purpose_init, + .build_eth_l4_misc_init = &dr_ste_v1_build_eth_l4_misc_init, + .build_tnl_vxlan_gpe_init = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init, + .build_tnl_geneve_init = &dr_ste_v1_build_flex_parser_tnl_geneve_init, + .build_tnl_geneve_tlv_opt_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init, + .build_tnl_geneve_tlv_opt_exist_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init, + .build_register_0_init = &dr_ste_v1_build_register_0_init, + .build_register_1_init = 
&dr_ste_v1_build_register_1_init, + .build_src_gvmi_qpn_init = &dr_ste_v1_build_src_gvmi_qpn_init, + .build_flex_parser_0_init = &dr_ste_v1_build_flex_parser_0_init, + .build_flex_parser_1_init = &dr_ste_v1_build_flex_parser_1_init, + .build_tnl_gtpu_init = &dr_ste_v1_build_flex_parser_tnl_gtpu_init, + .build_tnl_header_0_1_init = &dr_ste_v1_build_tnl_header_0_1_init, + .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_init, + .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_init, + + /* Getters and Setters */ + .ste_init = &dr_ste_v1_init, + .set_next_lu_type = &dr_ste_v1_set_next_lu_type, + .get_next_lu_type = &dr_ste_v1_get_next_lu_type, + .set_miss_addr = &dr_ste_v1_set_miss_addr, + .get_miss_addr = &dr_ste_v1_get_miss_addr, + .set_hit_addr = &dr_ste_v1_set_hit_addr, + .set_byte_mask = &dr_ste_v1_set_byte_mask, + .get_byte_mask = &dr_ste_v1_get_byte_mask, + /* Actions */ + .actions_caps = DR_STE_CTX_ACTION_CAP_TX_POP | + DR_STE_CTX_ACTION_CAP_RX_PUSH | + DR_STE_CTX_ACTION_CAP_RX_ENCAP | + DR_STE_CTX_ACTION_CAP_POP_MDFY, + .set_actions_rx = &dr_ste_v1_set_actions_rx, + .set_actions_tx = &dr_ste_v1_set_actions_tx, + .modify_field_arr_sz = ARRAY_SIZE(dr_ste_v1_action_modify_field_arr), + .modify_field_arr = dr_ste_v1_action_modify_field_arr, + .set_action_set = &dr_ste_v1_set_action_set, + .set_action_add = &dr_ste_v1_set_action_add, + .set_action_copy = &dr_ste_v1_set_action_copy, + .set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list, + /* Send */ + .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, +}; + +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v1(void) +{ + return &ste_ctx_v1; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h new file mode 100644 index 000000000..8a1d49790 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#ifndef _DR_STE_V1_ +#define _DR_STE_V1_ + +#include "dr_types.h" +#include "dr_ste.h" + +void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr); +u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p); +void dr_ste_v1_set_byte_mask(u8 *hw_ste_p, u16 byte_mask); +u16 dr_ste_v1_get_byte_mask(u8 *hw_ste_p); +void dr_ste_v1_set_next_lu_type(u8 *hw_ste_p, u16 lu_type); +u16 dr_ste_v1_get_next_lu_type(u8 *hw_ste_p); +void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size); +void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, bool is_rx, u16 gvmi); +void dr_ste_v1_prepare_for_postsend(u8 *hw_ste_p, u32 ste_size); +void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, u8 *action_type_set, + u32 actions_caps, u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, u32 *added_stes); +void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, u8 *action_type_set, + u32 actions_caps, u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, u32 *added_stes); +void dr_ste_v1_set_action_set(u8 *d_action, u8 hw_field, u8 shifter, + u8 length, u32 data); +void dr_ste_v1_set_action_add(u8 *d_action, u8 hw_field, u8 shifter, + u8 length, u32 data); +void dr_ste_v1_set_action_copy(u8 *d_action, u8 dst_hw_field, u8 dst_shifter, + u8 dst_len, u8 src_hw_field, u8 src_shifter); +int dr_ste_v1_set_action_decap_l3_list(void *data, u32 data_sz, u8 *hw_action, + u32 hw_action_sz, u16 *used_hw_action_num); +void dr_ste_v1_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l3_ipv6_src_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l3_ipv4_5_tuple_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l2_src_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l2_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l2_tnl_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l3_ipv4_misc_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_ipv6_l3_l4_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_mpls_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_mpls_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_general_purpose_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l4_misc_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void 
dr_ste_v1_build_register_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_register_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); + +#endif /* _DR_STE_V1_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c new file mode 100644 index 000000000..c60fddd12 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "dr_ste_v1.h" + +enum { + DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_0 = 0x00, + DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1 = 0x01, + DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_2 = 0x02, + DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_0 = 0x08, + DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_1 = 0x09, + DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0 = 0x0e, + DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0 = 0x18, + DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_1 = 0x19, + DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_0 = 0x40, + DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_1 = 0x41, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_0 = 0x44, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_1 = 0x45, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_2 = 0x46, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_3 = 0x47, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_0 = 0x4c, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_1 = 0x4d, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_2 = 0x4e, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_3 = 0x4f, + DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_0 = 0x5e, + DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_1 = 0x5f, + DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_0 = 0x6f, + DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70, + DR_STE_V2_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b, + DR_STE_V2_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_0 = 0x90, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_1 = 0x91, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_0 = 0x92, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_1 = 0x93, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_0 = 0x94, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_1 = 0x95, +}; + +static const struct mlx5dr_ste_action_modify_field dr_ste_v2_action_modify_field_arr[] = { + [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_1, .start = 16, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1, .start = 0, .end = 15, + }, + 
[MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1, .start = 16, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 18, .end = 23, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_1, .start = 16, .end = 24, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_2, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_3, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_2, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_3, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_GNRL_PURPOSE, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_METADATA_2_CQE, .start = 0, .end = 31, + }, + 
[MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_2, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_EMD_31_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_EMD_47_32] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_0, .start = 0, .end = 15, + }, +}; + +static struct mlx5dr_ste_ctx ste_ctx_v2 = { + /* Builders */ + .build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init, + .build_eth_l3_ipv6_src_init = &dr_ste_v1_build_eth_l3_ipv6_src_init, + .build_eth_l3_ipv6_dst_init = &dr_ste_v1_build_eth_l3_ipv6_dst_init, + .build_eth_l3_ipv4_5_tuple_init = &dr_ste_v1_build_eth_l3_ipv4_5_tuple_init, + .build_eth_l2_src_init = &dr_ste_v1_build_eth_l2_src_init, + .build_eth_l2_dst_init = &dr_ste_v1_build_eth_l2_dst_init, + .build_eth_l2_tnl_init = &dr_ste_v1_build_eth_l2_tnl_init, + .build_eth_l3_ipv4_misc_init = &dr_ste_v1_build_eth_l3_ipv4_misc_init, + .build_eth_ipv6_l3_l4_init = &dr_ste_v1_build_eth_ipv6_l3_l4_init, + .build_mpls_init = &dr_ste_v1_build_mpls_init, + .build_tnl_gre_init = &dr_ste_v1_build_tnl_gre_init, + .build_tnl_mpls_init = &dr_ste_v1_build_tnl_mpls_init, + .build_tnl_mpls_over_udp_init = &dr_ste_v1_build_tnl_mpls_over_udp_init, + .build_tnl_mpls_over_gre_init = &dr_ste_v1_build_tnl_mpls_over_gre_init, + .build_icmp_init = &dr_ste_v1_build_icmp_init, + .build_general_purpose_init = &dr_ste_v1_build_general_purpose_init, + .build_eth_l4_misc_init = &dr_ste_v1_build_eth_l4_misc_init, + .build_tnl_vxlan_gpe_init = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init, + .build_tnl_geneve_init = &dr_ste_v1_build_flex_parser_tnl_geneve_init, + .build_tnl_geneve_tlv_opt_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init, + .build_tnl_geneve_tlv_opt_exist_init = + &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init, + .build_register_0_init = &dr_ste_v1_build_register_0_init, + .build_register_1_init = &dr_ste_v1_build_register_1_init, + .build_src_gvmi_qpn_init = &dr_ste_v1_build_src_gvmi_qpn_init, + .build_flex_parser_0_init = &dr_ste_v1_build_flex_parser_0_init, + .build_flex_parser_1_init = &dr_ste_v1_build_flex_parser_1_init, + .build_tnl_gtpu_init = &dr_ste_v1_build_flex_parser_tnl_gtpu_init, + .build_tnl_header_0_1_init = &dr_ste_v1_build_tnl_header_0_1_init, + .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_init, + .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_init, + + /* Getters and 
Setters */ + .ste_init = &dr_ste_v1_init, + .set_next_lu_type = &dr_ste_v1_set_next_lu_type, + .get_next_lu_type = &dr_ste_v1_get_next_lu_type, + .set_miss_addr = &dr_ste_v1_set_miss_addr, + .get_miss_addr = &dr_ste_v1_get_miss_addr, + .set_hit_addr = &dr_ste_v1_set_hit_addr, + .set_byte_mask = &dr_ste_v1_set_byte_mask, + .get_byte_mask = &dr_ste_v1_get_byte_mask, + + /* Actions */ + .actions_caps = DR_STE_CTX_ACTION_CAP_TX_POP | + DR_STE_CTX_ACTION_CAP_RX_PUSH | + DR_STE_CTX_ACTION_CAP_RX_ENCAP, + .set_actions_rx = &dr_ste_v1_set_actions_rx, + .set_actions_tx = &dr_ste_v1_set_actions_tx, + .modify_field_arr_sz = ARRAY_SIZE(dr_ste_v2_action_modify_field_arr), + .modify_field_arr = dr_ste_v2_action_modify_field_arr, + .set_action_set = &dr_ste_v1_set_action_set, + .set_action_add = &dr_ste_v1_set_action_add, + .set_action_copy = &dr_ste_v1_set_action_copy, + .set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list, + + /* Send */ + .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, +}; + +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v2(void) +{ + return &ste_ctx_v2; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c new file mode 100644 index 000000000..f68461b13 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c @@ -0,0 +1,319 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "dr_types.h" + +static int dr_table_set_miss_action_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_table_rx_tx *nic_tbl, + struct mlx5dr_action *action) +{ + struct mlx5dr_matcher_rx_tx *last_nic_matcher = NULL; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_ste_htbl *last_htbl; + struct mlx5dr_icm_chunk *chunk; + int ret; + + if (!list_empty(&nic_tbl->nic_matcher_list)) + last_nic_matcher = list_last_entry(&nic_tbl->nic_matcher_list, + struct mlx5dr_matcher_rx_tx, + list_node); + + if (last_nic_matcher) + last_htbl = last_nic_matcher->e_anchor; + else + last_htbl = nic_tbl->s_anchor; + + if (action) { + chunk = nic_tbl->nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX ? 
+ action->dest_tbl->tbl->rx.s_anchor->chunk : + action->dest_tbl->tbl->tx.s_anchor->chunk; + nic_tbl->default_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(chunk); + } else { + nic_tbl->default_icm_addr = nic_tbl->nic_dmn->default_icm_addr; + } + + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_tbl->default_icm_addr; + + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_tbl->nic_dmn, + last_htbl, &info, true); + if (ret) + mlx5dr_dbg(dmn, "Failed to set NIC RX/TX miss action, ret %d\n", ret); + + return ret; +} + +int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, + struct mlx5dr_action *action) +{ + int ret = -EOPNOTSUPP; + + if (action && action->action_type != DR_ACTION_TYP_FT) + return -EOPNOTSUPP; + + mlx5dr_domain_lock(tbl->dmn); + + if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX || + tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) { + ret = dr_table_set_miss_action_nic(tbl->dmn, &tbl->rx, action); + if (ret) + goto out; + } + + if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX || + tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) { + ret = dr_table_set_miss_action_nic(tbl->dmn, &tbl->tx, action); + if (ret) + goto out; + } + + if (ret) + goto out; + + /* Release old action */ + if (tbl->miss_action) + refcount_dec(&tbl->miss_action->refcount); + + /* Set new miss action */ + tbl->miss_action = action; + if (tbl->miss_action) + refcount_inc(&action->refcount); + +out: + mlx5dr_domain_unlock(tbl->dmn); + return ret; +} + +static void dr_table_uninit_nic(struct mlx5dr_table_rx_tx *nic_tbl) +{ + mlx5dr_htbl_put(nic_tbl->s_anchor); +} + +static void dr_table_uninit_fdb(struct mlx5dr_table *tbl) +{ + dr_table_uninit_nic(&tbl->rx); + dr_table_uninit_nic(&tbl->tx); +} + +static void dr_table_uninit(struct mlx5dr_table *tbl) +{ + mlx5dr_domain_lock(tbl->dmn); + + switch (tbl->dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + dr_table_uninit_nic(&tbl->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + dr_table_uninit_nic(&tbl->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + dr_table_uninit_fdb(tbl); + break; + default: + WARN_ON(true); + break; + } + + mlx5dr_domain_unlock(tbl->dmn); +} + +static int dr_table_init_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_table_rx_tx *nic_tbl) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn; + struct mlx5dr_htbl_connect_info info; + int ret; + + INIT_LIST_HEAD(&nic_tbl->nic_matcher_list); + + nic_tbl->default_icm_addr = nic_dmn->default_icm_addr; + + nic_tbl->s_anchor = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + DR_CHUNK_SIZE_1, + MLX5DR_STE_LU_TYPE_DONT_CARE, + 0); + if (!nic_tbl->s_anchor) { + mlx5dr_err(dmn, "Failed allocating htbl\n"); + return -ENOMEM; + } + + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_dmn->default_icm_addr; + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, + nic_tbl->s_anchor, + &info, true); + if (ret) { + mlx5dr_err(dmn, "Failed int and send htbl\n"); + goto free_s_anchor; + } + + mlx5dr_htbl_get(nic_tbl->s_anchor); + + return 0; + +free_s_anchor: + mlx5dr_ste_htbl_free(nic_tbl->s_anchor); + return ret; +} + +static int dr_table_init_fdb(struct mlx5dr_table *tbl) +{ + int ret; + + ret = dr_table_init_nic(tbl->dmn, &tbl->rx); + if (ret) + return ret; + + ret = dr_table_init_nic(tbl->dmn, &tbl->tx); + if (ret) + goto destroy_rx; + + return 0; + +destroy_rx: + dr_table_uninit_nic(&tbl->rx); + return ret; +} + +static int dr_table_init(struct mlx5dr_table *tbl) +{ + int ret = 0; + + INIT_LIST_HEAD(&tbl->matcher_list); + + mlx5dr_domain_lock(tbl->dmn); + + switch (tbl->dmn->type) { + case 
MLX5DR_DOMAIN_TYPE_NIC_RX: + tbl->table_type = MLX5_FLOW_TABLE_TYPE_NIC_RX; + tbl->rx.nic_dmn = &tbl->dmn->info.rx; + ret = dr_table_init_nic(tbl->dmn, &tbl->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + tbl->table_type = MLX5_FLOW_TABLE_TYPE_NIC_TX; + tbl->tx.nic_dmn = &tbl->dmn->info.tx; + ret = dr_table_init_nic(tbl->dmn, &tbl->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + tbl->table_type = MLX5_FLOW_TABLE_TYPE_FDB; + tbl->rx.nic_dmn = &tbl->dmn->info.rx; + tbl->tx.nic_dmn = &tbl->dmn->info.tx; + ret = dr_table_init_fdb(tbl); + break; + default: + WARN_ON(true); + break; + } + + mlx5dr_domain_unlock(tbl->dmn); + + return ret; +} + +static int dr_table_destroy_sw_owned_tbl(struct mlx5dr_table *tbl) +{ + return mlx5dr_cmd_destroy_flow_table(tbl->dmn->mdev, + tbl->table_id, + tbl->table_type); +} + +static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl, u16 uid) +{ + bool en_encap = !!(tbl->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT); + bool en_decap = !!(tbl->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + struct mlx5dr_cmd_create_flow_table_attr ft_attr = {}; + u64 icm_addr_rx = 0; + u64 icm_addr_tx = 0; + int ret; + + if (tbl->rx.s_anchor) + icm_addr_rx = mlx5dr_icm_pool_get_chunk_icm_addr(tbl->rx.s_anchor->chunk); + + if (tbl->tx.s_anchor) + icm_addr_tx = mlx5dr_icm_pool_get_chunk_icm_addr(tbl->tx.s_anchor->chunk); + + ft_attr.table_type = tbl->table_type; + ft_attr.icm_addr_rx = icm_addr_rx; + ft_attr.icm_addr_tx = icm_addr_tx; + ft_attr.level = tbl->dmn->info.caps.max_ft_level - 1; + ft_attr.sw_owner = true; + ft_attr.decap_en = en_decap; + ft_attr.reformat_en = en_encap; + ft_attr.uid = uid; + + ret = mlx5dr_cmd_create_flow_table(tbl->dmn->mdev, &ft_attr, + NULL, &tbl->table_id); + + return ret; +} + +struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level, + u32 flags, u16 uid) +{ + struct mlx5dr_table *tbl; + int ret; + + refcount_inc(&dmn->refcount); + + tbl = kzalloc(sizeof(*tbl), GFP_KERNEL); + if (!tbl) + goto dec_ref; + + tbl->dmn = dmn; + tbl->level = level; + tbl->flags = flags; + refcount_set(&tbl->refcount, 1); + + ret = dr_table_init(tbl); + if (ret) + goto free_tbl; + + ret = dr_table_create_sw_owned_tbl(tbl, uid); + if (ret) + goto uninit_tbl; + + INIT_LIST_HEAD(&tbl->dbg_node); + mlx5dr_dbg_tbl_add(tbl); + return tbl; + +uninit_tbl: + dr_table_uninit(tbl); +free_tbl: + kfree(tbl); +dec_ref: + refcount_dec(&dmn->refcount); + return NULL; +} + +int mlx5dr_table_destroy(struct mlx5dr_table *tbl) +{ + int ret; + + if (WARN_ON_ONCE(refcount_read(&tbl->refcount) > 1)) + return -EBUSY; + + mlx5dr_dbg_tbl_del(tbl); + ret = dr_table_destroy_sw_owned_tbl(tbl); + if (ret) + return ret; + + dr_table_uninit(tbl); + + if (tbl->miss_action) + refcount_dec(&tbl->miss_action->refcount); + + refcount_dec(&tbl->dmn->refcount); + kfree(tbl); + + return ret; +} + +u32 mlx5dr_table_get_id(struct mlx5dr_table *tbl) +{ + return tbl->table_id; +} + +struct mlx5dr_table *mlx5dr_table_get_from_fs_ft(struct mlx5_flow_table *ft) +{ + return ft->fs_dr_table.dr_table; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h new file mode 100644 index 000000000..1777a1e50 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -0,0 +1,1472 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019, Mellanox Technologies */ + +#ifndef _DR_TYPES_ +#define _DR_TYPES_ + +#include <linux/mlx5/vport.h> +#include <linux/refcount.h> +#include 
"fs_core.h" +#include "wq.h" +#include "lib/mlx5.h" +#include "mlx5_ifc_dr.h" +#include "mlx5dr.h" +#include "dr_dbg.h" + +#define DR_RULE_MAX_STES 18 +#define DR_ACTION_MAX_STES 5 +#define DR_STE_SVLAN 0x1 +#define DR_STE_CVLAN 0x2 +#define DR_SZ_MATCH_PARAM (MLX5_ST_SZ_DW_MATCH_PARAM * 4) +#define DR_NUM_OF_FLEX_PARSERS 8 +#define DR_STE_MAX_FLEX_0_ID 3 +#define DR_STE_MAX_FLEX_1_ID 7 + +#define mlx5dr_err(dmn, arg...) mlx5_core_err((dmn)->mdev, ##arg) +#define mlx5dr_info(dmn, arg...) mlx5_core_info((dmn)->mdev, ##arg) +#define mlx5dr_dbg(dmn, arg...) mlx5_core_dbg((dmn)->mdev, ##arg) + +static inline bool dr_is_flex_parser_0_id(u8 parser_id) +{ + return parser_id <= DR_STE_MAX_FLEX_0_ID; +} + +static inline bool dr_is_flex_parser_1_id(u8 parser_id) +{ + return parser_id > DR_STE_MAX_FLEX_0_ID; +} + +enum mlx5dr_icm_chunk_size { + DR_CHUNK_SIZE_1, + DR_CHUNK_SIZE_MIN = DR_CHUNK_SIZE_1, /* keep updated when changing */ + DR_CHUNK_SIZE_2, + DR_CHUNK_SIZE_4, + DR_CHUNK_SIZE_8, + DR_CHUNK_SIZE_16, + DR_CHUNK_SIZE_32, + DR_CHUNK_SIZE_64, + DR_CHUNK_SIZE_128, + DR_CHUNK_SIZE_256, + DR_CHUNK_SIZE_512, + DR_CHUNK_SIZE_1K, + DR_CHUNK_SIZE_2K, + DR_CHUNK_SIZE_4K, + DR_CHUNK_SIZE_8K, + DR_CHUNK_SIZE_16K, + DR_CHUNK_SIZE_32K, + DR_CHUNK_SIZE_64K, + DR_CHUNK_SIZE_128K, + DR_CHUNK_SIZE_256K, + DR_CHUNK_SIZE_512K, + DR_CHUNK_SIZE_1024K, + DR_CHUNK_SIZE_2048K, + DR_CHUNK_SIZE_MAX, +}; + +enum mlx5dr_icm_type { + DR_ICM_TYPE_STE, + DR_ICM_TYPE_MODIFY_ACTION, +}; + +static inline enum mlx5dr_icm_chunk_size +mlx5dr_icm_next_higher_chunk(enum mlx5dr_icm_chunk_size chunk) +{ + chunk += 2; + if (chunk < DR_CHUNK_SIZE_MAX) + return chunk; + + return DR_CHUNK_SIZE_MAX; +} + +enum { + DR_STE_SIZE = 64, + DR_STE_SIZE_CTRL = 32, + DR_STE_SIZE_TAG = 16, + DR_STE_SIZE_MASK = 16, + DR_STE_SIZE_REDUCED = DR_STE_SIZE - DR_STE_SIZE_MASK, +}; + +enum mlx5dr_ste_ctx_action_cap { + DR_STE_CTX_ACTION_CAP_NONE = 0, + DR_STE_CTX_ACTION_CAP_TX_POP = 1 << 0, + DR_STE_CTX_ACTION_CAP_RX_PUSH = 1 << 1, + DR_STE_CTX_ACTION_CAP_RX_ENCAP = 1 << 2, + DR_STE_CTX_ACTION_CAP_POP_MDFY = 1 << 3, +}; + +enum { + DR_MODIFY_ACTION_SIZE = 8, +}; + +enum mlx5dr_matcher_criteria { + DR_MATCHER_CRITERIA_EMPTY = 0, + DR_MATCHER_CRITERIA_OUTER = 1 << 0, + DR_MATCHER_CRITERIA_MISC = 1 << 1, + DR_MATCHER_CRITERIA_INNER = 1 << 2, + DR_MATCHER_CRITERIA_MISC2 = 1 << 3, + DR_MATCHER_CRITERIA_MISC3 = 1 << 4, + DR_MATCHER_CRITERIA_MISC4 = 1 << 5, + DR_MATCHER_CRITERIA_MISC5 = 1 << 6, + DR_MATCHER_CRITERIA_MAX = 1 << 7, +}; + +enum mlx5dr_action_type { + DR_ACTION_TYP_TNL_L2_TO_L2, + DR_ACTION_TYP_L2_TO_TNL_L2, + DR_ACTION_TYP_TNL_L3_TO_L2, + DR_ACTION_TYP_L2_TO_TNL_L3, + DR_ACTION_TYP_DROP, + DR_ACTION_TYP_QP, + DR_ACTION_TYP_FT, + DR_ACTION_TYP_CTR, + DR_ACTION_TYP_TAG, + DR_ACTION_TYP_MODIFY_HDR, + DR_ACTION_TYP_VPORT, + DR_ACTION_TYP_POP_VLAN, + DR_ACTION_TYP_PUSH_VLAN, + DR_ACTION_TYP_INSERT_HDR, + DR_ACTION_TYP_REMOVE_HDR, + DR_ACTION_TYP_SAMPLER, + DR_ACTION_TYP_ASO_FLOW_METER, + DR_ACTION_TYP_MAX, +}; + +enum mlx5dr_ipv { + DR_RULE_IPV4, + DR_RULE_IPV6, + DR_RULE_IPV_MAX, +}; + +struct mlx5dr_icm_pool; +struct mlx5dr_icm_chunk; +struct mlx5dr_icm_buddy_mem; +struct mlx5dr_ste_htbl; +struct mlx5dr_match_param; +struct mlx5dr_cmd_caps; +struct mlx5dr_rule_rx_tx; +struct mlx5dr_matcher_rx_tx; +struct mlx5dr_ste_ctx; + +struct mlx5dr_ste { + /* refcount: indicates the num of rules that using this ste */ + u32 refcount; + + /* this ste is part of a rule, located in ste's chain */ + u8 ste_chain_location; + + /* attached to the miss_list head at each 
htbl entry */ + struct list_head miss_list_node; + + /* this ste is member of htbl */ + struct mlx5dr_ste_htbl *htbl; + + struct mlx5dr_ste_htbl *next_htbl; + + /* The rule this STE belongs to */ + struct mlx5dr_rule_rx_tx *rule_rx_tx; +}; + +struct mlx5dr_ste_htbl_ctrl { + /* total number of valid entries belonging to this hash table. This + * includes the non collision and collision entries + */ + unsigned int num_of_valid_entries; + + /* total number of collisions entries attached to this table */ + unsigned int num_of_collisions; +}; + +struct mlx5dr_ste_htbl { + u16 lu_type; + u16 byte_mask; + u32 refcount; + struct mlx5dr_icm_chunk *chunk; + struct mlx5dr_ste *pointing_ste; + struct mlx5dr_ste_htbl_ctrl ctrl; +}; + +struct mlx5dr_ste_send_info { + struct mlx5dr_ste *ste; + struct list_head send_list; + u16 size; + u16 offset; + u8 data_cont[DR_STE_SIZE]; + u8 *data; +}; + +void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size, + u16 offset, u8 *data, + struct mlx5dr_ste_send_info *ste_info, + struct list_head *send_list, + bool copy_data); + +struct mlx5dr_ste_build { + u8 inner:1; + u8 rx:1; + u8 vhca_id_valid:1; + struct mlx5dr_domain *dmn; + struct mlx5dr_cmd_caps *caps; + u16 lu_type; + u16 byte_mask; + u8 bit_mask[DR_STE_SIZE_MASK]; + int (*ste_build_tag_func)(struct mlx5dr_match_param *spec, + struct mlx5dr_ste_build *sb, + u8 *tag); +}; + +struct mlx5dr_ste_htbl * +mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size, + u16 lu_type, u16 byte_mask); + +int mlx5dr_ste_htbl_free(struct mlx5dr_ste_htbl *htbl); + +static inline void mlx5dr_htbl_put(struct mlx5dr_ste_htbl *htbl) +{ + htbl->refcount--; + if (!htbl->refcount) + mlx5dr_ste_htbl_free(htbl); +} + +static inline void mlx5dr_htbl_get(struct mlx5dr_ste_htbl *htbl) +{ + htbl->refcount++; +} + +/* STE utils */ +u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl); +void mlx5dr_ste_set_miss_addr(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste, u64 miss_addr); +void mlx5dr_ste_set_hit_addr(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste, u64 icm_addr, u32 ht_size); +void mlx5dr_ste_set_hit_addr_by_next_htbl(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste, + struct mlx5dr_ste_htbl *next_htbl); +void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask); +bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher, + u8 ste_location); +u64 mlx5dr_ste_get_icm_addr(struct mlx5dr_ste *ste); +u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste); +struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste); + +#define MLX5DR_MAX_VLANS 2 + +struct mlx5dr_ste_actions_attr { + u32 modify_index; + u16 modify_actions; + u32 decap_index; + u16 decap_actions; + u8 decap_with_vlan:1; + u64 final_icm_addr; + u32 flow_tag; + u32 ctr_id; + u16 gvmi; + u16 hit_gvmi; + struct { + u32 id; + u32 size; + u8 param_0; + u8 param_1; + } reformat; + struct { + int count; + u32 headers[MLX5DR_MAX_VLANS]; + } vlans; + + struct { + u32 obj_id; + u32 offset; + u8 dest_reg_id; + u8 init_color; + } aso_flow_meter; +}; + +void mlx5dr_ste_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_domain *dmn, + u8 *action_type_set, + u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes); +void mlx5dr_ste_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_domain *dmn, + u8 *action_type_set, + u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes); + +void mlx5dr_ste_set_action_set(struct mlx5dr_ste_ctx *ste_ctx, + 
__be64 *hw_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data); +void mlx5dr_ste_set_action_add(struct mlx5dr_ste_ctx *ste_ctx, + __be64 *hw_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data); +void mlx5dr_ste_set_action_copy(struct mlx5dr_ste_ctx *ste_ctx, + __be64 *hw_action, + u8 dst_hw_field, + u8 dst_shifter, + u8 dst_len, + u8 src_hw_field, + u8 src_shifter); +int mlx5dr_ste_set_action_decap_l3_list(struct mlx5dr_ste_ctx *ste_ctx, + void *data, + u32 data_sz, + u8 *hw_action, + u32 hw_action_sz, + u16 *used_hw_action_num); + +const struct mlx5dr_ste_action_modify_field * +mlx5dr_ste_conv_modify_hdr_sw_field(struct mlx5dr_ste_ctx *ste_ctx, u16 sw_field); + +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx(u8 version); +void mlx5dr_ste_free(struct mlx5dr_ste *ste, + struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher); +static inline void mlx5dr_ste_put(struct mlx5dr_ste *ste, + struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + ste->refcount--; + if (!ste->refcount) + mlx5dr_ste_free(ste, matcher, nic_matcher); +} + +/* initial as 0, increased only when ste appears in a new rule */ +static inline void mlx5dr_ste_get(struct mlx5dr_ste *ste) +{ + ste->refcount++; +} + +static inline bool mlx5dr_ste_is_not_used(struct mlx5dr_ste *ste) +{ + return !ste->refcount; +} + +bool mlx5dr_ste_equal_tag(void *src, void *dst); +int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *ste, + u8 *cur_hw_ste, + enum mlx5dr_icm_chunk_size log_table_size); + +/* STE build functions */ +int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn, + u8 match_criteria, + struct mlx5dr_match_param *mask, + struct mlx5dr_match_param *value); +int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_match_param *value, + u8 *ste_arr); +void mlx5dr_ste_build_eth_l2_src_dst(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *builder, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l3_ipv4_misc(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l3_ipv6_dst(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l3_ipv6_src(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l2_tnl(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_ipv6_l3_l4(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l4_misc(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void 
mlx5dr_ste_build_tnl_gre(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_mpls(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_mpls(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_mpls_over_gre(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_mpls_over_udp(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_icmp(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_vxlan_gpe(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_geneve(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_geneve_tlv_opt(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_geneve_tlv_opt_exist(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_gtpu(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_gtpu_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_gtpu_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_header_0_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_register_0(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_register_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn, + bool inner, bool rx); +void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx); + +/* Actions utils */ +int 
mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_action *actions[], + u32 num_actions, + u8 *ste_arr, + u32 *new_hw_ste_arr_sz); + +struct mlx5dr_match_spec { + u32 smac_47_16; /* Source MAC address of incoming packet */ + /* Incoming packet Ethertype - this is the Ethertype + * following the last VLAN tag of the packet + */ + u32 smac_15_0:16; /* Source MAC address of incoming packet */ + u32 ethertype:16; + + u32 dmac_47_16; /* Destination MAC address of incoming packet */ + + u32 dmac_15_0:16; /* Destination MAC address of incoming packet */ + /* Priority of first VLAN tag in the incoming packet. + * Valid only when cvlan_tag==1 or svlan_tag==1 + */ + u32 first_prio:3; + /* CFI bit of first VLAN tag in the incoming packet. + * Valid only when cvlan_tag==1 or svlan_tag==1 + */ + u32 first_cfi:1; + /* VLAN ID of first VLAN tag in the incoming packet. + * Valid only when cvlan_tag==1 or svlan_tag==1 + */ + u32 first_vid:12; + + u32 ip_protocol:8; /* IP protocol */ + /* Differentiated Services Code Point derived from + * Traffic Class/TOS field of IPv6/v4 + */ + u32 ip_dscp:6; + /* Explicit Congestion Notification derived from + * Traffic Class/TOS field of IPv6/v4 + */ + u32 ip_ecn:2; + /* The first vlan in the packet is c-vlan (0x8100). + * cvlan_tag and svlan_tag cannot be set together + */ + u32 cvlan_tag:1; + /* The first vlan in the packet is s-vlan (0x8a88). + * cvlan_tag and svlan_tag cannot be set together + */ + u32 svlan_tag:1; + u32 frag:1; /* Packet is an IP fragment */ + u32 ip_version:4; /* IP version */ + /* TCP flags. ;Bit 0: FIN;Bit 1: SYN;Bit 2: RST;Bit 3: PSH;Bit 4: ACK; + * Bit 5: URG;Bit 6: ECE;Bit 7: CWR;Bit 8: NS + */ + u32 tcp_flags:9; + + /* TCP source port.;tcp and udp sport/dport are mutually exclusive */ + u32 tcp_sport:16; + /* TCP destination port. 
+ * tcp and udp sport/dport are mutually exclusive + */ + u32 tcp_dport:16; + + u32 reserved_auto1:16; + u32 ipv4_ihl:4; + u32 reserved_auto2:4; + u32 ttl_hoplimit:8; + + /* UDP source port.;tcp and udp sport/dport are mutually exclusive */ + u32 udp_sport:16; + /* UDP destination port.;tcp and udp sport/dport are mutually exclusive */ + u32 udp_dport:16; + + /* IPv6 source address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 src_ip_127_96; + /* IPv6 source address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 src_ip_95_64; + /* IPv6 source address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 src_ip_63_32; + /* IPv6 source address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 src_ip_31_0; + /* IPv6 destination address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 dst_ip_127_96; + /* IPv6 destination address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 dst_ip_95_64; + /* IPv6 destination address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 dst_ip_63_32; + /* IPv6 destination address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 dst_ip_31_0; +}; + +struct mlx5dr_match_misc { + /* used with GRE, checksum exist when gre_c_present == 1 */ + u32 gre_c_present:1; + u32 reserved_auto1:1; + /* used with GRE, key exist when gre_k_present == 1 */ + u32 gre_k_present:1; + /* used with GRE, sequence number exist when gre_s_present == 1 */ + u32 gre_s_present:1; + u32 source_vhca_port:4; + u32 source_sqn:24; /* Source SQN */ + + u32 source_eswitch_owner_vhca_id:16; + /* Source port.;0xffff determines wire port */ + u32 source_port:16; + + /* Priority of second VLAN tag in the outer header of the incoming packet. + * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1 + */ + u32 outer_second_prio:3; + /* CFI bit of first VLAN tag in the outer header of the incoming packet. + * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1 + */ + u32 outer_second_cfi:1; + /* VLAN ID of first VLAN tag the outer header of the incoming packet. + * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1 + */ + u32 outer_second_vid:12; + /* Priority of second VLAN tag in the inner header of the incoming packet. + * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 + */ + u32 inner_second_prio:3; + /* CFI bit of first VLAN tag in the inner header of the incoming packet. + * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 + */ + u32 inner_second_cfi:1; + /* VLAN ID of first VLAN tag the inner header of the incoming packet. 
+ * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 + */ + u32 inner_second_vid:12; + + u32 outer_second_cvlan_tag:1; + u32 inner_second_cvlan_tag:1; + /* The second vlan in the outer header of the packet is c-vlan (0x8100). + * outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together + */ + u32 outer_second_svlan_tag:1; + /* The second vlan in the inner header of the packet is c-vlan (0x8100). + * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together + */ + u32 inner_second_svlan_tag:1; + /* The second vlan in the outer header of the packet is s-vlan (0x8a88). + * outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together + */ + u32 reserved_auto2:12; + /* The second vlan in the inner header of the packet is s-vlan (0x8a88). + * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together + */ + u32 gre_protocol:16; /* GRE Protocol (outer) */ + + u32 gre_key_h:24; /* GRE Key[31:8] (outer) */ + u32 gre_key_l:8; /* GRE Key [7:0] (outer) */ + + u32 vxlan_vni:24; /* VXLAN VNI (outer) */ + u32 reserved_auto3:8; + + u32 geneve_vni:24; /* GENEVE VNI field (outer) */ + u32 reserved_auto4:6; + u32 geneve_tlv_option_0_exist:1; + u32 geneve_oam:1; /* GENEVE OAM field (outer) */ + + u32 reserved_auto5:12; + u32 outer_ipv6_flow_label:20; /* Flow label of incoming IPv6 packet (outer) */ + + u32 reserved_auto6:12; + u32 inner_ipv6_flow_label:20; /* Flow label of incoming IPv6 packet (inner) */ + + u32 reserved_auto7:10; + u32 geneve_opt_len:6; /* GENEVE OptLen (outer) */ + u32 geneve_protocol_type:16; /* GENEVE protocol type (outer) */ + + u32 reserved_auto8:8; + u32 bth_dst_qp:24; /* Destination QP in BTH header */ + + u32 reserved_auto9; + u32 outer_esp_spi; + u32 reserved_auto10[3]; +}; + +struct mlx5dr_match_misc2 { + u32 outer_first_mpls_label:20; /* First MPLS LABEL (outer) */ + u32 outer_first_mpls_exp:3; /* First MPLS EXP (outer) */ + u32 outer_first_mpls_s_bos:1; /* First MPLS S_BOS (outer) */ + u32 outer_first_mpls_ttl:8; /* First MPLS TTL (outer) */ + + u32 inner_first_mpls_label:20; /* First MPLS LABEL (inner) */ + u32 inner_first_mpls_exp:3; /* First MPLS EXP (inner) */ + u32 inner_first_mpls_s_bos:1; /* First MPLS S_BOS (inner) */ + u32 inner_first_mpls_ttl:8; /* First MPLS TTL (inner) */ + + u32 outer_first_mpls_over_gre_label:20; /* last MPLS LABEL (outer) */ + u32 outer_first_mpls_over_gre_exp:3; /* last MPLS EXP (outer) */ + u32 outer_first_mpls_over_gre_s_bos:1; /* last MPLS S_BOS (outer) */ + u32 outer_first_mpls_over_gre_ttl:8; /* last MPLS TTL (outer) */ + + u32 outer_first_mpls_over_udp_label:20; /* last MPLS LABEL (outer) */ + u32 outer_first_mpls_over_udp_exp:3; /* last MPLS EXP (outer) */ + u32 outer_first_mpls_over_udp_s_bos:1; /* last MPLS S_BOS (outer) */ + u32 outer_first_mpls_over_udp_ttl:8; /* last MPLS TTL (outer) */ + + u32 metadata_reg_c_7; /* metadata_reg_c_7 */ + u32 metadata_reg_c_6; /* metadata_reg_c_6 */ + u32 metadata_reg_c_5; /* metadata_reg_c_5 */ + u32 metadata_reg_c_4; /* metadata_reg_c_4 */ + u32 metadata_reg_c_3; /* metadata_reg_c_3 */ + u32 metadata_reg_c_2; /* metadata_reg_c_2 */ + u32 metadata_reg_c_1; /* metadata_reg_c_1 */ + u32 metadata_reg_c_0; /* metadata_reg_c_0 */ + u32 metadata_reg_a; /* metadata_reg_a */ + u32 reserved_auto1[3]; +}; + +struct mlx5dr_match_misc3 { + u32 inner_tcp_seq_num; + u32 outer_tcp_seq_num; + u32 inner_tcp_ack_num; + u32 outer_tcp_ack_num; + + u32 reserved_auto1:8; + u32 outer_vxlan_gpe_vni:24; + + u32 outer_vxlan_gpe_next_protocol:8; + 
u32 outer_vxlan_gpe_flags:8; + u32 reserved_auto2:16; + + u32 icmpv4_header_data; + u32 icmpv6_header_data; + + u8 icmpv4_type; + u8 icmpv4_code; + u8 icmpv6_type; + u8 icmpv6_code; + + u32 geneve_tlv_option_0_data; + + u32 gtpu_teid; + + u8 gtpu_msg_type; + u8 gtpu_msg_flags; + u32 reserved_auto3:16; + + u32 gtpu_dw_2; + u32 gtpu_first_ext_dw_0; + u32 gtpu_dw_0; + u32 reserved_auto4; +}; + +struct mlx5dr_match_misc4 { + u32 prog_sample_field_value_0; + u32 prog_sample_field_id_0; + u32 prog_sample_field_value_1; + u32 prog_sample_field_id_1; + u32 prog_sample_field_value_2; + u32 prog_sample_field_id_2; + u32 prog_sample_field_value_3; + u32 prog_sample_field_id_3; + u32 reserved_auto1[8]; +}; + +struct mlx5dr_match_misc5 { + u32 macsec_tag_0; + u32 macsec_tag_1; + u32 macsec_tag_2; + u32 macsec_tag_3; + u32 tunnel_header_0; + u32 tunnel_header_1; + u32 tunnel_header_2; + u32 tunnel_header_3; +}; + +struct mlx5dr_match_param { + struct mlx5dr_match_spec outer; + struct mlx5dr_match_misc misc; + struct mlx5dr_match_spec inner; + struct mlx5dr_match_misc2 misc2; + struct mlx5dr_match_misc3 misc3; + struct mlx5dr_match_misc4 misc4; + struct mlx5dr_match_misc5 misc5; +}; + +#define DR_MASK_IS_ICMPV4_SET(_misc3) ((_misc3)->icmpv4_type || \ + (_misc3)->icmpv4_code || \ + (_misc3)->icmpv4_header_data) + +#define DR_MASK_IS_SRC_IP_SET(_spec) ((_spec)->src_ip_127_96 || \ + (_spec)->src_ip_95_64 || \ + (_spec)->src_ip_63_32 || \ + (_spec)->src_ip_31_0) + +#define DR_MASK_IS_DST_IP_SET(_spec) ((_spec)->dst_ip_127_96 || \ + (_spec)->dst_ip_95_64 || \ + (_spec)->dst_ip_63_32 || \ + (_spec)->dst_ip_31_0) + +struct mlx5dr_esw_caps { + u64 drop_icm_address_rx; + u64 drop_icm_address_tx; + u64 uplink_icm_address_rx; + u64 uplink_icm_address_tx; + u8 sw_owner:1; + u8 sw_owner_v2:1; +}; + +struct mlx5dr_cmd_vport_cap { + u16 vport_gvmi; + u16 vhca_gvmi; + u16 num; + u64 icm_address_rx; + u64 icm_address_tx; +}; + +struct mlx5dr_roce_cap { + u8 roce_en:1; + u8 fl_rc_qp_when_roce_disabled:1; + u8 fl_rc_qp_when_roce_enabled:1; +}; + +struct mlx5dr_vports { + struct mlx5dr_cmd_vport_cap esw_manager_caps; + struct mlx5dr_cmd_vport_cap uplink_caps; + struct xarray vports_caps_xa; +}; + +struct mlx5dr_cmd_caps { + u16 gvmi; + u64 nic_rx_drop_address; + u64 nic_tx_drop_address; + u64 nic_tx_allow_address; + u64 esw_rx_drop_address; + u64 esw_tx_drop_address; + u32 log_icm_size; + u64 hdr_modify_icm_addr; + u32 flex_protocols; + u8 flex_parser_id_icmp_dw0; + u8 flex_parser_id_icmp_dw1; + u8 flex_parser_id_icmpv6_dw0; + u8 flex_parser_id_icmpv6_dw1; + u8 flex_parser_id_geneve_tlv_option_0; + u8 flex_parser_id_mpls_over_gre; + u8 flex_parser_id_mpls_over_udp; + u8 flex_parser_id_gtpu_dw_0; + u8 flex_parser_id_gtpu_teid; + u8 flex_parser_id_gtpu_dw_2; + u8 flex_parser_id_gtpu_first_ext_dw_0; + u8 flex_parser_ok_bits_supp; + u8 max_ft_level; + u16 roce_min_src_udp; + u8 sw_format_ver; + bool eswitch_manager; + bool rx_sw_owner; + bool tx_sw_owner; + bool fdb_sw_owner; + u8 rx_sw_owner_v2:1; + u8 tx_sw_owner_v2:1; + u8 fdb_sw_owner_v2:1; + struct mlx5dr_esw_caps esw_caps; + struct mlx5dr_vports vports; + bool prio_tag_required; + struct mlx5dr_roce_cap roce_caps; + u8 is_ecpf:1; + u8 isolate_vl_tc:1; +}; + +enum mlx5dr_domain_nic_type { + DR_DOMAIN_NIC_TYPE_RX, + DR_DOMAIN_NIC_TYPE_TX, +}; + +struct mlx5dr_domain_rx_tx { + u64 drop_icm_addr; + u64 default_icm_addr; + enum mlx5dr_domain_nic_type type; + struct mutex mutex; /* protect rx/tx domain */ +}; + +struct mlx5dr_domain_info { + bool supp_sw_steering; + u32 
max_inline_size; + u32 max_send_wr; + u32 max_log_sw_icm_sz; + u32 max_log_action_icm_sz; + struct mlx5dr_domain_rx_tx rx; + struct mlx5dr_domain_rx_tx tx; + struct mlx5dr_cmd_caps caps; +}; + +struct mlx5dr_domain { + struct mlx5dr_domain *peer_dmn; + struct mlx5_core_dev *mdev; + u32 pdn; + struct mlx5_uars_page *uar; + enum mlx5dr_domain_type type; + refcount_t refcount; + struct mlx5dr_icm_pool *ste_icm_pool; + struct mlx5dr_icm_pool *action_icm_pool; + struct mlx5dr_send_ring *send_ring; + struct mlx5dr_domain_info info; + struct xarray csum_fts_xa; + struct mlx5dr_ste_ctx *ste_ctx; + struct list_head dbg_tbl_list; + struct mlx5dr_dbg_dump_info dump_info; +}; + +struct mlx5dr_table_rx_tx { + struct mlx5dr_ste_htbl *s_anchor; + struct mlx5dr_domain_rx_tx *nic_dmn; + u64 default_icm_addr; + struct list_head nic_matcher_list; +}; + +struct mlx5dr_table { + struct mlx5dr_domain *dmn; + struct mlx5dr_table_rx_tx rx; + struct mlx5dr_table_rx_tx tx; + u32 level; + u32 table_type; + u32 table_id; + u32 flags; + struct list_head matcher_list; + struct mlx5dr_action *miss_action; + refcount_t refcount; + struct list_head dbg_node; +}; + +struct mlx5dr_matcher_rx_tx { + struct mlx5dr_ste_htbl *s_htbl; + struct mlx5dr_ste_htbl *e_anchor; + struct mlx5dr_ste_build *ste_builder; + struct mlx5dr_ste_build ste_builder_arr[DR_RULE_IPV_MAX] + [DR_RULE_IPV_MAX] + [DR_RULE_MAX_STES]; + u8 num_of_builders; + u8 num_of_builders_arr[DR_RULE_IPV_MAX][DR_RULE_IPV_MAX]; + u64 default_icm_addr; + struct mlx5dr_table_rx_tx *nic_tbl; + u32 prio; + struct list_head list_node; + u32 rules; +}; + +struct mlx5dr_matcher { + struct mlx5dr_table *tbl; + struct mlx5dr_matcher_rx_tx rx; + struct mlx5dr_matcher_rx_tx tx; + struct list_head list_node; /* Used for both matchers and dbg managing */ + u32 prio; + struct mlx5dr_match_param mask; + u8 match_criteria; + refcount_t refcount; + struct list_head dbg_rule_list; +}; + +struct mlx5dr_ste_action_modify_field { + u16 hw_field; + u8 start; + u8 end; + u8 l3_type; + u8 l4_type; +}; + +struct mlx5dr_action_rewrite { + struct mlx5dr_domain *dmn; + struct mlx5dr_icm_chunk *chunk; + u8 *data; + u16 num_of_actions; + u32 index; + u8 allow_rx:1; + u8 allow_tx:1; + u8 modify_ttl:1; +}; + +struct mlx5dr_action_reformat { + struct mlx5dr_domain *dmn; + u32 id; + u32 size; + u8 param_0; + u8 param_1; +}; + +struct mlx5dr_action_sampler { + struct mlx5dr_domain *dmn; + u64 rx_icm_addr; + u64 tx_icm_addr; + u32 sampler_id; +}; + +struct mlx5dr_action_dest_tbl { + u8 is_fw_tbl:1; + union { + struct mlx5dr_table *tbl; + struct { + struct mlx5dr_domain *dmn; + u32 id; + u32 group_id; + enum fs_flow_table_type type; + u64 rx_icm_addr; + u64 tx_icm_addr; + struct mlx5dr_action **ref_actions; + u32 num_of_ref_actions; + } fw_tbl; + }; +}; + +struct mlx5dr_action_ctr { + u32 ctr_id; + u32 offset; +}; + +struct mlx5dr_action_vport { + struct mlx5dr_domain *dmn; + struct mlx5dr_cmd_vport_cap *caps; +}; + +struct mlx5dr_action_push_vlan { + u32 vlan_hdr; /* tpid_pcp_dei_vid */ +}; + +struct mlx5dr_action_flow_tag { + u32 flow_tag; +}; + +struct mlx5dr_rule_action_member { + struct mlx5dr_action *action; + struct list_head list; +}; + +struct mlx5dr_action_aso_flow_meter { + struct mlx5dr_domain *dmn; + u32 obj_id; + u32 offset; + u8 dest_reg_id; + u8 init_color; +}; + +struct mlx5dr_action { + enum mlx5dr_action_type action_type; + refcount_t refcount; + + union { + void *data; + struct mlx5dr_action_rewrite *rewrite; + struct mlx5dr_action_reformat *reformat; + struct mlx5dr_action_sampler 
*sampler; + struct mlx5dr_action_dest_tbl *dest_tbl; + struct mlx5dr_action_ctr *ctr; + struct mlx5dr_action_vport *vport; + struct mlx5dr_action_push_vlan *push_vlan; + struct mlx5dr_action_flow_tag *flow_tag; + struct mlx5dr_action_aso_flow_meter *aso; + }; +}; + +enum mlx5dr_connect_type { + CONNECT_HIT = 1, + CONNECT_MISS = 2, +}; + +struct mlx5dr_htbl_connect_info { + enum mlx5dr_connect_type type; + union { + struct mlx5dr_ste_htbl *hit_next_htbl; + u64 miss_icm_addr; + }; +}; + +struct mlx5dr_rule_rx_tx { + struct mlx5dr_matcher_rx_tx *nic_matcher; + struct mlx5dr_ste *last_rule_ste; +}; + +struct mlx5dr_rule { + struct mlx5dr_matcher *matcher; + struct mlx5dr_rule_rx_tx rx; + struct mlx5dr_rule_rx_tx tx; + struct list_head rule_actions_list; + struct list_head dbg_node; + u32 flow_source; +}; + +void mlx5dr_rule_set_last_member(struct mlx5dr_rule_rx_tx *nic_rule, + struct mlx5dr_ste *ste, + bool force); +int mlx5dr_rule_get_reverse_rule_members(struct mlx5dr_ste **ste_arr, + struct mlx5dr_ste *curr_ste, + int *num_of_stes); + +struct mlx5dr_icm_chunk { + struct mlx5dr_icm_buddy_mem *buddy_mem; + struct list_head chunk_list; + + /* indicates the index of this chunk in the whole memory, + * used for deleting the chunk from the buddy + */ + unsigned int seg; + enum mlx5dr_icm_chunk_size size; + + /* Memory optimisation */ + struct mlx5dr_ste *ste_arr; + u8 *hw_ste_arr; + struct list_head *miss_list; +}; + +static inline void mlx5dr_domain_nic_lock(struct mlx5dr_domain_rx_tx *nic_dmn) +{ + mutex_lock(&nic_dmn->mutex); +} + +static inline void mlx5dr_domain_nic_unlock(struct mlx5dr_domain_rx_tx *nic_dmn) +{ + mutex_unlock(&nic_dmn->mutex); +} + +static inline void mlx5dr_domain_lock(struct mlx5dr_domain *dmn) +{ + mlx5dr_domain_nic_lock(&dmn->info.rx); + mlx5dr_domain_nic_lock(&dmn->info.tx); +} + +static inline void mlx5dr_domain_unlock(struct mlx5dr_domain *dmn) +{ + mlx5dr_domain_nic_unlock(&dmn->info.tx); + mlx5dr_domain_nic_unlock(&dmn->info.rx); +} + +int mlx5dr_matcher_add_to_tbl_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *nic_matcher); +int mlx5dr_matcher_remove_from_tbl_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *nic_matcher); + +int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + enum mlx5dr_ipv outer_ipv, + enum mlx5dr_ipv inner_ipv); + +u64 mlx5dr_icm_pool_get_chunk_mr_addr(struct mlx5dr_icm_chunk *chunk); +u32 mlx5dr_icm_pool_get_chunk_rkey(struct mlx5dr_icm_chunk *chunk); +u64 mlx5dr_icm_pool_get_chunk_icm_addr(struct mlx5dr_icm_chunk *chunk); +u32 mlx5dr_icm_pool_get_chunk_num_of_entries(struct mlx5dr_icm_chunk *chunk); +u32 mlx5dr_icm_pool_get_chunk_byte_size(struct mlx5dr_icm_chunk *chunk); +u8 *mlx5dr_ste_get_hw_ste(struct mlx5dr_ste *ste); + +static inline int +mlx5dr_icm_pool_dm_type_to_entry_size(enum mlx5dr_icm_type icm_type) +{ + if (icm_type == DR_ICM_TYPE_STE) + return DR_STE_SIZE; + + return DR_MODIFY_ACTION_SIZE; +} + +static inline u32 +mlx5dr_icm_pool_chunk_size_to_entries(enum mlx5dr_icm_chunk_size chunk_size) +{ + return 1 << chunk_size; +} + +static inline int +mlx5dr_icm_pool_chunk_size_to_byte(enum mlx5dr_icm_chunk_size chunk_size, + enum mlx5dr_icm_type icm_type) +{ + int num_of_entries; + int entry_size; + + entry_size = mlx5dr_icm_pool_dm_type_to_entry_size(icm_type); + num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(chunk_size); + + return entry_size * num_of_entries; +} + +static inline int +mlx5dr_ste_htbl_increase_threshold(struct 
mlx5dr_ste_htbl *htbl) +{ + int num_of_entries = + mlx5dr_icm_pool_chunk_size_to_entries(htbl->chunk->size); + + /* Threshold is 50%, one is added to table of size 1 */ + return (num_of_entries + 1) / 2; +} + +static inline bool +mlx5dr_ste_htbl_may_grow(struct mlx5dr_ste_htbl *htbl) +{ + if (htbl->chunk->size == DR_CHUNK_SIZE_MAX - 1 || !htbl->byte_mask) + return false; + + return true; +} + +struct mlx5dr_cmd_vport_cap * +mlx5dr_domain_get_vport_cap(struct mlx5dr_domain *dmn, u16 vport); + +struct mlx5dr_cmd_query_flow_table_details { + u8 status; + u8 level; + u64 sw_owner_icm_root_1; + u64 sw_owner_icm_root_0; +}; + +struct mlx5dr_cmd_create_flow_table_attr { + u32 table_type; + u16 uid; + u64 icm_addr_rx; + u64 icm_addr_tx; + u8 level; + bool sw_owner; + bool term_tbl; + bool decap_en; + bool reformat_en; +}; + +/* internal API functions */ +int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, + struct mlx5dr_cmd_caps *caps); +int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev, + bool other_vport, u16 vport_number, + u64 *icm_address_rx, + u64 *icm_address_tx); +int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, + bool other_vport, u16 vport_number, u16 *gvmi); +int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev, + struct mlx5dr_esw_caps *caps); +int mlx5dr_cmd_query_flow_sampler(struct mlx5_core_dev *dev, + u32 sampler_id, + u64 *rx_icm_addr, + u64 *tx_icm_addr); +int mlx5dr_cmd_sync_steering(struct mlx5_core_dev *mdev); +int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 group_id, + u32 modify_header_id, + u16 vport_id); +int mlx5dr_cmd_del_flow_table_entry(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id); +int mlx5dr_cmd_alloc_modify_header(struct mlx5_core_dev *mdev, + u32 table_type, + u8 num_of_actions, + u64 *actions, + u32 *modify_header_id); +int mlx5dr_cmd_dealloc_modify_header(struct mlx5_core_dev *mdev, + u32 modify_header_id); +int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 *group_id); +int mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 group_id); +int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev, + struct mlx5dr_cmd_create_flow_table_attr *attr, + u64 *fdb_rx_icm_addr, + u32 *table_id); +int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev, + u32 table_id, + u32 table_type); +int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev, + enum fs_flow_table_type type, + u32 table_id, + struct mlx5dr_cmd_query_flow_table_details *output); +int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev, + enum mlx5_reformat_ctx_type rt, + u8 reformat_param_0, + u8 reformat_param_1, + size_t reformat_size, + void *reformat_data, + u32 *reformat_id); +void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev, + u32 reformat_id); + +struct mlx5dr_cmd_gid_attr { + u8 gid[16]; + u8 mac[6]; + u32 roce_ver; +}; + +int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num, + u16 index, struct mlx5dr_cmd_gid_attr *attr); + +struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn, + enum mlx5dr_icm_type icm_type); +void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool); + +struct mlx5dr_icm_chunk * +mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size); +void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk); + +void mlx5dr_ste_prepare_for_postsend(struct mlx5dr_ste_ctx *ste_ctx, + 
u8 *hw_ste_p, u32 ste_size); +int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn, + struct mlx5dr_domain_rx_tx *nic_dmn, + struct mlx5dr_ste_htbl *htbl, + struct mlx5dr_htbl_connect_info *connect_info, + bool update_hw_ste); +void mlx5dr_ste_set_formatted_ste(struct mlx5dr_ste_ctx *ste_ctx, + u16 gvmi, + enum mlx5dr_domain_nic_type nic_type, + struct mlx5dr_ste_htbl *htbl, + u8 *formatted_ste, + struct mlx5dr_htbl_connect_info *connect_info); +void mlx5dr_ste_copy_param(u8 match_criteria, + struct mlx5dr_match_param *set_param, + struct mlx5dr_match_parameters *mask, + bool clear); + +struct mlx5dr_qp { + struct mlx5_core_dev *mdev; + struct mlx5_wq_qp wq; + struct mlx5_uars_page *uar; + struct mlx5_wq_ctrl wq_ctrl; + u32 qpn; + struct { + unsigned int pc; + unsigned int cc; + unsigned int size; + unsigned int *wqe_head; + unsigned int wqe_cnt; + } sq; + struct { + unsigned int pc; + unsigned int cc; + unsigned int size; + unsigned int wqe_cnt; + } rq; + int max_inline_data; +}; + +struct mlx5dr_cq { + struct mlx5_core_dev *mdev; + struct mlx5_cqwq wq; + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5_core_cq mcq; + struct mlx5dr_qp *qp; +}; + +struct mlx5dr_mr { + struct mlx5_core_dev *mdev; + u32 mkey; + dma_addr_t dma_addr; + void *addr; + size_t size; +}; + +#define MAX_SEND_CQE 64 +#define MIN_READ_SYNC 64 + +struct mlx5dr_send_ring { + struct mlx5dr_cq *cq; + struct mlx5dr_qp *qp; + struct mlx5dr_mr *mr; + /* How much wqes are waiting for completion */ + u32 pending_wqe; + /* Signal request per this trash hold value */ + u16 signal_th; + /* Each post_send_size less than max_post_send_size */ + u32 max_post_send_size; + /* manage the send queue */ + u32 tx_head; + void *buf; + u32 buf_size; + u8 sync_buff[MIN_READ_SYNC]; + struct mlx5dr_mr *sync_mr; + spinlock_t lock; /* Protect the data path of the send ring */ + bool err_state; /* send_ring is not usable in err state */ +}; + +int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn); +void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn, + struct mlx5dr_send_ring *send_ring); +int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn); +int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, + struct mlx5dr_ste *ste, + u8 *data, + u16 size, + u16 offset); +int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *formatted_ste, u8 *mask); +int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *ste_init_data, + bool update_hw_ste); +int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, + struct mlx5dr_action *action); + +struct mlx5dr_cmd_ft_info { + u32 id; + u16 vport; + enum fs_flow_table_type type; +}; + +struct mlx5dr_cmd_flow_destination_hw_info { + enum mlx5_flow_destination_type type; + union { + u32 tir_num; + u32 ft_num; + u32 ft_id; + u32 counter_id; + u32 sampler_id; + struct { + u16 num; + u16 vhca_id; + u32 reformat_id; + u8 flags; + } vport; + }; +}; + +struct mlx5dr_cmd_fte_info { + u32 dests_size; + u32 index; + struct mlx5_flow_context flow_context; + u32 *val; + struct mlx5_flow_act action; + struct mlx5dr_cmd_flow_destination_hw_info *dest_arr; + bool ignore_flow_level; +}; + +int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev, + int opmod, int modify_mask, + struct mlx5dr_cmd_ft_info *ft, + u32 group_id, + struct mlx5dr_cmd_fte_info *fte); + +bool mlx5dr_ste_supp_ttl_cs_recalc(struct mlx5dr_cmd_caps *caps); + +struct mlx5dr_fw_recalc_cs_ft { + u64 rx_icm_addr; + u32 table_id; + u32 group_id; + u32 modify_hdr_id; +}; + 
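For orientation, the ICM pool helpers defined earlier in this header reduce to simple arithmetic: a chunk of order k holds 2^k entries, its byte size is that entry count times a fixed per-entry size (one size for STE chunks, another for modify-header chunks), and mlx5dr_ste_htbl_increase_threshold() marks a hash table for growth once roughly half of its entries are in use. The stand-alone sketch below merely replays that math outside the driver; EXAMPLE_STE_SIZE and EXAMPLE_MODIFY_ACTION_SIZE are illustrative placeholders, not the driver's actual DR_STE_SIZE / DR_MODIFY_ACTION_SIZE values.

#include <stdio.h>

/* Illustrative per-entry sizes; the real constants live in the driver headers. */
#define EXAMPLE_STE_SIZE		64
#define EXAMPLE_MODIFY_ACTION_SIZE	8

/* 2^chunk_size entries per chunk, as in mlx5dr_icm_pool_chunk_size_to_entries() */
static unsigned int chunk_entries(unsigned int chunk_size)
{
	return 1u << chunk_size;
}

/* entry count times per-entry size, as in mlx5dr_icm_pool_chunk_size_to_byte() */
static unsigned int chunk_bytes(unsigned int chunk_size, unsigned int entry_size)
{
	return chunk_entries(chunk_size) * entry_size;
}

/* 50% occupancy threshold, rounded up so a table of size 1 grows at 1 used entry,
 * as in mlx5dr_ste_htbl_increase_threshold()
 */
static unsigned int grow_threshold(unsigned int chunk_size)
{
	return (chunk_entries(chunk_size) + 1) / 2;
}

int main(void)
{
	unsigned int k;

	for (k = 0; k <= 4; k++)
		printf("order %u: %u entries, %u STE bytes, %u modify-hdr bytes, grow at %u used\n",
		       k, chunk_entries(k),
		       chunk_bytes(k, EXAMPLE_STE_SIZE),
		       chunk_bytes(k, EXAMPLE_MODIFY_ACTION_SIZE),
		       grow_threshold(k));
	return 0;
}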
+struct mlx5dr_fw_recalc_cs_ft * +mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u16 vport_num); +void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn, + struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft); +int mlx5dr_domain_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn, + u16 vport_num, + u64 *rx_icm_addr); +int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_cmd_flow_destination_hw_info *dest, + int num_dest, + bool reformat_req, + u32 *tbl_id, + u32 *group_id, + bool ignore_flow_level, + u32 flow_source); +void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, u32 tbl_id, + u32 group_id); +#endif /* _DR_TYPES_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c new file mode 100644 index 000000000..13b6d4721 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -0,0 +1,820 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include <linux/mlx5/vport.h> +#include "mlx5_core.h" +#include "fs_core.h" +#include "fs_cmd.h" +#include "mlx5dr.h" +#include "fs_dr.h" + +static bool mlx5_dr_is_fw_table(u32 flags) +{ + if (flags & MLX5_FLOW_TABLE_TERMINATION) + return true; + + return false; +} + +static int mlx5_cmd_dr_update_root_ft(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + u32 underlay_qpn, + bool disconnect) +{ + return mlx5_fs_cmd_get_fw_cmds()->update_root_ft(ns, ft, underlay_qpn, + disconnect); +} + +static int set_miss_action(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + struct mlx5dr_action *old_miss_action; + struct mlx5dr_action *action = NULL; + struct mlx5dr_table *next_tbl; + int err; + + next_tbl = next_ft ? 
next_ft->fs_dr_table.dr_table : NULL; + if (next_tbl) { + action = mlx5dr_action_create_dest_table(next_tbl); + if (!action) + return -EINVAL; + } + old_miss_action = ft->fs_dr_table.miss_action; + err = mlx5dr_table_set_miss_action(ft->fs_dr_table.dr_table, action); + if (err && action) { + err = mlx5dr_action_destroy(action); + if (err) + mlx5_core_err(ns->dev, + "Failed to destroy action (%d)\n", err); + action = NULL; + } + ft->fs_dr_table.miss_action = action; + if (old_miss_action) { + err = mlx5dr_action_destroy(old_miss_action); + if (err) + mlx5_core_err(ns->dev, "Failed to destroy action (%d)\n", + err); + } + + return err; +} + +static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table_attr *ft_attr, + struct mlx5_flow_table *next_ft) +{ + struct mlx5dr_table *tbl; + u32 flags; + int err; + + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft, + ft_attr, + next_ft); + flags = ft->flags; + /* turn off encap/decap if not supported for sw-str by fw */ + if (!MLX5_CAP_FLOWTABLE(ns->dev, sw_owner_reformat_supported)) + flags = ft->flags & ~(MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + + tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, ft->level, flags, + ft_attr->uid); + if (!tbl) { + mlx5_core_err(ns->dev, "Failed creating dr flow_table\n"); + return -EINVAL; + } + + ft->fs_dr_table.dr_table = tbl; + ft->id = mlx5dr_table_get_id(tbl); + + if (next_ft) { + err = set_miss_action(ns, ft, next_ft); + if (err) { + mlx5dr_table_destroy(tbl); + ft->fs_dr_table.dr_table = NULL; + return err; + } + } + + ft->max_fte = INT_MAX; + + return 0; +} + +static int mlx5_cmd_dr_destroy_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft) +{ + struct mlx5dr_action *action = ft->fs_dr_table.miss_action; + int err; + + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_table(ns, ft); + + err = mlx5dr_table_destroy(ft->fs_dr_table.dr_table); + if (err) { + mlx5_core_err(ns->dev, "Failed to destroy flow_table (%d)\n", + err); + return err; + } + if (action) { + err = mlx5dr_action_destroy(action); + if (err) { + mlx5_core_err(ns->dev, "Failed to destroy action(%d)\n", + err); + return err; + } + } + + return err; +} + +static int mlx5_cmd_dr_modify_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->modify_flow_table(ns, ft, next_ft); + + return set_miss_action(ns, ft, next_ft); +} + +static int mlx5_cmd_dr_create_flow_group(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + u32 *in, + struct mlx5_flow_group *fg) +{ + struct mlx5dr_matcher *matcher; + u32 priority = MLX5_GET(create_flow_group_in, in, + start_flow_index); + u8 match_criteria_enable = MLX5_GET(create_flow_group_in, + in, + match_criteria_enable); + struct mlx5dr_match_parameters mask; + + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->create_flow_group(ns, ft, in, + fg); + + mask.match_buf = MLX5_ADDR_OF(create_flow_group_in, + in, match_criteria); + mask.match_sz = sizeof(fg->mask.match_criteria); + + matcher = mlx5dr_matcher_create(ft->fs_dr_table.dr_table, + priority, + match_criteria_enable, + &mask); + if (!matcher) { + mlx5_core_err(ns->dev, "Failed creating matcher\n"); + return -EINVAL; + } + + fg->fs_dr_matcher.dr_matcher = matcher; + return 
0; +} + +static int mlx5_cmd_dr_destroy_flow_group(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg) +{ + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_group(ns, ft, fg); + + return mlx5dr_matcher_destroy(fg->fs_dr_matcher.dr_matcher); +} + +static struct mlx5dr_action *create_vport_action(struct mlx5dr_domain *domain, + struct mlx5_flow_rule *dst) +{ + struct mlx5_flow_destination *dest_attr = &dst->dest_attr; + + return mlx5dr_action_create_dest_vport(domain, dest_attr->vport.num, + dest_attr->vport.flags & + MLX5_FLOW_DEST_VPORT_VHCA_ID, + dest_attr->vport.vhca_id); +} + +static struct mlx5dr_action *create_uplink_action(struct mlx5dr_domain *domain, + struct mlx5_flow_rule *dst) +{ + struct mlx5_flow_destination *dest_attr = &dst->dest_attr; + + return mlx5dr_action_create_dest_vport(domain, MLX5_VPORT_UPLINK, 1, + dest_attr->vport.vhca_id); +} + +static struct mlx5dr_action *create_ft_action(struct mlx5dr_domain *domain, + struct mlx5_flow_rule *dst) +{ + struct mlx5_flow_table *dest_ft = dst->dest_attr.ft; + + if (mlx5_dr_is_fw_table(dest_ft->flags)) + return mlx5dr_action_create_dest_flow_fw_table(domain, dest_ft); + return mlx5dr_action_create_dest_table(dest_ft->fs_dr_table.dr_table); +} + +static struct mlx5dr_action *create_action_push_vlan(struct mlx5dr_domain *domain, + struct mlx5_fs_vlan *vlan) +{ + u16 n_ethtype = vlan->ethtype; + u8 prio = vlan->prio; + u16 vid = vlan->vid; + u32 vlan_hdr; + + vlan_hdr = (u32)n_ethtype << 16 | (u32)(prio) << 12 | (u32)vid; + return mlx5dr_action_create_push_vlan(domain, htonl(vlan_hdr)); +} + +static bool contain_vport_reformat_action(struct mlx5_flow_rule *dst) +{ + return (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT || + dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) && + dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID; +} + +/* We want to support a rule with 32 destinations, which means we need to + * account for 32 destinations plus usually a counter plus one more action + * for a multi-destination flow table. 
+ */ +#define MLX5_FLOW_CONTEXT_ACTION_MAX 34 +static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + struct fs_fte *fte) +{ + struct mlx5dr_domain *domain = ns->fs_dr_domain.dr_domain; + struct mlx5dr_action_dest *term_actions; + struct mlx5dr_match_parameters params; + struct mlx5_core_dev *dev = ns->dev; + struct mlx5dr_action **fs_dr_actions; + struct mlx5dr_action *tmp_action; + struct mlx5dr_action **actions; + bool delay_encap_set = false; + struct mlx5dr_rule *rule; + struct mlx5_flow_rule *dst; + int fs_dr_num_actions = 0; + int num_term_actions = 0; + int num_actions = 0; + size_t match_sz; + int err = 0; + int i; + + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->create_fte(ns, ft, group, fte); + + actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, sizeof(*actions), + GFP_KERNEL); + if (!actions) { + err = -ENOMEM; + goto out_err; + } + + fs_dr_actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, + sizeof(*fs_dr_actions), GFP_KERNEL); + if (!fs_dr_actions) { + err = -ENOMEM; + goto free_actions_alloc; + } + + term_actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, + sizeof(*term_actions), GFP_KERNEL); + if (!term_actions) { + err = -ENOMEM; + goto free_fs_dr_actions_alloc; + } + + match_sz = sizeof(fte->val); + + /* Drop reformat action bit if destination vport set with reformat */ + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + list_for_each_entry(dst, &fte->node.children, node.list) { + if (!contain_vport_reformat_action(dst)) + continue; + + fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + break; + } + } + + /* The order of the actions are must to be keep, only the following + * order is supported by SW steering: + * TX: modify header -> push vlan -> encap + * RX: decap -> pop vlan -> modify header + */ + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { + enum mlx5dr_action_reformat_type decap_type = + DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2; + + tmp_action = mlx5dr_action_create_packet_reformat(domain, + decap_type, + 0, 0, 0, + NULL); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { + bool is_decap = fte->action.pkt_reformat->reformat_type == + MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; + + if (is_decap) + actions[num_actions++] = + fte->action.pkt_reformat->action.dr_action; + else + delay_encap_set = true; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) { + tmp_action = + mlx5dr_action_create_pop_vlan(); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2) { + tmp_action = + mlx5dr_action_create_pop_vlan(); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + actions[num_actions++] = + fte->action.modify_hdr->action.dr_action; + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) { + tmp_action = create_action_push_vlan(domain, &fte->action.vlan[0]); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } 
+ + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) { + tmp_action = create_action_push_vlan(domain, &fte->action.vlan[1]); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (delay_encap_set) + actions[num_actions++] = + fte->action.pkt_reformat->action.dr_action; + + /* The order of the actions below is not important */ + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { + tmp_action = mlx5dr_action_create_drop(); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_actions[num_term_actions++].dest = tmp_action; + } + + if (fte->flow_context.flow_tag) { + tmp_action = + mlx5dr_action_create_tag(fte->flow_context.flow_tag); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + list_for_each_entry(dst, &fte->node.children, node.list) { + enum mlx5_flow_destination_type type = dst->dest_attr.type; + u32 id; + + if (fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || + num_term_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } + + if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + switch (type) { + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + tmp_action = create_ft_action(domain, dst); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_actions[num_term_actions++].dest = tmp_action; + break; + case MLX5_FLOW_DESTINATION_TYPE_UPLINK: + case MLX5_FLOW_DESTINATION_TYPE_VPORT: + tmp_action = type == MLX5_FLOW_DESTINATION_TYPE_VPORT ? 
+ create_vport_action(domain, dst) : + create_uplink_action(domain, dst); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_actions[num_term_actions].dest = tmp_action; + + if (dst->dest_attr.vport.flags & + MLX5_FLOW_DEST_VPORT_REFORMAT_ID) + term_actions[num_term_actions].reformat = + dst->dest_attr.vport.pkt_reformat->action.dr_action; + + num_term_actions++; + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: + id = dst->dest_attr.ft_num; + tmp_action = mlx5dr_action_create_dest_table_num(domain, + id); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_actions[num_term_actions++].dest = tmp_action; + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER: + id = dst->dest_attr.sampler_id; + tmp_action = mlx5dr_action_create_flow_sampler(domain, + id); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_actions[num_term_actions++].dest = tmp_action; + break; + default: + err = -EOPNOTSUPP; + goto free_actions; + } + } + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + list_for_each_entry(dst, &fte->node.children, node.list) { + u32 id; + + if (dst->dest_attr.type != + MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || + fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } + + id = dst->dest_attr.counter_id; + tmp_action = + mlx5dr_action_create_flow_counter(id); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) { + if (fte->action.exe_aso.type != MLX5_EXE_ASO_FLOW_METER) { + err = -EOPNOTSUPP; + goto free_actions; + } + + tmp_action = + mlx5dr_action_create_aso(domain, + fte->action.exe_aso.object_id, + fte->action.exe_aso.return_reg_id, + fte->action.exe_aso.type, + fte->action.exe_aso.flow_meter.init_color, + fte->action.exe_aso.flow_meter.meter_idx); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + params.match_sz = match_sz; + params.match_buf = (u64 *)fte->val; + if (num_term_actions == 1) { + if (term_actions->reformat) { + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } + actions[num_actions++] = term_actions->reformat; + } + + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } + actions[num_actions++] = term_actions->dest; + } else if (num_term_actions > 1) { + bool ignore_flow_level = + !!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL); + u32 flow_source = fte->flow_context.flow_source; + + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || + fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } + tmp_action = mlx5dr_action_create_mult_dest_tbl(domain, + term_actions, + num_term_actions, + ignore_flow_level, + flow_source); + if (!tmp_action) { + err = -EOPNOTSUPP; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + rule = mlx5dr_rule_create(group->fs_dr_matcher.dr_matcher, + ¶ms, + num_actions, + actions, + fte->flow_context.flow_source); + if (!rule) { + 
err = -EINVAL; + goto free_actions; + } + + kfree(term_actions); + kfree(actions); + + fte->fs_dr_rule.dr_rule = rule; + fte->fs_dr_rule.num_actions = fs_dr_num_actions; + fte->fs_dr_rule.dr_actions = fs_dr_actions; + + return 0; + +free_actions: + /* Free in reverse order to handle action dependencies */ + for (i = fs_dr_num_actions - 1; i >= 0; i--) + if (!IS_ERR_OR_NULL(fs_dr_actions[i])) + mlx5dr_action_destroy(fs_dr_actions[i]); + + kfree(term_actions); +free_fs_dr_actions_alloc: + kfree(fs_dr_actions); +free_actions_alloc: + kfree(actions); +out_err: + mlx5_core_err(dev, "Failed to create dr rule err(%d)\n", err); + return err; +} + +static int mlx5_cmd_dr_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat_params *params, + enum mlx5_flow_namespace_type namespace, + struct mlx5_pkt_reformat *pkt_reformat) +{ + struct mlx5dr_domain *dr_domain = ns->fs_dr_domain.dr_domain; + struct mlx5dr_action *action; + int dr_reformat; + + switch (params->type) { + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_NVGRE: + case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + dr_reformat = DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2; + break; + case MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2: + dr_reformat = DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2; + break; + case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + dr_reformat = DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3; + break; + case MLX5_REFORMAT_TYPE_INSERT_HDR: + dr_reformat = DR_ACTION_REFORMAT_TYP_INSERT_HDR; + break; + case MLX5_REFORMAT_TYPE_REMOVE_HDR: + dr_reformat = DR_ACTION_REFORMAT_TYP_REMOVE_HDR; + break; + default: + mlx5_core_err(ns->dev, "Packet-reformat not supported(%d)\n", + params->type); + return -EOPNOTSUPP; + } + + action = mlx5dr_action_create_packet_reformat(dr_domain, + dr_reformat, + params->param_0, + params->param_1, + params->size, + params->data); + if (!action) { + mlx5_core_err(ns->dev, "Failed allocating packet-reformat action\n"); + return -EINVAL; + } + + pkt_reformat->action.dr_action = action; + + return 0; +} + +static void mlx5_cmd_dr_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat *pkt_reformat) +{ + mlx5dr_action_destroy(pkt_reformat->action.dr_action); +} + +static int mlx5_cmd_dr_modify_header_alloc(struct mlx5_flow_root_namespace *ns, + u8 namespace, u8 num_actions, + void *modify_actions, + struct mlx5_modify_hdr *modify_hdr) +{ + struct mlx5dr_domain *dr_domain = ns->fs_dr_domain.dr_domain; + struct mlx5dr_action *action; + size_t actions_sz; + + actions_sz = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) * + num_actions; + action = mlx5dr_action_create_modify_header(dr_domain, 0, + actions_sz, + modify_actions); + if (!action) { + mlx5_core_err(ns->dev, "Failed allocating modify-header action\n"); + return -EINVAL; + } + + modify_hdr->action.dr_action = action; + + return 0; +} + +static void mlx5_cmd_dr_modify_header_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_modify_hdr *modify_hdr) +{ + mlx5dr_action_destroy(modify_hdr->action.dr_action); +} + +static int +mlx5_cmd_dr_destroy_match_definer(struct mlx5_flow_root_namespace *ns, + int definer_id) +{ + return -EOPNOTSUPP; +} + +static int mlx5_cmd_dr_create_match_definer(struct mlx5_flow_root_namespace *ns, + u16 format_id, u32 *match_mask) +{ + return -EOPNOTSUPP; +} + +static int mlx5_cmd_dr_delete_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct fs_fte *fte) +{ + struct mlx5_fs_dr_rule *rule = &fte->fs_dr_rule; + int err; + int i; + + if 
(mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->delete_fte(ns, ft, fte); + + err = mlx5dr_rule_destroy(rule->dr_rule); + if (err) + return err; + + /* Free in reverse order to handle action dependencies */ + for (i = rule->num_actions - 1; i >= 0; i--) + if (!IS_ERR_OR_NULL(rule->dr_actions[i])) + mlx5dr_action_destroy(rule->dr_actions[i]); + + kfree(rule->dr_actions); + return 0; +} + +static int mlx5_cmd_dr_update_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + int modify_mask, + struct fs_fte *fte) +{ + struct fs_fte fte_tmp = {}; + int ret; + + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->update_fte(ns, ft, group, modify_mask, fte); + + /* Backup current dr rule details */ + fte_tmp.fs_dr_rule = fte->fs_dr_rule; + memset(&fte->fs_dr_rule, 0, sizeof(struct mlx5_fs_dr_rule)); + + /* First add the new updated rule, then delete the old rule */ + ret = mlx5_cmd_dr_create_fte(ns, ft, group, fte); + if (ret) + goto restore_fte; + + ret = mlx5_cmd_dr_delete_fte(ns, ft, &fte_tmp); + WARN_ONCE(ret, "dr update fte duplicate rule deletion failed\n"); + return ret; + +restore_fte: + fte->fs_dr_rule = fte_tmp.fs_dr_rule; + return ret; +} + +static int mlx5_cmd_dr_set_peer(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns) +{ + struct mlx5dr_domain *peer_domain = NULL; + + if (peer_ns) + peer_domain = peer_ns->fs_dr_domain.dr_domain; + mlx5dr_domain_set_peer(ns->fs_dr_domain.dr_domain, + peer_domain); + return 0; +} + +static int mlx5_cmd_dr_create_ns(struct mlx5_flow_root_namespace *ns) +{ + ns->fs_dr_domain.dr_domain = + mlx5dr_domain_create(ns->dev, + MLX5DR_DOMAIN_TYPE_FDB); + if (!ns->fs_dr_domain.dr_domain) { + mlx5_core_err(ns->dev, "Failed to create dr flow namespace\n"); + return -EOPNOTSUPP; + } + return 0; +} + +static int mlx5_cmd_dr_destroy_ns(struct mlx5_flow_root_namespace *ns) +{ + return mlx5dr_domain_destroy(ns->fs_dr_domain.dr_domain); +} + +static u32 mlx5_cmd_dr_get_capabilities(struct mlx5_flow_root_namespace *ns, + enum fs_flow_table_type ft_type) +{ + if (ft_type != FS_FT_FDB || + MLX5_CAP_GEN(ns->dev, steering_format_version) == MLX5_STEERING_FORMAT_CONNECTX_5) + return 0; + + return MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX | MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX; +} + +bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev) +{ + return mlx5dr_is_supported(dev); +} + +static const struct mlx5_flow_cmds mlx5_flow_cmds_dr = { + .create_flow_table = mlx5_cmd_dr_create_flow_table, + .destroy_flow_table = mlx5_cmd_dr_destroy_flow_table, + .modify_flow_table = mlx5_cmd_dr_modify_flow_table, + .create_flow_group = mlx5_cmd_dr_create_flow_group, + .destroy_flow_group = mlx5_cmd_dr_destroy_flow_group, + .create_fte = mlx5_cmd_dr_create_fte, + .update_fte = mlx5_cmd_dr_update_fte, + .delete_fte = mlx5_cmd_dr_delete_fte, + .update_root_ft = mlx5_cmd_dr_update_root_ft, + .packet_reformat_alloc = mlx5_cmd_dr_packet_reformat_alloc, + .packet_reformat_dealloc = mlx5_cmd_dr_packet_reformat_dealloc, + .modify_header_alloc = mlx5_cmd_dr_modify_header_alloc, + .modify_header_dealloc = mlx5_cmd_dr_modify_header_dealloc, + .create_match_definer = mlx5_cmd_dr_create_match_definer, + .destroy_match_definer = mlx5_cmd_dr_destroy_match_definer, + .set_peer = mlx5_cmd_dr_set_peer, + .create_ns = mlx5_cmd_dr_create_ns, + .destroy_ns = mlx5_cmd_dr_destroy_ns, + .get_capabilities = mlx5_cmd_dr_get_capabilities, +}; + +const struct mlx5_flow_cmds 
*mlx5_fs_cmd_get_dr_cmds(void) +{ + return &mlx5_flow_cmds_dr; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h new file mode 100644 index 000000000..d16862206 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + * Copyright (c) 2019 Mellanox Technologies + */ + +#ifndef _MLX5_FS_DR_ +#define _MLX5_FS_DR_ + +#include "mlx5dr.h" + +struct mlx5_flow_root_namespace; +struct fs_fte; + +struct mlx5_fs_dr_action { + struct mlx5dr_action *dr_action; +}; + +struct mlx5_fs_dr_rule { + struct mlx5dr_rule *dr_rule; + /* Only actions created by fs_dr */ + struct mlx5dr_action **dr_actions; + int num_actions; +}; + +struct mlx5_fs_dr_domain { + struct mlx5dr_domain *dr_domain; +}; + +struct mlx5_fs_dr_matcher { + struct mlx5dr_matcher *dr_matcher; +}; + +struct mlx5_fs_dr_table { + struct mlx5dr_table *dr_table; + struct mlx5dr_action *miss_action; +}; + +#ifdef CONFIG_MLX5_SW_STEERING + +bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev); + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void); + +#else + +static inline const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void) +{ + return NULL; +} + +static inline bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev) +{ + return false; +} + +#endif /* CONFIG_MLX5_SW_STEERING */ +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h new file mode 100644 index 000000000..fb078fa0f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h @@ -0,0 +1,603 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019, Mellanox Technologies */ + +#ifndef MLX5_IFC_DR_H +#define MLX5_IFC_DR_H + +enum { + MLX5DR_STE_LU_TYPE_DONT_CARE = 0x0f, +}; + +struct mlx5_ifc_ste_general_bits { + u8 entry_type[0x4]; + u8 reserved_at_4[0x4]; + u8 entry_sub_type[0x8]; + u8 byte_mask[0x10]; + + u8 next_table_base_63_48[0x10]; + u8 next_lu_type[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 linear_hash_enable[0x1]; + u8 reserved_at_5c[0x2]; + u8 next_table_rank[0x2]; + + u8 reserved_at_60[0xa0]; + u8 tag_value[0x60]; + u8 bit_mask[0x60]; +}; + +struct mlx5_ifc_ste_sx_transmit_bits { + u8 entry_type[0x4]; + u8 reserved_at_4[0x4]; + u8 entry_sub_type[0x8]; + u8 byte_mask[0x10]; + + u8 next_table_base_63_48[0x10]; + u8 next_lu_type[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 linear_hash_enable[0x1]; + u8 reserved_at_5c[0x2]; + u8 next_table_rank[0x2]; + + u8 sx_wire[0x1]; + u8 sx_func_lb[0x1]; + u8 sx_sniffer[0x1]; + u8 sx_wire_enable[0x1]; + u8 sx_func_lb_enable[0x1]; + u8 sx_sniffer_enable[0x1]; + u8 action_type[0x3]; + u8 reserved_at_69[0x1]; + u8 action_description[0x6]; + u8 gvmi[0x10]; + + u8 encap_pointer_vlan_data[0x20]; + + u8 loopback_syndome_en[0x8]; + u8 loopback_syndome[0x8]; + u8 counter_trigger[0x10]; + + u8 miss_address_63_48[0x10]; + u8 counter_trigger_23_16[0x8]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 learning_point[0x1]; + u8 go_back[0x1]; + u8 match_polarity[0x1]; + u8 mask_mode[0x1]; + u8 miss_rank[0x2]; +}; + +struct mlx5_ifc_ste_rx_steering_mult_bits { + u8 entry_type[0x4]; + u8 reserved_at_4[0x4]; + u8 entry_sub_type[0x8]; + u8 byte_mask[0x10]; + + u8 next_table_base_63_48[0x10]; + u8 next_lu_type[0x8]; + u8 
next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 linear_hash_enable[0x1]; + u8 reserved_at_[0x2]; + u8 next_table_rank[0x2]; + + u8 member_count[0x10]; + u8 gvmi[0x10]; + + u8 qp_list_pointer[0x20]; + + u8 reserved_at_a0[0x1]; + u8 tunneling_action[0x3]; + u8 action_description[0x4]; + u8 reserved_at_a8[0x8]; + u8 counter_trigger_15_0[0x10]; + + u8 miss_address_63_48[0x10]; + u8 counter_trigger_23_16[0x08]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 learning_point[0x1]; + u8 fail_on_error[0x1]; + u8 match_polarity[0x1]; + u8 mask_mode[0x1]; + u8 miss_rank[0x2]; +}; + +struct mlx5_ifc_ste_modify_packet_bits { + u8 entry_type[0x4]; + u8 reserved_at_4[0x4]; + u8 entry_sub_type[0x8]; + u8 byte_mask[0x10]; + + u8 next_table_base_63_48[0x10]; + u8 next_lu_type[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 linear_hash_enable[0x1]; + u8 reserved_at_[0x2]; + u8 next_table_rank[0x2]; + + u8 number_of_re_write_actions[0x10]; + u8 gvmi[0x10]; + + u8 header_re_write_actions_pointer[0x20]; + + u8 reserved_at_a0[0x1]; + u8 tunneling_action[0x3]; + u8 action_description[0x4]; + u8 reserved_at_a8[0x8]; + u8 counter_trigger_15_0[0x10]; + + u8 miss_address_63_48[0x10]; + u8 counter_trigger_23_16[0x08]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 learning_point[0x1]; + u8 fail_on_error[0x1]; + u8 match_polarity[0x1]; + u8 mask_mode[0x1]; + u8 miss_rank[0x2]; +}; + +struct mlx5_ifc_ste_eth_l2_src_bits { + u8 smac_47_16[0x20]; + + u8 smac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 qp_type[0x2]; + u8 ethertype_filter[0x1]; + u8 reserved_at_43[0x1]; + u8 sx_sniffer[0x1]; + u8 force_lb[0x1]; + u8 functional_lb[0x1]; + u8 port[0x1]; + u8 reserved_at_48[0x4]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_qualifier[0x2]; + u8 reserved_at_52[0x2]; + u8 first_vlan_id[0xc]; + + u8 ip_fragmented[0x1]; + u8 tcp_syn[0x1]; + u8 encp_type[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 reserved_at_68[0x4]; + u8 second_priority[0x3]; + u8 second_cfi[0x1]; + u8 second_vlan_qualifier[0x2]; + u8 reserved_at_72[0x2]; + u8 second_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l2_dst_bits { + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 qp_type[0x2]; + u8 ethertype_filter[0x1]; + u8 reserved_at_43[0x1]; + u8 sx_sniffer[0x1]; + u8 force_lb[0x1]; + u8 functional_lb[0x1]; + u8 port[0x1]; + u8 reserved_at_48[0x4]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_qualifier[0x2]; + u8 reserved_at_52[0x2]; + u8 first_vlan_id[0xc]; + + u8 ip_fragmented[0x1]; + u8 tcp_syn[0x1]; + u8 encp_type[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 reserved_at_68[0x4]; + u8 second_priority[0x3]; + u8 second_cfi[0x1]; + u8 second_vlan_qualifier[0x2]; + u8 reserved_at_72[0x2]; + u8 second_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l2_src_dst_bits { + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 smac_47_32[0x10]; + + u8 smac_31_0[0x20]; + + u8 sx_sniffer[0x1]; + u8 force_lb[0x1]; + u8 functional_lb[0x1]; + u8 port[0x1]; + u8 l3_type[0x2]; + u8 reserved_at_66[0x6]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_qualifier[0x2]; + u8 reserved_at_72[0x2]; + u8 first_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv4_5_tuple_bits { + u8 destination_address[0x20]; + + u8 source_address[0x20]; + + u8 source_port[0x10]; + u8 destination_port[0x10]; + + u8 fragmented[0x1]; + u8 first_fragment[0x1]; + u8 reserved_at_62[0x2]; + u8 reserved_at_64[0x1]; + u8 
ecn[0x2]; + u8 tcp_ns[0x1]; + u8 tcp_cwr[0x1]; + u8 tcp_ece[0x1]; + u8 tcp_urg[0x1]; + u8 tcp_ack[0x1]; + u8 tcp_psh[0x1]; + u8 tcp_rst[0x1]; + u8 tcp_syn[0x1]; + u8 tcp_fin[0x1]; + u8 dscp[0x6]; + u8 reserved_at_76[0x2]; + u8 protocol[0x8]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv6_dst_bits { + u8 dst_ip_127_96[0x20]; + + u8 dst_ip_95_64[0x20]; + + u8 dst_ip_63_32[0x20]; + + u8 dst_ip_31_0[0x20]; +}; + +struct mlx5_ifc_ste_eth_l2_tnl_bits { + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 l2_tunneling_network_id[0x20]; + + u8 ip_fragmented[0x1]; + u8 tcp_syn[0x1]; + u8 encp_type[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 reserved_at_6c[0x3]; + u8 gre_key_flag[0x1]; + u8 first_vlan_qualifier[0x2]; + u8 reserved_at_72[0x2]; + u8 first_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv6_src_bits { + u8 src_ip_127_96[0x20]; + + u8 src_ip_95_64[0x20]; + + u8 src_ip_63_32[0x20]; + + u8 src_ip_31_0[0x20]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv4_misc_bits { + u8 version[0x4]; + u8 ihl[0x4]; + u8 reserved_at_8[0x8]; + u8 total_length[0x10]; + + u8 identification[0x10]; + u8 flags[0x3]; + u8 fragment_offset[0xd]; + + u8 time_to_live[0x8]; + u8 reserved_at_48[0x8]; + u8 checksum[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_ste_eth_l4_bits { + u8 fragmented[0x1]; + u8 first_fragment[0x1]; + u8 reserved_at_2[0x6]; + u8 protocol[0x8]; + u8 dst_port[0x10]; + + u8 ipv6_version[0x4]; + u8 reserved_at_24[0x1]; + u8 ecn[0x2]; + u8 tcp_ns[0x1]; + u8 tcp_cwr[0x1]; + u8 tcp_ece[0x1]; + u8 tcp_urg[0x1]; + u8 tcp_ack[0x1]; + u8 tcp_psh[0x1]; + u8 tcp_rst[0x1]; + u8 tcp_syn[0x1]; + u8 tcp_fin[0x1]; + u8 src_port[0x10]; + + u8 ipv6_payload_length[0x10]; + u8 ipv6_hop_limit[0x8]; + u8 dscp[0x6]; + u8 reserved_at_5e[0x2]; + + u8 tcp_data_offset[0x4]; + u8 reserved_at_64[0x8]; + u8 flow_label[0x14]; +}; + +struct mlx5_ifc_ste_eth_l4_misc_bits { + u8 checksum[0x10]; + u8 length[0x10]; + + u8 seq_num[0x20]; + + u8 ack_num[0x20]; + + u8 urgent_pointer[0x10]; + u8 window_size[0x10]; +}; + +struct mlx5_ifc_ste_mpls_bits { + u8 mpls0_label[0x14]; + u8 mpls0_exp[0x3]; + u8 mpls0_s_bos[0x1]; + u8 mpls0_ttl[0x8]; + + u8 mpls1_label[0x20]; + + u8 mpls2_label[0x20]; + + u8 reserved_at_60[0x16]; + u8 mpls4_s_bit[0x1]; + u8 mpls4_qualifier[0x1]; + u8 mpls3_s_bit[0x1]; + u8 mpls3_qualifier[0x1]; + u8 mpls2_s_bit[0x1]; + u8 mpls2_qualifier[0x1]; + u8 mpls1_s_bit[0x1]; + u8 mpls1_qualifier[0x1]; + u8 mpls0_s_bit[0x1]; + u8 mpls0_qualifier[0x1]; +}; + +struct mlx5_ifc_ste_register_0_bits { + u8 register_0_h[0x20]; + + u8 register_0_l[0x20]; + + u8 register_1_h[0x20]; + + u8 register_1_l[0x20]; +}; + +struct mlx5_ifc_ste_register_1_bits { + u8 register_2_h[0x20]; + + u8 register_2_l[0x20]; + + u8 register_3_h[0x20]; + + u8 register_3_l[0x20]; +}; + +struct mlx5_ifc_ste_gre_bits { + u8 gre_c_present[0x1]; + u8 reserved_at_30[0x1]; + u8 gre_k_present[0x1]; + u8 gre_s_present[0x1]; + u8 strict_src_route[0x1]; + u8 recur[0x3]; + u8 flags[0x5]; + u8 version[0x3]; + u8 gre_protocol[0x10]; + + u8 checksum[0x10]; + u8 offset[0x10]; + + u8 gre_key_h[0x18]; + u8 gre_key_l[0x8]; + + u8 seq_num[0x20]; +}; + +struct mlx5_ifc_ste_flex_parser_0_bits { + u8 flex_parser_3[0x20]; + + u8 flex_parser_2[0x20]; + + u8 flex_parser_1[0x20]; + + u8 flex_parser_0[0x20]; +}; + +struct mlx5_ifc_ste_flex_parser_1_bits { + u8 flex_parser_7[0x20]; + + u8 flex_parser_6[0x20]; + + u8 flex_parser_5[0x20]; + + u8 flex_parser_4[0x20]; +}; + +struct 
mlx5_ifc_ste_flex_parser_ok_bits { + u8 flex_parser_3[0x20]; + u8 flex_parser_2[0x20]; + u8 flex_parsers_ok[0x8]; + u8 reserved_at_48[0x18]; + u8 flex_parser_0[0x20]; +}; + +struct mlx5_ifc_ste_flex_parser_tnl_bits { + u8 flex_parser_tunneling_header_63_32[0x20]; + + u8 flex_parser_tunneling_header_31_0[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_ste_flex_parser_tnl_vxlan_gpe_bits { + u8 outer_vxlan_gpe_flags[0x8]; + u8 reserved_at_8[0x10]; + u8 outer_vxlan_gpe_next_protocol[0x8]; + + u8 outer_vxlan_gpe_vni[0x18]; + u8 reserved_at_38[0x8]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_ste_flex_parser_tnl_geneve_bits { + u8 reserved_at_0[0x2]; + u8 geneve_opt_len[0x6]; + u8 geneve_oam[0x1]; + u8 reserved_at_9[0x7]; + u8 geneve_protocol_type[0x10]; + + u8 geneve_vni[0x18]; + u8 reserved_at_38[0x8]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_ste_flex_parser_tnl_gtpu_bits { + u8 reserved_at_0[0x5]; + u8 gtpu_msg_flags[0x3]; + u8 gtpu_msg_type[0x8]; + u8 reserved_at_10[0x10]; + + u8 gtpu_teid[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_ste_tunnel_header_bits { + u8 tunnel_header_0[0x20]; + + u8 tunnel_header_1[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_ste_general_purpose_bits { + u8 general_purpose_lookup_field[0x20]; + + u8 reserved_at_20[0x20]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_ste_src_gvmi_qp_bits { + u8 loopback_syndrome[0x8]; + u8 reserved_at_8[0x8]; + u8 source_gvmi[0x10]; + + u8 reserved_at_20[0x5]; + u8 force_lb[0x1]; + u8 functional_lb[0x1]; + u8 source_is_requestor[0x1]; + u8 source_qp[0x18]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_l2_hdr_bits { + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 smac_47_32[0x10]; + + u8 smac_31_0[0x20]; + + u8 ethertype[0x10]; + u8 vlan_type[0x10]; + + u8 vlan[0x10]; + u8 reserved_at_90[0x10]; +}; + +/* Both HW set and HW add share the same HW format with different opcodes */ +struct mlx5_ifc_dr_action_hw_set_bits { + u8 opcode[0x8]; + u8 destination_field_code[0x8]; + u8 reserved_at_10[0x2]; + u8 destination_left_shifter[0x6]; + u8 reserved_at_18[0x3]; + u8 destination_length[0x5]; + + u8 inline_data[0x20]; +}; + +struct mlx5_ifc_dr_action_hw_copy_bits { + u8 opcode[0x8]; + u8 destination_field_code[0x8]; + u8 reserved_at_10[0x2]; + u8 destination_left_shifter[0x6]; + u8 reserved_at_18[0x2]; + u8 destination_length[0x6]; + + u8 reserved_at_20[0x8]; + u8 source_field_code[0x8]; + u8 reserved_at_30[0x2]; + u8 source_left_shifter[0x6]; + u8 reserved_at_38[0x8]; +}; + +enum { + MLX5DR_ASO_FLOW_METER_NUM_PER_OBJ = 2, +}; + +struct mlx5_ifc_ste_aso_flow_meter_action_bits { + u8 reserved_at_0[0xc]; + u8 action[0x1]; + u8 initial_color[0x2]; + u8 line_id[0x1]; +}; + +struct mlx5_ifc_ste_double_action_aso_v1_bits { + u8 action_id[0x8]; + u8 aso_context_number[0x18]; + + u8 dest_reg_id[0x2]; + u8 change_ordering_tag[0x1]; + u8 aso_check_ordering[0x1]; + u8 aso_context_type[0x4]; + u8 reserved_at_28[0x8]; + union { + u8 aso_fields[0x10]; + struct mlx5_ifc_ste_aso_flow_meter_action_bits flow_meter; + }; +}; + +#endif /* MLX5_IFC_DR_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h new file mode 100644 index 000000000..34c2bd17a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h @@ -0,0 +1,434 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 
2020 NVIDIA CORPORATION. All rights reserved. */ + +#ifndef MLX5_IFC_DR_STE_V1_H +#define MLX5_IFC_DR_STE_V1_H + +enum mlx5_ifc_ste_v1_modify_hdr_offset { + MLX5_MODIFY_HEADER_V1_QW_OFFSET = 0x20, +}; + +struct mlx5_ifc_ste_single_action_flow_tag_v1_bits { + u8 action_id[0x8]; + u8 flow_tag[0x18]; +}; + +struct mlx5_ifc_ste_single_action_modify_list_v1_bits { + u8 action_id[0x8]; + u8 num_of_modify_actions[0x8]; + u8 modify_actions_ptr[0x10]; +}; + +struct mlx5_ifc_ste_single_action_remove_header_v1_bits { + u8 action_id[0x8]; + u8 reserved_at_8[0x2]; + u8 start_anchor[0x6]; + u8 reserved_at_10[0x2]; + u8 end_anchor[0x6]; + u8 reserved_at_18[0x4]; + u8 decap[0x1]; + u8 vni_to_cqe[0x1]; + u8 qos_profile[0x2]; +}; + +struct mlx5_ifc_ste_single_action_remove_header_size_v1_bits { + u8 action_id[0x8]; + u8 reserved_at_8[0x2]; + u8 start_anchor[0x6]; + u8 outer_l4_remove[0x1]; + u8 reserved_at_11[0x1]; + u8 start_offset[0x7]; + u8 reserved_at_18[0x1]; + u8 remove_size[0x6]; +}; + +struct mlx5_ifc_ste_double_action_copy_v1_bits { + u8 action_id[0x8]; + u8 destination_dw_offset[0x8]; + u8 reserved_at_10[0x2]; + u8 destination_left_shifter[0x6]; + u8 reserved_at_17[0x2]; + u8 destination_length[0x6]; + + u8 reserved_at_20[0x8]; + u8 source_dw_offset[0x8]; + u8 reserved_at_30[0x2]; + u8 source_right_shifter[0x6]; + u8 reserved_at_38[0x8]; +}; + +struct mlx5_ifc_ste_double_action_set_v1_bits { + u8 action_id[0x8]; + u8 destination_dw_offset[0x8]; + u8 reserved_at_10[0x2]; + u8 destination_left_shifter[0x6]; + u8 reserved_at_18[0x2]; + u8 destination_length[0x6]; + + u8 inline_data[0x20]; +}; + +struct mlx5_ifc_ste_double_action_add_v1_bits { + u8 action_id[0x8]; + u8 destination_dw_offset[0x8]; + u8 reserved_at_10[0x2]; + u8 destination_left_shifter[0x6]; + u8 reserved_at_18[0x2]; + u8 destination_length[0x6]; + + u8 add_value[0x20]; +}; + +struct mlx5_ifc_ste_double_action_insert_with_inline_v1_bits { + u8 action_id[0x8]; + u8 reserved_at_8[0x2]; + u8 start_anchor[0x6]; + u8 start_offset[0x7]; + u8 reserved_at_17[0x9]; + + u8 inline_data[0x20]; +}; + +struct mlx5_ifc_ste_double_action_insert_with_ptr_v1_bits { + u8 action_id[0x8]; + u8 reserved_at_8[0x2]; + u8 start_anchor[0x6]; + u8 start_offset[0x7]; + u8 size[0x6]; + u8 attributes[0x3]; + + u8 pointer[0x20]; +}; + +struct mlx5_ifc_ste_double_action_modify_action_list_v1_bits { + u8 action_id[0x8]; + u8 modify_actions_pattern_pointer[0x18]; + + u8 number_of_modify_actions[0x8]; + u8 modify_actions_argument_pointer[0x18]; +}; + +struct mlx5_ifc_ste_match_bwc_v1_bits { + u8 entry_format[0x8]; + u8 counter_id[0x18]; + + u8 miss_address_63_48[0x10]; + u8 match_definer_ctx_idx[0x8]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 reserved_at_5a[0x1]; + u8 match_polarity[0x1]; + u8 reparse[0x1]; + u8 reserved_at_5d[0x3]; + + u8 next_table_base_63_48[0x10]; + u8 hash_definer_ctx_idx[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 hash_type[0x2]; + u8 hash_after_actions[0x1]; + u8 reserved_at_9e[0x2]; + + u8 byte_mask[0x10]; + u8 next_entry_format[0x1]; + u8 mask_mode[0x1]; + u8 gvmi[0xe]; + + u8 action[0x40]; +}; + +struct mlx5_ifc_ste_mask_and_match_v1_bits { + u8 entry_format[0x8]; + u8 counter_id[0x18]; + + u8 miss_address_63_48[0x10]; + u8 match_definer_ctx_idx[0x8]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 reserved_at_5a[0x1]; + u8 match_polarity[0x1]; + u8 reparse[0x1]; + u8 reserved_at_5d[0x3]; + + u8 next_table_base_63_48[0x10]; + u8 hash_definer_ctx_idx[0x8]; 
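+ /* Next-table base/size layout matches mlx5_ifc_ste_match_bwc_v1_bits above. */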
+ u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 hash_type[0x2]; + u8 hash_after_actions[0x1]; + u8 reserved_at_9e[0x2]; + + u8 action[0x60]; +}; + +struct mlx5_ifc_ste_eth_l2_src_v1_bits { + u8 reserved_at_0[0x1]; + u8 sx_sniffer[0x1]; + u8 functional_loopback[0x1]; + u8 ip_fragmented[0x1]; + u8 qp_type[0x2]; + u8 encapsulation_type[0x2]; + u8 port[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 first_vlan_qualifier[0x2]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_id[0xc]; + + u8 smac_47_16[0x20]; + + u8 smac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 reserved_at_60[0x6]; + u8 tcp_syn[0x1]; + u8 reserved_at_67[0x3]; + u8 force_loopback[0x1]; + u8 l2_ok[0x1]; + u8 l3_ok[0x1]; + u8 l4_ok[0x1]; + u8 second_vlan_qualifier[0x2]; + + u8 second_priority[0x3]; + u8 second_cfi[0x1]; + u8 second_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l2_dst_v1_bits { + u8 reserved_at_0[0x1]; + u8 sx_sniffer[0x1]; + u8 functional_lb[0x1]; + u8 ip_fragmented[0x1]; + u8 qp_type[0x2]; + u8 encapsulation_type[0x2]; + u8 port[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 first_vlan_qualifier[0x2]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_id[0xc]; + + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 reserved_at_60[0x6]; + u8 tcp_syn[0x1]; + u8 reserved_at_67[0x3]; + u8 force_lb[0x1]; + u8 l2_ok[0x1]; + u8 l3_ok[0x1]; + u8 l4_ok[0x1]; + u8 second_vlan_qualifier[0x2]; + u8 second_priority[0x3]; + u8 second_cfi[0x1]; + u8 second_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l2_src_dst_v1_bits { + u8 dmac_47_16[0x20]; + + u8 smac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 reserved_at_50[0x2]; + u8 functional_lb[0x1]; + u8 reserved_at_53[0x5]; + u8 port[0x2]; + u8 l3_type[0x2]; + u8 reserved_at_5c[0x2]; + u8 first_vlan_qualifier[0x2]; + + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_id[0xc]; + u8 smac_15_0[0x10]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv4_5_tuple_v1_bits { + u8 source_address[0x20]; + + u8 destination_address[0x20]; + + u8 source_port[0x10]; + u8 destination_port[0x10]; + + u8 reserved_at_60[0x4]; + u8 l4_ok[0x1]; + u8 l3_ok[0x1]; + u8 fragmented[0x1]; + u8 tcp_ns[0x1]; + u8 tcp_cwr[0x1]; + u8 tcp_ece[0x1]; + u8 tcp_urg[0x1]; + u8 tcp_ack[0x1]; + u8 tcp_psh[0x1]; + u8 tcp_rst[0x1]; + u8 tcp_syn[0x1]; + u8 tcp_fin[0x1]; + u8 dscp[0x6]; + u8 ecn[0x2]; + u8 protocol[0x8]; +}; + +struct mlx5_ifc_ste_eth_l2_tnl_v1_bits { + u8 l2_tunneling_network_id[0x20]; + + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 reserved_at_60[0x3]; + u8 ip_fragmented[0x1]; + u8 reserved_at_64[0x2]; + u8 encp_type[0x2]; + u8 reserved_at_68[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 first_vlan_qualifier[0x2]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv4_misc_v1_bits { + u8 identification[0x10]; + u8 flags[0x3]; + u8 fragment_offset[0xd]; + + u8 total_length[0x10]; + u8 checksum[0x10]; + + u8 version[0x4]; + u8 ihl[0x4]; + u8 time_to_live[0x8]; + u8 reserved_at_50[0x10]; + + u8 reserved_at_60[0x1c]; + u8 voq_internal_prio[0x4]; +}; + +struct mlx5_ifc_ste_eth_l4_v1_bits { + u8 ipv6_version[0x4]; + u8 reserved_at_4[0x4]; + u8 dscp[0x6]; + u8 ecn[0x2]; + u8 ipv6_hop_limit[0x8]; + u8 protocol[0x8]; + + u8 src_port[0x10]; + u8 dst_port[0x10]; + + u8 first_fragment[0x1]; + u8 reserved_at_41[0xb]; + u8 flow_label[0x14]; + + u8 tcp_data_offset[0x4]; + u8 l4_ok[0x1]; + u8 l3_ok[0x1]; + u8 fragmented[0x1]; + u8 tcp_ns[0x1]; + 
u8 tcp_cwr[0x1]; + u8 tcp_ece[0x1]; + u8 tcp_urg[0x1]; + u8 tcp_ack[0x1]; + u8 tcp_psh[0x1]; + u8 tcp_rst[0x1]; + u8 tcp_syn[0x1]; + u8 tcp_fin[0x1]; + u8 ipv6_paylen[0x10]; +}; + +struct mlx5_ifc_ste_eth_l4_misc_v1_bits { + u8 window_size[0x10]; + u8 urgent_pointer[0x10]; + + u8 ack_num[0x20]; + + u8 seq_num[0x20]; + + u8 length[0x10]; + u8 checksum[0x10]; +}; + +struct mlx5_ifc_ste_mpls_v1_bits { + u8 reserved_at_0[0x15]; + u8 mpls_ok[0x1]; + u8 mpls4_s_bit[0x1]; + u8 mpls4_qualifier[0x1]; + u8 mpls3_s_bit[0x1]; + u8 mpls3_qualifier[0x1]; + u8 mpls2_s_bit[0x1]; + u8 mpls2_qualifier[0x1]; + u8 mpls1_s_bit[0x1]; + u8 mpls1_qualifier[0x1]; + u8 mpls0_s_bit[0x1]; + u8 mpls0_qualifier[0x1]; + + u8 mpls0_label[0x14]; + u8 mpls0_exp[0x3]; + u8 mpls0_s_bos[0x1]; + u8 mpls0_ttl[0x8]; + + u8 mpls1_label[0x20]; + + u8 mpls2_label[0x20]; +}; + +struct mlx5_ifc_ste_gre_v1_bits { + u8 gre_c_present[0x1]; + u8 reserved_at_1[0x1]; + u8 gre_k_present[0x1]; + u8 gre_s_present[0x1]; + u8 strict_src_route[0x1]; + u8 recur[0x3]; + u8 flags[0x5]; + u8 version[0x3]; + u8 gre_protocol[0x10]; + + u8 reserved_at_20[0x20]; + + u8 gre_key_h[0x18]; + u8 gre_key_l[0x8]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_ste_src_gvmi_qp_v1_bits { + u8 loopback_synd[0x8]; + u8 reserved_at_8[0x7]; + u8 functional_lb[0x1]; + u8 source_gvmi[0x10]; + + u8 force_lb[0x1]; + u8 reserved_at_21[0x1]; + u8 source_is_requestor[0x1]; + u8 reserved_at_23[0x5]; + u8 source_qp[0x18]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_ste_icmp_v1_bits { + u8 icmp_payload_data[0x20]; + + u8 icmp_header_data[0x20]; + + u8 icmp_type[0x8]; + u8 icmp_code[0x8]; + u8 reserved_at_50[0x10]; + + u8 reserved_at_60[0x20]; +}; + +#endif /* MLX5_IFC_DR_STE_V1_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h new file mode 100644 index 000000000..226a0d7bb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -0,0 +1,192 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019, Mellanox Technologies */ + +#ifndef _MLX5DR_H_ +#define _MLX5DR_H_ + +struct mlx5dr_domain; +struct mlx5dr_table; +struct mlx5dr_matcher; +struct mlx5dr_rule; +struct mlx5dr_action; + +enum mlx5dr_domain_type { + MLX5DR_DOMAIN_TYPE_NIC_RX, + MLX5DR_DOMAIN_TYPE_NIC_TX, + MLX5DR_DOMAIN_TYPE_FDB, +}; + +enum mlx5dr_domain_sync_flags { + MLX5DR_DOMAIN_SYNC_FLAGS_SW = 1 << 0, + MLX5DR_DOMAIN_SYNC_FLAGS_HW = 1 << 1, +}; + +enum mlx5dr_action_reformat_type { + DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2, + DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2, + DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2, + DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3, + DR_ACTION_REFORMAT_TYP_INSERT_HDR, + DR_ACTION_REFORMAT_TYP_REMOVE_HDR, +}; + +struct mlx5dr_match_parameters { + size_t match_sz; + u64 *match_buf; /* Device spec format */ +}; + +struct mlx5dr_action_dest { + struct mlx5dr_action *dest; + struct mlx5dr_action *reformat; +}; + +struct mlx5dr_domain * +mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type); + +int mlx5dr_domain_destroy(struct mlx5dr_domain *domain); + +int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags); + +void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, + struct mlx5dr_domain *peer_dmn); + +struct mlx5dr_table * +mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags, + u16 uid); + +struct mlx5dr_table * +mlx5dr_table_get_from_fs_ft(struct mlx5_flow_table *ft); + +int 
mlx5dr_table_destroy(struct mlx5dr_table *table); + +u32 mlx5dr_table_get_id(struct mlx5dr_table *table); + +struct mlx5dr_matcher * +mlx5dr_matcher_create(struct mlx5dr_table *table, + u32 priority, + u8 match_criteria_enable, + struct mlx5dr_match_parameters *mask); + +int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher); + +struct mlx5dr_rule * +mlx5dr_rule_create(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + size_t num_actions, + struct mlx5dr_action *actions[], + u32 flow_source); + +int mlx5dr_rule_destroy(struct mlx5dr_rule *rule); + +int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, + struct mlx5dr_action *action); + +struct mlx5dr_action * +mlx5dr_action_create_dest_table_num(struct mlx5dr_domain *dmn, u32 table_num); + +struct mlx5dr_action * +mlx5dr_action_create_dest_table(struct mlx5dr_table *table); + +struct mlx5dr_action * +mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *domain, + struct mlx5_flow_table *ft); + +struct mlx5dr_action * +mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain, + u16 vport, u8 vhca_id_valid, + u16 vhca_id); + +struct mlx5dr_action * +mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_action_dest *dests, + u32 num_of_dests, + bool ignore_flow_level, + u32 flow_source); + +struct mlx5dr_action *mlx5dr_action_create_drop(void); + +struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value); + +struct mlx5dr_action * +mlx5dr_action_create_flow_sampler(struct mlx5dr_domain *dmn, u32 sampler_id); + +struct mlx5dr_action * +mlx5dr_action_create_flow_counter(u32 counter_id); + +struct mlx5dr_action * +mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn, + enum mlx5dr_action_reformat_type reformat_type, + u8 reformat_param_0, + u8 reformat_param_1, + size_t data_sz, + void *data); + +struct mlx5dr_action * +mlx5dr_action_create_modify_header(struct mlx5dr_domain *domain, + u32 flags, + size_t actions_sz, + __be64 actions[]); + +struct mlx5dr_action *mlx5dr_action_create_pop_vlan(void); + +struct mlx5dr_action * +mlx5dr_action_create_push_vlan(struct mlx5dr_domain *domain, __be32 vlan_hdr); + +struct mlx5dr_action * +mlx5dr_action_create_aso(struct mlx5dr_domain *dmn, + u32 obj_id, + u8 return_reg_id, + u8 aso_type, + u8 init_color, + u8 meter_id); + +int mlx5dr_action_destroy(struct mlx5dr_action *action); + +static inline bool +mlx5dr_is_supported(struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN(dev, roce) && + (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) || + (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) && + (MLX5_CAP_GEN(dev, steering_format_version) <= + MLX5_STEERING_FORMAT_CONNECTX_7))); +} + +/* buddy functions & structure */ + +struct mlx5dr_icm_mr; + +struct mlx5dr_icm_buddy_mem { + unsigned long **bitmap; + unsigned int *num_free; + u32 max_order; + struct list_head list_node; + struct mlx5dr_icm_mr *icm_mr; + struct mlx5dr_icm_pool *pool; + + /* This is the list of used chunks. HW may be accessing this memory */ + struct list_head used_list; + u64 used_memory; + + /* Hardware may be accessing this memory but at some future, + * undetermined time, it might cease to do so. + * sync_ste command sets them free. 
+ */ + struct list_head hot_list; + + /* Memory optimisation */ + struct mlx5dr_ste *ste_arr; + struct list_head *miss_list; + u8 *hw_ste_arr; +}; + +int mlx5dr_buddy_init(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int max_order); +void mlx5dr_buddy_cleanup(struct mlx5dr_icm_buddy_mem *buddy); +int mlx5dr_buddy_alloc_mem(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int order, + unsigned int *segment); +void mlx5dr_buddy_free_mem(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int seg, unsigned int order); + +#endif /* _MLX5DR_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c new file mode 100644 index 000000000..b6931bbe5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -0,0 +1,505 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#include <linux/mlx5/transobj.h> + +int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn) +{ + u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {}; + int err; + + MLX5_SET(alloc_transport_domain_in, in, opcode, + MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN); + + err = mlx5_cmd_exec_inout(dev, alloc_transport_domain, in, out); + if (!err) + *tdn = MLX5_GET(alloc_transport_domain_out, out, + transport_domain); + + return err; +} +EXPORT_SYMBOL(mlx5_core_alloc_transport_domain); + +void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {}; + + MLX5_SET(dealloc_transport_domain_in, in, opcode, + MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); + MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn); + mlx5_cmd_exec_in(dev, dealloc_transport_domain, in); +} +EXPORT_SYMBOL(mlx5_core_dealloc_transport_domain); + +int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn) +{ + u32 out[MLX5_ST_SZ_DW(create_rq_out)] = {}; + int err; + + MLX5_SET(create_rq_in, in, opcode, MLX5_CMD_OP_CREATE_RQ); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (!err) + *rqn = MLX5_GET(create_rq_out, out, rqn); + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_rq); + +int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in) +{ + MLX5_SET(modify_rq_in, in, rqn, rqn); + MLX5_SET(modify_rq_in, in, opcode, MLX5_CMD_OP_MODIFY_RQ); + + return mlx5_cmd_exec_in(dev, modify_rq, in); +} +EXPORT_SYMBOL(mlx5_core_modify_rq); + +void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {}; + + MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ); + MLX5_SET(destroy_rq_in, in, rqn, rqn); + mlx5_cmd_exec_in(dev, destroy_rq, in); +} +EXPORT_SYMBOL(mlx5_core_destroy_rq); + +int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(query_rq_in)] = {}; + + MLX5_SET(query_rq_in, in, opcode, MLX5_CMD_OP_QUERY_RQ); + MLX5_SET(query_rq_in, in, rqn, rqn); + + return mlx5_cmd_exec_inout(dev, query_rq, in, out); +} +EXPORT_SYMBOL(mlx5_core_query_rq); + +int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn) +{ + u32 out[MLX5_ST_SZ_DW(create_sq_out)] = {}; + int err; + + MLX5_SET(create_sq_in, in, opcode, MLX5_CMD_OP_CREATE_SQ); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (!err) + *sqn = MLX5_GET(create_sq_out, out, sqn); + + return err; +} + +int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in) +{ + MLX5_SET(modify_sq_in, in, sqn, sqn); + MLX5_SET(modify_sq_in, in, opcode, MLX5_CMD_OP_MODIFY_SQ); + return mlx5_cmd_exec_in(dev, modify_sq, in); +} +EXPORT_SYMBOL(mlx5_core_modify_sq); + +void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_sq_in)] = {}; + + MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ); + MLX5_SET(destroy_sq_in, in, sqn, sqn); + mlx5_cmd_exec_in(dev, destroy_sq, in); +} + +int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(query_sq_in)] = {}; + + MLX5_SET(query_sq_in, in, opcode, MLX5_CMD_OP_QUERY_SQ); + MLX5_SET(query_sq_in, in, sqn, sqn); + return mlx5_cmd_exec_inout(dev, query_sq, in, out); +} +EXPORT_SYMBOL(mlx5_core_query_sq); + +int mlx5_core_query_sq_state(struct mlx5_core_dev *dev, u32 sqn, u8 *state) +{ + void 
*out; + void *sqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(query_sq_out); + out = kvzalloc(inlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_core_query_sq(dev, sqn, out); + if (err) + goto out; + + sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context); + *state = MLX5_GET(sqc, sqc, state); + +out: + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_query_sq_state); + +int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, u32 *tirn) +{ + u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {}; + int err; + + MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR); + err = mlx5_cmd_exec_inout(dev, create_tir, in, out); + if (!err) + *tirn = MLX5_GET(create_tir_out, out, tirn); + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_tir); + +int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in) +{ + MLX5_SET(modify_tir_in, in, tirn, tirn); + MLX5_SET(modify_tir_in, in, opcode, MLX5_CMD_OP_MODIFY_TIR); + return mlx5_cmd_exec_in(dev, modify_tir, in); +} + +void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {}; + + MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR); + MLX5_SET(destroy_tir_in, in, tirn, tirn); + mlx5_cmd_exec_in(dev, destroy_tir, in); +} +EXPORT_SYMBOL(mlx5_core_destroy_tir); + +int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, u32 *tisn) +{ + u32 out[MLX5_ST_SZ_DW(create_tis_out)] = {}; + int err; + + MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS); + err = mlx5_cmd_exec_inout(dev, create_tis, in, out); + if (!err) + *tisn = MLX5_GET(create_tis_out, out, tisn); + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_tis); + +int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in) +{ + MLX5_SET(modify_tis_in, in, tisn, tisn); + MLX5_SET(modify_tis_in, in, opcode, MLX5_CMD_OP_MODIFY_TIS); + + return mlx5_cmd_exec_in(dev, modify_tis, in); +} +EXPORT_SYMBOL(mlx5_core_modify_tis); + +void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {}; + + MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, in, tisn, tisn); + mlx5_cmd_exec_in(dev, destroy_tis, in); +} +EXPORT_SYMBOL(mlx5_core_destroy_tis); + +int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rqtn) +{ + u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {}; + int err; + + MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (!err) + *rqtn = MLX5_GET(create_rqt_out, out, rqtn); + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_rqt); + +int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_rqt_out)] = {}; + + MLX5_SET(modify_rqt_in, in, rqtn, rqtn); + MLX5_SET(modify_rqt_in, in, opcode, MLX5_CMD_OP_MODIFY_RQT); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +} + +void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {}; + + MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT); + MLX5_SET(destroy_rqt_in, in, rqtn, rqtn); + mlx5_cmd_exec_in(dev, destroy_rqt, in); +} +EXPORT_SYMBOL(mlx5_core_destroy_rqt); + +static int mlx5_hairpin_create_rq(struct mlx5_core_dev *mdev, + struct mlx5_hairpin_params *params, u32 *rqn) +{ + u32 in[MLX5_ST_SZ_DW(create_rq_in)] = {0}; + void *rqc, *wq; + + rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); + wq = MLX5_ADDR_OF(rqc, rqc, wq); + + 
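+ /* Create the RQ as a hairpin queue in RST state; data-size and
+ * packet limits come from the caller-supplied hairpin params.
+ */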
MLX5_SET(rqc, rqc, hairpin, 1); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); + MLX5_SET(rqc, rqc, counter_set_id, params->q_counter); + + MLX5_SET(wq, wq, log_hairpin_data_sz, params->log_data_size); + MLX5_SET(wq, wq, log_hairpin_num_packets, params->log_num_packets); + + return mlx5_core_create_rq(mdev, in, MLX5_ST_SZ_BYTES(create_rq_in), rqn); +} + +static int mlx5_hairpin_create_sq(struct mlx5_core_dev *mdev, + struct mlx5_hairpin_params *params, u32 *sqn) +{ + u32 in[MLX5_ST_SZ_DW(create_sq_in)] = {0}; + void *sqc, *wq; + + sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); + wq = MLX5_ADDR_OF(sqc, sqc, wq); + + MLX5_SET(sqc, sqc, hairpin, 1); + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + + MLX5_SET(wq, wq, log_hairpin_data_sz, params->log_data_size); + MLX5_SET(wq, wq, log_hairpin_num_packets, params->log_num_packets); + + return mlx5_core_create_sq(mdev, in, MLX5_ST_SZ_BYTES(create_sq_in), sqn); +} + +static int mlx5_hairpin_create_queues(struct mlx5_hairpin *hp, + struct mlx5_hairpin_params *params) +{ + int i, j, err; + + for (i = 0; i < hp->num_channels; i++) { + err = mlx5_hairpin_create_rq(hp->func_mdev, params, &hp->rqn[i]); + if (err) + goto out_err_rq; + } + + for (i = 0; i < hp->num_channels; i++) { + err = mlx5_hairpin_create_sq(hp->peer_mdev, params, &hp->sqn[i]); + if (err) + goto out_err_sq; + } + + return 0; + +out_err_sq: + for (j = 0; j < i; j++) + mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[j]); + i = hp->num_channels; +out_err_rq: + for (j = 0; j < i; j++) + mlx5_core_destroy_rq(hp->func_mdev, hp->rqn[j]); + return err; +} + +static void mlx5_hairpin_destroy_queues(struct mlx5_hairpin *hp) +{ + int i; + + for (i = 0; i < hp->num_channels; i++) { + mlx5_core_destroy_rq(hp->func_mdev, hp->rqn[i]); + if (!hp->peer_gone) + mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]); + } +} + +static int mlx5_hairpin_modify_rq(struct mlx5_core_dev *func_mdev, u32 rqn, + int curr_state, int next_state, + u16 peer_vhca, u32 peer_sq) +{ + u32 in[MLX5_ST_SZ_DW(modify_rq_in)] = {}; + void *rqc; + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + if (next_state == MLX5_RQC_STATE_RDY) { + MLX5_SET(rqc, rqc, hairpin_peer_sq, peer_sq); + MLX5_SET(rqc, rqc, hairpin_peer_vhca, peer_vhca); + } + + MLX5_SET(modify_rq_in, in, rq_state, curr_state); + MLX5_SET(rqc, rqc, state, next_state); + + return mlx5_core_modify_rq(func_mdev, rqn, in); +} + +static int mlx5_hairpin_modify_sq(struct mlx5_core_dev *peer_mdev, u32 sqn, + int curr_state, int next_state, + u16 peer_vhca, u32 peer_rq) +{ + u32 in[MLX5_ST_SZ_DW(modify_sq_in)] = {0}; + void *sqc; + + sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); + + if (next_state == MLX5_SQC_STATE_RDY) { + MLX5_SET(sqc, sqc, hairpin_peer_rq, peer_rq); + MLX5_SET(sqc, sqc, hairpin_peer_vhca, peer_vhca); + } + + MLX5_SET(modify_sq_in, in, sq_state, curr_state); + MLX5_SET(sqc, sqc, state, next_state); + + return mlx5_core_modify_sq(peer_mdev, sqn, in); +} + +static int mlx5_hairpin_pair_queues(struct mlx5_hairpin *hp) +{ + int i, j, err; + + /* set peer SQs */ + for (i = 0; i < hp->num_channels; i++) { + err = mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], + MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY, + MLX5_CAP_GEN(hp->func_mdev, vhca_id), hp->rqn[i]); + if (err) + goto err_modify_sq; + } + + /* set func RQs */ + for (i = 0; i < hp->num_channels; i++) { + err = mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i], + MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY, + MLX5_CAP_GEN(hp->peer_mdev, vhca_id), hp->sqn[i]); + if (err) + goto err_modify_rq; + } + + return 0; + 
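+ /* Unwind: move any RQs already switched to RDY back to RST, then
+ * fall through and reset every peer SQ.
+ */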
+err_modify_rq: + for (j = 0; j < i; j++) + mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[j], MLX5_RQC_STATE_RDY, + MLX5_RQC_STATE_RST, 0, 0); + i = hp->num_channels; +err_modify_sq: + for (j = 0; j < i; j++) + mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[j], MLX5_SQC_STATE_RDY, + MLX5_SQC_STATE_RST, 0, 0); + return err; +} + +static void mlx5_hairpin_unpair_peer_sq(struct mlx5_hairpin *hp) +{ + int i; + + for (i = 0; i < hp->num_channels; i++) + mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY, + MLX5_SQC_STATE_RST, 0, 0); +} + +static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp) +{ + int i; + + /* unset func RQs */ + for (i = 0; i < hp->num_channels; i++) + mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i], MLX5_RQC_STATE_RDY, + MLX5_RQC_STATE_RST, 0, 0); + /* unset peer SQs */ + if (!hp->peer_gone) + mlx5_hairpin_unpair_peer_sq(hp); +} + +struct mlx5_hairpin * +mlx5_core_hairpin_create(struct mlx5_core_dev *func_mdev, + struct mlx5_core_dev *peer_mdev, + struct mlx5_hairpin_params *params) +{ + struct mlx5_hairpin *hp; + int size, err; + + size = sizeof(*hp) + params->num_channels * 2 * sizeof(u32); + hp = kzalloc(size, GFP_KERNEL); + if (!hp) + return ERR_PTR(-ENOMEM); + + hp->func_mdev = func_mdev; + hp->peer_mdev = peer_mdev; + hp->num_channels = params->num_channels; + + hp->rqn = (void *)hp + sizeof(*hp); + hp->sqn = hp->rqn + params->num_channels; + + /* alloc and pair func --> peer hairpin */ + err = mlx5_hairpin_create_queues(hp, params); + if (err) + goto err_create_queues; + + err = mlx5_hairpin_pair_queues(hp); + if (err) + goto err_pair_queues; + + return hp; + +err_pair_queues: + mlx5_hairpin_destroy_queues(hp); +err_create_queues: + kfree(hp); + return ERR_PTR(err); +} + +void mlx5_core_hairpin_destroy(struct mlx5_hairpin *hp) +{ + mlx5_hairpin_unpair_queues(hp); + mlx5_hairpin_destroy_queues(hp); + kfree(hp); +} + +void mlx5_core_hairpin_clear_dead_peer(struct mlx5_hairpin *hp) +{ + int i; + + mlx5_hairpin_unpair_peer_sq(hp); + + /* destroy peer SQ */ + for (i = 0; i < hp->num_channels; i++) + mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]); + + hp->peer_gone = true; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c new file mode 100644 index 000000000..8455e79bc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -0,0 +1,328 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/io-mapping.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" + +static int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn) +{ + u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {}; + int err; + + MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR); + err = mlx5_cmd_exec_inout(dev, alloc_uar, in, out); + if (err) + return err; + + *uarn = MLX5_GET(alloc_uar_out, out, uar); + return 0; +} + +static int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {}; + + MLX5_SET(dealloc_uar_in, in, opcode, MLX5_CMD_OP_DEALLOC_UAR); + MLX5_SET(dealloc_uar_in, in, uar, uarn); + return mlx5_cmd_exec_in(dev, dealloc_uar, in); +} + +static int uars_per_sys_page(struct mlx5_core_dev *mdev) +{ + if (MLX5_CAP_GEN(mdev, uar_4k)) + return MLX5_CAP_GEN(mdev, num_of_uars_per_page); + + return 1; +} + +static u64 uar2pfn(struct mlx5_core_dev *mdev, u32 index) +{ + u32 system_page_index; + + if (MLX5_CAP_GEN(mdev, uar_4k)) + system_page_index = index >> (PAGE_SHIFT - MLX5_ADAPTER_PAGE_SHIFT); + else + system_page_index = index; + + return (mdev->bar_addr >> PAGE_SHIFT) + system_page_index; +} + +static void up_rel_func(struct kref *kref) +{ + struct mlx5_uars_page *up = container_of(kref, struct mlx5_uars_page, ref_count); + + list_del(&up->list); + iounmap(up->map); + if (mlx5_cmd_free_uar(up->mdev, up->index)) + mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index); + bitmap_free(up->reg_bitmap); + bitmap_free(up->fp_bitmap); + kfree(up); +} + +static struct mlx5_uars_page *alloc_uars_page(struct mlx5_core_dev *mdev, + bool map_wc) +{ + struct mlx5_uars_page *up; + int err = -ENOMEM; + phys_addr_t pfn; + int bfregs; + int node; + int i; + + bfregs = uars_per_sys_page(mdev) * MLX5_BFREGS_PER_UAR; + node = mdev->priv.numa_node; + up = kzalloc_node(sizeof(*up), GFP_KERNEL, node); + if (!up) + return ERR_PTR(err); + + up->mdev = mdev; + up->reg_bitmap = bitmap_zalloc_node(bfregs, GFP_KERNEL, node); + if (!up->reg_bitmap) + goto error1; + + up->fp_bitmap = bitmap_zalloc_node(bfregs, GFP_KERNEL, node); + if (!up->fp_bitmap) + goto error1; + + for (i = 0; i < bfregs; i++) + if ((i % MLX5_BFREGS_PER_UAR) < MLX5_NON_FP_BFREGS_PER_UAR) + set_bit(i, up->reg_bitmap); + else + set_bit(i, up->fp_bitmap); + + up->bfregs = bfregs; + up->fp_avail = bfregs * MLX5_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR; + up->reg_avail = bfregs * MLX5_NON_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR; + + err = mlx5_cmd_alloc_uar(mdev, &up->index); + if (err) { + mlx5_core_warn(mdev, "mlx5_cmd_alloc_uar() failed, %d\n", err); + goto error1; + } + + pfn = uar2pfn(mdev, up->index); + if (map_wc) { + up->map = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!up->map) { + err = -EAGAIN; + goto error2; + } + } else { + up->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!up->map) { + err = -ENOMEM; + goto error2; + } + } + kref_init(&up->ref_count); + mlx5_core_dbg(mdev, "allocated UAR page: index %d, total bfregs %d\n", + up->index, up->bfregs); + return up; + +error2: + if (mlx5_cmd_free_uar(mdev, up->index)) + mlx5_core_warn(mdev, "failed to free uar index %d\n", up->index); +error1: + 
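+ /* bitmap_free() is a no-op on NULL, so this label also covers the
+ * paths where one or both bitmaps were never allocated.
+ */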
bitmap_free(up->fp_bitmap); + bitmap_free(up->reg_bitmap); + kfree(up); + return ERR_PTR(err); +} + +struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev) +{ + struct mlx5_uars_page *ret; + + mutex_lock(&mdev->priv.bfregs.reg_head.lock); + if (!list_empty(&mdev->priv.bfregs.reg_head.list)) { + ret = list_first_entry(&mdev->priv.bfregs.reg_head.list, + struct mlx5_uars_page, list); + kref_get(&ret->ref_count); + goto out; + } + ret = alloc_uars_page(mdev, false); + if (IS_ERR(ret)) + goto out; + list_add(&ret->list, &mdev->priv.bfregs.reg_head.list); +out: + mutex_unlock(&mdev->priv.bfregs.reg_head.lock); + + return ret; +} +EXPORT_SYMBOL(mlx5_get_uars_page); + +void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up) +{ + mutex_lock(&mdev->priv.bfregs.reg_head.lock); + kref_put(&up->ref_count, up_rel_func); + mutex_unlock(&mdev->priv.bfregs.reg_head.lock); +} +EXPORT_SYMBOL(mlx5_put_uars_page); + +static unsigned long map_offset(struct mlx5_core_dev *mdev, int dbi) +{ + /* return the offset in bytes from the start of the page to the + * blue flame area of the UAR + */ + return dbi / MLX5_BFREGS_PER_UAR * MLX5_ADAPTER_PAGE_SIZE + + (dbi % MLX5_BFREGS_PER_UAR) * + (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) + MLX5_BF_OFFSET; +} + +static int alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, + bool map_wc, bool fast_path) +{ + struct mlx5_bfreg_data *bfregs; + struct mlx5_uars_page *up; + struct list_head *head; + unsigned long *bitmap; + unsigned int *avail; + struct mutex *lock; /* pointer to right mutex */ + int dbi; + + bfregs = &mdev->priv.bfregs; + if (map_wc) { + head = &bfregs->wc_head.list; + lock = &bfregs->wc_head.lock; + } else { + head = &bfregs->reg_head.list; + lock = &bfregs->reg_head.lock; + } + mutex_lock(lock); + if (list_empty(head)) { + up = alloc_uars_page(mdev, map_wc); + if (IS_ERR(up)) { + mutex_unlock(lock); + return PTR_ERR(up); + } + list_add(&up->list, head); + } else { + up = list_entry(head->next, struct mlx5_uars_page, list); + kref_get(&up->ref_count); + } + if (fast_path) { + bitmap = up->fp_bitmap; + avail = &up->fp_avail; + } else { + bitmap = up->reg_bitmap; + avail = &up->reg_avail; + } + dbi = find_first_bit(bitmap, up->bfregs); + clear_bit(dbi, bitmap); + (*avail)--; + if (!(*avail)) + list_del(&up->list); + + bfreg->map = up->map + map_offset(mdev, dbi); + bfreg->up = up; + bfreg->wc = map_wc; + bfreg->index = up->index + dbi / MLX5_BFREGS_PER_UAR; + mutex_unlock(lock); + + return 0; +} + +int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, + bool map_wc, bool fast_path) +{ + int err; + + err = alloc_bfreg(mdev, bfreg, map_wc, fast_path); + if (!err) + return 0; + + if (err == -EAGAIN && map_wc) + return alloc_bfreg(mdev, bfreg, false, fast_path); + + return err; +} +EXPORT_SYMBOL(mlx5_alloc_bfreg); + +static unsigned int addr_to_dbi_in_syspage(struct mlx5_core_dev *dev, + struct mlx5_uars_page *up, + struct mlx5_sq_bfreg *bfreg) +{ + unsigned int uar_idx; + unsigned int bfreg_idx; + unsigned int bf_reg_size; + + bf_reg_size = 1 << MLX5_CAP_GEN(dev, log_bf_reg_size); + + uar_idx = (bfreg->map - up->map) >> MLX5_ADAPTER_PAGE_SHIFT; + bfreg_idx = (((uintptr_t)bfreg->map % MLX5_ADAPTER_PAGE_SIZE) - MLX5_BF_OFFSET) / bf_reg_size; + + return uar_idx * MLX5_BFREGS_PER_UAR + bfreg_idx; +} + +void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg) +{ + struct mlx5_bfreg_data *bfregs; + struct mlx5_uars_page *up; + struct mutex *lock; /* pointer to right 
mutex */ + unsigned int dbi; + bool fp; + unsigned int *avail; + unsigned long *bitmap; + struct list_head *head; + + bfregs = &mdev->priv.bfregs; + if (bfreg->wc) { + head = &bfregs->wc_head.list; + lock = &bfregs->wc_head.lock; + } else { + head = &bfregs->reg_head.list; + lock = &bfregs->reg_head.lock; + } + up = bfreg->up; + dbi = addr_to_dbi_in_syspage(mdev, up, bfreg); + fp = (dbi % MLX5_BFREGS_PER_UAR) >= MLX5_NON_FP_BFREGS_PER_UAR; + if (fp) { + avail = &up->fp_avail; + bitmap = up->fp_bitmap; + } else { + avail = &up->reg_avail; + bitmap = up->reg_bitmap; + } + mutex_lock(lock); + (*avail)++; + set_bit(dbi, bitmap); + if (*avail == 1) + list_add_tail(&up->list, head); + + kref_put(&up->ref_count, up_rel_func); + mutex_unlock(lock); +} +EXPORT_SYMBOL(mlx5_free_bfreg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c new file mode 100644 index 000000000..3f68e3198 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -0,0 +1,1173 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/export.h> +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/eswitch.h> +#include "mlx5_core.h" +#include "sf/sf.h" + +/* Mutex to hold while enabling or disabling RoCE */ +static DEFINE_MUTEX(mlx5_roce_en_lock); + +u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) +{ + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {}; + int err; + + MLX5_SET(query_vport_state_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_STATE); + MLX5_SET(query_vport_state_in, in, op_mod, opmod); + MLX5_SET(query_vport_state_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_vport_state_in, in, other_vport, 1); + + err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out); + if (err) + return 0; + + return MLX5_GET(query_vport_state_out, out, state); +} + +int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, + u16 vport, u8 other_vport, u8 state) +{ + u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {}; + + MLX5_SET(modify_vport_state_in, in, opcode, + MLX5_CMD_OP_MODIFY_VPORT_STATE); + MLX5_SET(modify_vport_state_in, in, op_mod, opmod); + MLX5_SET(modify_vport_state_in, in, vport_number, vport); + MLX5_SET(modify_vport_state_in, in, other_vport, other_vport); + MLX5_SET(modify_vport_state_in, in, admin_state, state); + + return mlx5_cmd_exec_in(mdev, modify_vport_state, in); +} + +static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport, + u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {}; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + return mlx5_cmd_exec_inout(mdev, query_nic_vport_context, in, out); +} + +int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, + u16 vport, u8 *min_inline) +{ + u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {}; + int err; + + err = mlx5_query_nic_vport_context(mdev, vport, out); + if (!err) + *min_inline = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.min_wqe_inline_mode); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_min_inline); + +void mlx5_query_min_inline(struct mlx5_core_dev *mdev, + u8 *min_inline_mode) +{ + switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + if (!mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode)) + break; + fallthrough; + case MLX5_CAP_INLINE_MODE_L2: + *min_inline_mode = MLX5_INLINE_MODE_L2; + break; + case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: + *min_inline_mode = MLX5_INLINE_MODE_NONE; + break; + } +} +EXPORT_SYMBOL_GPL(mlx5_query_min_inline); + +int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev, + u16 vport, u8 min_inline) +{ + u32 in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {}; + void *nic_vport_ctx; + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.min_inline, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + MLX5_SET(nic_vport_context, nic_vport_ctx, + min_wqe_inline_mode, min_inline); + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + return mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); +} + +int 
mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, + u16 vport, bool other, u8 *addr) +{ + u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {}; + u8 *out_addr; + int err; + + out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context.permanent_address); + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(query_nic_vport_context_in, in, other_vport, other); + + err = mlx5_cmd_exec_inout(mdev, query_nic_vport_context, in, out); + if (!err) + ether_addr_copy(addr, &out_addr[2]); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address); + +int mlx5_query_mac_address(struct mlx5_core_dev *mdev, u8 *addr) +{ + return mlx5_query_nic_vport_mac_address(mdev, 0, false, addr); +} +EXPORT_SYMBOL_GPL(mlx5_query_mac_address); + +int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev, + u16 vport, const u8 *addr) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + void *nic_vport_ctx; + u8 *perm_mac; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.permanent_address, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + perm_mac = MLX5_ADDR_OF(nic_vport_context, nic_vport_ctx, + permanent_address); + + ether_addr_copy(&perm_mac[2], addr); + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); + + kvfree(in); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_address); + +int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu) +{ + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + u32 *out; + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, 0, out); + if (!err) + *mtu = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.mtu); + + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mtu); + +int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1); + MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu, mtu); + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); + + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mtu); + +int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, + u16 vport, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int *list_size) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0}; + void *nic_vport_ctx; + int max_list_size; + int req_list_size; + int out_sz; + void *out; + int err; + int i; + + req_list_size = *list_size; + + max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ? 
+ 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list); + + if (req_list_size > max_list_size) { + mlx5_core_warn(dev, "Requested list size (%d) > (%d) max_list_size\n", + req_list_size, max_list_size); + req_list_size = max_list_size; + } + + out_sz = MLX5_ST_SZ_BYTES(query_nic_vport_context_out) + + req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + + out = kvzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); + if (err) + goto out; + + nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context); + req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, + allowed_list_size); + + *list_size = req_list_size; + for (i = 0; i < req_list_size; i++) { + u8 *mac_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]) + 2; + ether_addr_copy(addr_list[i], mac_addr); + } +out: + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_list); + +int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int list_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {}; + void *nic_vport_ctx; + int max_list_size; + int in_sz; + void *in; + int err; + int i; + + max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ? + 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list); + + if (list_size > max_list_size) + return -ENOSPC; + + in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + + in = kvzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, + field_select.addresses_list, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_type, list_type); + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_size, list_size); + + for (i = 0; i < list_size; i++) { + u8 *curr_mac = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]) + 2; + ether_addr_copy(curr_mac, addr_list[i]); + } + + err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list); + +int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, + u16 vlans[], + int list_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int max_list_size; + int in_sz; + void *in; + int err; + int i; + + max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list); + + if (list_size > max_list_size) + return -ENOSPC; + + in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + list_size * MLX5_ST_SZ_BYTES(vlan_layout); + + memset(out, 0, sizeof(out)); + in = kvzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, + field_select.addresses_list, 1); + + nic_vport_ctx = 
MLX5_ADDR_OF(modify_nic_vport_context_in, in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_type, MLX5_NVPRT_LIST_TYPE_VLAN); + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_size, list_size); + + for (i = 0; i < list_size; i++) { + void *vlan_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]); + MLX5_SET(vlan_layout, vlan_addr, vlan, vlans[i]); + } + + err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans); + +int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, + u64 *system_image_guid) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, 0, out); + if (err) + goto out; + + *system_image_guid = MLX5_GET64(query_nic_vport_context_out, out, + nic_vport_context.system_image_guid); +out: + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_system_image_guid); + +int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + mlx5_query_nic_vport_context(mdev, 0, out); + + *node_guid = MLX5_GET64(query_nic_vport_context_out, out, + nic_vport_context.node_guid); + + kvfree(out); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid); + +int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, + u16 vport, u64 node_guid) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *nic_vport_context; + void *in; + int err; + + if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return -EACCES; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.node_guid, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + + nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + MLX5_SET64(nic_vport_context, nic_vport_context, node_guid, node_guid); + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); + + kvfree(in); + + return err; +} + +int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, + u16 *qkey_viol_cntr) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + mlx5_query_nic_vport_context(mdev, 0, out); + + *qkey_viol_cntr = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.qkey_violation_counter); + + kvfree(out); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_qkey_viol_cntr); + +int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, u16 vf_num, u16 gid_index, + union ib_gid *gid) +{ + int in_sz = MLX5_ST_SZ_BYTES(query_hca_vport_gid_in); + int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_gid_out); + int is_group_manager; + void *out = NULL; + void *in = NULL; + union ib_gid *tmp; + int tbsz; + int nout; + int err; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + tbsz = mlx5_get_gid_table_len(MLX5_CAP_GEN(dev, gid_table_size)); + mlx5_core_dbg(dev, "vf_num %d, index %d, gid_table_size %d\n", + vf_num, 
gid_index, tbsz); + + if (gid_index > tbsz && gid_index != 0xffff) + return -EINVAL; + + if (gid_index == 0xffff) + nout = tbsz; + else + nout = 1; + + out_sz += nout * sizeof(*gid); + + in = kvzalloc(in_sz, GFP_KERNEL); + out = kvzalloc(out_sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(query_hca_vport_gid_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_GID); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_hca_vport_gid_in, in, vport_number, vf_num); + MLX5_SET(query_hca_vport_gid_in, in, other_vport, 1); + } else { + err = -EPERM; + goto out; + } + } + MLX5_SET(query_hca_vport_gid_in, in, gid_index, gid_index); + + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_hca_vport_gid_in, in, port_num, port_num); + + err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz); + if (err) + goto out; + + tmp = out + MLX5_ST_SZ_BYTES(query_hca_vport_gid_out); + gid->global.subnet_prefix = tmp->global.subnet_prefix; + gid->global.interface_id = tmp->global.interface_id; + +out: + kvfree(in); + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_gid); + +int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, u16 vf_num, u16 pkey_index, + u16 *pkey) +{ + int in_sz = MLX5_ST_SZ_BYTES(query_hca_vport_pkey_in); + int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_pkey_out); + int is_group_manager; + void *out = NULL; + void *in = NULL; + void *pkarr; + int nout; + int tbsz; + int err; + int i; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + + tbsz = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)); + if (pkey_index > tbsz && pkey_index != 0xffff) + return -EINVAL; + + if (pkey_index == 0xffff) + nout = tbsz; + else + nout = 1; + + out_sz += nout * MLX5_ST_SZ_BYTES(pkey); + + in = kvzalloc(in_sz, GFP_KERNEL); + out = kvzalloc(out_sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(query_hca_vport_pkey_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_hca_vport_pkey_in, in, vport_number, vf_num); + MLX5_SET(query_hca_vport_pkey_in, in, other_vport, 1); + } else { + err = -EPERM; + goto out; + } + } + MLX5_SET(query_hca_vport_pkey_in, in, pkey_index, pkey_index); + + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_hca_vport_pkey_in, in, port_num, port_num); + + err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz); + if (err) + goto out; + + pkarr = MLX5_ADDR_OF(query_hca_vport_pkey_out, out, pkey); + for (i = 0; i < nout; i++, pkey++, pkarr += MLX5_ST_SZ_BYTES(pkey)) + *pkey = MLX5_GET_PR(pkey, pkarr, pkey); + +out: + kvfree(in); + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_pkey); + +int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev, + u8 other_vport, u8 port_num, + u16 vf_num, + struct mlx5_hca_vport_context *rep) +{ + int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_context_out); + int in[MLX5_ST_SZ_DW(query_hca_vport_context_in)] = {}; + int is_group_manager; + void *out; + void *ctx; + int err; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + + out = kvzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_hca_vport_context_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT); + + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_hca_vport_context_in, in, other_vport, 1); + MLX5_SET(query_hca_vport_context_in, in, vport_number, vf_num); + } else { + err = -EPERM; + goto ex; + } + } + + if (MLX5_CAP_GEN(dev, 
num_ports) == 2) + MLX5_SET(query_hca_vport_context_in, in, port_num, port_num); + + err = mlx5_cmd_exec_inout(dev, query_hca_vport_context, in, out); + if (err) + goto ex; + + ctx = MLX5_ADDR_OF(query_hca_vport_context_out, out, hca_vport_context); + rep->field_select = MLX5_GET_PR(hca_vport_context, ctx, field_select); + rep->sm_virt_aware = MLX5_GET_PR(hca_vport_context, ctx, sm_virt_aware); + rep->has_smi = MLX5_GET_PR(hca_vport_context, ctx, has_smi); + rep->has_raw = MLX5_GET_PR(hca_vport_context, ctx, has_raw); + rep->policy = MLX5_GET_PR(hca_vport_context, ctx, vport_state_policy); + rep->phys_state = MLX5_GET_PR(hca_vport_context, ctx, + port_physical_state); + rep->vport_state = MLX5_GET_PR(hca_vport_context, ctx, vport_state); + rep->port_physical_state = MLX5_GET_PR(hca_vport_context, ctx, + port_physical_state); + rep->port_guid = MLX5_GET64_PR(hca_vport_context, ctx, port_guid); + rep->node_guid = MLX5_GET64_PR(hca_vport_context, ctx, node_guid); + rep->cap_mask1 = MLX5_GET_PR(hca_vport_context, ctx, cap_mask1); + rep->cap_mask1_perm = MLX5_GET_PR(hca_vport_context, ctx, + cap_mask1_field_select); + rep->cap_mask2 = MLX5_GET_PR(hca_vport_context, ctx, cap_mask2); + rep->cap_mask2_perm = MLX5_GET_PR(hca_vport_context, ctx, + cap_mask2_field_select); + rep->lid = MLX5_GET_PR(hca_vport_context, ctx, lid); + rep->init_type_reply = MLX5_GET_PR(hca_vport_context, ctx, + init_type_reply); + rep->lmc = MLX5_GET_PR(hca_vport_context, ctx, lmc); + rep->subnet_timeout = MLX5_GET_PR(hca_vport_context, ctx, + subnet_timeout); + rep->sm_lid = MLX5_GET_PR(hca_vport_context, ctx, sm_lid); + rep->sm_sl = MLX5_GET_PR(hca_vport_context, ctx, sm_sl); + rep->qkey_violation_counter = MLX5_GET_PR(hca_vport_context, ctx, + qkey_violation_counter); + rep->pkey_violation_counter = MLX5_GET_PR(hca_vport_context, ctx, + pkey_violation_counter); + rep->grh_required = MLX5_GET_PR(hca_vport_context, ctx, grh_required); + rep->sys_image_guid = MLX5_GET64_PR(hca_vport_context, ctx, + system_image_guid); + +ex: + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_context); + +int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, + u64 *sys_image_guid) +{ + struct mlx5_hca_vport_context *rep; + int err; + + rep = kvzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) + return -ENOMEM; + + err = mlx5_query_hca_vport_context(dev, 0, 1, 0, rep); + if (!err) + *sys_image_guid = rep->sys_image_guid; + + kvfree(rep); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_system_image_guid); + +int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, + u64 *node_guid) +{ + struct mlx5_hca_vport_context *rep; + int err; + + rep = kvzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) + return -ENOMEM; + + err = mlx5_query_hca_vport_context(dev, 0, 1, 0, rep); + if (!err) + *node_guid = rep->node_guid; + + kvfree(rep); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid); + +int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, + u16 vport, + int *promisc_uc, + int *promisc_mc, + int *promisc_all) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, vport, out); + if (err) + goto out; + + *promisc_uc = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_uc); + *promisc_mc = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_mc); + *promisc_all = MLX5_GET(query_nic_vport_context_out, 
out, + nic_vport_context.promisc_all); + +out: + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_promisc); + +int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, + int promisc_uc, + int promisc_mc, + int promisc_all) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.promisc, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_uc, promisc_uc); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_mc, promisc_mc); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_all, promisc_all); + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); + + kvfree(in); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc); + +enum { + UC_LOCAL_LB, + MC_LOCAL_LB +}; + +int mlx5_nic_vport_update_local_lb(struct mlx5_core_dev *mdev, bool enable) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *in; + int err; + + if (!MLX5_CAP_GEN(mdev, disable_local_lb_mc) && + !MLX5_CAP_GEN(mdev, disable_local_lb_uc)) + return 0; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.disable_mc_local_lb, !enable); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.disable_uc_local_lb, !enable); + + if (MLX5_CAP_GEN(mdev, disable_local_lb_mc)) + MLX5_SET(modify_nic_vport_context_in, in, + field_select.disable_mc_local_lb, 1); + + if (MLX5_CAP_GEN(mdev, disable_local_lb_uc)) + MLX5_SET(modify_nic_vport_context_in, in, + field_select.disable_uc_local_lb, 1); + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); + + if (!err) + mlx5_core_dbg(mdev, "%s local_lb\n", + enable ? 
"enable" : "disable"); + + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_update_local_lb); + +int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status) +{ + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + u32 *out; + int value; + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, 0, out); + if (err) + goto out; + + value = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.disable_mc_local_lb) << MC_LOCAL_LB; + + value |= MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.disable_uc_local_lb) << UC_LOCAL_LB; + + *status = !value; + +out: + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_query_local_lb); + +enum mlx5_vport_roce_state { + MLX5_VPORT_ROCE_DISABLED = 0, + MLX5_VPORT_ROCE_ENABLED = 1, +}; + +static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev, + enum mlx5_vport_roce_state state) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.roce_en, 1); + MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.roce_en, + state); + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); + + kvfree(in); + + return err; +} + +int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev) +{ + int err = 0; + + mutex_lock(&mlx5_roce_en_lock); + if (!mdev->roce.roce_en) + err = mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED); + + if (!err) + mdev->roce.roce_en++; + mutex_unlock(&mlx5_roce_en_lock); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce); + +int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev) +{ + int err = 0; + + mutex_lock(&mlx5_roce_en_lock); + if (mdev->roce.roce_en) { + mdev->roce.roce_en--; + if (mdev->roce.roce_en == 0) + err = mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED); + + if (err) + mdev->roce.roce_en++; + } + mutex_unlock(&mlx5_roce_en_lock); + return err; +} +EXPORT_SYMBOL(mlx5_nic_vport_disable_roce); + +int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, + int vf, u8 port_num, void *out) +{ + int in_sz = MLX5_ST_SZ_BYTES(query_vport_counter_in); + int is_group_manager; + void *in; + int err; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + in = kvzalloc(in_sz, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + return err; + } + + MLX5_SET(query_vport_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_COUNTER); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_vport_counter_in, in, other_vport, 1); + MLX5_SET(query_vport_counter_in, in, vport_number, vf + 1); + } else { + err = -EPERM; + goto free; + } + } + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_vport_counter_in, in, port_num, port_num); + + err = mlx5_cmd_exec_inout(dev, query_vport_counter, in, out); +free: + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter); + +int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport, + u8 other_vport, u64 *rx_discard_vport_down, + u64 *tx_discard_vport_down) +{ + u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; + int err; + + MLX5_SET(query_vnic_env_in, in, opcode, + MLX5_CMD_OP_QUERY_VNIC_ENV); + MLX5_SET(query_vnic_env_in, in, op_mod, 
0); + MLX5_SET(query_vnic_env_in, in, vport_number, vport); + MLX5_SET(query_vnic_env_in, in, other_vport, other_vport); + + err = mlx5_cmd_exec_inout(mdev, query_vnic_env, in, out); + if (err) + return err; + + *rx_discard_vport_down = MLX5_GET64(query_vnic_env_out, out, + vport_env.receive_discard_vport_down); + *tx_discard_vport_down = MLX5_GET64(query_vnic_env_out, out, + vport_env.transmit_discard_vport_down); + return 0; +} + +int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev, + u8 other_vport, u8 port_num, + int vf, + struct mlx5_hca_vport_context *req) +{ + int in_sz = MLX5_ST_SZ_BYTES(modify_hca_vport_context_in); + int is_group_manager; + void *ctx; + void *in; + int err; + + mlx5_core_dbg(dev, "vf %d\n", vf); + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + in = kvzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_hca_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(modify_hca_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_hca_vport_context_in, in, vport_number, vf); + } else { + err = -EPERM; + goto ex; + } + } + + if (MLX5_CAP_GEN(dev, num_ports) > 1) + MLX5_SET(modify_hca_vport_context_in, in, port_num, port_num); + + ctx = MLX5_ADDR_OF(modify_hca_vport_context_in, in, hca_vport_context); + MLX5_SET(hca_vport_context, ctx, field_select, req->field_select); + if (req->field_select & MLX5_HCA_VPORT_SEL_STATE_POLICY) + MLX5_SET(hca_vport_context, ctx, vport_state_policy, + req->policy); + if (req->field_select & MLX5_HCA_VPORT_SEL_PORT_GUID) + MLX5_SET64(hca_vport_context, ctx, port_guid, req->port_guid); + if (req->field_select & MLX5_HCA_VPORT_SEL_NODE_GUID) + MLX5_SET64(hca_vport_context, ctx, node_guid, req->node_guid); + MLX5_SET(hca_vport_context, ctx, cap_mask1, req->cap_mask1); + MLX5_SET(hca_vport_context, ctx, cap_mask1_field_select, + req->cap_mask1_perm); + err = mlx5_cmd_exec_in(dev, modify_hca_vport_context, in); +ex: + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_modify_hca_vport_context); + +int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev, + struct mlx5_core_dev *port_mdev) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + err = mlx5_nic_vport_enable_roce(port_mdev); + if (err) + goto free; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1); + if (MLX5_CAP_GEN_2(master_mdev, sw_vhca_id_valid)) { + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.vhca_id_type, VHCA_ID_TYPE_SW); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliated_vhca_id, + MLX5_CAP_GEN_2(master_mdev, sw_vhca_id)); + } else { + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliated_vhca_id, + MLX5_CAP_GEN(master_mdev, vhca_id)); + } + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliation_criteria, + MLX5_CAP_GEN(port_mdev, affiliate_nic_vport_criteria)); + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + err = mlx5_cmd_exec_in(port_mdev, modify_nic_vport_context, in); + if (err) + mlx5_nic_vport_disable_roce(port_mdev); + +free: + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_affiliate_multiport); + +int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *in; + int err; 
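+	/* Descriptive note (added for clarity): the writes below undo
+	 * mlx5_nic_vport_affiliate_multiport(): the affiliation fields are
+	 * cleared and, only if the command succeeds, the RoCE reference
+	 * taken when the port was affiliated is dropped again.
+	 */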
+ + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliated_vhca_id, 0); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliation_criteria, 0); + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + err = mlx5_cmd_exec_in(port_mdev, modify_nic_vport_context, in); + if (!err) + mlx5_nic_vport_disable_roce(port_mdev); + + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport); + +u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) +{ + int port_type_cap = MLX5_CAP_GEN(mdev, port_type); + u64 tmp; + int err; + + if (mdev->sys_image_guid) + return mdev->sys_image_guid; + + if (port_type_cap == MLX5_CAP_PORT_TYPE_ETH) + err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp); + else + err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp); + + mdev->sys_image_guid = err ? 0 : tmp; + + return mdev->sys_image_guid; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); + +int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out) +{ + u16 opmod = (MLX5_CAP_GENERAL << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01); + u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {}; + + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); + MLX5_SET(query_hca_cap_in, in, op_mod, opmod); + MLX5_SET(query_hca_cap_in, in, function_id, function_id); + MLX5_SET(query_hca_cap_in, in, other_function, true); + return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c new file mode 100644 index 000000000..3091dd014 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/mlx5/driver.h> +#include "wq.h" +#include "mlx5_core.h" + +int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *wqc, struct mlx5_wq_cyc *wq, + struct mlx5_wq_ctrl *wq_ctrl) +{ + u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride); + u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz); + struct mlx5_frag_buf_ctrl *fbc = &wq->fbc; + int err; + + err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); + return err; + } + + wq->db = wq_ctrl->db.db; + + err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride), + &wq_ctrl->buf, param->buf_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err); + goto err_db_free; + } + + mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc); + wq->sz = mlx5_wq_cyc_get_size(wq); + + wq_ctrl->mdev = mdev; + + return 0; + +err_db_free: + mlx5_db_free(mdev, &wq_ctrl->db); + + return err; +} + +void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides) +{ + size_t len; + void *wqe; + + if (!net_ratelimit()) + return; + + nstrides = max_t(u8, nstrides, 1); + + len = nstrides << wq->fbc.log_stride; + wqe = mlx5_wq_cyc_get_wqe(wq, ix); + + pr_info("WQE DUMP: WQ size %d WQ cur size %d, WQE index 0x%x, len: %zu\n", + mlx5_wq_cyc_get_size(wq), wq->cur_sz, ix, len); + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, wqe, len, false); +} + +void mlx5_wq_cyc_reset(struct mlx5_wq_cyc *wq) +{ + wq->wqe_ctr = 0; + wq->cur_sz = 0; + mlx5_wq_cyc_update_db_record(wq); +} + +int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *qpc, struct mlx5_wq_qp *wq, + struct mlx5_wq_ctrl *wq_ctrl) +{ + u8 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride) + 4; + u8 log_rq_sz = MLX5_GET(qpc, qpc, log_rq_size); + u8 log_sq_stride = ilog2(MLX5_SEND_WQE_BB); + u8 log_sq_sz = MLX5_GET(qpc, qpc, log_sq_size); + + u32 rq_byte_size; + int err; + + + + err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); + return err; + } + + err = mlx5_frag_buf_alloc_node(mdev, + wq_get_byte_sz(log_rq_sz, log_rq_stride) + + wq_get_byte_sz(log_sq_sz, log_sq_stride), + &wq_ctrl->buf, param->buf_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err); + goto err_db_free; + } + + mlx5_init_fbc(wq_ctrl->buf.frags, log_rq_stride, log_rq_sz, &wq->rq.fbc); + + rq_byte_size = wq_get_byte_sz(log_rq_sz, log_rq_stride); + + if (rq_byte_size < PAGE_SIZE) { + /* SQ starts within the same page of the RQ */ + u16 sq_strides_offset = rq_byte_size / MLX5_SEND_WQE_BB; + + mlx5_init_fbc_offset(wq_ctrl->buf.frags, + log_sq_stride, log_sq_sz, sq_strides_offset, + &wq->sq.fbc); + } else { + u16 rq_npages = rq_byte_size >> PAGE_SHIFT; + + mlx5_init_fbc(wq_ctrl->buf.frags + rq_npages, + log_sq_stride, log_sq_sz, &wq->sq.fbc); + } + + wq->rq.db = &wq_ctrl->db.db[MLX5_RCV_DBR]; + wq->sq.db = &wq_ctrl->db.db[MLX5_SND_DBR]; + + wq_ctrl->mdev = mdev; + + return 0; + +err_db_free: + mlx5_db_free(mdev, &wq_ctrl->db); + + return err; +} + +int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *cqc, struct mlx5_cqwq *wq, + struct mlx5_wq_ctrl *wq_ctrl) +{ + /* CQE_STRIDE_128 and CQE_STRIDE_128_PAD both mean 128B stride */ + u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) == CQE_STRIDE_64 ? 
6 : 7; + u8 log_wq_sz = MLX5_GET(cqc, cqc, log_cq_size); + int err; + + err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); + return err; + } + + wq->db = wq_ctrl->db.db; + + err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride), + &wq_ctrl->buf, + param->buf_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", + err); + goto err_db_free; + } + + mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, &wq->fbc); + + wq_ctrl->mdev = mdev; + + return 0; + +err_db_free: + mlx5_db_free(mdev, &wq_ctrl->db); + + return err; +} + +static void mlx5_wq_ll_init_list(struct mlx5_wq_ll *wq) +{ + struct mlx5_wqe_srq_next_seg *next_seg; + int i; + + for (i = 0; i < wq->fbc.sz_m1; i++) { + next_seg = mlx5_wq_ll_get_wqe(wq, i); + next_seg->next_wqe_index = cpu_to_be16(i + 1); + } + next_seg = mlx5_wq_ll_get_wqe(wq, i); + wq->tail_next = &next_seg->next_wqe_index; +} + +int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *wqc, struct mlx5_wq_ll *wq, + struct mlx5_wq_ctrl *wq_ctrl) +{ + u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride); + u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz); + struct mlx5_frag_buf_ctrl *fbc = &wq->fbc; + int err; + + err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); + return err; + } + + wq->db = wq_ctrl->db.db; + + err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride), + &wq_ctrl->buf, param->buf_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err); + goto err_db_free; + } + + mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc); + + mlx5_wq_ll_init_list(wq); + wq_ctrl->mdev = mdev; + + return 0; + +err_db_free: + mlx5_db_free(mdev, &wq_ctrl->db); + + return err; +} + +void mlx5_wq_ll_reset(struct mlx5_wq_ll *wq) +{ + wq->head = 0; + wq->wqe_ctr = 0; + wq->cur_sz = 0; + mlx5_wq_ll_init_list(wq); + mlx5_wq_ll_update_db_record(wq); +} + +void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl) +{ + mlx5_frag_buf_free(wq_ctrl->mdev, &wq_ctrl->buf); + mlx5_db_free(wq_ctrl->mdev, &wq_ctrl->db); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h new file mode 100644 index 000000000..4d629e5dd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -0,0 +1,308 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_WQ_H__ +#define __MLX5_WQ_H__ + +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/cq.h> +#include <linux/mlx5/qp.h> + +struct mlx5_wq_param { + int buf_numa_node; + int db_numa_node; +}; + +struct mlx5_wq_ctrl { + struct mlx5_core_dev *mdev; + struct mlx5_frag_buf buf; + struct mlx5_db db; +}; + +struct mlx5_wq_cyc { + struct mlx5_frag_buf_ctrl fbc; + __be32 *db; + u16 sz; + u16 wqe_ctr; + u16 cur_sz; +}; + +struct mlx5_wq_qp { + struct mlx5_wq_cyc rq; + struct mlx5_wq_cyc sq; +}; + +struct mlx5_cqwq { + struct mlx5_frag_buf_ctrl fbc; + __be32 *db; + u32 cc; /* consumer counter */ +}; + +struct mlx5_wq_ll { + struct mlx5_frag_buf_ctrl fbc; + __be32 *db; + __be16 *tail_next; + u16 head; + u16 wqe_ctr; + u16 cur_sz; +}; + +int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *wqc, struct mlx5_wq_cyc *wq, + struct mlx5_wq_ctrl *wq_ctrl); +void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides); +void mlx5_wq_cyc_reset(struct mlx5_wq_cyc *wq); + +int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *qpc, struct mlx5_wq_qp *wq, + struct mlx5_wq_ctrl *wq_ctrl); + +int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *cqc, struct mlx5_cqwq *wq, + struct mlx5_wq_ctrl *wq_ctrl); + +int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *wqc, struct mlx5_wq_ll *wq, + struct mlx5_wq_ctrl *wq_ctrl); +void mlx5_wq_ll_reset(struct mlx5_wq_ll *wq); + +void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl); + +static inline u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq) +{ + return (u32)wq->fbc.sz_m1 + 1; +} + +static inline int mlx5_wq_cyc_is_full(struct mlx5_wq_cyc *wq) +{ + return wq->cur_sz == wq->sz; +} + +static inline int mlx5_wq_cyc_missing(struct mlx5_wq_cyc *wq) +{ + return wq->sz - wq->cur_sz; +} + +static inline int mlx5_wq_cyc_is_empty(struct mlx5_wq_cyc *wq) +{ + return !wq->cur_sz; +} + +static inline void mlx5_wq_cyc_push(struct mlx5_wq_cyc *wq) +{ + wq->wqe_ctr++; + wq->cur_sz++; +} + +static inline void mlx5_wq_cyc_push_n(struct mlx5_wq_cyc *wq, u16 n) +{ + wq->wqe_ctr += n; + wq->cur_sz += n; +} + +static inline void mlx5_wq_cyc_pop(struct mlx5_wq_cyc *wq) +{ + wq->cur_sz--; +} + +static inline void mlx5_wq_cyc_update_db_record(struct mlx5_wq_cyc *wq) +{ + *wq->db = cpu_to_be32(wq->wqe_ctr); +} + +static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr) +{ + return ctr & wq->fbc.sz_m1; +} + +static inline u16 mlx5_wq_cyc_get_head(struct mlx5_wq_cyc *wq) +{ + return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr); +} + +static inline u16 mlx5_wq_cyc_get_tail(struct mlx5_wq_cyc *wq) +{ + return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr - wq->cur_sz); +} + +static inline void *mlx5_wq_cyc_get_wqe(struct mlx5_wq_cyc *wq, u16 ix) +{ + return mlx5_frag_buf_get_wqe(&wq->fbc, ix); +} + +static inline u16 mlx5_wq_cyc_get_contig_wqebbs(struct mlx5_wq_cyc *wq, u16 ix) +{ + return mlx5_frag_buf_get_idx_last_contig_stride(&wq->fbc, ix) - ix + 1; +} 
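+
+/* Illustrative usage sketch (not part of the original header): a typical
+ * producer built on the cyclic-WQ helpers above checks for room, fills a
+ * WQE at the current head, then publishes it through the doorbell record:
+ *
+ *	if (mlx5_wq_cyc_is_full(wq))
+ *		return -EBUSY;
+ *	wqe = mlx5_wq_cyc_get_wqe(wq, mlx5_wq_cyc_get_head(wq));
+ *	...fill the WQE in place...
+ *	mlx5_wq_cyc_push(wq);
+ *	mlx5_wq_cyc_update_db_record(wq);
+ *
+ * mlx5_wq_cyc_ctr2ix() wraps the free-running wqe_ctr into a ring index by
+ * masking with fbc.sz_m1, which relies on the ring size being a power of
+ * two.
+ */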
+ +static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2) +{ + int equal = (cc1 == cc2); + int smaller = 0x8000 & (cc1 - cc2); + + return !equal && !smaller; +} + +static inline u16 mlx5_wq_cyc_get_counter(struct mlx5_wq_cyc *wq) +{ + return wq->wqe_ctr; +} + +static inline u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq) +{ + return wq->fbc.sz_m1 + 1; +} + +static inline u8 mlx5_cqwq_get_log_stride_size(struct mlx5_cqwq *wq) +{ + return wq->fbc.log_stride; +} + +static inline u32 mlx5_cqwq_ctr2ix(struct mlx5_cqwq *wq, u32 ctr) +{ + return ctr & wq->fbc.sz_m1; +} + +static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq) +{ + return mlx5_cqwq_ctr2ix(wq, wq->cc); +} + +static inline struct mlx5_cqe64 *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix) +{ + struct mlx5_cqe64 *cqe = mlx5_frag_buf_get_wqe(&wq->fbc, ix); + + /* For 128B CQEs the data is in the last 64B */ + cqe += wq->fbc.log_stride == 7; + + return cqe; +} + +static inline u32 mlx5_cqwq_get_ctr_wrap_cnt(struct mlx5_cqwq *wq, u32 ctr) +{ + return ctr >> wq->fbc.log_sz; +} + +static inline u32 mlx5_cqwq_get_wrap_cnt(struct mlx5_cqwq *wq) +{ + return mlx5_cqwq_get_ctr_wrap_cnt(wq, wq->cc); +} + +static inline void mlx5_cqwq_pop(struct mlx5_cqwq *wq) +{ + wq->cc++; +} + +static inline void mlx5_cqwq_update_db_record(struct mlx5_cqwq *wq) +{ + *wq->db = cpu_to_be32(wq->cc & 0xffffff); +} + +static inline struct mlx5_cqe64 *mlx5_cqwq_get_cqe(struct mlx5_cqwq *wq) +{ + u32 ci = mlx5_cqwq_get_ci(wq); + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); + u8 cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK; + u8 sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1; + + if (cqe_ownership_bit != sw_ownership_val) + return NULL; + + /* ensure cqe content is read after cqe ownership bit */ + dma_rmb(); + + return cqe; +} + +static inline u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq) +{ + return (u32)wq->fbc.sz_m1 + 1; +} + +static inline int mlx5_wq_ll_is_full(struct mlx5_wq_ll *wq) +{ + return wq->cur_sz == wq->fbc.sz_m1; +} + +static inline int mlx5_wq_ll_is_empty(struct mlx5_wq_ll *wq) +{ + return !wq->cur_sz; +} + +static inline int mlx5_wq_ll_missing(struct mlx5_wq_ll *wq) +{ + return wq->fbc.sz_m1 - wq->cur_sz; +} + +static inline void *mlx5_wq_ll_get_wqe(struct mlx5_wq_ll *wq, u16 ix) +{ + return mlx5_frag_buf_get_wqe(&wq->fbc, ix); +} + +static inline u16 mlx5_wq_ll_get_wqe_next_ix(struct mlx5_wq_ll *wq, u16 ix) +{ + struct mlx5_wqe_srq_next_seg *wqe = mlx5_wq_ll_get_wqe(wq, ix); + + return be16_to_cpu(wqe->next_wqe_index); +} + +static inline void mlx5_wq_ll_push(struct mlx5_wq_ll *wq, u16 head_next) +{ + wq->head = head_next; + wq->wqe_ctr++; + wq->cur_sz++; +} + +static inline void mlx5_wq_ll_pop(struct mlx5_wq_ll *wq, __be16 ix, + __be16 *next_tail_next) +{ + *wq->tail_next = ix; + wq->tail_next = next_tail_next; + wq->cur_sz--; +} + +static inline void mlx5_wq_ll_update_db_record(struct mlx5_wq_ll *wq) +{ + *wq->db = cpu_to_be32(wq->wqe_ctr); +} + +static inline u16 mlx5_wq_ll_get_head(struct mlx5_wq_ll *wq) +{ + return wq->head; +} + +static inline u16 mlx5_wq_ll_get_counter(struct mlx5_wq_ll *wq) +{ + return wq->wqe_ctr; +} + +#endif /* __MLX5_WQ_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/Kconfig b/drivers/net/ethernet/mellanox/mlxbf_gige/Kconfig new file mode 100644 index 000000000..4cdebafaf --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/Kconfig @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause +# +# Mellanox GigE driver configuration +# + +config MLXBF_GIGE + 
tristate "Mellanox Technologies BlueField Gigabit Ethernet support" + depends on (ARM64 && ACPI) || COMPILE_TEST + select PHYLIB + help + The second generation BlueField SoC from Mellanox Technologies + supports an out-of-band Gigabit Ethernet management port to the + Arm subsystem. diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/Makefile b/drivers/net/ethernet/mellanox/mlxbf_gige/Makefile new file mode 100644 index 000000000..a97c2bef8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause + +obj-$(CONFIG_MLXBF_GIGE) += mlxbf_gige.o + +mlxbf_gige-y := mlxbf_gige_ethtool.o \ + mlxbf_gige_intr.o \ + mlxbf_gige_main.o \ + mlxbf_gige_mdio.o \ + mlxbf_gige_rx.o \ + mlxbf_gige_tx.o diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h new file mode 100644 index 000000000..5a1027b07 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h @@ -0,0 +1,177 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause */ + +/* Header file for Gigabit Ethernet driver for Mellanox BlueField SoC + * - this file contains software data structures and any chip-specific + * data structures (e.g. TX WQE format) that are memory resident. + * + * Copyright (C) 2020-2021 NVIDIA CORPORATION & AFFILIATES + */ + +#ifndef __MLXBF_GIGE_H__ +#define __MLXBF_GIGE_H__ + +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/irqreturn.h> +#include <linux/netdevice.h> +#include <linux/irq.h> + +/* The silicon design supports a maximum RX ring size of + * 32K entries. Based on current testing this maximum size + * is not required to be supported. Instead the RX ring + * will be capped at a realistic value of 1024 entries. + */ +#define MLXBF_GIGE_MIN_RXQ_SZ 32 +#define MLXBF_GIGE_MAX_RXQ_SZ 1024 +#define MLXBF_GIGE_DEFAULT_RXQ_SZ 128 + +#define MLXBF_GIGE_MIN_TXQ_SZ 4 +#define MLXBF_GIGE_MAX_TXQ_SZ 256 +#define MLXBF_GIGE_DEFAULT_TXQ_SZ 128 + +#define MLXBF_GIGE_DEFAULT_BUF_SZ 2048 + +#define MLXBF_GIGE_DMA_PAGE_SZ 4096 +#define MLXBF_GIGE_DMA_PAGE_SHIFT 12 + +/* There are four individual MAC RX filters. 
Currently + * two of them are being used: one for the broadcast MAC + * (index 0) and one for local MAC (index 1) + */ +#define MLXBF_GIGE_BCAST_MAC_FILTER_IDX 0 +#define MLXBF_GIGE_LOCAL_MAC_FILTER_IDX 1 + +/* Define for broadcast MAC literal */ +#define BCAST_MAC_ADDR 0xFFFFFFFFFFFF + +/* There are three individual interrupts: + * 1) Errors, "OOB" interrupt line + * 2) Receive Packet, "OOB_LLU" interrupt line + * 3) LLU and PLU Events, "OOB_PLU" interrupt line + */ +#define MLXBF_GIGE_ERROR_INTR_IDX 0 +#define MLXBF_GIGE_RECEIVE_PKT_INTR_IDX 1 +#define MLXBF_GIGE_LLU_PLU_INTR_IDX 2 + +struct mlxbf_gige_stats { + u64 hw_access_errors; + u64 tx_invalid_checksums; + u64 tx_small_frames; + u64 tx_index_errors; + u64 sw_config_errors; + u64 sw_access_errors; + u64 rx_truncate_errors; + u64 rx_mac_errors; + u64 rx_din_dropped_pkts; + u64 tx_fifo_full; + u64 rx_filter_passed_pkts; + u64 rx_filter_discard_pkts; +}; + +struct mlxbf_gige { + void __iomem *base; + void __iomem *llu_base; + void __iomem *plu_base; + struct device *dev; + struct net_device *netdev; + struct platform_device *pdev; + void __iomem *mdio_io; + void __iomem *clk_io; + struct mii_bus *mdiobus; + spinlock_t lock; /* for packet processing indices */ + u16 rx_q_entries; + u16 tx_q_entries; + u64 *tx_wqe_base; + dma_addr_t tx_wqe_base_dma; + u64 *tx_wqe_next; + u64 *tx_cc; + dma_addr_t tx_cc_dma; + dma_addr_t *rx_wqe_base; + dma_addr_t rx_wqe_base_dma; + u64 *rx_cqe_base; + dma_addr_t rx_cqe_base_dma; + u16 tx_pi; + u16 prev_tx_ci; + struct sk_buff *rx_skb[MLXBF_GIGE_MAX_RXQ_SZ]; + struct sk_buff *tx_skb[MLXBF_GIGE_MAX_TXQ_SZ]; + int error_irq; + int rx_irq; + int llu_plu_irq; + int phy_irq; + int hw_phy_irq; + bool promisc_enabled; + u8 valid_polarity; + struct napi_struct napi; + struct mlxbf_gige_stats stats; +}; + +/* Rx Work Queue Element definitions */ +#define MLXBF_GIGE_RX_WQE_SZ 8 + +/* Rx Completion Queue Element definitions */ +#define MLXBF_GIGE_RX_CQE_SZ 8 +#define MLXBF_GIGE_RX_CQE_PKT_LEN_MASK GENMASK(10, 0) +#define MLXBF_GIGE_RX_CQE_VALID_MASK GENMASK(11, 11) +#define MLXBF_GIGE_RX_CQE_PKT_STATUS_MASK GENMASK(15, 12) +#define MLXBF_GIGE_RX_CQE_PKT_STATUS_MAC_ERR GENMASK(12, 12) +#define MLXBF_GIGE_RX_CQE_PKT_STATUS_TRUNCATED GENMASK(13, 13) +#define MLXBF_GIGE_RX_CQE_CHKSUM_MASK GENMASK(31, 16) + +/* Tx Work Queue Element definitions */ +#define MLXBF_GIGE_TX_WQE_SZ_QWORDS 2 +#define MLXBF_GIGE_TX_WQE_SZ 16 +#define MLXBF_GIGE_TX_WQE_PKT_LEN_MASK GENMASK(10, 0) +#define MLXBF_GIGE_TX_WQE_UPDATE_MASK GENMASK(31, 31) +#define MLXBF_GIGE_TX_WQE_CHKSUM_LEN_MASK GENMASK(42, 32) +#define MLXBF_GIGE_TX_WQE_CHKSUM_START_MASK GENMASK(55, 48) +#define MLXBF_GIGE_TX_WQE_CHKSUM_OFFSET_MASK GENMASK(63, 56) + +/* Macro to return packet length of specified TX WQE */ +#define MLXBF_GIGE_TX_WQE_PKT_LEN(tx_wqe_addr) \ + (*((tx_wqe_addr) + 1) & MLXBF_GIGE_TX_WQE_PKT_LEN_MASK) + +/* Tx Completion Count */ +#define MLXBF_GIGE_TX_CC_SZ 8 + +/* List of resources in ACPI table */ +enum mlxbf_gige_res { + MLXBF_GIGE_RES_MAC, + MLXBF_GIGE_RES_MDIO9, + MLXBF_GIGE_RES_GPIO0, + MLXBF_GIGE_RES_LLU, + MLXBF_GIGE_RES_PLU, + MLXBF_GIGE_RES_CLK +}; + +/* Version of register data returned by mlxbf_gige_get_regs() */ +#define MLXBF_GIGE_REGS_VERSION 1 + +int mlxbf_gige_mdio_probe(struct platform_device *pdev, + struct mlxbf_gige *priv); +void mlxbf_gige_mdio_remove(struct mlxbf_gige *priv); +irqreturn_t mlxbf_gige_mdio_handle_phy_interrupt(int irq, void *dev_id); +void mlxbf_gige_mdio_enable_phy_int(struct mlxbf_gige *priv); + +void 
mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index, u64 dmac); +void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index, u64 *dmac); +void mlxbf_gige_enable_promisc(struct mlxbf_gige *priv); +void mlxbf_gige_disable_promisc(struct mlxbf_gige *priv); +int mlxbf_gige_rx_init(struct mlxbf_gige *priv); +void mlxbf_gige_rx_deinit(struct mlxbf_gige *priv); +int mlxbf_gige_tx_init(struct mlxbf_gige *priv); +void mlxbf_gige_tx_deinit(struct mlxbf_gige *priv); +bool mlxbf_gige_handle_tx_complete(struct mlxbf_gige *priv); +netdev_tx_t mlxbf_gige_start_xmit(struct sk_buff *skb, + struct net_device *netdev); +struct sk_buff *mlxbf_gige_alloc_skb(struct mlxbf_gige *priv, + unsigned int map_len, + dma_addr_t *buf_dma, + enum dma_data_direction dir); +int mlxbf_gige_request_irqs(struct mlxbf_gige *priv); +void mlxbf_gige_free_irqs(struct mlxbf_gige *priv); +int mlxbf_gige_poll(struct napi_struct *napi, int budget); +extern const struct ethtool_ops mlxbf_gige_ethtool_ops; +void mlxbf_gige_update_tx_wqe_next(struct mlxbf_gige *priv); + +#endif /* !defined(__MLXBF_GIGE_H__) */ diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_ethtool.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_ethtool.c new file mode 100644 index 000000000..41ebef25a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_ethtool.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause + +/* Ethtool support for Mellanox Gigabit Ethernet driver + * + * Copyright (C) 2020-2021 NVIDIA CORPORATION & AFFILIATES + */ + +#include <linux/phy.h> + +#include "mlxbf_gige.h" +#include "mlxbf_gige_regs.h" + +/* Start of struct ethtool_ops functions */ +static int mlxbf_gige_get_regs_len(struct net_device *netdev) +{ + return MLXBF_GIGE_MMIO_REG_SZ; +} + +static void mlxbf_gige_get_regs(struct net_device *netdev, + struct ethtool_regs *regs, void *p) +{ + struct mlxbf_gige *priv = netdev_priv(netdev); + + regs->version = MLXBF_GIGE_REGS_VERSION; + + /* Read entire MMIO register space and store results + * into the provided buffer. By design, a read to an + * offset without an existing register will be + * acknowledged and return zero. 
+ */ + memcpy_fromio(p, priv->base, MLXBF_GIGE_MMIO_REG_SZ); +} + +static void +mlxbf_gige_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ering, + struct kernel_ethtool_ringparam *kernel_ering, + struct netlink_ext_ack *extack) +{ + struct mlxbf_gige *priv = netdev_priv(netdev); + + ering->rx_max_pending = MLXBF_GIGE_MAX_RXQ_SZ; + ering->tx_max_pending = MLXBF_GIGE_MAX_TXQ_SZ; + ering->rx_pending = priv->rx_q_entries; + ering->tx_pending = priv->tx_q_entries; +} + +static const struct { + const char string[ETH_GSTRING_LEN]; +} mlxbf_gige_ethtool_stats_keys[] = { + { "hw_access_errors" }, + { "tx_invalid_checksums" }, + { "tx_small_frames" }, + { "tx_index_errors" }, + { "sw_config_errors" }, + { "sw_access_errors" }, + { "rx_truncate_errors" }, + { "rx_mac_errors" }, + { "rx_din_dropped_pkts" }, + { "tx_fifo_full" }, + { "rx_filter_passed_pkts" }, + { "rx_filter_discard_pkts" }, +}; + +static int mlxbf_gige_get_sset_count(struct net_device *netdev, int stringset) +{ + if (stringset != ETH_SS_STATS) + return -EOPNOTSUPP; + return ARRAY_SIZE(mlxbf_gige_ethtool_stats_keys); +} + +static void mlxbf_gige_get_strings(struct net_device *netdev, u32 stringset, + u8 *buf) +{ + if (stringset != ETH_SS_STATS) + return; + memcpy(buf, &mlxbf_gige_ethtool_stats_keys, + sizeof(mlxbf_gige_ethtool_stats_keys)); +} + +static void mlxbf_gige_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *estats, + u64 *data) +{ + struct mlxbf_gige *priv = netdev_priv(netdev); + + /* Fill data array with interface statistics + * + * NOTE: the data writes must be in + * sync with the strings shown in + * the mlxbf_gige_ethtool_stats_keys[] array + * + * NOTE2: certain statistics below are zeroed upon + * port disable, so the calculation below + * must include the "cached" value of the stat + * plus the value read directly from hardware. 
+ * Cached statistics are currently: + * rx_din_dropped_pkts + * rx_filter_passed_pkts + * rx_filter_discard_pkts + */ + *data++ = priv->stats.hw_access_errors; + *data++ = priv->stats.tx_invalid_checksums; + *data++ = priv->stats.tx_small_frames; + *data++ = priv->stats.tx_index_errors; + *data++ = priv->stats.sw_config_errors; + *data++ = priv->stats.sw_access_errors; + *data++ = priv->stats.rx_truncate_errors; + *data++ = priv->stats.rx_mac_errors; + *data++ = (priv->stats.rx_din_dropped_pkts + + readq(priv->base + MLXBF_GIGE_RX_DIN_DROP_COUNTER)); + *data++ = priv->stats.tx_fifo_full; + *data++ = (priv->stats.rx_filter_passed_pkts + + readq(priv->base + MLXBF_GIGE_RX_PASS_COUNTER_ALL)); + *data++ = (priv->stats.rx_filter_discard_pkts + + readq(priv->base + MLXBF_GIGE_RX_DISC_COUNTER_ALL)); +} + +static void mlxbf_gige_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + pause->autoneg = AUTONEG_DISABLE; + pause->rx_pause = 1; + pause->tx_pause = 1; +} + +const struct ethtool_ops mlxbf_gige_ethtool_ops = { + .get_link = ethtool_op_get_link, + .get_ringparam = mlxbf_gige_get_ringparam, + .get_regs_len = mlxbf_gige_get_regs_len, + .get_regs = mlxbf_gige_get_regs, + .get_strings = mlxbf_gige_get_strings, + .get_sset_count = mlxbf_gige_get_sset_count, + .get_ethtool_stats = mlxbf_gige_get_ethtool_stats, + .nway_reset = phy_ethtool_nway_reset, + .get_pauseparam = mlxbf_gige_get_pauseparam, + .get_link_ksettings = phy_ethtool_get_link_ksettings, +}; diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_intr.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_intr.c new file mode 100644 index 000000000..5b3519f0c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_intr.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause + +/* Interrupt related logic for Mellanox Gigabit Ethernet driver + * + * Copyright (C) 2020-2021 NVIDIA CORPORATION & AFFILIATES + */ + +#include <linux/interrupt.h> + +#include "mlxbf_gige.h" +#include "mlxbf_gige_regs.h" + +static irqreturn_t mlxbf_gige_error_intr(int irq, void *dev_id) +{ + struct mlxbf_gige *priv; + u64 int_status; + + priv = dev_id; + + int_status = readq(priv->base + MLXBF_GIGE_INT_STATUS); + + if (int_status & MLXBF_GIGE_INT_STATUS_HW_ACCESS_ERROR) + priv->stats.hw_access_errors++; + + if (int_status & MLXBF_GIGE_INT_STATUS_TX_CHECKSUM_INPUTS) { + priv->stats.tx_invalid_checksums++; + /* This error condition is latched into MLXBF_GIGE_INT_STATUS + * when the GigE silicon operates on the offending + * TX WQE. The write to MLXBF_GIGE_INT_STATUS at the bottom + * of this routine clears this error condition. + */ + } + + if (int_status & MLXBF_GIGE_INT_STATUS_TX_SMALL_FRAME_SIZE) { + priv->stats.tx_small_frames++; + /* This condition happens when the networking stack invokes + * this driver's "start_xmit()" method with a packet whose + * size < 60 bytes. The GigE silicon will automatically pad + * this small frame up to a minimum-sized frame before it is + * sent. The "tx_small_frame" condition is latched into the + * MLXBF_GIGE_INT_STATUS register when the GigE silicon + * operates on the offending TX WQE. The write to + * MLXBF_GIGE_INT_STATUS at the bottom of this routine + * clears this condition. 
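+		 * A 42-byte ARP request is a typical trigger: it is below
+		 * the 60-byte Ethernet minimum (excluding FCS), so the
+		 * silicon pads it on transmit and this counter is bumped.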
+ */ + } + + if (int_status & MLXBF_GIGE_INT_STATUS_TX_PI_CI_EXCEED_WQ_SIZE) + priv->stats.tx_index_errors++; + + if (int_status & MLXBF_GIGE_INT_STATUS_SW_CONFIG_ERROR) + priv->stats.sw_config_errors++; + + if (int_status & MLXBF_GIGE_INT_STATUS_SW_ACCESS_ERROR) + priv->stats.sw_access_errors++; + + /* Clear all error interrupts by writing '1' back to + * all the asserted bits in INT_STATUS. Do not write + * '1' back to 'receive packet' bit, since that is + * managed separately. + */ + + int_status &= ~MLXBF_GIGE_INT_STATUS_RX_RECEIVE_PACKET; + + writeq(int_status, priv->base + MLXBF_GIGE_INT_STATUS); + + return IRQ_HANDLED; +} + +static irqreturn_t mlxbf_gige_rx_intr(int irq, void *dev_id) +{ + struct mlxbf_gige *priv; + + priv = dev_id; + + /* NOTE: GigE silicon automatically disables "packet rx" interrupt by + * setting MLXBF_GIGE_INT_MASK bit0 upon triggering the interrupt + * to the ARM cores. Software needs to re-enable "packet rx" + * interrupts by clearing MLXBF_GIGE_INT_MASK bit0. + */ + + napi_schedule(&priv->napi); + + return IRQ_HANDLED; +} + +static irqreturn_t mlxbf_gige_llu_plu_intr(int irq, void *dev_id) +{ + return IRQ_HANDLED; +} + +int mlxbf_gige_request_irqs(struct mlxbf_gige *priv) +{ + int err; + + err = request_irq(priv->error_irq, mlxbf_gige_error_intr, 0, + "mlxbf_gige_error", priv); + if (err) { + dev_err(priv->dev, "Request error_irq failure\n"); + return err; + } + + err = request_irq(priv->rx_irq, mlxbf_gige_rx_intr, 0, + "mlxbf_gige_rx", priv); + if (err) { + dev_err(priv->dev, "Request rx_irq failure\n"); + goto free_error_irq; + } + + err = request_irq(priv->llu_plu_irq, mlxbf_gige_llu_plu_intr, 0, + "mlxbf_gige_llu_plu", priv); + if (err) { + dev_err(priv->dev, "Request llu_plu_irq failure\n"); + goto free_rx_irq; + } + + return 0; + +free_rx_irq: + free_irq(priv->rx_irq, priv); + +free_error_irq: + free_irq(priv->error_irq, priv); + + return err; +} + +void mlxbf_gige_free_irqs(struct mlxbf_gige *priv) +{ + free_irq(priv->error_irq, priv); + free_irq(priv->rx_irq, priv); + free_irq(priv->llu_plu_irq, priv); +} diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c new file mode 100644 index 000000000..83c465939 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -0,0 +1,438 @@ +// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause + +/* Gigabit Ethernet driver for Mellanox BlueField SoC + * + * Copyright (C) 2020-2021 NVIDIA CORPORATION & AFFILIATES + */ + +#include <linux/acpi.h> +#include <linux/device.h> +#include <linux/dma-mapping.h> +#include <linux/etherdevice.h> +#include <linux/interrupt.h> +#include <linux/iopoll.h> +#include <linux/module.h> +#include <linux/phy.h> +#include <linux/platform_device.h> +#include <linux/skbuff.h> + +#include "mlxbf_gige.h" +#include "mlxbf_gige_regs.h" + +/* Allocate SKB whose payload pointer aligns with the Bluefield + * hardware DMA limitation, i.e. DMA operation can't cross + * a 4KB boundary. A maximum packet size of 2KB is assumed in the + * alignment formula. The alignment logic overallocates an SKB, + * and then adjusts the headroom so that the SKB data pointer is + * naturally aligned to a 2KB boundary. 
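+ * As a worked example of the formula below: if netdev_alloc_skb() returns
+ * a data pointer ending in 0x0820, rounding up to the next 2KB boundary
+ * gives 0x1000, so skb_reserve() consumes 0x7e0 bytes of headroom; a 2KB
+ * buffer that starts on a 2KB boundary can never cross a 4KB page.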
+ */ +struct sk_buff *mlxbf_gige_alloc_skb(struct mlxbf_gige *priv, + unsigned int map_len, + dma_addr_t *buf_dma, + enum dma_data_direction dir) +{ + struct sk_buff *skb; + u64 addr, offset; + + /* Overallocate the SKB so that any headroom adjustment (to + * provide 2KB natural alignment) does not exceed payload area + */ + skb = netdev_alloc_skb(priv->netdev, MLXBF_GIGE_DEFAULT_BUF_SZ * 2); + if (!skb) + return NULL; + + /* Adjust the headroom so that skb->data is naturally aligned to + * a 2KB boundary, which is the maximum packet size supported. + */ + addr = (long)skb->data; + offset = (addr + MLXBF_GIGE_DEFAULT_BUF_SZ - 1) & + ~(MLXBF_GIGE_DEFAULT_BUF_SZ - 1); + offset -= addr; + if (offset) + skb_reserve(skb, offset); + + /* Return streaming DMA mapping to caller */ + *buf_dma = dma_map_single(priv->dev, skb->data, map_len, dir); + if (dma_mapping_error(priv->dev, *buf_dma)) { + dev_kfree_skb(skb); + *buf_dma = (dma_addr_t)0; + return NULL; + } + + return skb; +} + +static void mlxbf_gige_initial_mac(struct mlxbf_gige *priv) +{ + u8 mac[ETH_ALEN]; + u64 local_mac; + + eth_zero_addr(mac); + mlxbf_gige_get_mac_rx_filter(priv, MLXBF_GIGE_LOCAL_MAC_FILTER_IDX, + &local_mac); + u64_to_ether_addr(local_mac, mac); + + if (is_valid_ether_addr(mac)) { + eth_hw_addr_set(priv->netdev, mac); + } else { + /* Provide a random MAC if for some reason the device has + * not been configured with a valid MAC address already. + */ + eth_hw_addr_random(priv->netdev); + } + + local_mac = ether_addr_to_u64(priv->netdev->dev_addr); + mlxbf_gige_set_mac_rx_filter(priv, MLXBF_GIGE_LOCAL_MAC_FILTER_IDX, + local_mac); +} + +static void mlxbf_gige_cache_stats(struct mlxbf_gige *priv) +{ + struct mlxbf_gige_stats *p; + + /* Cache stats that will be cleared by clean port operation */ + p = &priv->stats; + p->rx_din_dropped_pkts += readq(priv->base + + MLXBF_GIGE_RX_DIN_DROP_COUNTER); + p->rx_filter_passed_pkts += readq(priv->base + + MLXBF_GIGE_RX_PASS_COUNTER_ALL); + p->rx_filter_discard_pkts += readq(priv->base + + MLXBF_GIGE_RX_DISC_COUNTER_ALL); +} + +static int mlxbf_gige_clean_port(struct mlxbf_gige *priv) +{ + u64 control; + u64 temp; + int err; + + /* Set the CLEAN_PORT_EN bit to trigger SW reset */ + control = readq(priv->base + MLXBF_GIGE_CONTROL); + control |= MLXBF_GIGE_CONTROL_CLEAN_PORT_EN; + writeq(control, priv->base + MLXBF_GIGE_CONTROL); + + /* Ensure completion of "clean port" write before polling status */ + mb(); + + err = readq_poll_timeout_atomic(priv->base + MLXBF_GIGE_STATUS, temp, + (temp & MLXBF_GIGE_STATUS_READY), + 100, 100000); + + /* Clear the CLEAN_PORT_EN bit at end of this loop */ + control = readq(priv->base + MLXBF_GIGE_CONTROL); + control &= ~MLXBF_GIGE_CONTROL_CLEAN_PORT_EN; + writeq(control, priv->base + MLXBF_GIGE_CONTROL); + + return err; +} + +static int mlxbf_gige_open(struct net_device *netdev) +{ + struct mlxbf_gige *priv = netdev_priv(netdev); + struct phy_device *phydev = netdev->phydev; + u64 control; + u64 int_en; + int err; + + /* Perform general init of GigE block */ + control = readq(priv->base + MLXBF_GIGE_CONTROL); + control |= MLXBF_GIGE_CONTROL_PORT_EN; + writeq(control, priv->base + MLXBF_GIGE_CONTROL); + + err = mlxbf_gige_request_irqs(priv); + if (err) + return err; + mlxbf_gige_cache_stats(priv); + err = mlxbf_gige_clean_port(priv); + if (err) + goto free_irqs; + + /* Clear driver's valid_polarity to match hardware, + * since the above call to clean_port() resets the + * receive polarity used by hardware. 
+ */ + priv->valid_polarity = 0; + + phy_start(phydev); + + err = mlxbf_gige_tx_init(priv); + if (err) + goto free_irqs; + err = mlxbf_gige_rx_init(priv); + if (err) + goto tx_deinit; + + netif_napi_add(netdev, &priv->napi, mlxbf_gige_poll); + napi_enable(&priv->napi); + netif_start_queue(netdev); + + /* Set bits in INT_EN that we care about */ + int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR | + MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS | + MLXBF_GIGE_INT_EN_TX_SMALL_FRAME_SIZE | + MLXBF_GIGE_INT_EN_TX_PI_CI_EXCEED_WQ_SIZE | + MLXBF_GIGE_INT_EN_SW_CONFIG_ERROR | + MLXBF_GIGE_INT_EN_SW_ACCESS_ERROR | + MLXBF_GIGE_INT_EN_RX_RECEIVE_PACKET; + + /* Ensure completion of all initialization before enabling interrupts */ + mb(); + + writeq(int_en, priv->base + MLXBF_GIGE_INT_EN); + + return 0; + +tx_deinit: + mlxbf_gige_tx_deinit(priv); + +free_irqs: + mlxbf_gige_free_irqs(priv); + return err; +} + +static int mlxbf_gige_stop(struct net_device *netdev) +{ + struct mlxbf_gige *priv = netdev_priv(netdev); + + writeq(0, priv->base + MLXBF_GIGE_INT_EN); + netif_stop_queue(netdev); + napi_disable(&priv->napi); + netif_napi_del(&priv->napi); + mlxbf_gige_free_irqs(priv); + + phy_stop(netdev->phydev); + + mlxbf_gige_rx_deinit(priv); + mlxbf_gige_tx_deinit(priv); + mlxbf_gige_cache_stats(priv); + mlxbf_gige_clean_port(priv); + + return 0; +} + +static int mlxbf_gige_eth_ioctl(struct net_device *netdev, + struct ifreq *ifr, int cmd) +{ + if (!(netif_running(netdev))) + return -EINVAL; + + return phy_mii_ioctl(netdev->phydev, ifr, cmd); +} + +static void mlxbf_gige_set_rx_mode(struct net_device *netdev) +{ + struct mlxbf_gige *priv = netdev_priv(netdev); + bool new_promisc_enabled; + + new_promisc_enabled = netdev->flags & IFF_PROMISC; + + /* Only write to the hardware registers if the new setting + * of promiscuous mode is different from the current one. 
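+	 * (Promiscuous mode is implemented with the MAC_ID_RANGE filter:
+	 * mlxbf_gige_enable_promisc() programs a destination-MAC range of
+	 * all-zeros through all-ones, see mlxbf_gige_rx.c.)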
+ */ + if (new_promisc_enabled != priv->promisc_enabled) { + priv->promisc_enabled = new_promisc_enabled; + + if (new_promisc_enabled) + mlxbf_gige_enable_promisc(priv); + else + mlxbf_gige_disable_promisc(priv); + } +} + +static void mlxbf_gige_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct mlxbf_gige *priv = netdev_priv(netdev); + + netdev_stats_to_stats64(stats, &netdev->stats); + + stats->rx_length_errors = priv->stats.rx_truncate_errors; + stats->rx_fifo_errors = priv->stats.rx_din_dropped_pkts + + readq(priv->base + MLXBF_GIGE_RX_DIN_DROP_COUNTER); + stats->rx_crc_errors = priv->stats.rx_mac_errors; + stats->rx_errors = stats->rx_length_errors + + stats->rx_fifo_errors + + stats->rx_crc_errors; + + stats->tx_fifo_errors = priv->stats.tx_fifo_full; + stats->tx_errors = stats->tx_fifo_errors; +} + +static const struct net_device_ops mlxbf_gige_netdev_ops = { + .ndo_open = mlxbf_gige_open, + .ndo_stop = mlxbf_gige_stop, + .ndo_start_xmit = mlxbf_gige_start_xmit, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_eth_ioctl = mlxbf_gige_eth_ioctl, + .ndo_set_rx_mode = mlxbf_gige_set_rx_mode, + .ndo_get_stats64 = mlxbf_gige_get_stats64, +}; + +static void mlxbf_gige_adjust_link(struct net_device *netdev) +{ + struct phy_device *phydev = netdev->phydev; + + phy_print_status(phydev); +} + +static int mlxbf_gige_probe(struct platform_device *pdev) +{ + struct phy_device *phydev; + struct net_device *netdev; + struct mlxbf_gige *priv; + void __iomem *llu_base; + void __iomem *plu_base; + void __iomem *base; + int addr, phy_irq; + int err; + + base = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_MAC); + if (IS_ERR(base)) + return PTR_ERR(base); + + llu_base = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_LLU); + if (IS_ERR(llu_base)) + return PTR_ERR(llu_base); + + plu_base = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_PLU); + if (IS_ERR(plu_base)) + return PTR_ERR(plu_base); + + netdev = devm_alloc_etherdev(&pdev->dev, sizeof(*priv)); + if (!netdev) + return -ENOMEM; + + SET_NETDEV_DEV(netdev, &pdev->dev); + netdev->netdev_ops = &mlxbf_gige_netdev_ops; + netdev->ethtool_ops = &mlxbf_gige_ethtool_ops; + priv = netdev_priv(netdev); + priv->netdev = netdev; + + platform_set_drvdata(pdev, priv); + priv->dev = &pdev->dev; + priv->pdev = pdev; + + spin_lock_init(&priv->lock); + + /* Attach MDIO device */ + err = mlxbf_gige_mdio_probe(pdev, priv); + if (err) + return err; + + priv->base = base; + priv->llu_base = llu_base; + priv->plu_base = plu_base; + + priv->rx_q_entries = MLXBF_GIGE_DEFAULT_RXQ_SZ; + priv->tx_q_entries = MLXBF_GIGE_DEFAULT_TXQ_SZ; + + /* Write initial MAC address to hardware */ + mlxbf_gige_initial_mac(priv); + + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + dev_err(&pdev->dev, "DMA configuration failed: 0x%x\n", err); + goto out; + } + + priv->error_irq = platform_get_irq(pdev, MLXBF_GIGE_ERROR_INTR_IDX); + priv->rx_irq = platform_get_irq(pdev, MLXBF_GIGE_RECEIVE_PKT_INTR_IDX); + priv->llu_plu_irq = platform_get_irq(pdev, MLXBF_GIGE_LLU_PLU_INTR_IDX); + + phy_irq = acpi_dev_gpio_irq_get_by(ACPI_COMPANION(&pdev->dev), "phy-gpios", 0); + if (phy_irq < 0) { + dev_err(&pdev->dev, "Error getting PHY irq. 
Use polling instead"); + phy_irq = PHY_POLL; + } + + phydev = phy_find_first(priv->mdiobus); + if (!phydev) { + err = -ENODEV; + goto out; + } + + addr = phydev->mdio.addr; + priv->mdiobus->irq[addr] = phy_irq; + phydev->irq = phy_irq; + + err = phy_connect_direct(netdev, phydev, + mlxbf_gige_adjust_link, + PHY_INTERFACE_MODE_GMII); + if (err) { + dev_err(&pdev->dev, "Could not attach to PHY\n"); + goto out; + } + + /* MAC only supports 1000T full duplex mode */ + phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT); + phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_100baseT_Full_BIT); + phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_100baseT_Half_BIT); + phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_10baseT_Full_BIT); + phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_10baseT_Half_BIT); + + /* Only symmetric pause with flow control enabled is supported so no + * need to negotiate pause. + */ + linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->advertising); + linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydev->advertising); + + /* Display information about attached PHY device */ + phy_attached_info(phydev); + + err = register_netdev(netdev); + if (err) { + dev_err(&pdev->dev, "Failed to register netdev\n"); + phy_disconnect(phydev); + goto out; + } + + return 0; + +out: + mlxbf_gige_mdio_remove(priv); + return err; +} + +static int mlxbf_gige_remove(struct platform_device *pdev) +{ + struct mlxbf_gige *priv = platform_get_drvdata(pdev); + + unregister_netdev(priv->netdev); + phy_disconnect(priv->netdev->phydev); + mlxbf_gige_mdio_remove(priv); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +static void mlxbf_gige_shutdown(struct platform_device *pdev) +{ + struct mlxbf_gige *priv = platform_get_drvdata(pdev); + + writeq(0, priv->base + MLXBF_GIGE_INT_EN); + mlxbf_gige_clean_port(priv); +} + +static const struct acpi_device_id __maybe_unused mlxbf_gige_acpi_match[] = { + { "MLNXBF17", 0 }, + {}, +}; +MODULE_DEVICE_TABLE(acpi, mlxbf_gige_acpi_match); + +static struct platform_driver mlxbf_gige_driver = { + .probe = mlxbf_gige_probe, + .remove = mlxbf_gige_remove, + .shutdown = mlxbf_gige_shutdown, + .driver = { + .name = KBUILD_MODNAME, + .acpi_match_table = ACPI_PTR(mlxbf_gige_acpi_match), + }, +}; + +module_platform_driver(mlxbf_gige_driver); + +MODULE_DESCRIPTION("Mellanox BlueField SoC Gigabit Ethernet Driver"); +MODULE_AUTHOR("David Thompson <davthompson@nvidia.com>"); +MODULE_AUTHOR("Asmaa Mnebhi <asmaa@nvidia.com>"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c new file mode 100644 index 000000000..aa780b161 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause + +/* MDIO support for Mellanox Gigabit Ethernet driver + * + * Copyright (C) 2020-2021 NVIDIA CORPORATION & AFFILIATES + */ + +#include <linux/acpi.h> +#include <linux/bitfield.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/ioport.h> +#include <linux/irqreturn.h> +#include <linux/jiffies.h> +#include <linux/module.h> +#include <linux/mod_devicetable.h> +#include <linux/phy.h> +#include <linux/platform_device.h> +#include <linux/property.h> + +#include "mlxbf_gige.h" +#include "mlxbf_gige_regs.h" + +#define MLXBF_GIGE_MDIO_GW_OFFSET 0x0 +#define MLXBF_GIGE_MDIO_CFG_OFFSET 0x4 + 
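+/* The constants below describe the fixed 156.25 MHz reference clock and
+ * the core PLL divider fields (CORE_F/CORE_R/CORE_OD) read back from the
+ * PLL registers; calculate_i1clk() and mdio_period_map() further down
+ * combine them to derive the MDC period programmed into the CFG register.
+ */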
+#define MLXBF_GIGE_MDIO_FREQ_REFERENCE 156250000ULL +#define MLXBF_GIGE_MDIO_COREPLL_CONST 16384ULL +#define MLXBF_GIGE_MDC_CLK_NS 400 +#define MLXBF_GIGE_MDIO_PLL_I1CLK_REG1 0x4 +#define MLXBF_GIGE_MDIO_PLL_I1CLK_REG2 0x8 +#define MLXBF_GIGE_MDIO_CORE_F_SHIFT 0 +#define MLXBF_GIGE_MDIO_CORE_F_MASK GENMASK(25, 0) +#define MLXBF_GIGE_MDIO_CORE_R_SHIFT 26 +#define MLXBF_GIGE_MDIO_CORE_R_MASK GENMASK(31, 26) +#define MLXBF_GIGE_MDIO_CORE_OD_SHIFT 0 +#define MLXBF_GIGE_MDIO_CORE_OD_MASK GENMASK(3, 0) + +/* Support clause 22 */ +#define MLXBF_GIGE_MDIO_CL22_ST1 0x1 +#define MLXBF_GIGE_MDIO_CL22_WRITE 0x1 +#define MLXBF_GIGE_MDIO_CL22_READ 0x2 + +/* Busy bit is set by software and cleared by hardware */ +#define MLXBF_GIGE_MDIO_SET_BUSY 0x1 + +/* MDIO GW register bits */ +#define MLXBF_GIGE_MDIO_GW_AD_MASK GENMASK(15, 0) +#define MLXBF_GIGE_MDIO_GW_DEVAD_MASK GENMASK(20, 16) +#define MLXBF_GIGE_MDIO_GW_PARTAD_MASK GENMASK(25, 21) +#define MLXBF_GIGE_MDIO_GW_OPCODE_MASK GENMASK(27, 26) +#define MLXBF_GIGE_MDIO_GW_ST1_MASK GENMASK(28, 28) +#define MLXBF_GIGE_MDIO_GW_BUSY_MASK GENMASK(30, 30) + +/* MDIO config register bits */ +#define MLXBF_GIGE_MDIO_CFG_MDIO_MODE_MASK GENMASK(1, 0) +#define MLXBF_GIGE_MDIO_CFG_MDIO3_3_MASK GENMASK(2, 2) +#define MLXBF_GIGE_MDIO_CFG_MDIO_FULL_DRIVE_MASK GENMASK(4, 4) +#define MLXBF_GIGE_MDIO_CFG_MDC_PERIOD_MASK GENMASK(15, 8) +#define MLXBF_GIGE_MDIO_CFG_MDIO_IN_SAMP_MASK GENMASK(23, 16) +#define MLXBF_GIGE_MDIO_CFG_MDIO_OUT_SAMP_MASK GENMASK(31, 24) + +#define MLXBF_GIGE_MDIO_CFG_VAL (FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_MODE_MASK, 1) | \ + FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO3_3_MASK, 1) | \ + FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_FULL_DRIVE_MASK, 1) | \ + FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_IN_SAMP_MASK, 6) | \ + FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_OUT_SAMP_MASK, 13)) + +#define MLXBF_GIGE_BF2_COREPLL_ADDR 0x02800c30 +#define MLXBF_GIGE_BF2_COREPLL_SIZE 0x0000000c + +static struct resource corepll_params[] = { + [MLXBF_GIGE_VERSION_BF2] = { + .start = MLXBF_GIGE_BF2_COREPLL_ADDR, + .end = MLXBF_GIGE_BF2_COREPLL_ADDR + MLXBF_GIGE_BF2_COREPLL_SIZE - 1, + .name = "COREPLL_RES" + }, +}; + +/* Returns core clock i1clk in Hz */ +static u64 calculate_i1clk(struct mlxbf_gige *priv) +{ + u8 core_od, core_r; + u64 freq_output; + u32 reg1, reg2; + u32 core_f; + + reg1 = readl(priv->clk_io + MLXBF_GIGE_MDIO_PLL_I1CLK_REG1); + reg2 = readl(priv->clk_io + MLXBF_GIGE_MDIO_PLL_I1CLK_REG2); + + core_f = (reg1 & MLXBF_GIGE_MDIO_CORE_F_MASK) >> + MLXBF_GIGE_MDIO_CORE_F_SHIFT; + core_r = (reg1 & MLXBF_GIGE_MDIO_CORE_R_MASK) >> + MLXBF_GIGE_MDIO_CORE_R_SHIFT; + core_od = (reg2 & MLXBF_GIGE_MDIO_CORE_OD_MASK) >> + MLXBF_GIGE_MDIO_CORE_OD_SHIFT; + + /* Compute PLL output frequency as follow: + * + * CORE_F / 16384 + * freq_output = freq_reference * ---------------------------- + * (CORE_R + 1) * (CORE_OD + 1) + */ + freq_output = div_u64((MLXBF_GIGE_MDIO_FREQ_REFERENCE * core_f), + MLXBF_GIGE_MDIO_COREPLL_CONST); + freq_output = div_u64(freq_output, (core_r + 1) * (core_od + 1)); + + return freq_output; +} + +/* Formula for encoding the MDIO period. The encoded value is + * passed to the MDIO config register. 
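+ * (As a worked example with a hypothetical i1clk of 430 MHz: the
+ * formula below gives val = (200 * 430000000) / 10^9 - 1 = 85, and
+ * 2 * (85 + 1) / 430 MHz works out to exactly the 400 ns MDC target.)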
+ * + * mdc_clk = 2*(val + 1)*(core clock in sec) + * + * i1clk is in Hz: + * 400 ns = 2*(val + 1)*(1/i1clk) + * + * val = (((400/10^9) / (1/i1clk) / 2) - 1) + * val = (400/2 * i1clk)/10^9 - 1 + */ +static u8 mdio_period_map(struct mlxbf_gige *priv) +{ + u8 mdio_period; + u64 i1clk; + + i1clk = calculate_i1clk(priv); + + mdio_period = div_u64((MLXBF_GIGE_MDC_CLK_NS >> 1) * i1clk, 1000000000) - 1; + + return mdio_period; +} + +static u32 mlxbf_gige_mdio_create_cmd(u16 data, int phy_add, + int phy_reg, u32 opcode) +{ + u32 gw_reg = 0; + + gw_reg |= FIELD_PREP(MLXBF_GIGE_MDIO_GW_AD_MASK, data); + gw_reg |= FIELD_PREP(MLXBF_GIGE_MDIO_GW_DEVAD_MASK, phy_reg); + gw_reg |= FIELD_PREP(MLXBF_GIGE_MDIO_GW_PARTAD_MASK, phy_add); + gw_reg |= FIELD_PREP(MLXBF_GIGE_MDIO_GW_OPCODE_MASK, opcode); + gw_reg |= FIELD_PREP(MLXBF_GIGE_MDIO_GW_ST1_MASK, + MLXBF_GIGE_MDIO_CL22_ST1); + gw_reg |= FIELD_PREP(MLXBF_GIGE_MDIO_GW_BUSY_MASK, + MLXBF_GIGE_MDIO_SET_BUSY); + + return gw_reg; +} + +static int mlxbf_gige_mdio_read(struct mii_bus *bus, int phy_add, int phy_reg) +{ + struct mlxbf_gige *priv = bus->priv; + u32 cmd; + int ret; + u32 val; + + if (phy_reg & MII_ADDR_C45) + return -EOPNOTSUPP; + + /* Send mdio read request */ + cmd = mlxbf_gige_mdio_create_cmd(0, phy_add, phy_reg, MLXBF_GIGE_MDIO_CL22_READ); + + writel(cmd, priv->mdio_io + MLXBF_GIGE_MDIO_GW_OFFSET); + + ret = readl_poll_timeout_atomic(priv->mdio_io + MLXBF_GIGE_MDIO_GW_OFFSET, + val, !(val & MLXBF_GIGE_MDIO_GW_BUSY_MASK), + 5, 1000000); + + if (ret) { + writel(0, priv->mdio_io + MLXBF_GIGE_MDIO_GW_OFFSET); + return ret; + } + + ret = readl(priv->mdio_io + MLXBF_GIGE_MDIO_GW_OFFSET); + /* Only return ad bits of the gw register */ + ret &= MLXBF_GIGE_MDIO_GW_AD_MASK; + + /* The MDIO lock is set on read. To release it, clear gw register */ + writel(0, priv->mdio_io + MLXBF_GIGE_MDIO_GW_OFFSET); + + return ret; +} + +static int mlxbf_gige_mdio_write(struct mii_bus *bus, int phy_add, + int phy_reg, u16 val) +{ + struct mlxbf_gige *priv = bus->priv; + u32 temp; + u32 cmd; + int ret; + + if (phy_reg & MII_ADDR_C45) + return -EOPNOTSUPP; + + /* Send mdio write request */ + cmd = mlxbf_gige_mdio_create_cmd(val, phy_add, phy_reg, + MLXBF_GIGE_MDIO_CL22_WRITE); + writel(cmd, priv->mdio_io + MLXBF_GIGE_MDIO_GW_OFFSET); + + /* If the poll timed out, drop the request */ + ret = readl_poll_timeout_atomic(priv->mdio_io + MLXBF_GIGE_MDIO_GW_OFFSET, + temp, !(temp & MLXBF_GIGE_MDIO_GW_BUSY_MASK), + 5, 1000000); + + /* The MDIO lock is set on read. To release it, clear gw register */ + writel(0, priv->mdio_io + MLXBF_GIGE_MDIO_GW_OFFSET); + + return ret; +} + +static void mlxbf_gige_mdio_cfg(struct mlxbf_gige *priv) +{ + u8 mdio_period; + u32 val; + + mdio_period = mdio_period_map(priv); + + val = MLXBF_GIGE_MDIO_CFG_VAL; + val |= FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDC_PERIOD_MASK, mdio_period); + writel(val, priv->mdio_io + MLXBF_GIGE_MDIO_CFG_OFFSET); +} + +int mlxbf_gige_mdio_probe(struct platform_device *pdev, struct mlxbf_gige *priv) +{ + struct device *dev = &pdev->dev; + struct resource *res; + int ret; + + priv->mdio_io = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_MDIO9); + if (IS_ERR(priv->mdio_io)) + return PTR_ERR(priv->mdio_io); + + /* clk resource shared with other drivers so cannot use + * devm_platform_ioremap_resource + */ + res = platform_get_resource(pdev, IORESOURCE_MEM, MLXBF_GIGE_RES_CLK); + if (!res) { + /* For backward compatibility with older ACPI tables, also keep + * CLK resource internal to the driver. 
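+	 * (The fallback used here is the hard-coded BlueField-2 core PLL
+	 * window described by corepll_params[] above.)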
+ */ + res = &corepll_params[MLXBF_GIGE_VERSION_BF2]; + } + + priv->clk_io = devm_ioremap(dev, res->start, resource_size(res)); + if (!priv->clk_io) + return -ENOMEM; + + mlxbf_gige_mdio_cfg(priv); + + priv->mdiobus = devm_mdiobus_alloc(dev); + if (!priv->mdiobus) { + dev_err(dev, "Failed to alloc MDIO bus\n"); + return -ENOMEM; + } + + priv->mdiobus->name = "mlxbf-mdio"; + priv->mdiobus->read = mlxbf_gige_mdio_read; + priv->mdiobus->write = mlxbf_gige_mdio_write; + priv->mdiobus->parent = dev; + priv->mdiobus->priv = priv; + snprintf(priv->mdiobus->id, MII_BUS_ID_SIZE, "%s", + dev_name(dev)); + + ret = mdiobus_register(priv->mdiobus); + if (ret) + dev_err(dev, "Failed to register MDIO bus\n"); + + return ret; +} + +void mlxbf_gige_mdio_remove(struct mlxbf_gige *priv) +{ + mdiobus_unregister(priv->mdiobus); +} diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h new file mode 100644 index 000000000..7be3a7939 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause */ + +/* Header file for Mellanox BlueField GigE register defines + * + * Copyright (C) 2020-2021 NVIDIA CORPORATION & AFFILIATES + */ + +#ifndef __MLXBF_GIGE_REGS_H__ +#define __MLXBF_GIGE_REGS_H__ + +#define MLXBF_GIGE_VERSION 0x0000 +#define MLXBF_GIGE_VERSION_BF2 0x0 +#define MLXBF_GIGE_STATUS 0x0010 +#define MLXBF_GIGE_STATUS_READY BIT(0) +#define MLXBF_GIGE_INT_STATUS 0x0028 +#define MLXBF_GIGE_INT_STATUS_RX_RECEIVE_PACKET BIT(0) +#define MLXBF_GIGE_INT_STATUS_RX_MAC_ERROR BIT(1) +#define MLXBF_GIGE_INT_STATUS_RX_TRN_ERROR BIT(2) +#define MLXBF_GIGE_INT_STATUS_SW_ACCESS_ERROR BIT(3) +#define MLXBF_GIGE_INT_STATUS_SW_CONFIG_ERROR BIT(4) +#define MLXBF_GIGE_INT_STATUS_TX_PI_CI_EXCEED_WQ_SIZE BIT(5) +#define MLXBF_GIGE_INT_STATUS_TX_SMALL_FRAME_SIZE BIT(6) +#define MLXBF_GIGE_INT_STATUS_TX_CHECKSUM_INPUTS BIT(7) +#define MLXBF_GIGE_INT_STATUS_HW_ACCESS_ERROR BIT(8) +#define MLXBF_GIGE_INT_EN 0x0030 +#define MLXBF_GIGE_INT_EN_RX_RECEIVE_PACKET BIT(0) +#define MLXBF_GIGE_INT_EN_RX_MAC_ERROR BIT(1) +#define MLXBF_GIGE_INT_EN_RX_TRN_ERROR BIT(2) +#define MLXBF_GIGE_INT_EN_SW_ACCESS_ERROR BIT(3) +#define MLXBF_GIGE_INT_EN_SW_CONFIG_ERROR BIT(4) +#define MLXBF_GIGE_INT_EN_TX_PI_CI_EXCEED_WQ_SIZE BIT(5) +#define MLXBF_GIGE_INT_EN_TX_SMALL_FRAME_SIZE BIT(6) +#define MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS BIT(7) +#define MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR BIT(8) +#define MLXBF_GIGE_INT_MASK 0x0038 +#define MLXBF_GIGE_INT_MASK_RX_RECEIVE_PACKET BIT(0) +#define MLXBF_GIGE_CONTROL 0x0040 +#define MLXBF_GIGE_CONTROL_PORT_EN BIT(0) +#define MLXBF_GIGE_CONTROL_MAC_ID_RANGE_EN BIT(1) +#define MLXBF_GIGE_CONTROL_EN_SPECIFIC_MAC BIT(4) +#define MLXBF_GIGE_CONTROL_CLEAN_PORT_EN BIT(31) +#define MLXBF_GIGE_RX_WQ_BASE 0x0200 +#define MLXBF_GIGE_RX_WQE_SIZE_LOG2 0x0208 +#define MLXBF_GIGE_RX_WQE_SIZE_LOG2_RESET_VAL 7 +#define MLXBF_GIGE_RX_CQ_BASE 0x0210 +#define MLXBF_GIGE_TX_WQ_BASE 0x0218 +#define MLXBF_GIGE_TX_WQ_SIZE_LOG2 0x0220 +#define MLXBF_GIGE_TX_WQ_SIZE_LOG2_RESET_VAL 7 +#define MLXBF_GIGE_TX_CI_UPDATE_ADDRESS 0x0228 +#define MLXBF_GIGE_RX_WQE_PI 0x0230 +#define MLXBF_GIGE_TX_PRODUCER_INDEX 0x0238 +#define MLXBF_GIGE_RX_MAC_FILTER 0x0240 +#define MLXBF_GIGE_RX_MAC_FILTER_STRIDE 0x0008 +#define MLXBF_GIGE_RX_DIN_DROP_COUNTER 0x0260 +#define MLXBF_GIGE_TX_CONSUMER_INDEX 0x0310 +#define MLXBF_GIGE_TX_CONTROL 0x0318 +#define MLXBF_GIGE_TX_CONTROL_GRACEFUL_STOP BIT(0) 
+#define MLXBF_GIGE_TX_STATUS 0x0388 +#define MLXBF_GIGE_TX_STATUS_DATA_FIFO_FULL BIT(1) +#define MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_START 0x0520 +#define MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_END 0x0528 +#define MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC 0x0540 +#define MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC_EN BIT(0) +#define MLXBF_GIGE_RX_MAC_FILTER_COUNT_PASS 0x0548 +#define MLXBF_GIGE_RX_MAC_FILTER_COUNT_PASS_EN BIT(0) +#define MLXBF_GIGE_RX_PASS_COUNTER_ALL 0x0550 +#define MLXBF_GIGE_RX_DISC_COUNTER_ALL 0x0560 +#define MLXBF_GIGE_RX 0x0578 +#define MLXBF_GIGE_RX_STRIP_CRC_EN BIT(1) +#define MLXBF_GIGE_RX_DMA 0x0580 +#define MLXBF_GIGE_RX_DMA_EN BIT(0) +#define MLXBF_GIGE_RX_CQE_PACKET_CI 0x05b0 +#define MLXBF_GIGE_MAC_CFG 0x05e8 + +/* NOTE: MLXBF_GIGE_MAC_CFG is the last defined register offset, + * so use that plus size of single register to derive total size + */ +#define MLXBF_GIGE_MMIO_REG_SZ (MLXBF_GIGE_MAC_CFG + 8) + +#endif /* !defined(__MLXBF_GIGE_REGS_H__) */ diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c new file mode 100644 index 000000000..699984358 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c @@ -0,0 +1,326 @@ +// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause + +/* Packet receive logic for Mellanox Gigabit Ethernet driver + * + * Copyright (C) 2020-2021 NVIDIA CORPORATION & AFFILIATES + */ + +#include <linux/etherdevice.h> +#include <linux/skbuff.h> + +#include "mlxbf_gige.h" +#include "mlxbf_gige_regs.h" + +void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index, u64 dmac) +{ + void __iomem *base = priv->base; + u64 control; + + /* Write destination MAC to specified MAC RX filter */ + writeq(dmac, base + MLXBF_GIGE_RX_MAC_FILTER + + (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE)); + + /* Enable MAC receive filter mask for specified index */ + control = readq(base + MLXBF_GIGE_CONTROL); + control |= (MLXBF_GIGE_CONTROL_EN_SPECIFIC_MAC << index); + writeq(control, base + MLXBF_GIGE_CONTROL); +} + +void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index, u64 *dmac) +{ + void __iomem *base = priv->base; + + /* Read destination MAC from specified MAC RX filter */ + *dmac = readq(base + MLXBF_GIGE_RX_MAC_FILTER + + (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE)); +} + +void mlxbf_gige_enable_promisc(struct mlxbf_gige *priv) +{ + void __iomem *base = priv->base; + u64 control; + u64 end_mac; + + /* Enable MAC_ID_RANGE match functionality */ + control = readq(base + MLXBF_GIGE_CONTROL); + control |= MLXBF_GIGE_CONTROL_MAC_ID_RANGE_EN; + writeq(control, base + MLXBF_GIGE_CONTROL); + + /* Set start of destination MAC range check to 0 */ + writeq(0, base + MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_START); + + /* Set end of destination MAC range check to all FFs */ + end_mac = BCAST_MAC_ADDR; + writeq(end_mac, base + MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_END); +} + +void mlxbf_gige_disable_promisc(struct mlxbf_gige *priv) +{ + void __iomem *base = priv->base; + u64 control; + + /* Disable MAC_ID_RANGE match functionality */ + control = readq(base + MLXBF_GIGE_CONTROL); + control &= ~MLXBF_GIGE_CONTROL_MAC_ID_RANGE_EN; + writeq(control, base + MLXBF_GIGE_CONTROL); + + /* NOTE: no need to change DMAC_RANGE_START or END; + * those values are ignored since MAC_ID_RANGE_EN=0 + */ +} + +/* Receive Initialization + * 1) Configures RX MAC filters via MMIO registers + * 2) Allocates RX WQE array using coherent DMA mapping + * 3) Initializes each 
element of RX WQE array with a receive + * buffer pointer (also using coherent DMA mapping) + * 4) Allocates RX CQE array using coherent DMA mapping + * 5) Completes other misc receive initialization + */ +int mlxbf_gige_rx_init(struct mlxbf_gige *priv) +{ + size_t wq_size, cq_size; + dma_addr_t *rx_wqe_ptr; + dma_addr_t rx_buf_dma; + u64 data; + int i, j; + + /* Configure MAC RX filter #0 to allow RX of broadcast pkts */ + mlxbf_gige_set_mac_rx_filter(priv, MLXBF_GIGE_BCAST_MAC_FILTER_IDX, + BCAST_MAC_ADDR); + + wq_size = MLXBF_GIGE_RX_WQE_SZ * priv->rx_q_entries; + priv->rx_wqe_base = dma_alloc_coherent(priv->dev, wq_size, + &priv->rx_wqe_base_dma, + GFP_KERNEL); + if (!priv->rx_wqe_base) + return -ENOMEM; + + /* Initialize 'rx_wqe_ptr' to point to first RX WQE in array + * Each RX WQE is simply a receive buffer pointer, so walk + * the entire array, allocating a 2KB buffer for each element + */ + rx_wqe_ptr = priv->rx_wqe_base; + + for (i = 0; i < priv->rx_q_entries; i++) { + priv->rx_skb[i] = mlxbf_gige_alloc_skb(priv, MLXBF_GIGE_DEFAULT_BUF_SZ, + &rx_buf_dma, DMA_FROM_DEVICE); + if (!priv->rx_skb[i]) + goto free_wqe_and_skb; + *rx_wqe_ptr++ = rx_buf_dma; + } + + /* Write RX WQE base address into MMIO reg */ + writeq(priv->rx_wqe_base_dma, priv->base + MLXBF_GIGE_RX_WQ_BASE); + + cq_size = MLXBF_GIGE_RX_CQE_SZ * priv->rx_q_entries; + priv->rx_cqe_base = dma_alloc_coherent(priv->dev, cq_size, + &priv->rx_cqe_base_dma, + GFP_KERNEL); + if (!priv->rx_cqe_base) + goto free_wqe_and_skb; + + for (i = 0; i < priv->rx_q_entries; i++) + priv->rx_cqe_base[i] |= MLXBF_GIGE_RX_CQE_VALID_MASK; + + /* Write RX CQE base address into MMIO reg */ + writeq(priv->rx_cqe_base_dma, priv->base + MLXBF_GIGE_RX_CQ_BASE); + + /* Write RX_WQE_PI with current number of replenished buffers */ + writeq(priv->rx_q_entries, priv->base + MLXBF_GIGE_RX_WQE_PI); + + /* Enable removal of CRC during RX */ + data = readq(priv->base + MLXBF_GIGE_RX); + data |= MLXBF_GIGE_RX_STRIP_CRC_EN; + writeq(data, priv->base + MLXBF_GIGE_RX); + + /* Enable RX MAC filter pass and discard counters */ + writeq(MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC_EN, + priv->base + MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC); + writeq(MLXBF_GIGE_RX_MAC_FILTER_COUNT_PASS_EN, + priv->base + MLXBF_GIGE_RX_MAC_FILTER_COUNT_PASS); + + writeq(ilog2(priv->rx_q_entries), + priv->base + MLXBF_GIGE_RX_WQE_SIZE_LOG2); + + /* Clear MLXBF_GIGE_INT_MASK 'receive pkt' bit to + * indicate readiness to receive interrupts + */ + data = readq(priv->base + MLXBF_GIGE_INT_MASK); + data &= ~MLXBF_GIGE_INT_MASK_RX_RECEIVE_PACKET; + writeq(data, priv->base + MLXBF_GIGE_INT_MASK); + + /* Enable RX DMA to write new packets to memory */ + data = readq(priv->base + MLXBF_GIGE_RX_DMA); + data |= MLXBF_GIGE_RX_DMA_EN; + writeq(data, priv->base + MLXBF_GIGE_RX_DMA); + + return 0; + +free_wqe_and_skb: + rx_wqe_ptr = priv->rx_wqe_base; + for (j = 0; j < i; j++) { + dma_unmap_single(priv->dev, *rx_wqe_ptr, + MLXBF_GIGE_DEFAULT_BUF_SZ, DMA_FROM_DEVICE); + dev_kfree_skb(priv->rx_skb[j]); + rx_wqe_ptr++; + } + dma_free_coherent(priv->dev, wq_size, + priv->rx_wqe_base, priv->rx_wqe_base_dma); + return -ENOMEM; +} + +/* Receive Deinitialization + * This routine will free allocations done by mlxbf_gige_rx_init(), + * namely the RX WQE and RX CQE arrays, as well as all RX buffers + */ +void mlxbf_gige_rx_deinit(struct mlxbf_gige *priv) +{ + dma_addr_t *rx_wqe_ptr; + size_t size; + u64 data; + int i; + + /* Disable RX DMA to prevent packet transfers to memory */ + data = readq(priv->base + 
MLXBF_GIGE_RX_DMA); + data &= ~MLXBF_GIGE_RX_DMA_EN; + writeq(data, priv->base + MLXBF_GIGE_RX_DMA); + + rx_wqe_ptr = priv->rx_wqe_base; + + for (i = 0; i < priv->rx_q_entries; i++) { + dma_unmap_single(priv->dev, *rx_wqe_ptr, MLXBF_GIGE_DEFAULT_BUF_SZ, + DMA_FROM_DEVICE); + dev_kfree_skb(priv->rx_skb[i]); + rx_wqe_ptr++; + } + + size = MLXBF_GIGE_RX_WQE_SZ * priv->rx_q_entries; + dma_free_coherent(priv->dev, size, + priv->rx_wqe_base, priv->rx_wqe_base_dma); + + size = MLXBF_GIGE_RX_CQE_SZ * priv->rx_q_entries; + dma_free_coherent(priv->dev, size, + priv->rx_cqe_base, priv->rx_cqe_base_dma); + + priv->rx_wqe_base = NULL; + priv->rx_wqe_base_dma = 0; + priv->rx_cqe_base = NULL; + priv->rx_cqe_base_dma = 0; + writeq(0, priv->base + MLXBF_GIGE_RX_WQ_BASE); + writeq(0, priv->base + MLXBF_GIGE_RX_CQ_BASE); +} + +static bool mlxbf_gige_rx_packet(struct mlxbf_gige *priv, int *rx_pkts) +{ + struct net_device *netdev = priv->netdev; + struct sk_buff *skb = NULL, *rx_skb; + u16 rx_pi_rem, rx_ci_rem; + dma_addr_t *rx_wqe_addr; + dma_addr_t rx_buf_dma; + u64 *rx_cqe_addr; + u64 datalen; + u64 rx_cqe; + u16 rx_ci; + u16 rx_pi; + + /* Index into RX buffer array is rx_pi w/wrap based on RX_CQE_SIZE */ + rx_pi = readq(priv->base + MLXBF_GIGE_RX_WQE_PI); + rx_pi_rem = rx_pi % priv->rx_q_entries; + + rx_wqe_addr = priv->rx_wqe_base + rx_pi_rem; + rx_cqe_addr = priv->rx_cqe_base + rx_pi_rem; + rx_cqe = *rx_cqe_addr; + + if ((!!(rx_cqe & MLXBF_GIGE_RX_CQE_VALID_MASK)) != priv->valid_polarity) + return false; + + if ((rx_cqe & MLXBF_GIGE_RX_CQE_PKT_STATUS_MASK) == 0) { + /* Packet is OK, increment stats */ + datalen = rx_cqe & MLXBF_GIGE_RX_CQE_PKT_LEN_MASK; + netdev->stats.rx_packets++; + netdev->stats.rx_bytes += datalen; + + skb = priv->rx_skb[rx_pi_rem]; + + /* Alloc another RX SKB for this same index */ + rx_skb = mlxbf_gige_alloc_skb(priv, MLXBF_GIGE_DEFAULT_BUF_SZ, + &rx_buf_dma, DMA_FROM_DEVICE); + if (!rx_skb) + return false; + priv->rx_skb[rx_pi_rem] = rx_skb; + dma_unmap_single(priv->dev, *rx_wqe_addr, + MLXBF_GIGE_DEFAULT_BUF_SZ, DMA_FROM_DEVICE); + + skb_put(skb, datalen); + + skb->ip_summed = CHECKSUM_NONE; /* device did not checksum packet */ + + skb->protocol = eth_type_trans(skb, netdev); + + *rx_wqe_addr = rx_buf_dma; + } else if (rx_cqe & MLXBF_GIGE_RX_CQE_PKT_STATUS_MAC_ERR) { + priv->stats.rx_mac_errors++; + } else if (rx_cqe & MLXBF_GIGE_RX_CQE_PKT_STATUS_TRUNCATED) { + priv->stats.rx_truncate_errors++; + } + + /* Read receive consumer index before replenish so that this routine + * returns accurate return value even if packet is received into + * just-replenished buffer prior to exiting this routine. 
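+	 *
+	 * (The return value, rx_pi_rem != rx_ci_rem, tells mlxbf_gige_poll()
+	 * whether further completions appear to be pending, so the NAPI loop
+	 * keeps calling this routine until the budget is exhausted.)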
+ */ + rx_ci = readq(priv->base + MLXBF_GIGE_RX_CQE_PACKET_CI); + rx_ci_rem = rx_ci % priv->rx_q_entries; + + /* Let hardware know we've replenished one buffer */ + rx_pi++; + + /* Ensure completion of all writes before notifying HW of replenish */ + wmb(); + writeq(rx_pi, priv->base + MLXBF_GIGE_RX_WQE_PI); + + (*rx_pkts)++; + + rx_pi_rem = rx_pi % priv->rx_q_entries; + if (rx_pi_rem == 0) + priv->valid_polarity ^= 1; + + if (skb) + netif_receive_skb(skb); + + return rx_pi_rem != rx_ci_rem; +} + +/* Driver poll() function called by NAPI infrastructure */ +int mlxbf_gige_poll(struct napi_struct *napi, int budget) +{ + struct mlxbf_gige *priv; + bool remaining_pkts; + int work_done = 0; + u64 data; + + priv = container_of(napi, struct mlxbf_gige, napi); + + mlxbf_gige_handle_tx_complete(priv); + + do { + remaining_pkts = mlxbf_gige_rx_packet(priv, &work_done); + } while (remaining_pkts && work_done < budget); + + /* If amount of work done < budget, turn off NAPI polling + * via napi_complete_done(napi, work_done) and then + * re-enable interrupts. + */ + if (work_done < budget && napi_complete_done(napi, work_done)) { + /* Clear MLXBF_GIGE_INT_MASK 'receive pkt' bit to + * indicate receive readiness + */ + data = readq(priv->base + MLXBF_GIGE_INT_MASK); + data &= ~MLXBF_GIGE_INT_MASK_RX_RECEIVE_PACKET; + writeq(data, priv->base + MLXBF_GIGE_INT_MASK); + } + + return work_done; +} diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_tx.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_tx.c new file mode 100644 index 000000000..04982e888 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_tx.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause + +/* Packet transmit logic for Mellanox Gigabit Ethernet driver + * + * Copyright (C) 2020-2021 NVIDIA CORPORATION & AFFILIATES + */ + +#include <linux/skbuff.h> + +#include "mlxbf_gige.h" +#include "mlxbf_gige_regs.h" + +/* Transmit Initialization + * 1) Allocates TX WQE array using coherent DMA mapping + * 2) Allocates TX completion counter using coherent DMA mapping + */ +int mlxbf_gige_tx_init(struct mlxbf_gige *priv) +{ + size_t size; + + size = MLXBF_GIGE_TX_WQE_SZ * priv->tx_q_entries; + priv->tx_wqe_base = dma_alloc_coherent(priv->dev, size, + &priv->tx_wqe_base_dma, + GFP_KERNEL); + if (!priv->tx_wqe_base) + return -ENOMEM; + + priv->tx_wqe_next = priv->tx_wqe_base; + + /* Write TX WQE base address into MMIO reg */ + writeq(priv->tx_wqe_base_dma, priv->base + MLXBF_GIGE_TX_WQ_BASE); + + /* Allocate address for TX completion count */ + priv->tx_cc = dma_alloc_coherent(priv->dev, MLXBF_GIGE_TX_CC_SZ, + &priv->tx_cc_dma, GFP_KERNEL); + if (!priv->tx_cc) { + dma_free_coherent(priv->dev, size, + priv->tx_wqe_base, priv->tx_wqe_base_dma); + return -ENOMEM; + } + + /* Write TX CC base address into MMIO reg */ + writeq(priv->tx_cc_dma, priv->base + MLXBF_GIGE_TX_CI_UPDATE_ADDRESS); + + writeq(ilog2(priv->tx_q_entries), + priv->base + MLXBF_GIGE_TX_WQ_SIZE_LOG2); + + priv->prev_tx_ci = 0; + priv->tx_pi = 0; + + return 0; +} + +/* Transmit Deinitialization + * This routine will free allocations done by mlxbf_gige_tx_init(), + * namely the TX WQE array and the TX completion counter + */ +void mlxbf_gige_tx_deinit(struct mlxbf_gige *priv) +{ + u64 *tx_wqe_addr; + size_t size; + int i; + + tx_wqe_addr = priv->tx_wqe_base; + + for (i = 0; i < priv->tx_q_entries; i++) { + if (priv->tx_skb[i]) { + dma_unmap_single(priv->dev, *tx_wqe_addr, + priv->tx_skb[i]->len, DMA_TO_DEVICE); + 
dev_kfree_skb(priv->tx_skb[i]); + priv->tx_skb[i] = NULL; + } + tx_wqe_addr += 2; + } + + size = MLXBF_GIGE_TX_WQE_SZ * priv->tx_q_entries; + dma_free_coherent(priv->dev, size, + priv->tx_wqe_base, priv->tx_wqe_base_dma); + + dma_free_coherent(priv->dev, MLXBF_GIGE_TX_CC_SZ, + priv->tx_cc, priv->tx_cc_dma); + + priv->tx_wqe_base = NULL; + priv->tx_wqe_base_dma = 0; + priv->tx_cc = NULL; + priv->tx_cc_dma = 0; + priv->tx_wqe_next = NULL; + writeq(0, priv->base + MLXBF_GIGE_TX_WQ_BASE); + writeq(0, priv->base + MLXBF_GIGE_TX_CI_UPDATE_ADDRESS); +} + +/* Function that returns status of TX ring: + * 0: TX ring is full, i.e. there are no + * available un-used entries in TX ring. + * non-null: TX ring is not full, i.e. there are + * some available entries in TX ring. + * The non-null value is a measure of + * how many TX entries are available, but + * it is not the exact number of available + * entries (see below). + * + * The algorithm makes the assumption that if + * (prev_tx_ci == tx_pi) then the TX ring is empty. + * An empty ring actually has (tx_q_entries-1) + * entries, which allows the algorithm to differentiate + * the case of an empty ring vs. a full ring. + */ +static u16 mlxbf_gige_tx_buffs_avail(struct mlxbf_gige *priv) +{ + unsigned long flags; + u16 avail; + + spin_lock_irqsave(&priv->lock, flags); + + if (priv->prev_tx_ci == priv->tx_pi) + avail = priv->tx_q_entries - 1; + else + avail = ((priv->tx_q_entries + priv->prev_tx_ci - priv->tx_pi) + % priv->tx_q_entries) - 1; + + spin_unlock_irqrestore(&priv->lock, flags); + + return avail; +} + +bool mlxbf_gige_handle_tx_complete(struct mlxbf_gige *priv) +{ + struct net_device_stats *stats; + u16 tx_wqe_index; + u64 *tx_wqe_addr; + u64 tx_status; + u16 tx_ci; + + tx_status = readq(priv->base + MLXBF_GIGE_TX_STATUS); + if (tx_status & MLXBF_GIGE_TX_STATUS_DATA_FIFO_FULL) + priv->stats.tx_fifo_full++; + tx_ci = readq(priv->base + MLXBF_GIGE_TX_CONSUMER_INDEX); + stats = &priv->netdev->stats; + + /* Transmit completion logic needs to loop until the completion + * index (in SW) equals TX consumer index (from HW). These + * parameters are unsigned 16-bit values and the wrap case needs + * to be supported, that is TX consumer index wrapped from 0xFFFF + * to 0 while TX completion index is still < 0xFFFF. + */ + for (; priv->prev_tx_ci != tx_ci; priv->prev_tx_ci++) { + tx_wqe_index = priv->prev_tx_ci % priv->tx_q_entries; + /* Each TX WQE is 16 bytes. The 8 MSB store the 2KB TX + * buffer address and the 8 LSB contain information + * about the TX WQE. + */ + tx_wqe_addr = priv->tx_wqe_base + + (tx_wqe_index * MLXBF_GIGE_TX_WQE_SZ_QWORDS); + + stats->tx_packets++; + stats->tx_bytes += MLXBF_GIGE_TX_WQE_PKT_LEN(tx_wqe_addr); + + dma_unmap_single(priv->dev, *tx_wqe_addr, + priv->tx_skb[tx_wqe_index]->len, DMA_TO_DEVICE); + dev_consume_skb_any(priv->tx_skb[tx_wqe_index]); + priv->tx_skb[tx_wqe_index] = NULL; + + /* Ensure completion of updates across all cores */ + mb(); + } + + /* Since the TX ring was likely just drained, check if TX queue + * had previously been stopped and now that there are TX buffers + * available the TX queue can be awakened. 
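+	 * (The queue is stopped in mlxbf_gige_start_xmit() once
+	 * mlxbf_gige_tx_buffs_avail() reports no free entries, so this is
+	 * the matching wake-up path.)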
+ */ + if (netif_queue_stopped(priv->netdev) && + mlxbf_gige_tx_buffs_avail(priv)) + netif_wake_queue(priv->netdev); + + return true; +} + +/* Function to advance the tx_wqe_next pointer to next TX WQE */ +void mlxbf_gige_update_tx_wqe_next(struct mlxbf_gige *priv) +{ + /* Advance tx_wqe_next pointer */ + priv->tx_wqe_next += MLXBF_GIGE_TX_WQE_SZ_QWORDS; + + /* Check if 'next' pointer is beyond end of TX ring */ + /* If so, set 'next' back to 'base' pointer of ring */ + if (priv->tx_wqe_next == (priv->tx_wqe_base + + (priv->tx_q_entries * MLXBF_GIGE_TX_WQE_SZ_QWORDS))) + priv->tx_wqe_next = priv->tx_wqe_base; +} + +netdev_tx_t mlxbf_gige_start_xmit(struct sk_buff *skb, + struct net_device *netdev) +{ + struct mlxbf_gige *priv = netdev_priv(netdev); + long buff_addr, start_dma_page, end_dma_page; + struct sk_buff *tx_skb; + dma_addr_t tx_buf_dma; + unsigned long flags; + u64 *tx_wqe_addr; + u64 word2; + + /* If needed, linearize TX SKB as hardware DMA expects this */ + if (skb->len > MLXBF_GIGE_DEFAULT_BUF_SZ || skb_linearize(skb)) { + dev_kfree_skb(skb); + netdev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + + buff_addr = (long)skb->data; + start_dma_page = buff_addr >> MLXBF_GIGE_DMA_PAGE_SHIFT; + end_dma_page = (buff_addr + skb->len - 1) >> MLXBF_GIGE_DMA_PAGE_SHIFT; + + /* Verify that payload pointer and data length of SKB to be + * transmitted does not violate the hardware DMA limitation. + */ + if (start_dma_page != end_dma_page) { + /* DMA operation would fail as-is, alloc new aligned SKB */ + tx_skb = mlxbf_gige_alloc_skb(priv, skb->len, + &tx_buf_dma, DMA_TO_DEVICE); + if (!tx_skb) { + /* Free original skb, could not alloc new aligned SKB */ + dev_kfree_skb(skb); + netdev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + + skb_put_data(tx_skb, skb->data, skb->len); + + /* Free the original SKB */ + dev_kfree_skb(skb); + } else { + tx_skb = skb; + tx_buf_dma = dma_map_single(priv->dev, skb->data, + skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(priv->dev, tx_buf_dma)) { + dev_kfree_skb(skb); + netdev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + } + + /* Get address of TX WQE */ + tx_wqe_addr = priv->tx_wqe_next; + + mlxbf_gige_update_tx_wqe_next(priv); + + /* Put PA of buffer address into first 64-bit word of TX WQE */ + *tx_wqe_addr = tx_buf_dma; + + /* Set TX WQE pkt_len appropriately + * NOTE: GigE silicon will automatically pad up to + * minimum packet length if needed. + */ + word2 = tx_skb->len & MLXBF_GIGE_TX_WQE_PKT_LEN_MASK; + + /* Write entire 2nd word of TX WQE */ + *(tx_wqe_addr + 1) = word2; + + spin_lock_irqsave(&priv->lock, flags); + priv->tx_skb[priv->tx_pi % priv->tx_q_entries] = tx_skb; + priv->tx_pi++; + spin_unlock_irqrestore(&priv->lock, flags); + + if (!netdev_xmit_more()) { + /* Create memory barrier before write to TX PI */ + wmb(); + writeq(priv->tx_pi, priv->base + MLXBF_GIGE_TX_PRODUCER_INDEX); + } + + /* Check if the last TX entry was just used */ + if (!mlxbf_gige_tx_buffs_avail(priv)) { + /* TX ring is full, inform stack */ + netif_stop_queue(netdev); + + /* Since there is no separate "TX complete" interrupt, need + * to explicitly schedule NAPI poll. This will trigger logic + * which processes TX completions, and will hopefully drain + * the TX ring allowing the TX queue to be awakened. 
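+		 * (mlxbf_gige_poll() invokes mlxbf_gige_handle_tx_complete()
+		 * before servicing RX, so the scheduled poll also reaps any
+		 * finished TX descriptors and can re-wake the queue.)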
+ */ + napi_schedule(&priv->napi); + } + + return NETDEV_TX_OK; +} diff --git a/drivers/net/ethernet/mellanox/mlxfw/Kconfig b/drivers/net/ethernet/mellanox/mlxfw/Kconfig new file mode 100644 index 000000000..c339f3c46 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxfw/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Mellanox firmware flash library configuration +# + +config MLXFW + tristate "Mellanox Technologies firmware flash module" + help + This driver supports Mellanox Technologies Firmware + flashing common logic. + + To compile this driver as a module, choose M here: the + module will be called mlxfw. + select XZ_DEC + select NET_DEVLINK diff --git a/drivers/net/ethernet/mellanox/mlxfw/Makefile b/drivers/net/ethernet/mellanox/mlxfw/Makefile new file mode 100644 index 000000000..36007cd24 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxfw/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_MLXFW) += mlxfw.o +mlxfw-objs := mlxfw_fsm.o mlxfw_mfa2_tlv_multi.o mlxfw_mfa2.o diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h new file mode 100644 index 000000000..e6475ea77 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXFW_H +#define _MLXFW_H + +#include <linux/firmware.h> +#include <linux/netlink.h> +#include <linux/device.h> +#include <net/devlink.h> + +struct mlxfw_dev { + const struct mlxfw_dev_ops *ops; + const char *psid; + u16 psid_size; + struct devlink *devlink; +}; + +static inline +struct device *mlxfw_dev_dev(struct mlxfw_dev *mlxfw_dev) +{ + return devlink_to_dev(mlxfw_dev->devlink); +} + +#define MLXFW_PRFX "mlxfw: " + +#define mlxfw_info(mlxfw_dev, fmt, ...) \ + dev_info(mlxfw_dev_dev(mlxfw_dev), MLXFW_PRFX fmt, ## __VA_ARGS__) +#define mlxfw_err(mlxfw_dev, fmt, ...) \ + dev_err(mlxfw_dev_dev(mlxfw_dev), MLXFW_PRFX fmt, ## __VA_ARGS__) +#define mlxfw_dbg(mlxfw_dev, fmt, ...) 
\ + dev_dbg(mlxfw_dev_dev(mlxfw_dev), MLXFW_PRFX fmt, ## __VA_ARGS__) + +enum mlxfw_fsm_state { + MLXFW_FSM_STATE_IDLE, + MLXFW_FSM_STATE_LOCKED, + MLXFW_FSM_STATE_INITIALIZE, + MLXFW_FSM_STATE_DOWNLOAD, + MLXFW_FSM_STATE_VERIFY, + MLXFW_FSM_STATE_APPLY, + MLXFW_FSM_STATE_ACTIVATE, +}; + +enum mlxfw_fsm_state_err { + MLXFW_FSM_STATE_ERR_OK, + MLXFW_FSM_STATE_ERR_ERROR, + MLXFW_FSM_STATE_ERR_REJECTED_DIGEST_ERR, + MLXFW_FSM_STATE_ERR_REJECTED_NOT_APPLICABLE, + MLXFW_FSM_STATE_ERR_REJECTED_UNKNOWN_KEY, + MLXFW_FSM_STATE_ERR_REJECTED_AUTH_FAILED, + MLXFW_FSM_STATE_ERR_REJECTED_UNSIGNED, + MLXFW_FSM_STATE_ERR_REJECTED_KEY_NOT_APPLICABLE, + MLXFW_FSM_STATE_ERR_REJECTED_BAD_FORMAT, + MLXFW_FSM_STATE_ERR_BLOCKED_PENDING_RESET, + MLXFW_FSM_STATE_ERR_MAX, +}; + +enum mlxfw_fsm_reactivate_status { + MLXFW_FSM_REACTIVATE_STATUS_OK, + MLXFW_FSM_REACTIVATE_STATUS_BUSY, + MLXFW_FSM_REACTIVATE_STATUS_PROHIBITED_FW_VER_ERR, + MLXFW_FSM_REACTIVATE_STATUS_FIRST_PAGE_COPY_FAILED, + MLXFW_FSM_REACTIVATE_STATUS_FIRST_PAGE_ERASE_FAILED, + MLXFW_FSM_REACTIVATE_STATUS_FIRST_PAGE_RESTORE_FAILED, + MLXFW_FSM_REACTIVATE_STATUS_CANDIDATE_FW_DEACTIVATION_FAILED, + MLXFW_FSM_REACTIVATE_STATUS_FW_ALREADY_ACTIVATED, + MLXFW_FSM_REACTIVATE_STATUS_ERR_DEVICE_RESET_REQUIRED, + MLXFW_FSM_REACTIVATE_STATUS_ERR_FW_PROGRAMMING_NEEDED, + MLXFW_FSM_REACTIVATE_STATUS_MAX, +}; + +struct mlxfw_dev_ops { + int (*component_query)(struct mlxfw_dev *mlxfw_dev, u16 component_index, + u32 *p_max_size, u8 *p_align_bits, + u16 *p_max_write_size); + + int (*fsm_lock)(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle); + + int (*fsm_component_update)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u16 component_index, u32 component_size); + + int (*fsm_block_download)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u8 *data, u16 size, u32 offset); + + int (*fsm_component_verify)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u16 component_index); + + int (*fsm_activate)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle); + + int (*fsm_reactivate)(struct mlxfw_dev *mlxfw_dev, u8 *status); + + int (*fsm_query_state)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + enum mlxfw_fsm_state *fsm_state, + enum mlxfw_fsm_state_err *fsm_state_err); + + void (*fsm_cancel)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle); + + void (*fsm_release)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle); +}; + +#if IS_REACHABLE(CONFIG_MLXFW) +int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, + const struct firmware *firmware, + struct netlink_ext_ack *extack); +#else +static inline +int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, + const struct firmware *firmware, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} +#endif + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c new file mode 100644 index 000000000..46245e0b2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c @@ -0,0 +1,436 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2019 Mellanox Technologies. 
All rights reserved */ + +#define pr_fmt(fmt) "mlxfw: " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/delay.h> + +#include "mlxfw.h" +#include "mlxfw_mfa2.h" + +#define MLXFW_FSM_STATE_WAIT_CYCLE_MS 200 +#define MLXFW_FSM_STATE_WAIT_TIMEOUT_MS 30000 +#define MLXFW_FSM_STATE_WAIT_ROUNDS \ + (MLXFW_FSM_STATE_WAIT_TIMEOUT_MS / MLXFW_FSM_STATE_WAIT_CYCLE_MS) +#define MLXFW_FSM_MAX_COMPONENT_SIZE (10 * (1 << 20)) + +static const int mlxfw_fsm_state_errno[] = { + [MLXFW_FSM_STATE_ERR_ERROR] = -EIO, + [MLXFW_FSM_STATE_ERR_REJECTED_DIGEST_ERR] = -EBADMSG, + [MLXFW_FSM_STATE_ERR_REJECTED_NOT_APPLICABLE] = -ENOENT, + [MLXFW_FSM_STATE_ERR_REJECTED_UNKNOWN_KEY] = -ENOKEY, + [MLXFW_FSM_STATE_ERR_REJECTED_AUTH_FAILED] = -EACCES, + [MLXFW_FSM_STATE_ERR_REJECTED_UNSIGNED] = -EKEYREVOKED, + [MLXFW_FSM_STATE_ERR_REJECTED_KEY_NOT_APPLICABLE] = -EKEYREJECTED, + [MLXFW_FSM_STATE_ERR_REJECTED_BAD_FORMAT] = -ENOEXEC, + [MLXFW_FSM_STATE_ERR_BLOCKED_PENDING_RESET] = -EBUSY, + [MLXFW_FSM_STATE_ERR_MAX] = -EINVAL +}; + +#define MLXFW_ERR_PRFX "Firmware flash failed: " +#define MLXFW_ERR_MSG(fwdev, extack, msg, err) do { \ + mlxfw_err(fwdev, "%s, err (%d)\n", MLXFW_ERR_PRFX msg, err); \ + NL_SET_ERR_MSG_MOD(extack, MLXFW_ERR_PRFX msg); \ +} while (0) + +static int mlxfw_fsm_state_err(struct mlxfw_dev *mlxfw_dev, + struct netlink_ext_ack *extack, + enum mlxfw_fsm_state_err err) +{ + enum mlxfw_fsm_state_err fsm_state_err; + + fsm_state_err = min_t(enum mlxfw_fsm_state_err, err, + MLXFW_FSM_STATE_ERR_MAX); + + switch (fsm_state_err) { + case MLXFW_FSM_STATE_ERR_ERROR: + MLXFW_ERR_MSG(mlxfw_dev, extack, "general error", err); + break; + case MLXFW_FSM_STATE_ERR_REJECTED_DIGEST_ERR: + MLXFW_ERR_MSG(mlxfw_dev, extack, "component hash mismatch", err); + break; + case MLXFW_FSM_STATE_ERR_REJECTED_NOT_APPLICABLE: + MLXFW_ERR_MSG(mlxfw_dev, extack, "component not applicable", err); + break; + case MLXFW_FSM_STATE_ERR_REJECTED_UNKNOWN_KEY: + MLXFW_ERR_MSG(mlxfw_dev, extack, "unknown key", err); + break; + case MLXFW_FSM_STATE_ERR_REJECTED_AUTH_FAILED: + MLXFW_ERR_MSG(mlxfw_dev, extack, "authentication failed", err); + break; + case MLXFW_FSM_STATE_ERR_REJECTED_UNSIGNED: + MLXFW_ERR_MSG(mlxfw_dev, extack, "component was not signed", err); + break; + case MLXFW_FSM_STATE_ERR_REJECTED_KEY_NOT_APPLICABLE: + MLXFW_ERR_MSG(mlxfw_dev, extack, "key not applicable", err); + break; + case MLXFW_FSM_STATE_ERR_REJECTED_BAD_FORMAT: + MLXFW_ERR_MSG(mlxfw_dev, extack, "bad format", err); + break; + case MLXFW_FSM_STATE_ERR_BLOCKED_PENDING_RESET: + MLXFW_ERR_MSG(mlxfw_dev, extack, "pending reset", err); + break; + case MLXFW_FSM_STATE_ERR_OK: + case MLXFW_FSM_STATE_ERR_MAX: + MLXFW_ERR_MSG(mlxfw_dev, extack, "unknown error", err); + break; + } + + return mlxfw_fsm_state_errno[fsm_state_err]; +}; + +static int mlxfw_fsm_state_wait(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + enum mlxfw_fsm_state fsm_state, + struct netlink_ext_ack *extack) +{ + enum mlxfw_fsm_state_err fsm_state_err; + enum mlxfw_fsm_state curr_fsm_state; + int times; + int err; + + times = MLXFW_FSM_STATE_WAIT_ROUNDS; +retry: + err = mlxfw_dev->ops->fsm_query_state(mlxfw_dev, fwhandle, + &curr_fsm_state, &fsm_state_err); + if (err) { + MLXFW_ERR_MSG(mlxfw_dev, extack, "FSM state query failed", err); + return err; + } + + if (fsm_state_err != MLXFW_FSM_STATE_ERR_OK) + return mlxfw_fsm_state_err(mlxfw_dev, extack, fsm_state_err); + + if (curr_fsm_state != fsm_state) { + if (--times == 0) { + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Timeout reached on 
FSM state change", -ETIMEDOUT); + return -ETIMEDOUT; + } + msleep(MLXFW_FSM_STATE_WAIT_CYCLE_MS); + goto retry; + } + return 0; +} + +static int +mlxfw_fsm_reactivate_err(struct mlxfw_dev *mlxfw_dev, + struct netlink_ext_ack *extack, u8 err) +{ + enum mlxfw_fsm_reactivate_status status; + +#define MXFW_REACT_PRFX "Reactivate FSM: " +#define MLXFW_REACT_ERR(msg, err) \ + MLXFW_ERR_MSG(mlxfw_dev, extack, MXFW_REACT_PRFX msg, err) + + status = min_t(enum mlxfw_fsm_reactivate_status, err, + MLXFW_FSM_REACTIVATE_STATUS_MAX); + + switch (status) { + case MLXFW_FSM_REACTIVATE_STATUS_BUSY: + MLXFW_REACT_ERR("busy", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_PROHIBITED_FW_VER_ERR: + MLXFW_REACT_ERR("prohibited fw ver", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_FIRST_PAGE_COPY_FAILED: + MLXFW_REACT_ERR("first page copy failed", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_FIRST_PAGE_ERASE_FAILED: + MLXFW_REACT_ERR("first page erase failed", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_FIRST_PAGE_RESTORE_FAILED: + MLXFW_REACT_ERR("first page restore failed", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_CANDIDATE_FW_DEACTIVATION_FAILED: + MLXFW_REACT_ERR("candidate fw deactivation failed", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_ERR_DEVICE_RESET_REQUIRED: + MLXFW_REACT_ERR("device reset required", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_ERR_FW_PROGRAMMING_NEEDED: + MLXFW_REACT_ERR("fw programming needed", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_FW_ALREADY_ACTIVATED: + MLXFW_REACT_ERR("fw already activated", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_OK: + case MLXFW_FSM_REACTIVATE_STATUS_MAX: + MLXFW_REACT_ERR("unexpected error", err); + break; + } + return -EREMOTEIO; +}; + +static int mlxfw_fsm_reactivate(struct mlxfw_dev *mlxfw_dev, + struct netlink_ext_ack *extack, + bool *supported) +{ + u8 status; + int err; + + if (!mlxfw_dev->ops->fsm_reactivate) + return 0; + + err = mlxfw_dev->ops->fsm_reactivate(mlxfw_dev, &status); + if (err == -EOPNOTSUPP) { + *supported = false; + return 0; + } + + if (err) { + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Could not reactivate firmware flash", err); + return err; + } + + if (status == MLXFW_FSM_REACTIVATE_STATUS_OK || + status == MLXFW_FSM_REACTIVATE_STATUS_FW_ALREADY_ACTIVATED) + return 0; + + return mlxfw_fsm_reactivate_err(mlxfw_dev, extack, status); +} + +static void mlxfw_status_notify(struct mlxfw_dev *mlxfw_dev, + const char *msg, const char *comp_name, + u32 done_bytes, u32 total_bytes) +{ + devlink_flash_update_status_notify(mlxfw_dev->devlink, msg, comp_name, + done_bytes, total_bytes); +} + +#define MLXFW_ALIGN_DOWN(x, align_bits) ((x) & ~((1 << (align_bits)) - 1)) +#define MLXFW_ALIGN_UP(x, align_bits) \ + MLXFW_ALIGN_DOWN((x) + ((1 << (align_bits)) - 1), (align_bits)) + +static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, + u32 fwhandle, + struct mlxfw_mfa2_component *comp, + bool reactivate_supp, + struct netlink_ext_ack *extack) +{ + u16 comp_max_write_size; + u8 comp_align_bits; + u32 comp_max_size; + char comp_name[8]; + u16 block_size; + u8 *block_ptr; + u32 offset; + int err; + + sprintf(comp_name, "%u", comp->index); + + err = mlxfw_dev->ops->component_query(mlxfw_dev, comp->index, + &comp_max_size, &comp_align_bits, + &comp_max_write_size); + if (err) { + MLXFW_ERR_MSG(mlxfw_dev, extack, "FSM component query failed", err); + return err; + } + + comp_max_size = min_t(u32, comp_max_size, MLXFW_FSM_MAX_COMPONENT_SIZE); + if (comp->data_size > comp_max_size) { + 
MLXFW_ERR_MSG(mlxfw_dev, extack, + "Component size is bigger than limit", -EINVAL); + return -EINVAL; + } + + comp_max_write_size = MLXFW_ALIGN_DOWN(comp_max_write_size, + comp_align_bits); + + mlxfw_dbg(mlxfw_dev, "Component update\n"); + mlxfw_status_notify(mlxfw_dev, "Updating component", comp_name, 0, 0); + err = mlxfw_dev->ops->fsm_component_update(mlxfw_dev, fwhandle, + comp->index, + comp->data_size); + if (err) { + if (!reactivate_supp) + MLXFW_ERR_MSG(mlxfw_dev, extack, + "FSM component update failed, FW reactivate is not supported", + err); + else + MLXFW_ERR_MSG(mlxfw_dev, extack, + "FSM component update failed", err); + return err; + } + + err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, + MLXFW_FSM_STATE_DOWNLOAD, extack); + if (err) + goto err_out; + + mlxfw_dbg(mlxfw_dev, "Component download\n"); + mlxfw_status_notify(mlxfw_dev, "Downloading component", + comp_name, 0, comp->data_size); + for (offset = 0; + offset < MLXFW_ALIGN_UP(comp->data_size, comp_align_bits); + offset += comp_max_write_size) { + block_ptr = comp->data + offset; + block_size = (u16) min_t(u32, comp->data_size - offset, + comp_max_write_size); + err = mlxfw_dev->ops->fsm_block_download(mlxfw_dev, fwhandle, + block_ptr, block_size, + offset); + if (err) { + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Component download failed", err); + goto err_out; + } + mlxfw_status_notify(mlxfw_dev, "Downloading component", + comp_name, offset + block_size, + comp->data_size); + } + + mlxfw_dbg(mlxfw_dev, "Component verify\n"); + mlxfw_status_notify(mlxfw_dev, "Verifying component", comp_name, 0, 0); + err = mlxfw_dev->ops->fsm_component_verify(mlxfw_dev, fwhandle, + comp->index); + if (err) { + MLXFW_ERR_MSG(mlxfw_dev, extack, + "FSM component verify failed", err); + goto err_out; + } + + err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, + MLXFW_FSM_STATE_LOCKED, extack); + if (err) + goto err_out; + return 0; + +err_out: + mlxfw_dev->ops->fsm_cancel(mlxfw_dev, fwhandle); + return err; +} + +static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + struct mlxfw_mfa2_file *mfa2_file, + bool reactivate_supp, + struct netlink_ext_ack *extack) +{ + u32 component_count; + int err; + int i; + + err = mlxfw_mfa2_file_component_count(mfa2_file, mlxfw_dev->psid, + mlxfw_dev->psid_size, + &component_count); + if (err) { + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Could not find device PSID in MFA2 file", err); + return err; + } + + for (i = 0; i < component_count; i++) { + struct mlxfw_mfa2_component *comp; + + comp = mlxfw_mfa2_file_component_get(mfa2_file, mlxfw_dev->psid, + mlxfw_dev->psid_size, i); + if (IS_ERR(comp)) { + err = PTR_ERR(comp); + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Failed to get MFA2 component", err); + return err; + } + + mlxfw_info(mlxfw_dev, "Flashing component type %d\n", + comp->index); + err = mlxfw_flash_component(mlxfw_dev, fwhandle, comp, + reactivate_supp, extack); + mlxfw_mfa2_file_component_put(comp); + if (err) + return err; + } + return 0; +} + +int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, + const struct firmware *firmware, + struct netlink_ext_ack *extack) +{ + struct mlxfw_mfa2_file *mfa2_file; + bool reactivate_supp = true; + u32 fwhandle; + int err; + + if (!mlxfw_mfa2_check(firmware)) { + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Firmware file is not MFA2", -EINVAL); + return -EINVAL; + } + + mfa2_file = mlxfw_mfa2_file_init(firmware); + if (IS_ERR(mfa2_file)) { + err = PTR_ERR(mfa2_file); + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Failed to initialize MFA2 firmware file", err); + return 
err; + } + + mlxfw_info(mlxfw_dev, "Initialize firmware flash process\n"); + mlxfw_status_notify(mlxfw_dev, "Initializing firmware flash process", + NULL, 0, 0); + err = mlxfw_dev->ops->fsm_lock(mlxfw_dev, &fwhandle); + if (err) { + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Could not lock the firmware FSM", err); + goto err_fsm_lock; + } + + err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, + MLXFW_FSM_STATE_LOCKED, extack); + if (err) + goto err_state_wait_idle_to_locked; + + err = mlxfw_fsm_reactivate(mlxfw_dev, extack, &reactivate_supp); + if (err) + goto err_fsm_reactivate; + + err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, + MLXFW_FSM_STATE_LOCKED, extack); + if (err) + goto err_state_wait_reactivate_to_locked; + + err = mlxfw_flash_components(mlxfw_dev, fwhandle, mfa2_file, + reactivate_supp, extack); + if (err) + goto err_flash_components; + + mlxfw_dbg(mlxfw_dev, "Activate image\n"); + mlxfw_status_notify(mlxfw_dev, "Activating image", NULL, 0, 0); + err = mlxfw_dev->ops->fsm_activate(mlxfw_dev, fwhandle); + if (err) { + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Could not activate the downloaded image", err); + goto err_fsm_activate; + } + + err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, + MLXFW_FSM_STATE_LOCKED, extack); + if (err) + goto err_state_wait_activate_to_locked; + + mlxfw_dbg(mlxfw_dev, "Handle release\n"); + mlxfw_dev->ops->fsm_release(mlxfw_dev, fwhandle); + + mlxfw_info(mlxfw_dev, "Firmware flash done\n"); + mlxfw_status_notify(mlxfw_dev, "Firmware flash done", NULL, 0, 0); + mlxfw_mfa2_file_fini(mfa2_file); + return 0; + +err_state_wait_activate_to_locked: +err_fsm_activate: +err_flash_components: +err_state_wait_reactivate_to_locked: +err_fsm_reactivate: +err_state_wait_idle_to_locked: + mlxfw_dev->ops->fsm_release(mlxfw_dev, fwhandle); +err_fsm_lock: + mlxfw_mfa2_file_fini(mfa2_file); + return err; +} +EXPORT_SYMBOL(mlxfw_firmware_flash); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>"); +MODULE_DESCRIPTION("Mellanox firmware flash lib"); diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c new file mode 100644 index 000000000..e6f677e42 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c @@ -0,0 +1,589 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */ + +#define pr_fmt(fmt) "mlxfw_mfa2: " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/vmalloc.h> +#include <linux/xz.h> +#include "mlxfw_mfa2.h" +#include "mlxfw_mfa2_file.h" +#include "mlxfw_mfa2_tlv.h" +#include "mlxfw_mfa2_format.h" +#include "mlxfw_mfa2_tlv_multi.h" + +/* MFA2 FILE + * +----------------------------------+ + * | MFA2 finger print | + * +----------------------------------+ + * | package descriptor multi_tlv | + * | +------------------------------+ | +-----------------+ + * | | package descriptor tlv +-----> |num_devices=n | + * | +------------------------------+ | |num_components=m | + * +----------------------------------+ |CB offset | + * | device descriptor multi_tlv | |... 
| + * | +------------------------------+ | | | + * | | PSID tlv | | +-----------------+ + * | +------------------------------+ | + * | | component index tlv | | + * | +------------------------------+ | + * +----------------------------------+ + * | component descriptor multi_tlv | + * | +------------------------------+ | +-----------------+ + * | | component descriptor tlv +-----> |Among others: | + * | +------------------------------+ | |CB offset=o | + * +----------------------------------+ |comp index=i | + * | | |... | + * | | | | + * | | +-----------------+ + * | COMPONENT BLOCK (CB) | + * | | + * | | + * | | + * +----------------------------------+ + * + * On the top level, an MFA2 file contains: + * - Fingerprint + * - Several multi_tlvs (TLVs of type MLXFW_MFA2_TLV_MULTI, as defined in + * mlxfw_mfa2_format.h) + * - Compresses content block + * + * The first multi_tlv + * ------------------- + * The first multi TLV is treated as package descriptor, and expected to have a + * first TLV child of type MLXFW_MFA2_TLV_PACKAGE_DESCRIPTOR which contains all + * the global information needed to parse the file. Among others, it contains + * the number of device descriptors and component descriptor following this + * multi TLV. + * + * The device descriptor multi_tlv + * ------------------------------- + * The multi TLVs following the package descriptor are treated as device + * descriptor, and are expected to have the following children: + * - PSID TLV child of type MLXFW_MFA2_TLV_PSID containing that device PSID. + * - Component index of type MLXFW_MFA2_TLV_COMPONENT_PTR that contains that + * device component index. + * + * The component descriptor multi_tlv + * ---------------------------------- + * The multi TLVs following the device descriptor multi TLVs are treated as + * component descriptor, and are expected to have a first child of type + * MLXFW_MFA2_TLV_COMPONENT_DESCRIPTOR that contains mostly the component index, + * needed for the flash process and the offset to the binary within the + * component block. 
+ */ + +static const u8 mlxfw_mfa2_fingerprint[] = "MLNX.MFA2.XZ.00!"; +static const int mlxfw_mfa2_fingerprint_len = + sizeof(mlxfw_mfa2_fingerprint) - 1; + +static const u8 mlxfw_mfa2_comp_magic[] = "#BIN.COMPONENT!#"; +static const int mlxfw_mfa2_comp_magic_len = sizeof(mlxfw_mfa2_comp_magic) - 1; + +bool mlxfw_mfa2_check(const struct firmware *fw) +{ + if (fw->size < sizeof(mlxfw_mfa2_fingerprint)) + return false; + + return memcmp(fw->data, mlxfw_mfa2_fingerprint, + mlxfw_mfa2_fingerprint_len) == 0; +} + +static bool +mlxfw_mfa2_tlv_multi_validate(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv_multi *multi) +{ + const struct mlxfw_mfa2_tlv *tlv; + u16 idx; + + /* Check that all children are valid */ + mlxfw_mfa2_tlv_multi_foreach(mfa2_file, tlv, idx, multi) { + if (!tlv) { + pr_err("Multi has invalid child"); + return false; + } + } + return true; +} + +static bool +mlxfw_mfa2_file_dev_validate(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv *dev_tlv, + u16 dev_idx) +{ + const struct mlxfw_mfa2_tlv_component_ptr *cptr; + const struct mlxfw_mfa2_tlv_multi *multi; + const struct mlxfw_mfa2_tlv_psid *psid; + const struct mlxfw_mfa2_tlv *tlv; + u16 cptr_count; + u16 cptr_idx; + int err; + + pr_debug("Device %d\n", dev_idx); + + multi = mlxfw_mfa2_tlv_multi_get(mfa2_file, dev_tlv); + if (!multi) { + pr_err("Device %d is not a valid TLV error\n", dev_idx); + return false; + } + + if (!mlxfw_mfa2_tlv_multi_validate(mfa2_file, multi)) + return false; + + /* Validate the device has PSID tlv */ + tlv = mlxfw_mfa2_tlv_multi_child_find(mfa2_file, multi, + MLXFW_MFA2_TLV_PSID, 0); + if (!tlv) { + pr_err("Device %d does not have PSID\n", dev_idx); + return false; + } + + psid = mlxfw_mfa2_tlv_psid_get(mfa2_file, tlv); + if (!psid) { + pr_err("Device %d PSID TLV is not valid\n", dev_idx); + return false; + } + + print_hex_dump_debug(" -- Device PSID ", DUMP_PREFIX_NONE, 16, 16, + psid->psid, be16_to_cpu(tlv->len), true); + + /* Validate the device has COMPONENT_PTR */ + err = mlxfw_mfa2_tlv_multi_child_count(mfa2_file, multi, + MLXFW_MFA2_TLV_COMPONENT_PTR, + &cptr_count); + if (err) + return false; + + if (cptr_count == 0) { + pr_err("Device %d has no components\n", dev_idx); + return false; + } + + for (cptr_idx = 0; cptr_idx < cptr_count; cptr_idx++) { + tlv = mlxfw_mfa2_tlv_multi_child_find(mfa2_file, multi, + MLXFW_MFA2_TLV_COMPONENT_PTR, + cptr_idx); + if (!tlv) + return false; + + cptr = mlxfw_mfa2_tlv_component_ptr_get(mfa2_file, tlv); + if (!cptr) { + pr_err("Device %d COMPONENT_PTR TLV is not valid\n", + dev_idx); + return false; + } + + pr_debug(" -- Component index %d\n", + be16_to_cpu(cptr->component_index)); + } + return true; +} + +static bool +mlxfw_mfa2_file_comp_validate(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv *comp_tlv, + u16 comp_idx) +{ + const struct mlxfw_mfa2_tlv_component_descriptor *cdesc; + const struct mlxfw_mfa2_tlv_multi *multi; + const struct mlxfw_mfa2_tlv *tlv; + + pr_debug("Component %d\n", comp_idx); + + multi = mlxfw_mfa2_tlv_multi_get(mfa2_file, comp_tlv); + if (!multi) { + pr_err("Component %d is not a valid TLV error\n", comp_idx); + return false; + } + + if (!mlxfw_mfa2_tlv_multi_validate(mfa2_file, multi)) + return false; + + /* Check that component have COMPONENT_DESCRIPTOR as first child */ + tlv = mlxfw_mfa2_tlv_multi_child(mfa2_file, multi); + if (!tlv) { + pr_err("Component descriptor %d multi TLV error\n", comp_idx); + return false; + } + + cdesc = 
mlxfw_mfa2_tlv_component_descriptor_get(mfa2_file, tlv); + if (!cdesc) { + pr_err("Component %d does not have a valid descriptor\n", + comp_idx); + return false; + } + pr_debug(" -- Component type %d\n", be16_to_cpu(cdesc->identifier)); + pr_debug(" -- Offset 0x%llx and size %d\n", + ((u64) be32_to_cpu(cdesc->cb_offset_h) << 32) + | be32_to_cpu(cdesc->cb_offset_l), be32_to_cpu(cdesc->size)); + + return true; +} + +static bool mlxfw_mfa2_file_validate(const struct mlxfw_mfa2_file *mfa2_file) +{ + const struct mlxfw_mfa2_tlv *tlv; + u16 idx; + + pr_debug("Validating file\n"); + + /* check that all the devices exist */ + mlxfw_mfa2_tlv_foreach(mfa2_file, tlv, idx, mfa2_file->first_dev, + mfa2_file->dev_count) { + if (!tlv) { + pr_err("Device TLV error\n"); + return false; + } + + /* Check each device */ + if (!mlxfw_mfa2_file_dev_validate(mfa2_file, tlv, idx)) + return false; + } + + /* check that all the components exist */ + mlxfw_mfa2_tlv_foreach(mfa2_file, tlv, idx, mfa2_file->first_component, + mfa2_file->component_count) { + if (!tlv) { + pr_err("Device TLV error\n"); + return false; + } + + /* Check each component */ + if (!mlxfw_mfa2_file_comp_validate(mfa2_file, tlv, idx)) + return false; + } + return true; +} + +struct mlxfw_mfa2_file *mlxfw_mfa2_file_init(const struct firmware *fw) +{ + const struct mlxfw_mfa2_tlv_package_descriptor *pd; + const struct mlxfw_mfa2_tlv_multi *multi; + const struct mlxfw_mfa2_tlv *multi_child; + const struct mlxfw_mfa2_tlv *first_tlv; + struct mlxfw_mfa2_file *mfa2_file; + const void *first_tlv_ptr; + const void *cb_top_ptr; + + mfa2_file = kzalloc(sizeof(*mfa2_file), GFP_KERNEL); + if (!mfa2_file) + return ERR_PTR(-ENOMEM); + + mfa2_file->fw = fw; + first_tlv_ptr = fw->data + NLA_ALIGN(mlxfw_mfa2_fingerprint_len); + first_tlv = mlxfw_mfa2_tlv_get(mfa2_file, first_tlv_ptr); + if (!first_tlv) { + pr_err("Could not parse package descriptor TLV\n"); + goto err_out; + } + + multi = mlxfw_mfa2_tlv_multi_get(mfa2_file, first_tlv); + if (!multi) { + pr_err("First TLV is not of valid multi type\n"); + goto err_out; + } + + multi_child = mlxfw_mfa2_tlv_multi_child(mfa2_file, multi); + if (!multi_child) + goto err_out; + + pd = mlxfw_mfa2_tlv_package_descriptor_get(mfa2_file, multi_child); + if (!pd) { + pr_err("Could not parse package descriptor TLV\n"); + goto err_out; + } + + mfa2_file->first_dev = mlxfw_mfa2_tlv_next(mfa2_file, first_tlv); + if (!mfa2_file->first_dev) { + pr_err("First device TLV is not valid\n"); + goto err_out; + } + + mfa2_file->dev_count = be16_to_cpu(pd->num_devices); + mfa2_file->first_component = mlxfw_mfa2_tlv_advance(mfa2_file, + mfa2_file->first_dev, + mfa2_file->dev_count); + mfa2_file->component_count = be16_to_cpu(pd->num_components); + mfa2_file->cb = fw->data + NLA_ALIGN(be32_to_cpu(pd->cb_offset)); + if (!mlxfw_mfa2_valid_ptr(mfa2_file, mfa2_file->cb)) { + pr_err("Component block is out side the file\n"); + goto err_out; + } + mfa2_file->cb_archive_size = be32_to_cpu(pd->cb_archive_size); + cb_top_ptr = mfa2_file->cb + mfa2_file->cb_archive_size - 1; + if (!mlxfw_mfa2_valid_ptr(mfa2_file, cb_top_ptr)) { + pr_err("Component block size is too big\n"); + goto err_out; + } + + if (!mlxfw_mfa2_file_validate(mfa2_file)) + goto err_out; + return mfa2_file; +err_out: + kfree(mfa2_file); + return ERR_PTR(-EINVAL); +} + +static const struct mlxfw_mfa2_tlv_multi * +mlxfw_mfa2_tlv_dev_get(const struct mlxfw_mfa2_file *mfa2_file, + const char *psid, u16 psid_size) +{ + const struct mlxfw_mfa2_tlv_psid *tlv_psid; + const struct 
mlxfw_mfa2_tlv_multi *dev_multi; + const struct mlxfw_mfa2_tlv *dev_tlv; + const struct mlxfw_mfa2_tlv *tlv; + u32 idx; + + /* for each device tlv */ + mlxfw_mfa2_tlv_foreach(mfa2_file, dev_tlv, idx, mfa2_file->first_dev, + mfa2_file->dev_count) { + if (!dev_tlv) + return NULL; + + dev_multi = mlxfw_mfa2_tlv_multi_get(mfa2_file, dev_tlv); + if (!dev_multi) + return NULL; + + /* find psid child and compare */ + tlv = mlxfw_mfa2_tlv_multi_child_find(mfa2_file, dev_multi, + MLXFW_MFA2_TLV_PSID, 0); + if (!tlv) + return NULL; + if (be16_to_cpu(tlv->len) != psid_size) + continue; + + tlv_psid = mlxfw_mfa2_tlv_psid_get(mfa2_file, tlv); + if (!tlv_psid) + return NULL; + + if (memcmp(psid, tlv_psid->psid, psid_size) == 0) + return dev_multi; + } + + return NULL; +} + +int mlxfw_mfa2_file_component_count(const struct mlxfw_mfa2_file *mfa2_file, + const char *psid, u32 psid_size, + u32 *p_count) +{ + const struct mlxfw_mfa2_tlv_multi *dev_multi; + u16 count; + int err; + + dev_multi = mlxfw_mfa2_tlv_dev_get(mfa2_file, psid, psid_size); + if (!dev_multi) + return -EINVAL; + + err = mlxfw_mfa2_tlv_multi_child_count(mfa2_file, dev_multi, + MLXFW_MFA2_TLV_COMPONENT_PTR, + &count); + if (err) + return err; + + *p_count = count; + return 0; +} + +static int mlxfw_mfa2_xz_dec_run(struct xz_dec *xz_dec, struct xz_buf *xz_buf, + bool *finished) +{ + enum xz_ret xz_ret; + + xz_ret = xz_dec_run(xz_dec, xz_buf); + + switch (xz_ret) { + case XZ_STREAM_END: + *finished = true; + return 0; + case XZ_OK: + *finished = false; + return 0; + case XZ_MEM_ERROR: + pr_err("xz no memory\n"); + return -ENOMEM; + case XZ_DATA_ERROR: + pr_err("xz file corrupted\n"); + return -EINVAL; + case XZ_FORMAT_ERROR: + pr_err("xz format not found\n"); + return -EINVAL; + case XZ_OPTIONS_ERROR: + pr_err("unsupported xz option\n"); + return -EINVAL; + case XZ_MEMLIMIT_ERROR: + pr_err("xz dictionary too small\n"); + return -EINVAL; + default: + pr_err("xz error %d\n", xz_ret); + return -EINVAL; + } +} + +static int mlxfw_mfa2_file_cb_offset_xz(const struct mlxfw_mfa2_file *mfa2_file, + off_t off, size_t size, u8 *buf) +{ + struct xz_dec *xz_dec; + struct xz_buf dec_buf; + off_t curr_off = 0; + bool finished; + int err; + + xz_dec = xz_dec_init(XZ_DYNALLOC, (u32) -1); + if (!xz_dec) + return -EINVAL; + + dec_buf.in_size = mfa2_file->cb_archive_size; + dec_buf.in = mfa2_file->cb; + dec_buf.in_pos = 0; + dec_buf.out = buf; + + /* decode up to the offset */ + do { + dec_buf.out_pos = 0; + dec_buf.out_size = min_t(size_t, size, off - curr_off); + if (dec_buf.out_size == 0) + break; + + err = mlxfw_mfa2_xz_dec_run(xz_dec, &dec_buf, &finished); + if (err) + goto out; + if (finished) { + pr_err("xz section too short\n"); + err = -EINVAL; + goto out; + } + curr_off += dec_buf.out_pos; + } while (curr_off != off); + + /* decode the needed section */ + dec_buf.out_pos = 0; + dec_buf.out_size = size; + err = mlxfw_mfa2_xz_dec_run(xz_dec, &dec_buf, &finished); +out: + xz_dec_end(xz_dec); + return err; +} + +static const struct mlxfw_mfa2_tlv_component_descriptor * +mlxfw_mfa2_file_component_tlv_get(const struct mlxfw_mfa2_file *mfa2_file, + u16 comp_index) +{ + const struct mlxfw_mfa2_tlv_multi *multi; + const struct mlxfw_mfa2_tlv *multi_child; + const struct mlxfw_mfa2_tlv *comp_tlv; + + if (comp_index > mfa2_file->component_count) + return NULL; + + comp_tlv = mlxfw_mfa2_tlv_advance(mfa2_file, mfa2_file->first_component, + comp_index); + if (!comp_tlv) + return NULL; + + multi = mlxfw_mfa2_tlv_multi_get(mfa2_file, comp_tlv); + if (!multi) + return 
NULL; + + multi_child = mlxfw_mfa2_tlv_multi_child(mfa2_file, multi); + if (!multi_child) + return NULL; + + return mlxfw_mfa2_tlv_component_descriptor_get(mfa2_file, multi_child); +} + +struct mlxfw_mfa2_comp_data { + struct mlxfw_mfa2_component comp; + u8 buff[]; +}; + +static const struct mlxfw_mfa2_tlv_component_descriptor * +mlxfw_mfa2_file_component_find(const struct mlxfw_mfa2_file *mfa2_file, + const char *psid, int psid_size, + int component_index) +{ + const struct mlxfw_mfa2_tlv_component_ptr *cptr; + const struct mlxfw_mfa2_tlv_multi *dev_multi; + const struct mlxfw_mfa2_tlv *cptr_tlv; + u16 comp_idx; + + dev_multi = mlxfw_mfa2_tlv_dev_get(mfa2_file, psid, psid_size); + if (!dev_multi) + return NULL; + + cptr_tlv = mlxfw_mfa2_tlv_multi_child_find(mfa2_file, dev_multi, + MLXFW_MFA2_TLV_COMPONENT_PTR, + component_index); + if (!cptr_tlv) + return NULL; + + cptr = mlxfw_mfa2_tlv_component_ptr_get(mfa2_file, cptr_tlv); + if (!cptr) + return NULL; + + comp_idx = be16_to_cpu(cptr->component_index); + return mlxfw_mfa2_file_component_tlv_get(mfa2_file, comp_idx); +} + +struct mlxfw_mfa2_component * +mlxfw_mfa2_file_component_get(const struct mlxfw_mfa2_file *mfa2_file, + const char *psid, int psid_size, + int component_index) +{ + const struct mlxfw_mfa2_tlv_component_descriptor *comp; + struct mlxfw_mfa2_comp_data *comp_data; + u32 comp_buf_size; + off_t cb_offset; + u32 comp_size; + int err; + + comp = mlxfw_mfa2_file_component_find(mfa2_file, psid, psid_size, + component_index); + if (!comp) + return ERR_PTR(-EINVAL); + + cb_offset = (u64) be32_to_cpu(comp->cb_offset_h) << 32 | + be32_to_cpu(comp->cb_offset_l); + comp_size = be32_to_cpu(comp->size); + comp_buf_size = comp_size + mlxfw_mfa2_comp_magic_len; + + comp_data = vzalloc(sizeof(*comp_data) + comp_buf_size); + if (!comp_data) + return ERR_PTR(-ENOMEM); + comp_data->comp.data_size = comp_size; + comp_data->comp.index = be16_to_cpu(comp->identifier); + err = mlxfw_mfa2_file_cb_offset_xz(mfa2_file, cb_offset, comp_buf_size, + comp_data->buff); + if (err) { + pr_err("Component could not be reached in CB\n"); + goto err_out; + } + + if (memcmp(comp_data->buff, mlxfw_mfa2_comp_magic, + mlxfw_mfa2_comp_magic_len) != 0) { + pr_err("Component has wrong magic\n"); + err = -EINVAL; + goto err_out; + } + + comp_data->comp.data = comp_data->buff + mlxfw_mfa2_comp_magic_len; + return &comp_data->comp; +err_out: + vfree(comp_data); + return ERR_PTR(err); +} + +void mlxfw_mfa2_file_component_put(struct mlxfw_mfa2_component *comp) +{ + const struct mlxfw_mfa2_comp_data *comp_data; + + comp_data = container_of(comp, struct mlxfw_mfa2_comp_data, comp); + vfree(comp_data); +} + +void mlxfw_mfa2_file_fini(struct mlxfw_mfa2_file *mfa2_file) +{ + kfree(mfa2_file); +} diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h new file mode 100644 index 000000000..5bba6ad79 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2019 Mellanox Technologies. 
All rights reserved */ + +#ifndef _MLXFW_MFA2_H +#define _MLXFW_MFA2_H + +#include <linux/firmware.h> +#include "mlxfw.h" + +struct mlxfw_mfa2_component { + u16 index; + u32 data_size; + u8 *data; +}; + +struct mlxfw_mfa2_file; + +bool mlxfw_mfa2_check(const struct firmware *fw); + +struct mlxfw_mfa2_file *mlxfw_mfa2_file_init(const struct firmware *fw); + +int mlxfw_mfa2_file_component_count(const struct mlxfw_mfa2_file *mfa2_file, + const char *psid, u32 psid_size, + u32 *p_count); + +struct mlxfw_mfa2_component * +mlxfw_mfa2_file_component_get(const struct mlxfw_mfa2_file *mfa2_file, + const char *psid, int psid_size, + int component_index); + +void mlxfw_mfa2_file_component_put(struct mlxfw_mfa2_component *component); + +void mlxfw_mfa2_file_fini(struct mlxfw_mfa2_file *mfa2_file); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h new file mode 100644 index 000000000..874c0a247 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXFW_MFA2_FILE_H +#define _MLXFW_MFA2_FILE_H + +#include <linux/firmware.h> +#include <linux/kernel.h> + +struct mlxfw_mfa2_file { + const struct firmware *fw; + const struct mlxfw_mfa2_tlv *first_dev; + u16 dev_count; + const struct mlxfw_mfa2_tlv *first_component; + u16 component_count; + const void *cb; /* components block */ + u32 cb_archive_size; /* size of compressed components block */ +}; + +static inline bool mlxfw_mfa2_valid_ptr(const struct mlxfw_mfa2_file *mfa2_file, + const void *ptr) +{ + const void *valid_to = mfa2_file->fw->data + mfa2_file->fw->size; + const void *valid_from = mfa2_file->fw->data; + + return ptr > valid_from && ptr < valid_to; +} + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h new file mode 100644 index 000000000..b001e5258 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2019 Mellanox Technologies. 
All rights reserved */ + +#ifndef _MLXFW_MFA2_FORMAT_H +#define _MLXFW_MFA2_FORMAT_H + +#include "mlxfw_mfa2_file.h" +#include "mlxfw_mfa2_tlv.h" + +enum mlxfw_mfa2_tlv_type { + MLXFW_MFA2_TLV_MULTI_PART = 0x01, + MLXFW_MFA2_TLV_PACKAGE_DESCRIPTOR = 0x02, + MLXFW_MFA2_TLV_COMPONENT_DESCRIPTOR = 0x04, + MLXFW_MFA2_TLV_COMPONENT_PTR = 0x22, + MLXFW_MFA2_TLV_PSID = 0x2A, +}; + +enum mlxfw_mfa2_compression_type { + MLXFW_MFA2_COMPRESSION_TYPE_NONE, + MLXFW_MFA2_COMPRESSION_TYPE_XZ, +}; + +struct mlxfw_mfa2_tlv_package_descriptor { + __be16 num_components; + __be16 num_devices; + __be32 cb_offset; + __be32 cb_archive_size; + __be32 cb_size_h; + __be32 cb_size_l; + u8 padding[3]; + u8 cv_compression; + __be32 user_data_offset; +} __packed; + +MLXFW_MFA2_TLV(package_descriptor, struct mlxfw_mfa2_tlv_package_descriptor, + MLXFW_MFA2_TLV_PACKAGE_DESCRIPTOR); + +struct mlxfw_mfa2_tlv_multi { + __be16 num_extensions; + __be16 total_len; +} __packed; + +MLXFW_MFA2_TLV(multi, struct mlxfw_mfa2_tlv_multi, + MLXFW_MFA2_TLV_MULTI_PART); + +struct mlxfw_mfa2_tlv_psid { + u8 psid[0]; +} __packed; + +MLXFW_MFA2_TLV_VARSIZE(psid, struct mlxfw_mfa2_tlv_psid, + MLXFW_MFA2_TLV_PSID); + +struct mlxfw_mfa2_tlv_component_ptr { + __be16 storage_id; + __be16 component_index; + __be32 storage_address; +} __packed; + +MLXFW_MFA2_TLV(component_ptr, struct mlxfw_mfa2_tlv_component_ptr, + MLXFW_MFA2_TLV_COMPONENT_PTR); + +struct mlxfw_mfa2_tlv_component_descriptor { + __be16 pldm_classification; + __be16 identifier; + __be32 cb_offset_h; + __be32 cb_offset_l; + __be32 size; +} __packed; + +MLXFW_MFA2_TLV(component_descriptor, struct mlxfw_mfa2_tlv_component_descriptor, + MLXFW_MFA2_TLV_COMPONENT_DESCRIPTOR); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h new file mode 100644 index 000000000..2014a5de5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2019 Mellanox Technologies. 
All rights reserved */ + +#ifndef _MLXFW_MFA2_TLV_H +#define _MLXFW_MFA2_TLV_H + +#include <linux/kernel.h> +#include "mlxfw_mfa2_file.h" + +struct mlxfw_mfa2_tlv { + u8 version; + u8 type; + __be16 len; + u8 data[]; +} __packed; + +static inline const struct mlxfw_mfa2_tlv * +mlxfw_mfa2_tlv_get(const struct mlxfw_mfa2_file *mfa2_file, const void *ptr) +{ + if (!mlxfw_mfa2_valid_ptr(mfa2_file, ptr) || + !mlxfw_mfa2_valid_ptr(mfa2_file, ptr + sizeof(struct mlxfw_mfa2_tlv))) + return NULL; + return ptr; +} + +static inline const void * +mlxfw_mfa2_tlv_payload_get(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv *tlv, u8 payload_type, + size_t payload_size, bool varsize) +{ + void *tlv_top; + + tlv_top = (void *) tlv + be16_to_cpu(tlv->len) - 1; + if (!mlxfw_mfa2_valid_ptr(mfa2_file, tlv) || + !mlxfw_mfa2_valid_ptr(mfa2_file, tlv_top)) + return NULL; + if (tlv->type != payload_type) + return NULL; + if (varsize && (be16_to_cpu(tlv->len) < payload_size)) + return NULL; + if (!varsize && (be16_to_cpu(tlv->len) != payload_size)) + return NULL; + + return tlv->data; +} + +#define MLXFW_MFA2_TLV(name, payload_type, tlv_type) \ +static inline const payload_type * \ +mlxfw_mfa2_tlv_ ## name ## _get(const struct mlxfw_mfa2_file *mfa2_file, \ + const struct mlxfw_mfa2_tlv *tlv) \ +{ \ + return mlxfw_mfa2_tlv_payload_get(mfa2_file, tlv, \ + tlv_type, sizeof(payload_type), \ + false); \ +} + +#define MLXFW_MFA2_TLV_VARSIZE(name, payload_type, tlv_type) \ +static inline const payload_type * \ +mlxfw_mfa2_tlv_ ## name ## _get(const struct mlxfw_mfa2_file *mfa2_file, \ + const struct mlxfw_mfa2_tlv *tlv) \ +{ \ + return mlxfw_mfa2_tlv_payload_get(mfa2_file, tlv, \ + tlv_type, sizeof(payload_type), \ + true); \ +} + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c new file mode 100644 index 000000000..972c571b4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2019 Mellanox Technologies. 
All rights reserved */ + +#define pr_fmt(fmt) "MFA2: " fmt + +#include "mlxfw_mfa2_tlv_multi.h" +#include <uapi/linux/netlink.h> + +#define MLXFW_MFA2_TLV_TOTAL_SIZE(tlv) \ + NLA_ALIGN(sizeof(*(tlv)) + be16_to_cpu((tlv)->len)) + +const struct mlxfw_mfa2_tlv * +mlxfw_mfa2_tlv_multi_child(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv_multi *multi) +{ + size_t multi_len; + + multi_len = NLA_ALIGN(sizeof(struct mlxfw_mfa2_tlv_multi)); + return mlxfw_mfa2_tlv_get(mfa2_file, (void *) multi + multi_len); +} + +const struct mlxfw_mfa2_tlv * +mlxfw_mfa2_tlv_next(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv *tlv) +{ + const struct mlxfw_mfa2_tlv_multi *multi; + u16 tlv_len; + void *next; + + tlv_len = MLXFW_MFA2_TLV_TOTAL_SIZE(tlv); + + if (tlv->type == MLXFW_MFA2_TLV_MULTI_PART) { + multi = mlxfw_mfa2_tlv_multi_get(mfa2_file, tlv); + if (!multi) + return NULL; + tlv_len = NLA_ALIGN(tlv_len + be16_to_cpu(multi->total_len)); + } + + next = (void *) tlv + tlv_len; + return mlxfw_mfa2_tlv_get(mfa2_file, next); +} + +const struct mlxfw_mfa2_tlv * +mlxfw_mfa2_tlv_advance(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv *from_tlv, u16 count) +{ + const struct mlxfw_mfa2_tlv *tlv; + u16 idx; + + mlxfw_mfa2_tlv_foreach(mfa2_file, tlv, idx, from_tlv, count) + if (!tlv) + return NULL; + return tlv; +} + +const struct mlxfw_mfa2_tlv * +mlxfw_mfa2_tlv_multi_child_find(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv_multi *multi, + enum mlxfw_mfa2_tlv_type type, u16 index) +{ + const struct mlxfw_mfa2_tlv *tlv; + u16 skip = 0; + u16 idx; + + mlxfw_mfa2_tlv_multi_foreach(mfa2_file, tlv, idx, multi) { + if (!tlv) { + pr_err("TLV parsing error\n"); + return NULL; + } + if (tlv->type == type) + if (skip++ == index) + return tlv; + } + return NULL; +} + +int mlxfw_mfa2_tlv_multi_child_count(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv_multi *multi, + enum mlxfw_mfa2_tlv_type type, + u16 *p_count) +{ + const struct mlxfw_mfa2_tlv *tlv; + u16 count = 0; + u16 idx; + + mlxfw_mfa2_tlv_multi_foreach(mfa2_file, tlv, idx, multi) { + if (!tlv) { + pr_err("TLV parsing error\n"); + return -EINVAL; + } + + if (tlv->type == type) + count++; + } + *p_count = count; + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h new file mode 100644 index 000000000..633284eed --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2019 Mellanox Technologies. 
All rights reserved */ + +#ifndef _MLXFW_MFA2_TLV_MULTI_H +#define _MLXFW_MFA2_TLV_MULTI_H + +#include "mlxfw_mfa2_tlv.h" +#include "mlxfw_mfa2_format.h" +#include "mlxfw_mfa2_file.h" + +const struct mlxfw_mfa2_tlv * +mlxfw_mfa2_tlv_multi_child(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv_multi *multi); + +const struct mlxfw_mfa2_tlv * +mlxfw_mfa2_tlv_next(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv *tlv); + +const struct mlxfw_mfa2_tlv * +mlxfw_mfa2_tlv_advance(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv *from_tlv, u16 count); + +const struct mlxfw_mfa2_tlv * +mlxfw_mfa2_tlv_multi_child_find(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv_multi *multi, + enum mlxfw_mfa2_tlv_type type, u16 index); + +int mlxfw_mfa2_tlv_multi_child_count(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv_multi *multi, + enum mlxfw_mfa2_tlv_type type, + u16 *p_count); + +#define mlxfw_mfa2_tlv_foreach(mfa2_file, tlv, idx, from_tlv, count) \ + for (idx = 0, tlv = from_tlv; idx < (count); \ + idx++, tlv = mlxfw_mfa2_tlv_next(mfa2_file, tlv)) + +#define mlxfw_mfa2_tlv_multi_foreach(mfa2_file, tlv, idx, multi) \ + mlxfw_mfa2_tlv_foreach(mfa2_file, tlv, idx, \ + mlxfw_mfa2_tlv_multi_child(mfa2_file, multi), \ + be16_to_cpu(multi->num_extensions) + 1) +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig new file mode 100644 index 000000000..a510bf2cf --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -0,0 +1,92 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Mellanox switch drivers configuration +# + +config MLXSW_CORE + tristate "Mellanox Technologies Switch ASICs support" + select NET_DEVLINK + select MLXFW + select AUXILIARY_BUS + help + This driver supports Mellanox Technologies Switch ASICs family. + + To compile this driver as a module, choose M here: the + module will be called mlxsw_core. + +config MLXSW_CORE_HWMON + bool "HWMON support for Mellanox Technologies Switch ASICs" + depends on MLXSW_CORE && HWMON + depends on !(MLXSW_CORE=y && HWMON=m) + default y + help + Say Y here if you want to expose HWMON interface on mlxsw devices. + +config MLXSW_CORE_THERMAL + bool "Thermal zone support for Mellanox Technologies Switch ASICs" + depends on MLXSW_CORE && THERMAL + default y + help + Say Y here if you want to automatically control fans speed according + ambient temperature reported by ASIC. + +config MLXSW_PCI + tristate "PCI bus implementation for Mellanox Technologies Switch ASICs" + depends on PCI && HAS_IOMEM && MLXSW_CORE + default m + help + This is PCI bus implementation for Mellanox Technologies Switch ASICs. + + To compile this driver as a module, choose M here: the + module will be called mlxsw_pci. + +config MLXSW_I2C + tristate "I2C bus implementation for Mellanox Technologies Switch ASICs" + depends on I2C && MLXSW_CORE + default m + help + This is I2C bus implementation for Mellanox Technologies Switch ASICs. + + To compile this driver as a module, choose M here: the + module will be called mlxsw_i2c. 
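+
+# Example (illustrative only, not part of the upstream Kconfig): a minimal
+# fragment that builds the PCI-attached Spectrum stack as modules:
+#
+#   CONFIG_MLXSW_CORE=m
+#   CONFIG_MLXSW_PCI=m
+#   CONFIG_MLXSW_SPECTRUM=m
+#
+# The I2C options (MLXSW_I2C together with MLXSW_MINIMAL) form the
+# equivalent stack for I2C-attached devices.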
+ +config MLXSW_SPECTRUM + tristate "Mellanox Technologies Spectrum family support" + depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q + depends on PSAMPLE || PSAMPLE=n + depends on BRIDGE || BRIDGE=n + depends on IPV6 || IPV6=n + depends on NET_IPGRE || NET_IPGRE=n + depends on IPV6_GRE || IPV6_GRE=n + depends on VXLAN || VXLAN=n + depends on PTP_1588_CLOCK_OPTIONAL + select GENERIC_ALLOCATOR + select PARMAN + select OBJAGG + select NET_PTP_CLASSIFY if PTP_1588_CLOCK + default m + help + This driver supports Mellanox Technologies + Spectrum/Spectrum-2/Spectrum-3/Spectrum-4 Ethernet Switch ASICs. + + To compile this driver as a module, choose M here: the + module will be called mlxsw_spectrum. + +config MLXSW_SPECTRUM_DCB + bool "Data Center Bridging (DCB) support" + depends on MLXSW_SPECTRUM && DCB + default y + help + Say Y here if you want to use Data Center Bridging (DCB) in the + driver. + +config MLXSW_MINIMAL + tristate "Mellanox Technologies minimal I2C support" + depends on MLXSW_CORE && MLXSW_I2C + default m + help + This driver supports I2C access for Mellanox Technologies Switch + ASICs. + + To compile this driver as a module, choose M here: the + module will be called mlxsw_minimal. diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile new file mode 100644 index 000000000..3ca9fce75 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_MLXSW_CORE) += mlxsw_core.o +mlxsw_core-objs := core.o core_acl_flex_keys.o \ + core_acl_flex_actions.o core_env.o \ + core_linecards.o core_linecard_dev.o +mlxsw_core-$(CONFIG_MLXSW_CORE_HWMON) += core_hwmon.o +mlxsw_core-$(CONFIG_MLXSW_CORE_THERMAL) += core_thermal.o +obj-$(CONFIG_MLXSW_PCI) += mlxsw_pci.o +mlxsw_pci-objs := pci.o +obj-$(CONFIG_MLXSW_I2C) += mlxsw_i2c.o +mlxsw_i2c-objs := i2c.o +obj-$(CONFIG_MLXSW_SPECTRUM) += mlxsw_spectrum.o +mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ + spectrum_switchdev.o spectrum_router.o \ + spectrum1_kvdl.o spectrum2_kvdl.o \ + spectrum_kvdl.o \ + spectrum_acl_tcam.o spectrum_acl_ctcam.o \ + spectrum_acl_atcam.o spectrum_acl_erp.o \ + spectrum1_acl_tcam.o spectrum2_acl_tcam.o \ + spectrum_acl_bloom_filter.o spectrum_acl.o \ + spectrum_flow.o spectrum_matchall.o \ + spectrum_flower.o spectrum_cnt.o \ + spectrum_fid.o spectrum_ipip.o \ + spectrum_acl_flex_actions.o \ + spectrum_acl_flex_keys.o \ + spectrum1_mr_tcam.o spectrum2_mr_tcam.o \ + spectrum_mr_tcam.o spectrum_mr.o \ + spectrum_qdisc.o spectrum_span.o \ + spectrum_nve.o spectrum_nve_vxlan.o \ + spectrum_dpipe.o spectrum_trap.o \ + spectrum_ethtool.o spectrum_policer.o \ + spectrum_pgt.o +mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o +mlxsw_spectrum-$(CONFIG_PTP_1588_CLOCK) += spectrum_ptp.o +obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o +mlxsw_minimal-objs := minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h new file mode 100644 index 000000000..09bef04b1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h @@ -0,0 +1,1281 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2015-2018 Mellanox Technologies. 
All rights reserved */ + +#ifndef _MLXSW_CMD_H +#define _MLXSW_CMD_H + +#include "item.h" + +#define MLXSW_CMD_MBOX_SIZE 4096 + +static inline char *mlxsw_cmd_mbox_alloc(void) +{ + return kzalloc(MLXSW_CMD_MBOX_SIZE, GFP_KERNEL); +} + +static inline void mlxsw_cmd_mbox_free(char *mbox) +{ + kfree(mbox); +} + +static inline void mlxsw_cmd_mbox_zero(char *mbox) +{ + memset(mbox, 0, MLXSW_CMD_MBOX_SIZE); +} + +struct mlxsw_core; + +int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod, + u32 in_mod, bool out_mbox_direct, bool reset_ok, + char *in_mbox, size_t in_mbox_size, + char *out_mbox, size_t out_mbox_size); + +static inline int mlxsw_cmd_exec_in(struct mlxsw_core *mlxsw_core, u16 opcode, + u8 opcode_mod, u32 in_mod, char *in_mbox, + size_t in_mbox_size) +{ + return mlxsw_cmd_exec(mlxsw_core, opcode, opcode_mod, in_mod, false, + false, in_mbox, in_mbox_size, NULL, 0); +} + +static inline int mlxsw_cmd_exec_out(struct mlxsw_core *mlxsw_core, u16 opcode, + u8 opcode_mod, u32 in_mod, + bool out_mbox_direct, + char *out_mbox, size_t out_mbox_size) +{ + return mlxsw_cmd_exec(mlxsw_core, opcode, opcode_mod, in_mod, + out_mbox_direct, false, NULL, 0, + out_mbox, out_mbox_size); +} + +static inline int mlxsw_cmd_exec_none(struct mlxsw_core *mlxsw_core, u16 opcode, + u8 opcode_mod, u32 in_mod) +{ + return mlxsw_cmd_exec(mlxsw_core, opcode, opcode_mod, in_mod, false, + false, NULL, 0, NULL, 0); +} + +enum mlxsw_cmd_opcode { + MLXSW_CMD_OPCODE_QUERY_FW = 0x004, + MLXSW_CMD_OPCODE_QUERY_BOARDINFO = 0x006, + MLXSW_CMD_OPCODE_QUERY_AQ_CAP = 0x003, + MLXSW_CMD_OPCODE_MAP_FA = 0xFFF, + MLXSW_CMD_OPCODE_UNMAP_FA = 0xFFE, + MLXSW_CMD_OPCODE_CONFIG_PROFILE = 0x100, + MLXSW_CMD_OPCODE_ACCESS_REG = 0x040, + MLXSW_CMD_OPCODE_SW2HW_DQ = 0x201, + MLXSW_CMD_OPCODE_HW2SW_DQ = 0x202, + MLXSW_CMD_OPCODE_2ERR_DQ = 0x01E, + MLXSW_CMD_OPCODE_QUERY_DQ = 0x022, + MLXSW_CMD_OPCODE_SW2HW_CQ = 0x016, + MLXSW_CMD_OPCODE_HW2SW_CQ = 0x017, + MLXSW_CMD_OPCODE_QUERY_CQ = 0x018, + MLXSW_CMD_OPCODE_SW2HW_EQ = 0x013, + MLXSW_CMD_OPCODE_HW2SW_EQ = 0x014, + MLXSW_CMD_OPCODE_QUERY_EQ = 0x015, + MLXSW_CMD_OPCODE_QUERY_RESOURCES = 0x101, +}; + +static inline const char *mlxsw_cmd_opcode_str(u16 opcode) +{ + switch (opcode) { + case MLXSW_CMD_OPCODE_QUERY_FW: + return "QUERY_FW"; + case MLXSW_CMD_OPCODE_QUERY_BOARDINFO: + return "QUERY_BOARDINFO"; + case MLXSW_CMD_OPCODE_QUERY_AQ_CAP: + return "QUERY_AQ_CAP"; + case MLXSW_CMD_OPCODE_MAP_FA: + return "MAP_FA"; + case MLXSW_CMD_OPCODE_UNMAP_FA: + return "UNMAP_FA"; + case MLXSW_CMD_OPCODE_CONFIG_PROFILE: + return "CONFIG_PROFILE"; + case MLXSW_CMD_OPCODE_ACCESS_REG: + return "ACCESS_REG"; + case MLXSW_CMD_OPCODE_SW2HW_DQ: + return "SW2HW_DQ"; + case MLXSW_CMD_OPCODE_HW2SW_DQ: + return "HW2SW_DQ"; + case MLXSW_CMD_OPCODE_2ERR_DQ: + return "2ERR_DQ"; + case MLXSW_CMD_OPCODE_QUERY_DQ: + return "QUERY_DQ"; + case MLXSW_CMD_OPCODE_SW2HW_CQ: + return "SW2HW_CQ"; + case MLXSW_CMD_OPCODE_HW2SW_CQ: + return "HW2SW_CQ"; + case MLXSW_CMD_OPCODE_QUERY_CQ: + return "QUERY_CQ"; + case MLXSW_CMD_OPCODE_SW2HW_EQ: + return "SW2HW_EQ"; + case MLXSW_CMD_OPCODE_HW2SW_EQ: + return "HW2SW_EQ"; + case MLXSW_CMD_OPCODE_QUERY_EQ: + return "QUERY_EQ"; + case MLXSW_CMD_OPCODE_QUERY_RESOURCES: + return "QUERY_RESOURCES"; + default: + return "*UNKNOWN*"; + } +} + +enum mlxsw_cmd_status { + /* Command execution succeeded. */ + MLXSW_CMD_STATUS_OK = 0x00, + /* Internal error (e.g. bus error) occurred while processing command. 
*/ + MLXSW_CMD_STATUS_INTERNAL_ERR = 0x01, + /* Operation/command not supported or opcode modifier not supported. */ + MLXSW_CMD_STATUS_BAD_OP = 0x02, + /* Parameter not supported, parameter out of range. */ + MLXSW_CMD_STATUS_BAD_PARAM = 0x03, + /* System was not enabled or bad system state. */ + MLXSW_CMD_STATUS_BAD_SYS_STATE = 0x04, + /* Attempt to access reserved or unallocated resource, or resource in + * inappropriate ownership. + */ + MLXSW_CMD_STATUS_BAD_RESOURCE = 0x05, + /* Requested resource is currently executing a command. */ + MLXSW_CMD_STATUS_RESOURCE_BUSY = 0x06, + /* Required capability exceeds device limits. */ + MLXSW_CMD_STATUS_EXCEED_LIM = 0x08, + /* Resource is not in the appropriate state or ownership. */ + MLXSW_CMD_STATUS_BAD_RES_STATE = 0x09, + /* Index out of range (might be beyond table size or attempt to + * access a reserved resource). + */ + MLXSW_CMD_STATUS_BAD_INDEX = 0x0A, + /* NVMEM checksum/CRC failed. */ + MLXSW_CMD_STATUS_BAD_NVMEM = 0x0B, + /* Device is currently running reset */ + MLXSW_CMD_STATUS_RUNNING_RESET = 0x26, + /* Bad management packet (silently discarded). */ + MLXSW_CMD_STATUS_BAD_PKT = 0x30, +}; + +static inline const char *mlxsw_cmd_status_str(u8 status) +{ + switch (status) { + case MLXSW_CMD_STATUS_OK: + return "OK"; + case MLXSW_CMD_STATUS_INTERNAL_ERR: + return "INTERNAL_ERR"; + case MLXSW_CMD_STATUS_BAD_OP: + return "BAD_OP"; + case MLXSW_CMD_STATUS_BAD_PARAM: + return "BAD_PARAM"; + case MLXSW_CMD_STATUS_BAD_SYS_STATE: + return "BAD_SYS_STATE"; + case MLXSW_CMD_STATUS_BAD_RESOURCE: + return "BAD_RESOURCE"; + case MLXSW_CMD_STATUS_RESOURCE_BUSY: + return "RESOURCE_BUSY"; + case MLXSW_CMD_STATUS_EXCEED_LIM: + return "EXCEED_LIM"; + case MLXSW_CMD_STATUS_BAD_RES_STATE: + return "BAD_RES_STATE"; + case MLXSW_CMD_STATUS_BAD_INDEX: + return "BAD_INDEX"; + case MLXSW_CMD_STATUS_BAD_NVMEM: + return "BAD_NVMEM"; + case MLXSW_CMD_STATUS_RUNNING_RESET: + return "RUNNING_RESET"; + case MLXSW_CMD_STATUS_BAD_PKT: + return "BAD_PKT"; + default: + return "*UNKNOWN*"; + } +} + +/* QUERY_FW - Query Firmware + * ------------------------- + * OpMod == 0, INMmod == 0 + * ----------------------- + * The QUERY_FW command retrieves information related to firmware, command + * interface version and the amount of resources that should be allocated to + * the firmware. + */ + +static inline int mlxsw_cmd_query_fw(struct mlxsw_core *mlxsw_core, + char *out_mbox) +{ + return mlxsw_cmd_exec_out(mlxsw_core, MLXSW_CMD_OPCODE_QUERY_FW, + 0, 0, false, out_mbox, MLXSW_CMD_MBOX_SIZE); +} + +/* cmd_mbox_query_fw_fw_pages + * Amount of physical memory to be allocatedfor firmware usage in 4KB pages. + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fw_pages, 0x00, 16, 16); + +/* cmd_mbox_query_fw_fw_rev_major + * Firmware Revision - Major + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fw_rev_major, 0x00, 0, 16); + +/* cmd_mbox_query_fw_fw_rev_subminor + * Firmware Sub-minor version (Patch level) + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fw_rev_subminor, 0x04, 16, 16); + +/* cmd_mbox_query_fw_fw_rev_minor + * Firmware Revision - Minor + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fw_rev_minor, 0x04, 0, 16); + +/* cmd_mbox_query_fw_core_clk + * Internal Clock Frequency (in MHz) + */ +MLXSW_ITEM32(cmd_mbox, query_fw, core_clk, 0x08, 16, 16); + +/* cmd_mbox_query_fw_cmd_interface_rev + * Command Interface Interpreter Revision ID. This number is bumped up + * every time a non-backward-compatible change is done for the command + * interface. The current cmd_interface_rev is 1. 
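+ *
+ * For example (illustrative; the _get() accessor is assumed to be the
+ * one generated by MLXSW_ITEM32() in item.h):
+ *
+ *	err = mlxsw_cmd_query_fw(mlxsw_core, out_mbox);
+ *	if (!err)
+ *		rev = mlxsw_cmd_mbox_query_fw_cmd_interface_rev_get(out_mbox);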
+ */ +MLXSW_ITEM32(cmd_mbox, query_fw, cmd_interface_rev, 0x08, 0, 16); + +/* cmd_mbox_query_fw_dt + * If set, Debug Trace is supported + */ +MLXSW_ITEM32(cmd_mbox, query_fw, dt, 0x0C, 31, 1); + +/* cmd_mbox_query_fw_api_version + * Indicates the version of the API, to enable software querying + * for compatibility. The current api_version is 1. + */ +MLXSW_ITEM32(cmd_mbox, query_fw, api_version, 0x0C, 0, 16); + +/* cmd_mbox_query_fw_fw_hour + * Firmware timestamp - hour + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fw_hour, 0x10, 24, 8); + +/* cmd_mbox_query_fw_fw_minutes + * Firmware timestamp - minutes + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fw_minutes, 0x10, 16, 8); + +/* cmd_mbox_query_fw_fw_seconds + * Firmware timestamp - seconds + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fw_seconds, 0x10, 8, 8); + +/* cmd_mbox_query_fw_fw_year + * Firmware timestamp - year + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fw_year, 0x14, 16, 16); + +/* cmd_mbox_query_fw_fw_month + * Firmware timestamp - month + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fw_month, 0x14, 8, 8); + +/* cmd_mbox_query_fw_fw_day + * Firmware timestamp - day + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fw_day, 0x14, 0, 8); + +/* cmd_mbox_query_fw_clr_int_base_offset + * Clear Interrupt register's offset from clr_int_bar register + * in PCI address space. + */ +MLXSW_ITEM64(cmd_mbox, query_fw, clr_int_base_offset, 0x20, 0, 64); + +/* cmd_mbox_query_fw_clr_int_bar + * PCI base address register (BAR) where clr_int register is located. + * 00 - BAR 0-1 (64 bit BAR) + */ +MLXSW_ITEM32(cmd_mbox, query_fw, clr_int_bar, 0x28, 30, 2); + +/* cmd_mbox_query_fw_error_buf_offset + * Read Only buffer for internal error reports of offset + * from error_buf_bar register in PCI address space). + */ +MLXSW_ITEM64(cmd_mbox, query_fw, error_buf_offset, 0x30, 0, 64); + +/* cmd_mbox_query_fw_error_buf_size + * Internal error buffer size in DWORDs + */ +MLXSW_ITEM32(cmd_mbox, query_fw, error_buf_size, 0x38, 0, 32); + +/* cmd_mbox_query_fw_error_int_bar + * PCI base address register (BAR) where error buffer + * register is located. 
+ * 00 - BAR 0-1 (64 bit BAR) + */ +MLXSW_ITEM32(cmd_mbox, query_fw, error_int_bar, 0x3C, 30, 2); + +/* cmd_mbox_query_fw_doorbell_page_offset + * Offset of the doorbell page + */ +MLXSW_ITEM64(cmd_mbox, query_fw, doorbell_page_offset, 0x40, 0, 64); + +/* cmd_mbox_query_fw_doorbell_page_bar + * PCI base address register (BAR) of the doorbell page + * 00 - BAR 0-1 (64 bit BAR) + */ +MLXSW_ITEM32(cmd_mbox, query_fw, doorbell_page_bar, 0x48, 30, 2); + +/* cmd_mbox_query_fw_free_running_clock_offset + * The offset of the free running clock page + */ +MLXSW_ITEM64(cmd_mbox, query_fw, free_running_clock_offset, 0x50, 0, 64); + +/* cmd_mbox_query_fw_fr_rn_clk_bar + * PCI base address register (BAR) of the free running clock page + * 0: BAR 0 + * 1: 64 bit BAR + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fr_rn_clk_bar, 0x58, 30, 2); + +/* cmd_mbox_query_fw_utc_sec_offset + * The offset of the UTC_Sec page + */ +MLXSW_ITEM64(cmd_mbox, query_fw, utc_sec_offset, 0x70, 0, 64); + +/* cmd_mbox_query_fw_utc_sec_bar + * PCI base address register (BAR) of the UTC_Sec page + * 0: BAR 0 + * 1: 64 bit BAR + * Reserved on SwitchX/-2, Switch-IB/2, Spectrum-1 + */ +MLXSW_ITEM32(cmd_mbox, query_fw, utc_sec_bar, 0x78, 30, 2); + +/* cmd_mbox_query_fw_utc_nsec_offset + * The offset of the UTC_nSec page + */ +MLXSW_ITEM64(cmd_mbox, query_fw, utc_nsec_offset, 0x80, 0, 64); + +/* cmd_mbox_query_fw_utc_nsec_bar + * PCI base address register (BAR) of the UTC_nSec page + * 0: BAR 0 + * 1: 64 bit BAR + * Reserved on SwitchX/-2, Switch-IB/2, Spectrum-1 + */ +MLXSW_ITEM32(cmd_mbox, query_fw, utc_nsec_bar, 0x88, 30, 2); + +/* QUERY_BOARDINFO - Query Board Information + * ----------------------------------------- + * OpMod == 0 (N/A), INMmod == 0 (N/A) + * ----------------------------------- + * The QUERY_BOARDINFO command retrieves adapter specific parameters. + */ + +static inline int mlxsw_cmd_boardinfo(struct mlxsw_core *mlxsw_core, + char *out_mbox) +{ + return mlxsw_cmd_exec_out(mlxsw_core, MLXSW_CMD_OPCODE_QUERY_BOARDINFO, + 0, 0, false, out_mbox, MLXSW_CMD_MBOX_SIZE); +} + +/* cmd_mbox_boardinfo_intapin + * When PCIe interrupt messages are being used, this value is used for clearing + * an interrupt. When using MSI-X, this register is not used. + */ +MLXSW_ITEM32(cmd_mbox, boardinfo, intapin, 0x10, 24, 8); + +/* cmd_mbox_boardinfo_vsd_vendor_id + * PCISIG Vendor ID (www.pcisig.com/membership/vid_search) of the vendor + * specifying/formatting the VSD. The vsd_vendor_id identifies the management + * domain of the VSD/PSID data. Different vendors may choose different VSD/PSID + * format and encoding as long as they use their assigned vsd_vendor_id. + */ +MLXSW_ITEM32(cmd_mbox, boardinfo, vsd_vendor_id, 0x1C, 0, 16); + +/* cmd_mbox_boardinfo_vsd + * Vendor Specific Data. The VSD string that is burnt to the Flash + * with the firmware. + */ +#define MLXSW_CMD_BOARDINFO_VSD_LEN 208 +MLXSW_ITEM_BUF(cmd_mbox, boardinfo, vsd, 0x20, MLXSW_CMD_BOARDINFO_VSD_LEN); + +/* cmd_mbox_boardinfo_psid + * The PSID field is a 16-ascii (byte) character string which acts as + * the board ID. The PSID format is used in conjunction with + * Mellanox vsd_vendor_id (15B3h). 
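+ *
+ * A PSID like this is what the MFA2 firmware image parser matches its
+ * device descriptors against (see mlxfw_mfa2_file_component_count()).
+ * Sketch, assuming the _memcpy_from() helper generated by
+ * MLXSW_ITEM_BUF() in item.h:
+ *
+ *	char psid[MLXSW_CMD_BOARDINFO_PSID_LEN];
+ *
+ *	mlxsw_cmd_mbox_boardinfo_psid_memcpy_from(out_mbox, psid);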
+ */ +#define MLXSW_CMD_BOARDINFO_PSID_LEN 16 +MLXSW_ITEM_BUF(cmd_mbox, boardinfo, psid, 0xF0, MLXSW_CMD_BOARDINFO_PSID_LEN); + +/* QUERY_AQ_CAP - Query Asynchronous Queues Capabilities + * ----------------------------------------------------- + * OpMod == 0 (N/A), INMmod == 0 (N/A) + * ----------------------------------- + * The QUERY_AQ_CAP command returns the device asynchronous queues + * capabilities supported. + */ + +static inline int mlxsw_cmd_query_aq_cap(struct mlxsw_core *mlxsw_core, + char *out_mbox) +{ + return mlxsw_cmd_exec_out(mlxsw_core, MLXSW_CMD_OPCODE_QUERY_AQ_CAP, + 0, 0, false, out_mbox, MLXSW_CMD_MBOX_SIZE); +} + +/* cmd_mbox_query_aq_cap_log_max_sdq_sz + * Log (base 2) of max WQEs allowed on SDQ. + */ +MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_sdq_sz, 0x00, 24, 8); + +/* cmd_mbox_query_aq_cap_max_num_sdqs + * Maximum number of SDQs. + */ +MLXSW_ITEM32(cmd_mbox, query_aq_cap, max_num_sdqs, 0x00, 0, 8); + +/* cmd_mbox_query_aq_cap_log_max_rdq_sz + * Log (base 2) of max WQEs allowed on RDQ. + */ +MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_rdq_sz, 0x04, 24, 8); + +/* cmd_mbox_query_aq_cap_max_num_rdqs + * Maximum number of RDQs. + */ +MLXSW_ITEM32(cmd_mbox, query_aq_cap, max_num_rdqs, 0x04, 0, 8); + +/* cmd_mbox_query_aq_cap_log_max_cq_sz + * Log (base 2) of the Maximum CQEs allowed in a CQ for CQEv0 and CQEv1. + */ +MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_cq_sz, 0x08, 24, 8); + +/* cmd_mbox_query_aq_cap_log_max_cqv2_sz + * Log (base 2) of the Maximum CQEs allowed in a CQ for CQEv2. + */ +MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_cqv2_sz, 0x08, 16, 8); + +/* cmd_mbox_query_aq_cap_max_num_cqs + * Maximum number of CQs. + */ +MLXSW_ITEM32(cmd_mbox, query_aq_cap, max_num_cqs, 0x08, 0, 8); + +/* cmd_mbox_query_aq_cap_log_max_eq_sz + * Log (base 2) of max EQEs allowed on EQ. + */ +MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_eq_sz, 0x0C, 24, 8); + +/* cmd_mbox_query_aq_cap_max_num_eqs + * Maximum number of EQs. + */ +MLXSW_ITEM32(cmd_mbox, query_aq_cap, max_num_eqs, 0x0C, 0, 8); + +/* cmd_mbox_query_aq_cap_max_sg_sq + * The maximum S/G list elements in an DSQ. DSQ must not contain + * more S/G entries than indicated here. + */ +MLXSW_ITEM32(cmd_mbox, query_aq_cap, max_sg_sq, 0x10, 8, 8); + +/* cmd_mbox_query_aq_cap_ + * The maximum S/G list elements in an DRQ. DRQ must not contain + * more S/G entries than indicated here. + */ +MLXSW_ITEM32(cmd_mbox, query_aq_cap, max_sg_rq, 0x10, 0, 8); + +/* MAP_FA - Map Firmware Area + * -------------------------- + * OpMod == 0 (N/A), INMmod == Number of VPM entries + * ------------------------------------------------- + * The MAP_FA command passes physical pages to the switch. These pages + * are used to store the device firmware. MAP_FA can be executed multiple + * times until all the firmware area is mapped (the size that should be + * mapped is retrieved through the QUERY_FW command). All required pages + * must be mapped to finish the initialization phase. Physical memory + * passed in this command must be pinned. + */ + +#define MLXSW_CMD_MAP_FA_VPM_ENTRIES_MAX 32 + +static inline int mlxsw_cmd_map_fa(struct mlxsw_core *mlxsw_core, + char *in_mbox, u32 vpm_entries_count) +{ + return mlxsw_cmd_exec_in(mlxsw_core, MLXSW_CMD_OPCODE_MAP_FA, + 0, vpm_entries_count, + in_mbox, MLXSW_CMD_MBOX_SIZE); +} + +/* cmd_mbox_map_fa_pa + * Physical Address. 
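+ *
+ * Illustrative sketch of the MAP_FA flow described above (the indexed
+ * _set() helpers are assumed to come from the MLXSW_ITEM*_INDEXED()
+ * macros in item.h; error handling omitted, nr_pages capped at
+ * MLXSW_CMD_MAP_FA_VPM_ENTRIES_MAX per command):
+ *
+ *	for (i = 0; i < nr_pages; i++) {
+ *		mlxsw_cmd_mbox_map_fa_pa_set(mbox, i, page_dma_addr[i]);
+ *		mlxsw_cmd_mbox_map_fa_log2size_set(mbox, i, 0);
+ *	}
+ *	err = mlxsw_cmd_map_fa(mlxsw_core, mbox, nr_pages);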
+ */ +MLXSW_ITEM64_INDEXED(cmd_mbox, map_fa, pa, 0x00, 12, 52, 0x08, 0x00, true); + +/* cmd_mbox_map_fa_log2size + * Log (base 2) of the size in 4KB pages of the physical and contiguous memory + * that starts at PA_L/H. + */ +MLXSW_ITEM32_INDEXED(cmd_mbox, map_fa, log2size, 0x00, 0, 5, 0x08, 0x04, false); + +/* UNMAP_FA - Unmap Firmware Area + * ------------------------------ + * OpMod == 0 (N/A), INMmod == 0 (N/A) + * ----------------------------------- + * The UNMAP_FA command unload the firmware and unmaps all the + * firmware area. After this command is completed the device will not access + * the pages that were mapped to the firmware area. After executing UNMAP_FA + * command, software reset must be done prior to execution of MAP_FW command. + */ + +static inline int mlxsw_cmd_unmap_fa(struct mlxsw_core *mlxsw_core) +{ + return mlxsw_cmd_exec_none(mlxsw_core, MLXSW_CMD_OPCODE_UNMAP_FA, 0, 0); +} + +/* QUERY_RESOURCES - Query chip resources + * -------------------------------------- + * OpMod == 0 (N/A) , INMmod is index + * ---------------------------------- + * The QUERY_RESOURCES command retrieves information related to chip resources + * by resource ID. Every command returns 32 entries. INmod is being use as base. + * for example, index 1 will return entries 32-63. When the tables end and there + * are no more sources in the table, will return resource id 0xFFF to indicate + * it. + */ + +#define MLXSW_CMD_QUERY_RESOURCES_TABLE_END_ID 0xffff +#define MLXSW_CMD_QUERY_RESOURCES_MAX_QUERIES 100 +#define MLXSW_CMD_QUERY_RESOURCES_PER_QUERY 32 + +static inline int mlxsw_cmd_query_resources(struct mlxsw_core *mlxsw_core, + char *out_mbox, int index) +{ + return mlxsw_cmd_exec_out(mlxsw_core, MLXSW_CMD_OPCODE_QUERY_RESOURCES, + 0, index, false, out_mbox, + MLXSW_CMD_MBOX_SIZE); +} + +/* cmd_mbox_query_resource_id + * The resource id. 0xFFFF indicates table's end. + */ +MLXSW_ITEM32_INDEXED(cmd_mbox, query_resource, id, 0x00, 16, 16, 0x8, 0, false); + +/* cmd_mbox_query_resource_data + * The resource + */ +MLXSW_ITEM64_INDEXED(cmd_mbox, query_resource, data, + 0x00, 0, 40, 0x8, 0, false); + +/* CONFIG_PROFILE (Set) - Configure Switch Profile + * ------------------------------ + * OpMod == 1 (Set), INMmod == 0 (N/A) + * ----------------------------------- + * The CONFIG_PROFILE command sets the switch profile. The command can be + * executed on the device only once at startup in order to allocate and + * configure all switch resources and prepare it for operational mode. + * It is not possible to change the device profile after the chip is + * in operational mode. + * Failure of the CONFIG_PROFILE command leaves the hardware in an indeterminate + * state therefore it is required to perform software reset to the device + * following an unsuccessful completion of the command. It is required + * to perform software reset to the device to change an existing profile. + */ + +static inline int mlxsw_cmd_config_profile_set(struct mlxsw_core *mlxsw_core, + char *in_mbox) +{ + return mlxsw_cmd_exec_in(mlxsw_core, MLXSW_CMD_OPCODE_CONFIG_PROFILE, + 1, 0, in_mbox, MLXSW_CMD_MBOX_SIZE); +} + +/* cmd_mbox_config_profile_set_max_vepa_channels + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_max_vepa_channels, 0x0C, 0, 1); + +/* cmd_mbox_config_profile_set_max_lag + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. 
+ */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_max_lag, 0x0C, 1, 1); + +/* cmd_mbox_config_profile_set_max_port_per_lag + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_max_port_per_lag, 0x0C, 2, 1); + +/* cmd_mbox_config_profile_set_max_mid + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_max_mid, 0x0C, 3, 1); + +/* cmd_mbox_config_profile_set_max_pgt + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_max_pgt, 0x0C, 4, 1); + +/* cmd_mbox_config_profile_set_max_system_port + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_max_system_port, 0x0C, 5, 1); + +/* cmd_mbox_config_profile_set_max_vlan_groups + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_max_vlan_groups, 0x0C, 6, 1); + +/* cmd_mbox_config_profile_set_max_regions + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_max_regions, 0x0C, 7, 1); + +/* cmd_mbox_config_profile_set_flood_mode + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_flood_mode, 0x0C, 8, 1); + +/* cmd_mbox_config_profile_set_max_flood_tables + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_flood_tables, 0x0C, 9, 1); + +/* cmd_mbox_config_profile_set_max_ib_mc + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_max_ib_mc, 0x0C, 12, 1); + +/* cmd_mbox_config_profile_set_max_pkey + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_max_pkey, 0x0C, 13, 1); + +/* cmd_mbox_config_profile_set_adaptive_routing_group_cap + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, + set_adaptive_routing_group_cap, 0x0C, 14, 1); + +/* cmd_mbox_config_profile_set_ar_sec + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_ar_sec, 0x0C, 15, 1); + +/* cmd_mbox_config_set_ubridge + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_ubridge, 0x0C, 22, 1); + +/* cmd_mbox_config_set_kvd_linear_size + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_linear_size, 0x0C, 24, 1); + +/* cmd_mbox_config_set_kvd_hash_single_size + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_single_size, 0x0C, 25, 1); + +/* cmd_mbox_config_set_kvd_hash_double_size + * Capability bit. 
Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_double_size, 0x0C, 26, 1); + +/* cmd_mbox_config_set_cqe_version + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_cqe_version, 0x08, 0, 1); + +/* cmd_mbox_config_set_cqe_time_stamp_type + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_cqe_time_stamp_type, 0x08, 2, 1); + +/* cmd_mbox_config_profile_max_vepa_channels + * Maximum number of VEPA channels per port (0 through 16) + * 0 - multi-channel VEPA is disabled + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_vepa_channels, 0x10, 0, 8); + +/* cmd_mbox_config_profile_max_lag + * Maximum number of LAG IDs requested. + * Reserved when Spectrum-1/2/3, supported from Spectrum-4 and above. + * For Spectrum-4, firmware sets 128 for values between 1-128 and 256 for values + * between 129-256. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_lag, 0x14, 0, 16); + +/* cmd_mbox_config_profile_max_port_per_lag + * Maximum number of ports per LAG requested. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_port_per_lag, 0x18, 0, 16); + +/* cmd_mbox_config_profile_max_mid + * Maximum Multicast IDs. + * Multicast IDs are allocated from 0 to max_mid-1 + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_mid, 0x1C, 0, 16); + +/* cmd_mbox_config_profile_max_pgt + * Maximum records in the Port Group Table per Switch Partition. + * Port Group Table indexes are from 0 to max_pgt-1 + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_pgt, 0x20, 0, 16); + +/* cmd_mbox_config_profile_max_system_port + * The maximum number of system ports that can be allocated. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_system_port, 0x24, 0, 16); + +/* cmd_mbox_config_profile_max_vlan_groups + * Maximum number VLAN Groups for VLAN binding. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_vlan_groups, 0x28, 0, 12); + +/* cmd_mbox_config_profile_max_regions + * Maximum number of TCAM Regions. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_regions, 0x2C, 0, 16); + +/* cmd_mbox_config_profile_max_flood_tables + * Maximum number of single-entry flooding tables. Different flooding tables + * can be associated with different packet types. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_flood_tables, 0x30, 16, 4); + +/* cmd_mbox_config_profile_max_vid_flood_tables + * Maximum number of per-vid flooding tables. Flooding tables are associated + * to the different packet types for the different switch partitions. + * Table size is 4K entries covering all VID space. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_vid_flood_tables, 0x30, 8, 4); + +enum mlxsw_cmd_mbox_config_profile_flood_mode { + /* Mixed mode, where: + * max_flood_tables indicates the number of single-entry tables. + * max_vid_flood_tables indicates the number of per-VID tables. + * max_fid_offset_flood_tables indicates the number of FID-offset + * tables. max_fid_flood_tables indicates the number of per-FID tables. + * Reserved when unified bridge model is used. + */ + MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_MIXED = 3, + /* Controlled flood tables. Reserved when legacy bridge model is + * used. + */ + MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_CONTROLLED = 4, +}; + +/* cmd_mbox_config_profile_flood_mode + * Flooding mode to use. 
+ */ +MLXSW_ITEM32(cmd_mbox, config_profile, flood_mode, 0x30, 0, 3); + +/* cmd_mbox_config_profile_max_fid_offset_flood_tables + * Maximum number of FID-offset flooding tables. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, + max_fid_offset_flood_tables, 0x34, 24, 4); + +/* cmd_mbox_config_profile_fid_offset_flood_table_size + * The size (number of entries) of each FID-offset flood table. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, + fid_offset_flood_table_size, 0x34, 0, 16); + +/* cmd_mbox_config_profile_max_fid_flood_tables + * Maximum number of per-FID flooding tables. + * + * Note: This flooding tables cover special FIDs only (vFIDs), starting at + * FID value 4K and higher. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_fid_flood_tables, 0x38, 24, 4); + +/* cmd_mbox_config_profile_fid_flood_table_size + * The size (number of entries) of each per-FID table. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, fid_flood_table_size, 0x38, 0, 16); + +/* cmd_mbox_config_profile_max_ib_mc + * Maximum number of multicast FDB records for InfiniBand + * FDB (in 512 chunks) per InfiniBand switch partition. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_ib_mc, 0x40, 0, 15); + +/* cmd_mbox_config_profile_max_pkey + * Maximum per port PKEY table size (for PKEY enforcement) + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_pkey, 0x44, 0, 15); + +/* cmd_mbox_config_profile_ar_sec + * Primary/secondary capability + * Describes the number of adaptive routing sub-groups + * 0 - disable primary/secondary (single group) + * 1 - enable primary/secondary (2 sub-groups) + * 2 - 3 sub-groups: Not supported in SwitchX, SwitchX-2 + * 3 - 4 sub-groups: Not supported in SwitchX, SwitchX-2 + */ +MLXSW_ITEM32(cmd_mbox, config_profile, ar_sec, 0x4C, 24, 2); + +/* cmd_mbox_config_profile_adaptive_routing_group_cap + * Adaptive Routing Group Capability. Indicates the number of AR groups + * supported. Note that when Primary/secondary is enabled, each + * primary/secondary couple consumes 2 adaptive routing entries. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, adaptive_routing_group_cap, 0x4C, 0, 16); + +/* cmd_mbox_config_profile_arn + * Adaptive Routing Notification Enable + * Not supported in SwitchX, SwitchX-2 + */ +MLXSW_ITEM32(cmd_mbox, config_profile, arn, 0x50, 31, 1); + +/* cmd_mbox_config_profile_ubridge + * Unified Bridge + * 0 - non unified bridge + * 1 - unified bridge + */ +MLXSW_ITEM32(cmd_mbox, config_profile, ubridge, 0x50, 4, 1); + +/* cmd_mbox_config_kvd_linear_size + * KVD Linear Size + * Valid for Spectrum only + * Allowed values are 128*N where N=0 or higher + */ +MLXSW_ITEM32(cmd_mbox, config_profile, kvd_linear_size, 0x54, 0, 24); + +/* cmd_mbox_config_kvd_hash_single_size + * KVD Hash single-entries size + * Valid for Spectrum only + * Allowed values are 128*N where N=0 or higher + * Must be greater or equal to cap_min_kvd_hash_single_size + * Must be smaller or equal to cap_kvd_size - kvd_linear_size + */ +MLXSW_ITEM32(cmd_mbox, config_profile, kvd_hash_single_size, 0x58, 0, 24); + +/* cmd_mbox_config_kvd_hash_double_size + * KVD Hash double-entries size (units of single-size entries) + * Valid for Spectrum only + * Allowed values are 128*N where N=0 or higher + * Must be either 0 or greater or equal to cap_min_kvd_hash_double_size + * Must be smaller or equal to cap_kvd_size - kvd_linear_size + */ +MLXSW_ITEM32(cmd_mbox, config_profile, kvd_hash_double_size, 0x5C, 0, 24); + +/* cmd_mbox_config_profile_swid_config_mask + * Modify Switch Partition Configuration mask. 
When set, the configu- + * ration value for the Switch Partition are taken from the mailbox. + * When clear, the current configuration values are used. + * Bit 0 - set type + * Bit 1 - properties + * Other - reserved + */ +MLXSW_ITEM32_INDEXED(cmd_mbox, config_profile, swid_config_mask, + 0x60, 24, 8, 0x08, 0x00, false); + +/* cmd_mbox_config_profile_swid_config_type + * Switch Partition type. + * 0000 - disabled (Switch Partition does not exist) + * 0001 - InfiniBand + * 0010 - Ethernet + * 1000 - router port (SwitchX-2 only) + * Other - reserved + */ +MLXSW_ITEM32_INDEXED(cmd_mbox, config_profile, swid_config_type, + 0x60, 20, 4, 0x08, 0x00, false); + +/* cmd_mbox_config_profile_swid_config_properties + * Switch Partition properties. + */ +MLXSW_ITEM32_INDEXED(cmd_mbox, config_profile, swid_config_properties, + 0x60, 0, 8, 0x08, 0x00, false); + +enum mlxsw_cmd_mbox_config_profile_cqe_time_stamp_type { + /* uSec - 1.024uSec (default). Only bits 15:0 are valid. */ + MLXSW_CMD_MBOX_CONFIG_PROFILE_CQE_TIME_STAMP_TYPE_USEC, + /* FRC - Free Running Clock, units of 1nSec. + * Reserved when SwitchX/-2, Switch-IB/2 and Spectrum-1. + */ + MLXSW_CMD_MBOX_CONFIG_PROFILE_CQE_TIME_STAMP_TYPE_FRC, + /* UTC. time_stamp[37:30] = Sec, time_stamp[29:0] = nSec. + * Reserved when SwitchX/2, Switch-IB/2 and Spectrum-1. + */ + MLXSW_CMD_MBOX_CONFIG_PROFILE_CQE_TIME_STAMP_TYPE_UTC, +}; + +/* cmd_mbox_config_profile_cqe_time_stamp_type + * CQE time_stamp_type for non-mirror-packets. + * Configured if set_cqe_time_stamp_type is set. + * Reserved when SwitchX/-2, Switch-IB/2 and Spectrum-1. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, cqe_time_stamp_type, 0xB0, 8, 2); + +/* cmd_mbox_config_profile_cqe_version + * CQE version: + * 0: CQE version is 0 + * 1: CQE version is either 1 or 2 + * CQE ver 1 or 2 is configured by Completion Queue Context field cqe_ver. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, cqe_version, 0xB0, 0, 8); + +/* ACCESS_REG - Access EMAD Supported Register + * ---------------------------------- + * OpMod == 0 (N/A), INMmod == 0 (N/A) + * ------------------------------------- + * The ACCESS_REG command supports accessing device registers. This access + * is mainly used for bootstrapping. + */ + +static inline int mlxsw_cmd_access_reg(struct mlxsw_core *mlxsw_core, + bool reset_ok, + char *in_mbox, char *out_mbox) +{ + return mlxsw_cmd_exec(mlxsw_core, MLXSW_CMD_OPCODE_ACCESS_REG, + 0, 0, false, reset_ok, + in_mbox, MLXSW_CMD_MBOX_SIZE, + out_mbox, MLXSW_CMD_MBOX_SIZE); +} + +/* SW2HW_DQ - Software to Hardware DQ + * ---------------------------------- + * OpMod == 0 (send DQ) / OpMod == 1 (receive DQ) + * INMmod == DQ number + * ---------------------------------------------- + * The SW2HW_DQ command transitions a descriptor queue from software to + * hardware ownership. The command enables posting WQEs and ringing DoorBells + * on the descriptor queue. 
+ */ + +static inline int __mlxsw_cmd_sw2hw_dq(struct mlxsw_core *mlxsw_core, + char *in_mbox, u32 dq_number, + u8 opcode_mod) +{ + return mlxsw_cmd_exec_in(mlxsw_core, MLXSW_CMD_OPCODE_SW2HW_DQ, + opcode_mod, dq_number, + in_mbox, MLXSW_CMD_MBOX_SIZE); +} + +enum { + MLXSW_CMD_OPCODE_MOD_SDQ = 0, + MLXSW_CMD_OPCODE_MOD_RDQ = 1, +}; + +static inline int mlxsw_cmd_sw2hw_sdq(struct mlxsw_core *mlxsw_core, + char *in_mbox, u32 dq_number) +{ + return __mlxsw_cmd_sw2hw_dq(mlxsw_core, in_mbox, dq_number, + MLXSW_CMD_OPCODE_MOD_SDQ); +} + +static inline int mlxsw_cmd_sw2hw_rdq(struct mlxsw_core *mlxsw_core, + char *in_mbox, u32 dq_number) +{ + return __mlxsw_cmd_sw2hw_dq(mlxsw_core, in_mbox, dq_number, + MLXSW_CMD_OPCODE_MOD_RDQ); +} + +/* cmd_mbox_sw2hw_dq_cq + * Number of the CQ that this Descriptor Queue reports completions to. + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_dq, cq, 0x00, 24, 8); + +enum mlxsw_cmd_mbox_sw2hw_dq_sdq_lp { + MLXSW_CMD_MBOX_SW2HW_DQ_SDQ_LP_WQE, + MLXSW_CMD_MBOX_SW2HW_DQ_SDQ_LP_IGNORE_WQE, +}; + +/* cmd_mbox_sw2hw_dq_sdq_lp + * SDQ local Processing + * 0: local processing by wqe.lp + * 1: local processing (ignoring wqe.lp) + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_dq, sdq_lp, 0x00, 23, 1); + +/* cmd_mbox_sw2hw_dq_sdq_tclass + * SDQ: CPU Egress TClass + * RDQ: Reserved + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_dq, sdq_tclass, 0x00, 16, 6); + +/* cmd_mbox_sw2hw_dq_log2_dq_sz + * Log (base 2) of the Descriptor Queue size in 4KB pages. + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_dq, log2_dq_sz, 0x00, 0, 6); + +/* cmd_mbox_sw2hw_dq_pa + * Physical Address. + */ +MLXSW_ITEM64_INDEXED(cmd_mbox, sw2hw_dq, pa, 0x10, 12, 52, 0x08, 0x00, true); + +/* HW2SW_DQ - Hardware to Software DQ + * ---------------------------------- + * OpMod == 0 (send DQ) / OpMod == 1 (receive DQ) + * INMmod == DQ number + * ---------------------------------------------- + * The HW2SW_DQ command transitions a descriptor queue from hardware to + * software ownership. Incoming packets on the DQ are silently discarded, + * SW should not post descriptors on nonoperational DQs. + */ + +static inline int __mlxsw_cmd_hw2sw_dq(struct mlxsw_core *mlxsw_core, + u32 dq_number, u8 opcode_mod) +{ + return mlxsw_cmd_exec_none(mlxsw_core, MLXSW_CMD_OPCODE_HW2SW_DQ, + opcode_mod, dq_number); +} + +static inline int mlxsw_cmd_hw2sw_sdq(struct mlxsw_core *mlxsw_core, + u32 dq_number) +{ + return __mlxsw_cmd_hw2sw_dq(mlxsw_core, dq_number, + MLXSW_CMD_OPCODE_MOD_SDQ); +} + +static inline int mlxsw_cmd_hw2sw_rdq(struct mlxsw_core *mlxsw_core, + u32 dq_number) +{ + return __mlxsw_cmd_hw2sw_dq(mlxsw_core, dq_number, + MLXSW_CMD_OPCODE_MOD_RDQ); +} + +/* 2ERR_DQ - To Error DQ + * --------------------- + * OpMod == 0 (send DQ) / OpMod == 1 (receive DQ) + * INMmod == DQ number + * ---------------------------------------------- + * The 2ERR_DQ command transitions the DQ into the error state from the state + * in which it has been. While the command is executed, some in-process + * descriptors may complete. Once the DQ transitions into the error state, + * if there are posted descriptors on the RDQ/SDQ, the hardware writes + * a completion with error (flushed) for all descriptors posted in the RDQ/SDQ. + * When the command is completed successfully, the DQ is already in + * the error state. 
+ */ + +static inline int __mlxsw_cmd_2err_dq(struct mlxsw_core *mlxsw_core, + u32 dq_number, u8 opcode_mod) +{ + return mlxsw_cmd_exec_none(mlxsw_core, MLXSW_CMD_OPCODE_2ERR_DQ, + opcode_mod, dq_number); +} + +static inline int mlxsw_cmd_2err_sdq(struct mlxsw_core *mlxsw_core, + u32 dq_number) +{ + return __mlxsw_cmd_2err_dq(mlxsw_core, dq_number, + MLXSW_CMD_OPCODE_MOD_SDQ); +} + +static inline int mlxsw_cmd_2err_rdq(struct mlxsw_core *mlxsw_core, + u32 dq_number) +{ + return __mlxsw_cmd_2err_dq(mlxsw_core, dq_number, + MLXSW_CMD_OPCODE_MOD_RDQ); +} + +/* QUERY_DQ - Query DQ + * --------------------- + * OpMod == 0 (send DQ) / OpMod == 1 (receive DQ) + * INMmod == DQ number + * ---------------------------------------------- + * The QUERY_DQ command retrieves a snapshot of DQ parameters from the hardware. + * + * Note: Output mailbox has the same format as SW2HW_DQ. + */ + +static inline int __mlxsw_cmd_query_dq(struct mlxsw_core *mlxsw_core, + char *out_mbox, u32 dq_number, + u8 opcode_mod) +{ + return mlxsw_cmd_exec_out(mlxsw_core, MLXSW_CMD_OPCODE_2ERR_DQ, + opcode_mod, dq_number, false, + out_mbox, MLXSW_CMD_MBOX_SIZE); +} + +static inline int mlxsw_cmd_query_sdq(struct mlxsw_core *mlxsw_core, + char *out_mbox, u32 dq_number) +{ + return __mlxsw_cmd_query_dq(mlxsw_core, out_mbox, dq_number, + MLXSW_CMD_OPCODE_MOD_SDQ); +} + +static inline int mlxsw_cmd_query_rdq(struct mlxsw_core *mlxsw_core, + char *out_mbox, u32 dq_number) +{ + return __mlxsw_cmd_query_dq(mlxsw_core, out_mbox, dq_number, + MLXSW_CMD_OPCODE_MOD_RDQ); +} + +/* SW2HW_CQ - Software to Hardware CQ + * ---------------------------------- + * OpMod == 0 (N/A), INMmod == CQ number + * ------------------------------------- + * The SW2HW_CQ command transfers ownership of a CQ context entry from software + * to hardware. The command takes the CQ context entry from the input mailbox + * and stores it in the CQC in the ownership of the hardware. The command fails + * if the requested CQC entry is already in the ownership of the hardware. + */ + +static inline int mlxsw_cmd_sw2hw_cq(struct mlxsw_core *mlxsw_core, + char *in_mbox, u32 cq_number) +{ + return mlxsw_cmd_exec_in(mlxsw_core, MLXSW_CMD_OPCODE_SW2HW_CQ, + 0, cq_number, in_mbox, MLXSW_CMD_MBOX_SIZE); +} + +enum mlxsw_cmd_mbox_sw2hw_cq_cqe_ver { + MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_1, + MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_2, +}; + +/* cmd_mbox_sw2hw_cq_cqe_ver + * CQE Version. + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_cq, cqe_ver, 0x00, 28, 4); + +/* cmd_mbox_sw2hw_cq_c_eqn + * Event Queue this CQ reports completion events to. + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_cq, c_eqn, 0x00, 24, 1); + +/* cmd_mbox_sw2hw_cq_st + * Event delivery state machine + * 0x0 - FIRED + * 0x1 - ARMED (Request for Notification) + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_cq, st, 0x00, 8, 1); + +/* cmd_mbox_sw2hw_cq_log_cq_size + * Log (base 2) of the CQ size (in entries). + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_cq, log_cq_size, 0x00, 0, 4); + +/* cmd_mbox_sw2hw_cq_producer_counter + * Producer Counter. The counter is incremented for each CQE that is + * written by the HW to the CQ. + * Maintained by HW (valid for the QUERY_CQ command only) + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_cq, producer_counter, 0x04, 0, 16); + +/* cmd_mbox_sw2hw_cq_pa + * Physical Address. 
+ */ +MLXSW_ITEM64_INDEXED(cmd_mbox, sw2hw_cq, pa, 0x10, 11, 53, 0x08, 0x00, true); + +/* HW2SW_CQ - Hardware to Software CQ + * ---------------------------------- + * OpMod == 0 (N/A), INMmod == CQ number + * ------------------------------------- + * The HW2SW_CQ command transfers ownership of a CQ context entry from hardware + * to software. The CQC entry is invalidated as a result of this command. + */ + +static inline int mlxsw_cmd_hw2sw_cq(struct mlxsw_core *mlxsw_core, + u32 cq_number) +{ + return mlxsw_cmd_exec_none(mlxsw_core, MLXSW_CMD_OPCODE_HW2SW_CQ, + 0, cq_number); +} + +/* QUERY_CQ - Query CQ + * ---------------------------------- + * OpMod == 0 (N/A), INMmod == CQ number + * ------------------------------------- + * The QUERY_CQ command retrieves a snapshot of the current CQ context entry. + * The command stores the snapshot in the output mailbox in the software format. + * Note that the CQ context state and values are not affected by the QUERY_CQ + * command. The QUERY_CQ command is for debug purposes only. + * + * Note: Output mailbox has the same format as SW2HW_CQ. + */ + +static inline int mlxsw_cmd_query_cq(struct mlxsw_core *mlxsw_core, + char *out_mbox, u32 cq_number) +{ + return mlxsw_cmd_exec_out(mlxsw_core, MLXSW_CMD_OPCODE_QUERY_CQ, + 0, cq_number, false, + out_mbox, MLXSW_CMD_MBOX_SIZE); +} + +/* SW2HW_EQ - Software to Hardware EQ + * ---------------------------------- + * OpMod == 0 (N/A), INMmod == EQ number + * ------------------------------------- + * The SW2HW_EQ command transfers ownership of an EQ context entry from software + * to hardware. The command takes the EQ context entry from the input mailbox + * and stores it in the EQC in the ownership of the hardware. The command fails + * if the requested EQC entry is already in the ownership of the hardware. + */ + +static inline int mlxsw_cmd_sw2hw_eq(struct mlxsw_core *mlxsw_core, + char *in_mbox, u32 eq_number) +{ + return mlxsw_cmd_exec_in(mlxsw_core, MLXSW_CMD_OPCODE_SW2HW_EQ, + 0, eq_number, in_mbox, MLXSW_CMD_MBOX_SIZE); +} + +/* cmd_mbox_sw2hw_eq_int_msix + * When set, MSI-X cycles will be generated by this EQ. + * When cleared, an interrupt will be generated by this EQ. + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_eq, int_msix, 0x00, 24, 1); + +/* cmd_mbox_sw2hw_eq_st + * Event delivery state machine + * 0x0 - FIRED + * 0x1 - ARMED (Request for Notification) + * 0x11 - Always ARMED + * other - reserved + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_eq, st, 0x00, 8, 2); + +/* cmd_mbox_sw2hw_eq_log_eq_size + * Log (base 2) of the EQ size (in entries). + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_eq, log_eq_size, 0x00, 0, 4); + +/* cmd_mbox_sw2hw_eq_producer_counter + * Producer Counter. The counter is incremented for each EQE that is written + * by the HW to the EQ. + * Maintained by HW (valid for the QUERY_EQ command only) + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_eq, producer_counter, 0x04, 0, 16); + +/* cmd_mbox_sw2hw_eq_pa + * Physical Address. 
+ */ +MLXSW_ITEM64_INDEXED(cmd_mbox, sw2hw_eq, pa, 0x10, 11, 53, 0x08, 0x00, true); + +/* HW2SW_EQ - Hardware to Software EQ + * ---------------------------------- + * OpMod == 0 (N/A), INMmod == EQ number + * ------------------------------------- + */ + +static inline int mlxsw_cmd_hw2sw_eq(struct mlxsw_core *mlxsw_core, + u32 eq_number) +{ + return mlxsw_cmd_exec_none(mlxsw_core, MLXSW_CMD_OPCODE_HW2SW_EQ, + 0, eq_number); +} + +/* QUERY_EQ - Query EQ + * ---------------------------------- + * OpMod == 0 (N/A), INMmod == EQ number + * ------------------------------------- + * + * Note: Output mailbox has the same format as SW2HW_EQ. + */ + +static inline int mlxsw_cmd_query_eq(struct mlxsw_core *mlxsw_core, + char *out_mbox, u32 eq_number) +{ + return mlxsw_cmd_exec_out(mlxsw_core, MLXSW_CMD_OPCODE_QUERY_EQ, + 0, eq_number, false, + out_mbox, MLXSW_CMD_MBOX_SIZE); +} + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c new file mode 100644 index 000000000..e2a985ec2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -0,0 +1,3443 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/export.h> +#include <linux/err.h> +#include <linux/if_link.h> +#include <linux/netdevice.h> +#include <linux/completion.h> +#include <linux/skbuff.h> +#include <linux/etherdevice.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/gfp.h> +#include <linux/random.h> +#include <linux/jiffies.h> +#include <linux/mutex.h> +#include <linux/rcupdate.h> +#include <linux/slab.h> +#include <linux/workqueue.h> +#include <linux/firmware.h> +#include <asm/byteorder.h> +#include <net/devlink.h> +#include <trace/events/devlink.h> + +#include "core.h" +#include "core_env.h" +#include "item.h" +#include "cmd.h" +#include "port.h" +#include "trap.h" +#include "emad.h" +#include "reg.h" +#include "resources.h" +#include "../mlxfw/mlxfw.h" + +static LIST_HEAD(mlxsw_core_driver_list); +static DEFINE_SPINLOCK(mlxsw_core_driver_list_lock); + +static const char mlxsw_core_driver_name[] = "mlxsw_core"; + +static struct workqueue_struct *mlxsw_wq; +static struct workqueue_struct *mlxsw_owq; + +struct mlxsw_core_port { + struct devlink_port devlink_port; + void *port_driver_priv; + u16 local_port; + struct mlxsw_linecard *linecard; +}; + +void *mlxsw_core_port_driver_priv(struct mlxsw_core_port *mlxsw_core_port) +{ + return mlxsw_core_port->port_driver_priv; +} +EXPORT_SYMBOL(mlxsw_core_port_driver_priv); + +static bool mlxsw_core_port_check(struct mlxsw_core_port *mlxsw_core_port) +{ + return mlxsw_core_port->port_driver_priv != NULL; +} + +struct mlxsw_core { + struct mlxsw_driver *driver; + const struct mlxsw_bus *bus; + void *bus_priv; + const struct mlxsw_bus_info *bus_info; + struct workqueue_struct *emad_wq; + struct list_head rx_listener_list; + struct list_head event_listener_list; + struct list_head irq_event_handler_list; + struct mutex irq_event_handler_lock; /* Locks access to handlers list */ + struct { + atomic64_t tid; + struct list_head trans_list; + spinlock_t trans_list_lock; /* protects trans_list writes */ + bool use_emad; + bool enable_string_tlv; + } emad; + struct { + u16 *mapping; /* lag_id+port_index to local_port mapping */ + } lag; + struct mlxsw_res res; + struct mlxsw_hwmon *hwmon; + struct mlxsw_thermal *thermal; + struct mlxsw_linecards 
*linecards; + struct mlxsw_core_port *ports; + unsigned int max_ports; + atomic_t active_ports_count; + bool fw_flash_in_progress; + struct { + struct devlink_health_reporter *fw_fatal; + } health; + struct mlxsw_env *env; + unsigned long driver_priv[]; + /* driver_priv has to be always the last item */ +}; + +struct mlxsw_linecards *mlxsw_core_linecards(struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->linecards; +} + +void mlxsw_core_linecards_set(struct mlxsw_core *mlxsw_core, + struct mlxsw_linecards *linecards) +{ + mlxsw_core->linecards = linecards; +} + +#define MLXSW_PORT_MAX_PORTS_DEFAULT 0x40 + +static u64 mlxsw_ports_occ_get(void *priv) +{ + struct mlxsw_core *mlxsw_core = priv; + + return atomic_read(&mlxsw_core->active_ports_count); +} + +static int mlxsw_core_resources_ports_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct devlink_resource_size_params ports_num_params; + u32 max_ports; + + max_ports = mlxsw_core->max_ports - 1; + devlink_resource_size_params_init(&ports_num_params, max_ports, + max_ports, 1, + DEVLINK_RESOURCE_UNIT_ENTRY); + + return devl_resource_register(devlink, + DEVLINK_RESOURCE_GENERIC_NAME_PORTS, + max_ports, MLXSW_CORE_RESOURCE_PORTS, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &ports_num_params); +} + +static int mlxsw_ports_init(struct mlxsw_core *mlxsw_core, bool reload) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + int err; + + /* Switch ports are numbered from 1 to queried value */ + if (MLXSW_CORE_RES_VALID(mlxsw_core, MAX_SYSTEM_PORT)) + mlxsw_core->max_ports = MLXSW_CORE_RES_GET(mlxsw_core, + MAX_SYSTEM_PORT) + 1; + else + mlxsw_core->max_ports = MLXSW_PORT_MAX_PORTS_DEFAULT + 1; + + mlxsw_core->ports = kcalloc(mlxsw_core->max_ports, + sizeof(struct mlxsw_core_port), GFP_KERNEL); + if (!mlxsw_core->ports) + return -ENOMEM; + + if (!reload) { + err = mlxsw_core_resources_ports_register(mlxsw_core); + if (err) + goto err_resources_ports_register; + } + atomic_set(&mlxsw_core->active_ports_count, 0); + devl_resource_occ_get_register(devlink, MLXSW_CORE_RESOURCE_PORTS, + mlxsw_ports_occ_get, mlxsw_core); + + return 0; + +err_resources_ports_register: + kfree(mlxsw_core->ports); + return err; +} + +static void mlxsw_ports_fini(struct mlxsw_core *mlxsw_core, bool reload) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + + devl_resource_occ_get_unregister(devlink, MLXSW_CORE_RESOURCE_PORTS); + if (!reload) + devl_resources_unregister(priv_to_devlink(mlxsw_core)); + + kfree(mlxsw_core->ports); +} + +unsigned int mlxsw_core_max_ports(const struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->max_ports; +} +EXPORT_SYMBOL(mlxsw_core_max_ports); + +int mlxsw_core_max_lag(struct mlxsw_core *mlxsw_core, u16 *p_max_lag) +{ + struct mlxsw_driver *driver = mlxsw_core->driver; + + if (driver->profile->used_max_lag) { + *p_max_lag = driver->profile->max_lag; + return 0; + } + + if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_LAG)) + return -EIO; + + *p_max_lag = MLXSW_CORE_RES_GET(mlxsw_core, MAX_LAG); + return 0; +} +EXPORT_SYMBOL(mlxsw_core_max_lag); + +void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->driver_priv; +} +EXPORT_SYMBOL(mlxsw_core_driver_priv); + +bool +mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev, + const struct mlxsw_fw_rev *req_rev) +{ + return rev->minor > req_rev->minor || + (rev->minor == req_rev->minor && + rev->subminor >= req_rev->subminor); +} 
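
Illustrative aside (editor's sketch, not part of the upstream diff): the helper just above compares firmware revisions as a (minor, subminor) pair, where a newer minor always satisfies the requirement and an equal minor falls back to the subminor check. The following small userspace C sketch mirrors that comparison; the struct is reduced to the two fields the check reads and the test values are made up.

#include <assert.h>
#include <stdbool.h>

/* Reduced stand-in for struct mlxsw_fw_rev: only the two fields the check reads. */
struct fw_rev { unsigned int minor, subminor; };

/* Mirrors mlxsw_core_fw_rev_minor_subminor_validate(): a higher minor always
 * passes; an equal minor falls back to the subminor comparison.
 */
static bool rev_ok(const struct fw_rev *rev, const struct fw_rev *req)
{
	return rev->minor > req->minor ||
	       (rev->minor == req->minor && rev->subminor >= req->subminor);
}

int main(void)
{
	const struct fw_rev req         = { .minor = 2008, .subminor = 1310 };
	const struct fw_rev same        = { .minor = 2008, .subminor = 1310 };
	const struct fw_rev newer_minor = { .minor = 2010, .subminor = 0 };
	const struct fw_rev older_sub   = { .minor = 2008, .subminor = 1302 };

	assert(rev_ok(&same, &req));        /* equal revision is accepted */
	assert(rev_ok(&newer_minor, &req)); /* higher minor wins regardless of subminor */
	assert(!rev_ok(&older_sub, &req));  /* same minor, older subminor is rejected */
	return 0;
}
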
+EXPORT_SYMBOL(mlxsw_core_fw_rev_minor_subminor_validate); + +struct mlxsw_rx_listener_item { + struct list_head list; + struct mlxsw_rx_listener rxl; + void *priv; + bool enabled; +}; + +struct mlxsw_event_listener_item { + struct list_head list; + struct mlxsw_core *mlxsw_core; + struct mlxsw_event_listener el; + void *priv; +}; + +static const u8 mlxsw_core_trap_groups[] = { + MLXSW_REG_HTGT_TRAP_GROUP_EMAD, + MLXSW_REG_HTGT_TRAP_GROUP_CORE_EVENT, +}; + +static int mlxsw_core_trap_groups_set(struct mlxsw_core *mlxsw_core) +{ + char htgt_pl[MLXSW_REG_HTGT_LEN]; + int err; + int i; + + if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) + return 0; + + for (i = 0; i < ARRAY_SIZE(mlxsw_core_trap_groups); i++) { + mlxsw_reg_htgt_pack(htgt_pl, mlxsw_core_trap_groups[i], + MLXSW_REG_HTGT_INVALID_POLICER, + MLXSW_REG_HTGT_DEFAULT_PRIORITY, + MLXSW_REG_HTGT_DEFAULT_TC); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); + if (err) + return err; + } + return 0; +} + +/****************** + * EMAD processing + ******************/ + +/* emad_eth_hdr_dmac + * Destination MAC in EMAD's Ethernet header. + * Must be set to 01:02:c9:00:00:01 + */ +MLXSW_ITEM_BUF(emad, eth_hdr, dmac, 0x00, 6); + +/* emad_eth_hdr_smac + * Source MAC in EMAD's Ethernet header. + * Must be set to 00:02:c9:01:02:03 + */ +MLXSW_ITEM_BUF(emad, eth_hdr, smac, 0x06, 6); + +/* emad_eth_hdr_ethertype + * Ethertype in EMAD's Ethernet header. + * Must be set to 0x8932 + */ +MLXSW_ITEM32(emad, eth_hdr, ethertype, 0x0C, 16, 16); + +/* emad_eth_hdr_mlx_proto + * Mellanox protocol. + * Must be set to 0x0. + */ +MLXSW_ITEM32(emad, eth_hdr, mlx_proto, 0x0C, 8, 8); + +/* emad_eth_hdr_ver + * Mellanox protocol version. + * Must be set to 0x0. + */ +MLXSW_ITEM32(emad, eth_hdr, ver, 0x0C, 4, 4); + +/* emad_op_tlv_type + * Type of the TLV. + * Must be set to 0x1 (operation TLV). + */ +MLXSW_ITEM32(emad, op_tlv, type, 0x00, 27, 5); + +/* emad_op_tlv_len + * Length of the operation TLV in u32. + * Must be set to 0x4. + */ +MLXSW_ITEM32(emad, op_tlv, len, 0x00, 16, 11); + +/* emad_op_tlv_dr + * Direct route bit. Setting to 1 indicates the EMAD is a direct route + * EMAD. DR TLV must follow. + * + * Note: Currently not supported and must not be set. + */ +MLXSW_ITEM32(emad, op_tlv, dr, 0x00, 15, 1); + +/* emad_op_tlv_status + * Returned status in case of EMAD response. Must be set to 0 in case + * of EMAD request. + * 0x0 - success + * 0x1 - device is busy. Requester should retry + * 0x2 - Mellanox protocol version not supported + * 0x3 - unknown TLV + * 0x4 - register not supported + * 0x5 - operation class not supported + * 0x6 - EMAD method not supported + * 0x7 - bad parameter (e.g. port out of range) + * 0x8 - resource not available + * 0x9 - message receipt acknowledgment. Requester should retry + * 0x70 - internal error + */ +MLXSW_ITEM32(emad, op_tlv, status, 0x00, 8, 7); + +/* emad_op_tlv_register_id + * Register ID of register within register TLV. + */ +MLXSW_ITEM32(emad, op_tlv, register_id, 0x04, 16, 16); + +/* emad_op_tlv_r + * Response bit. Setting to 1 indicates Response, otherwise request. + */ +MLXSW_ITEM32(emad, op_tlv, r, 0x04, 15, 1); + +/* emad_op_tlv_method + * EMAD method type. + * 0x1 - query + * 0x2 - write + * 0x3 - send (currently not supported) + * 0x4 - event + */ +MLXSW_ITEM32(emad, op_tlv, method, 0x04, 8, 7); + +/* emad_op_tlv_class + * EMAD operation class. Must be set to 0x1 (REG_ACCESS). + */ +MLXSW_ITEM32(emad, op_tlv, class, 0x04, 0, 8); + +/* emad_op_tlv_tid + * EMAD transaction ID. 
Used for pairing request and response EMADs. + */ +MLXSW_ITEM64(emad, op_tlv, tid, 0x08, 0, 64); + +/* emad_string_tlv_type + * Type of the TLV. + * Must be set to 0x2 (string TLV). + */ +MLXSW_ITEM32(emad, string_tlv, type, 0x00, 27, 5); + +/* emad_string_tlv_len + * Length of the string TLV in u32. + */ +MLXSW_ITEM32(emad, string_tlv, len, 0x00, 16, 11); + +#define MLXSW_EMAD_STRING_TLV_STRING_LEN 128 + +/* emad_string_tlv_string + * String provided by the device's firmware in case of erroneous register access + */ +MLXSW_ITEM_BUF(emad, string_tlv, string, 0x04, + MLXSW_EMAD_STRING_TLV_STRING_LEN); + +/* emad_reg_tlv_type + * Type of the TLV. + * Must be set to 0x3 (register TLV). + */ +MLXSW_ITEM32(emad, reg_tlv, type, 0x00, 27, 5); + +/* emad_reg_tlv_len + * Length of the operation TLV in u32. + */ +MLXSW_ITEM32(emad, reg_tlv, len, 0x00, 16, 11); + +/* emad_end_tlv_type + * Type of the TLV. + * Must be set to 0x0 (end TLV). + */ +MLXSW_ITEM32(emad, end_tlv, type, 0x00, 27, 5); + +/* emad_end_tlv_len + * Length of the end TLV in u32. + * Must be set to 1. + */ +MLXSW_ITEM32(emad, end_tlv, len, 0x00, 16, 11); + +enum mlxsw_core_reg_access_type { + MLXSW_CORE_REG_ACCESS_TYPE_QUERY, + MLXSW_CORE_REG_ACCESS_TYPE_WRITE, +}; + +static inline const char * +mlxsw_core_reg_access_type_str(enum mlxsw_core_reg_access_type type) +{ + switch (type) { + case MLXSW_CORE_REG_ACCESS_TYPE_QUERY: + return "query"; + case MLXSW_CORE_REG_ACCESS_TYPE_WRITE: + return "write"; + } + BUG(); +} + +static void mlxsw_emad_pack_end_tlv(char *end_tlv) +{ + mlxsw_emad_end_tlv_type_set(end_tlv, MLXSW_EMAD_TLV_TYPE_END); + mlxsw_emad_end_tlv_len_set(end_tlv, MLXSW_EMAD_END_TLV_LEN); +} + +static void mlxsw_emad_pack_reg_tlv(char *reg_tlv, + const struct mlxsw_reg_info *reg, + char *payload) +{ + mlxsw_emad_reg_tlv_type_set(reg_tlv, MLXSW_EMAD_TLV_TYPE_REG); + mlxsw_emad_reg_tlv_len_set(reg_tlv, reg->len / sizeof(u32) + 1); + memcpy(reg_tlv + sizeof(u32), payload, reg->len); +} + +static void mlxsw_emad_pack_string_tlv(char *string_tlv) +{ + mlxsw_emad_string_tlv_type_set(string_tlv, MLXSW_EMAD_TLV_TYPE_STRING); + mlxsw_emad_string_tlv_len_set(string_tlv, MLXSW_EMAD_STRING_TLV_LEN); +} + +static void mlxsw_emad_pack_op_tlv(char *op_tlv, + const struct mlxsw_reg_info *reg, + enum mlxsw_core_reg_access_type type, + u64 tid) +{ + mlxsw_emad_op_tlv_type_set(op_tlv, MLXSW_EMAD_TLV_TYPE_OP); + mlxsw_emad_op_tlv_len_set(op_tlv, MLXSW_EMAD_OP_TLV_LEN); + mlxsw_emad_op_tlv_dr_set(op_tlv, 0); + mlxsw_emad_op_tlv_status_set(op_tlv, 0); + mlxsw_emad_op_tlv_register_id_set(op_tlv, reg->id); + mlxsw_emad_op_tlv_r_set(op_tlv, MLXSW_EMAD_OP_TLV_REQUEST); + if (type == MLXSW_CORE_REG_ACCESS_TYPE_QUERY) + mlxsw_emad_op_tlv_method_set(op_tlv, + MLXSW_EMAD_OP_TLV_METHOD_QUERY); + else + mlxsw_emad_op_tlv_method_set(op_tlv, + MLXSW_EMAD_OP_TLV_METHOD_WRITE); + mlxsw_emad_op_tlv_class_set(op_tlv, + MLXSW_EMAD_OP_TLV_CLASS_REG_ACCESS); + mlxsw_emad_op_tlv_tid_set(op_tlv, tid); +} + +static int mlxsw_emad_construct_eth_hdr(struct sk_buff *skb) +{ + char *eth_hdr = skb_push(skb, MLXSW_EMAD_ETH_HDR_LEN); + + mlxsw_emad_eth_hdr_dmac_memcpy_to(eth_hdr, MLXSW_EMAD_EH_DMAC); + mlxsw_emad_eth_hdr_smac_memcpy_to(eth_hdr, MLXSW_EMAD_EH_SMAC); + mlxsw_emad_eth_hdr_ethertype_set(eth_hdr, MLXSW_EMAD_EH_ETHERTYPE); + mlxsw_emad_eth_hdr_mlx_proto_set(eth_hdr, MLXSW_EMAD_EH_MLX_PROTO); + mlxsw_emad_eth_hdr_ver_set(eth_hdr, MLXSW_EMAD_EH_PROTO_VERSION); + + skb_reset_mac_header(skb); + + return 0; +} + +static void mlxsw_emad_construct(struct sk_buff *skb, + 
const struct mlxsw_reg_info *reg, + char *payload, + enum mlxsw_core_reg_access_type type, + u64 tid, bool enable_string_tlv) +{ + char *buf; + + buf = skb_push(skb, MLXSW_EMAD_END_TLV_LEN * sizeof(u32)); + mlxsw_emad_pack_end_tlv(buf); + + buf = skb_push(skb, reg->len + sizeof(u32)); + mlxsw_emad_pack_reg_tlv(buf, reg, payload); + + if (enable_string_tlv) { + buf = skb_push(skb, MLXSW_EMAD_STRING_TLV_LEN * sizeof(u32)); + mlxsw_emad_pack_string_tlv(buf); + } + + buf = skb_push(skb, MLXSW_EMAD_OP_TLV_LEN * sizeof(u32)); + mlxsw_emad_pack_op_tlv(buf, reg, type, tid); + + mlxsw_emad_construct_eth_hdr(skb); +} + +struct mlxsw_emad_tlv_offsets { + u16 op_tlv; + u16 string_tlv; + u16 reg_tlv; +}; + +static bool mlxsw_emad_tlv_is_string_tlv(const char *tlv) +{ + u8 tlv_type = mlxsw_emad_string_tlv_type_get(tlv); + + return tlv_type == MLXSW_EMAD_TLV_TYPE_STRING; +} + +static void mlxsw_emad_tlv_parse(struct sk_buff *skb) +{ + struct mlxsw_emad_tlv_offsets *offsets = + (struct mlxsw_emad_tlv_offsets *) skb->cb; + + offsets->op_tlv = MLXSW_EMAD_ETH_HDR_LEN; + offsets->string_tlv = 0; + offsets->reg_tlv = MLXSW_EMAD_ETH_HDR_LEN + + MLXSW_EMAD_OP_TLV_LEN * sizeof(u32); + + /* If string TLV is present, it must come after the operation TLV. */ + if (mlxsw_emad_tlv_is_string_tlv(skb->data + offsets->reg_tlv)) { + offsets->string_tlv = offsets->reg_tlv; + offsets->reg_tlv += MLXSW_EMAD_STRING_TLV_LEN * sizeof(u32); + } +} + +static char *mlxsw_emad_op_tlv(const struct sk_buff *skb) +{ + struct mlxsw_emad_tlv_offsets *offsets = + (struct mlxsw_emad_tlv_offsets *) skb->cb; + + return ((char *) (skb->data + offsets->op_tlv)); +} + +static char *mlxsw_emad_string_tlv(const struct sk_buff *skb) +{ + struct mlxsw_emad_tlv_offsets *offsets = + (struct mlxsw_emad_tlv_offsets *) skb->cb; + + if (!offsets->string_tlv) + return NULL; + + return ((char *) (skb->data + offsets->string_tlv)); +} + +static char *mlxsw_emad_reg_tlv(const struct sk_buff *skb) +{ + struct mlxsw_emad_tlv_offsets *offsets = + (struct mlxsw_emad_tlv_offsets *) skb->cb; + + return ((char *) (skb->data + offsets->reg_tlv)); +} + +static char *mlxsw_emad_reg_payload(const char *reg_tlv) +{ + return ((char *) (reg_tlv + sizeof(u32))); +} + +static char *mlxsw_emad_reg_payload_cmd(const char *mbox) +{ + return ((char *) (mbox + (MLXSW_EMAD_OP_TLV_LEN + 1) * sizeof(u32))); +} + +static u64 mlxsw_emad_get_tid(const struct sk_buff *skb) +{ + char *op_tlv; + + op_tlv = mlxsw_emad_op_tlv(skb); + return mlxsw_emad_op_tlv_tid_get(op_tlv); +} + +static bool mlxsw_emad_is_resp(const struct sk_buff *skb) +{ + char *op_tlv; + + op_tlv = mlxsw_emad_op_tlv(skb); + return (mlxsw_emad_op_tlv_r_get(op_tlv) == MLXSW_EMAD_OP_TLV_RESPONSE); +} + +static int mlxsw_emad_process_status(char *op_tlv, + enum mlxsw_emad_op_tlv_status *p_status) +{ + *p_status = mlxsw_emad_op_tlv_status_get(op_tlv); + + switch (*p_status) { + case MLXSW_EMAD_OP_TLV_STATUS_SUCCESS: + return 0; + case MLXSW_EMAD_OP_TLV_STATUS_BUSY: + case MLXSW_EMAD_OP_TLV_STATUS_MESSAGE_RECEIPT_ACK: + return -EAGAIN; + case MLXSW_EMAD_OP_TLV_STATUS_VERSION_NOT_SUPPORTED: + case MLXSW_EMAD_OP_TLV_STATUS_UNKNOWN_TLV: + case MLXSW_EMAD_OP_TLV_STATUS_REGISTER_NOT_SUPPORTED: + case MLXSW_EMAD_OP_TLV_STATUS_CLASS_NOT_SUPPORTED: + case MLXSW_EMAD_OP_TLV_STATUS_METHOD_NOT_SUPPORTED: + case MLXSW_EMAD_OP_TLV_STATUS_BAD_PARAMETER: + case MLXSW_EMAD_OP_TLV_STATUS_RESOURCE_NOT_AVAILABLE: + case MLXSW_EMAD_OP_TLV_STATUS_INTERNAL_ERROR: + default: + return -EIO; + } +} + +static int 
+mlxsw_emad_process_status_skb(struct sk_buff *skb, + enum mlxsw_emad_op_tlv_status *p_status) +{ + return mlxsw_emad_process_status(mlxsw_emad_op_tlv(skb), p_status); +} + +struct mlxsw_reg_trans { + struct list_head list; + struct list_head bulk_list; + struct mlxsw_core *core; + struct sk_buff *tx_skb; + struct mlxsw_tx_info tx_info; + struct delayed_work timeout_dw; + unsigned int retries; + u64 tid; + struct completion completion; + atomic_t active; + mlxsw_reg_trans_cb_t *cb; + unsigned long cb_priv; + const struct mlxsw_reg_info *reg; + enum mlxsw_core_reg_access_type type; + int err; + char *emad_err_string; + enum mlxsw_emad_op_tlv_status emad_status; + struct rcu_head rcu; +}; + +static void mlxsw_emad_process_string_tlv(const struct sk_buff *skb, + struct mlxsw_reg_trans *trans) +{ + char *string_tlv; + char *string; + + string_tlv = mlxsw_emad_string_tlv(skb); + if (!string_tlv) + return; + + trans->emad_err_string = kzalloc(MLXSW_EMAD_STRING_TLV_STRING_LEN, + GFP_ATOMIC); + if (!trans->emad_err_string) + return; + + string = mlxsw_emad_string_tlv_string_data(string_tlv); + strscpy(trans->emad_err_string, string, + MLXSW_EMAD_STRING_TLV_STRING_LEN); +} + +#define MLXSW_EMAD_TIMEOUT_DURING_FW_FLASH_MS 3000 +#define MLXSW_EMAD_TIMEOUT_MS 200 + +static void mlxsw_emad_trans_timeout_schedule(struct mlxsw_reg_trans *trans) +{ + unsigned long timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_MS); + + if (trans->core->fw_flash_in_progress) + timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_DURING_FW_FLASH_MS); + + queue_delayed_work(trans->core->emad_wq, &trans->timeout_dw, + timeout << trans->retries); +} + +static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core, + struct mlxsw_reg_trans *trans) +{ + struct sk_buff *skb; + int err; + + skb = skb_clone(trans->tx_skb, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + trace_devlink_hwmsg(priv_to_devlink(mlxsw_core), false, 0, + skb->data + mlxsw_core->driver->txhdr_len, + skb->len - mlxsw_core->driver->txhdr_len); + + atomic_set(&trans->active, 1); + err = mlxsw_core_skb_transmit(mlxsw_core, skb, &trans->tx_info); + if (err) { + dev_kfree_skb(skb); + return err; + } + mlxsw_emad_trans_timeout_schedule(trans); + return 0; +} + +static void mlxsw_emad_trans_finish(struct mlxsw_reg_trans *trans, int err) +{ + struct mlxsw_core *mlxsw_core = trans->core; + + dev_kfree_skb(trans->tx_skb); + spin_lock_bh(&mlxsw_core->emad.trans_list_lock); + list_del_rcu(&trans->list); + spin_unlock_bh(&mlxsw_core->emad.trans_list_lock); + trans->err = err; + complete(&trans->completion); +} + +static void mlxsw_emad_transmit_retry(struct mlxsw_core *mlxsw_core, + struct mlxsw_reg_trans *trans) +{ + int err; + + if (trans->retries < MLXSW_EMAD_MAX_RETRY) { + trans->retries++; + err = mlxsw_emad_transmit(trans->core, trans); + if (err == 0) + return; + + if (!atomic_dec_and_test(&trans->active)) + return; + } else { + err = -EIO; + } + mlxsw_emad_trans_finish(trans, err); +} + +static void mlxsw_emad_trans_timeout_work(struct work_struct *work) +{ + struct mlxsw_reg_trans *trans = container_of(work, + struct mlxsw_reg_trans, + timeout_dw.work); + + if (!atomic_dec_and_test(&trans->active)) + return; + + mlxsw_emad_transmit_retry(trans->core, trans); +} + +static void mlxsw_emad_process_response(struct mlxsw_core *mlxsw_core, + struct mlxsw_reg_trans *trans, + struct sk_buff *skb) +{ + int err; + + if (!atomic_dec_and_test(&trans->active)) + return; + + err = mlxsw_emad_process_status_skb(skb, &trans->emad_status); + if (err == -EAGAIN) { + 
mlxsw_emad_transmit_retry(mlxsw_core, trans); + } else { + if (err == 0) { + char *reg_tlv = mlxsw_emad_reg_tlv(skb); + + if (trans->cb) + trans->cb(mlxsw_core, + mlxsw_emad_reg_payload(reg_tlv), + trans->reg->len, trans->cb_priv); + } else { + mlxsw_emad_process_string_tlv(skb, trans); + } + mlxsw_emad_trans_finish(trans, err); + } +} + +/* called with rcu read lock held */ +static void mlxsw_emad_rx_listener_func(struct sk_buff *skb, u16 local_port, + void *priv) +{ + struct mlxsw_core *mlxsw_core = priv; + struct mlxsw_reg_trans *trans; + + trace_devlink_hwmsg(priv_to_devlink(mlxsw_core), true, 0, + skb->data, skb->len); + + mlxsw_emad_tlv_parse(skb); + + if (!mlxsw_emad_is_resp(skb)) + goto free_skb; + + list_for_each_entry_rcu(trans, &mlxsw_core->emad.trans_list, list) { + if (mlxsw_emad_get_tid(skb) == trans->tid) { + mlxsw_emad_process_response(mlxsw_core, trans, skb); + break; + } + } + +free_skb: + dev_kfree_skb(skb); +} + +static const struct mlxsw_listener mlxsw_emad_rx_listener = + MLXSW_RXL(mlxsw_emad_rx_listener_func, ETHEMAD, TRAP_TO_CPU, false, + EMAD, DISCARD); + +static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) +{ + struct workqueue_struct *emad_wq; + u64 tid; + int err; + + if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) + return 0; + + emad_wq = alloc_workqueue("mlxsw_core_emad", 0, 0); + if (!emad_wq) + return -ENOMEM; + mlxsw_core->emad_wq = emad_wq; + + /* Set the upper 32 bits of the transaction ID field to a random + * number. This allows us to discard EMADs addressed to other + * devices. + */ + get_random_bytes(&tid, 4); + tid <<= 32; + atomic64_set(&mlxsw_core->emad.tid, tid); + + INIT_LIST_HEAD(&mlxsw_core->emad.trans_list); + spin_lock_init(&mlxsw_core->emad.trans_list_lock); + + err = mlxsw_core_trap_register(mlxsw_core, &mlxsw_emad_rx_listener, + mlxsw_core); + if (err) + goto err_trap_register; + + mlxsw_core->emad.use_emad = true; + + return 0; + +err_trap_register: + destroy_workqueue(mlxsw_core->emad_wq); + return err; +} + +static void mlxsw_emad_fini(struct mlxsw_core *mlxsw_core) +{ + + if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) + return; + + mlxsw_core->emad.use_emad = false; + mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_emad_rx_listener, + mlxsw_core); + destroy_workqueue(mlxsw_core->emad_wq); +} + +static struct sk_buff *mlxsw_emad_alloc(const struct mlxsw_core *mlxsw_core, + u16 reg_len, bool enable_string_tlv) +{ + struct sk_buff *skb; + u16 emad_len; + + emad_len = (reg_len + sizeof(u32) + MLXSW_EMAD_ETH_HDR_LEN + + (MLXSW_EMAD_OP_TLV_LEN + MLXSW_EMAD_END_TLV_LEN) * + sizeof(u32) + mlxsw_core->driver->txhdr_len); + if (enable_string_tlv) + emad_len += MLXSW_EMAD_STRING_TLV_LEN * sizeof(u32); + if (emad_len > MLXSW_EMAD_MAX_FRAME_LEN) + return NULL; + + skb = netdev_alloc_skb(NULL, emad_len); + if (!skb) + return NULL; + memset(skb->data, 0, emad_len); + skb_reserve(skb, emad_len); + + return skb; +} + +static int mlxsw_emad_reg_access(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, + char *payload, + enum mlxsw_core_reg_access_type type, + struct mlxsw_reg_trans *trans, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, + unsigned long cb_priv, u64 tid) +{ + bool enable_string_tlv; + struct sk_buff *skb; + int err; + + dev_dbg(mlxsw_core->bus_info->dev, "EMAD reg access (tid=%llx,reg_id=%x(%s),type=%s)\n", + tid, reg->id, mlxsw_reg_id_str(reg->id), + mlxsw_core_reg_access_type_str(type)); + + /* Since this can be changed during emad_reg_access, read it once and + * use the value all the way. 
+ */ + enable_string_tlv = mlxsw_core->emad.enable_string_tlv; + + skb = mlxsw_emad_alloc(mlxsw_core, reg->len, enable_string_tlv); + if (!skb) + return -ENOMEM; + + list_add_tail(&trans->bulk_list, bulk_list); + trans->core = mlxsw_core; + trans->tx_skb = skb; + trans->tx_info.local_port = MLXSW_PORT_CPU_PORT; + trans->tx_info.is_emad = true; + INIT_DELAYED_WORK(&trans->timeout_dw, mlxsw_emad_trans_timeout_work); + trans->tid = tid; + init_completion(&trans->completion); + trans->cb = cb; + trans->cb_priv = cb_priv; + trans->reg = reg; + trans->type = type; + + mlxsw_emad_construct(skb, reg, payload, type, trans->tid, + enable_string_tlv); + mlxsw_core->driver->txhdr_construct(skb, &trans->tx_info); + + spin_lock_bh(&mlxsw_core->emad.trans_list_lock); + list_add_tail_rcu(&trans->list, &mlxsw_core->emad.trans_list); + spin_unlock_bh(&mlxsw_core->emad.trans_list_lock); + err = mlxsw_emad_transmit(mlxsw_core, trans); + if (err) + goto err_out; + return 0; + +err_out: + spin_lock_bh(&mlxsw_core->emad.trans_list_lock); + list_del_rcu(&trans->list); + spin_unlock_bh(&mlxsw_core->emad.trans_list_lock); + list_del(&trans->bulk_list); + dev_kfree_skb(trans->tx_skb); + return err; +} + +/***************** + * Core functions + *****************/ + +int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver) +{ + spin_lock(&mlxsw_core_driver_list_lock); + list_add_tail(&mlxsw_driver->list, &mlxsw_core_driver_list); + spin_unlock(&mlxsw_core_driver_list_lock); + return 0; +} +EXPORT_SYMBOL(mlxsw_core_driver_register); + +void mlxsw_core_driver_unregister(struct mlxsw_driver *mlxsw_driver) +{ + spin_lock(&mlxsw_core_driver_list_lock); + list_del(&mlxsw_driver->list); + spin_unlock(&mlxsw_core_driver_list_lock); +} +EXPORT_SYMBOL(mlxsw_core_driver_unregister); + +static struct mlxsw_driver *__driver_find(const char *kind) +{ + struct mlxsw_driver *mlxsw_driver; + + list_for_each_entry(mlxsw_driver, &mlxsw_core_driver_list, list) { + if (strcmp(mlxsw_driver->kind, kind) == 0) + return mlxsw_driver; + } + return NULL; +} + +static struct mlxsw_driver *mlxsw_core_driver_get(const char *kind) +{ + struct mlxsw_driver *mlxsw_driver; + + spin_lock(&mlxsw_core_driver_list_lock); + mlxsw_driver = __driver_find(kind); + spin_unlock(&mlxsw_core_driver_list_lock); + return mlxsw_driver; +} + +int mlxsw_core_fw_flash(struct mlxsw_core *mlxsw_core, + struct mlxfw_dev *mlxfw_dev, + const struct firmware *firmware, + struct netlink_ext_ack *extack) +{ + int err; + + mlxsw_core->fw_flash_in_progress = true; + err = mlxfw_firmware_flash(mlxfw_dev, firmware, extack); + mlxsw_core->fw_flash_in_progress = false; + + return err; +} + +struct mlxsw_core_fw_info { + struct mlxfw_dev mlxfw_dev; + struct mlxsw_core *mlxsw_core; +}; + +static int mlxsw_core_fw_component_query(struct mlxfw_dev *mlxfw_dev, + u16 component_index, u32 *p_max_size, + u8 *p_align_bits, u16 *p_max_write_size) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcqi_pl[MLXSW_REG_MCQI_LEN]; + int err; + + mlxsw_reg_mcqi_pack(mcqi_pl, component_index); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcqi), mcqi_pl); + if (err) + return err; + mlxsw_reg_mcqi_unpack(mcqi_pl, p_max_size, p_align_bits, p_max_write_size); + + *p_align_bits = max_t(u8, *p_align_bits, 2); + *p_max_write_size = min_t(u16, *p_max_write_size, MLXSW_REG_MCDA_MAX_DATA_LEN); + return 0; +} + +static int mlxsw_core_fw_fsm_lock(struct mlxfw_dev 
*mlxfw_dev, u32 *fwhandle) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcc_pl[MLXSW_REG_MCC_LEN]; + u8 control_state; + int err; + + mlxsw_reg_mcc_pack(mcc_pl, 0, 0, 0, 0); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcc), mcc_pl); + if (err) + return err; + + mlxsw_reg_mcc_unpack(mcc_pl, fwhandle, NULL, &control_state); + if (control_state != MLXFW_FSM_STATE_IDLE) + return -EBUSY; + + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE, 0, *fwhandle, 0); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcc), mcc_pl); +} + +static int mlxsw_core_fw_fsm_component_update(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u16 component_index, u32 component_size) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcc_pl[MLXSW_REG_MCC_LEN]; + + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_UPDATE_COMPONENT, + component_index, fwhandle, component_size); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcc), mcc_pl); +} + +static int mlxsw_core_fw_fsm_block_download(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u8 *data, u16 size, u32 offset) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcda_pl[MLXSW_REG_MCDA_LEN]; + + mlxsw_reg_mcda_pack(mcda_pl, fwhandle, offset, size, data); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcda), mcda_pl); +} + +static int mlxsw_core_fw_fsm_component_verify(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u16 component_index) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcc_pl[MLXSW_REG_MCC_LEN]; + + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_VERIFY_COMPONENT, + component_index, fwhandle, 0); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcc), mcc_pl); +} + +static int mlxsw_core_fw_fsm_activate(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcc_pl[MLXSW_REG_MCC_LEN]; + + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_ACTIVATE, 0, fwhandle, 0); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcc), mcc_pl); +} + +static int mlxsw_core_fw_fsm_query_state(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + enum mlxfw_fsm_state *fsm_state, + enum mlxfw_fsm_state_err *fsm_state_err) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcc_pl[MLXSW_REG_MCC_LEN]; + u8 control_state; + u8 error_code; + int err; + + mlxsw_reg_mcc_pack(mcc_pl, 0, 0, fwhandle, 0); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcc), mcc_pl); + if (err) + return err; + + mlxsw_reg_mcc_unpack(mcc_pl, NULL, &error_code, &control_state); + *fsm_state = control_state; + *fsm_state_err = min_t(enum mlxfw_fsm_state_err, error_code, MLXFW_FSM_STATE_ERR_MAX); + return 0; +} + +static void mlxsw_core_fw_fsm_cancel(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlxsw_core_fw_info 
*mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcc_pl[MLXSW_REG_MCC_LEN]; + + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_CANCEL, 0, fwhandle, 0); + mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcc), mcc_pl); +} + +static void mlxsw_core_fw_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcc_pl[MLXSW_REG_MCC_LEN]; + + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE, 0, fwhandle, 0); + mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcc), mcc_pl); +} + +static const struct mlxfw_dev_ops mlxsw_core_fw_mlxsw_dev_ops = { + .component_query = mlxsw_core_fw_component_query, + .fsm_lock = mlxsw_core_fw_fsm_lock, + .fsm_component_update = mlxsw_core_fw_fsm_component_update, + .fsm_block_download = mlxsw_core_fw_fsm_block_download, + .fsm_component_verify = mlxsw_core_fw_fsm_component_verify, + .fsm_activate = mlxsw_core_fw_fsm_activate, + .fsm_query_state = mlxsw_core_fw_fsm_query_state, + .fsm_cancel = mlxsw_core_fw_fsm_cancel, + .fsm_release = mlxsw_core_fw_fsm_release, +}; + +static int mlxsw_core_dev_fw_flash(struct mlxsw_core *mlxsw_core, + const struct firmware *firmware, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core_fw_info mlxsw_core_fw_info = { + .mlxfw_dev = { + .ops = &mlxsw_core_fw_mlxsw_dev_ops, + .psid = mlxsw_core->bus_info->psid, + .psid_size = strlen(mlxsw_core->bus_info->psid), + .devlink = priv_to_devlink(mlxsw_core), + }, + .mlxsw_core = mlxsw_core + }; + + return mlxsw_core_fw_flash(mlxsw_core, &mlxsw_core_fw_info.mlxfw_dev, + firmware, extack); +} + +static int mlxsw_core_fw_rev_validate(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + const struct mlxsw_fw_rev *req_rev, + const char *filename) +{ + const struct mlxsw_fw_rev *rev = &mlxsw_bus_info->fw_rev; + union devlink_param_value value; + const struct firmware *firmware; + int err; + + /* Don't check if driver does not require it */ + if (!req_rev || !filename) + return 0; + + /* Don't check if devlink 'fw_load_policy' param is 'flash' */ + err = devlink_param_driverinit_value_get(priv_to_devlink(mlxsw_core), + DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY, + &value); + if (err) + return err; + if (value.vu8 == DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH) + return 0; + + /* Validate driver & FW are compatible */ + if (rev->major != req_rev->major) { + WARN(1, "Mismatch in major FW version [%d:%d] is never expected; Please contact support\n", + rev->major, req_rev->major); + return -EINVAL; + } + if (mlxsw_core_fw_rev_minor_subminor_validate(rev, req_rev)) + return 0; + + dev_err(mlxsw_bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver (required >= %d.%d.%d)\n", + rev->major, rev->minor, rev->subminor, req_rev->major, + req_rev->minor, req_rev->subminor); + dev_info(mlxsw_bus_info->dev, "Flashing firmware using file %s\n", filename); + + err = request_firmware_direct(&firmware, filename, mlxsw_bus_info->dev); + if (err) { + dev_err(mlxsw_bus_info->dev, "Could not request firmware file %s\n", filename); + return err; + } + + err = mlxsw_core_dev_fw_flash(mlxsw_core, firmware, NULL); + release_firmware(firmware); + if (err) + dev_err(mlxsw_bus_info->dev, "Could not upgrade firmware\n"); + + /* On FW flash success, tell the caller FW reset is 
needed + * if current FW supports it. + */ + if (rev->minor >= req_rev->can_reset_minor) + return err ? err : -EAGAIN; + else + return 0; +} + +static int mlxsw_core_fw_flash_update(struct mlxsw_core *mlxsw_core, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) +{ + return mlxsw_core_dev_fw_flash(mlxsw_core, params->fw, extack); +} + +static int mlxsw_core_devlink_param_fw_load_policy_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + if (val.vu8 != DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER && + val.vu8 != DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH) { + NL_SET_ERR_MSG_MOD(extack, "'fw_load_policy' must be 'driver' or 'flash'"); + return -EINVAL; + } + + return 0; +} + +static const struct devlink_param mlxsw_core_fw_devlink_params[] = { + DEVLINK_PARAM_GENERIC(FW_LOAD_POLICY, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, + mlxsw_core_devlink_param_fw_load_policy_validate), +}; + +static int mlxsw_core_fw_params_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + union devlink_param_value value; + int err; + + err = devlink_params_register(devlink, mlxsw_core_fw_devlink_params, + ARRAY_SIZE(mlxsw_core_fw_devlink_params)); + if (err) + return err; + + value.vu8 = DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER; + devlink_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY, value); + return 0; +} + +static void mlxsw_core_fw_params_unregister(struct mlxsw_core *mlxsw_core) +{ + devlink_params_unregister(priv_to_devlink(mlxsw_core), mlxsw_core_fw_devlink_params, + ARRAY_SIZE(mlxsw_core_fw_devlink_params)); +} + +static void *__dl_port(struct devlink_port *devlink_port) +{ + return container_of(devlink_port, struct mlxsw_core_port, devlink_port); +} + +static int mlxsw_devlink_port_split(struct devlink *devlink, + struct devlink_port *port, + unsigned int count, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core_port *mlxsw_core_port = __dl_port(port); + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + + if (!mlxsw_core->driver->port_split) + return -EOPNOTSUPP; + return mlxsw_core->driver->port_split(mlxsw_core, + mlxsw_core_port->local_port, + count, extack); +} + +static int mlxsw_devlink_port_unsplit(struct devlink *devlink, + struct devlink_port *port, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core_port *mlxsw_core_port = __dl_port(port); + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + + if (!mlxsw_core->driver->port_unsplit) + return -EOPNOTSUPP; + return mlxsw_core->driver->port_unsplit(mlxsw_core, + mlxsw_core_port->local_port, + extack); +} + +static int +mlxsw_devlink_sb_pool_get(struct devlink *devlink, + unsigned int sb_index, u16 pool_index, + struct devlink_sb_pool_info *pool_info) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->sb_pool_get) + return -EOPNOTSUPP; + return mlxsw_driver->sb_pool_get(mlxsw_core, sb_index, + pool_index, pool_info); +} + +static int +mlxsw_devlink_sb_pool_set(struct devlink *devlink, + unsigned int sb_index, u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->sb_pool_set) + return -EOPNOTSUPP; + return mlxsw_driver->sb_pool_set(mlxsw_core, sb_index, + pool_index, size, 
threshold_type, + extack); +} + +static int mlxsw_devlink_sb_port_pool_get(struct devlink_port *devlink_port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_port_pool_get || + !mlxsw_core_port_check(mlxsw_core_port)) + return -EOPNOTSUPP; + return mlxsw_driver->sb_port_pool_get(mlxsw_core_port, sb_index, + pool_index, p_threshold); +} + +static int mlxsw_devlink_sb_port_pool_set(struct devlink_port *devlink_port, + unsigned int sb_index, u16 pool_index, + u32 threshold, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_port_pool_set || + !mlxsw_core_port_check(mlxsw_core_port)) + return -EOPNOTSUPP; + return mlxsw_driver->sb_port_pool_set(mlxsw_core_port, sb_index, + pool_index, threshold, extack); +} + +static int +mlxsw_devlink_sb_tc_pool_bind_get(struct devlink_port *devlink_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_tc_pool_bind_get || + !mlxsw_core_port_check(mlxsw_core_port)) + return -EOPNOTSUPP; + return mlxsw_driver->sb_tc_pool_bind_get(mlxsw_core_port, sb_index, + tc_index, pool_type, + p_pool_index, p_threshold); +} + +static int +mlxsw_devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_tc_pool_bind_set || + !mlxsw_core_port_check(mlxsw_core_port)) + return -EOPNOTSUPP; + return mlxsw_driver->sb_tc_pool_bind_set(mlxsw_core_port, sb_index, + tc_index, pool_type, + pool_index, threshold, extack); +} + +static int mlxsw_devlink_sb_occ_snapshot(struct devlink *devlink, + unsigned int sb_index) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->sb_occ_snapshot) + return -EOPNOTSUPP; + return mlxsw_driver->sb_occ_snapshot(mlxsw_core, sb_index); +} + +static int mlxsw_devlink_sb_occ_max_clear(struct devlink *devlink, + unsigned int sb_index) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->sb_occ_max_clear) + return -EOPNOTSUPP; + return mlxsw_driver->sb_occ_max_clear(mlxsw_core, sb_index); +} + +static int +mlxsw_devlink_sb_occ_port_pool_get(struct devlink_port *devlink_port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_occ_port_pool_get || 
+ !mlxsw_core_port_check(mlxsw_core_port)) + return -EOPNOTSUPP; + return mlxsw_driver->sb_occ_port_pool_get(mlxsw_core_port, sb_index, + pool_index, p_cur, p_max); +} + +static int +mlxsw_devlink_sb_occ_tc_port_bind_get(struct devlink_port *devlink_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_occ_tc_port_bind_get || + !mlxsw_core_port_check(mlxsw_core_port)) + return -EOPNOTSUPP; + return mlxsw_driver->sb_occ_tc_port_bind_get(mlxsw_core_port, + sb_index, tc_index, + pool_type, p_cur, p_max); +} + +static int +mlxsw_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + char fw_info_psid[MLXSW_REG_MGIR_FW_INFO_PSID_SIZE]; + u32 hw_rev, fw_major, fw_minor, fw_sub_minor; + char mgir_pl[MLXSW_REG_MGIR_LEN]; + char buf[32]; + int err; + + err = devlink_info_driver_name_put(req, + mlxsw_core->bus_info->device_kind); + if (err) + return err; + + mlxsw_reg_mgir_pack(mgir_pl); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mgir), mgir_pl); + if (err) + return err; + mlxsw_reg_mgir_unpack(mgir_pl, &hw_rev, fw_info_psid, &fw_major, + &fw_minor, &fw_sub_minor); + + sprintf(buf, "%X", hw_rev); + err = devlink_info_version_fixed_put(req, "hw.revision", buf); + if (err) + return err; + + err = devlink_info_version_fixed_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW_PSID, + fw_info_psid); + if (err) + return err; + + sprintf(buf, "%d.%d.%d", fw_major, fw_minor, fw_sub_minor); + err = devlink_info_version_running_put(req, "fw.version", buf); + if (err) + return err; + + return devlink_info_version_running_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW, + buf); +} + +static int +mlxsw_devlink_core_bus_device_reload_down(struct devlink *devlink, + bool netns_change, enum devlink_reload_action action, + enum devlink_reload_limit limit, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + + if (!(mlxsw_core->bus->features & MLXSW_BUS_F_RESET)) + return -EOPNOTSUPP; + + mlxsw_core_bus_device_unregister(mlxsw_core, true); + return 0; +} + +static int +mlxsw_devlink_core_bus_device_reload_up(struct devlink *devlink, enum devlink_reload_action action, + enum devlink_reload_limit limit, u32 *actions_performed, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + int err; + + *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | + BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE); + err = mlxsw_core_bus_device_register(mlxsw_core->bus_info, + mlxsw_core->bus, + mlxsw_core->bus_priv, true, + devlink, extack); + return err; +} + +static int mlxsw_devlink_flash_update(struct devlink *devlink, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + + return mlxsw_core_fw_flash_update(mlxsw_core, params, extack); +} + +static int mlxsw_devlink_trap_init(struct devlink *devlink, + const struct devlink_trap *trap, + void *trap_ctx) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_init) + return -EOPNOTSUPP; + return mlxsw_driver->trap_init(mlxsw_core, trap, trap_ctx); 
+} + +static void mlxsw_devlink_trap_fini(struct devlink *devlink, + const struct devlink_trap *trap, + void *trap_ctx) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_fini) + return; + mlxsw_driver->trap_fini(mlxsw_core, trap, trap_ctx); +} + +static int mlxsw_devlink_trap_action_set(struct devlink *devlink, + const struct devlink_trap *trap, + enum devlink_trap_action action, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_action_set) + return -EOPNOTSUPP; + return mlxsw_driver->trap_action_set(mlxsw_core, trap, action, extack); +} + +static int +mlxsw_devlink_trap_group_init(struct devlink *devlink, + const struct devlink_trap_group *group) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_group_init) + return -EOPNOTSUPP; + return mlxsw_driver->trap_group_init(mlxsw_core, group); +} + +static int +mlxsw_devlink_trap_group_set(struct devlink *devlink, + const struct devlink_trap_group *group, + const struct devlink_trap_policer *policer, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_group_set) + return -EOPNOTSUPP; + return mlxsw_driver->trap_group_set(mlxsw_core, group, policer, extack); +} + +static int +mlxsw_devlink_trap_policer_init(struct devlink *devlink, + const struct devlink_trap_policer *policer) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_policer_init) + return -EOPNOTSUPP; + return mlxsw_driver->trap_policer_init(mlxsw_core, policer); +} + +static void +mlxsw_devlink_trap_policer_fini(struct devlink *devlink, + const struct devlink_trap_policer *policer) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_policer_fini) + return; + mlxsw_driver->trap_policer_fini(mlxsw_core, policer); +} + +static int +mlxsw_devlink_trap_policer_set(struct devlink *devlink, + const struct devlink_trap_policer *policer, + u64 rate, u64 burst, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_policer_set) + return -EOPNOTSUPP; + return mlxsw_driver->trap_policer_set(mlxsw_core, policer, rate, burst, + extack); +} + +static int +mlxsw_devlink_trap_policer_counter_get(struct devlink *devlink, + const struct devlink_trap_policer *policer, + u64 *p_drops) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_policer_counter_get) + return -EOPNOTSUPP; + return mlxsw_driver->trap_policer_counter_get(mlxsw_core, policer, + p_drops); +} + +static const struct devlink_ops mlxsw_devlink_ops = { + .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | + BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE), + .reload_down = mlxsw_devlink_core_bus_device_reload_down, + .reload_up = mlxsw_devlink_core_bus_device_reload_up, + .port_split = mlxsw_devlink_port_split, + .port_unsplit = mlxsw_devlink_port_unsplit, + .sb_pool_get = mlxsw_devlink_sb_pool_get, + 
.sb_pool_set = mlxsw_devlink_sb_pool_set, + .sb_port_pool_get = mlxsw_devlink_sb_port_pool_get, + .sb_port_pool_set = mlxsw_devlink_sb_port_pool_set, + .sb_tc_pool_bind_get = mlxsw_devlink_sb_tc_pool_bind_get, + .sb_tc_pool_bind_set = mlxsw_devlink_sb_tc_pool_bind_set, + .sb_occ_snapshot = mlxsw_devlink_sb_occ_snapshot, + .sb_occ_max_clear = mlxsw_devlink_sb_occ_max_clear, + .sb_occ_port_pool_get = mlxsw_devlink_sb_occ_port_pool_get, + .sb_occ_tc_port_bind_get = mlxsw_devlink_sb_occ_tc_port_bind_get, + .info_get = mlxsw_devlink_info_get, + .flash_update = mlxsw_devlink_flash_update, + .trap_init = mlxsw_devlink_trap_init, + .trap_fini = mlxsw_devlink_trap_fini, + .trap_action_set = mlxsw_devlink_trap_action_set, + .trap_group_init = mlxsw_devlink_trap_group_init, + .trap_group_set = mlxsw_devlink_trap_group_set, + .trap_policer_init = mlxsw_devlink_trap_policer_init, + .trap_policer_fini = mlxsw_devlink_trap_policer_fini, + .trap_policer_set = mlxsw_devlink_trap_policer_set, + .trap_policer_counter_get = mlxsw_devlink_trap_policer_counter_get, +}; + +static int mlxsw_core_params_register(struct mlxsw_core *mlxsw_core) +{ + int err; + + err = mlxsw_core_fw_params_register(mlxsw_core); + if (err) + return err; + + if (mlxsw_core->driver->params_register) { + err = mlxsw_core->driver->params_register(mlxsw_core); + if (err) + goto err_params_register; + } + return 0; + +err_params_register: + mlxsw_core_fw_params_unregister(mlxsw_core); + return err; +} + +static void mlxsw_core_params_unregister(struct mlxsw_core *mlxsw_core) +{ + mlxsw_core_fw_params_unregister(mlxsw_core); + if (mlxsw_core->driver->params_register) + mlxsw_core->driver->params_unregister(mlxsw_core); +} + +struct mlxsw_core_health_event { + struct mlxsw_core *mlxsw_core; + char mfde_pl[MLXSW_REG_MFDE_LEN]; + struct work_struct work; +}; + +static void mlxsw_core_health_event_work(struct work_struct *work) +{ + struct mlxsw_core_health_event *event; + struct mlxsw_core *mlxsw_core; + + event = container_of(work, struct mlxsw_core_health_event, work); + mlxsw_core = event->mlxsw_core; + devlink_health_report(mlxsw_core->health.fw_fatal, "FW fatal event occurred", + event->mfde_pl); + kfree(event); +} + +static void mlxsw_core_health_listener_func(const struct mlxsw_reg_info *reg, + char *mfde_pl, void *priv) +{ + struct mlxsw_core_health_event *event; + struct mlxsw_core *mlxsw_core = priv; + + event = kmalloc(sizeof(*event), GFP_ATOMIC); + if (!event) + return; + event->mlxsw_core = mlxsw_core; + memcpy(event->mfde_pl, mfde_pl, sizeof(event->mfde_pl)); + INIT_WORK(&event->work, mlxsw_core_health_event_work); + mlxsw_core_schedule_work(&event->work); +} + +static const struct mlxsw_listener mlxsw_core_health_listener = + MLXSW_CORE_EVENTL(mlxsw_core_health_listener_func, MFDE); + +static int +mlxsw_core_health_fw_fatal_dump_fatal_cause(const char *mfde_pl, + struct devlink_fmsg *fmsg) +{ + u32 val, tile_v; + int err; + + val = mlxsw_reg_mfde_fatal_cause_id_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "cause_id", val); + if (err) + return err; + tile_v = mlxsw_reg_mfde_fatal_cause_tile_v_get(mfde_pl); + if (tile_v) { + val = mlxsw_reg_mfde_fatal_cause_tile_index_get(mfde_pl); + err = devlink_fmsg_u8_pair_put(fmsg, "tile_index", val); + if (err) + return err; + } + + return 0; +} + +static int +mlxsw_core_health_fw_fatal_dump_fw_assert(const char *mfde_pl, + struct devlink_fmsg *fmsg) +{ + u32 val, tile_v; + int err; + + val = mlxsw_reg_mfde_fw_assert_var0_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "var0", 
val); + if (err) + return err; + val = mlxsw_reg_mfde_fw_assert_var1_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "var1", val); + if (err) + return err; + val = mlxsw_reg_mfde_fw_assert_var2_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "var2", val); + if (err) + return err; + val = mlxsw_reg_mfde_fw_assert_var3_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "var3", val); + if (err) + return err; + val = mlxsw_reg_mfde_fw_assert_var4_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "var4", val); + if (err) + return err; + val = mlxsw_reg_mfde_fw_assert_existptr_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "existptr", val); + if (err) + return err; + val = mlxsw_reg_mfde_fw_assert_callra_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "callra", val); + if (err) + return err; + val = mlxsw_reg_mfde_fw_assert_oe_get(mfde_pl); + err = devlink_fmsg_bool_pair_put(fmsg, "old_event", val); + if (err) + return err; + tile_v = mlxsw_reg_mfde_fw_assert_tile_v_get(mfde_pl); + if (tile_v) { + val = mlxsw_reg_mfde_fw_assert_tile_index_get(mfde_pl); + err = devlink_fmsg_u8_pair_put(fmsg, "tile_index", val); + if (err) + return err; + } + val = mlxsw_reg_mfde_fw_assert_ext_synd_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "ext_synd", val); + if (err) + return err; + + return 0; +} + +static int +mlxsw_core_health_fw_fatal_dump_kvd_im_stop(const char *mfde_pl, + struct devlink_fmsg *fmsg) +{ + u32 val; + int err; + + val = mlxsw_reg_mfde_kvd_im_stop_oe_get(mfde_pl); + err = devlink_fmsg_bool_pair_put(fmsg, "old_event", val); + if (err) + return err; + val = mlxsw_reg_mfde_kvd_im_stop_pipes_mask_get(mfde_pl); + return devlink_fmsg_u32_pair_put(fmsg, "pipes_mask", val); +} + +static int +mlxsw_core_health_fw_fatal_dump_crspace_to(const char *mfde_pl, + struct devlink_fmsg *fmsg) +{ + u32 val; + int err; + + val = mlxsw_reg_mfde_crspace_to_log_address_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "log_address", val); + if (err) + return err; + val = mlxsw_reg_mfde_crspace_to_oe_get(mfde_pl); + err = devlink_fmsg_bool_pair_put(fmsg, "old_event", val); + if (err) + return err; + val = mlxsw_reg_mfde_crspace_to_log_id_get(mfde_pl); + err = devlink_fmsg_u8_pair_put(fmsg, "log_irisc_id", val); + if (err) + return err; + val = mlxsw_reg_mfde_crspace_to_log_ip_get(mfde_pl); + err = devlink_fmsg_u64_pair_put(fmsg, "log_ip", val); + if (err) + return err; + + return 0; +} + +static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + char *mfde_pl = priv_ctx; + char *val_str; + u8 event_id; + u32 val; + int err; + + if (!priv_ctx) + /* User-triggered dumps are not possible */ + return -EOPNOTSUPP; + + val = mlxsw_reg_mfde_irisc_id_get(mfde_pl); + err = devlink_fmsg_u8_pair_put(fmsg, "irisc_id", val); + if (err) + return err; + err = devlink_fmsg_arr_pair_nest_start(fmsg, "event"); + if (err) + return err; + + event_id = mlxsw_reg_mfde_event_id_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "id", event_id); + if (err) + return err; + switch (event_id) { + case MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO: + val_str = "CR space timeout"; + break; + case MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP: + val_str = "KVD insertion machine stopped"; + break; + case MLXSW_REG_MFDE_EVENT_ID_TEST: + val_str = "Test"; + break; + case MLXSW_REG_MFDE_EVENT_ID_FW_ASSERT: + val_str = "FW assert"; + break; + case MLXSW_REG_MFDE_EVENT_ID_FATAL_CAUSE: + val_str = "Fatal cause"; + 
break; + default: + val_str = NULL; + } + if (val_str) { + err = devlink_fmsg_string_pair_put(fmsg, "desc", val_str); + if (err) + return err; + } + + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "severity"); + if (err) + return err; + + val = mlxsw_reg_mfde_severity_get(mfde_pl); + err = devlink_fmsg_u8_pair_put(fmsg, "id", val); + if (err) + return err; + switch (val) { + case MLXSW_REG_MFDE_SEVERITY_FATL: + val_str = "Fatal"; + break; + case MLXSW_REG_MFDE_SEVERITY_NRML: + val_str = "Normal"; + break; + case MLXSW_REG_MFDE_SEVERITY_INTR: + val_str = "Debug"; + break; + default: + val_str = NULL; + } + if (val_str) { + err = devlink_fmsg_string_pair_put(fmsg, "desc", val_str); + if (err) + return err; + } + + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + + val = mlxsw_reg_mfde_method_get(mfde_pl); + switch (val) { + case MLXSW_REG_MFDE_METHOD_QUERY: + val_str = "query"; + break; + case MLXSW_REG_MFDE_METHOD_WRITE: + val_str = "write"; + break; + default: + val_str = NULL; + } + if (val_str) { + err = devlink_fmsg_string_pair_put(fmsg, "method", val_str); + if (err) + return err; + } + + val = mlxsw_reg_mfde_long_process_get(mfde_pl); + err = devlink_fmsg_bool_pair_put(fmsg, "long_process", val); + if (err) + return err; + + val = mlxsw_reg_mfde_command_type_get(mfde_pl); + switch (val) { + case MLXSW_REG_MFDE_COMMAND_TYPE_MAD: + val_str = "mad"; + break; + case MLXSW_REG_MFDE_COMMAND_TYPE_EMAD: + val_str = "emad"; + break; + case MLXSW_REG_MFDE_COMMAND_TYPE_CMDIF: + val_str = "cmdif"; + break; + default: + val_str = NULL; + } + if (val_str) { + err = devlink_fmsg_string_pair_put(fmsg, "command_type", val_str); + if (err) + return err; + } + + val = mlxsw_reg_mfde_reg_attr_id_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "reg_attr_id", val); + if (err) + return err; + + switch (event_id) { + case MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO: + return mlxsw_core_health_fw_fatal_dump_crspace_to(mfde_pl, + fmsg); + case MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP: + return mlxsw_core_health_fw_fatal_dump_kvd_im_stop(mfde_pl, + fmsg); + case MLXSW_REG_MFDE_EVENT_ID_FW_ASSERT: + return mlxsw_core_health_fw_fatal_dump_fw_assert(mfde_pl, fmsg); + case MLXSW_REG_MFDE_EVENT_ID_FATAL_CAUSE: + return mlxsw_core_health_fw_fatal_dump_fatal_cause(mfde_pl, + fmsg); + } + + return 0; +} + +static int +mlxsw_core_health_fw_fatal_test(struct devlink_health_reporter *reporter, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core *mlxsw_core = devlink_health_reporter_priv(reporter); + char mfgd_pl[MLXSW_REG_MFGD_LEN]; + int err; + + /* Read the register first to make sure no other bits are changed. */ + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mfgd), mfgd_pl); + if (err) + return err; + mlxsw_reg_mfgd_trigger_test_set(mfgd_pl, true); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mfgd), mfgd_pl); +} + +static const struct devlink_health_reporter_ops +mlxsw_core_health_fw_fatal_ops = { + .name = "fw_fatal", + .dump = mlxsw_core_health_fw_fatal_dump, + .test = mlxsw_core_health_fw_fatal_test, +}; + +static int mlxsw_core_health_fw_fatal_config(struct mlxsw_core *mlxsw_core, + bool enable) +{ + char mfgd_pl[MLXSW_REG_MFGD_LEN]; + int err; + + /* Read the register first to make sure no other bits are changed. 
*/ + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mfgd), mfgd_pl); + if (err) + return err; + mlxsw_reg_mfgd_fatal_event_mode_set(mfgd_pl, enable); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mfgd), mfgd_pl); +} + +static int mlxsw_core_health_init(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct devlink_health_reporter *fw_fatal; + int err; + + if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) + return 0; + + fw_fatal = devlink_health_reporter_create(devlink, &mlxsw_core_health_fw_fatal_ops, + 0, mlxsw_core); + if (IS_ERR(fw_fatal)) { + dev_err(mlxsw_core->bus_info->dev, "Failed to create fw fatal reporter"); + return PTR_ERR(fw_fatal); + } + mlxsw_core->health.fw_fatal = fw_fatal; + + err = mlxsw_core_trap_register(mlxsw_core, &mlxsw_core_health_listener, mlxsw_core); + if (err) + goto err_trap_register; + + err = mlxsw_core_health_fw_fatal_config(mlxsw_core, true); + if (err) + goto err_fw_fatal_config; + + return 0; + +err_fw_fatal_config: + mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_core_health_listener, mlxsw_core); +err_trap_register: + devlink_health_reporter_destroy(mlxsw_core->health.fw_fatal); + return err; +} + +static void mlxsw_core_health_fini(struct mlxsw_core *mlxsw_core) +{ + if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) + return; + + mlxsw_core_health_fw_fatal_config(mlxsw_core, false); + mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_core_health_listener, mlxsw_core); + /* Make sure there is no more event work scheduled */ + mlxsw_core_flush_owq(); + devlink_health_reporter_destroy(mlxsw_core->health.fw_fatal); +} + +static void mlxsw_core_irq_event_handler_init(struct mlxsw_core *mlxsw_core) +{ + INIT_LIST_HEAD(&mlxsw_core->irq_event_handler_list); + mutex_init(&mlxsw_core->irq_event_handler_lock); +} + +static void mlxsw_core_irq_event_handler_fini(struct mlxsw_core *mlxsw_core) +{ + mutex_destroy(&mlxsw_core->irq_event_handler_lock); + WARN_ON(!list_empty(&mlxsw_core->irq_event_handler_list)); +} + +static int +__mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, + const struct mlxsw_bus *mlxsw_bus, + void *bus_priv, bool reload, + struct devlink *devlink, + struct netlink_ext_ack *extack) +{ + const char *device_kind = mlxsw_bus_info->device_kind; + struct mlxsw_core *mlxsw_core; + struct mlxsw_driver *mlxsw_driver; + size_t alloc_size; + u16 max_lag; + int err; + + mlxsw_driver = mlxsw_core_driver_get(device_kind); + if (!mlxsw_driver) + return -EINVAL; + + if (!reload) { + alloc_size = sizeof(*mlxsw_core) + mlxsw_driver->priv_size; + devlink = devlink_alloc(&mlxsw_devlink_ops, alloc_size, + mlxsw_bus_info->dev); + if (!devlink) { + err = -ENOMEM; + goto err_devlink_alloc; + } + devl_lock(devlink); + } + + mlxsw_core = devlink_priv(devlink); + INIT_LIST_HEAD(&mlxsw_core->rx_listener_list); + INIT_LIST_HEAD(&mlxsw_core->event_listener_list); + mlxsw_core->driver = mlxsw_driver; + mlxsw_core->bus = mlxsw_bus; + mlxsw_core->bus_priv = bus_priv; + mlxsw_core->bus_info = mlxsw_bus_info; + mlxsw_core_irq_event_handler_init(mlxsw_core); + + err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile, + &mlxsw_core->res); + if (err) + goto err_bus_init; + + if (mlxsw_driver->resources_register && !reload) { + err = mlxsw_driver->resources_register(mlxsw_core); + if (err) + goto err_register_resources; + } + + err = mlxsw_ports_init(mlxsw_core, reload); + if (err) + goto err_ports_init; + + err = mlxsw_core_max_lag(mlxsw_core, &max_lag); + if (!err && 
MLXSW_CORE_RES_VALID(mlxsw_core, MAX_LAG_MEMBERS)) { + alloc_size = sizeof(*mlxsw_core->lag.mapping) * max_lag * + MLXSW_CORE_RES_GET(mlxsw_core, MAX_LAG_MEMBERS); + mlxsw_core->lag.mapping = kzalloc(alloc_size, GFP_KERNEL); + if (!mlxsw_core->lag.mapping) { + err = -ENOMEM; + goto err_alloc_lag_mapping; + } + } + + err = mlxsw_core_trap_groups_set(mlxsw_core); + if (err) + goto err_trap_groups_set; + + err = mlxsw_emad_init(mlxsw_core); + if (err) + goto err_emad_init; + + if (!reload) { + err = mlxsw_core_params_register(mlxsw_core); + if (err) + goto err_register_params; + } + + err = mlxsw_core_fw_rev_validate(mlxsw_core, mlxsw_bus_info, mlxsw_driver->fw_req_rev, + mlxsw_driver->fw_filename); + if (err) + goto err_fw_rev_validate; + + err = mlxsw_linecards_init(mlxsw_core, mlxsw_bus_info); + if (err) + goto err_linecards_init; + + err = mlxsw_core_health_init(mlxsw_core); + if (err) + goto err_health_init; + + err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon); + if (err) + goto err_hwmon_init; + + err = mlxsw_thermal_init(mlxsw_core, mlxsw_bus_info, + &mlxsw_core->thermal); + if (err) + goto err_thermal_init; + + err = mlxsw_env_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->env); + if (err) + goto err_env_init; + + if (mlxsw_driver->init) { + err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info, extack); + if (err) + goto err_driver_init; + } + + if (!reload) { + devlink_set_features(devlink, DEVLINK_F_RELOAD); + devl_unlock(devlink); + devlink_register(devlink); + } + return 0; + +err_driver_init: + mlxsw_env_fini(mlxsw_core->env); +err_env_init: + mlxsw_thermal_fini(mlxsw_core->thermal); +err_thermal_init: + mlxsw_hwmon_fini(mlxsw_core->hwmon); +err_hwmon_init: + mlxsw_core_health_fini(mlxsw_core); +err_health_init: + mlxsw_linecards_fini(mlxsw_core); +err_linecards_init: +err_fw_rev_validate: + if (!reload) + mlxsw_core_params_unregister(mlxsw_core); +err_register_params: + mlxsw_emad_fini(mlxsw_core); +err_emad_init: +err_trap_groups_set: + kfree(mlxsw_core->lag.mapping); +err_alloc_lag_mapping: + mlxsw_ports_fini(mlxsw_core, reload); +err_ports_init: + if (!reload) + devl_resources_unregister(devlink); +err_register_resources: + mlxsw_bus->fini(bus_priv); +err_bus_init: + mlxsw_core_irq_event_handler_fini(mlxsw_core); + if (!reload) { + devl_unlock(devlink); + devlink_free(devlink); + } +err_devlink_alloc: + return err; +} + +int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, + const struct mlxsw_bus *mlxsw_bus, + void *bus_priv, bool reload, + struct devlink *devlink, + struct netlink_ext_ack *extack) +{ + bool called_again = false; + int err; + +again: + err = __mlxsw_core_bus_device_register(mlxsw_bus_info, mlxsw_bus, + bus_priv, reload, + devlink, extack); + /* -EAGAIN is returned in case the FW was updated. FW needs + * a reset, so lets try to call __mlxsw_core_bus_device_register() + * again. + */ + if (err == -EAGAIN && !called_again) { + called_again = true; + goto again; + } + + return err; +} +EXPORT_SYMBOL(mlxsw_core_bus_device_register); + +void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, + bool reload) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + + if (!reload) { + devlink_unregister(devlink); + devl_lock(devlink); + } + + if (devlink_is_reload_failed(devlink)) { + if (!reload) + /* Only the parts that were not de-initialized in the + * failed reload attempt need to be de-initialized. 
+ */ + goto reload_fail_deinit; + else + return; + } + + if (mlxsw_core->driver->fini) + mlxsw_core->driver->fini(mlxsw_core); + mlxsw_env_fini(mlxsw_core->env); + mlxsw_thermal_fini(mlxsw_core->thermal); + mlxsw_hwmon_fini(mlxsw_core->hwmon); + mlxsw_core_health_fini(mlxsw_core); + mlxsw_linecards_fini(mlxsw_core); + if (!reload) + mlxsw_core_params_unregister(mlxsw_core); + mlxsw_emad_fini(mlxsw_core); + kfree(mlxsw_core->lag.mapping); + mlxsw_ports_fini(mlxsw_core, reload); + if (!reload) + devl_resources_unregister(devlink); + mlxsw_core->bus->fini(mlxsw_core->bus_priv); + mlxsw_core_irq_event_handler_fini(mlxsw_core); + if (!reload) { + devl_unlock(devlink); + devlink_free(devlink); + } + + return; + +reload_fail_deinit: + mlxsw_core_params_unregister(mlxsw_core); + devl_resources_unregister(devlink); + devl_unlock(devlink); + devlink_free(devlink); +} +EXPORT_SYMBOL(mlxsw_core_bus_device_unregister); + +bool mlxsw_core_skb_transmit_busy(struct mlxsw_core *mlxsw_core, + const struct mlxsw_tx_info *tx_info) +{ + return mlxsw_core->bus->skb_transmit_busy(mlxsw_core->bus_priv, + tx_info); +} +EXPORT_SYMBOL(mlxsw_core_skb_transmit_busy); + +int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + return mlxsw_core->bus->skb_transmit(mlxsw_core->bus_priv, skb, + tx_info); +} +EXPORT_SYMBOL(mlxsw_core_skb_transmit); + +void mlxsw_core_ptp_transmitted(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, u16 local_port) +{ + if (mlxsw_core->driver->ptp_transmitted) + mlxsw_core->driver->ptp_transmitted(mlxsw_core, skb, + local_port); +} +EXPORT_SYMBOL(mlxsw_core_ptp_transmitted); + +static bool __is_rx_listener_equal(const struct mlxsw_rx_listener *rxl_a, + const struct mlxsw_rx_listener *rxl_b) +{ + return (rxl_a->func == rxl_b->func && + rxl_a->local_port == rxl_b->local_port && + rxl_a->trap_id == rxl_b->trap_id && + rxl_a->mirror_reason == rxl_b->mirror_reason); +} + +static struct mlxsw_rx_listener_item * +__find_rx_listener_item(struct mlxsw_core *mlxsw_core, + const struct mlxsw_rx_listener *rxl) +{ + struct mlxsw_rx_listener_item *rxl_item; + + list_for_each_entry(rxl_item, &mlxsw_core->rx_listener_list, list) { + if (__is_rx_listener_equal(&rxl_item->rxl, rxl)) + return rxl_item; + } + return NULL; +} + +int mlxsw_core_rx_listener_register(struct mlxsw_core *mlxsw_core, + const struct mlxsw_rx_listener *rxl, + void *priv, bool enabled) +{ + struct mlxsw_rx_listener_item *rxl_item; + + rxl_item = __find_rx_listener_item(mlxsw_core, rxl); + if (rxl_item) + return -EEXIST; + rxl_item = kmalloc(sizeof(*rxl_item), GFP_KERNEL); + if (!rxl_item) + return -ENOMEM; + rxl_item->rxl = *rxl; + rxl_item->priv = priv; + rxl_item->enabled = enabled; + + list_add_rcu(&rxl_item->list, &mlxsw_core->rx_listener_list); + return 0; +} +EXPORT_SYMBOL(mlxsw_core_rx_listener_register); + +void mlxsw_core_rx_listener_unregister(struct mlxsw_core *mlxsw_core, + const struct mlxsw_rx_listener *rxl) +{ + struct mlxsw_rx_listener_item *rxl_item; + + rxl_item = __find_rx_listener_item(mlxsw_core, rxl); + if (!rxl_item) + return; + list_del_rcu(&rxl_item->list); + synchronize_rcu(); + kfree(rxl_item); +} +EXPORT_SYMBOL(mlxsw_core_rx_listener_unregister); + +static void +mlxsw_core_rx_listener_state_set(struct mlxsw_core *mlxsw_core, + const struct mlxsw_rx_listener *rxl, + bool enabled) +{ + struct mlxsw_rx_listener_item *rxl_item; + + rxl_item = __find_rx_listener_item(mlxsw_core, rxl); + if (WARN_ON(!rxl_item)) + return; + rxl_item->enabled = 
enabled; +} + +static void mlxsw_core_event_listener_func(struct sk_buff *skb, u16 local_port, + void *priv) +{ + struct mlxsw_event_listener_item *event_listener_item = priv; + struct mlxsw_core *mlxsw_core; + struct mlxsw_reg_info reg; + char *payload; + char *reg_tlv; + char *op_tlv; + + mlxsw_core = event_listener_item->mlxsw_core; + trace_devlink_hwmsg(priv_to_devlink(mlxsw_core), true, 0, + skb->data, skb->len); + + mlxsw_emad_tlv_parse(skb); + op_tlv = mlxsw_emad_op_tlv(skb); + reg_tlv = mlxsw_emad_reg_tlv(skb); + + reg.id = mlxsw_emad_op_tlv_register_id_get(op_tlv); + reg.len = (mlxsw_emad_reg_tlv_len_get(reg_tlv) - 1) * sizeof(u32); + payload = mlxsw_emad_reg_payload(reg_tlv); + event_listener_item->el.func(&reg, payload, event_listener_item->priv); + dev_kfree_skb(skb); +} + +static bool __is_event_listener_equal(const struct mlxsw_event_listener *el_a, + const struct mlxsw_event_listener *el_b) +{ + return (el_a->func == el_b->func && + el_a->trap_id == el_b->trap_id); +} + +static struct mlxsw_event_listener_item * +__find_event_listener_item(struct mlxsw_core *mlxsw_core, + const struct mlxsw_event_listener *el) +{ + struct mlxsw_event_listener_item *el_item; + + list_for_each_entry(el_item, &mlxsw_core->event_listener_list, list) { + if (__is_event_listener_equal(&el_item->el, el)) + return el_item; + } + return NULL; +} + +int mlxsw_core_event_listener_register(struct mlxsw_core *mlxsw_core, + const struct mlxsw_event_listener *el, + void *priv) +{ + int err; + struct mlxsw_event_listener_item *el_item; + const struct mlxsw_rx_listener rxl = { + .func = mlxsw_core_event_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = el->trap_id, + }; + + el_item = __find_event_listener_item(mlxsw_core, el); + if (el_item) + return -EEXIST; + el_item = kmalloc(sizeof(*el_item), GFP_KERNEL); + if (!el_item) + return -ENOMEM; + el_item->mlxsw_core = mlxsw_core; + el_item->el = *el; + el_item->priv = priv; + + err = mlxsw_core_rx_listener_register(mlxsw_core, &rxl, el_item, true); + if (err) + goto err_rx_listener_register; + + /* No reason to save item if we did not manage to register an RX + * listener for it.
+ */ + list_add_rcu(&el_item->list, &mlxsw_core->event_listener_list); + + return 0; + +err_rx_listener_register: + kfree(el_item); + return err; +} +EXPORT_SYMBOL(mlxsw_core_event_listener_register); + +void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core, + const struct mlxsw_event_listener *el) +{ + struct mlxsw_event_listener_item *el_item; + const struct mlxsw_rx_listener rxl = { + .func = mlxsw_core_event_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = el->trap_id, + }; + + el_item = __find_event_listener_item(mlxsw_core, el); + if (!el_item) + return; + mlxsw_core_rx_listener_unregister(mlxsw_core, &rxl); + list_del(&el_item->list); + kfree(el_item); +} +EXPORT_SYMBOL(mlxsw_core_event_listener_unregister); + +static int mlxsw_core_listener_register(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listener, + void *priv, bool enabled) +{ + if (listener->is_event) { + WARN_ON(!enabled); + return mlxsw_core_event_listener_register(mlxsw_core, + &listener->event_listener, + priv); + } else { + return mlxsw_core_rx_listener_register(mlxsw_core, + &listener->rx_listener, + priv, enabled); + } +} + +static void mlxsw_core_listener_unregister(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listener, + void *priv) +{ + if (listener->is_event) + mlxsw_core_event_listener_unregister(mlxsw_core, + &listener->event_listener); + else + mlxsw_core_rx_listener_unregister(mlxsw_core, + &listener->rx_listener); +} + +int mlxsw_core_trap_register(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listener, void *priv) +{ + enum mlxsw_reg_htgt_trap_group trap_group; + enum mlxsw_reg_hpkt_action action; + char hpkt_pl[MLXSW_REG_HPKT_LEN]; + int err; + + if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) + return 0; + + err = mlxsw_core_listener_register(mlxsw_core, listener, priv, + listener->enabled_on_register); + if (err) + return err; + + action = listener->enabled_on_register ? listener->en_action : + listener->dis_action; + trap_group = listener->enabled_on_register ? 
listener->en_trap_group : + listener->dis_trap_group; + mlxsw_reg_hpkt_pack(hpkt_pl, action, listener->trap_id, + trap_group, listener->is_ctrl); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl); + if (err) + goto err_trap_set; + + return 0; + +err_trap_set: + mlxsw_core_listener_unregister(mlxsw_core, listener, priv); + return err; +} +EXPORT_SYMBOL(mlxsw_core_trap_register); + +void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listener, + void *priv) +{ + char hpkt_pl[MLXSW_REG_HPKT_LEN]; + + if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) + return; + + if (!listener->is_event) { + mlxsw_reg_hpkt_pack(hpkt_pl, listener->dis_action, + listener->trap_id, listener->dis_trap_group, + listener->is_ctrl); + mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl); + } + + mlxsw_core_listener_unregister(mlxsw_core, listener, priv); +} +EXPORT_SYMBOL(mlxsw_core_trap_unregister); + +int mlxsw_core_traps_register(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listeners, + size_t listeners_count, void *priv) +{ + int i, err; + + for (i = 0; i < listeners_count; i++) { + err = mlxsw_core_trap_register(mlxsw_core, + &listeners[i], + priv); + if (err) + goto err_listener_register; + } + return 0; + +err_listener_register: + for (i--; i >= 0; i--) { + mlxsw_core_trap_unregister(mlxsw_core, + &listeners[i], + priv); + } + return err; +} +EXPORT_SYMBOL(mlxsw_core_traps_register); + +void mlxsw_core_traps_unregister(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listeners, + size_t listeners_count, void *priv) +{ + int i; + + for (i = 0; i < listeners_count; i++) { + mlxsw_core_trap_unregister(mlxsw_core, + &listeners[i], + priv); + } +} +EXPORT_SYMBOL(mlxsw_core_traps_unregister); + +int mlxsw_core_trap_state_set(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listener, + bool enabled) +{ + enum mlxsw_reg_htgt_trap_group trap_group; + enum mlxsw_reg_hpkt_action action; + char hpkt_pl[MLXSW_REG_HPKT_LEN]; + int err; + + /* Not supported for event listener */ + if (WARN_ON(listener->is_event)) + return -EINVAL; + + action = enabled ? listener->en_action : listener->dis_action; + trap_group = enabled ? 
listener->en_trap_group : + listener->dis_trap_group; + mlxsw_reg_hpkt_pack(hpkt_pl, action, listener->trap_id, + trap_group, listener->is_ctrl); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl); + if (err) + return err; + + mlxsw_core_rx_listener_state_set(mlxsw_core, &listener->rx_listener, + enabled); + return 0; +} +EXPORT_SYMBOL(mlxsw_core_trap_state_set); + +static u64 mlxsw_core_tid_get(struct mlxsw_core *mlxsw_core) +{ + return atomic64_inc_return(&mlxsw_core->emad.tid); +} + +static int mlxsw_core_reg_access_emad(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, + char *payload, + enum mlxsw_core_reg_access_type type, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, + unsigned long cb_priv) +{ + u64 tid = mlxsw_core_tid_get(mlxsw_core); + struct mlxsw_reg_trans *trans; + int err; + + trans = kzalloc(sizeof(*trans), GFP_KERNEL); + if (!trans) + return -ENOMEM; + + err = mlxsw_emad_reg_access(mlxsw_core, reg, payload, type, trans, + bulk_list, cb, cb_priv, tid); + if (err) { + kfree_rcu(trans, rcu); + return err; + } + return 0; +} + +int mlxsw_reg_trans_query(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv) +{ + return mlxsw_core_reg_access_emad(mlxsw_core, reg, payload, + MLXSW_CORE_REG_ACCESS_TYPE_QUERY, + bulk_list, cb, cb_priv); +} +EXPORT_SYMBOL(mlxsw_reg_trans_query); + +int mlxsw_reg_trans_write(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv) +{ + return mlxsw_core_reg_access_emad(mlxsw_core, reg, payload, + MLXSW_CORE_REG_ACCESS_TYPE_WRITE, + bulk_list, cb, cb_priv); +} +EXPORT_SYMBOL(mlxsw_reg_trans_write); + +#define MLXSW_REG_TRANS_ERR_STRING_SIZE 256 + +static int mlxsw_reg_trans_wait(struct mlxsw_reg_trans *trans) +{ + char err_string[MLXSW_REG_TRANS_ERR_STRING_SIZE]; + struct mlxsw_core *mlxsw_core = trans->core; + int err; + + wait_for_completion(&trans->completion); + cancel_delayed_work_sync(&trans->timeout_dw); + err = trans->err; + + if (trans->retries) + dev_warn(mlxsw_core->bus_info->dev, "EMAD retries (%d/%d) (tid=%llx)\n", + trans->retries, MLXSW_EMAD_MAX_RETRY, trans->tid); + if (err) { + dev_err(mlxsw_core->bus_info->dev, "EMAD reg access failed (tid=%llx,reg_id=%x(%s),type=%s,status=%x(%s))\n", + trans->tid, trans->reg->id, + mlxsw_reg_id_str(trans->reg->id), + mlxsw_core_reg_access_type_str(trans->type), + trans->emad_status, + mlxsw_emad_op_tlv_status_str(trans->emad_status)); + + snprintf(err_string, MLXSW_REG_TRANS_ERR_STRING_SIZE, + "(tid=%llx,reg_id=%x(%s)) %s (%s)\n", trans->tid, + trans->reg->id, mlxsw_reg_id_str(trans->reg->id), + mlxsw_emad_op_tlv_status_str(trans->emad_status), + trans->emad_err_string ? 
trans->emad_err_string : ""); + + trace_devlink_hwerr(priv_to_devlink(mlxsw_core), + trans->emad_status, err_string); + + kfree(trans->emad_err_string); + } + + list_del(&trans->bulk_list); + kfree_rcu(trans, rcu); + return err; +} + +int mlxsw_reg_trans_bulk_wait(struct list_head *bulk_list) +{ + struct mlxsw_reg_trans *trans; + struct mlxsw_reg_trans *tmp; + int sum_err = 0; + int err; + + list_for_each_entry_safe(trans, tmp, bulk_list, bulk_list) { + err = mlxsw_reg_trans_wait(trans); + if (err && sum_err == 0) + sum_err = err; /* first error to be returned */ + } + return sum_err; +} +EXPORT_SYMBOL(mlxsw_reg_trans_bulk_wait); + +struct mlxsw_core_irq_event_handler_item { + struct list_head list; + void (*cb)(struct mlxsw_core *mlxsw_core); +}; + +int mlxsw_core_irq_event_handler_register(struct mlxsw_core *mlxsw_core, + mlxsw_irq_event_cb_t cb) +{ + struct mlxsw_core_irq_event_handler_item *item; + + item = kzalloc(sizeof(*item), GFP_KERNEL); + if (!item) + return -ENOMEM; + item->cb = cb; + mutex_lock(&mlxsw_core->irq_event_handler_lock); + list_add_tail(&item->list, &mlxsw_core->irq_event_handler_list); + mutex_unlock(&mlxsw_core->irq_event_handler_lock); + return 0; +} +EXPORT_SYMBOL(mlxsw_core_irq_event_handler_register); + +void mlxsw_core_irq_event_handler_unregister(struct mlxsw_core *mlxsw_core, + mlxsw_irq_event_cb_t cb) +{ + struct mlxsw_core_irq_event_handler_item *item, *tmp; + + mutex_lock(&mlxsw_core->irq_event_handler_lock); + list_for_each_entry_safe(item, tmp, + &mlxsw_core->irq_event_handler_list, list) { + if (item->cb == cb) { + list_del(&item->list); + kfree(item); + } + } + mutex_unlock(&mlxsw_core->irq_event_handler_lock); +} +EXPORT_SYMBOL(mlxsw_core_irq_event_handler_unregister); + +void mlxsw_core_irq_event_handlers_call(struct mlxsw_core *mlxsw_core) +{ + struct mlxsw_core_irq_event_handler_item *item; + + mutex_lock(&mlxsw_core->irq_event_handler_lock); + list_for_each_entry(item, &mlxsw_core->irq_event_handler_list, list) { + if (item->cb) + item->cb(mlxsw_core); + } + mutex_unlock(&mlxsw_core->irq_event_handler_lock); +} +EXPORT_SYMBOL(mlxsw_core_irq_event_handlers_call); + +static int mlxsw_core_reg_access_cmd(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, + char *payload, + enum mlxsw_core_reg_access_type type) +{ + enum mlxsw_emad_op_tlv_status status; + int err, n_retry; + bool reset_ok; + char *in_mbox, *out_mbox, *tmp; + + dev_dbg(mlxsw_core->bus_info->dev, "Reg cmd access (reg_id=%x(%s),type=%s)\n", + reg->id, mlxsw_reg_id_str(reg->id), + mlxsw_core_reg_access_type_str(type)); + + in_mbox = mlxsw_cmd_mbox_alloc(); + if (!in_mbox) + return -ENOMEM; + + out_mbox = mlxsw_cmd_mbox_alloc(); + if (!out_mbox) { + err = -ENOMEM; + goto free_in_mbox; + } + + mlxsw_emad_pack_op_tlv(in_mbox, reg, type, + mlxsw_core_tid_get(mlxsw_core)); + tmp = in_mbox + MLXSW_EMAD_OP_TLV_LEN * sizeof(u32); + mlxsw_emad_pack_reg_tlv(tmp, reg, payload); + + /* There is a special treatment needed for MRSR (reset) register. + * The command interface will return error after the command + * is executed, so tell the lower layer to expect it + * and cope accordingly. 
+ */ + reset_ok = reg->id == MLXSW_REG_MRSR_ID; + + n_retry = 0; +retry: + err = mlxsw_cmd_access_reg(mlxsw_core, reset_ok, in_mbox, out_mbox); + if (!err) { + err = mlxsw_emad_process_status(out_mbox, &status); + if (err) { + if (err == -EAGAIN && n_retry++ < MLXSW_EMAD_MAX_RETRY) + goto retry; + dev_err(mlxsw_core->bus_info->dev, "Reg cmd access status failed (status=%x(%s))\n", + status, mlxsw_emad_op_tlv_status_str(status)); + } + } + + if (!err) + memcpy(payload, mlxsw_emad_reg_payload_cmd(out_mbox), + reg->len); + + mlxsw_cmd_mbox_free(out_mbox); +free_in_mbox: + mlxsw_cmd_mbox_free(in_mbox); + if (err) + dev_err(mlxsw_core->bus_info->dev, "Reg cmd access failed (reg_id=%x(%s),type=%s)\n", + reg->id, mlxsw_reg_id_str(reg->id), + mlxsw_core_reg_access_type_str(type)); + return err; +} + +static void mlxsw_core_reg_access_cb(struct mlxsw_core *mlxsw_core, + char *payload, size_t payload_len, + unsigned long cb_priv) +{ + char *orig_payload = (char *) cb_priv; + + memcpy(orig_payload, payload, payload_len); +} + +static int mlxsw_core_reg_access(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, + char *payload, + enum mlxsw_core_reg_access_type type) +{ + LIST_HEAD(bulk_list); + int err; + + /* During initialization EMAD interface is not available to us, + * so we default to command interface. We switch to EMAD interface + * after setting the appropriate traps. + */ + if (!mlxsw_core->emad.use_emad) + return mlxsw_core_reg_access_cmd(mlxsw_core, reg, + payload, type); + + err = mlxsw_core_reg_access_emad(mlxsw_core, reg, + payload, type, &bulk_list, + mlxsw_core_reg_access_cb, + (unsigned long) payload); + if (err) + return err; + return mlxsw_reg_trans_bulk_wait(&bulk_list); +} + +int mlxsw_reg_query(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload) +{ + return mlxsw_core_reg_access(mlxsw_core, reg, payload, + MLXSW_CORE_REG_ACCESS_TYPE_QUERY); +} +EXPORT_SYMBOL(mlxsw_reg_query); + +int mlxsw_reg_write(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload) +{ + return mlxsw_core_reg_access(mlxsw_core, reg, payload, + MLXSW_CORE_REG_ACCESS_TYPE_WRITE); +} +EXPORT_SYMBOL(mlxsw_reg_write); + +void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, + struct mlxsw_rx_info *rx_info) +{ + struct mlxsw_rx_listener_item *rxl_item; + const struct mlxsw_rx_listener *rxl; + u16 local_port; + bool found = false; + + if (rx_info->is_lag) { + dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: lag_id = %d, lag_port_index = 0x%x\n", + __func__, rx_info->u.lag_id, + rx_info->trap_id); + /* Upper layer does not care if the skb came from LAG or not, + * so just get the local_port for the lag port and push it up. 
+ */ + local_port = mlxsw_core_lag_mapping_get(mlxsw_core, + rx_info->u.lag_id, + rx_info->lag_port_index); + } else { + local_port = rx_info->u.sys_port; + } + + dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: local_port = %d, trap_id = 0x%x\n", + __func__, local_port, rx_info->trap_id); + + if ((rx_info->trap_id >= MLXSW_TRAP_ID_MAX) || + (local_port >= mlxsw_core->max_ports)) + goto drop; + + rcu_read_lock(); + list_for_each_entry_rcu(rxl_item, &mlxsw_core->rx_listener_list, list) { + rxl = &rxl_item->rxl; + if ((rxl->local_port == MLXSW_PORT_DONT_CARE || + rxl->local_port == local_port) && + rxl->trap_id == rx_info->trap_id && + rxl->mirror_reason == rx_info->mirror_reason) { + if (rxl_item->enabled) + found = true; + break; + } + } + if (!found) { + rcu_read_unlock(); + goto drop; + } + + rxl->func(skb, local_port, rxl_item->priv); + rcu_read_unlock(); + return; + +drop: + dev_kfree_skb(skb); +} +EXPORT_SYMBOL(mlxsw_core_skb_receive); + +static int mlxsw_core_lag_mapping_index(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index) +{ + return MLXSW_CORE_RES_GET(mlxsw_core, MAX_LAG_MEMBERS) * lag_id + + port_index; +} + +void mlxsw_core_lag_mapping_set(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index, u16 local_port) +{ + int index = mlxsw_core_lag_mapping_index(mlxsw_core, + lag_id, port_index); + + mlxsw_core->lag.mapping[index] = local_port; +} +EXPORT_SYMBOL(mlxsw_core_lag_mapping_set); + +u16 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index) +{ + int index = mlxsw_core_lag_mapping_index(mlxsw_core, + lag_id, port_index); + + return mlxsw_core->lag.mapping[index]; +} +EXPORT_SYMBOL(mlxsw_core_lag_mapping_get); + +void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, + u16 lag_id, u16 local_port) +{ + int i; + + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_core, MAX_LAG_MEMBERS); i++) { + int index = mlxsw_core_lag_mapping_index(mlxsw_core, + lag_id, i); + + if (mlxsw_core->lag.mapping[index] == local_port) + mlxsw_core->lag.mapping[index] = 0; + } +} +EXPORT_SYMBOL(mlxsw_core_lag_mapping_clear); + +bool mlxsw_core_res_valid(struct mlxsw_core *mlxsw_core, + enum mlxsw_res_id res_id) +{ + return mlxsw_res_valid(&mlxsw_core->res, res_id); +} +EXPORT_SYMBOL(mlxsw_core_res_valid); + +u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core, + enum mlxsw_res_id res_id) +{ + return mlxsw_res_get(&mlxsw_core->res, res_id); +} +EXPORT_SYMBOL(mlxsw_core_res_get); + +static int __mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u16 local_port, + enum devlink_port_flavour flavour, + u8 slot_index, u32 port_number, bool split, + u32 split_port_subnumber, + bool splittable, u32 lanes, + const unsigned char *switch_id, + unsigned char switch_id_len) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct mlxsw_core_port *mlxsw_core_port = + &mlxsw_core->ports[local_port]; + struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port; + struct devlink_port_attrs attrs = {}; + int err; + + attrs.split = split; + attrs.lanes = lanes; + attrs.splittable = splittable; + attrs.flavour = flavour; + attrs.phys.port_number = port_number; + attrs.phys.split_subport_number = split_port_subnumber; + memcpy(attrs.switch_id.id, switch_id, switch_id_len); + attrs.switch_id.id_len = switch_id_len; + mlxsw_core_port->local_port = local_port; + devlink_port_attrs_set(devlink_port, &attrs); + if (slot_index) { + struct mlxsw_linecard *linecard; + + linecard = mlxsw_linecard_get(mlxsw_core->linecards, + slot_index); + 
mlxsw_core_port->linecard = linecard; + devlink_port_linecard_set(devlink_port, + linecard->devlink_linecard); + } + err = devl_port_register(devlink, devlink_port, local_port); + if (err) + memset(mlxsw_core_port, 0, sizeof(*mlxsw_core_port)); + return err; +} + +static void __mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u16 local_port) +{ + struct mlxsw_core_port *mlxsw_core_port = + &mlxsw_core->ports[local_port]; + struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port; + + devl_port_unregister(devlink_port); + memset(mlxsw_core_port, 0, sizeof(*mlxsw_core_port)); +} + +int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u16 local_port, + u8 slot_index, u32 port_number, bool split, + u32 split_port_subnumber, + bool splittable, u32 lanes, + const unsigned char *switch_id, + unsigned char switch_id_len) +{ + int err; + + err = __mlxsw_core_port_init(mlxsw_core, local_port, + DEVLINK_PORT_FLAVOUR_PHYSICAL, slot_index, + port_number, split, split_port_subnumber, + splittable, lanes, + switch_id, switch_id_len); + if (err) + return err; + + atomic_inc(&mlxsw_core->active_ports_count); + return 0; +} +EXPORT_SYMBOL(mlxsw_core_port_init); + +void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u16 local_port) +{ + atomic_dec(&mlxsw_core->active_ports_count); + + __mlxsw_core_port_fini(mlxsw_core, local_port); +} +EXPORT_SYMBOL(mlxsw_core_port_fini); + +int mlxsw_core_cpu_port_init(struct mlxsw_core *mlxsw_core, + void *port_driver_priv, + const unsigned char *switch_id, + unsigned char switch_id_len) +{ + struct mlxsw_core_port *mlxsw_core_port = + &mlxsw_core->ports[MLXSW_PORT_CPU_PORT]; + int err; + + err = __mlxsw_core_port_init(mlxsw_core, MLXSW_PORT_CPU_PORT, + DEVLINK_PORT_FLAVOUR_CPU, + 0, 0, false, 0, false, 0, + switch_id, switch_id_len); + if (err) + return err; + + mlxsw_core_port->port_driver_priv = port_driver_priv; + return 0; +} +EXPORT_SYMBOL(mlxsw_core_cpu_port_init); + +void mlxsw_core_cpu_port_fini(struct mlxsw_core *mlxsw_core) +{ + __mlxsw_core_port_fini(mlxsw_core, MLXSW_PORT_CPU_PORT); +} +EXPORT_SYMBOL(mlxsw_core_cpu_port_fini); + +void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u16 local_port, + void *port_driver_priv, struct net_device *dev) +{ + struct mlxsw_core_port *mlxsw_core_port = + &mlxsw_core->ports[local_port]; + struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port; + + mlxsw_core_port->port_driver_priv = port_driver_priv; + devlink_port_type_eth_set(devlink_port, dev); +} +EXPORT_SYMBOL(mlxsw_core_port_eth_set); + +void mlxsw_core_port_clear(struct mlxsw_core *mlxsw_core, u16 local_port, + void *port_driver_priv) +{ + struct mlxsw_core_port *mlxsw_core_port = + &mlxsw_core->ports[local_port]; + struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port; + + mlxsw_core_port->port_driver_priv = port_driver_priv; + devlink_port_type_clear(devlink_port); +} +EXPORT_SYMBOL(mlxsw_core_port_clear); + +struct devlink_port * +mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core, + u16 local_port) +{ + struct mlxsw_core_port *mlxsw_core_port = + &mlxsw_core->ports[local_port]; + struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port; + + return devlink_port; +} +EXPORT_SYMBOL(mlxsw_core_port_devlink_port_get); + +struct mlxsw_linecard * +mlxsw_core_port_linecard_get(struct mlxsw_core *mlxsw_core, + u16 local_port) +{ + struct mlxsw_core_port *mlxsw_core_port = + &mlxsw_core->ports[local_port]; + + return mlxsw_core_port->linecard; +} + +void mlxsw_core_ports_remove_selected(struct 
mlxsw_core *mlxsw_core, + bool (*selector)(void *priv, u16 local_port), + void *priv) +{ + if (WARN_ON_ONCE(!mlxsw_core->driver->ports_remove_selected)) + return; + mlxsw_core->driver->ports_remove_selected(mlxsw_core, selector, priv); +} + +struct mlxsw_env *mlxsw_core_env(const struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->env; +} + +static void mlxsw_core_buf_dump_dbg(struct mlxsw_core *mlxsw_core, + const char *buf, size_t size) +{ + __be32 *m = (__be32 *) buf; + int i; + int count = size / sizeof(__be32); + + for (i = count - 1; i >= 0; i--) + if (m[i]) + break; + i++; + count = i ? i : 1; + for (i = 0; i < count; i += 4) + dev_dbg(mlxsw_core->bus_info->dev, "%04x - %08x %08x %08x %08x\n", + i * 4, be32_to_cpu(m[i]), be32_to_cpu(m[i + 1]), + be32_to_cpu(m[i + 2]), be32_to_cpu(m[i + 3])); +} + +int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod, + u32 in_mod, bool out_mbox_direct, bool reset_ok, + char *in_mbox, size_t in_mbox_size, + char *out_mbox, size_t out_mbox_size) +{ + u8 status; + int err; + + BUG_ON(in_mbox_size % sizeof(u32) || out_mbox_size % sizeof(u32)); + if (!mlxsw_core->bus->cmd_exec) + return -EOPNOTSUPP; + + dev_dbg(mlxsw_core->bus_info->dev, "Cmd exec (opcode=%x(%s),opcode_mod=%x,in_mod=%x)\n", + opcode, mlxsw_cmd_opcode_str(opcode), opcode_mod, in_mod); + if (in_mbox) { + dev_dbg(mlxsw_core->bus_info->dev, "Input mailbox:\n"); + mlxsw_core_buf_dump_dbg(mlxsw_core, in_mbox, in_mbox_size); + } + + err = mlxsw_core->bus->cmd_exec(mlxsw_core->bus_priv, opcode, + opcode_mod, in_mod, out_mbox_direct, + in_mbox, in_mbox_size, + out_mbox, out_mbox_size, &status); + + if (!err && out_mbox) { + dev_dbg(mlxsw_core->bus_info->dev, "Output mailbox:\n"); + mlxsw_core_buf_dump_dbg(mlxsw_core, out_mbox, out_mbox_size); + } + + if (reset_ok && err == -EIO && + status == MLXSW_CMD_STATUS_RUNNING_RESET) { + err = 0; + } else if (err == -EIO && status != MLXSW_CMD_STATUS_OK) { + dev_err(mlxsw_core->bus_info->dev, "Cmd exec failed (opcode=%x(%s),opcode_mod=%x,in_mod=%x,status=%x(%s))\n", + opcode, mlxsw_cmd_opcode_str(opcode), opcode_mod, + in_mod, status, mlxsw_cmd_status_str(status)); + } else if (err == -ETIMEDOUT) { + dev_err(mlxsw_core->bus_info->dev, "Cmd exec timed-out (opcode=%x(%s),opcode_mod=%x,in_mod=%x)\n", + opcode, mlxsw_cmd_opcode_str(opcode), opcode_mod, + in_mod); + } + + return err; +} +EXPORT_SYMBOL(mlxsw_cmd_exec); + +int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay) +{ + return queue_delayed_work(mlxsw_wq, dwork, delay); +} +EXPORT_SYMBOL(mlxsw_core_schedule_dw); + +bool mlxsw_core_schedule_work(struct work_struct *work) +{ + return queue_work(mlxsw_owq, work); +} +EXPORT_SYMBOL(mlxsw_core_schedule_work); + +void mlxsw_core_flush_owq(void) +{ + flush_workqueue(mlxsw_owq); +} +EXPORT_SYMBOL(mlxsw_core_flush_owq); + +int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core, + const struct mlxsw_config_profile *profile, + u64 *p_single_size, u64 *p_double_size, + u64 *p_linear_size) +{ + struct mlxsw_driver *driver = mlxsw_core->driver; + + if (!driver->kvd_sizes_get) + return -EINVAL; + + return driver->kvd_sizes_get(mlxsw_core, profile, + p_single_size, p_double_size, + p_linear_size); +} +EXPORT_SYMBOL(mlxsw_core_kvd_sizes_get); + +int mlxsw_core_resources_query(struct mlxsw_core *mlxsw_core, char *mbox, + struct mlxsw_res *res) +{ + int index, i; + u64 data; + u16 id; + int err; + + mlxsw_cmd_mbox_zero(mbox); + + for (index = 0; index < MLXSW_CMD_QUERY_RESOURCES_MAX_QUERIES; + index++) { + err = 
mlxsw_cmd_query_resources(mlxsw_core, mbox, index); + if (err) + return err; + + for (i = 0; i < MLXSW_CMD_QUERY_RESOURCES_PER_QUERY; i++) { + id = mlxsw_cmd_mbox_query_resource_id_get(mbox, i); + data = mlxsw_cmd_mbox_query_resource_data_get(mbox, i); + + if (id == MLXSW_CMD_QUERY_RESOURCES_TABLE_END_ID) + return 0; + + mlxsw_res_parse(res, id, data); + } + } + + /* If after MLXSW_RESOURCES_QUERY_MAX_QUERIES we still didn't get + * MLXSW_RESOURCES_TABLE_END_ID, something went bad in the FW. + */ + return -EIO; +} +EXPORT_SYMBOL(mlxsw_core_resources_query); + +u32 mlxsw_core_read_frc_h(struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->bus->read_frc_h(mlxsw_core->bus_priv); +} +EXPORT_SYMBOL(mlxsw_core_read_frc_h); + +u32 mlxsw_core_read_frc_l(struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->bus->read_frc_l(mlxsw_core->bus_priv); +} +EXPORT_SYMBOL(mlxsw_core_read_frc_l); + +u32 mlxsw_core_read_utc_sec(struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->bus->read_utc_sec(mlxsw_core->bus_priv); +} +EXPORT_SYMBOL(mlxsw_core_read_utc_sec); + +u32 mlxsw_core_read_utc_nsec(struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->bus->read_utc_nsec(mlxsw_core->bus_priv); +} +EXPORT_SYMBOL(mlxsw_core_read_utc_nsec); + +bool mlxsw_core_sdq_supports_cqe_v2(struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->driver->sdq_supports_cqe_v2; +} +EXPORT_SYMBOL(mlxsw_core_sdq_supports_cqe_v2); + +void mlxsw_core_emad_string_tlv_enable(struct mlxsw_core *mlxsw_core) +{ + mlxsw_core->emad.enable_string_tlv = true; +} +EXPORT_SYMBOL(mlxsw_core_emad_string_tlv_enable); + +static int __init mlxsw_core_module_init(void) +{ + int err; + + err = mlxsw_linecard_driver_register(); + if (err) + return err; + + mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, 0, 0); + if (!mlxsw_wq) { + err = -ENOMEM; + goto err_alloc_workqueue; + } + mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0, + mlxsw_core_driver_name); + if (!mlxsw_owq) { + err = -ENOMEM; + goto err_alloc_ordered_workqueue; + } + return 0; + +err_alloc_ordered_workqueue: + destroy_workqueue(mlxsw_wq); +err_alloc_workqueue: + mlxsw_linecard_driver_unregister(); + return err; +} + +static void __exit mlxsw_core_module_exit(void) +{ + destroy_workqueue(mlxsw_owq); + destroy_workqueue(mlxsw_wq); + mlxsw_linecard_driver_unregister(); +} + +module_init(mlxsw_core_module_init); +module_exit(mlxsw_core_module_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>"); +MODULE_DESCRIPTION("Mellanox switch device core driver"); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h new file mode 100644 index 000000000..ca0c3d2be --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -0,0 +1,674 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2015-2018 Mellanox Technologies. 
All rights reserved */ + +#ifndef _MLXSW_CORE_H +#define _MLXSW_CORE_H + +#include <linux/module.h> +#include <linux/device.h> +#include <linux/slab.h> +#include <linux/gfp.h> +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/workqueue.h> +#include <linux/net_namespace.h> +#include <linux/auxiliary_bus.h> +#include <net/devlink.h> + +#include "trap.h" +#include "reg.h" +#include "cmd.h" +#include "resources.h" +#include "../mlxfw/mlxfw.h" + +enum mlxsw_core_resource_id { + MLXSW_CORE_RESOURCE_PORTS = 1, + MLXSW_CORE_RESOURCE_MAX, +}; + +struct mlxsw_core; +struct mlxsw_core_port; +struct mlxsw_driver; +struct mlxsw_bus; +struct mlxsw_bus_info; +struct mlxsw_fw_rev; + +unsigned int mlxsw_core_max_ports(const struct mlxsw_core *mlxsw_core); + +int mlxsw_core_max_lag(struct mlxsw_core *mlxsw_core, u16 *p_max_lag); + +void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core); + +struct mlxsw_linecards *mlxsw_core_linecards(struct mlxsw_core *mlxsw_core); + +void mlxsw_core_linecards_set(struct mlxsw_core *mlxsw_core, + struct mlxsw_linecards *linecard); + +bool +mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev, + const struct mlxsw_fw_rev *req_rev); + +int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver); +void mlxsw_core_driver_unregister(struct mlxsw_driver *mlxsw_driver); + +int mlxsw_core_fw_flash(struct mlxsw_core *mlxsw_core, + struct mlxfw_dev *mlxfw_dev, + const struct firmware *firmware, + struct netlink_ext_ack *extack); + +int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, + const struct mlxsw_bus *mlxsw_bus, + void *bus_priv, bool reload, + struct devlink *devlink, + struct netlink_ext_ack *extack); +void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, bool reload); + +struct mlxsw_tx_info { + u16 local_port; + bool is_emad; +}; + +struct mlxsw_rx_md_info { + u32 cookie_index; + u32 latency; + u32 tx_congestion; + union { + /* Valid when 'tx_port_valid' is set. */ + u16 tx_sys_port; + u16 tx_lag_id; + }; + u16 tx_lag_port_index; /* Valid when 'tx_port_is_lag' is set. */ + u8 tx_tc; + u8 latency_valid:1, + tx_congestion_valid:1, + tx_tc_valid:1, + tx_port_valid:1, + tx_port_is_lag:1, + unused:3; +}; + +bool mlxsw_core_skb_transmit_busy(struct mlxsw_core *mlxsw_core, + const struct mlxsw_tx_info *tx_info); +int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info); +void mlxsw_core_ptp_transmitted(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, u16 local_port); + +struct mlxsw_rx_listener { + void (*func)(struct sk_buff *skb, u16 local_port, void *priv); + u16 local_port; + u8 mirror_reason; + u16 trap_id; +}; + +struct mlxsw_event_listener { + void (*func)(const struct mlxsw_reg_info *reg, + char *payload, void *priv); + enum mlxsw_event_trap_id trap_id; +}; + +struct mlxsw_listener { + u16 trap_id; + union { + struct mlxsw_rx_listener rx_listener; + struct mlxsw_event_listener event_listener; + }; + enum mlxsw_reg_hpkt_action en_action; /* Action when enabled */ + enum mlxsw_reg_hpkt_action dis_action; /* Action when disabled */ + u8 en_trap_group; /* Trap group when enabled */ + u8 dis_trap_group; /* Trap group when disabled */ + u8 is_ctrl:1, /* should go via control buffer or not */ + is_event:1, + enabled_on_register:1; /* Trap should be enabled when listener + * is registered. 
+ */ +}; + +#define __MLXSW_RXL(_func, _trap_id, _en_action, _is_ctrl, _en_trap_group, \ + _dis_action, _enabled_on_register, _dis_trap_group, \ + _mirror_reason) \ + { \ + .trap_id = MLXSW_TRAP_ID_##_trap_id, \ + .rx_listener = \ + { \ + .func = _func, \ + .local_port = MLXSW_PORT_DONT_CARE, \ + .mirror_reason = _mirror_reason, \ + .trap_id = MLXSW_TRAP_ID_##_trap_id, \ + }, \ + .en_action = MLXSW_REG_HPKT_ACTION_##_en_action, \ + .dis_action = MLXSW_REG_HPKT_ACTION_##_dis_action, \ + .en_trap_group = MLXSW_REG_HTGT_TRAP_GROUP_##_en_trap_group, \ + .dis_trap_group = MLXSW_REG_HTGT_TRAP_GROUP_##_dis_trap_group, \ + .is_ctrl = _is_ctrl, \ + .enabled_on_register = _enabled_on_register, \ + } + +#define MLXSW_RXL(_func, _trap_id, _en_action, _is_ctrl, _trap_group, \ + _dis_action) \ + __MLXSW_RXL(_func, _trap_id, _en_action, _is_ctrl, _trap_group, \ + _dis_action, true, _trap_group, 0) + +#define MLXSW_RXL_DIS(_func, _trap_id, _en_action, _is_ctrl, _en_trap_group, \ + _dis_action, _dis_trap_group) \ + __MLXSW_RXL(_func, _trap_id, _en_action, _is_ctrl, _en_trap_group, \ + _dis_action, false, _dis_trap_group, 0) + +#define MLXSW_RXL_MIRROR(_func, _session_id, _trap_group, _mirror_reason) \ + __MLXSW_RXL(_func, MIRROR_SESSION##_session_id, TRAP_TO_CPU, false, \ + _trap_group, TRAP_TO_CPU, true, _trap_group, \ + _mirror_reason) + +#define MLXSW_EVENTL(_func, _trap_id, _trap_group) \ + { \ + .trap_id = MLXSW_TRAP_ID_##_trap_id, \ + .event_listener = \ + { \ + .func = _func, \ + .trap_id = MLXSW_TRAP_ID_##_trap_id, \ + }, \ + .en_action = MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU, \ + .en_trap_group = MLXSW_REG_HTGT_TRAP_GROUP_##_trap_group, \ + .is_event = true, \ + .enabled_on_register = true, \ + } + +#define MLXSW_CORE_EVENTL(_func, _trap_id) \ + MLXSW_EVENTL(_func, _trap_id, CORE_EVENT) + +int mlxsw_core_rx_listener_register(struct mlxsw_core *mlxsw_core, + const struct mlxsw_rx_listener *rxl, + void *priv, bool enabled); +void mlxsw_core_rx_listener_unregister(struct mlxsw_core *mlxsw_core, + const struct mlxsw_rx_listener *rxl); + +int mlxsw_core_event_listener_register(struct mlxsw_core *mlxsw_core, + const struct mlxsw_event_listener *el, + void *priv); +void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core, + const struct mlxsw_event_listener *el); + +int mlxsw_core_trap_register(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listener, + void *priv); +void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listener, + void *priv); +int mlxsw_core_traps_register(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listeners, + size_t listeners_count, void *priv); +void mlxsw_core_traps_unregister(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listeners, + size_t listeners_count, void *priv); +int mlxsw_core_trap_state_set(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listener, + bool enabled); + +typedef void mlxsw_reg_trans_cb_t(struct mlxsw_core *mlxsw_core, char *payload, + size_t payload_len, unsigned long cb_priv); + +int mlxsw_reg_trans_query(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv); +int mlxsw_reg_trans_write(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv); +int mlxsw_reg_trans_bulk_wait(struct list_head *bulk_list); + +typedef void 
mlxsw_irq_event_cb_t(struct mlxsw_core *mlxsw_core); + +int mlxsw_core_irq_event_handler_register(struct mlxsw_core *mlxsw_core, + mlxsw_irq_event_cb_t cb); +void mlxsw_core_irq_event_handler_unregister(struct mlxsw_core *mlxsw_core, + mlxsw_irq_event_cb_t cb); +void mlxsw_core_irq_event_handlers_call(struct mlxsw_core *mlxsw_core); + +int mlxsw_reg_query(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload); +int mlxsw_reg_write(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload); + +struct mlxsw_rx_info { + bool is_lag; + union { + u16 sys_port; + u16 lag_id; + } u; + u16 lag_port_index; + u8 mirror_reason; + int trap_id; +}; + +void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, + struct mlxsw_rx_info *rx_info); + +void mlxsw_core_lag_mapping_set(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index, u16 local_port); +u16 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index); +void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, + u16 lag_id, u16 local_port); + +void *mlxsw_core_port_driver_priv(struct mlxsw_core_port *mlxsw_core_port); +int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u16 local_port, + u8 slot_index, u32 port_number, bool split, + u32 split_port_subnumber, + bool splittable, u32 lanes, + const unsigned char *switch_id, + unsigned char switch_id_len); +void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u16 local_port); +int mlxsw_core_cpu_port_init(struct mlxsw_core *mlxsw_core, + void *port_driver_priv, + const unsigned char *switch_id, + unsigned char switch_id_len); +void mlxsw_core_cpu_port_fini(struct mlxsw_core *mlxsw_core); +void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u16 local_port, + void *port_driver_priv, struct net_device *dev); +void mlxsw_core_port_clear(struct mlxsw_core *mlxsw_core, u16 local_port, + void *port_driver_priv); +struct devlink_port * +mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core, + u16 local_port); +struct mlxsw_linecard * +mlxsw_core_port_linecard_get(struct mlxsw_core *mlxsw_core, + u16 local_port); +void mlxsw_core_ports_remove_selected(struct mlxsw_core *mlxsw_core, + bool (*selector)(void *priv, + u16 local_port), + void *priv); +struct mlxsw_env *mlxsw_core_env(const struct mlxsw_core *mlxsw_core); + +int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay); +bool mlxsw_core_schedule_work(struct work_struct *work); +void mlxsw_core_flush_owq(void); +int mlxsw_core_resources_query(struct mlxsw_core *mlxsw_core, char *mbox, + struct mlxsw_res *res); + +#define MLXSW_CONFIG_PROFILE_SWID_COUNT 8 + +struct mlxsw_swid_config { + u8 used_type:1, + used_properties:1; + u8 type; + u8 properties; +}; + +struct mlxsw_config_profile { + u16 used_max_vepa_channels:1, + used_max_lag:1, + used_max_mid:1, + used_max_pgt:1, + used_max_system_port:1, + used_max_vlan_groups:1, + used_max_regions:1, + used_flood_tables:1, + used_flood_mode:1, + used_max_ib_mc:1, + used_max_pkey:1, + used_ar_sec:1, + used_adaptive_routing_group_cap:1, + used_ubridge:1, + used_kvd_sizes:1, + used_cqe_time_stamp_type:1; + u8 max_vepa_channels; + u16 max_lag; + u16 max_mid; + u16 max_pgt; + u16 max_system_port; + u16 max_vlan_groups; + u16 max_regions; + u8 max_flood_tables; + u8 max_vid_flood_tables; + u8 flood_mode; + u8 max_fid_offset_flood_tables; + u16 fid_offset_flood_table_size; + u8 max_fid_flood_tables; + u16 fid_flood_table_size; + u16 max_ib_mc; + u16 max_pkey; + u8 
ar_sec; + u16 adaptive_routing_group_cap; + u8 arn; + u8 ubridge; + u32 kvd_linear_size; + u8 kvd_hash_single_parts; + u8 kvd_hash_double_parts; + u8 cqe_time_stamp_type; + struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT]; +}; + +struct mlxsw_driver { + struct list_head list; + const char *kind; + size_t priv_size; + const struct mlxsw_fw_rev *fw_req_rev; + const char *fw_filename; + int (*init)(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack); + void (*fini)(struct mlxsw_core *mlxsw_core); + int (*port_split)(struct mlxsw_core *mlxsw_core, u16 local_port, + unsigned int count, struct netlink_ext_ack *extack); + int (*port_unsplit)(struct mlxsw_core *mlxsw_core, u16 local_port, + struct netlink_ext_ack *extack); + void (*ports_remove_selected)(struct mlxsw_core *mlxsw_core, + bool (*selector)(void *priv, + u16 local_port), + void *priv); + int (*sb_pool_get)(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, + struct devlink_sb_pool_info *pool_info); + int (*sb_pool_set)(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type, + struct netlink_ext_ack *extack); + int (*sb_port_pool_get)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold); + int (*sb_port_pool_set)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 threshold, struct netlink_ext_ack *extack); + int (*sb_tc_pool_bind_get)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold); + int (*sb_tc_pool_bind_set)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold, + struct netlink_ext_ack *extack); + int (*sb_occ_snapshot)(struct mlxsw_core *mlxsw_core, + unsigned int sb_index); + int (*sb_occ_max_clear)(struct mlxsw_core *mlxsw_core, + unsigned int sb_index); + int (*sb_occ_port_pool_get)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max); + int (*sb_occ_tc_port_bind_get)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max); + int (*trap_init)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, void *trap_ctx); + void (*trap_fini)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, void *trap_ctx); + int (*trap_action_set)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, + enum devlink_trap_action action, + struct netlink_ext_ack *extack); + int (*trap_group_init)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_group *group); + int (*trap_group_set)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_group *group, + const struct devlink_trap_policer *policer, + struct netlink_ext_ack *extack); + int (*trap_policer_init)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer); + void (*trap_policer_fini)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer); + int (*trap_policer_set)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer, + u64 rate, u64 burst, + struct netlink_ext_ack *extack); + int (*trap_policer_counter_get)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer 
*policer, + u64 *p_drops); + void (*txhdr_construct)(struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info); + int (*resources_register)(struct mlxsw_core *mlxsw_core); + int (*kvd_sizes_get)(struct mlxsw_core *mlxsw_core, + const struct mlxsw_config_profile *profile, + u64 *p_single_size, u64 *p_double_size, + u64 *p_linear_size); + int (*params_register)(struct mlxsw_core *mlxsw_core); + void (*params_unregister)(struct mlxsw_core *mlxsw_core); + + /* Notify a driver that a timestamped packet was transmitted. Driver + * is responsible for freeing the passed-in SKB. + */ + void (*ptp_transmitted)(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, u16 local_port); + + u8 txhdr_len; + const struct mlxsw_config_profile *profile; + bool sdq_supports_cqe_v2; +}; + +int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core, + const struct mlxsw_config_profile *profile, + u64 *p_single_size, u64 *p_double_size, + u64 *p_linear_size); + +u32 mlxsw_core_read_frc_h(struct mlxsw_core *mlxsw_core); +u32 mlxsw_core_read_frc_l(struct mlxsw_core *mlxsw_core); + +u32 mlxsw_core_read_utc_sec(struct mlxsw_core *mlxsw_core); +u32 mlxsw_core_read_utc_nsec(struct mlxsw_core *mlxsw_core); + +bool mlxsw_core_sdq_supports_cqe_v2(struct mlxsw_core *mlxsw_core); + +void mlxsw_core_emad_string_tlv_enable(struct mlxsw_core *mlxsw_core); + +bool mlxsw_core_res_valid(struct mlxsw_core *mlxsw_core, + enum mlxsw_res_id res_id); + +#define MLXSW_CORE_RES_VALID(mlxsw_core, short_res_id) \ + mlxsw_core_res_valid(mlxsw_core, MLXSW_RES_ID_##short_res_id) + +u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core, + enum mlxsw_res_id res_id); + +#define MLXSW_CORE_RES_GET(mlxsw_core, short_res_id) \ + mlxsw_core_res_get(mlxsw_core, MLXSW_RES_ID_##short_res_id) + +static inline struct net *mlxsw_core_net(struct mlxsw_core *mlxsw_core) +{ + return devlink_net(priv_to_devlink(mlxsw_core)); +} + +#define MLXSW_BUS_F_TXRX BIT(0) +#define MLXSW_BUS_F_RESET BIT(1) + +struct mlxsw_bus { + const char *kind; + int (*init)(void *bus_priv, struct mlxsw_core *mlxsw_core, + const struct mlxsw_config_profile *profile, + struct mlxsw_res *res); + void (*fini)(void *bus_priv); + bool (*skb_transmit_busy)(void *bus_priv, + const struct mlxsw_tx_info *tx_info); + int (*skb_transmit)(void *bus_priv, struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info); + int (*cmd_exec)(void *bus_priv, u16 opcode, u8 opcode_mod, + u32 in_mod, bool out_mbox_direct, + char *in_mbox, size_t in_mbox_size, + char *out_mbox, size_t out_mbox_size, + u8 *p_status); + u32 (*read_frc_h)(void *bus_priv); + u32 (*read_frc_l)(void *bus_priv); + u32 (*read_utc_sec)(void *bus_priv); + u32 (*read_utc_nsec)(void *bus_priv); + u8 features; +}; + +struct mlxsw_fw_rev { + u16 major; + u16 minor; + u16 subminor; + u16 can_reset_minor; +}; + +struct mlxsw_bus_info { + const char *device_kind; + const char *device_name; + struct device *dev; + struct mlxsw_fw_rev fw_rev; + u8 vsd[MLXSW_CMD_BOARDINFO_VSD_LEN]; + u8 psid[MLXSW_CMD_BOARDINFO_PSID_LEN]; + u8 low_frequency:1, + read_clock_capable:1; +}; + +struct mlxsw_hwmon; + +#ifdef CONFIG_MLXSW_CORE_HWMON + +int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct mlxsw_hwmon **p_hwmon); +void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon); + +#else + +static inline int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct mlxsw_hwmon **p_hwmon) +{ + return 0; +} + +static inline void mlxsw_hwmon_fini(struct mlxsw_hwmon 
*mlxsw_hwmon) +{ +} + +#endif + +struct mlxsw_thermal; + +#ifdef CONFIG_MLXSW_CORE_THERMAL + +int mlxsw_thermal_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct mlxsw_thermal **p_thermal); +void mlxsw_thermal_fini(struct mlxsw_thermal *thermal); + +#else + +static inline int mlxsw_thermal_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct mlxsw_thermal **p_thermal) +{ + return 0; +} + +static inline void mlxsw_thermal_fini(struct mlxsw_thermal *thermal) +{ +} + +#endif + +enum mlxsw_devlink_param_id { + MLXSW_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL, +}; + +struct mlxsw_cqe_ts { + u8 sec; + u32 nsec; +}; + +struct mlxsw_skb_cb { + union { + struct mlxsw_tx_info tx_info; + struct mlxsw_rx_md_info rx_md_info; + }; + struct mlxsw_cqe_ts cqe_ts; +}; + +static inline struct mlxsw_skb_cb *mlxsw_skb_cb(struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(mlxsw_skb_cb) > sizeof(skb->cb)); + return (struct mlxsw_skb_cb *) skb->cb; +} + +struct mlxsw_linecards; + +enum mlxsw_linecard_status_event_type { + MLXSW_LINECARD_STATUS_EVENT_TYPE_PROVISION, + MLXSW_LINECARD_STATUS_EVENT_TYPE_UNPROVISION, +}; + +struct mlxsw_linecard_bdev; + +struct mlxsw_linecard_device_info { + u16 fw_major; + u16 fw_minor; + u16 fw_sub_minor; + char psid[MLXSW_REG_MGIR_FW_INFO_PSID_SIZE]; +}; + +struct mlxsw_linecard { + u8 slot_index; + struct mlxsw_linecards *linecards; + struct devlink_linecard *devlink_linecard; + struct mutex lock; /* Locks accesses to the linecard structure */ + char name[MLXSW_REG_MDDQ_SLOT_ASCII_NAME_LEN]; + char mbct_pl[MLXSW_REG_MBCT_LEN]; /* Too big for stack */ + enum mlxsw_linecard_status_event_type status_event_type_to; + struct delayed_work status_event_to_dw; + u8 provisioned:1, + ready:1, + active:1; + u16 hw_revision; + u16 ini_version; + struct mlxsw_linecard_bdev *bdev; + struct { + struct mlxsw_linecard_device_info info; + u8 index; + } device; +}; + +struct mlxsw_linecard_types_info; + +struct mlxsw_linecards { + struct mlxsw_core *mlxsw_core; + const struct mlxsw_bus_info *bus_info; + u8 count; + struct mlxsw_linecard_types_info *types_info; + struct list_head event_ops_list; + struct mutex event_ops_list_lock; /* Locks accesses to event ops list */ + struct mlxsw_linecard linecards[]; +}; + +static inline struct mlxsw_linecard * +mlxsw_linecard_get(struct mlxsw_linecards *linecards, u8 slot_index) +{ + return &linecards->linecards[slot_index - 1]; +} + +int mlxsw_linecard_devlink_info_get(struct mlxsw_linecard *linecard, + struct devlink_info_req *req, + struct netlink_ext_ack *extack); +int mlxsw_linecard_flash_update(struct devlink *linecard_devlink, + struct mlxsw_linecard *linecard, + const struct firmware *firmware, + struct netlink_ext_ack *extack); + +int mlxsw_linecards_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *bus_info); +void mlxsw_linecards_fini(struct mlxsw_core *mlxsw_core); + +typedef void mlxsw_linecards_event_op_t(struct mlxsw_core *mlxsw_core, + u8 slot_index, void *priv); + +struct mlxsw_linecards_event_ops { + mlxsw_linecards_event_op_t *got_active; + mlxsw_linecards_event_op_t *got_inactive; +}; + +int mlxsw_linecards_event_ops_register(struct mlxsw_core *mlxsw_core, + struct mlxsw_linecards_event_ops *ops, + void *priv); +void mlxsw_linecards_event_ops_unregister(struct mlxsw_core *mlxsw_core, + struct mlxsw_linecards_event_ops *ops, + void *priv); + +int mlxsw_linecard_bdev_add(struct 
mlxsw_linecard *linecard); +void mlxsw_linecard_bdev_del(struct mlxsw_linecard *linecard); + +int mlxsw_linecard_driver_register(void); +void mlxsw_linecard_driver_unregister(void); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c new file mode 100644 index 000000000..9dfe71481 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -0,0 +1,2218 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/rhashtable.h> +#include <linux/list.h> +#include <linux/idr.h> +#include <linux/refcount.h> +#include <net/flow_offload.h> + +#include "item.h" +#include "trap.h" +#include "core_acl_flex_actions.h" + +enum mlxsw_afa_set_type { + MLXSW_AFA_SET_TYPE_NEXT, + MLXSW_AFA_SET_TYPE_GOTO, +}; + +/* afa_set_type + * Type of the record at the end of the action set. + */ +MLXSW_ITEM32(afa, set, type, 0xA0, 28, 4); + +/* afa_set_next_action_set_ptr + * A pointer to the next action set in the KVD Centralized database. + */ +MLXSW_ITEM32(afa, set, next_action_set_ptr, 0xA4, 0, 24); + +/* afa_set_goto_g + * group - When set, the binding is of an ACL group. When cleared, + * the binding is of an ACL. + * Must be set to 1 for Spectrum. + */ +MLXSW_ITEM32(afa, set, goto_g, 0xA4, 29, 1); + +enum mlxsw_afa_set_goto_binding_cmd { + /* continue go the next binding point */ + MLXSW_AFA_SET_GOTO_BINDING_CMD_NONE, + /* jump to the next binding point no return */ + MLXSW_AFA_SET_GOTO_BINDING_CMD_JUMP, + /* terminate the acl binding */ + MLXSW_AFA_SET_GOTO_BINDING_CMD_TERM = 4, +}; + +/* afa_set_goto_binding_cmd */ +MLXSW_ITEM32(afa, set, goto_binding_cmd, 0xA4, 24, 3); + +/* afa_set_goto_next_binding + * ACL/ACL group identifier. If the g bit is set, this field should hold + * the acl_group_id, else it should hold the acl_id. + */ +MLXSW_ITEM32(afa, set, goto_next_binding, 0xA4, 0, 16); + +/* afa_all_action_type + * Action Type. + */ +MLXSW_ITEM32(afa, all, action_type, 0x00, 24, 6); + +struct mlxsw_afa { + unsigned int max_acts_per_set; + const struct mlxsw_afa_ops *ops; + void *ops_priv; + struct rhashtable set_ht; + struct rhashtable fwd_entry_ht; + struct rhashtable cookie_ht; + struct rhashtable policer_ht; + struct idr cookie_idr; + struct list_head policer_list; +}; + +#define MLXSW_AFA_SET_LEN 0xA8 + +struct mlxsw_afa_set_ht_key { + char enc_actions[MLXSW_AFA_SET_LEN]; /* Encoded set */ + bool is_first; +}; + +/* Set structure holds one action set record. It contains up to three + * actions (depends on size of particular actions). The set is either + * put directly to a rule, or it is stored in KVD linear area. + * To prevent duplicate entries in KVD linear area, a hashtable is + * used to track sets that were previously inserted and may be shared. + */ + +struct mlxsw_afa_set { + struct rhash_head ht_node; + struct mlxsw_afa_set_ht_key ht_key; + u32 kvdl_index; + u8 shared:1, /* Inserted in hashtable (doesn't mean that + * kvdl_index is valid). + */ + has_trap:1, + has_police:1; + unsigned int ref_count; + struct mlxsw_afa_set *next; /* Pointer to the next set. */ + struct mlxsw_afa_set *prev; /* Pointer to the previous set, + * note that set may have multiple + * sets from multiple blocks + * pointing at it. This is only + * usable until commit. 
+ */ +}; + +static const struct rhashtable_params mlxsw_afa_set_ht_params = { + .key_len = sizeof(struct mlxsw_afa_set_ht_key), + .key_offset = offsetof(struct mlxsw_afa_set, ht_key), + .head_offset = offsetof(struct mlxsw_afa_set, ht_node), + .automatic_shrinking = true, +}; + +struct mlxsw_afa_fwd_entry_ht_key { + u16 local_port; +}; + +struct mlxsw_afa_fwd_entry { + struct rhash_head ht_node; + struct mlxsw_afa_fwd_entry_ht_key ht_key; + u32 kvdl_index; + unsigned int ref_count; +}; + +static const struct rhashtable_params mlxsw_afa_fwd_entry_ht_params = { + .key_len = sizeof(struct mlxsw_afa_fwd_entry_ht_key), + .key_offset = offsetof(struct mlxsw_afa_fwd_entry, ht_key), + .head_offset = offsetof(struct mlxsw_afa_fwd_entry, ht_node), + .automatic_shrinking = true, +}; + +struct mlxsw_afa_cookie { + struct rhash_head ht_node; + refcount_t ref_count; + struct rcu_head rcu; + u32 cookie_index; + struct flow_action_cookie fa_cookie; +}; + +static u32 mlxsw_afa_cookie_hash(const struct flow_action_cookie *fa_cookie, + u32 seed) +{ + return jhash2((u32 *) fa_cookie->cookie, + fa_cookie->cookie_len / sizeof(u32), seed); +} + +static u32 mlxsw_afa_cookie_key_hashfn(const void *data, u32 len, u32 seed) +{ + const struct flow_action_cookie *fa_cookie = data; + + return mlxsw_afa_cookie_hash(fa_cookie, seed); +} + +static u32 mlxsw_afa_cookie_obj_hashfn(const void *data, u32 len, u32 seed) +{ + const struct mlxsw_afa_cookie *cookie = data; + + return mlxsw_afa_cookie_hash(&cookie->fa_cookie, seed); +} + +static int mlxsw_afa_cookie_obj_cmpfn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct flow_action_cookie *fa_cookie = arg->key; + const struct mlxsw_afa_cookie *cookie = obj; + + if (cookie->fa_cookie.cookie_len == fa_cookie->cookie_len) + return memcmp(cookie->fa_cookie.cookie, fa_cookie->cookie, + fa_cookie->cookie_len); + return 1; +} + +static const struct rhashtable_params mlxsw_afa_cookie_ht_params = { + .head_offset = offsetof(struct mlxsw_afa_cookie, ht_node), + .hashfn = mlxsw_afa_cookie_key_hashfn, + .obj_hashfn = mlxsw_afa_cookie_obj_hashfn, + .obj_cmpfn = mlxsw_afa_cookie_obj_cmpfn, + .automatic_shrinking = true, +}; + +struct mlxsw_afa_policer { + struct rhash_head ht_node; + struct list_head list; /* Member of policer_list */ + refcount_t ref_count; + u32 fa_index; + u16 policer_index; +}; + +static const struct rhashtable_params mlxsw_afa_policer_ht_params = { + .key_len = sizeof(u32), + .key_offset = offsetof(struct mlxsw_afa_policer, fa_index), + .head_offset = offsetof(struct mlxsw_afa_policer, ht_node), + .automatic_shrinking = true, +}; + +struct mlxsw_afa *mlxsw_afa_create(unsigned int max_acts_per_set, + const struct mlxsw_afa_ops *ops, + void *ops_priv) +{ + struct mlxsw_afa *mlxsw_afa; + int err; + + mlxsw_afa = kzalloc(sizeof(*mlxsw_afa), GFP_KERNEL); + if (!mlxsw_afa) + return ERR_PTR(-ENOMEM); + err = rhashtable_init(&mlxsw_afa->set_ht, &mlxsw_afa_set_ht_params); + if (err) + goto err_set_rhashtable_init; + err = rhashtable_init(&mlxsw_afa->fwd_entry_ht, + &mlxsw_afa_fwd_entry_ht_params); + if (err) + goto err_fwd_entry_rhashtable_init; + err = rhashtable_init(&mlxsw_afa->cookie_ht, + &mlxsw_afa_cookie_ht_params); + if (err) + goto err_cookie_rhashtable_init; + err = rhashtable_init(&mlxsw_afa->policer_ht, + &mlxsw_afa_policer_ht_params); + if (err) + goto err_policer_rhashtable_init; + idr_init(&mlxsw_afa->cookie_idr); + INIT_LIST_HEAD(&mlxsw_afa->policer_list); + mlxsw_afa->max_acts_per_set = max_acts_per_set; + mlxsw_afa->ops = ops; + 
mlxsw_afa->ops_priv = ops_priv; + return mlxsw_afa; + +err_policer_rhashtable_init: + rhashtable_destroy(&mlxsw_afa->cookie_ht); +err_cookie_rhashtable_init: + rhashtable_destroy(&mlxsw_afa->fwd_entry_ht); +err_fwd_entry_rhashtable_init: + rhashtable_destroy(&mlxsw_afa->set_ht); +err_set_rhashtable_init: + kfree(mlxsw_afa); + return ERR_PTR(err); +} +EXPORT_SYMBOL(mlxsw_afa_create); + +void mlxsw_afa_destroy(struct mlxsw_afa *mlxsw_afa) +{ + WARN_ON(!list_empty(&mlxsw_afa->policer_list)); + WARN_ON(!idr_is_empty(&mlxsw_afa->cookie_idr)); + idr_destroy(&mlxsw_afa->cookie_idr); + rhashtable_destroy(&mlxsw_afa->policer_ht); + rhashtable_destroy(&mlxsw_afa->cookie_ht); + rhashtable_destroy(&mlxsw_afa->fwd_entry_ht); + rhashtable_destroy(&mlxsw_afa->set_ht); + kfree(mlxsw_afa); +} +EXPORT_SYMBOL(mlxsw_afa_destroy); + +static void mlxsw_afa_set_goto_set(struct mlxsw_afa_set *set, + enum mlxsw_afa_set_goto_binding_cmd cmd, + u16 group_id) +{ + char *actions = set->ht_key.enc_actions; + + mlxsw_afa_set_type_set(actions, MLXSW_AFA_SET_TYPE_GOTO); + mlxsw_afa_set_goto_g_set(actions, true); + mlxsw_afa_set_goto_binding_cmd_set(actions, cmd); + mlxsw_afa_set_goto_next_binding_set(actions, group_id); +} + +static void mlxsw_afa_set_next_set(struct mlxsw_afa_set *set, + u32 next_set_kvdl_index) +{ + char *actions = set->ht_key.enc_actions; + + mlxsw_afa_set_type_set(actions, MLXSW_AFA_SET_TYPE_NEXT); + mlxsw_afa_set_next_action_set_ptr_set(actions, next_set_kvdl_index); +} + +static struct mlxsw_afa_set *mlxsw_afa_set_create(bool is_first) +{ + struct mlxsw_afa_set *set; + + set = kzalloc(sizeof(*set), GFP_KERNEL); + if (!set) + return NULL; + /* Need to initialize the set to pass by default */ + mlxsw_afa_set_goto_set(set, MLXSW_AFA_SET_GOTO_BINDING_CMD_TERM, 0); + set->ht_key.is_first = is_first; + set->ref_count = 1; + return set; +} + +static void mlxsw_afa_set_destroy(struct mlxsw_afa_set *set) +{ + kfree(set); +} + +static int mlxsw_afa_set_share(struct mlxsw_afa *mlxsw_afa, + struct mlxsw_afa_set *set) +{ + int err; + + err = rhashtable_insert_fast(&mlxsw_afa->set_ht, &set->ht_node, + mlxsw_afa_set_ht_params); + if (err) + return err; + err = mlxsw_afa->ops->kvdl_set_add(mlxsw_afa->ops_priv, + &set->kvdl_index, + set->ht_key.enc_actions, + set->ht_key.is_first); + if (err) + goto err_kvdl_set_add; + set->shared = true; + set->prev = NULL; + return 0; + +err_kvdl_set_add: + rhashtable_remove_fast(&mlxsw_afa->set_ht, &set->ht_node, + mlxsw_afa_set_ht_params); + return err; +} + +static void mlxsw_afa_set_unshare(struct mlxsw_afa *mlxsw_afa, + struct mlxsw_afa_set *set) +{ + mlxsw_afa->ops->kvdl_set_del(mlxsw_afa->ops_priv, + set->kvdl_index, + set->ht_key.is_first); + rhashtable_remove_fast(&mlxsw_afa->set_ht, &set->ht_node, + mlxsw_afa_set_ht_params); + set->shared = false; +} + +static void mlxsw_afa_set_put(struct mlxsw_afa *mlxsw_afa, + struct mlxsw_afa_set *set) +{ + if (--set->ref_count) + return; + if (set->shared) + mlxsw_afa_set_unshare(mlxsw_afa, set); + mlxsw_afa_set_destroy(set); +} + +static struct mlxsw_afa_set *mlxsw_afa_set_get(struct mlxsw_afa *mlxsw_afa, + struct mlxsw_afa_set *orig_set) +{ + struct mlxsw_afa_set *set; + int err; + + /* There is a hashtable of sets maintained. If a set with the exact + * same encoding exists, we reuse it. Otherwise, the current set + * is shared by making it available to others using the hash table. 
+ */ + set = rhashtable_lookup_fast(&mlxsw_afa->set_ht, &orig_set->ht_key, + mlxsw_afa_set_ht_params); + if (set) { + set->ref_count++; + mlxsw_afa_set_put(mlxsw_afa, orig_set); + } else { + set = orig_set; + err = mlxsw_afa_set_share(mlxsw_afa, set); + if (err) + return ERR_PTR(err); + } + return set; +} + +/* Block structure holds a list of action sets. One action block + * represents one chain of actions executed upon match of a rule. + */ + +struct mlxsw_afa_block { + struct mlxsw_afa *afa; + bool finished; + struct mlxsw_afa_set *first_set; + struct mlxsw_afa_set *cur_set; + unsigned int cur_act_index; /* In current set. */ + struct list_head resource_list; /* List of resources held by actions + * in this block. + */ +}; + +struct mlxsw_afa_resource { + struct list_head list; + void (*destructor)(struct mlxsw_afa_block *block, + struct mlxsw_afa_resource *resource); +}; + +static void mlxsw_afa_resource_add(struct mlxsw_afa_block *block, + struct mlxsw_afa_resource *resource) +{ + list_add(&resource->list, &block->resource_list); +} + +static void mlxsw_afa_resource_del(struct mlxsw_afa_resource *resource) +{ + list_del(&resource->list); +} + +static void mlxsw_afa_resources_destroy(struct mlxsw_afa_block *block) +{ + struct mlxsw_afa_resource *resource, *tmp; + + list_for_each_entry_safe(resource, tmp, &block->resource_list, list) { + resource->destructor(block, resource); + } +} + +struct mlxsw_afa_block *mlxsw_afa_block_create(struct mlxsw_afa *mlxsw_afa) +{ + struct mlxsw_afa_block *block; + + block = kzalloc(sizeof(*block), GFP_KERNEL); + if (!block) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&block->resource_list); + block->afa = mlxsw_afa; + + /* At least one action set is always present, so just create it here */ + block->first_set = mlxsw_afa_set_create(true); + if (!block->first_set) + goto err_first_set_create; + + /* In case user instructs to have dummy first set, we leave it + * empty here and create another, real, set right away. + */ + if (mlxsw_afa->ops->dummy_first_set) { + block->cur_set = mlxsw_afa_set_create(false); + if (!block->cur_set) + goto err_second_set_create; + block->cur_set->prev = block->first_set; + block->first_set->next = block->cur_set; + } else { + block->cur_set = block->first_set; + } + + return block; + +err_second_set_create: + mlxsw_afa_set_destroy(block->first_set); +err_first_set_create: + kfree(block); + return ERR_PTR(-ENOMEM); +} +EXPORT_SYMBOL(mlxsw_afa_block_create); + +void mlxsw_afa_block_destroy(struct mlxsw_afa_block *block) +{ + struct mlxsw_afa_set *set = block->first_set; + struct mlxsw_afa_set *next_set; + + do { + next_set = set->next; + mlxsw_afa_set_put(block->afa, set); + set = next_set; + } while (set); + mlxsw_afa_resources_destroy(block); + kfree(block); +} +EXPORT_SYMBOL(mlxsw_afa_block_destroy); + +int mlxsw_afa_block_commit(struct mlxsw_afa_block *block) +{ + struct mlxsw_afa_set *set = block->cur_set; + struct mlxsw_afa_set *prev_set; + + block->cur_set = NULL; + block->finished = true; + + /* Go over all linked sets starting from last + * and try to find existing set in the hash table. + * In case it is not there, assign a KVD linear index + * and insert it. + */ + do { + prev_set = set->prev; + set = mlxsw_afa_set_get(block->afa, set); + if (IS_ERR(set)) + /* No rollback is needed since the chain is + * in consistent state and mlxsw_afa_block_destroy + * will take care of putting it away. 
+ */ + return PTR_ERR(set); + if (prev_set) { + prev_set->next = set; + mlxsw_afa_set_next_set(prev_set, set->kvdl_index); + set = prev_set; + } + } while (prev_set); + + block->first_set = set; + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_commit); + +char *mlxsw_afa_block_first_set(struct mlxsw_afa_block *block) +{ + return block->first_set->ht_key.enc_actions; +} +EXPORT_SYMBOL(mlxsw_afa_block_first_set); + +char *mlxsw_afa_block_cur_set(struct mlxsw_afa_block *block) +{ + return block->cur_set->ht_key.enc_actions; +} +EXPORT_SYMBOL(mlxsw_afa_block_cur_set); + +u32 mlxsw_afa_block_first_kvdl_index(struct mlxsw_afa_block *block) +{ + /* First set is never in KVD linear. So the first set + * with valid KVD linear index is always the second one. + */ + if (WARN_ON(!block->first_set->next)) + return 0; + return block->first_set->next->kvdl_index; +} +EXPORT_SYMBOL(mlxsw_afa_block_first_kvdl_index); + +int mlxsw_afa_block_activity_get(struct mlxsw_afa_block *block, bool *activity) +{ + u32 kvdl_index = mlxsw_afa_block_first_kvdl_index(block); + + return block->afa->ops->kvdl_set_activity_get(block->afa->ops_priv, + kvdl_index, activity); +} +EXPORT_SYMBOL(mlxsw_afa_block_activity_get); + +int mlxsw_afa_block_continue(struct mlxsw_afa_block *block) +{ + if (block->finished) + return -EINVAL; + mlxsw_afa_set_goto_set(block->cur_set, + MLXSW_AFA_SET_GOTO_BINDING_CMD_NONE, 0); + block->finished = true; + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_continue); + +int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id) +{ + if (block->finished) + return -EINVAL; + mlxsw_afa_set_goto_set(block->cur_set, + MLXSW_AFA_SET_GOTO_BINDING_CMD_JUMP, group_id); + block->finished = true; + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_jump); + +int mlxsw_afa_block_terminate(struct mlxsw_afa_block *block) +{ + if (block->finished) + return -EINVAL; + mlxsw_afa_set_goto_set(block->cur_set, + MLXSW_AFA_SET_GOTO_BINDING_CMD_TERM, 0); + block->finished = true; + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_terminate); + +static struct mlxsw_afa_fwd_entry * +mlxsw_afa_fwd_entry_create(struct mlxsw_afa *mlxsw_afa, u16 local_port) +{ + struct mlxsw_afa_fwd_entry *fwd_entry; + int err; + + fwd_entry = kzalloc(sizeof(*fwd_entry), GFP_KERNEL); + if (!fwd_entry) + return ERR_PTR(-ENOMEM); + fwd_entry->ht_key.local_port = local_port; + fwd_entry->ref_count = 1; + + err = rhashtable_insert_fast(&mlxsw_afa->fwd_entry_ht, + &fwd_entry->ht_node, + mlxsw_afa_fwd_entry_ht_params); + if (err) + goto err_rhashtable_insert; + + err = mlxsw_afa->ops->kvdl_fwd_entry_add(mlxsw_afa->ops_priv, + &fwd_entry->kvdl_index, + local_port); + if (err) + goto err_kvdl_fwd_entry_add; + return fwd_entry; + +err_kvdl_fwd_entry_add: + rhashtable_remove_fast(&mlxsw_afa->fwd_entry_ht, &fwd_entry->ht_node, + mlxsw_afa_fwd_entry_ht_params); +err_rhashtable_insert: + kfree(fwd_entry); + return ERR_PTR(err); +} + +static void mlxsw_afa_fwd_entry_destroy(struct mlxsw_afa *mlxsw_afa, + struct mlxsw_afa_fwd_entry *fwd_entry) +{ + mlxsw_afa->ops->kvdl_fwd_entry_del(mlxsw_afa->ops_priv, + fwd_entry->kvdl_index); + rhashtable_remove_fast(&mlxsw_afa->fwd_entry_ht, &fwd_entry->ht_node, + mlxsw_afa_fwd_entry_ht_params); + kfree(fwd_entry); +} + +static struct mlxsw_afa_fwd_entry * +mlxsw_afa_fwd_entry_get(struct mlxsw_afa *mlxsw_afa, u16 local_port) +{ + struct mlxsw_afa_fwd_entry_ht_key ht_key = {0}; + struct mlxsw_afa_fwd_entry *fwd_entry; + + ht_key.local_port = local_port; + fwd_entry = rhashtable_lookup_fast(&mlxsw_afa->fwd_entry_ht, &ht_key, + 
mlxsw_afa_fwd_entry_ht_params); + if (fwd_entry) { + fwd_entry->ref_count++; + return fwd_entry; + } + return mlxsw_afa_fwd_entry_create(mlxsw_afa, local_port); +} + +static void mlxsw_afa_fwd_entry_put(struct mlxsw_afa *mlxsw_afa, + struct mlxsw_afa_fwd_entry *fwd_entry) +{ + if (--fwd_entry->ref_count) + return; + mlxsw_afa_fwd_entry_destroy(mlxsw_afa, fwd_entry); +} + +struct mlxsw_afa_fwd_entry_ref { + struct mlxsw_afa_resource resource; + struct mlxsw_afa_fwd_entry *fwd_entry; +}; + +static void +mlxsw_afa_fwd_entry_ref_destroy(struct mlxsw_afa_block *block, + struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref) +{ + mlxsw_afa_resource_del(&fwd_entry_ref->resource); + mlxsw_afa_fwd_entry_put(block->afa, fwd_entry_ref->fwd_entry); + kfree(fwd_entry_ref); +} + +static void +mlxsw_afa_fwd_entry_ref_destructor(struct mlxsw_afa_block *block, + struct mlxsw_afa_resource *resource) +{ + struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref; + + fwd_entry_ref = container_of(resource, struct mlxsw_afa_fwd_entry_ref, + resource); + mlxsw_afa_fwd_entry_ref_destroy(block, fwd_entry_ref); +} + +static struct mlxsw_afa_fwd_entry_ref * +mlxsw_afa_fwd_entry_ref_create(struct mlxsw_afa_block *block, u16 local_port) +{ + struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref; + struct mlxsw_afa_fwd_entry *fwd_entry; + int err; + + fwd_entry_ref = kzalloc(sizeof(*fwd_entry_ref), GFP_KERNEL); + if (!fwd_entry_ref) + return ERR_PTR(-ENOMEM); + fwd_entry = mlxsw_afa_fwd_entry_get(block->afa, local_port); + if (IS_ERR(fwd_entry)) { + err = PTR_ERR(fwd_entry); + goto err_fwd_entry_get; + } + fwd_entry_ref->fwd_entry = fwd_entry; + fwd_entry_ref->resource.destructor = mlxsw_afa_fwd_entry_ref_destructor; + mlxsw_afa_resource_add(block, &fwd_entry_ref->resource); + return fwd_entry_ref; + +err_fwd_entry_get: + kfree(fwd_entry_ref); + return ERR_PTR(err); +} + +struct mlxsw_afa_counter { + struct mlxsw_afa_resource resource; + u32 counter_index; +}; + +static void +mlxsw_afa_counter_destroy(struct mlxsw_afa_block *block, + struct mlxsw_afa_counter *counter) +{ + mlxsw_afa_resource_del(&counter->resource); + block->afa->ops->counter_index_put(block->afa->ops_priv, + counter->counter_index); + kfree(counter); +} + +static void +mlxsw_afa_counter_destructor(struct mlxsw_afa_block *block, + struct mlxsw_afa_resource *resource) +{ + struct mlxsw_afa_counter *counter; + + counter = container_of(resource, struct mlxsw_afa_counter, resource); + mlxsw_afa_counter_destroy(block, counter); +} + +static struct mlxsw_afa_counter * +mlxsw_afa_counter_create(struct mlxsw_afa_block *block) +{ + struct mlxsw_afa_counter *counter; + int err; + + counter = kzalloc(sizeof(*counter), GFP_KERNEL); + if (!counter) + return ERR_PTR(-ENOMEM); + + err = block->afa->ops->counter_index_get(block->afa->ops_priv, + &counter->counter_index); + if (err) + goto err_counter_index_get; + counter->resource.destructor = mlxsw_afa_counter_destructor; + mlxsw_afa_resource_add(block, &counter->resource); + return counter; + +err_counter_index_get: + kfree(counter); + return ERR_PTR(err); +} + +/* 20 bits is a maximum that hardware can handle in trap with userdef action + * and carry along with the trapped packet. 
+ */ +#define MLXSW_AFA_COOKIE_INDEX_BITS 20 +#define MLXSW_AFA_COOKIE_INDEX_MAX ((1 << MLXSW_AFA_COOKIE_INDEX_BITS) - 1) + +static struct mlxsw_afa_cookie * +mlxsw_afa_cookie_create(struct mlxsw_afa *mlxsw_afa, + const struct flow_action_cookie *fa_cookie) +{ + struct mlxsw_afa_cookie *cookie; + u32 cookie_index; + int err; + + cookie = kzalloc(sizeof(*cookie) + fa_cookie->cookie_len, GFP_KERNEL); + if (!cookie) + return ERR_PTR(-ENOMEM); + refcount_set(&cookie->ref_count, 1); + cookie->fa_cookie = *fa_cookie; + memcpy(cookie->fa_cookie.cookie, fa_cookie->cookie, + fa_cookie->cookie_len); + + err = rhashtable_insert_fast(&mlxsw_afa->cookie_ht, &cookie->ht_node, + mlxsw_afa_cookie_ht_params); + if (err) + goto err_rhashtable_insert; + + /* Start cookie indexes with 1. Leave the 0 index unused. Packets + * that come from the HW which are not dropped by drop-with-cookie + * action are going to pass cookie_index 0 to lookup. + */ + cookie_index = 1; + err = idr_alloc_u32(&mlxsw_afa->cookie_idr, cookie, &cookie_index, + MLXSW_AFA_COOKIE_INDEX_MAX, GFP_KERNEL); + if (err) + goto err_idr_alloc; + cookie->cookie_index = cookie_index; + return cookie; + +err_idr_alloc: + rhashtable_remove_fast(&mlxsw_afa->cookie_ht, &cookie->ht_node, + mlxsw_afa_cookie_ht_params); +err_rhashtable_insert: + kfree(cookie); + return ERR_PTR(err); +} + +static void mlxsw_afa_cookie_destroy(struct mlxsw_afa *mlxsw_afa, + struct mlxsw_afa_cookie *cookie) +{ + idr_remove(&mlxsw_afa->cookie_idr, cookie->cookie_index); + rhashtable_remove_fast(&mlxsw_afa->cookie_ht, &cookie->ht_node, + mlxsw_afa_cookie_ht_params); + kfree_rcu(cookie, rcu); +} + +static struct mlxsw_afa_cookie * +mlxsw_afa_cookie_get(struct mlxsw_afa *mlxsw_afa, + const struct flow_action_cookie *fa_cookie) +{ + struct mlxsw_afa_cookie *cookie; + + cookie = rhashtable_lookup_fast(&mlxsw_afa->cookie_ht, fa_cookie, + mlxsw_afa_cookie_ht_params); + if (cookie) { + refcount_inc(&cookie->ref_count); + return cookie; + } + return mlxsw_afa_cookie_create(mlxsw_afa, fa_cookie); +} + +static void mlxsw_afa_cookie_put(struct mlxsw_afa *mlxsw_afa, + struct mlxsw_afa_cookie *cookie) +{ + if (!refcount_dec_and_test(&cookie->ref_count)) + return; + mlxsw_afa_cookie_destroy(mlxsw_afa, cookie); +} + +/* RCU read lock must be held */ +const struct flow_action_cookie * +mlxsw_afa_cookie_lookup(struct mlxsw_afa *mlxsw_afa, u32 cookie_index) +{ + struct mlxsw_afa_cookie *cookie; + + /* 0 index means no cookie */ + if (!cookie_index) + return NULL; + cookie = idr_find(&mlxsw_afa->cookie_idr, cookie_index); + if (!cookie) + return NULL; + return &cookie->fa_cookie; +} +EXPORT_SYMBOL(mlxsw_afa_cookie_lookup); + +struct mlxsw_afa_cookie_ref { + struct mlxsw_afa_resource resource; + struct mlxsw_afa_cookie *cookie; +}; + +static void +mlxsw_afa_cookie_ref_destroy(struct mlxsw_afa_block *block, + struct mlxsw_afa_cookie_ref *cookie_ref) +{ + mlxsw_afa_resource_del(&cookie_ref->resource); + mlxsw_afa_cookie_put(block->afa, cookie_ref->cookie); + kfree(cookie_ref); +} + +static void +mlxsw_afa_cookie_ref_destructor(struct mlxsw_afa_block *block, + struct mlxsw_afa_resource *resource) +{ + struct mlxsw_afa_cookie_ref *cookie_ref; + + cookie_ref = container_of(resource, struct mlxsw_afa_cookie_ref, + resource); + mlxsw_afa_cookie_ref_destroy(block, cookie_ref); +} + +static struct mlxsw_afa_cookie_ref * +mlxsw_afa_cookie_ref_create(struct mlxsw_afa_block *block, + const struct flow_action_cookie *fa_cookie) +{ + struct mlxsw_afa_cookie_ref *cookie_ref; + struct mlxsw_afa_cookie *cookie; 
+ int err; + + cookie_ref = kzalloc(sizeof(*cookie_ref), GFP_KERNEL); + if (!cookie_ref) + return ERR_PTR(-ENOMEM); + cookie = mlxsw_afa_cookie_get(block->afa, fa_cookie); + if (IS_ERR(cookie)) { + err = PTR_ERR(cookie); + goto err_cookie_get; + } + cookie_ref->cookie = cookie; + cookie_ref->resource.destructor = mlxsw_afa_cookie_ref_destructor; + mlxsw_afa_resource_add(block, &cookie_ref->resource); + return cookie_ref; + +err_cookie_get: + kfree(cookie_ref); + return ERR_PTR(err); +} + +static struct mlxsw_afa_policer * +mlxsw_afa_policer_create(struct mlxsw_afa *mlxsw_afa, u32 fa_index, + u64 rate_bytes_ps, u32 burst, + struct netlink_ext_ack *extack) +{ + struct mlxsw_afa_policer *policer; + int err; + + policer = kzalloc(sizeof(*policer), GFP_KERNEL); + if (!policer) + return ERR_PTR(-ENOMEM); + + err = mlxsw_afa->ops->policer_add(mlxsw_afa->ops_priv, rate_bytes_ps, + burst, &policer->policer_index, + extack); + if (err) + goto err_policer_add; + + refcount_set(&policer->ref_count, 1); + policer->fa_index = fa_index; + + err = rhashtable_insert_fast(&mlxsw_afa->policer_ht, &policer->ht_node, + mlxsw_afa_policer_ht_params); + if (err) + goto err_rhashtable_insert; + + list_add_tail(&policer->list, &mlxsw_afa->policer_list); + + return policer; + +err_rhashtable_insert: + mlxsw_afa->ops->policer_del(mlxsw_afa->ops_priv, + policer->policer_index); +err_policer_add: + kfree(policer); + return ERR_PTR(err); +} + +static void mlxsw_afa_policer_destroy(struct mlxsw_afa *mlxsw_afa, + struct mlxsw_afa_policer *policer) +{ + list_del(&policer->list); + rhashtable_remove_fast(&mlxsw_afa->policer_ht, &policer->ht_node, + mlxsw_afa_policer_ht_params); + mlxsw_afa->ops->policer_del(mlxsw_afa->ops_priv, + policer->policer_index); + kfree(policer); +} + +static struct mlxsw_afa_policer * +mlxsw_afa_policer_get(struct mlxsw_afa *mlxsw_afa, u32 fa_index, + u64 rate_bytes_ps, u32 burst, + struct netlink_ext_ack *extack) +{ + struct mlxsw_afa_policer *policer; + + policer = rhashtable_lookup_fast(&mlxsw_afa->policer_ht, &fa_index, + mlxsw_afa_policer_ht_params); + if (policer) { + refcount_inc(&policer->ref_count); + return policer; + } + + return mlxsw_afa_policer_create(mlxsw_afa, fa_index, rate_bytes_ps, + burst, extack); +} + +static void mlxsw_afa_policer_put(struct mlxsw_afa *mlxsw_afa, + struct mlxsw_afa_policer *policer) +{ + if (!refcount_dec_and_test(&policer->ref_count)) + return; + mlxsw_afa_policer_destroy(mlxsw_afa, policer); +} + +struct mlxsw_afa_policer_ref { + struct mlxsw_afa_resource resource; + struct mlxsw_afa_policer *policer; +}; + +static void +mlxsw_afa_policer_ref_destroy(struct mlxsw_afa_block *block, + struct mlxsw_afa_policer_ref *policer_ref) +{ + mlxsw_afa_resource_del(&policer_ref->resource); + mlxsw_afa_policer_put(block->afa, policer_ref->policer); + kfree(policer_ref); +} + +static void +mlxsw_afa_policer_ref_destructor(struct mlxsw_afa_block *block, + struct mlxsw_afa_resource *resource) +{ + struct mlxsw_afa_policer_ref *policer_ref; + + policer_ref = container_of(resource, struct mlxsw_afa_policer_ref, + resource); + mlxsw_afa_policer_ref_destroy(block, policer_ref); +} + +static struct mlxsw_afa_policer_ref * +mlxsw_afa_policer_ref_create(struct mlxsw_afa_block *block, u32 fa_index, + u64 rate_bytes_ps, u32 burst, + struct netlink_ext_ack *extack) +{ + struct mlxsw_afa_policer_ref *policer_ref; + struct mlxsw_afa_policer *policer; + int err; + + policer_ref = kzalloc(sizeof(*policer_ref), GFP_KERNEL); + if (!policer_ref) + return ERR_PTR(-ENOMEM); + + policer = 
mlxsw_afa_policer_get(block->afa, fa_index, rate_bytes_ps, + burst, extack); + if (IS_ERR(policer)) { + err = PTR_ERR(policer); + goto err_policer_get; + } + + policer_ref->policer = policer; + policer_ref->resource.destructor = mlxsw_afa_policer_ref_destructor; + mlxsw_afa_resource_add(block, &policer_ref->resource); + + return policer_ref; + +err_policer_get: + kfree(policer_ref); + return ERR_PTR(err); +} + +#define MLXSW_AFA_ONE_ACTION_LEN 32 +#define MLXSW_AFA_PAYLOAD_OFFSET 4 + +enum mlxsw_afa_action_type { + MLXSW_AFA_ACTION_TYPE_TRAP, + MLXSW_AFA_ACTION_TYPE_POLICE, + MLXSW_AFA_ACTION_TYPE_OTHER, +}; + +static bool +mlxsw_afa_block_need_split(const struct mlxsw_afa_block *block, + enum mlxsw_afa_action_type type) +{ + struct mlxsw_afa_set *cur_set = block->cur_set; + + /* Due to a hardware limitation, police action cannot be in the same + * action set with MLXSW_AFA_TRAP_CODE or MLXSW_AFA_TRAPWU_CODE + * actions. Work around this limitation by creating a new action set + * and place the new action there. + */ + return (cur_set->has_trap && type == MLXSW_AFA_ACTION_TYPE_POLICE) || + (cur_set->has_police && type == MLXSW_AFA_ACTION_TYPE_TRAP); +} + +static char *mlxsw_afa_block_append_action_ext(struct mlxsw_afa_block *block, + u8 action_code, u8 action_size, + enum mlxsw_afa_action_type type) +{ + char *oneact; + char *actions; + + if (block->finished) + return ERR_PTR(-EINVAL); + if (block->cur_act_index + action_size > block->afa->max_acts_per_set || + mlxsw_afa_block_need_split(block, type)) { + struct mlxsw_afa_set *set; + + /* The appended action won't fit into the current action set, + * so create a new set. + */ + set = mlxsw_afa_set_create(false); + if (!set) + return ERR_PTR(-ENOBUFS); + set->prev = block->cur_set; + block->cur_act_index = 0; + block->cur_set->next = set; + block->cur_set = set; + } + + switch (type) { + case MLXSW_AFA_ACTION_TYPE_TRAP: + block->cur_set->has_trap = true; + break; + case MLXSW_AFA_ACTION_TYPE_POLICE: + block->cur_set->has_police = true; + break; + default: + break; + } + + actions = block->cur_set->ht_key.enc_actions; + oneact = actions + block->cur_act_index * MLXSW_AFA_ONE_ACTION_LEN; + block->cur_act_index += action_size; + mlxsw_afa_all_action_type_set(oneact, action_code); + return oneact + MLXSW_AFA_PAYLOAD_OFFSET; +} + +static char *mlxsw_afa_block_append_action(struct mlxsw_afa_block *block, + u8 action_code, u8 action_size) +{ + return mlxsw_afa_block_append_action_ext(block, action_code, + action_size, + MLXSW_AFA_ACTION_TYPE_OTHER); +} + +/* VLAN Action + * ----------- + * VLAN action is used for manipulating VLANs. It can be used to implement QinQ, + * VLAN translation, change of PCP bits of the VLAN tag, push, pop as swap VLANs + * and more. + */ + +#define MLXSW_AFA_VLAN_CODE 0x02 +#define MLXSW_AFA_VLAN_SIZE 1 + +enum mlxsw_afa_vlan_vlan_tag_cmd { + MLXSW_AFA_VLAN_VLAN_TAG_CMD_NOP, + MLXSW_AFA_VLAN_VLAN_TAG_CMD_PUSH_TAG, + MLXSW_AFA_VLAN_VLAN_TAG_CMD_POP_TAG, +}; + +enum mlxsw_afa_vlan_cmd { + MLXSW_AFA_VLAN_CMD_NOP, + MLXSW_AFA_VLAN_CMD_SET_OUTER, + MLXSW_AFA_VLAN_CMD_SET_INNER, + MLXSW_AFA_VLAN_CMD_COPY_OUTER_TO_INNER, + MLXSW_AFA_VLAN_CMD_COPY_INNER_TO_OUTER, + MLXSW_AFA_VLAN_CMD_SWAP, +}; + +/* afa_vlan_vlan_tag_cmd + * Tag command: push, pop, nop VLAN header. 
+ */ +MLXSW_ITEM32(afa, vlan, vlan_tag_cmd, 0x00, 29, 3); + +/* afa_vlan_vid_cmd */ +MLXSW_ITEM32(afa, vlan, vid_cmd, 0x04, 29, 3); + +/* afa_vlan_vid */ +MLXSW_ITEM32(afa, vlan, vid, 0x04, 0, 12); + +/* afa_vlan_ethertype_cmd */ +MLXSW_ITEM32(afa, vlan, ethertype_cmd, 0x08, 29, 3); + +/* afa_vlan_ethertype + * Index to EtherTypes in Switch VLAN EtherType Register (SVER). + */ +MLXSW_ITEM32(afa, vlan, ethertype, 0x08, 24, 3); + +/* afa_vlan_pcp_cmd */ +MLXSW_ITEM32(afa, vlan, pcp_cmd, 0x08, 13, 3); + +/* afa_vlan_pcp */ +MLXSW_ITEM32(afa, vlan, pcp, 0x08, 8, 3); + +static inline void +mlxsw_afa_vlan_pack(char *payload, + enum mlxsw_afa_vlan_vlan_tag_cmd vlan_tag_cmd, + enum mlxsw_afa_vlan_cmd vid_cmd, u16 vid, + enum mlxsw_afa_vlan_cmd pcp_cmd, u8 pcp, + enum mlxsw_afa_vlan_cmd ethertype_cmd, u8 ethertype) +{ + mlxsw_afa_vlan_vlan_tag_cmd_set(payload, vlan_tag_cmd); + mlxsw_afa_vlan_vid_cmd_set(payload, vid_cmd); + mlxsw_afa_vlan_vid_set(payload, vid); + mlxsw_afa_vlan_pcp_cmd_set(payload, pcp_cmd); + mlxsw_afa_vlan_pcp_set(payload, pcp); + mlxsw_afa_vlan_ethertype_cmd_set(payload, ethertype_cmd); + mlxsw_afa_vlan_ethertype_set(payload, ethertype); +} + +int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, + u16 vid, u8 pcp, u8 et, + struct netlink_ext_ack *extack) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_VLAN_CODE, + MLXSW_AFA_VLAN_SIZE); + + if (IS_ERR(act)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append vlan_modify action"); + return PTR_ERR(act); + } + mlxsw_afa_vlan_pack(act, MLXSW_AFA_VLAN_VLAN_TAG_CMD_NOP, + MLXSW_AFA_VLAN_CMD_SET_OUTER, vid, + MLXSW_AFA_VLAN_CMD_SET_OUTER, pcp, + MLXSW_AFA_VLAN_CMD_SET_OUTER, et); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_vlan_modify); + +/* Trap Action / Trap With Userdef Action + * -------------------------------------- + * The Trap action enables trapping / mirroring packets to the CPU + * as well as discarding packets. + * The ACL Trap / Discard separates the forward/discard control from CPU + * trap control. In addition, the Trap / Discard action enables activating + * SPAN (port mirroring). + * + * The Trap with userdef action has the same functionality as + * the Trap action with addition of user defined value that can be set + * and used by higher layer applications. + */ + +#define MLXSW_AFA_TRAP_CODE 0x03 +#define MLXSW_AFA_TRAP_SIZE 1 + +#define MLXSW_AFA_TRAPWU_CODE 0x04 +#define MLXSW_AFA_TRAPWU_SIZE 2 + +enum mlxsw_afa_trap_trap_action { + MLXSW_AFA_TRAP_TRAP_ACTION_NOP = 0, + MLXSW_AFA_TRAP_TRAP_ACTION_TRAP = 2, +}; + +/* afa_trap_trap_action + * Trap Action. + */ +MLXSW_ITEM32(afa, trap, trap_action, 0x00, 24, 4); + +enum mlxsw_afa_trap_forward_action { + MLXSW_AFA_TRAP_FORWARD_ACTION_FORWARD = 1, + MLXSW_AFA_TRAP_FORWARD_ACTION_DISCARD = 3, +}; + +/* afa_trap_forward_action + * Forward Action. + */ +MLXSW_ITEM32(afa, trap, forward_action, 0x00, 0, 4); + +/* afa_trap_trap_id + * Trap ID to configure. + */ +MLXSW_ITEM32(afa, trap, trap_id, 0x04, 0, 9); + +/* afa_trap_mirror_agent + * Mirror agent. + */ +MLXSW_ITEM32(afa, trap, mirror_agent, 0x08, 29, 3); + +/* afa_trap_mirror_enable + * Mirror enable. + */ +MLXSW_ITEM32(afa, trap, mirror_enable, 0x08, 24, 1); + +/* user_def_val + * Value for the SW usage. Can be used to pass information of which + * rule has caused a trap. This may be overwritten by later traps. + * This field does a set on the packet's user_def_val only if this + * is the first trap_id or if the trap_id has replaced the previous + * packet's trap_id. 
+ */ +MLXSW_ITEM32(afa, trap, user_def_val, 0x0C, 0, 20); + +static inline void +mlxsw_afa_trap_pack(char *payload, + enum mlxsw_afa_trap_trap_action trap_action, + enum mlxsw_afa_trap_forward_action forward_action, + u16 trap_id) +{ + mlxsw_afa_trap_trap_action_set(payload, trap_action); + mlxsw_afa_trap_forward_action_set(payload, forward_action); + mlxsw_afa_trap_trap_id_set(payload, trap_id); +} + +static inline void +mlxsw_afa_trapwu_pack(char *payload, + enum mlxsw_afa_trap_trap_action trap_action, + enum mlxsw_afa_trap_forward_action forward_action, + u16 trap_id, u32 user_def_val) +{ + mlxsw_afa_trap_pack(payload, trap_action, forward_action, trap_id); + mlxsw_afa_trap_user_def_val_set(payload, user_def_val); +} + +static inline void +mlxsw_afa_trap_mirror_pack(char *payload, bool mirror_enable, + u8 mirror_agent) +{ + mlxsw_afa_trap_mirror_enable_set(payload, mirror_enable); + mlxsw_afa_trap_mirror_agent_set(payload, mirror_agent); +} + +static char *mlxsw_afa_block_append_action_trap(struct mlxsw_afa_block *block, + u8 action_code, u8 action_size) +{ + return mlxsw_afa_block_append_action_ext(block, action_code, + action_size, + MLXSW_AFA_ACTION_TYPE_TRAP); +} + +static int mlxsw_afa_block_append_drop_plain(struct mlxsw_afa_block *block, + bool ingress) +{ + char *act = mlxsw_afa_block_append_action_trap(block, + MLXSW_AFA_TRAP_CODE, + MLXSW_AFA_TRAP_SIZE); + + if (IS_ERR(act)) + return PTR_ERR(act); + mlxsw_afa_trap_pack(act, MLXSW_AFA_TRAP_TRAP_ACTION_TRAP, + MLXSW_AFA_TRAP_FORWARD_ACTION_DISCARD, + ingress ? MLXSW_TRAP_ID_DISCARD_INGRESS_ACL : + MLXSW_TRAP_ID_DISCARD_EGRESS_ACL); + return 0; +} + +static int +mlxsw_afa_block_append_drop_with_cookie(struct mlxsw_afa_block *block, + bool ingress, + const struct flow_action_cookie *fa_cookie, + struct netlink_ext_ack *extack) +{ + struct mlxsw_afa_cookie_ref *cookie_ref; + u32 cookie_index; + char *act; + int err; + + cookie_ref = mlxsw_afa_cookie_ref_create(block, fa_cookie); + if (IS_ERR(cookie_ref)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot create cookie for drop action"); + return PTR_ERR(cookie_ref); + } + cookie_index = cookie_ref->cookie->cookie_index; + + act = mlxsw_afa_block_append_action_trap(block, MLXSW_AFA_TRAPWU_CODE, + MLXSW_AFA_TRAPWU_SIZE); + if (IS_ERR(act)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append drop with cookie action"); + err = PTR_ERR(act); + goto err_append_action; + } + mlxsw_afa_trapwu_pack(act, MLXSW_AFA_TRAP_TRAP_ACTION_TRAP, + MLXSW_AFA_TRAP_FORWARD_ACTION_DISCARD, + ingress ? MLXSW_TRAP_ID_DISCARD_INGRESS_ACL : + MLXSW_TRAP_ID_DISCARD_EGRESS_ACL, + cookie_index); + return 0; + +err_append_action: + mlxsw_afa_cookie_ref_destroy(block, cookie_ref); + return err; +} + +int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block, bool ingress, + const struct flow_action_cookie *fa_cookie, + struct netlink_ext_ack *extack) +{ + return fa_cookie ? 
+ mlxsw_afa_block_append_drop_with_cookie(block, ingress, + fa_cookie, extack) : + mlxsw_afa_block_append_drop_plain(block, ingress); +} +EXPORT_SYMBOL(mlxsw_afa_block_append_drop); + +int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id) +{ + char *act = mlxsw_afa_block_append_action_trap(block, + MLXSW_AFA_TRAP_CODE, + MLXSW_AFA_TRAP_SIZE); + + if (IS_ERR(act)) + return PTR_ERR(act); + mlxsw_afa_trap_pack(act, MLXSW_AFA_TRAP_TRAP_ACTION_TRAP, + MLXSW_AFA_TRAP_FORWARD_ACTION_DISCARD, trap_id); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_trap); + +int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, + u16 trap_id) +{ + char *act = mlxsw_afa_block_append_action_trap(block, + MLXSW_AFA_TRAP_CODE, + MLXSW_AFA_TRAP_SIZE); + + if (IS_ERR(act)) + return PTR_ERR(act); + mlxsw_afa_trap_pack(act, MLXSW_AFA_TRAP_TRAP_ACTION_TRAP, + MLXSW_AFA_TRAP_FORWARD_ACTION_FORWARD, trap_id); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_trap_and_forward); + +struct mlxsw_afa_mirror { + struct mlxsw_afa_resource resource; + int span_id; + u16 local_in_port; + bool ingress; +}; + +static void +mlxsw_afa_mirror_destroy(struct mlxsw_afa_block *block, + struct mlxsw_afa_mirror *mirror) +{ + mlxsw_afa_resource_del(&mirror->resource); + block->afa->ops->mirror_del(block->afa->ops_priv, + mirror->local_in_port, + mirror->span_id, + mirror->ingress); + kfree(mirror); +} + +static void +mlxsw_afa_mirror_destructor(struct mlxsw_afa_block *block, + struct mlxsw_afa_resource *resource) +{ + struct mlxsw_afa_mirror *mirror; + + mirror = container_of(resource, struct mlxsw_afa_mirror, resource); + mlxsw_afa_mirror_destroy(block, mirror); +} + +static struct mlxsw_afa_mirror * +mlxsw_afa_mirror_create(struct mlxsw_afa_block *block, u16 local_in_port, + const struct net_device *out_dev, bool ingress) +{ + struct mlxsw_afa_mirror *mirror; + int err; + + mirror = kzalloc(sizeof(*mirror), GFP_KERNEL); + if (!mirror) + return ERR_PTR(-ENOMEM); + + err = block->afa->ops->mirror_add(block->afa->ops_priv, + local_in_port, out_dev, + ingress, &mirror->span_id); + if (err) + goto err_mirror_add; + + mirror->ingress = ingress; + mirror->local_in_port = local_in_port; + mirror->resource.destructor = mlxsw_afa_mirror_destructor; + mlxsw_afa_resource_add(block, &mirror->resource); + return mirror; + +err_mirror_add: + kfree(mirror); + return ERR_PTR(err); +} + +static int +mlxsw_afa_block_append_allocated_mirror(struct mlxsw_afa_block *block, + u8 mirror_agent) +{ + char *act = mlxsw_afa_block_append_action_trap(block, + MLXSW_AFA_TRAP_CODE, + MLXSW_AFA_TRAP_SIZE); + + if (IS_ERR(act)) + return PTR_ERR(act); + mlxsw_afa_trap_pack(act, MLXSW_AFA_TRAP_TRAP_ACTION_NOP, + MLXSW_AFA_TRAP_FORWARD_ACTION_FORWARD, 0); + mlxsw_afa_trap_mirror_pack(act, true, mirror_agent); + return 0; +} + +int +mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block, u16 local_in_port, + const struct net_device *out_dev, bool ingress, + struct netlink_ext_ack *extack) +{ + struct mlxsw_afa_mirror *mirror; + int err; + + mirror = mlxsw_afa_mirror_create(block, local_in_port, out_dev, + ingress); + if (IS_ERR(mirror)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot create mirror action"); + return PTR_ERR(mirror); + } + err = mlxsw_afa_block_append_allocated_mirror(block, mirror->span_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append mirror action"); + goto err_append_allocated_mirror; + } + + return 0; + +err_append_allocated_mirror: + mlxsw_afa_mirror_destroy(block, mirror); + return err; +} 
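The block/set machinery above is easiest to see in isolation: mlxsw_afa_block_append_action_ext() opens a new set whenever the current one is full (or when the trap/police split applies), and mlxsw_afa_block_commit() then walks the chain so that every set except the first can be placed in KVD linear and the preceding set can point at it. Below is a minimal, self-contained userspace sketch of only that capacity-splitting and chaining idea, based on the comments above; it is not the kernel API, the toy_* names and the fake index allocator are invented for illustration, and the set-sharing hashtable, the trap/police split and the backward walk of the real commit are deliberately left out.

/* Illustrative sketch (not part of the patch): fixed-capacity action sets
 * chained into a block, with only the non-first sets getting an index in a
 * pretend "KVD linear" area. All toy_* names are invented for illustration.
 */
#include <stdio.h>
#include <stdlib.h>

#define TOY_ACTS_PER_SET 3			/* models mlxsw_afa->max_acts_per_set */

struct toy_set {
	int acts[TOY_ACTS_PER_SET];
	int nacts;
	unsigned int kvdl_index;		/* assigned at commit time; 0 for the first set */
	struct toy_set *next;
};

struct toy_block {
	struct toy_set *first;
	struct toy_set *cur;
};

static unsigned int toy_kvdl_alloc(void)	/* stand-in for KVD linear allocation */
{
	static unsigned int next_index = 0x100;

	return next_index++;
}

static void toy_block_append(struct toy_block *block, int action)
{
	if (block->cur->nacts == TOY_ACTS_PER_SET) {
		/* Current set is full: open a new one and link it in,
		 * mirroring the split done in mlxsw_afa_block_append_action_ext().
		 */
		struct toy_set *set = calloc(1, sizeof(*set));

		if (!set)
			abort();
		block->cur->next = set;
		block->cur = set;
	}
	block->cur->acts[block->cur->nacts++] = action;
}

static void toy_block_commit(struct toy_block *block)
{
	struct toy_set *set;

	/* Every set except the first lives in "KVD linear"; the first set is
	 * written directly into the rule and only points at its successor.
	 */
	for (set = block->first->next; set; set = set->next)
		set->kvdl_index = toy_kvdl_alloc();
}

int main(void)
{
	struct toy_block block;
	struct toy_set *set;
	int i;

	block.first = calloc(1, sizeof(*block.first));
	if (!block.first)
		return 1;
	block.cur = block.first;

	for (i = 0; i < 7; i++)			/* 7 actions -> three sets of capacity 3 */
		toy_block_append(&block, i);
	toy_block_commit(&block);

	for (set = block.first; set; set = set->next)
		printf("set at kvdl 0x%x holds %d action(s)\n",
		       set->kvdl_index, set->nacts);
	return 0;
}

Running the sketch with seven appended actions yields three sets, and only the second and third receive a fake KVD linear index, which matches the "first set is never in KVD linear" rule stated above mlxsw_afa_block_first_kvdl_index().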
+EXPORT_SYMBOL(mlxsw_afa_block_append_mirror); + +/* QoS Action + * ---------- + * The QOS_ACTION is used for manipulating the QoS attributes of a packet. It + * can be used to change the DCSP, ECN, Color and Switch Priority of the packet. + * Note that PCP field can be changed using the VLAN action. + */ + +#define MLXSW_AFA_QOS_CODE 0x06 +#define MLXSW_AFA_QOS_SIZE 1 + +enum mlxsw_afa_qos_ecn_cmd { + /* Do nothing */ + MLXSW_AFA_QOS_ECN_CMD_NOP, + /* Set ECN to afa_qos_ecn */ + MLXSW_AFA_QOS_ECN_CMD_SET, +}; + +/* afa_qos_ecn_cmd + */ +MLXSW_ITEM32(afa, qos, ecn_cmd, 0x04, 29, 3); + +/* afa_qos_ecn + * ECN value. + */ +MLXSW_ITEM32(afa, qos, ecn, 0x04, 24, 2); + +enum mlxsw_afa_qos_dscp_cmd { + /* Do nothing */ + MLXSW_AFA_QOS_DSCP_CMD_NOP, + /* Set DSCP 3 LSB bits according to dscp[2:0] */ + MLXSW_AFA_QOS_DSCP_CMD_SET_3LSB, + /* Set DSCP 3 MSB bits according to dscp[5:3] */ + MLXSW_AFA_QOS_DSCP_CMD_SET_3MSB, + /* Set DSCP 6 bits according to dscp[5:0] */ + MLXSW_AFA_QOS_DSCP_CMD_SET_ALL, +}; + +/* afa_qos_dscp_cmd + * DSCP command. + */ +MLXSW_ITEM32(afa, qos, dscp_cmd, 0x04, 14, 2); + +/* afa_qos_dscp + * DSCP value. + */ +MLXSW_ITEM32(afa, qos, dscp, 0x04, 0, 6); + +enum mlxsw_afa_qos_switch_prio_cmd { + /* Do nothing */ + MLXSW_AFA_QOS_SWITCH_PRIO_CMD_NOP, + /* Set Switch Priority to afa_qos_switch_prio */ + MLXSW_AFA_QOS_SWITCH_PRIO_CMD_SET, +}; + +/* afa_qos_switch_prio_cmd + */ +MLXSW_ITEM32(afa, qos, switch_prio_cmd, 0x08, 14, 2); + +/* afa_qos_switch_prio + * Switch Priority. + */ +MLXSW_ITEM32(afa, qos, switch_prio, 0x08, 0, 4); + +enum mlxsw_afa_qos_dscp_rw { + MLXSW_AFA_QOS_DSCP_RW_PRESERVE, + MLXSW_AFA_QOS_DSCP_RW_SET, + MLXSW_AFA_QOS_DSCP_RW_CLEAR, +}; + +/* afa_qos_dscp_rw + * DSCP Re-write Enable. Controlling the rewrite_enable for DSCP. 
+ */ +MLXSW_ITEM32(afa, qos, dscp_rw, 0x0C, 30, 2); + +static inline void +mlxsw_afa_qos_ecn_pack(char *payload, + enum mlxsw_afa_qos_ecn_cmd ecn_cmd, u8 ecn) +{ + mlxsw_afa_qos_ecn_cmd_set(payload, ecn_cmd); + mlxsw_afa_qos_ecn_set(payload, ecn); +} + +static inline void +mlxsw_afa_qos_dscp_pack(char *payload, + enum mlxsw_afa_qos_dscp_cmd dscp_cmd, u8 dscp) +{ + mlxsw_afa_qos_dscp_cmd_set(payload, dscp_cmd); + mlxsw_afa_qos_dscp_set(payload, dscp); +} + +static inline void +mlxsw_afa_qos_switch_prio_pack(char *payload, + enum mlxsw_afa_qos_switch_prio_cmd prio_cmd, + u8 prio) +{ + mlxsw_afa_qos_switch_prio_cmd_set(payload, prio_cmd); + mlxsw_afa_qos_switch_prio_set(payload, prio); +} + +static int __mlxsw_afa_block_append_qos_dsfield(struct mlxsw_afa_block *block, + bool set_dscp, u8 dscp, + bool set_ecn, u8 ecn, + struct netlink_ext_ack *extack) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_QOS_CODE, + MLXSW_AFA_QOS_SIZE); + + if (IS_ERR(act)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append QOS action"); + return PTR_ERR(act); + } + + if (set_ecn) + mlxsw_afa_qos_ecn_pack(act, MLXSW_AFA_QOS_ECN_CMD_SET, ecn); + if (set_dscp) { + mlxsw_afa_qos_dscp_pack(act, MLXSW_AFA_QOS_DSCP_CMD_SET_ALL, + dscp); + mlxsw_afa_qos_dscp_rw_set(act, MLXSW_AFA_QOS_DSCP_RW_CLEAR); + } + + return 0; +} + +int mlxsw_afa_block_append_qos_dsfield(struct mlxsw_afa_block *block, + u8 dsfield, + struct netlink_ext_ack *extack) +{ + return __mlxsw_afa_block_append_qos_dsfield(block, + true, dsfield >> 2, + true, dsfield & 0x03, + extack); +} +EXPORT_SYMBOL(mlxsw_afa_block_append_qos_dsfield); + +int mlxsw_afa_block_append_qos_dscp(struct mlxsw_afa_block *block, + u8 dscp, struct netlink_ext_ack *extack) +{ + return __mlxsw_afa_block_append_qos_dsfield(block, + true, dscp, + false, 0, + extack); +} +EXPORT_SYMBOL(mlxsw_afa_block_append_qos_dscp); + +int mlxsw_afa_block_append_qos_ecn(struct mlxsw_afa_block *block, + u8 ecn, struct netlink_ext_ack *extack) +{ + return __mlxsw_afa_block_append_qos_dsfield(block, + false, 0, + true, ecn, + extack); +} +EXPORT_SYMBOL(mlxsw_afa_block_append_qos_ecn); + +int mlxsw_afa_block_append_qos_switch_prio(struct mlxsw_afa_block *block, + u8 prio, + struct netlink_ext_ack *extack) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_QOS_CODE, + MLXSW_AFA_QOS_SIZE); + + if (IS_ERR(act)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append QOS action"); + return PTR_ERR(act); + } + mlxsw_afa_qos_switch_prio_pack(act, MLXSW_AFA_QOS_SWITCH_PRIO_CMD_SET, + prio); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_qos_switch_prio); + +/* Forwarding Action + * ----------------- + * Forwarding Action can be used to implement Policy Based Switching (PBS) + * as well as OpenFlow related "Output" action. + */ + +#define MLXSW_AFA_FORWARD_CODE 0x07 +#define MLXSW_AFA_FORWARD_SIZE 1 + +enum mlxsw_afa_forward_type { + /* PBS, Policy Based Switching */ + MLXSW_AFA_FORWARD_TYPE_PBS, + /* Output, OpenFlow output type */ + MLXSW_AFA_FORWARD_TYPE_OUTPUT, +}; + +/* afa_forward_type */ +MLXSW_ITEM32(afa, forward, type, 0x00, 24, 2); + +/* afa_forward_pbs_ptr + * A pointer to the PBS entry configured by PPBS register. + * Reserved when in_port is set. + */ +MLXSW_ITEM32(afa, forward, pbs_ptr, 0x08, 0, 24); + +/* afa_forward_in_port + * Packet is forwarded back to the ingress port. 
+ */ +MLXSW_ITEM32(afa, forward, in_port, 0x0C, 0, 1); + +static inline void +mlxsw_afa_forward_pack(char *payload, enum mlxsw_afa_forward_type type, + u32 pbs_ptr, bool in_port) +{ + mlxsw_afa_forward_type_set(payload, type); + mlxsw_afa_forward_pbs_ptr_set(payload, pbs_ptr); + mlxsw_afa_forward_in_port_set(payload, in_port); +} + +int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, + u16 local_port, bool in_port, + struct netlink_ext_ack *extack) +{ + struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref; + u32 kvdl_index; + char *act; + int err; + + if (in_port) { + NL_SET_ERR_MSG_MOD(extack, "Forwarding to ingress port is not supported"); + return -EOPNOTSUPP; + } + fwd_entry_ref = mlxsw_afa_fwd_entry_ref_create(block, local_port); + if (IS_ERR(fwd_entry_ref)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot create forward action"); + return PTR_ERR(fwd_entry_ref); + } + kvdl_index = fwd_entry_ref->fwd_entry->kvdl_index; + + act = mlxsw_afa_block_append_action(block, MLXSW_AFA_FORWARD_CODE, + MLXSW_AFA_FORWARD_SIZE); + if (IS_ERR(act)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append forward action"); + err = PTR_ERR(act); + goto err_append_action; + } + mlxsw_afa_forward_pack(act, MLXSW_AFA_FORWARD_TYPE_PBS, + kvdl_index, in_port); + return 0; + +err_append_action: + mlxsw_afa_fwd_entry_ref_destroy(block, fwd_entry_ref); + return err; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_fwd); + +/* Policing and Counting Action + * ---------------------------- + * Policing and Counting action is used for binding policer and counter + * to ACL rules. + */ + +#define MLXSW_AFA_POLCNT_CODE 0x08 +#define MLXSW_AFA_POLCNT_SIZE 1 + +enum { + MLXSW_AFA_POLCNT_COUNTER, + MLXSW_AFA_POLCNT_POLICER, +}; + +/* afa_polcnt_c_p + * Counter or policer. + * Indicates whether the action binds a policer or a counter to the flow. + * 0: Counter + * 1: Policer + */ +MLXSW_ITEM32(afa, polcnt, c_p, 0x00, 31, 1); + +enum mlxsw_afa_polcnt_counter_set_type { + /* No count */ + MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_NO_COUNT = 0x00, + /* Count packets and bytes */ + MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03, + /* Count only packets */ + MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_PACKETS = 0x05, +}; + +/* afa_polcnt_counter_set_type + * Counter set type for flow counters. + */ +MLXSW_ITEM32(afa, polcnt, counter_set_type, 0x04, 24, 8); + +/* afa_polcnt_counter_index + * Counter index for flow counters. + */ +MLXSW_ITEM32(afa, polcnt, counter_index, 0x04, 0, 24); + +/* afa_polcnt_pid + * Policer ID. 
+ * Reserved when c_p = 0 + */ +MLXSW_ITEM32(afa, polcnt, pid, 0x08, 0, 14); + +static inline void +mlxsw_afa_polcnt_pack(char *payload, + enum mlxsw_afa_polcnt_counter_set_type set_type, + u32 counter_index) +{ + mlxsw_afa_polcnt_c_p_set(payload, MLXSW_AFA_POLCNT_COUNTER); + mlxsw_afa_polcnt_counter_set_type_set(payload, set_type); + mlxsw_afa_polcnt_counter_index_set(payload, counter_index); +} + +static void mlxsw_afa_polcnt_policer_pack(char *payload, u16 policer_index) +{ + mlxsw_afa_polcnt_c_p_set(payload, MLXSW_AFA_POLCNT_POLICER); + mlxsw_afa_polcnt_pid_set(payload, policer_index); +} + +int mlxsw_afa_block_append_allocated_counter(struct mlxsw_afa_block *block, + u32 counter_index) +{ + char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_POLCNT_CODE, + MLXSW_AFA_POLCNT_SIZE); + if (IS_ERR(act)) + return PTR_ERR(act); + mlxsw_afa_polcnt_pack(act, MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_PACKETS_BYTES, + counter_index); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_allocated_counter); + +int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block, + u32 *p_counter_index, + struct netlink_ext_ack *extack) +{ + struct mlxsw_afa_counter *counter; + u32 counter_index; + int err; + + counter = mlxsw_afa_counter_create(block); + if (IS_ERR(counter)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot create count action"); + return PTR_ERR(counter); + } + counter_index = counter->counter_index; + + err = mlxsw_afa_block_append_allocated_counter(block, counter_index); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append count action"); + goto err_append_allocated_counter; + } + if (p_counter_index) + *p_counter_index = counter_index; + return 0; + +err_append_allocated_counter: + mlxsw_afa_counter_destroy(block, counter); + return err; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_counter); + +int mlxsw_afa_block_append_police(struct mlxsw_afa_block *block, + u32 fa_index, u64 rate_bytes_ps, u32 burst, + u16 *p_policer_index, + struct netlink_ext_ack *extack) +{ + struct mlxsw_afa_policer_ref *policer_ref; + char *act; + int err; + + policer_ref = mlxsw_afa_policer_ref_create(block, fa_index, + rate_bytes_ps, + burst, extack); + if (IS_ERR(policer_ref)) + return PTR_ERR(policer_ref); + *p_policer_index = policer_ref->policer->policer_index; + + act = mlxsw_afa_block_append_action_ext(block, MLXSW_AFA_POLCNT_CODE, + MLXSW_AFA_POLCNT_SIZE, + MLXSW_AFA_ACTION_TYPE_POLICE); + if (IS_ERR(act)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append police action"); + err = PTR_ERR(act); + goto err_append_action; + } + mlxsw_afa_polcnt_policer_pack(act, *p_policer_index); + + return 0; + +err_append_action: + mlxsw_afa_policer_ref_destroy(block, policer_ref); + return err; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_police); + +/* Virtual Router and Forwarding Domain Action + * ------------------------------------------- + * Virtual Switch action is used for manipulate the Virtual Router (VR), + * MPLS label space and the Forwarding Identifier (FID). + */ + +#define MLXSW_AFA_VIRFWD_CODE 0x0E +#define MLXSW_AFA_VIRFWD_SIZE 1 + +enum mlxsw_afa_virfwd_fid_cmd { + /* Do nothing */ + MLXSW_AFA_VIRFWD_FID_CMD_NOOP, + /* Set the Forwarding Identifier (FID) to fid */ + MLXSW_AFA_VIRFWD_FID_CMD_SET, +}; + +/* afa_virfwd_fid_cmd */ +MLXSW_ITEM32(afa, virfwd, fid_cmd, 0x08, 29, 3); + +/* afa_virfwd_fid + * The FID value. 
+ */ +MLXSW_ITEM32(afa, virfwd, fid, 0x08, 0, 16); + +static inline void mlxsw_afa_virfwd_pack(char *payload, + enum mlxsw_afa_virfwd_fid_cmd fid_cmd, + u16 fid) +{ + mlxsw_afa_virfwd_fid_cmd_set(payload, fid_cmd); + mlxsw_afa_virfwd_fid_set(payload, fid); +} + +int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid, + struct netlink_ext_ack *extack) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_VIRFWD_CODE, + MLXSW_AFA_VIRFWD_SIZE); + if (IS_ERR(act)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append fid_set action"); + return PTR_ERR(act); + } + mlxsw_afa_virfwd_pack(act, MLXSW_AFA_VIRFWD_FID_CMD_SET, fid); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_fid_set); + +/* MC Routing Action + * ----------------- + * The Multicast router action. Can be used by RMFT_V2 - Router Multicast + * Forwarding Table Version 2 Register. + */ + +#define MLXSW_AFA_MCROUTER_CODE 0x10 +#define MLXSW_AFA_MCROUTER_SIZE 2 + +enum mlxsw_afa_mcrouter_rpf_action { + MLXSW_AFA_MCROUTER_RPF_ACTION_NOP, + MLXSW_AFA_MCROUTER_RPF_ACTION_TRAP, + MLXSW_AFA_MCROUTER_RPF_ACTION_DISCARD_ERROR, +}; + +/* afa_mcrouter_rpf_action */ +MLXSW_ITEM32(afa, mcrouter, rpf_action, 0x00, 28, 3); + +/* afa_mcrouter_expected_irif */ +MLXSW_ITEM32(afa, mcrouter, expected_irif, 0x00, 0, 16); + +/* afa_mcrouter_min_mtu */ +MLXSW_ITEM32(afa, mcrouter, min_mtu, 0x08, 0, 16); + +enum mlxsw_afa_mrouter_vrmid { + MLXSW_AFA_MCROUTER_VRMID_INVALID, + MLXSW_AFA_MCROUTER_VRMID_VALID +}; + +/* afa_mcrouter_vrmid + * Valid RMID: rigr_rmid_index is used as RMID + */ +MLXSW_ITEM32(afa, mcrouter, vrmid, 0x0C, 31, 1); + +/* afa_mcrouter_rigr_rmid_index + * When the vrmid field is set to invalid, the field is used as pointer to + * Router Interface Group (RIGR) Table in the KVD linear. + * When the vrmid is set to valid, the field is used as RMID index, ranged + * from 0 to max_mid - 1. The index is to the Port Group Table. + */ +MLXSW_ITEM32(afa, mcrouter, rigr_rmid_index, 0x0C, 0, 24); + +static inline void +mlxsw_afa_mcrouter_pack(char *payload, + enum mlxsw_afa_mcrouter_rpf_action rpf_action, + u16 expected_irif, u16 min_mtu, + enum mlxsw_afa_mrouter_vrmid vrmid, u32 rigr_rmid_index) + +{ + mlxsw_afa_mcrouter_rpf_action_set(payload, rpf_action); + mlxsw_afa_mcrouter_expected_irif_set(payload, expected_irif); + mlxsw_afa_mcrouter_min_mtu_set(payload, min_mtu); + mlxsw_afa_mcrouter_vrmid_set(payload, vrmid); + mlxsw_afa_mcrouter_rigr_rmid_index_set(payload, rigr_rmid_index); +} + +int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block, + u16 expected_irif, u16 min_mtu, + bool rmid_valid, u32 kvdl_index) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_MCROUTER_CODE, + MLXSW_AFA_MCROUTER_SIZE); + if (IS_ERR(act)) + return PTR_ERR(act); + mlxsw_afa_mcrouter_pack(act, MLXSW_AFA_MCROUTER_RPF_ACTION_TRAP, + expected_irif, min_mtu, rmid_valid, kvdl_index); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_mcrouter); + +/* SIP DIP Action + * -------------- + * The SIP_DIP_ACTION is used for modifying the SIP and DIP fields of the + * packet, e.g. for NAT. The L3 checksum is updated. Also, if the L4 is TCP or + * if the L4 is UDP and the checksum field is not zero, then the L4 checksum is + * updated. + */ + +#define MLXSW_AFA_IP_CODE 0x11 +#define MLXSW_AFA_IP_SIZE 2 + +enum mlxsw_afa_ip_s_d { + /* ip refers to dip */ + MLXSW_AFA_IP_S_D_DIP, + /* ip refers to sip */ + MLXSW_AFA_IP_S_D_SIP, +}; + +/* afa_ip_s_d + * Source or destination. 
+ */ +MLXSW_ITEM32(afa, ip, s_d, 0x00, 31, 1); + +enum mlxsw_afa_ip_m_l { + /* LSB: ip[63:0] refers to ip[63:0] */ + MLXSW_AFA_IP_M_L_LSB, + /* MSB: ip[63:0] refers to ip[127:64] */ + MLXSW_AFA_IP_M_L_MSB, +}; + +/* afa_ip_m_l + * MSB or LSB. + */ +MLXSW_ITEM32(afa, ip, m_l, 0x00, 30, 1); + +/* afa_ip_ip_63_32 + * Bits [63:32] in the IP address to change to. + */ +MLXSW_ITEM32(afa, ip, ip_63_32, 0x08, 0, 32); + +/* afa_ip_ip_31_0 + * Bits [31:0] in the IP address to change to. + */ +MLXSW_ITEM32(afa, ip, ip_31_0, 0x0C, 0, 32); + +static void mlxsw_afa_ip_pack(char *payload, enum mlxsw_afa_ip_s_d s_d, + enum mlxsw_afa_ip_m_l m_l, u32 ip_31_0, + u32 ip_63_32) +{ + mlxsw_afa_ip_s_d_set(payload, s_d); + mlxsw_afa_ip_m_l_set(payload, m_l); + mlxsw_afa_ip_ip_31_0_set(payload, ip_31_0); + mlxsw_afa_ip_ip_63_32_set(payload, ip_63_32); +} + +int mlxsw_afa_block_append_ip(struct mlxsw_afa_block *block, bool is_dip, + bool is_lsb, u32 val_31_0, u32 val_63_32, + struct netlink_ext_ack *extack) +{ + enum mlxsw_afa_ip_s_d s_d = is_dip ? MLXSW_AFA_IP_S_D_DIP : + MLXSW_AFA_IP_S_D_SIP; + enum mlxsw_afa_ip_m_l m_l = is_lsb ? MLXSW_AFA_IP_M_L_LSB : + MLXSW_AFA_IP_M_L_MSB; + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_IP_CODE, + MLXSW_AFA_IP_SIZE); + + if (IS_ERR(act)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append IP action"); + return PTR_ERR(act); + } + + mlxsw_afa_ip_pack(act, s_d, m_l, val_31_0, val_63_32); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_ip); + +/* L4 Port Action + * -------------- + * The L4_PORT_ACTION is used for modifying the sport and dport fields of the packet, e.g. for NAT. + * If (the L4 is TCP) or if (the L4 is UDP and checksum field!=0) then the L4 checksum is updated. + */ + +#define MLXSW_AFA_L4PORT_CODE 0x12 +#define MLXSW_AFA_L4PORT_SIZE 1 + +enum mlxsw_afa_l4port_s_d { + /* configure src_l4_port */ + MLXSW_AFA_L4PORT_S_D_SRC, + /* configure dst_l4_port */ + MLXSW_AFA_L4PORT_S_D_DST, +}; + +/* afa_l4port_s_d + * Source or destination. + */ +MLXSW_ITEM32(afa, l4port, s_d, 0x00, 31, 1); + +/* afa_l4port_l4_port + * Number of port to change to. + */ +MLXSW_ITEM32(afa, l4port, l4_port, 0x08, 0, 16); + +static void mlxsw_afa_l4port_pack(char *payload, enum mlxsw_afa_l4port_s_d s_d, u16 l4_port) +{ + mlxsw_afa_l4port_s_d_set(payload, s_d); + mlxsw_afa_l4port_l4_port_set(payload, l4_port); +} + +int mlxsw_afa_block_append_l4port(struct mlxsw_afa_block *block, bool is_dport, u16 l4_port, + struct netlink_ext_ack *extack) +{ + enum mlxsw_afa_l4port_s_d s_d = is_dport ? MLXSW_AFA_L4PORT_S_D_DST : + MLXSW_AFA_L4PORT_S_D_SRC; + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_L4PORT_CODE, + MLXSW_AFA_L4PORT_SIZE); + + if (IS_ERR(act)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append L4_PORT action"); + return PTR_ERR(act); + } + + mlxsw_afa_l4port_pack(act, s_d, l4_port); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_l4port); + +/* Mirror Sampler Action + * --------------------- + * The SAMPLER_ACTION is used to mirror packets with a probability (sampling). + */ + +#define MLXSW_AFA_SAMPLER_CODE 0x13 +#define MLXSW_AFA_SAMPLER_SIZE 1 + +/* afa_sampler_mirror_agent + * Mirror (SPAN) agent. + */ +MLXSW_ITEM32(afa, sampler, mirror_agent, 0x04, 0, 3); + +#define MLXSW_AFA_SAMPLER_RATE_MAX (BIT(24) - 1) + +/* afa_sampler_mirror_probability_rate + * Mirroring probability. 
+ * Valid values are 1 to 2^24 - 1 + */ +MLXSW_ITEM32(afa, sampler, mirror_probability_rate, 0x08, 0, 24); + +static void mlxsw_afa_sampler_pack(char *payload, u8 mirror_agent, u32 rate) +{ + mlxsw_afa_sampler_mirror_agent_set(payload, mirror_agent); + mlxsw_afa_sampler_mirror_probability_rate_set(payload, rate); +} + +struct mlxsw_afa_sampler { + struct mlxsw_afa_resource resource; + int span_id; + u16 local_port; + bool ingress; +}; + +static void mlxsw_afa_sampler_destroy(struct mlxsw_afa_block *block, + struct mlxsw_afa_sampler *sampler) +{ + mlxsw_afa_resource_del(&sampler->resource); + block->afa->ops->sampler_del(block->afa->ops_priv, sampler->local_port, + sampler->span_id, sampler->ingress); + kfree(sampler); +} + +static void mlxsw_afa_sampler_destructor(struct mlxsw_afa_block *block, + struct mlxsw_afa_resource *resource) +{ + struct mlxsw_afa_sampler *sampler; + + sampler = container_of(resource, struct mlxsw_afa_sampler, resource); + mlxsw_afa_sampler_destroy(block, sampler); +} + +static struct mlxsw_afa_sampler * +mlxsw_afa_sampler_create(struct mlxsw_afa_block *block, u16 local_port, + struct psample_group *psample_group, u32 rate, + u32 trunc_size, bool truncate, bool ingress, + struct netlink_ext_ack *extack) +{ + struct mlxsw_afa_sampler *sampler; + int err; + + sampler = kzalloc(sizeof(*sampler), GFP_KERNEL); + if (!sampler) + return ERR_PTR(-ENOMEM); + + err = block->afa->ops->sampler_add(block->afa->ops_priv, local_port, + psample_group, rate, trunc_size, + truncate, ingress, &sampler->span_id, + extack); + if (err) + goto err_sampler_add; + + sampler->ingress = ingress; + sampler->local_port = local_port; + sampler->resource.destructor = mlxsw_afa_sampler_destructor; + mlxsw_afa_resource_add(block, &sampler->resource); + return sampler; + +err_sampler_add: + kfree(sampler); + return ERR_PTR(err); +} + +static int +mlxsw_afa_block_append_allocated_sampler(struct mlxsw_afa_block *block, + u8 mirror_agent, u32 rate) +{ + char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_SAMPLER_CODE, + MLXSW_AFA_SAMPLER_SIZE); + + if (IS_ERR(act)) + return PTR_ERR(act); + mlxsw_afa_sampler_pack(act, mirror_agent, rate); + return 0; +} + +int mlxsw_afa_block_append_sampler(struct mlxsw_afa_block *block, u16 local_port, + struct psample_group *psample_group, + u32 rate, u32 trunc_size, bool truncate, + bool ingress, + struct netlink_ext_ack *extack) +{ + struct mlxsw_afa_sampler *sampler; + int err; + + if (rate > MLXSW_AFA_SAMPLER_RATE_MAX) { + NL_SET_ERR_MSG_MOD(extack, "Sampling rate is too high"); + return -EINVAL; + } + + sampler = mlxsw_afa_sampler_create(block, local_port, psample_group, + rate, trunc_size, truncate, ingress, + extack); + if (IS_ERR(sampler)) + return PTR_ERR(sampler); + + err = mlxsw_afa_block_append_allocated_sampler(block, sampler->span_id, + rate); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append sampler action"); + goto err_append_allocated_sampler; + } + + return 0; + +err_append_allocated_sampler: + mlxsw_afa_sampler_destroy(block, sampler); + return err; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_sampler); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h new file mode 100644 index 000000000..db58037be --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2018 Mellanox Technologies. 
All rights reserved */ + +#ifndef _MLXSW_CORE_ACL_FLEX_ACTIONS_H +#define _MLXSW_CORE_ACL_FLEX_ACTIONS_H + +#include <linux/types.h> +#include <linux/netdevice.h> +#include <net/flow_offload.h> + +struct mlxsw_afa; +struct mlxsw_afa_block; + +struct mlxsw_afa_ops { + int (*kvdl_set_add)(void *priv, u32 *p_kvdl_index, + char *enc_actions, bool is_first); + void (*kvdl_set_del)(void *priv, u32 kvdl_index, bool is_first); + int (*kvdl_set_activity_get)(void *priv, u32 kvdl_index, + bool *activity); + int (*kvdl_fwd_entry_add)(void *priv, u32 *p_kvdl_index, u16 local_port); + void (*kvdl_fwd_entry_del)(void *priv, u32 kvdl_index); + int (*counter_index_get)(void *priv, unsigned int *p_counter_index); + void (*counter_index_put)(void *priv, unsigned int counter_index); + int (*mirror_add)(void *priv, u16 local_in_port, + const struct net_device *out_dev, + bool ingress, int *p_span_id); + void (*mirror_del)(void *priv, u16 local_in_port, int span_id, + bool ingress); + int (*policer_add)(void *priv, u64 rate_bytes_ps, u32 burst, + u16 *p_policer_index, + struct netlink_ext_ack *extack); + void (*policer_del)(void *priv, u16 policer_index); + int (*sampler_add)(void *priv, u16 local_port, + struct psample_group *psample_group, u32 rate, + u32 trunc_size, bool truncate, bool ingress, + int *p_span_id, struct netlink_ext_ack *extack); + void (*sampler_del)(void *priv, u16 local_port, int span_id, + bool ingress); + bool dummy_first_set; +}; + +struct mlxsw_afa *mlxsw_afa_create(unsigned int max_acts_per_set, + const struct mlxsw_afa_ops *ops, + void *ops_priv); +void mlxsw_afa_destroy(struct mlxsw_afa *mlxsw_afa); +struct mlxsw_afa_block *mlxsw_afa_block_create(struct mlxsw_afa *mlxsw_afa); +void mlxsw_afa_block_destroy(struct mlxsw_afa_block *block); +int mlxsw_afa_block_commit(struct mlxsw_afa_block *block); +char *mlxsw_afa_block_first_set(struct mlxsw_afa_block *block); +char *mlxsw_afa_block_cur_set(struct mlxsw_afa_block *block); +u32 mlxsw_afa_block_first_kvdl_index(struct mlxsw_afa_block *block); +int mlxsw_afa_block_activity_get(struct mlxsw_afa_block *block, bool *activity); +int mlxsw_afa_block_continue(struct mlxsw_afa_block *block); +int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id); +int mlxsw_afa_block_terminate(struct mlxsw_afa_block *block); +const struct flow_action_cookie * +mlxsw_afa_cookie_lookup(struct mlxsw_afa *mlxsw_afa, u32 cookie_index); +int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block, bool ingress, + const struct flow_action_cookie *fa_cookie, + struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id); +int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, + u16 trap_id); +int mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block, + u16 local_in_port, + const struct net_device *out_dev, + bool ingress, + struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, + u16 local_port, bool in_port, + struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, + u16 vid, u8 pcp, u8 et, + struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_qos_switch_prio(struct mlxsw_afa_block *block, + u8 prio, + struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_qos_dsfield(struct mlxsw_afa_block *block, + u8 dsfield, + struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_qos_dscp(struct mlxsw_afa_block *block, + u8 dscp, struct netlink_ext_ack *extack); +int 
mlxsw_afa_block_append_qos_ecn(struct mlxsw_afa_block *block, + u8 ecn, struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_allocated_counter(struct mlxsw_afa_block *block, + u32 counter_index); +int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block, + u32 *p_counter_index, + struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid, + struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block, + u16 expected_irif, u16 min_mtu, + bool rmid_valid, u32 kvdl_index); +int mlxsw_afa_block_append_ip(struct mlxsw_afa_block *block, bool is_dip, + bool is_lsb, u32 val_31_0, u32 val_63_32, + struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_l4port(struct mlxsw_afa_block *block, bool is_dport, u16 l4_port, + struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_police(struct mlxsw_afa_block *block, + u32 fa_index, u64 rate_bytes_ps, u32 burst, + u16 *p_policer_index, + struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_sampler(struct mlxsw_afa_block *block, u16 local_port, + struct psample_group *psample_group, + u32 rate, u32 trunc_size, bool truncate, + bool ingress, + struct netlink_ext_ack *extack); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c new file mode 100644 index 000000000..f208a237d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c @@ -0,0 +1,506 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/list.h> +#include <linux/errno.h> + +#include "item.h" +#include "core_acl_flex_keys.h" + +/* For the purpose of the driver, define an internal storage scratchpad + * that will be used to store key/mask values. For each defined element type + * define an internal storage geometry. + * + * When adding new elements, MLXSW_AFK_ELEMENT_STORAGE_SIZE must be increased + * accordingly. 
+ */ +static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { + MLXSW_AFK_ELEMENT_INFO_U32(SRC_SYS_PORT, 0x00, 16, 16), + MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_32_47, 0x04, 2), + MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_0_31, 0x06, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(SMAC_32_47, 0x0A, 2), + MLXSW_AFK_ELEMENT_INFO_BUF(SMAC_0_31, 0x0C, 4), + MLXSW_AFK_ELEMENT_INFO_U32(ETHERTYPE, 0x00, 0, 16), + MLXSW_AFK_ELEMENT_INFO_U32(IP_PROTO, 0x10, 0, 8), + MLXSW_AFK_ELEMENT_INFO_U32(VID, 0x10, 8, 12), + MLXSW_AFK_ELEMENT_INFO_U32(PCP, 0x10, 20, 3), + MLXSW_AFK_ELEMENT_INFO_U32(TCP_FLAGS, 0x10, 23, 9), + MLXSW_AFK_ELEMENT_INFO_U32(DST_L4_PORT, 0x14, 0, 16), + MLXSW_AFK_ELEMENT_INFO_U32(SRC_L4_PORT, 0x14, 16, 16), + MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x18, 0, 8), + MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x18, 9, 2), + MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x18, 11, 6), + MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_MSB, 0x18, 17, 4), + MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_LSB, 0x18, 21, 8), + MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_96_127, 0x20, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_64_95, 0x24, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_32_63, 0x28, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_0_31, 0x2C, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_96_127, 0x30, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_64_95, 0x34, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_32_63, 0x38, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_0_31, 0x3C, 4), +}; + +struct mlxsw_afk { + struct list_head key_info_list; + unsigned int max_blocks; + const struct mlxsw_afk_ops *ops; + const struct mlxsw_afk_block *blocks; + unsigned int blocks_count; +}; + +static bool mlxsw_afk_blocks_check(struct mlxsw_afk *mlxsw_afk) +{ + int i; + int j; + + for (i = 0; i < mlxsw_afk->blocks_count; i++) { + const struct mlxsw_afk_block *block = &mlxsw_afk->blocks[i]; + + for (j = 0; j < block->instances_count; j++) { + const struct mlxsw_afk_element_info *elinfo; + struct mlxsw_afk_element_inst *elinst; + + elinst = &block->instances[j]; + elinfo = &mlxsw_afk_element_infos[elinst->element]; + if (elinst->type != elinfo->type || + (!elinst->avoid_size_check && + elinst->item.size.bits != + elinfo->item.size.bits)) + return false; + } + } + return true; +} + +struct mlxsw_afk *mlxsw_afk_create(unsigned int max_blocks, + const struct mlxsw_afk_ops *ops) +{ + struct mlxsw_afk *mlxsw_afk; + + mlxsw_afk = kzalloc(sizeof(*mlxsw_afk), GFP_KERNEL); + if (!mlxsw_afk) + return NULL; + INIT_LIST_HEAD(&mlxsw_afk->key_info_list); + mlxsw_afk->max_blocks = max_blocks; + mlxsw_afk->ops = ops; + mlxsw_afk->blocks = ops->blocks; + mlxsw_afk->blocks_count = ops->blocks_count; + WARN_ON(!mlxsw_afk_blocks_check(mlxsw_afk)); + return mlxsw_afk; +} +EXPORT_SYMBOL(mlxsw_afk_create); + +void mlxsw_afk_destroy(struct mlxsw_afk *mlxsw_afk) +{ + WARN_ON(!list_empty(&mlxsw_afk->key_info_list)); + kfree(mlxsw_afk); +} +EXPORT_SYMBOL(mlxsw_afk_destroy); + +struct mlxsw_afk_key_info { + struct list_head list; + unsigned int ref_count; + unsigned int blocks_count; + int element_to_block[MLXSW_AFK_ELEMENT_MAX]; /* index is element, value + * is index inside "blocks" + */ + struct mlxsw_afk_element_usage elusage; + const struct mlxsw_afk_block *blocks[]; +}; + +static bool +mlxsw_afk_key_info_elements_eq(struct mlxsw_afk_key_info *key_info, + struct mlxsw_afk_element_usage *elusage) +{ + return memcmp(&key_info->elusage, elusage, sizeof(*elusage)) == 0; +} + +static struct mlxsw_afk_key_info * +mlxsw_afk_key_info_find(struct mlxsw_afk *mlxsw_afk, + struct mlxsw_afk_element_usage *elusage) +{ + struct mlxsw_afk_key_info 
*key_info; + + list_for_each_entry(key_info, &mlxsw_afk->key_info_list, list) { + if (mlxsw_afk_key_info_elements_eq(key_info, elusage)) + return key_info; + } + return NULL; +} + +struct mlxsw_afk_picker { + DECLARE_BITMAP(element, MLXSW_AFK_ELEMENT_MAX); + unsigned int total; +}; + +static void mlxsw_afk_picker_count_hits(struct mlxsw_afk *mlxsw_afk, + struct mlxsw_afk_picker *picker, + enum mlxsw_afk_element element) +{ + int i; + int j; + + for (i = 0; i < mlxsw_afk->blocks_count; i++) { + const struct mlxsw_afk_block *block = &mlxsw_afk->blocks[i]; + + for (j = 0; j < block->instances_count; j++) { + struct mlxsw_afk_element_inst *elinst; + + elinst = &block->instances[j]; + if (elinst->element == element) { + __set_bit(element, picker[i].element); + picker[i].total++; + } + } + } +} + +static void mlxsw_afk_picker_subtract_hits(struct mlxsw_afk *mlxsw_afk, + struct mlxsw_afk_picker *picker, + int block_index) +{ + DECLARE_BITMAP(hits_element, MLXSW_AFK_ELEMENT_MAX); + int i; + int j; + + memcpy(&hits_element, &picker[block_index].element, + sizeof(hits_element)); + + for (i = 0; i < mlxsw_afk->blocks_count; i++) { + for_each_set_bit(j, hits_element, MLXSW_AFK_ELEMENT_MAX) { + if (__test_and_clear_bit(j, picker[i].element)) + picker[i].total--; + } + } +} + +static int mlxsw_afk_picker_most_hits_get(struct mlxsw_afk *mlxsw_afk, + struct mlxsw_afk_picker *picker) +{ + int most_index = -EINVAL; /* Should never happen to return this */ + int most_hits = 0; + int i; + + for (i = 0; i < mlxsw_afk->blocks_count; i++) { + if (picker[i].total > most_hits) { + most_hits = picker[i].total; + most_index = i; + } + } + return most_index; +} + +static int mlxsw_afk_picker_key_info_add(struct mlxsw_afk *mlxsw_afk, + struct mlxsw_afk_picker *picker, + int block_index, + struct mlxsw_afk_key_info *key_info) +{ + enum mlxsw_afk_element element; + + if (key_info->blocks_count == mlxsw_afk->max_blocks) + return -EINVAL; + + for_each_set_bit(element, picker[block_index].element, + MLXSW_AFK_ELEMENT_MAX) { + key_info->element_to_block[element] = key_info->blocks_count; + mlxsw_afk_element_usage_add(&key_info->elusage, element); + } + + key_info->blocks[key_info->blocks_count] = + &mlxsw_afk->blocks[block_index]; + key_info->blocks_count++; + return 0; +} + +static int mlxsw_afk_picker(struct mlxsw_afk *mlxsw_afk, + struct mlxsw_afk_key_info *key_info, + struct mlxsw_afk_element_usage *elusage) +{ + struct mlxsw_afk_picker *picker; + enum mlxsw_afk_element element; + int err; + + picker = kcalloc(mlxsw_afk->blocks_count, sizeof(*picker), GFP_KERNEL); + if (!picker) + return -ENOMEM; + + /* Since the same elements could be present in multiple blocks, + * we must find out optimal block list in order to make the + * block count as low as possible. + * + * First, we count hits. We go over all available blocks and count + * how many of requested elements are covered by each. + * + * Then in loop, we find block with most hits and add it to + * output key_info. Then we have to subtract this block hits so + * the next iteration will find most suitable block for + * the rest of requested elements. 
+ */ + + mlxsw_afk_element_usage_for_each(element, elusage) + mlxsw_afk_picker_count_hits(mlxsw_afk, picker, element); + + do { + int block_index; + + block_index = mlxsw_afk_picker_most_hits_get(mlxsw_afk, picker); + if (block_index < 0) { + err = block_index; + goto out; + } + err = mlxsw_afk_picker_key_info_add(mlxsw_afk, picker, + block_index, key_info); + if (err) + goto out; + mlxsw_afk_picker_subtract_hits(mlxsw_afk, picker, block_index); + } while (!mlxsw_afk_key_info_elements_eq(key_info, elusage)); + + err = 0; +out: + kfree(picker); + return err; +} + +static struct mlxsw_afk_key_info * +mlxsw_afk_key_info_create(struct mlxsw_afk *mlxsw_afk, + struct mlxsw_afk_element_usage *elusage) +{ + struct mlxsw_afk_key_info *key_info; + int err; + + key_info = kzalloc(struct_size(key_info, blocks, mlxsw_afk->max_blocks), + GFP_KERNEL); + if (!key_info) + return ERR_PTR(-ENOMEM); + err = mlxsw_afk_picker(mlxsw_afk, key_info, elusage); + if (err) + goto err_picker; + list_add(&key_info->list, &mlxsw_afk->key_info_list); + key_info->ref_count = 1; + return key_info; + +err_picker: + kfree(key_info); + return ERR_PTR(err); +} + +static void mlxsw_afk_key_info_destroy(struct mlxsw_afk_key_info *key_info) +{ + list_del(&key_info->list); + kfree(key_info); +} + +struct mlxsw_afk_key_info * +mlxsw_afk_key_info_get(struct mlxsw_afk *mlxsw_afk, + struct mlxsw_afk_element_usage *elusage) +{ + struct mlxsw_afk_key_info *key_info; + + key_info = mlxsw_afk_key_info_find(mlxsw_afk, elusage); + if (key_info) { + key_info->ref_count++; + return key_info; + } + return mlxsw_afk_key_info_create(mlxsw_afk, elusage); +} +EXPORT_SYMBOL(mlxsw_afk_key_info_get); + +void mlxsw_afk_key_info_put(struct mlxsw_afk_key_info *key_info) +{ + if (--key_info->ref_count) + return; + mlxsw_afk_key_info_destroy(key_info); +} +EXPORT_SYMBOL(mlxsw_afk_key_info_put); + +bool mlxsw_afk_key_info_subset(struct mlxsw_afk_key_info *key_info, + struct mlxsw_afk_element_usage *elusage) +{ + return mlxsw_afk_element_usage_subset(elusage, &key_info->elusage); +} +EXPORT_SYMBOL(mlxsw_afk_key_info_subset); + +static const struct mlxsw_afk_element_inst * +mlxsw_afk_block_elinst_get(const struct mlxsw_afk_block *block, + enum mlxsw_afk_element element) +{ + int i; + + for (i = 0; i < block->instances_count; i++) { + struct mlxsw_afk_element_inst *elinst; + + elinst = &block->instances[i]; + if (elinst->element == element) + return elinst; + } + return NULL; +} + +static const struct mlxsw_afk_element_inst * +mlxsw_afk_key_info_elinst_get(struct mlxsw_afk_key_info *key_info, + enum mlxsw_afk_element element, + int *p_block_index) +{ + const struct mlxsw_afk_element_inst *elinst; + const struct mlxsw_afk_block *block; + int block_index; + + if (WARN_ON(!test_bit(element, key_info->elusage.usage))) + return NULL; + block_index = key_info->element_to_block[element]; + block = key_info->blocks[block_index]; + + elinst = mlxsw_afk_block_elinst_get(block, element); + if (WARN_ON(!elinst)) + return NULL; + + *p_block_index = block_index; + return elinst; +} + +u16 +mlxsw_afk_key_info_block_encoding_get(const struct mlxsw_afk_key_info *key_info, + int block_index) +{ + return key_info->blocks[block_index]->encoding; +} +EXPORT_SYMBOL(mlxsw_afk_key_info_block_encoding_get); + +unsigned int +mlxsw_afk_key_info_blocks_count_get(const struct mlxsw_afk_key_info *key_info) +{ + return key_info->blocks_count; +} +EXPORT_SYMBOL(mlxsw_afk_key_info_blocks_count_get); + +void mlxsw_afk_values_add_u32(struct mlxsw_afk_element_values *values, + enum 
mlxsw_afk_element element, + u32 key_value, u32 mask_value) +{ + const struct mlxsw_afk_element_info *elinfo = + &mlxsw_afk_element_infos[element]; + const struct mlxsw_item *storage_item = &elinfo->item; + + if (!mask_value) + return; + if (WARN_ON(elinfo->type != MLXSW_AFK_ELEMENT_TYPE_U32)) + return; + __mlxsw_item_set32(values->storage.key, storage_item, 0, key_value); + __mlxsw_item_set32(values->storage.mask, storage_item, 0, mask_value); + mlxsw_afk_element_usage_add(&values->elusage, element); +} +EXPORT_SYMBOL(mlxsw_afk_values_add_u32); + +void mlxsw_afk_values_add_buf(struct mlxsw_afk_element_values *values, + enum mlxsw_afk_element element, + const char *key_value, const char *mask_value, + unsigned int len) +{ + const struct mlxsw_afk_element_info *elinfo = + &mlxsw_afk_element_infos[element]; + const struct mlxsw_item *storage_item = &elinfo->item; + + if (!memchr_inv(mask_value, 0, len)) /* If mask is zero */ + return; + if (WARN_ON(elinfo->type != MLXSW_AFK_ELEMENT_TYPE_BUF) || + WARN_ON(elinfo->item.size.bytes != len)) + return; + __mlxsw_item_memcpy_to(values->storage.key, key_value, + storage_item, 0); + __mlxsw_item_memcpy_to(values->storage.mask, mask_value, + storage_item, 0); + mlxsw_afk_element_usage_add(&values->elusage, element); +} +EXPORT_SYMBOL(mlxsw_afk_values_add_buf); + +static void mlxsw_sp_afk_encode_u32(const struct mlxsw_item *storage_item, + const struct mlxsw_item *output_item, + char *storage, char *output, int diff) +{ + u32 value; + + value = __mlxsw_item_get32(storage, storage_item, 0); + __mlxsw_item_set32(output, output_item, 0, value + diff); +} + +static void mlxsw_sp_afk_encode_buf(const struct mlxsw_item *storage_item, + const struct mlxsw_item *output_item, + char *storage, char *output) +{ + char *storage_data = __mlxsw_item_data(storage, storage_item, 0); + char *output_data = __mlxsw_item_data(output, output_item, 0); + size_t len = output_item->size.bytes; + + memcpy(output_data, storage_data, len); +} + +static void +mlxsw_sp_afk_encode_one(const struct mlxsw_afk_element_inst *elinst, + char *output, char *storage, int u32_diff) +{ + const struct mlxsw_item *output_item = &elinst->item; + const struct mlxsw_afk_element_info *elinfo; + const struct mlxsw_item *storage_item; + + elinfo = &mlxsw_afk_element_infos[elinst->element]; + storage_item = &elinfo->item; + if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_U32) + mlxsw_sp_afk_encode_u32(storage_item, output_item, + storage, output, u32_diff); + else if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_BUF) + mlxsw_sp_afk_encode_buf(storage_item, output_item, + storage, output); +} + +#define MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE 16 + +void mlxsw_afk_encode(struct mlxsw_afk *mlxsw_afk, + struct mlxsw_afk_key_info *key_info, + struct mlxsw_afk_element_values *values, + char *key, char *mask) +{ + unsigned int blocks_count = + mlxsw_afk_key_info_blocks_count_get(key_info); + char block_mask[MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE]; + char block_key[MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE]; + const struct mlxsw_afk_element_inst *elinst; + enum mlxsw_afk_element element; + int block_index, i; + + for (i = 0; i < blocks_count; i++) { + memset(block_key, 0, MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE); + memset(block_mask, 0, MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE); + + mlxsw_afk_element_usage_for_each(element, &values->elusage) { + elinst = mlxsw_afk_key_info_elinst_get(key_info, + element, + &block_index); + if (!elinst || block_index != i) + continue; + + mlxsw_sp_afk_encode_one(elinst, block_key, + values->storage.key, + elinst->u32_key_diff); 
+ mlxsw_sp_afk_encode_one(elinst, block_mask, + values->storage.mask, 0); + } + + mlxsw_afk->ops->encode_block(key, i, block_key); + mlxsw_afk->ops->encode_block(mask, i, block_mask); + } +} +EXPORT_SYMBOL(mlxsw_afk_encode); + +void mlxsw_afk_clear(struct mlxsw_afk *mlxsw_afk, char *key, + int block_start, int block_end) +{ + int i; + + for (i = block_start; i <= block_end; i++) + mlxsw_afk->ops->clear_block(key, i); +} +EXPORT_SYMBOL(mlxsw_afk_clear); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h new file mode 100644 index 000000000..3a037fe47 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h @@ -0,0 +1,222 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_CORE_ACL_FLEX_KEYS_H +#define _MLXSW_CORE_ACL_FLEX_KEYS_H + +#include <linux/types.h> +#include <linux/bitmap.h> + +#include "item.h" + +enum mlxsw_afk_element { + MLXSW_AFK_ELEMENT_SRC_SYS_PORT, + MLXSW_AFK_ELEMENT_DMAC_32_47, + MLXSW_AFK_ELEMENT_DMAC_0_31, + MLXSW_AFK_ELEMENT_SMAC_32_47, + MLXSW_AFK_ELEMENT_SMAC_0_31, + MLXSW_AFK_ELEMENT_ETHERTYPE, + MLXSW_AFK_ELEMENT_IP_PROTO, + MLXSW_AFK_ELEMENT_SRC_IP_96_127, + MLXSW_AFK_ELEMENT_SRC_IP_64_95, + MLXSW_AFK_ELEMENT_SRC_IP_32_63, + MLXSW_AFK_ELEMENT_SRC_IP_0_31, + MLXSW_AFK_ELEMENT_DST_IP_96_127, + MLXSW_AFK_ELEMENT_DST_IP_64_95, + MLXSW_AFK_ELEMENT_DST_IP_32_63, + MLXSW_AFK_ELEMENT_DST_IP_0_31, + MLXSW_AFK_ELEMENT_DST_L4_PORT, + MLXSW_AFK_ELEMENT_SRC_L4_PORT, + MLXSW_AFK_ELEMENT_VID, + MLXSW_AFK_ELEMENT_PCP, + MLXSW_AFK_ELEMENT_TCP_FLAGS, + MLXSW_AFK_ELEMENT_IP_TTL_, + MLXSW_AFK_ELEMENT_IP_ECN, + MLXSW_AFK_ELEMENT_IP_DSCP, + MLXSW_AFK_ELEMENT_VIRT_ROUTER_MSB, + MLXSW_AFK_ELEMENT_VIRT_ROUTER_LSB, + MLXSW_AFK_ELEMENT_MAX, +}; + +enum mlxsw_afk_element_type { + MLXSW_AFK_ELEMENT_TYPE_U32, + MLXSW_AFK_ELEMENT_TYPE_BUF, +}; + +struct mlxsw_afk_element_info { + enum mlxsw_afk_element element; /* element ID */ + enum mlxsw_afk_element_type type; + struct mlxsw_item item; /* element geometry in internal storage */ +}; + +#define MLXSW_AFK_ELEMENT_INFO(_type, _element, _offset, _shift, _size) \ + [MLXSW_AFK_ELEMENT_##_element] = { \ + .element = MLXSW_AFK_ELEMENT_##_element, \ + .type = _type, \ + .item = { \ + .offset = _offset, \ + .shift = _shift, \ + .size = {.bits = _size}, \ + .name = #_element, \ + }, \ + } + +#define MLXSW_AFK_ELEMENT_INFO_U32(_element, _offset, _shift, _size) \ + MLXSW_AFK_ELEMENT_INFO(MLXSW_AFK_ELEMENT_TYPE_U32, \ + _element, _offset, _shift, _size) + +#define MLXSW_AFK_ELEMENT_INFO_BUF(_element, _offset, _size) \ + MLXSW_AFK_ELEMENT_INFO(MLXSW_AFK_ELEMENT_TYPE_BUF, \ + _element, _offset, 0, _size) + +#define MLXSW_AFK_ELEMENT_STORAGE_SIZE 0x40 + +struct mlxsw_afk_element_inst { /* element instance in actual block */ + enum mlxsw_afk_element element; + enum mlxsw_afk_element_type type; + struct mlxsw_item item; /* element geometry in block */ + int u32_key_diff; /* in case value needs to be adjusted before write + * this diff is here to handle that + */ + bool avoid_size_check; +}; + +#define MLXSW_AFK_ELEMENT_INST(_type, _element, _offset, \ + _shift, _size, _u32_key_diff, _avoid_size_check) \ + { \ + .element = MLXSW_AFK_ELEMENT_##_element, \ + .type = _type, \ + .item = { \ + .offset = _offset, \ + .shift = _shift, \ + .size = {.bits = _size}, \ + .name = #_element, \ + }, \ + .u32_key_diff = _u32_key_diff, \ + .avoid_size_check = _avoid_size_check, \ + } + 
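For illustration, a minimal sketch of how a per-ASIC file could describe one flex-key block using the element-instance macros defined in this header (the *_INST_U32/_INST_BUF and MLXSW_AFK_BLOCK helpers around this point). This is not part of the patch: the element offsets, shifts and the 0x92 block encoding are made-up example values, not taken from any real block definition.

/* Illustrative sketch only (not from the diff above): one L2 key block
 * whose geometry is described with the element-instance macros; the
 * offsets, shifts and encoding below are example values.
 */
static struct mlxsw_afk_element_inst example_afk_block_l2[] = {
	MLXSW_AFK_ELEMENT_INST_BUF(DMAC_32_47, 0x00, 2),
	MLXSW_AFK_ELEMENT_INST_BUF(DMAC_0_31, 0x02, 4),
	MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3),
	MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12),
};

static const struct mlxsw_afk_block example_afk_blocks[] = {
	MLXSW_AFK_BLOCK(0x92, example_afk_block_l2),
};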
+#define MLXSW_AFK_ELEMENT_INST_U32(_element, _offset, _shift, _size) \ + MLXSW_AFK_ELEMENT_INST(MLXSW_AFK_ELEMENT_TYPE_U32, \ + _element, _offset, _shift, _size, 0, false) + +#define MLXSW_AFK_ELEMENT_INST_EXT_U32(_element, _offset, \ + _shift, _size, _key_diff, \ + _avoid_size_check) \ + MLXSW_AFK_ELEMENT_INST(MLXSW_AFK_ELEMENT_TYPE_U32, \ + _element, _offset, _shift, _size, \ + _key_diff, _avoid_size_check) + +#define MLXSW_AFK_ELEMENT_INST_BUF(_element, _offset, _size) \ + MLXSW_AFK_ELEMENT_INST(MLXSW_AFK_ELEMENT_TYPE_BUF, \ + _element, _offset, 0, _size, 0, false) + +struct mlxsw_afk_block { + u16 encoding; /* block ID */ + struct mlxsw_afk_element_inst *instances; + unsigned int instances_count; +}; + +#define MLXSW_AFK_BLOCK(_encoding, _instances) \ + { \ + .encoding = _encoding, \ + .instances = _instances, \ + .instances_count = ARRAY_SIZE(_instances), \ + } + +struct mlxsw_afk_element_usage { + DECLARE_BITMAP(usage, MLXSW_AFK_ELEMENT_MAX); +}; + +#define mlxsw_afk_element_usage_for_each(element, elusage) \ + for_each_set_bit(element, (elusage)->usage, MLXSW_AFK_ELEMENT_MAX) + +static inline void +mlxsw_afk_element_usage_add(struct mlxsw_afk_element_usage *elusage, + enum mlxsw_afk_element element) +{ + __set_bit(element, elusage->usage); +} + +static inline void +mlxsw_afk_element_usage_zero(struct mlxsw_afk_element_usage *elusage) +{ + bitmap_zero(elusage->usage, MLXSW_AFK_ELEMENT_MAX); +} + +static inline void +mlxsw_afk_element_usage_fill(struct mlxsw_afk_element_usage *elusage, + const enum mlxsw_afk_element *elements, + unsigned int elements_count) +{ + int i; + + mlxsw_afk_element_usage_zero(elusage); + for (i = 0; i < elements_count; i++) + mlxsw_afk_element_usage_add(elusage, elements[i]); +} + +static inline bool +mlxsw_afk_element_usage_subset(struct mlxsw_afk_element_usage *elusage_small, + struct mlxsw_afk_element_usage *elusage_big) +{ + int i; + + for (i = 0; i < MLXSW_AFK_ELEMENT_MAX; i++) + if (test_bit(i, elusage_small->usage) && + !test_bit(i, elusage_big->usage)) + return false; + return true; +} + +struct mlxsw_afk; + +struct mlxsw_afk_ops { + const struct mlxsw_afk_block *blocks; + unsigned int blocks_count; + void (*encode_block)(char *output, int block_index, char *block); + void (*clear_block)(char *output, int block_index); +}; + +struct mlxsw_afk *mlxsw_afk_create(unsigned int max_blocks, + const struct mlxsw_afk_ops *ops); +void mlxsw_afk_destroy(struct mlxsw_afk *mlxsw_afk); + +struct mlxsw_afk_key_info; + +struct mlxsw_afk_key_info * +mlxsw_afk_key_info_get(struct mlxsw_afk *mlxsw_afk, + struct mlxsw_afk_element_usage *elusage); +void mlxsw_afk_key_info_put(struct mlxsw_afk_key_info *key_info); +bool mlxsw_afk_key_info_subset(struct mlxsw_afk_key_info *key_info, + struct mlxsw_afk_element_usage *elusage); + +u16 +mlxsw_afk_key_info_block_encoding_get(const struct mlxsw_afk_key_info *key_info, + int block_index); +unsigned int +mlxsw_afk_key_info_blocks_count_get(const struct mlxsw_afk_key_info *key_info); + +struct mlxsw_afk_element_values { + struct mlxsw_afk_element_usage elusage; + struct { + char key[MLXSW_AFK_ELEMENT_STORAGE_SIZE]; + char mask[MLXSW_AFK_ELEMENT_STORAGE_SIZE]; + } storage; +}; + +void mlxsw_afk_values_add_u32(struct mlxsw_afk_element_values *values, + enum mlxsw_afk_element element, + u32 key_value, u32 mask_value); +void mlxsw_afk_values_add_buf(struct mlxsw_afk_element_values *values, + enum mlxsw_afk_element element, + const char *key_value, const char *mask_value, + unsigned int len); +void mlxsw_afk_encode(struct mlxsw_afk 
*mlxsw_afk, + struct mlxsw_afk_key_info *key_info, + struct mlxsw_afk_element_values *values, + char *key, char *mask); +void mlxsw_afk_clear(struct mlxsw_afk *mlxsw_afk, char *key, + int block_start, int block_end); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c new file mode 100644 index 000000000..0107cbc32 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -0,0 +1,1464 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#include <linux/kernel.h> +#include <linux/err.h> +#include <linux/ethtool.h> +#include <linux/sfp.h> +#include <linux/mutex.h> + +#include "core.h" +#include "core_env.h" +#include "item.h" +#include "reg.h" + +struct mlxsw_env_module_info { + u64 module_overheat_counter; + bool is_overheat; + int num_ports_mapped; + int num_ports_up; + enum ethtool_module_power_mode_policy power_mode_policy; + enum mlxsw_reg_pmtm_module_type type; +}; + +struct mlxsw_env_line_card { + u8 module_count; + bool active; + struct mlxsw_env_module_info module_info[]; +}; + +struct mlxsw_env { + struct mlxsw_core *core; + const struct mlxsw_bus_info *bus_info; + u8 max_module_count; /* Maximum number of modules per-slot. */ + u8 num_of_slots; /* Including the main board. */ + struct mutex line_cards_lock; /* Protects line cards. */ + struct mlxsw_env_line_card *line_cards[]; +}; + +static bool __mlxsw_env_linecard_is_active(struct mlxsw_env *mlxsw_env, + u8 slot_index) +{ + return mlxsw_env->line_cards[slot_index]->active; +} + +static bool mlxsw_env_linecard_is_active(struct mlxsw_env *mlxsw_env, + u8 slot_index) +{ + bool active; + + mutex_lock(&mlxsw_env->line_cards_lock); + active = __mlxsw_env_linecard_is_active(mlxsw_env, slot_index); + mutex_unlock(&mlxsw_env->line_cards_lock); + + return active; +} + +static struct +mlxsw_env_module_info *mlxsw_env_module_info_get(struct mlxsw_core *mlxsw_core, + u8 slot_index, u8 module) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + + return &mlxsw_env->line_cards[slot_index]->module_info[module]; +} + +static int __mlxsw_env_validate_module_type(struct mlxsw_core *core, + u8 slot_index, u8 module) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(core); + struct mlxsw_env_module_info *module_info; + int err; + + if (!__mlxsw_env_linecard_is_active(mlxsw_env, slot_index)) + return 0; + + module_info = mlxsw_env_module_info_get(core, slot_index, module); + switch (module_info->type) { + case MLXSW_REG_PMTM_MODULE_TYPE_TWISTED_PAIR: + err = -EINVAL; + break; + default: + err = 0; + } + + return err; +} + +static int mlxsw_env_validate_module_type(struct mlxsw_core *core, + u8 slot_index, u8 module) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(core); + int err; + + mutex_lock(&mlxsw_env->line_cards_lock); + err = __mlxsw_env_validate_module_type(core, slot_index, module); + mutex_unlock(&mlxsw_env->line_cards_lock); + + return err; +} + +static int +mlxsw_env_validate_cable_ident(struct mlxsw_core *core, u8 slot_index, int id, + bool *qsfp, bool *cmis) +{ + char mcia_pl[MLXSW_REG_MCIA_LEN]; + char *eeprom_tmp; + u8 ident; + int err; + + err = mlxsw_env_validate_module_type(core, slot_index, id); + if (err) + return err; + + mlxsw_reg_mcia_pack(mcia_pl, slot_index, id, 0, + MLXSW_REG_MCIA_PAGE0_LO_OFF, 0, 1, + MLXSW_REG_MCIA_I2C_ADDR_LOW); + err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl); + if (err) + return err; + eeprom_tmp = 
mlxsw_reg_mcia_eeprom_data(mcia_pl); + ident = eeprom_tmp[0]; + *cmis = false; + switch (ident) { + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP: + *qsfp = false; + break; + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP: + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS: + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28: + *qsfp = true; + break; + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_DD: + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_OSFP: + *qsfp = true; + *cmis = true; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int +mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, u8 slot_index, + int module, u16 offset, u16 size, void *data, + bool qsfp, unsigned int *p_read_size) +{ + char mcia_pl[MLXSW_REG_MCIA_LEN]; + char *eeprom_tmp; + u16 i2c_addr; + u8 page = 0; + int status; + int err; + + /* MCIA register accepts buffer size <= 48. Page of size 128 should be + * read by chunks of size 48, 48, 32. Align the size of the last chunk + * to avoid reading after the end of the page. + */ + size = min_t(u16, size, MLXSW_REG_MCIA_EEPROM_SIZE); + + if (offset < MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH && + offset + size > MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) + /* Cross pages read, read until offset 256 in low page */ + size = MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH - offset; + + i2c_addr = MLXSW_REG_MCIA_I2C_ADDR_LOW; + if (offset >= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) { + if (qsfp) { + /* When reading upper pages 1, 2 and 3 the offset + * starts at 128. Please refer to "QSFP+ Memory Map" + * figure in SFF-8436 specification and to "CMIS Module + * Memory Map" figure in CMIS specification for + * graphical depiction. + */ + page = MLXSW_REG_MCIA_PAGE_GET(offset); + offset -= MLXSW_REG_MCIA_EEPROM_UP_PAGE_LENGTH * page; + if (offset + size > MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) + size = MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH - offset; + } else { + /* When reading upper pages 1, 2 and 3 the offset + * starts at 0 and I2C high address is used. Please refer + * to "Memory Organization" figure in SFF-8472 + * specification for graphical depiction. + */ + i2c_addr = MLXSW_REG_MCIA_I2C_ADDR_HIGH; + offset -= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH; + } + } + + mlxsw_reg_mcia_pack(mcia_pl, slot_index, module, 0, page, offset, size, + i2c_addr); + + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcia), mcia_pl); + if (err) + return err; + + status = mlxsw_reg_mcia_status_get(mcia_pl); + if (status) + return -EIO; + + eeprom_tmp = mlxsw_reg_mcia_eeprom_data(mcia_pl); + memcpy(data, eeprom_tmp, size); + *p_read_size = size; + + return 0; +} + +int +mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, u8 slot_index, + int module, int off, int *temp) +{ + unsigned int module_temp, module_crit, module_emerg; + union { + u8 buf[MLXSW_REG_MCIA_TH_ITEM_SIZE]; + u16 temp; + } temp_thresh; + char mcia_pl[MLXSW_REG_MCIA_LEN] = {0}; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + char *eeprom_tmp; + bool qsfp, cmis; + int page; + int err; + + mlxsw_reg_mtmp_pack(mtmp_pl, slot_index, + MLXSW_REG_MTMP_MODULE_INDEX_MIN + module, false, + false); + err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl); + if (err) + return err; + mlxsw_reg_mtmp_unpack(mtmp_pl, &module_temp, NULL, &module_crit, + &module_emerg, NULL); + if (!module_temp) { + *temp = 0; + return 0; + } + + /* Validate if threshold reading is available through MTMP register, + * otherwise fallback to read through MCIA. + */ + if (module_emerg) { + *temp = off == SFP_TEMP_HIGH_WARN ? 
module_crit : module_emerg; + return 0; + } + + /* Read Free Side Device Temperature Thresholds from page 03h + * (MSB at lower byte address). + * Bytes: + * 128-129 - Temp High Alarm (SFP_TEMP_HIGH_ALARM); + * 130-131 - Temp Low Alarm (SFP_TEMP_LOW_ALARM); + * 132-133 - Temp High Warning (SFP_TEMP_HIGH_WARN); + * 134-135 - Temp Low Warning (SFP_TEMP_LOW_WARN); + */ + + /* Validate module identifier value. */ + err = mlxsw_env_validate_cable_ident(core, slot_index, module, &qsfp, + &cmis); + if (err) + return err; + + if (qsfp) { + /* For QSFP/CMIS module-defined thresholds are located in page + * 02h, otherwise in page 03h. + */ + if (cmis) + page = MLXSW_REG_MCIA_TH_PAGE_CMIS_NUM; + else + page = MLXSW_REG_MCIA_TH_PAGE_NUM; + mlxsw_reg_mcia_pack(mcia_pl, slot_index, module, 0, page, + MLXSW_REG_MCIA_TH_PAGE_OFF + off, + MLXSW_REG_MCIA_TH_ITEM_SIZE, + MLXSW_REG_MCIA_I2C_ADDR_LOW); + } else { + mlxsw_reg_mcia_pack(mcia_pl, slot_index, module, 0, + MLXSW_REG_MCIA_PAGE0_LO, + off, MLXSW_REG_MCIA_TH_ITEM_SIZE, + MLXSW_REG_MCIA_I2C_ADDR_HIGH); + } + + err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl); + if (err) + return err; + + eeprom_tmp = mlxsw_reg_mcia_eeprom_data(mcia_pl); + memcpy(temp_thresh.buf, eeprom_tmp, MLXSW_REG_MCIA_TH_ITEM_SIZE); + *temp = temp_thresh.temp * 1000; + + return 0; +} + +int mlxsw_env_get_module_info(struct net_device *netdev, + struct mlxsw_core *mlxsw_core, u8 slot_index, + int module, struct ethtool_modinfo *modinfo) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + u8 module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE]; + u16 offset = MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE; + u8 module_rev_id, module_id, diag_mon; + unsigned int read_size; + int err; + + if (!mlxsw_env_linecard_is_active(mlxsw_env, slot_index)) { + netdev_err(netdev, "Cannot read EEPROM of module on an inactive line card\n"); + return -EIO; + } + + err = mlxsw_env_validate_module_type(mlxsw_core, slot_index, module); + if (err) { + netdev_err(netdev, + "EEPROM is not equipped on port module type"); + return err; + } + + err = mlxsw_env_query_module_eeprom(mlxsw_core, slot_index, module, 0, + offset, module_info, false, + &read_size); + if (err) + return err; + + if (read_size < offset) + return -EIO; + + module_rev_id = module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID]; + module_id = module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID]; + + switch (module_id) { + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP: + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN; + break; + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS: + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28: + if (module_id == MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28 || + module_rev_id >= + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8636) { + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN; + } + break; + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP: + /* Verify if transceiver provides diagnostic monitoring page */ + err = mlxsw_env_query_module_eeprom(mlxsw_core, slot_index, + module, SFP_DIAGMON, 1, + &diag_mon, false, + &read_size); + if (err) + return err; + + if (read_size < 1) + return -EIO; + + modinfo->type = ETH_MODULE_SFF_8472; + if (diag_mon) + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + else + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN / 2; + break; + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_DD: + 
case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_OSFP: + /* Use SFF_8636 as base type. ethtool should recognize specific + * type through the identifier value. + */ + modinfo->type = ETH_MODULE_SFF_8636; + /* Verify if module EEPROM is a flat memory. In case of flat + * memory only page 00h (0-255 bytes) can be read. Otherwise + * upper pages 01h and 02h can also be read. Upper pages 10h + * and 11h are currently not supported by the driver. + */ + if (module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_TYPE_ID] & + MLXSW_REG_MCIA_EEPROM_CMIS_FLAT_MEMORY) + modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + else + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + break; + default: + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL(mlxsw_env_get_module_info); + +int mlxsw_env_get_module_eeprom(struct net_device *netdev, + struct mlxsw_core *mlxsw_core, u8 slot_index, + int module, struct ethtool_eeprom *ee, + u8 *data) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + int offset = ee->offset; + unsigned int read_size; + bool qsfp, cmis; + int i = 0; + int err; + + if (!ee->len) + return -EINVAL; + + if (!mlxsw_env_linecard_is_active(mlxsw_env, slot_index)) { + netdev_err(netdev, "Cannot read EEPROM of module on an inactive line card\n"); + return -EIO; + } + + memset(data, 0, ee->len); + /* Validate module identifier value. */ + err = mlxsw_env_validate_cable_ident(mlxsw_core, slot_index, module, + &qsfp, &cmis); + if (err) + return err; + + while (i < ee->len) { + err = mlxsw_env_query_module_eeprom(mlxsw_core, slot_index, + module, offset, + ee->len - i, data + i, + qsfp, &read_size); + if (err) { + netdev_err(netdev, "Eeprom query failed\n"); + return err; + } + + i += read_size; + offset += read_size; + } + + return 0; +} +EXPORT_SYMBOL(mlxsw_env_get_module_eeprom); + +static int mlxsw_env_mcia_status_process(const char *mcia_pl, + struct netlink_ext_ack *extack) +{ + u8 status = mlxsw_reg_mcia_status_get(mcia_pl); + + switch (status) { + case MLXSW_REG_MCIA_STATUS_GOOD: + return 0; + case MLXSW_REG_MCIA_STATUS_NO_EEPROM_MODULE: + NL_SET_ERR_MSG_MOD(extack, "No response from module's EEPROM"); + return -EIO; + case MLXSW_REG_MCIA_STATUS_MODULE_NOT_SUPPORTED: + NL_SET_ERR_MSG_MOD(extack, "Module type not supported by the device"); + return -EOPNOTSUPP; + case MLXSW_REG_MCIA_STATUS_MODULE_NOT_CONNECTED: + NL_SET_ERR_MSG_MOD(extack, "No module present indication"); + return -EIO; + case MLXSW_REG_MCIA_STATUS_I2C_ERROR: + NL_SET_ERR_MSG_MOD(extack, "Error occurred while trying to access module's EEPROM using I2C"); + return -EIO; + case MLXSW_REG_MCIA_STATUS_MODULE_DISABLED: + NL_SET_ERR_MSG_MOD(extack, "Module is disabled"); + return -EIO; + default: + NL_SET_ERR_MSG_MOD(extack, "Unknown error"); + return -EIO; + } +} + +int +mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, + u8 slot_index, u8 module, + const struct ethtool_module_eeprom *page, + struct netlink_ext_ack *extack) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + u32 bytes_read = 0; + u16 device_addr; + int err; + + if (!mlxsw_env_linecard_is_active(mlxsw_env, slot_index)) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot read EEPROM of module on an inactive line card"); + return -EIO; + } + + err = mlxsw_env_validate_module_type(mlxsw_core, slot_index, module); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "EEPROM is not equipped on port module type"); + return err; + } + + /* Offset cannot be larger than 2 * ETH_MODULE_EEPROM_PAGE_LEN */ + device_addr = page->offset; + + while (bytes_read < 
page->length) { + char mcia_pl[MLXSW_REG_MCIA_LEN]; + char *eeprom_tmp; + u8 size; + + size = min_t(u8, page->length - bytes_read, + MLXSW_REG_MCIA_EEPROM_SIZE); + + mlxsw_reg_mcia_pack(mcia_pl, slot_index, module, 0, page->page, + device_addr + bytes_read, size, + page->i2c_address); + mlxsw_reg_mcia_bank_number_set(mcia_pl, page->bank); + + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcia), mcia_pl); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to access module's EEPROM"); + return err; + } + + err = mlxsw_env_mcia_status_process(mcia_pl, extack); + if (err) + return err; + + eeprom_tmp = mlxsw_reg_mcia_eeprom_data(mcia_pl); + memcpy(page->data + bytes_read, eeprom_tmp, size); + bytes_read += size; + } + + return bytes_read; +} +EXPORT_SYMBOL(mlxsw_env_get_module_eeprom_by_page); + +static int mlxsw_env_module_reset(struct mlxsw_core *mlxsw_core, u8 slot_index, + u8 module) +{ + char pmaos_pl[MLXSW_REG_PMAOS_LEN]; + + mlxsw_reg_pmaos_pack(pmaos_pl, slot_index, module); + mlxsw_reg_pmaos_rst_set(pmaos_pl, true); + + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(pmaos), pmaos_pl); +} + +int mlxsw_env_reset_module(struct net_device *netdev, + struct mlxsw_core *mlxsw_core, u8 slot_index, + u8 module, u32 *flags) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + struct mlxsw_env_module_info *module_info; + u32 req = *flags; + int err; + + if (!(req & ETH_RESET_PHY) && + !(req & (ETH_RESET_PHY << ETH_RESET_SHARED_SHIFT))) + return 0; + + if (!mlxsw_env_linecard_is_active(mlxsw_env, slot_index)) { + netdev_err(netdev, "Cannot reset module on an inactive line card\n"); + return -EIO; + } + + mutex_lock(&mlxsw_env->line_cards_lock); + + err = __mlxsw_env_validate_module_type(mlxsw_core, slot_index, module); + if (err) { + netdev_err(netdev, "Reset module is not supported on port module type\n"); + goto out; + } + + module_info = mlxsw_env_module_info_get(mlxsw_core, slot_index, module); + if (module_info->num_ports_up) { + netdev_err(netdev, "Cannot reset module when ports using it are administratively up\n"); + err = -EINVAL; + goto out; + } + + if (module_info->num_ports_mapped > 1 && + !(req & (ETH_RESET_PHY << ETH_RESET_SHARED_SHIFT))) { + netdev_err(netdev, "Cannot reset module without \"phy-shared\" flag when shared by multiple ports\n"); + err = -EINVAL; + goto out; + } + + err = mlxsw_env_module_reset(mlxsw_core, slot_index, module); + if (err) { + netdev_err(netdev, "Failed to reset module\n"); + goto out; + } + + *flags &= ~(ETH_RESET_PHY | (ETH_RESET_PHY << ETH_RESET_SHARED_SHIFT)); + +out: + mutex_unlock(&mlxsw_env->line_cards_lock); + return err; +} +EXPORT_SYMBOL(mlxsw_env_reset_module); + +int +mlxsw_env_get_module_power_mode(struct mlxsw_core *mlxsw_core, u8 slot_index, + u8 module, + struct ethtool_module_power_mode_params *params, + struct netlink_ext_ack *extack) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + struct mlxsw_env_module_info *module_info; + char mcion_pl[MLXSW_REG_MCION_LEN]; + u32 status_bits; + int err = 0; + + mutex_lock(&mlxsw_env->line_cards_lock); + + err = __mlxsw_env_validate_module_type(mlxsw_core, slot_index, module); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Power mode is not supported on port module type"); + goto out; + } + + module_info = mlxsw_env_module_info_get(mlxsw_core, slot_index, module); + params->policy = module_info->power_mode_policy; + + /* Avoid accessing an inactive line card, as it will result in an error. 
*/ + if (!__mlxsw_env_linecard_is_active(mlxsw_env, slot_index)) + goto out; + + mlxsw_reg_mcion_pack(mcion_pl, slot_index, module); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcion), mcion_pl); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to retrieve module's power mode"); + goto out; + } + + status_bits = mlxsw_reg_mcion_module_status_bits_get(mcion_pl); + if (!(status_bits & MLXSW_REG_MCION_MODULE_STATUS_BITS_PRESENT_MASK)) + goto out; + + if (status_bits & MLXSW_REG_MCION_MODULE_STATUS_BITS_LOW_POWER_MASK) + params->mode = ETHTOOL_MODULE_POWER_MODE_LOW; + else + params->mode = ETHTOOL_MODULE_POWER_MODE_HIGH; + +out: + mutex_unlock(&mlxsw_env->line_cards_lock); + return err; +} +EXPORT_SYMBOL(mlxsw_env_get_module_power_mode); + +static int mlxsw_env_module_enable_set(struct mlxsw_core *mlxsw_core, + u8 slot_index, u8 module, bool enable) +{ + enum mlxsw_reg_pmaos_admin_status admin_status; + char pmaos_pl[MLXSW_REG_PMAOS_LEN]; + + mlxsw_reg_pmaos_pack(pmaos_pl, slot_index, module); + admin_status = enable ? MLXSW_REG_PMAOS_ADMIN_STATUS_ENABLED : + MLXSW_REG_PMAOS_ADMIN_STATUS_DISABLED; + mlxsw_reg_pmaos_admin_status_set(pmaos_pl, admin_status); + mlxsw_reg_pmaos_ase_set(pmaos_pl, true); + + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(pmaos), pmaos_pl); +} + +static int mlxsw_env_module_low_power_set(struct mlxsw_core *mlxsw_core, + u8 slot_index, u8 module, + bool low_power) +{ + u16 eeprom_override_mask, eeprom_override; + char pmmp_pl[MLXSW_REG_PMMP_LEN]; + + mlxsw_reg_pmmp_pack(pmmp_pl, slot_index, module); + mlxsw_reg_pmmp_sticky_set(pmmp_pl, true); + /* Mask all the bits except low power mode. */ + eeprom_override_mask = ~MLXSW_REG_PMMP_EEPROM_OVERRIDE_LOW_POWER_MASK; + mlxsw_reg_pmmp_eeprom_override_mask_set(pmmp_pl, eeprom_override_mask); + eeprom_override = low_power ? MLXSW_REG_PMMP_EEPROM_OVERRIDE_LOW_POWER_MASK : + 0; + mlxsw_reg_pmmp_eeprom_override_set(pmmp_pl, eeprom_override); + + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(pmmp), pmmp_pl); +} + +static int __mlxsw_env_set_module_power_mode(struct mlxsw_core *mlxsw_core, + u8 slot_index, u8 module, + bool low_power, + struct netlink_ext_ack *extack) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + int err; + + /* Avoid accessing an inactive line card, as it will result in an error. + * Cached configuration will be applied by mlxsw_env_got_active() when + * line card becomes active. 
+ */ + if (!__mlxsw_env_linecard_is_active(mlxsw_env, slot_index)) + return 0; + + err = mlxsw_env_module_enable_set(mlxsw_core, slot_index, module, false); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to disable module"); + return err; + } + + err = mlxsw_env_module_low_power_set(mlxsw_core, slot_index, module, + low_power); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to set module's power mode"); + goto err_module_low_power_set; + } + + err = mlxsw_env_module_enable_set(mlxsw_core, slot_index, module, true); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to enable module"); + goto err_module_enable_set; + } + + return 0; + +err_module_enable_set: + mlxsw_env_module_low_power_set(mlxsw_core, slot_index, module, + !low_power); +err_module_low_power_set: + mlxsw_env_module_enable_set(mlxsw_core, slot_index, module, true); + return err; +} + +static int +mlxsw_env_set_module_power_mode_apply(struct mlxsw_core *mlxsw_core, + u8 slot_index, u8 module, + enum ethtool_module_power_mode_policy policy, + struct netlink_ext_ack *extack) +{ + struct mlxsw_env_module_info *module_info; + bool low_power; + int err = 0; + + err = __mlxsw_env_validate_module_type(mlxsw_core, slot_index, module); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Power mode set is not supported on port module type"); + goto out; + } + + module_info = mlxsw_env_module_info_get(mlxsw_core, slot_index, module); + if (module_info->power_mode_policy == policy) + goto out; + + /* If any ports are up, we are already in high power mode. */ + if (module_info->num_ports_up) + goto out_set_policy; + + low_power = policy == ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO; + err = __mlxsw_env_set_module_power_mode(mlxsw_core, slot_index, module, + low_power, extack); + if (err) + goto out; + +out_set_policy: + module_info->power_mode_policy = policy; +out: + return err; +} + +int +mlxsw_env_set_module_power_mode(struct mlxsw_core *mlxsw_core, u8 slot_index, + u8 module, + enum ethtool_module_power_mode_policy policy, + struct netlink_ext_ack *extack) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + int err; + + if (policy != ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH && + policy != ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported power mode policy"); + return -EOPNOTSUPP; + } + + mutex_lock(&mlxsw_env->line_cards_lock); + err = mlxsw_env_set_module_power_mode_apply(mlxsw_core, slot_index, + module, policy, extack); + mutex_unlock(&mlxsw_env->line_cards_lock); + + return err; +} +EXPORT_SYMBOL(mlxsw_env_set_module_power_mode); + +static int mlxsw_env_module_has_temp_sensor(struct mlxsw_core *mlxsw_core, + u8 slot_index, u8 module, + bool *p_has_temp_sensor) +{ + char mtbr_pl[MLXSW_REG_MTBR_LEN]; + u16 temp; + int err; + + mlxsw_reg_mtbr_pack(mtbr_pl, slot_index, + MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, 1); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mtbr), mtbr_pl); + if (err) + return err; + + mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL); + + switch (temp) { + case MLXSW_REG_MTBR_BAD_SENS_INFO: + case MLXSW_REG_MTBR_NO_CONN: + case MLXSW_REG_MTBR_NO_TEMP_SENS: + case MLXSW_REG_MTBR_INDEX_NA: + *p_has_temp_sensor = false; + break; + default: + *p_has_temp_sensor = temp ? 
true : false; + } + return 0; +} + +static int +mlxsw_env_temp_event_set(struct mlxsw_core *mlxsw_core, u8 slot_index, + u16 sensor_index, bool enable) +{ + char mtmp_pl[MLXSW_REG_MTMP_LEN] = {0}; + enum mlxsw_reg_mtmp_tee tee; + int err, threshold_hi; + + mlxsw_reg_mtmp_slot_index_set(mtmp_pl, slot_index); + mlxsw_reg_mtmp_sensor_index_set(mtmp_pl, sensor_index); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mtmp), mtmp_pl); + if (err) + return err; + + if (enable) { + err = mlxsw_env_module_temp_thresholds_get(mlxsw_core, + slot_index, + sensor_index - + MLXSW_REG_MTMP_MODULE_INDEX_MIN, + SFP_TEMP_HIGH_WARN, + &threshold_hi); + /* In case it is not possible to query the module's threshold, + * use the default value. + */ + if (err) + threshold_hi = MLXSW_REG_MTMP_THRESH_HI; + else + /* mlxsw_env_module_temp_thresholds_get() multiplies + * Celsius degrees by 1000 whereas MTMP expects + * temperature in 0.125 Celsius degrees units. + * Convert threshold_hi to correct units. + */ + threshold_hi = threshold_hi / 1000 * 8; + + mlxsw_reg_mtmp_temperature_threshold_hi_set(mtmp_pl, threshold_hi); + mlxsw_reg_mtmp_temperature_threshold_lo_set(mtmp_pl, threshold_hi - + MLXSW_REG_MTMP_HYSTERESIS_TEMP); + } + tee = enable ? MLXSW_REG_MTMP_TEE_GENERATE_EVENT : MLXSW_REG_MTMP_TEE_NO_EVENT; + mlxsw_reg_mtmp_tee_set(mtmp_pl, tee); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtmp), mtmp_pl); +} + +static int mlxsw_env_module_temp_event_enable(struct mlxsw_core *mlxsw_core, + u8 slot_index) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + int i, err, sensor_index; + bool has_temp_sensor; + + for (i = 0; i < mlxsw_env->line_cards[slot_index]->module_count; i++) { + err = mlxsw_env_module_has_temp_sensor(mlxsw_core, slot_index, + i, &has_temp_sensor); + if (err) + return err; + + if (!has_temp_sensor) + continue; + + sensor_index = i + MLXSW_REG_MTMP_MODULE_INDEX_MIN; + err = mlxsw_env_temp_event_set(mlxsw_core, slot_index, + sensor_index, true); + if (err) + return err; + } + + return 0; +} + +struct mlxsw_env_module_temp_warn_event { + struct mlxsw_env *mlxsw_env; + char mtwe_pl[MLXSW_REG_MTWE_LEN]; + struct work_struct work; +}; + +static void mlxsw_env_mtwe_event_work(struct work_struct *work) +{ + struct mlxsw_env_module_temp_warn_event *event; + struct mlxsw_env_module_info *module_info; + struct mlxsw_env *mlxsw_env; + int i, sensor_warning; + bool is_overheat; + + event = container_of(work, struct mlxsw_env_module_temp_warn_event, + work); + mlxsw_env = event->mlxsw_env; + + for (i = 0; i < mlxsw_env->max_module_count; i++) { + /* 64-127 of sensor_index are mapped to the port modules + * sequentially (module 0 is mapped to sensor_index 64, + * module 1 to sensor_index 65 and so on) + */ + sensor_warning = + mlxsw_reg_mtwe_sensor_warning_get(event->mtwe_pl, + i + MLXSW_REG_MTMP_MODULE_INDEX_MIN); + mutex_lock(&mlxsw_env->line_cards_lock); + /* MTWE only supports main board. */ + module_info = mlxsw_env_module_info_get(mlxsw_env->core, 0, i); + is_overheat = module_info->is_overheat; + + if ((is_overheat && sensor_warning) || + (!is_overheat && !sensor_warning)) { + /* Current state is "warning" and MTWE still reports + * warning OR current state in "no warning" and MTWE + * does not report warning. + */ + mutex_unlock(&mlxsw_env->line_cards_lock); + continue; + } else if (is_overheat && !sensor_warning) { + /* MTWE reports "no warning", turn is_overheat off. 
+ */ + module_info->is_overheat = false; + mutex_unlock(&mlxsw_env->line_cards_lock); + } else { + /* Current state is "no warning" and MTWE reports + * "warning", increase the counter and turn is_overheat + * on. + */ + module_info->is_overheat = true; + module_info->module_overheat_counter++; + mutex_unlock(&mlxsw_env->line_cards_lock); + } + } + + kfree(event); +} + +static void +mlxsw_env_mtwe_listener_func(const struct mlxsw_reg_info *reg, char *mtwe_pl, + void *priv) +{ + struct mlxsw_env_module_temp_warn_event *event; + struct mlxsw_env *mlxsw_env = priv; + + event = kmalloc(sizeof(*event), GFP_ATOMIC); + if (!event) + return; + + event->mlxsw_env = mlxsw_env; + memcpy(event->mtwe_pl, mtwe_pl, MLXSW_REG_MTWE_LEN); + INIT_WORK(&event->work, mlxsw_env_mtwe_event_work); + mlxsw_core_schedule_work(&event->work); +} + +static const struct mlxsw_listener mlxsw_env_temp_warn_listener = + MLXSW_CORE_EVENTL(mlxsw_env_mtwe_listener_func, MTWE); + +static int mlxsw_env_temp_warn_event_register(struct mlxsw_core *mlxsw_core) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + + return mlxsw_core_trap_register(mlxsw_core, + &mlxsw_env_temp_warn_listener, + mlxsw_env); +} + +static void mlxsw_env_temp_warn_event_unregister(struct mlxsw_env *mlxsw_env) +{ + mlxsw_core_trap_unregister(mlxsw_env->core, + &mlxsw_env_temp_warn_listener, mlxsw_env); +} + +struct mlxsw_env_module_plug_unplug_event { + struct mlxsw_env *mlxsw_env; + u8 slot_index; + u8 module; + struct work_struct work; +}; + +static void mlxsw_env_pmpe_event_work(struct work_struct *work) +{ + struct mlxsw_env_module_plug_unplug_event *event; + struct mlxsw_env_module_info *module_info; + struct mlxsw_env *mlxsw_env; + bool has_temp_sensor; + u16 sensor_index; + int err; + + event = container_of(work, struct mlxsw_env_module_plug_unplug_event, + work); + mlxsw_env = event->mlxsw_env; + + mutex_lock(&mlxsw_env->line_cards_lock); + module_info = mlxsw_env_module_info_get(mlxsw_env->core, + event->slot_index, + event->module); + module_info->is_overheat = false; + mutex_unlock(&mlxsw_env->line_cards_lock); + + err = mlxsw_env_module_has_temp_sensor(mlxsw_env->core, + event->slot_index, + event->module, + &has_temp_sensor); + /* Do not disable events on modules without sensors or faulty sensors + * because FW returns errors. 
+ */ + if (err) + goto out; + + if (!has_temp_sensor) + goto out; + + sensor_index = event->module + MLXSW_REG_MTMP_MODULE_INDEX_MIN; + mlxsw_env_temp_event_set(mlxsw_env->core, event->slot_index, + sensor_index, true); + +out: + kfree(event); +} + +static void +mlxsw_env_pmpe_listener_func(const struct mlxsw_reg_info *reg, char *pmpe_pl, + void *priv) +{ + u8 slot_index = mlxsw_reg_pmpe_slot_index_get(pmpe_pl); + struct mlxsw_env_module_plug_unplug_event *event; + enum mlxsw_reg_pmpe_module_status module_status; + u8 module = mlxsw_reg_pmpe_module_get(pmpe_pl); + struct mlxsw_env *mlxsw_env = priv; + + if (WARN_ON_ONCE(module >= mlxsw_env->max_module_count || + slot_index >= mlxsw_env->num_of_slots)) + return; + + module_status = mlxsw_reg_pmpe_module_status_get(pmpe_pl); + if (module_status != MLXSW_REG_PMPE_MODULE_STATUS_PLUGGED_ENABLED) + return; + + event = kmalloc(sizeof(*event), GFP_ATOMIC); + if (!event) + return; + + event->mlxsw_env = mlxsw_env; + event->slot_index = slot_index; + event->module = module; + INIT_WORK(&event->work, mlxsw_env_pmpe_event_work); + mlxsw_core_schedule_work(&event->work); +} + +static const struct mlxsw_listener mlxsw_env_module_plug_listener = + MLXSW_CORE_EVENTL(mlxsw_env_pmpe_listener_func, PMPE); + +static int +mlxsw_env_module_plug_event_register(struct mlxsw_core *mlxsw_core) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + + return mlxsw_core_trap_register(mlxsw_core, + &mlxsw_env_module_plug_listener, + mlxsw_env); +} + +static void +mlxsw_env_module_plug_event_unregister(struct mlxsw_env *mlxsw_env) +{ + mlxsw_core_trap_unregister(mlxsw_env->core, + &mlxsw_env_module_plug_listener, + mlxsw_env); +} + +static int +mlxsw_env_module_oper_state_event_enable(struct mlxsw_core *mlxsw_core, + u8 slot_index) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + int i, err; + + for (i = 0; i < mlxsw_env->line_cards[slot_index]->module_count; i++) { + char pmaos_pl[MLXSW_REG_PMAOS_LEN]; + + mlxsw_reg_pmaos_pack(pmaos_pl, slot_index, i); + mlxsw_reg_pmaos_e_set(pmaos_pl, + MLXSW_REG_PMAOS_E_GENERATE_EVENT); + mlxsw_reg_pmaos_ee_set(pmaos_pl, true); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(pmaos), pmaos_pl); + if (err) + return err; + } + return 0; +} + +int +mlxsw_env_module_overheat_counter_get(struct mlxsw_core *mlxsw_core, u8 slot_index, + u8 module, u64 *p_counter) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + struct mlxsw_env_module_info *module_info; + + mutex_lock(&mlxsw_env->line_cards_lock); + module_info = mlxsw_env_module_info_get(mlxsw_core, slot_index, module); + *p_counter = module_info->module_overheat_counter; + mutex_unlock(&mlxsw_env->line_cards_lock); + + return 0; +} +EXPORT_SYMBOL(mlxsw_env_module_overheat_counter_get); + +void mlxsw_env_module_port_map(struct mlxsw_core *mlxsw_core, u8 slot_index, + u8 module) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + struct mlxsw_env_module_info *module_info; + + mutex_lock(&mlxsw_env->line_cards_lock); + module_info = mlxsw_env_module_info_get(mlxsw_core, slot_index, module); + module_info->num_ports_mapped++; + mutex_unlock(&mlxsw_env->line_cards_lock); +} +EXPORT_SYMBOL(mlxsw_env_module_port_map); + +void mlxsw_env_module_port_unmap(struct mlxsw_core *mlxsw_core, u8 slot_index, + u8 module) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + struct mlxsw_env_module_info *module_info; + + mutex_lock(&mlxsw_env->line_cards_lock); + module_info = mlxsw_env_module_info_get(mlxsw_core, slot_index, module); + 
module_info->num_ports_mapped--; + mutex_unlock(&mlxsw_env->line_cards_lock); +} +EXPORT_SYMBOL(mlxsw_env_module_port_unmap); + +int mlxsw_env_module_port_up(struct mlxsw_core *mlxsw_core, u8 slot_index, + u8 module) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + struct mlxsw_env_module_info *module_info; + int err = 0; + + mutex_lock(&mlxsw_env->line_cards_lock); + + module_info = mlxsw_env_module_info_get(mlxsw_core, slot_index, module); + if (module_info->power_mode_policy != + ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO) + goto out_inc; + + if (module_info->num_ports_up != 0) + goto out_inc; + + /* Transition to high power mode following first port using the module + * being put administratively up. + */ + err = __mlxsw_env_set_module_power_mode(mlxsw_core, slot_index, module, + false, NULL); + if (err) + goto out_unlock; + +out_inc: + module_info->num_ports_up++; +out_unlock: + mutex_unlock(&mlxsw_env->line_cards_lock); + return err; +} +EXPORT_SYMBOL(mlxsw_env_module_port_up); + +void mlxsw_env_module_port_down(struct mlxsw_core *mlxsw_core, u8 slot_index, + u8 module) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + struct mlxsw_env_module_info *module_info; + + mutex_lock(&mlxsw_env->line_cards_lock); + + module_info = mlxsw_env_module_info_get(mlxsw_core, slot_index, module); + module_info->num_ports_up--; + + if (module_info->power_mode_policy != + ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO) + goto out_unlock; + + if (module_info->num_ports_up != 0) + goto out_unlock; + + /* Transition to low power mode following last port using the module + * being put administratively down. + */ + __mlxsw_env_set_module_power_mode(mlxsw_core, slot_index, module, true, + NULL); + +out_unlock: + mutex_unlock(&mlxsw_env->line_cards_lock); +} +EXPORT_SYMBOL(mlxsw_env_module_port_down); + +static int mlxsw_env_line_cards_alloc(struct mlxsw_env *env) +{ + struct mlxsw_env_module_info *module_info; + int i, j; + + for (i = 0; i < env->num_of_slots; i++) { + env->line_cards[i] = kzalloc(struct_size(env->line_cards[i], + module_info, + env->max_module_count), + GFP_KERNEL); + if (!env->line_cards[i]) + goto kzalloc_err; + + /* Firmware defaults to high power mode policy where modules + * are transitioned to high power mode following plug-in. 
+ */ + for (j = 0; j < env->max_module_count; j++) { + module_info = &env->line_cards[i]->module_info[j]; + module_info->power_mode_policy = + ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH; + } + } + + return 0; + +kzalloc_err: + for (i--; i >= 0; i--) + kfree(env->line_cards[i]); + return -ENOMEM; +} + +static void mlxsw_env_line_cards_free(struct mlxsw_env *env) +{ + int i = env->num_of_slots; + + for (i--; i >= 0; i--) + kfree(env->line_cards[i]); +} + +static int +mlxsw_env_module_event_enable(struct mlxsw_env *mlxsw_env, u8 slot_index) +{ + int err; + + err = mlxsw_env_module_oper_state_event_enable(mlxsw_env->core, + slot_index); + if (err) + return err; + + err = mlxsw_env_module_temp_event_enable(mlxsw_env->core, slot_index); + if (err) + return err; + + return 0; +} + +static void +mlxsw_env_module_event_disable(struct mlxsw_env *mlxsw_env, u8 slot_index) +{ +} + +static int +mlxsw_env_module_type_set(struct mlxsw_core *mlxsw_core, u8 slot_index) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + int i; + + for (i = 0; i < mlxsw_env->line_cards[slot_index]->module_count; i++) { + struct mlxsw_env_module_info *module_info; + char pmtm_pl[MLXSW_REG_PMTM_LEN]; + int err; + + mlxsw_reg_pmtm_pack(pmtm_pl, slot_index, i); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(pmtm), pmtm_pl); + if (err) + return err; + + module_info = mlxsw_env_module_info_get(mlxsw_core, slot_index, + i); + module_info->type = mlxsw_reg_pmtm_module_type_get(pmtm_pl); + } + + return 0; +} + +static void +mlxsw_env_linecard_modules_power_mode_apply(struct mlxsw_core *mlxsw_core, + struct mlxsw_env *env, + u8 slot_index) +{ + int i; + + for (i = 0; i < env->line_cards[slot_index]->module_count; i++) { + enum ethtool_module_power_mode_policy policy; + struct mlxsw_env_module_info *module_info; + struct netlink_ext_ack extack; + int err; + + module_info = &env->line_cards[slot_index]->module_info[i]; + policy = module_info->power_mode_policy; + err = mlxsw_env_set_module_power_mode_apply(mlxsw_core, + slot_index, i, + policy, &extack); + if (err) + dev_err(env->bus_info->dev, "%s\n", extack._msg); + } +} + +static void +mlxsw_env_got_active(struct mlxsw_core *mlxsw_core, u8 slot_index, void *priv) +{ + struct mlxsw_env *mlxsw_env = priv; + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + int err; + + mutex_lock(&mlxsw_env->line_cards_lock); + if (__mlxsw_env_linecard_is_active(mlxsw_env, slot_index)) + goto out_unlock; + + mlxsw_reg_mgpir_pack(mgpir_pl, slot_index); + err = mlxsw_reg_query(mlxsw_env->core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + goto out_unlock; + + mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, + &mlxsw_env->line_cards[slot_index]->module_count, + NULL); + + err = mlxsw_env_module_event_enable(mlxsw_env, slot_index); + if (err) { + dev_err(mlxsw_env->bus_info->dev, "Failed to enable port module events for line card in slot %d\n", + slot_index); + goto err_mlxsw_env_module_event_enable; + } + err = mlxsw_env_module_type_set(mlxsw_env->core, slot_index); + if (err) { + dev_err(mlxsw_env->bus_info->dev, "Failed to set modules' type for line card in slot %d\n", + slot_index); + goto err_type_set; + } + + mlxsw_env->line_cards[slot_index]->active = true; + /* Apply power mode policy. 
*/ + mlxsw_env_linecard_modules_power_mode_apply(mlxsw_core, mlxsw_env, + slot_index); + mutex_unlock(&mlxsw_env->line_cards_lock); + + return; + +err_type_set: + mlxsw_env_module_event_disable(mlxsw_env, slot_index); +err_mlxsw_env_module_event_enable: +out_unlock: + mutex_unlock(&mlxsw_env->line_cards_lock); +} + +static void +mlxsw_env_got_inactive(struct mlxsw_core *mlxsw_core, u8 slot_index, + void *priv) +{ + struct mlxsw_env *mlxsw_env = priv; + + mutex_lock(&mlxsw_env->line_cards_lock); + if (!__mlxsw_env_linecard_is_active(mlxsw_env, slot_index)) + goto out_unlock; + mlxsw_env->line_cards[slot_index]->active = false; + mlxsw_env_module_event_disable(mlxsw_env, slot_index); + mlxsw_env->line_cards[slot_index]->module_count = 0; +out_unlock: + mutex_unlock(&mlxsw_env->line_cards_lock); +} + +static struct mlxsw_linecards_event_ops mlxsw_env_event_ops = { + .got_active = mlxsw_env_got_active, + .got_inactive = mlxsw_env_got_inactive, +}; + +int mlxsw_env_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *bus_info, + struct mlxsw_env **p_env) +{ + u8 module_count, num_of_slots, max_module_count; + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + struct mlxsw_env *env; + int err; + + mlxsw_reg_mgpir_pack(mgpir_pl, 0); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, &module_count, + &num_of_slots); + /* If the system is modular, get the maximum number of modules per-slot. + * Otherwise, get the maximum number of modules on the main board. + */ + max_module_count = num_of_slots ? + mlxsw_reg_mgpir_max_modules_per_slot_get(mgpir_pl) : + module_count; + + env = kzalloc(struct_size(env, line_cards, num_of_slots + 1), + GFP_KERNEL); + if (!env) + return -ENOMEM; + + env->core = mlxsw_core; + env->bus_info = bus_info; + env->num_of_slots = num_of_slots + 1; + env->max_module_count = max_module_count; + err = mlxsw_env_line_cards_alloc(env); + if (err) + goto err_mlxsw_env_line_cards_alloc; + + mutex_init(&env->line_cards_lock); + *p_env = env; + + err = mlxsw_linecards_event_ops_register(env->core, + &mlxsw_env_event_ops, env); + if (err) + goto err_linecards_event_ops_register; + + err = mlxsw_env_temp_warn_event_register(mlxsw_core); + if (err) + goto err_temp_warn_event_register; + + err = mlxsw_env_module_plug_event_register(mlxsw_core); + if (err) + goto err_module_plug_event_register; + + /* Set 'module_count' only for main board. Actual count for line card + * is to be set after line card is activated. + */ + env->line_cards[0]->module_count = num_of_slots ? 0 : module_count; + /* Enable events only for main board. Line card events are to be + * configured only after line card is activated. Before that, access to + * modules on line cards is not allowed. 
+ */ + err = mlxsw_env_module_event_enable(env, 0); + if (err) + goto err_mlxsw_env_module_event_enable; + + err = mlxsw_env_module_type_set(mlxsw_core, 0); + if (err) + goto err_type_set; + + env->line_cards[0]->active = true; + + return 0; + +err_type_set: + mlxsw_env_module_event_disable(env, 0); +err_mlxsw_env_module_event_enable: + mlxsw_env_module_plug_event_unregister(env); +err_module_plug_event_register: + mlxsw_env_temp_warn_event_unregister(env); +err_temp_warn_event_register: + mlxsw_linecards_event_ops_unregister(env->core, + &mlxsw_env_event_ops, env); +err_linecards_event_ops_register: + mutex_destroy(&env->line_cards_lock); + mlxsw_env_line_cards_free(env); +err_mlxsw_env_line_cards_alloc: + kfree(env); + return err; +} + +void mlxsw_env_fini(struct mlxsw_env *env) +{ + env->line_cards[0]->active = false; + mlxsw_env_module_event_disable(env, 0); + mlxsw_env_module_plug_event_unregister(env); + /* Make sure there is no more event work scheduled. */ + mlxsw_core_flush_owq(); + mlxsw_env_temp_warn_event_unregister(env); + mlxsw_linecards_event_ops_unregister(env->core, + &mlxsw_env_event_ops, env); + mutex_destroy(&env->line_cards_lock); + mlxsw_env_line_cards_free(env); + kfree(env); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.h b/drivers/net/ethernet/mellanox/mlxsw/core_env.h new file mode 100644 index 000000000..a197e3ae0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_CORE_ENV_H +#define _MLXSW_CORE_ENV_H + +#include <linux/ethtool.h> + +struct ethtool_modinfo; +struct ethtool_eeprom; + +int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, + u8 slot_index, int module, int off, + int *temp); + +int mlxsw_env_get_module_info(struct net_device *netdev, + struct mlxsw_core *mlxsw_core, u8 slot_index, + int module, struct ethtool_modinfo *modinfo); + +int mlxsw_env_get_module_eeprom(struct net_device *netdev, + struct mlxsw_core *mlxsw_core, u8 slot_index, + int module, struct ethtool_eeprom *ee, + u8 *data); + +int +mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, + u8 slot_index, u8 module, + const struct ethtool_module_eeprom *page, + struct netlink_ext_ack *extack); + +int mlxsw_env_reset_module(struct net_device *netdev, + struct mlxsw_core *mlxsw_core, u8 slot_index, + u8 module, u32 *flags); + +int +mlxsw_env_get_module_power_mode(struct mlxsw_core *mlxsw_core, u8 slot_index, + u8 module, + struct ethtool_module_power_mode_params *params, + struct netlink_ext_ack *extack); + +int +mlxsw_env_set_module_power_mode(struct mlxsw_core *mlxsw_core, u8 slot_index, + u8 module, + enum ethtool_module_power_mode_policy policy, + struct netlink_ext_ack *extack); + +int +mlxsw_env_module_overheat_counter_get(struct mlxsw_core *mlxsw_core, u8 slot_index, + u8 module, u64 *p_counter); + +void mlxsw_env_module_port_map(struct mlxsw_core *mlxsw_core, u8 slot_index, + u8 module); + +void mlxsw_env_module_port_unmap(struct mlxsw_core *mlxsw_core, u8 slot_index, + u8 module); + +int mlxsw_env_module_port_up(struct mlxsw_core *mlxsw_core, u8 slot_index, + u8 module); + +void mlxsw_env_module_port_down(struct mlxsw_core *mlxsw_core, u8 slot_index, + u8 module); + +int mlxsw_env_init(struct mlxsw_core *core, + const struct mlxsw_bus_info *bus_info, + struct mlxsw_env **p_env); +void mlxsw_env_fini(struct mlxsw_env *env); + +#endif diff --git 
a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c new file mode 100644 index 000000000..0fd290d77 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -0,0 +1,941 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/device.h> +#include <linux/sysfs.h> +#include <linux/hwmon.h> +#include <linux/err.h> +#include <linux/sfp.h> + +#include "core.h" +#include "core_env.h" + +#define MLXSW_HWMON_SENSORS_MAX_COUNT 64 +#define MLXSW_HWMON_MODULES_MAX_COUNT 64 +#define MLXSW_HWMON_GEARBOXES_MAX_COUNT 32 + +#define MLXSW_HWMON_ATTR_PER_SENSOR 3 +#define MLXSW_HWMON_ATTR_PER_MODULE 7 +#define MLXSW_HWMON_ATTR_PER_GEARBOX 4 +#define MLXSW_HWMON_DEV_NAME_LEN_MAX 16 + +#define MLXSW_HWMON_ATTR_COUNT (MLXSW_HWMON_SENSORS_MAX_COUNT * MLXSW_HWMON_ATTR_PER_SENSOR + \ + MLXSW_HWMON_MODULES_MAX_COUNT * MLXSW_HWMON_ATTR_PER_MODULE + \ + MLXSW_HWMON_GEARBOXES_MAX_COUNT * MLXSW_HWMON_ATTR_PER_GEARBOX + \ + MLXSW_MFCR_TACHOS_MAX + MLXSW_MFCR_PWMS_MAX) + +struct mlxsw_hwmon_attr { + struct device_attribute dev_attr; + struct mlxsw_hwmon_dev *mlxsw_hwmon_dev; + unsigned int type_index; + char name[32]; +}; + +static int mlxsw_hwmon_get_attr_index(int index, int count) +{ + if (index >= count) + return index % count + MLXSW_REG_MTMP_GBOX_INDEX_MIN; + + return index; +} + +struct mlxsw_hwmon_dev { + char name[MLXSW_HWMON_DEV_NAME_LEN_MAX]; + struct mlxsw_hwmon *hwmon; + struct device *hwmon_dev; + struct attribute_group group; + const struct attribute_group *groups[2]; + struct attribute *attrs[MLXSW_HWMON_ATTR_COUNT + 1]; + struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT]; + unsigned int attrs_count; + u8 sensor_count; + u8 module_sensor_max; + u8 slot_index; + bool active; +}; + +struct mlxsw_hwmon { + struct mlxsw_core *core; + const struct mlxsw_bus_info *bus_info; + struct mlxsw_hwmon_dev line_cards[]; +}; + +static ssize_t mlxsw_hwmon_temp_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon_dev *mlxsw_hwmon_dev = mlxsw_hwmon_attr->mlxsw_hwmon_dev; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_dev->hwmon; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + int temp, index; + int err; + + index = mlxsw_hwmon_get_attr_index(mlxsw_hwmon_attr->type_index, + mlxsw_hwmon_dev->module_sensor_max); + mlxsw_reg_mtmp_pack(mtmp_pl, mlxsw_hwmon_dev->slot_index, index, false, + false); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n"); + return err; + } + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL); + return sprintf(buf, "%d\n", temp); +} + +static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon_dev *mlxsw_hwmon_dev = mlxsw_hwmon_attr->mlxsw_hwmon_dev; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_dev->hwmon; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + int temp_max, index; + int err; + + index = mlxsw_hwmon_get_attr_index(mlxsw_hwmon_attr->type_index, + mlxsw_hwmon_dev->module_sensor_max); + mlxsw_reg_mtmp_pack(mtmp_pl, mlxsw_hwmon_dev->slot_index, index, false, + false); + 
err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n"); + return err; + } + mlxsw_reg_mtmp_unpack(mtmp_pl, NULL, &temp_max, NULL, NULL, NULL); + return sprintf(buf, "%d\n", temp_max); +} + +static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon_dev *mlxsw_hwmon_dev = mlxsw_hwmon_attr->mlxsw_hwmon_dev; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_dev->hwmon; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + unsigned long val; + int index; + int err; + + err = kstrtoul(buf, 10, &val); + if (err) + return err; + if (val != 1) + return -EINVAL; + + index = mlxsw_hwmon_get_attr_index(mlxsw_hwmon_attr->type_index, + mlxsw_hwmon_dev->module_sensor_max); + + mlxsw_reg_mtmp_slot_index_set(mtmp_pl, mlxsw_hwmon_dev->slot_index); + mlxsw_reg_mtmp_sensor_index_set(mtmp_pl, index); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) + return err; + mlxsw_reg_mtmp_mte_set(mtmp_pl, true); + mlxsw_reg_mtmp_mtr_set(mtmp_pl, true); + err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to reset temp sensor history\n"); + return err; + } + return len; +} + +static ssize_t mlxsw_hwmon_fan_rpm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon_dev *mlxsw_hwmon_dev = mlxsw_hwmon_attr->mlxsw_hwmon_dev; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_dev->hwmon; + char mfsm_pl[MLXSW_REG_MFSM_LEN]; + int err; + + mlxsw_reg_mfsm_pack(mfsm_pl, mlxsw_hwmon_attr->type_index); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfsm), mfsm_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query fan\n"); + return err; + } + return sprintf(buf, "%u\n", mlxsw_reg_mfsm_rpm_get(mfsm_pl)); +} + +static ssize_t mlxsw_hwmon_fan_fault_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon_dev *mlxsw_hwmon_dev = mlxsw_hwmon_attr->mlxsw_hwmon_dev; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_dev->hwmon; + char fore_pl[MLXSW_REG_FORE_LEN]; + bool fault; + int err; + + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(fore), fore_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query fan\n"); + return err; + } + mlxsw_reg_fore_unpack(fore_pl, mlxsw_hwmon_attr->type_index, &fault); + + return sprintf(buf, "%u\n", fault); +} + +static ssize_t mlxsw_hwmon_pwm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon_dev *mlxsw_hwmon_dev = mlxsw_hwmon_attr->mlxsw_hwmon_dev; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_dev->hwmon; + char mfsc_pl[MLXSW_REG_MFSC_LEN]; + int err; + + mlxsw_reg_mfsc_pack(mfsc_pl, mlxsw_hwmon_attr->type_index, 0); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfsc), mfsc_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query PWM\n"); + return err; + } + return sprintf(buf, "%u\n", + mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl)); +} + +static 
ssize_t mlxsw_hwmon_pwm_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon_dev *mlxsw_hwmon_dev = mlxsw_hwmon_attr->mlxsw_hwmon_dev; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_dev->hwmon; + char mfsc_pl[MLXSW_REG_MFSC_LEN]; + unsigned long val; + int err; + + err = kstrtoul(buf, 10, &val); + if (err) + return err; + if (val > 255) + return -EINVAL; + + mlxsw_reg_mfsc_pack(mfsc_pl, mlxsw_hwmon_attr->type_index, val); + err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mfsc), mfsc_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to write PWM\n"); + return err; + } + return len; +} + +static int mlxsw_hwmon_module_temp_get(struct device *dev, + struct device_attribute *attr, + int *p_temp) +{ + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon_dev *mlxsw_hwmon_dev = mlxsw_hwmon_attr->mlxsw_hwmon_dev; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_dev->hwmon; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + u8 module; + int err; + + module = mlxsw_hwmon_attr->type_index - mlxsw_hwmon_dev->sensor_count; + mlxsw_reg_mtmp_pack(mtmp_pl, mlxsw_hwmon_dev->slot_index, + MLXSW_REG_MTMP_MODULE_INDEX_MIN + module, false, + false); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(dev, "Failed to query module temperature\n"); + return err; + } + mlxsw_reg_mtmp_unpack(mtmp_pl, p_temp, NULL, NULL, NULL, NULL); + + return 0; +} + +static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int err, temp; + + err = mlxsw_hwmon_module_temp_get(dev, attr, &temp); + if (err) + return err; + + return sprintf(buf, "%d\n", temp); +} + +static ssize_t mlxsw_hwmon_module_temp_fault_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon_dev *mlxsw_hwmon_dev = mlxsw_hwmon_attr->mlxsw_hwmon_dev; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_dev->hwmon; + char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0}; + u8 module, fault; + u16 temp; + int err; + + module = mlxsw_hwmon_attr->type_index - mlxsw_hwmon_dev->sensor_count; + mlxsw_reg_mtbr_pack(mtbr_pl, mlxsw_hwmon_dev->slot_index, + MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, 1); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl); + if (err) { + dev_err(dev, "Failed to query module temperature sensor\n"); + return err; + } + + mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL); + + /* Update status and temperature cache. */ + switch (temp) { + case MLXSW_REG_MTBR_BAD_SENS_INFO: + /* Untrusted cable is connected. Reading temperature from its + * sensor is faulty. 
+ */ + fault = 1; + break; + case MLXSW_REG_MTBR_NO_CONN: + case MLXSW_REG_MTBR_NO_TEMP_SENS: + case MLXSW_REG_MTBR_INDEX_NA: + default: + fault = 0; + break; + } + + return sprintf(buf, "%u\n", fault); +} + +static int mlxsw_hwmon_module_temp_critical_get(struct device *dev, + struct device_attribute *attr, + int *p_temp) +{ + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon_dev *mlxsw_hwmon_dev = mlxsw_hwmon_attr->mlxsw_hwmon_dev; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_dev->hwmon; + u8 module; + int err; + + module = mlxsw_hwmon_attr->type_index - mlxsw_hwmon_dev->sensor_count; + err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, + mlxsw_hwmon_dev->slot_index, + module, SFP_TEMP_HIGH_WARN, + p_temp); + if (err) { + dev_err(dev, "Failed to query module temperature thresholds\n"); + return err; + } + + return 0; +} + +static ssize_t +mlxsw_hwmon_module_temp_critical_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int err, temp; + + err = mlxsw_hwmon_module_temp_critical_get(dev, attr, &temp); + if (err) + return err; + + return sprintf(buf, "%u\n", temp); +} + +static int mlxsw_hwmon_module_temp_emergency_get(struct device *dev, + struct device_attribute *attr, + int *p_temp) +{ + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon_dev *mlxsw_hwmon_dev = mlxsw_hwmon_attr->mlxsw_hwmon_dev; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_dev->hwmon; + u8 module; + int err; + + module = mlxsw_hwmon_attr->type_index - mlxsw_hwmon_dev->sensor_count; + err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, + mlxsw_hwmon_dev->slot_index, + module, SFP_TEMP_HIGH_ALARM, + p_temp); + if (err) { + dev_err(dev, "Failed to query module temperature thresholds\n"); + return err; + } + + return 0; +} + +static ssize_t +mlxsw_hwmon_module_temp_emergency_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int err, temp; + + err = mlxsw_hwmon_module_temp_emergency_get(dev, attr, &temp); + if (err) + return err; + + return sprintf(buf, "%u\n", temp); +} + +static ssize_t +mlxsw_hwmon_module_temp_label_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + + return sprintf(buf, "front panel %03u\n", + mlxsw_hwmon_attr->type_index + 1 - + mlxsw_hwmon_attr->mlxsw_hwmon_dev->sensor_count); +} + +static ssize_t +mlxsw_hwmon_gbox_temp_label_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon_dev *mlxsw_hwmon_dev = mlxsw_hwmon_attr->mlxsw_hwmon_dev; + int index = mlxsw_hwmon_attr->type_index - + mlxsw_hwmon_dev->module_sensor_max + 1; + + return sprintf(buf, "gearbox %03u\n", index); +} + +static ssize_t mlxsw_hwmon_temp_critical_alarm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int err, temp, emergency_temp, critic_temp; + + err = mlxsw_hwmon_module_temp_get(dev, attr, &temp); + if (err) + return err; + + if (temp <= 0) + return sprintf(buf, "%d\n", false); + + err = mlxsw_hwmon_module_temp_emergency_get(dev, attr, &emergency_temp); + if (err) + return err; + + if (temp >= emergency_temp) + return sprintf(buf, "%d\n", false); + + err = mlxsw_hwmon_module_temp_critical_get(dev, attr, &critic_temp); + if 
(err) + return err; + + return sprintf(buf, "%d\n", temp >= critic_temp); +} + +static ssize_t mlxsw_hwmon_temp_emergency_alarm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int err, temp, emergency_temp; + + err = mlxsw_hwmon_module_temp_get(dev, attr, &temp); + if (err) + return err; + + if (temp <= 0) + return sprintf(buf, "%d\n", false); + + err = mlxsw_hwmon_module_temp_emergency_get(dev, attr, &emergency_temp); + if (err) + return err; + + return sprintf(buf, "%d\n", temp >= emergency_temp); +} + +enum mlxsw_hwmon_attr_type { + MLXSW_HWMON_ATTR_TYPE_TEMP, + MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, + MLXSW_HWMON_ATTR_TYPE_TEMP_RST, + MLXSW_HWMON_ATTR_TYPE_FAN_RPM, + MLXSW_HWMON_ATTR_TYPE_FAN_FAULT, + MLXSW_HWMON_ATTR_TYPE_PWM, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL, + MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL, + MLXSW_HWMON_ATTR_TYPE_TEMP_CRIT_ALARM, + MLXSW_HWMON_ATTR_TYPE_TEMP_EMERGENCY_ALARM, +}; + +static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon_dev *mlxsw_hwmon_dev, + enum mlxsw_hwmon_attr_type attr_type, + unsigned int type_index, unsigned int num) +{ + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr; + unsigned int attr_index; + + attr_index = mlxsw_hwmon_dev->attrs_count; + mlxsw_hwmon_attr = &mlxsw_hwmon_dev->hwmon_attrs[attr_index]; + + switch (attr_type) { + case MLXSW_HWMON_ATTR_TYPE_TEMP: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_temp_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_input", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MAX: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_temp_max_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_highest", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_RST: + mlxsw_hwmon_attr->dev_attr.store = mlxsw_hwmon_temp_rst_store; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0200; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_reset_history", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_FAN_RPM: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_fan_rpm_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "fan%u_input", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_FAN_FAULT: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_fan_fault_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "fan%u_fault", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_PWM: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_pwm_show; + mlxsw_hwmon_attr->dev_attr.store = mlxsw_hwmon_pwm_store; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0644; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "pwm%u", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_module_temp_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_input", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_module_temp_fault_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_fault", 
num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_module_temp_critical_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_crit", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_module_temp_emergency_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_emergency", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_module_temp_label_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_label", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_gbox_temp_label_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_label", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_CRIT_ALARM: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_temp_critical_alarm_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_crit_alarm", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_EMERGENCY_ALARM: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_temp_emergency_alarm_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_emergency_alarm", num + 1); + break; + default: + WARN_ON(1); + } + + mlxsw_hwmon_attr->type_index = type_index; + mlxsw_hwmon_attr->mlxsw_hwmon_dev = mlxsw_hwmon_dev; + mlxsw_hwmon_attr->dev_attr.attr.name = mlxsw_hwmon_attr->name; + sysfs_attr_init(&mlxsw_hwmon_attr->dev_attr.attr); + + mlxsw_hwmon_dev->attrs[attr_index] = &mlxsw_hwmon_attr->dev_attr.attr; + mlxsw_hwmon_dev->attrs_count++; +} + +static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon_dev *mlxsw_hwmon_dev) +{ + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_dev->hwmon; + char mtcap_pl[MLXSW_REG_MTCAP_LEN] = {0}; + int i; + int err; + + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtcap), mtcap_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to get number of temp sensors\n"); + return err; + } + mlxsw_hwmon_dev->sensor_count = mlxsw_reg_mtcap_sensor_count_get(mtcap_pl); + for (i = 0; i < mlxsw_hwmon_dev->sensor_count; i++) { + char mtmp_pl[MLXSW_REG_MTMP_LEN] = {0}; + + mlxsw_reg_mtmp_slot_index_set(mtmp_pl, + mlxsw_hwmon_dev->slot_index); + mlxsw_reg_mtmp_sensor_index_set(mtmp_pl, i); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), + mtmp_pl); + if (err) + return err; + mlxsw_reg_mtmp_mte_set(mtmp_pl, true); + mlxsw_reg_mtmp_mtr_set(mtmp_pl, true); + err = mlxsw_reg_write(mlxsw_hwmon->core, + MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to setup temp sensor number %d\n", + i); + return err; + } + mlxsw_hwmon_attr_add(mlxsw_hwmon_dev, + MLXSW_HWMON_ATTR_TYPE_TEMP, i, i); + mlxsw_hwmon_attr_add(mlxsw_hwmon_dev, + MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, i, i); + mlxsw_hwmon_attr_add(mlxsw_hwmon_dev, + MLXSW_HWMON_ATTR_TYPE_TEMP_RST, i, i); + } + return 0; +} + +static int mlxsw_hwmon_fans_init(struct mlxsw_hwmon_dev *mlxsw_hwmon_dev) +{ + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_dev->hwmon; + char mfcr_pl[MLXSW_REG_MFCR_LEN] = {0}; + enum 
mlxsw_reg_mfcr_pwm_frequency freq; + unsigned int type_index; + unsigned int num; + u16 tacho_active; + u8 pwm_active; + int err; + + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfcr), mfcr_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to get to probe PWMs and Tachometers\n"); + return err; + } + mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active); + num = 0; + for (type_index = 0; type_index < MLXSW_MFCR_TACHOS_MAX; type_index++) { + if (tacho_active & BIT(type_index)) { + mlxsw_hwmon_attr_add(mlxsw_hwmon_dev, + MLXSW_HWMON_ATTR_TYPE_FAN_RPM, + type_index, num); + mlxsw_hwmon_attr_add(mlxsw_hwmon_dev, + MLXSW_HWMON_ATTR_TYPE_FAN_FAULT, + type_index, num++); + } + } + num = 0; + for (type_index = 0; type_index < MLXSW_MFCR_PWMS_MAX; type_index++) { + if (pwm_active & BIT(type_index)) + mlxsw_hwmon_attr_add(mlxsw_hwmon_dev, + MLXSW_HWMON_ATTR_TYPE_PWM, + type_index, num++); + } + return 0; +} + +static int mlxsw_hwmon_module_init(struct mlxsw_hwmon_dev *mlxsw_hwmon_dev) +{ + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_dev->hwmon; + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + u8 module_sensor_max; + int i, err; + + mlxsw_reg_mgpir_pack(mgpir_pl, mlxsw_hwmon_dev->slot_index); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, + &module_sensor_max, NULL); + + /* Add extra attributes for module temperature. Sensor index is + * assigned to sensor_count value, while all indexed before + * sensor_count are already utilized by the sensors connected through + * mtmp register by mlxsw_hwmon_temp_init(). + */ + mlxsw_hwmon_dev->module_sensor_max = mlxsw_hwmon_dev->sensor_count + + module_sensor_max; + for (i = mlxsw_hwmon_dev->sensor_count; + i < mlxsw_hwmon_dev->module_sensor_max; i++) { + mlxsw_hwmon_attr_add(mlxsw_hwmon_dev, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE, i, i); + mlxsw_hwmon_attr_add(mlxsw_hwmon_dev, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT, + i, i); + mlxsw_hwmon_attr_add(mlxsw_hwmon_dev, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT, i, + i); + mlxsw_hwmon_attr_add(mlxsw_hwmon_dev, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG, + i, i); + mlxsw_hwmon_attr_add(mlxsw_hwmon_dev, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL, + i, i); + mlxsw_hwmon_attr_add(mlxsw_hwmon_dev, + MLXSW_HWMON_ATTR_TYPE_TEMP_CRIT_ALARM, + i, i); + mlxsw_hwmon_attr_add(mlxsw_hwmon_dev, + MLXSW_HWMON_ATTR_TYPE_TEMP_EMERGENCY_ALARM, + i, i); + } + + return 0; +} + +static int mlxsw_hwmon_gearbox_init(struct mlxsw_hwmon_dev *mlxsw_hwmon_dev) +{ + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_dev->hwmon; + enum mlxsw_reg_mgpir_device_type device_type; + int index, max_index, sensor_index; + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + u8 gbox_num; + int err; + + mlxsw_reg_mgpir_pack(mgpir_pl, mlxsw_hwmon_dev->slot_index); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, &device_type, NULL, NULL, + NULL); + if (device_type != MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE || + !gbox_num) + return 0; + + index = mlxsw_hwmon_dev->module_sensor_max; + max_index = mlxsw_hwmon_dev->module_sensor_max + gbox_num; + while (index < max_index) { + sensor_index = index % mlxsw_hwmon_dev->module_sensor_max + + MLXSW_REG_MTMP_GBOX_INDEX_MIN; + mlxsw_reg_mtmp_pack(mtmp_pl, mlxsw_hwmon_dev->slot_index, + sensor_index, true, true); + err = mlxsw_reg_write(mlxsw_hwmon->core, + MLXSW_REG(mtmp), 
mtmp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to setup temp sensor number %d\n", + sensor_index); + return err; + } + mlxsw_hwmon_attr_add(mlxsw_hwmon_dev, + MLXSW_HWMON_ATTR_TYPE_TEMP, index, index); + mlxsw_hwmon_attr_add(mlxsw_hwmon_dev, + MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, index, + index); + mlxsw_hwmon_attr_add(mlxsw_hwmon_dev, + MLXSW_HWMON_ATTR_TYPE_TEMP_RST, index, + index); + mlxsw_hwmon_attr_add(mlxsw_hwmon_dev, + MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL, + index, index); + index++; + } + + return 0; +} + +static void +mlxsw_hwmon_got_active(struct mlxsw_core *mlxsw_core, u8 slot_index, + void *priv) +{ + struct mlxsw_hwmon *hwmon = priv; + struct mlxsw_hwmon_dev *linecard; + struct device *dev; + int err; + + dev = hwmon->bus_info->dev; + linecard = &hwmon->line_cards[slot_index]; + if (linecard->active) + return; + /* For the main board, module sensor indexes start from 1, sensor index + * 0 is used for the ASIC. Use the same numbering for line cards. + */ + linecard->sensor_count = 1; + linecard->slot_index = slot_index; + linecard->hwmon = hwmon; + err = mlxsw_hwmon_module_init(linecard); + if (err) { + dev_err(dev, "Failed to configure hwmon objects for line card modules in slot %d\n", + slot_index); + return; + } + + err = mlxsw_hwmon_gearbox_init(linecard); + if (err) { + dev_err(dev, "Failed to configure hwmon objects for line card gearboxes in slot %d\n", + slot_index); + return; + } + + linecard->groups[0] = &linecard->group; + linecard->group.attrs = linecard->attrs; + sprintf(linecard->name, "%s#%02u", "linecard", slot_index); + linecard->hwmon_dev = + hwmon_device_register_with_groups(dev, linecard->name, + linecard, linecard->groups); + if (IS_ERR(linecard->hwmon_dev)) { + dev_err(dev, "Failed to register hwmon objects for line card in slot %d\n", + slot_index); + return; + } + + linecard->active = true; +} + +static void +mlxsw_hwmon_got_inactive(struct mlxsw_core *mlxsw_core, u8 slot_index, + void *priv) +{ + struct mlxsw_hwmon *hwmon = priv; + struct mlxsw_hwmon_dev *linecard; + + linecard = &hwmon->line_cards[slot_index]; + if (!linecard->active) + return; + linecard->active = false; + hwmon_device_unregister(linecard->hwmon_dev); + /* Reset attributes counter */ + linecard->attrs_count = 0; +} + +static struct mlxsw_linecards_event_ops mlxsw_hwmon_event_ops = { + .got_active = mlxsw_hwmon_got_active, + .got_inactive = mlxsw_hwmon_got_inactive, +}; + +int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct mlxsw_hwmon **p_hwmon) +{ + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + struct mlxsw_hwmon *mlxsw_hwmon; + struct device *hwmon_dev; + u8 num_of_slots; + int err; + + mlxsw_reg_mgpir_pack(mgpir_pl, 0); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, NULL, + &num_of_slots); + + mlxsw_hwmon = kzalloc(struct_size(mlxsw_hwmon, line_cards, + num_of_slots + 1), GFP_KERNEL); + if (!mlxsw_hwmon) + return -ENOMEM; + + mlxsw_hwmon->core = mlxsw_core; + mlxsw_hwmon->bus_info = mlxsw_bus_info; + mlxsw_hwmon->line_cards[0].hwmon = mlxsw_hwmon; + mlxsw_hwmon->line_cards[0].slot_index = 0; + + err = mlxsw_hwmon_temp_init(&mlxsw_hwmon->line_cards[0]); + if (err) + goto err_temp_init; + + err = mlxsw_hwmon_fans_init(&mlxsw_hwmon->line_cards[0]); + if (err) + goto err_fans_init; + + err = mlxsw_hwmon_module_init(&mlxsw_hwmon->line_cards[0]); + if (err) + goto err_temp_module_init; + + err = 
mlxsw_hwmon_gearbox_init(&mlxsw_hwmon->line_cards[0]); + if (err) + goto err_temp_gearbox_init; + + mlxsw_hwmon->line_cards[0].groups[0] = &mlxsw_hwmon->line_cards[0].group; + mlxsw_hwmon->line_cards[0].group.attrs = mlxsw_hwmon->line_cards[0].attrs; + + hwmon_dev = hwmon_device_register_with_groups(mlxsw_bus_info->dev, + "mlxsw", + &mlxsw_hwmon->line_cards[0], + mlxsw_hwmon->line_cards[0].groups); + if (IS_ERR(hwmon_dev)) { + err = PTR_ERR(hwmon_dev); + goto err_hwmon_register; + } + + err = mlxsw_linecards_event_ops_register(mlxsw_hwmon->core, + &mlxsw_hwmon_event_ops, + mlxsw_hwmon); + if (err) + goto err_linecards_event_ops_register; + + mlxsw_hwmon->line_cards[0].hwmon_dev = hwmon_dev; + mlxsw_hwmon->line_cards[0].active = true; + *p_hwmon = mlxsw_hwmon; + return 0; + +err_linecards_event_ops_register: + hwmon_device_unregister(mlxsw_hwmon->line_cards[0].hwmon_dev); +err_hwmon_register: +err_temp_gearbox_init: +err_temp_module_init: +err_fans_init: +err_temp_init: + kfree(mlxsw_hwmon); + return err; +} + +void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon) +{ + mlxsw_hwmon->line_cards[0].active = false; + mlxsw_linecards_event_ops_unregister(mlxsw_hwmon->core, + &mlxsw_hwmon_event_ops, mlxsw_hwmon); + hwmon_device_unregister(mlxsw_hwmon->line_cards[0].hwmon_dev); + kfree(mlxsw_hwmon); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c b/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c new file mode 100644 index 000000000..af37e650a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2022 NVIDIA Corporation and Mellanox Technologies. All rights reserved */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/err.h> +#include <linux/types.h> +#include <linux/auxiliary_bus.h> +#include <linux/idr.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <net/devlink.h> +#include "core.h" + +#define MLXSW_LINECARD_DEV_ID_NAME "lc" + +struct mlxsw_linecard_dev { + struct mlxsw_linecard *linecard; +}; + +struct mlxsw_linecard_bdev { + struct auxiliary_device adev; + struct mlxsw_linecard *linecard; + struct mlxsw_linecard_dev *linecard_dev; +}; + +static DEFINE_IDA(mlxsw_linecard_bdev_ida); + +static int mlxsw_linecard_bdev_id_alloc(void) +{ + return ida_alloc(&mlxsw_linecard_bdev_ida, GFP_KERNEL); +} + +static void mlxsw_linecard_bdev_id_free(int id) +{ + ida_free(&mlxsw_linecard_bdev_ida, id); +} + +static void mlxsw_linecard_bdev_release(struct device *device) +{ + struct auxiliary_device *adev = + container_of(device, struct auxiliary_device, dev); + struct mlxsw_linecard_bdev *linecard_bdev = + container_of(adev, struct mlxsw_linecard_bdev, adev); + + mlxsw_linecard_bdev_id_free(adev->id); + kfree(linecard_bdev); +} + +int mlxsw_linecard_bdev_add(struct mlxsw_linecard *linecard) +{ + struct mlxsw_linecard_bdev *linecard_bdev; + int err; + int id; + + id = mlxsw_linecard_bdev_id_alloc(); + if (id < 0) + return id; + + linecard_bdev = kzalloc(sizeof(*linecard_bdev), GFP_KERNEL); + if (!linecard_bdev) { + mlxsw_linecard_bdev_id_free(id); + return -ENOMEM; + } + linecard_bdev->adev.id = id; + linecard_bdev->adev.name = MLXSW_LINECARD_DEV_ID_NAME; + linecard_bdev->adev.dev.release = mlxsw_linecard_bdev_release; + linecard_bdev->adev.dev.parent = linecard->linecards->bus_info->dev; + linecard_bdev->linecard = linecard; + + err = auxiliary_device_init(&linecard_bdev->adev); + if (err) { + mlxsw_linecard_bdev_id_free(id); + 
kfree(linecard_bdev); + return err; + } + + err = auxiliary_device_add(&linecard_bdev->adev); + if (err) { + auxiliary_device_uninit(&linecard_bdev->adev); + return err; + } + + linecard->bdev = linecard_bdev; + return 0; +} + +void mlxsw_linecard_bdev_del(struct mlxsw_linecard *linecard) +{ + struct mlxsw_linecard_bdev *linecard_bdev = linecard->bdev; + + if (!linecard_bdev) + /* Unprovisioned line cards do not have an auxiliary device. */ + return; + auxiliary_device_delete(&linecard_bdev->adev); + auxiliary_device_uninit(&linecard_bdev->adev); + linecard->bdev = NULL; +} + +static int mlxsw_linecard_dev_devlink_info_get(struct devlink *devlink, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct mlxsw_linecard_dev *linecard_dev = devlink_priv(devlink); + struct mlxsw_linecard *linecard = linecard_dev->linecard; + + return mlxsw_linecard_devlink_info_get(linecard, req, extack); +} + +static int +mlxsw_linecard_dev_devlink_flash_update(struct devlink *devlink, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) +{ + struct mlxsw_linecard_dev *linecard_dev = devlink_priv(devlink); + struct mlxsw_linecard *linecard = linecard_dev->linecard; + + return mlxsw_linecard_flash_update(devlink, linecard, + params->fw, extack); +} + +static const struct devlink_ops mlxsw_linecard_dev_devlink_ops = { + .info_get = mlxsw_linecard_dev_devlink_info_get, + .flash_update = mlxsw_linecard_dev_devlink_flash_update, +}; + +static int mlxsw_linecard_bdev_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct mlxsw_linecard_bdev *linecard_bdev = + container_of(adev, struct mlxsw_linecard_bdev, adev); + struct mlxsw_linecard *linecard = linecard_bdev->linecard; + struct mlxsw_linecard_dev *linecard_dev; + struct devlink *devlink; + + devlink = devlink_alloc(&mlxsw_linecard_dev_devlink_ops, + sizeof(*linecard_dev), &adev->dev); + if (!devlink) + return -ENOMEM; + linecard_dev = devlink_priv(devlink); + linecard_dev->linecard = linecard_bdev->linecard; + linecard_bdev->linecard_dev = linecard_dev; + + devlink_register(devlink); + devlink_linecard_nested_dl_set(linecard->devlink_linecard, devlink); + return 0; +} + +static void mlxsw_linecard_bdev_remove(struct auxiliary_device *adev) +{ + struct mlxsw_linecard_bdev *linecard_bdev = + container_of(adev, struct mlxsw_linecard_bdev, adev); + struct devlink *devlink = priv_to_devlink(linecard_bdev->linecard_dev); + struct mlxsw_linecard *linecard = linecard_bdev->linecard; + + devlink_linecard_nested_dl_set(linecard->devlink_linecard, NULL); + devlink_unregister(devlink); + devlink_free(devlink); +} + +static const struct auxiliary_device_id mlxsw_linecard_bdev_id_table[] = { + { .name = KBUILD_MODNAME "." 
MLXSW_LINECARD_DEV_ID_NAME }, + {}, +}; + +MODULE_DEVICE_TABLE(auxiliary, mlxsw_linecard_bdev_id_table); + +static struct auxiliary_driver mlxsw_linecard_driver = { + .name = MLXSW_LINECARD_DEV_ID_NAME, + .probe = mlxsw_linecard_bdev_probe, + .remove = mlxsw_linecard_bdev_remove, + .id_table = mlxsw_linecard_bdev_id_table, +}; + +int mlxsw_linecard_driver_register(void) +{ + return auxiliary_driver_register(&mlxsw_linecard_driver); +} + +void mlxsw_linecard_driver_unregister(void) +{ + auxiliary_driver_unregister(&mlxsw_linecard_driver); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c new file mode 100644 index 000000000..83d2dc91b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c @@ -0,0 +1,1601 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2022 NVIDIA Corporation and Mellanox Technologies. All rights reserved */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/err.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/workqueue.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/list.h> +#include <linux/vmalloc.h> + +#include "core.h" +#include "../mlxfw/mlxfw.h" + +struct mlxsw_linecard_ini_file { + __le16 size; + union { + u8 data[0]; + struct { + __be16 hw_revision; + __be16 ini_version; + u8 __dontcare[3]; + u8 type; + u8 name[20]; + } format; + }; +}; + +struct mlxsw_linecard_types_info { + struct mlxsw_linecard_ini_file **ini_files; + unsigned int count; + size_t data_size; + char *data; +}; + +#define MLXSW_LINECARD_STATUS_EVENT_TO (10 * MSEC_PER_SEC) + +static void +mlxsw_linecard_status_event_to_schedule(struct mlxsw_linecard *linecard, + enum mlxsw_linecard_status_event_type status_event_type) +{ + cancel_delayed_work_sync(&linecard->status_event_to_dw); + linecard->status_event_type_to = status_event_type; + mlxsw_core_schedule_dw(&linecard->status_event_to_dw, + msecs_to_jiffies(MLXSW_LINECARD_STATUS_EVENT_TO)); +} + +static void +mlxsw_linecard_status_event_done(struct mlxsw_linecard *linecard, + enum mlxsw_linecard_status_event_type status_event_type) +{ + if (linecard->status_event_type_to == status_event_type) + cancel_delayed_work_sync(&linecard->status_event_to_dw); +} + +static const char * +mlxsw_linecard_types_lookup(struct mlxsw_linecards *linecards, u8 card_type) +{ + struct mlxsw_linecard_types_info *types_info; + struct mlxsw_linecard_ini_file *ini_file; + int i; + + types_info = linecards->types_info; + if (!types_info) + return NULL; + for (i = 0; i < types_info->count; i++) { + ini_file = linecards->types_info->ini_files[i]; + if (ini_file->format.type == card_type) + return ini_file->format.name; + } + return NULL; +} + +static const char *mlxsw_linecard_type_name(struct mlxsw_linecard *linecard) +{ + struct mlxsw_core *mlxsw_core = linecard->linecards->mlxsw_core; + char mddq_pl[MLXSW_REG_MDDQ_LEN]; + int err; + + mlxsw_reg_mddq_slot_name_pack(mddq_pl, linecard->slot_index); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mddq), mddq_pl); + if (err) + return ERR_PTR(err); + mlxsw_reg_mddq_slot_name_unpack(mddq_pl, linecard->name); + return linecard->name; +} + +struct mlxsw_linecard_device_fw_info { + struct mlxfw_dev mlxfw_dev; + struct mlxsw_core *mlxsw_core; + struct mlxsw_linecard *linecard; +}; + +static int mlxsw_linecard_device_fw_component_query(struct mlxfw_dev *mlxfw_dev, + u16 component_index, + u32 *p_max_size, + u8 *p_align_bits, + u16 *p_max_write_size) +{ + 
struct mlxsw_linecard_device_fw_info *info = + container_of(mlxfw_dev, struct mlxsw_linecard_device_fw_info, + mlxfw_dev); + struct mlxsw_linecard *linecard = info->linecard; + struct mlxsw_core *mlxsw_core = info->mlxsw_core; + char mddt_pl[MLXSW_REG_MDDT_LEN]; + char *mcqi_pl; + int err; + + mlxsw_reg_mddt_pack(mddt_pl, linecard->slot_index, + linecard->device.index, + MLXSW_REG_MDDT_METHOD_QUERY, + MLXSW_REG(mcqi), &mcqi_pl); + + mlxsw_reg_mcqi_pack(mcqi_pl, component_index); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mddt), mddt_pl); + if (err) + return err; + mlxsw_reg_mcqi_unpack(mcqi_pl, p_max_size, p_align_bits, + p_max_write_size); + + *p_align_bits = max_t(u8, *p_align_bits, 2); + *p_max_write_size = min_t(u16, *p_max_write_size, + MLXSW_REG_MCDA_MAX_DATA_LEN); + return 0; +} + +static int mlxsw_linecard_device_fw_fsm_lock(struct mlxfw_dev *mlxfw_dev, + u32 *fwhandle) +{ + struct mlxsw_linecard_device_fw_info *info = + container_of(mlxfw_dev, struct mlxsw_linecard_device_fw_info, + mlxfw_dev); + struct mlxsw_linecard *linecard = info->linecard; + struct mlxsw_core *mlxsw_core = info->mlxsw_core; + char mddt_pl[MLXSW_REG_MDDT_LEN]; + u8 control_state; + char *mcc_pl; + int err; + + mlxsw_reg_mddt_pack(mddt_pl, linecard->slot_index, + linecard->device.index, + MLXSW_REG_MDDT_METHOD_QUERY, + MLXSW_REG(mcc), &mcc_pl); + mlxsw_reg_mcc_pack(mcc_pl, 0, 0, 0, 0); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mddt), mddt_pl); + if (err) + return err; + + mlxsw_reg_mcc_unpack(mcc_pl, fwhandle, NULL, &control_state); + if (control_state != MLXFW_FSM_STATE_IDLE) + return -EBUSY; + + mlxsw_reg_mddt_pack(mddt_pl, linecard->slot_index, + linecard->device.index, + MLXSW_REG_MDDT_METHOD_WRITE, + MLXSW_REG(mcc), &mcc_pl); + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE, + 0, *fwhandle, 0); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mddt), mddt_pl); +} + +static int +mlxsw_linecard_device_fw_fsm_component_update(struct mlxfw_dev *mlxfw_dev, + u32 fwhandle, + u16 component_index, + u32 component_size) +{ + struct mlxsw_linecard_device_fw_info *info = + container_of(mlxfw_dev, struct mlxsw_linecard_device_fw_info, + mlxfw_dev); + struct mlxsw_linecard *linecard = info->linecard; + struct mlxsw_core *mlxsw_core = info->mlxsw_core; + char mddt_pl[MLXSW_REG_MDDT_LEN]; + char *mcc_pl; + + mlxsw_reg_mddt_pack(mddt_pl, linecard->slot_index, + linecard->device.index, + MLXSW_REG_MDDT_METHOD_WRITE, + MLXSW_REG(mcc), &mcc_pl); + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_UPDATE_COMPONENT, + component_index, fwhandle, component_size); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mddt), mddt_pl); +} + +static int +mlxsw_linecard_device_fw_fsm_block_download(struct mlxfw_dev *mlxfw_dev, + u32 fwhandle, u8 *data, + u16 size, u32 offset) +{ + struct mlxsw_linecard_device_fw_info *info = + container_of(mlxfw_dev, struct mlxsw_linecard_device_fw_info, + mlxfw_dev); + struct mlxsw_linecard *linecard = info->linecard; + struct mlxsw_core *mlxsw_core = info->mlxsw_core; + char mddt_pl[MLXSW_REG_MDDT_LEN]; + char *mcda_pl; + + mlxsw_reg_mddt_pack(mddt_pl, linecard->slot_index, + linecard->device.index, + MLXSW_REG_MDDT_METHOD_WRITE, + MLXSW_REG(mcda), &mcda_pl); + mlxsw_reg_mcda_pack(mcda_pl, fwhandle, offset, size, data); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mddt), mddt_pl); +} + +static int +mlxsw_linecard_device_fw_fsm_component_verify(struct mlxfw_dev *mlxfw_dev, + u32 fwhandle, u16 component_index) +{ + struct mlxsw_linecard_device_fw_info *info = + 
container_of(mlxfw_dev, struct mlxsw_linecard_device_fw_info, + mlxfw_dev); + struct mlxsw_linecard *linecard = info->linecard; + struct mlxsw_core *mlxsw_core = info->mlxsw_core; + char mddt_pl[MLXSW_REG_MDDT_LEN]; + char *mcc_pl; + + mlxsw_reg_mddt_pack(mddt_pl, linecard->slot_index, + linecard->device.index, + MLXSW_REG_MDDT_METHOD_WRITE, + MLXSW_REG(mcc), &mcc_pl); + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_VERIFY_COMPONENT, + component_index, fwhandle, 0); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mddt), mddt_pl); +} + +static int mlxsw_linecard_device_fw_fsm_activate(struct mlxfw_dev *mlxfw_dev, + u32 fwhandle) +{ + struct mlxsw_linecard_device_fw_info *info = + container_of(mlxfw_dev, struct mlxsw_linecard_device_fw_info, + mlxfw_dev); + struct mlxsw_linecard *linecard = info->linecard; + struct mlxsw_core *mlxsw_core = info->mlxsw_core; + char mddt_pl[MLXSW_REG_MDDT_LEN]; + char *mcc_pl; + + mlxsw_reg_mddt_pack(mddt_pl, linecard->slot_index, + linecard->device.index, + MLXSW_REG_MDDT_METHOD_WRITE, + MLXSW_REG(mcc), &mcc_pl); + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_ACTIVATE, + 0, fwhandle, 0); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mddt), mddt_pl); +} + +static int +mlxsw_linecard_device_fw_fsm_query_state(struct mlxfw_dev *mlxfw_dev, + u32 fwhandle, + enum mlxfw_fsm_state *fsm_state, + enum mlxfw_fsm_state_err *fsm_state_err) +{ + struct mlxsw_linecard_device_fw_info *info = + container_of(mlxfw_dev, struct mlxsw_linecard_device_fw_info, + mlxfw_dev); + struct mlxsw_linecard *linecard = info->linecard; + struct mlxsw_core *mlxsw_core = info->mlxsw_core; + char mddt_pl[MLXSW_REG_MDDT_LEN]; + u8 control_state; + u8 error_code; + char *mcc_pl; + int err; + + mlxsw_reg_mddt_pack(mddt_pl, linecard->slot_index, + linecard->device.index, + MLXSW_REG_MDDT_METHOD_QUERY, + MLXSW_REG(mcc), &mcc_pl); + mlxsw_reg_mcc_pack(mcc_pl, 0, 0, fwhandle, 0); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mddt), mddt_pl); + if (err) + return err; + + mlxsw_reg_mcc_unpack(mcc_pl, NULL, &error_code, &control_state); + *fsm_state = control_state; + *fsm_state_err = min_t(enum mlxfw_fsm_state_err, error_code, + MLXFW_FSM_STATE_ERR_MAX); + return 0; +} + +static void mlxsw_linecard_device_fw_fsm_cancel(struct mlxfw_dev *mlxfw_dev, + u32 fwhandle) +{ + struct mlxsw_linecard_device_fw_info *info = + container_of(mlxfw_dev, struct mlxsw_linecard_device_fw_info, + mlxfw_dev); + struct mlxsw_linecard *linecard = info->linecard; + struct mlxsw_core *mlxsw_core = info->mlxsw_core; + char mddt_pl[MLXSW_REG_MDDT_LEN]; + char *mcc_pl; + + mlxsw_reg_mddt_pack(mddt_pl, linecard->slot_index, + linecard->device.index, + MLXSW_REG_MDDT_METHOD_WRITE, + MLXSW_REG(mcc), &mcc_pl); + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_CANCEL, + 0, fwhandle, 0); + mlxsw_reg_write(mlxsw_core, MLXSW_REG(mddt), mddt_pl); +} + +static void mlxsw_linecard_device_fw_fsm_release(struct mlxfw_dev *mlxfw_dev, + u32 fwhandle) +{ + struct mlxsw_linecard_device_fw_info *info = + container_of(mlxfw_dev, struct mlxsw_linecard_device_fw_info, + mlxfw_dev); + struct mlxsw_linecard *linecard = info->linecard; + struct mlxsw_core *mlxsw_core = info->mlxsw_core; + char mddt_pl[MLXSW_REG_MDDT_LEN]; + char *mcc_pl; + + mlxsw_reg_mddt_pack(mddt_pl, linecard->slot_index, + linecard->device.index, + MLXSW_REG_MDDT_METHOD_WRITE, + MLXSW_REG(mcc), &mcc_pl); + mlxsw_reg_mcc_pack(mcc_pl, + MLXSW_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE, + 0, fwhandle, 0); + mlxsw_reg_write(mlxsw_core, MLXSW_REG(mddt), mddt_pl); +} + 
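The mlxfw callbacks above are the line card flavour of the firmware-flash interface: each one packs the usual MCQI/MCC/MCDA register inside an MDDT transaction so it executes on the device selected by slot_index and device.index instead of on the main ASIC. As a rough orientation aid only, the C sketch below shows the order in which these callbacks are typically driven for a single component; the real flow is owned by mlxsw_core_fw_flash() and the mlxfw core (which also sizes writes via component_query, polls fsm_query_state between steps and cancels on error), and the lc_fw_ops / lc_fw_flash_component names and types here are hypothetical, not part of the driver or of this commit.

/* Illustrative sketch only -- hypothetical types, not part of the driver. */
#include <stdint.h>

struct lc_fw_ops {
	int (*fsm_lock)(void *dev, uint32_t *fwhandle);
	int (*fsm_component_update)(void *dev, uint32_t fwhandle,
				    uint16_t index, uint32_t size);
	int (*fsm_block_download)(void *dev, uint32_t fwhandle,
				  const uint8_t *data, uint16_t size,
				  uint32_t offset);
	int (*fsm_component_verify)(void *dev, uint32_t fwhandle,
				    uint16_t index);
	int (*fsm_activate)(void *dev, uint32_t fwhandle);
	void (*fsm_release)(void *dev, uint32_t fwhandle);
};

/* Flash one component image of img_size bytes in chunks of max_write bytes
 * (max_write is assumed to be non-zero, as reported by component_query).
 */
static int lc_fw_flash_component(const struct lc_fw_ops *ops, void *dev,
				 uint16_t index, const uint8_t *img,
				 uint32_t img_size, uint16_t max_write)
{
	uint32_t fwhandle;
	uint32_t off;
	int err;

	err = ops->fsm_lock(dev, &fwhandle);	/* MCC LOCK_UPDATE_HANDLE */
	if (err)
		return err;
	err = ops->fsm_component_update(dev, fwhandle, index, img_size);
	if (err)				/* MCC UPDATE_COMPONENT */
		goto out_release;
	for (off = 0; off < img_size; off += max_write) {
		uint32_t left = img_size - off;
		uint16_t len = left < max_write ? (uint16_t)left : max_write;

		/* The image data itself goes through MCDA, tunnelled via MDDT. */
		err = ops->fsm_block_download(dev, fwhandle, img + off, len, off);
		if (err)
			goto out_release;
	}
	err = ops->fsm_component_verify(dev, fwhandle, index); /* MCC VERIFY_COMPONENT */
	if (err)
		goto out_release;
	err = ops->fsm_activate(dev, fwhandle);	/* MCC ACTIVATE */
out_release:
	ops->fsm_release(dev, fwhandle);	/* MCC RELEASE_UPDATE_HANDLE */
	return err;
}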
+static const struct mlxfw_dev_ops mlxsw_linecard_device_dev_ops = { + .component_query = mlxsw_linecard_device_fw_component_query, + .fsm_lock = mlxsw_linecard_device_fw_fsm_lock, + .fsm_component_update = mlxsw_linecard_device_fw_fsm_component_update, + .fsm_block_download = mlxsw_linecard_device_fw_fsm_block_download, + .fsm_component_verify = mlxsw_linecard_device_fw_fsm_component_verify, + .fsm_activate = mlxsw_linecard_device_fw_fsm_activate, + .fsm_query_state = mlxsw_linecard_device_fw_fsm_query_state, + .fsm_cancel = mlxsw_linecard_device_fw_fsm_cancel, + .fsm_release = mlxsw_linecard_device_fw_fsm_release, +}; + +int mlxsw_linecard_flash_update(struct devlink *linecard_devlink, + struct mlxsw_linecard *linecard, + const struct firmware *firmware, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core *mlxsw_core = linecard->linecards->mlxsw_core; + struct mlxsw_linecard_device_fw_info info = { + .mlxfw_dev = { + .ops = &mlxsw_linecard_device_dev_ops, + .psid = linecard->device.info.psid, + .psid_size = strlen(linecard->device.info.psid), + .devlink = linecard_devlink, + }, + .mlxsw_core = mlxsw_core, + .linecard = linecard, + }; + int err; + + mutex_lock(&linecard->lock); + if (!linecard->active) { + NL_SET_ERR_MSG_MOD(extack, "Only active line cards can be flashed"); + err = -EINVAL; + goto unlock; + } + err = mlxsw_core_fw_flash(mlxsw_core, &info.mlxfw_dev, + firmware, extack); +unlock: + mutex_unlock(&linecard->lock); + return err; +} + +static int mlxsw_linecard_device_psid_get(struct mlxsw_linecard *linecard, + u8 device_index, char *psid) +{ + struct mlxsw_core *mlxsw_core = linecard->linecards->mlxsw_core; + char mddt_pl[MLXSW_REG_MDDT_LEN]; + char *mgir_pl; + int err; + + mlxsw_reg_mddt_pack(mddt_pl, linecard->slot_index, device_index, + MLXSW_REG_MDDT_METHOD_QUERY, + MLXSW_REG(mgir), &mgir_pl); + + mlxsw_reg_mgir_pack(mgir_pl); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mddt), mddt_pl); + if (err) + return err; + + mlxsw_reg_mgir_fw_info_psid_memcpy_from(mgir_pl, psid); + return 0; +} + +static int mlxsw_linecard_device_info_update(struct mlxsw_linecard *linecard) +{ + struct mlxsw_core *mlxsw_core = linecard->linecards->mlxsw_core; + bool flashable_found = false; + u8 msg_seq = 0; + + do { + struct mlxsw_linecard_device_info info; + char mddq_pl[MLXSW_REG_MDDQ_LEN]; + bool flash_owner; + bool data_valid; + u8 device_index; + int err; + + mlxsw_reg_mddq_device_info_pack(mddq_pl, linecard->slot_index, + msg_seq); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mddq), mddq_pl); + if (err) + return err; + mlxsw_reg_mddq_device_info_unpack(mddq_pl, &msg_seq, + &data_valid, &flash_owner, + &device_index, + &info.fw_major, + &info.fw_minor, + &info.fw_sub_minor); + if (!data_valid) + break; + if (!flash_owner) /* We care only about flashable ones. 
*/ + continue; + if (flashable_found) { + dev_warn_once(linecard->linecards->bus_info->dev, "linecard %u: More flashable devices present, exposing only the first one\n", + linecard->slot_index); + return 0; + } + + err = mlxsw_linecard_device_psid_get(linecard, device_index, + info.psid); + if (err) + return err; + + linecard->device.info = info; + linecard->device.index = device_index; + flashable_found = true; + } while (msg_seq); + + return 0; +} + +static void mlxsw_linecard_provision_fail(struct mlxsw_linecard *linecard) +{ + linecard->provisioned = false; + linecard->ready = false; + linecard->active = false; + devlink_linecard_provision_fail(linecard->devlink_linecard); +} + +struct mlxsw_linecards_event_ops_item { + struct list_head list; + const struct mlxsw_linecards_event_ops *event_ops; + void *priv; +}; + +static void +mlxsw_linecard_event_op_call(struct mlxsw_linecard *linecard, + mlxsw_linecards_event_op_t *op, void *priv) +{ + struct mlxsw_core *mlxsw_core = linecard->linecards->mlxsw_core; + + if (!op) + return; + op(mlxsw_core, linecard->slot_index, priv); +} + +static void +mlxsw_linecard_active_ops_call(struct mlxsw_linecard *linecard) +{ + struct mlxsw_linecards *linecards = linecard->linecards; + struct mlxsw_linecards_event_ops_item *item; + + mutex_lock(&linecards->event_ops_list_lock); + list_for_each_entry(item, &linecards->event_ops_list, list) + mlxsw_linecard_event_op_call(linecard, + item->event_ops->got_active, + item->priv); + mutex_unlock(&linecards->event_ops_list_lock); +} + +static void +mlxsw_linecard_inactive_ops_call(struct mlxsw_linecard *linecard) +{ + struct mlxsw_linecards *linecards = linecard->linecards; + struct mlxsw_linecards_event_ops_item *item; + + mutex_lock(&linecards->event_ops_list_lock); + list_for_each_entry(item, &linecards->event_ops_list, list) + mlxsw_linecard_event_op_call(linecard, + item->event_ops->got_inactive, + item->priv); + mutex_unlock(&linecards->event_ops_list_lock); +} + +static void +mlxsw_linecards_event_ops_register_call(struct mlxsw_linecards *linecards, + const struct mlxsw_linecards_event_ops_item *item) +{ + struct mlxsw_linecard *linecard; + int i; + + for (i = 0; i < linecards->count; i++) { + linecard = mlxsw_linecard_get(linecards, i + 1); + mutex_lock(&linecard->lock); + if (linecard->active) + mlxsw_linecard_event_op_call(linecard, + item->event_ops->got_active, + item->priv); + mutex_unlock(&linecard->lock); + } +} + +static void +mlxsw_linecards_event_ops_unregister_call(struct mlxsw_linecards *linecards, + const struct mlxsw_linecards_event_ops_item *item) +{ + struct mlxsw_linecard *linecard; + int i; + + for (i = 0; i < linecards->count; i++) { + linecard = mlxsw_linecard_get(linecards, i + 1); + mutex_lock(&linecard->lock); + if (linecard->active) + mlxsw_linecard_event_op_call(linecard, + item->event_ops->got_inactive, + item->priv); + mutex_unlock(&linecard->lock); + } +} + +int mlxsw_linecards_event_ops_register(struct mlxsw_core *mlxsw_core, + struct mlxsw_linecards_event_ops *ops, + void *priv) +{ + struct mlxsw_linecards *linecards = mlxsw_core_linecards(mlxsw_core); + struct mlxsw_linecards_event_ops_item *item; + + if (!linecards) + return 0; + item = kzalloc(sizeof(*item), GFP_KERNEL); + if (!item) + return -ENOMEM; + item->event_ops = ops; + item->priv = priv; + + mutex_lock(&linecards->event_ops_list_lock); + list_add_tail(&item->list, &linecards->event_ops_list); + mutex_unlock(&linecards->event_ops_list_lock); + mlxsw_linecards_event_ops_register_call(linecards, item); + return 0; +} 
+EXPORT_SYMBOL(mlxsw_linecards_event_ops_register); + +void mlxsw_linecards_event_ops_unregister(struct mlxsw_core *mlxsw_core, + struct mlxsw_linecards_event_ops *ops, + void *priv) +{ + struct mlxsw_linecards *linecards = mlxsw_core_linecards(mlxsw_core); + struct mlxsw_linecards_event_ops_item *item, *tmp; + bool found = false; + + if (!linecards) + return; + mutex_lock(&linecards->event_ops_list_lock); + list_for_each_entry_safe(item, tmp, &linecards->event_ops_list, list) { + if (item->event_ops == ops && item->priv == priv) { + list_del(&item->list); + found = true; + break; + } + } + mutex_unlock(&linecards->event_ops_list_lock); + + if (!found) + return; + mlxsw_linecards_event_ops_unregister_call(linecards, item); + kfree(item); +} +EXPORT_SYMBOL(mlxsw_linecards_event_ops_unregister); + +int mlxsw_linecard_devlink_info_get(struct mlxsw_linecard *linecard, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + char buf[32]; + int err; + + mutex_lock(&linecard->lock); + if (WARN_ON(!linecard->provisioned)) { + err = -EOPNOTSUPP; + goto unlock; + } + + sprintf(buf, "%d", linecard->hw_revision); + err = devlink_info_version_fixed_put(req, "hw.revision", buf); + if (err) + goto unlock; + + sprintf(buf, "%d", linecard->ini_version); + err = devlink_info_version_running_put(req, "ini.version", buf); + if (err) + goto unlock; + + if (linecard->active) { + struct mlxsw_linecard_device_info *info = &linecard->device.info; + + err = devlink_info_version_fixed_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW_PSID, + info->psid); + + sprintf(buf, "%u.%u.%u", info->fw_major, info->fw_minor, + info->fw_sub_minor); + err = devlink_info_version_running_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW, + buf); + if (err) + goto unlock; + } + +unlock: + mutex_unlock(&linecard->lock); + return err; +} + +static int +mlxsw_linecard_provision_set(struct mlxsw_linecard *linecard, u8 card_type, + u16 hw_revision, u16 ini_version) +{ + struct mlxsw_linecards *linecards = linecard->linecards; + const char *type; + int err; + + type = mlxsw_linecard_types_lookup(linecards, card_type); + mlxsw_linecard_status_event_done(linecard, + MLXSW_LINECARD_STATUS_EVENT_TYPE_PROVISION); + if (!type) { + /* It is possible for a line card to be provisioned before + * driver initialization. Due to a missing INI bundle file + * or an outdated one, the queried card's type might not + * be recognized by the driver. In this case, try to query + * the card's name from the device. 
+ */ + type = mlxsw_linecard_type_name(linecard); + if (IS_ERR(type)) { + mlxsw_linecard_provision_fail(linecard); + return PTR_ERR(type); + } + } + linecard->provisioned = true; + linecard->hw_revision = hw_revision; + linecard->ini_version = ini_version; + + err = mlxsw_linecard_bdev_add(linecard); + if (err) { + linecard->provisioned = false; + mlxsw_linecard_provision_fail(linecard); + return err; + } + + devlink_linecard_provision_set(linecard->devlink_linecard, type); + return 0; +} + +static void mlxsw_linecard_provision_clear(struct mlxsw_linecard *linecard) +{ + mlxsw_linecard_status_event_done(linecard, + MLXSW_LINECARD_STATUS_EVENT_TYPE_UNPROVISION); + mlxsw_linecard_bdev_del(linecard); + linecard->provisioned = false; + devlink_linecard_provision_clear(linecard->devlink_linecard); +} + +static int mlxsw_linecard_ready_set(struct mlxsw_linecard *linecard) +{ + struct mlxsw_core *mlxsw_core = linecard->linecards->mlxsw_core; + char mddc_pl[MLXSW_REG_MDDC_LEN]; + int err; + + err = mlxsw_linecard_device_info_update(linecard); + if (err) + return err; + + mlxsw_reg_mddc_pack(mddc_pl, linecard->slot_index, false, true); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(mddc), mddc_pl); + if (err) + return err; + linecard->ready = true; + return 0; +} + +static int mlxsw_linecard_ready_clear(struct mlxsw_linecard *linecard) +{ + struct mlxsw_core *mlxsw_core = linecard->linecards->mlxsw_core; + char mddc_pl[MLXSW_REG_MDDC_LEN]; + int err; + + mlxsw_reg_mddc_pack(mddc_pl, linecard->slot_index, false, false); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(mddc), mddc_pl); + if (err) + return err; + linecard->ready = false; + return 0; +} + +static void mlxsw_linecard_active_set(struct mlxsw_linecard *linecard) +{ + mlxsw_linecard_active_ops_call(linecard); + linecard->active = true; + devlink_linecard_activate(linecard->devlink_linecard); +} + +static void mlxsw_linecard_active_clear(struct mlxsw_linecard *linecard) +{ + mlxsw_linecard_inactive_ops_call(linecard); + linecard->active = false; + devlink_linecard_deactivate(linecard->devlink_linecard); +} + +static int mlxsw_linecard_status_process(struct mlxsw_linecards *linecards, + struct mlxsw_linecard *linecard, + const char *mddq_pl) +{ + enum mlxsw_reg_mddq_slot_info_ready ready; + bool provisioned, sr_valid, active; + u16 ini_version, hw_revision; + u8 slot_index, card_type; + int err = 0; + + mlxsw_reg_mddq_slot_info_unpack(mddq_pl, &slot_index, &provisioned, + &sr_valid, &ready, &active, + &hw_revision, &ini_version, + &card_type); + + if (linecard) { + if (WARN_ON(slot_index != linecard->slot_index)) + return -EINVAL; + } else { + if (WARN_ON(slot_index > linecards->count)) + return -EINVAL; + linecard = mlxsw_linecard_get(linecards, slot_index); + } + + mutex_lock(&linecard->lock); + + if (provisioned && linecard->provisioned != provisioned) { + err = mlxsw_linecard_provision_set(linecard, card_type, + hw_revision, ini_version); + if (err) + goto out; + } + + if (ready == MLXSW_REG_MDDQ_SLOT_INFO_READY_READY && !linecard->ready) { + err = mlxsw_linecard_ready_set(linecard); + if (err) + goto out; + } + + if (active && linecard->active != active) + mlxsw_linecard_active_set(linecard); + + if (!active && linecard->active != active) + mlxsw_linecard_active_clear(linecard); + + if (ready != MLXSW_REG_MDDQ_SLOT_INFO_READY_READY && + linecard->ready) { + err = mlxsw_linecard_ready_clear(linecard); + if (err) + goto out; + } + + if (!provisioned && linecard->provisioned != provisioned) + mlxsw_linecard_provision_clear(linecard); + +out: + 
mutex_unlock(&linecard->lock); + return err; +} + +static int mlxsw_linecard_status_get_and_process(struct mlxsw_core *mlxsw_core, + struct mlxsw_linecards *linecards, + struct mlxsw_linecard *linecard) +{ + char mddq_pl[MLXSW_REG_MDDQ_LEN]; + int err; + + mlxsw_reg_mddq_slot_info_pack(mddq_pl, linecard->slot_index, false); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mddq), mddq_pl); + if (err) + return err; + + return mlxsw_linecard_status_process(linecards, linecard, mddq_pl); +} + +static void mlxsw_linecards_irq_event_handler(struct mlxsw_core *mlxsw_core) +{ + struct mlxsw_linecards *linecards = mlxsw_core_linecards(mlxsw_core); + int i; + + /* Handle change of line card active state. */ + for (i = 0; i < linecards->count; i++) { + struct mlxsw_linecard *linecard = mlxsw_linecard_get(linecards, + i + 1); + + mlxsw_linecard_status_get_and_process(mlxsw_core, linecards, + linecard); + } +} + +static const char * const mlxsw_linecard_status_event_type_name[] = { + [MLXSW_LINECARD_STATUS_EVENT_TYPE_PROVISION] = "provision", + [MLXSW_LINECARD_STATUS_EVENT_TYPE_UNPROVISION] = "unprovision", +}; + +static void mlxsw_linecard_status_event_to_work(struct work_struct *work) +{ + struct mlxsw_linecard *linecard = + container_of(work, struct mlxsw_linecard, + status_event_to_dw.work); + + mutex_lock(&linecard->lock); + dev_err(linecard->linecards->bus_info->dev, "linecard %u: Timeout reached waiting on %s status event", + linecard->slot_index, + mlxsw_linecard_status_event_type_name[linecard->status_event_type_to]); + mlxsw_linecard_provision_fail(linecard); + mutex_unlock(&linecard->lock); +} + +static int __mlxsw_linecard_fix_fsm_state(struct mlxsw_linecard *linecard) +{ + dev_info(linecard->linecards->bus_info->dev, "linecard %u: Clearing FSM state error", + linecard->slot_index); + mlxsw_reg_mbct_pack(linecard->mbct_pl, linecard->slot_index, + MLXSW_REG_MBCT_OP_CLEAR_ERRORS, false); + return mlxsw_reg_write(linecard->linecards->mlxsw_core, + MLXSW_REG(mbct), linecard->mbct_pl); +} + +static int mlxsw_linecard_fix_fsm_state(struct mlxsw_linecard *linecard, + enum mlxsw_reg_mbct_fsm_state fsm_state) +{ + if (fsm_state != MLXSW_REG_MBCT_FSM_STATE_ERROR) + return 0; + return __mlxsw_linecard_fix_fsm_state(linecard); +} + +static int +mlxsw_linecard_query_ini_status(struct mlxsw_linecard *linecard, + enum mlxsw_reg_mbct_status *status, + enum mlxsw_reg_mbct_fsm_state *fsm_state, + struct netlink_ext_ack *extack) +{ + int err; + + mlxsw_reg_mbct_pack(linecard->mbct_pl, linecard->slot_index, + MLXSW_REG_MBCT_OP_QUERY_STATUS, false); + err = mlxsw_reg_query(linecard->linecards->mlxsw_core, MLXSW_REG(mbct), + linecard->mbct_pl); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to query linecard INI status"); + return err; + } + mlxsw_reg_mbct_unpack(linecard->mbct_pl, NULL, status, fsm_state); + return err; +} + +static int +mlxsw_linecard_ini_transfer(struct mlxsw_core *mlxsw_core, + struct mlxsw_linecard *linecard, + const struct mlxsw_linecard_ini_file *ini_file, + struct netlink_ext_ack *extack) +{ + enum mlxsw_reg_mbct_fsm_state fsm_state; + enum mlxsw_reg_mbct_status status; + size_t size_left; + const u8 *data; + int err; + + size_left = le16_to_cpu(ini_file->size); + data = ini_file->data; + while (size_left) { + size_t data_size = MLXSW_REG_MBCT_DATA_LEN; + bool is_last = false; + + if (size_left <= MLXSW_REG_MBCT_DATA_LEN) { + data_size = size_left; + is_last = true; + } + + mlxsw_reg_mbct_pack(linecard->mbct_pl, linecard->slot_index, + MLXSW_REG_MBCT_OP_DATA_TRANSFER, false); + 
mlxsw_reg_mbct_dt_pack(linecard->mbct_pl, data_size, + is_last, data); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(mbct), + linecard->mbct_pl); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to issue linecard INI data transfer"); + return err; + } + mlxsw_reg_mbct_unpack(linecard->mbct_pl, NULL, + &status, &fsm_state); + if ((!is_last && status != MLXSW_REG_MBCT_STATUS_PART_DATA) || + (is_last && status != MLXSW_REG_MBCT_STATUS_LAST_DATA)) { + NL_SET_ERR_MSG_MOD(extack, "Failed to transfer linecard INI data"); + mlxsw_linecard_fix_fsm_state(linecard, fsm_state); + return -EINVAL; + } + size_left -= data_size; + data += data_size; + } + + return 0; +} + +static int +mlxsw_linecard_ini_erase(struct mlxsw_core *mlxsw_core, + struct mlxsw_linecard *linecard, + struct netlink_ext_ack *extack) +{ + enum mlxsw_reg_mbct_fsm_state fsm_state; + enum mlxsw_reg_mbct_status status; + int err; + + mlxsw_reg_mbct_pack(linecard->mbct_pl, linecard->slot_index, + MLXSW_REG_MBCT_OP_ERASE_INI_IMAGE, false); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(mbct), + linecard->mbct_pl); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to issue linecard INI erase"); + return err; + } + mlxsw_reg_mbct_unpack(linecard->mbct_pl, NULL, &status, &fsm_state); + switch (status) { + case MLXSW_REG_MBCT_STATUS_ERASE_COMPLETE: + break; + default: + /* Should not happen */ + fallthrough; + case MLXSW_REG_MBCT_STATUS_ERASE_FAILED: + NL_SET_ERR_MSG_MOD(extack, "Failed to erase linecard INI"); + goto fix_fsm_err_out; + case MLXSW_REG_MBCT_STATUS_ERROR_INI_IN_USE: + NL_SET_ERR_MSG_MOD(extack, "Failed to erase linecard INI while being used"); + goto fix_fsm_err_out; + } + return 0; + +fix_fsm_err_out: + mlxsw_linecard_fix_fsm_state(linecard, fsm_state); + return -EINVAL; +} + +static void mlxsw_linecard_bct_process(struct mlxsw_core *mlxsw_core, + const char *mbct_pl) +{ + struct mlxsw_linecards *linecards = mlxsw_core_linecards(mlxsw_core); + enum mlxsw_reg_mbct_fsm_state fsm_state; + enum mlxsw_reg_mbct_status status; + struct mlxsw_linecard *linecard; + u8 slot_index; + + mlxsw_reg_mbct_unpack(mbct_pl, &slot_index, &status, &fsm_state); + if (WARN_ON(slot_index > linecards->count)) + return; + linecard = mlxsw_linecard_get(linecards, slot_index); + mutex_lock(&linecard->lock); + if (status == MLXSW_REG_MBCT_STATUS_ACTIVATION_FAILED) { + dev_err(linecards->bus_info->dev, "linecard %u: Failed to activate INI", + linecard->slot_index); + goto fix_fsm_out; + } + mutex_unlock(&linecard->lock); + return; + +fix_fsm_out: + mlxsw_linecard_fix_fsm_state(linecard, fsm_state); + mlxsw_linecard_provision_fail(linecard); + mutex_unlock(&linecard->lock); +} + +static int +mlxsw_linecard_ini_activate(struct mlxsw_core *mlxsw_core, + struct mlxsw_linecard *linecard, + struct netlink_ext_ack *extack) +{ + enum mlxsw_reg_mbct_fsm_state fsm_state; + enum mlxsw_reg_mbct_status status; + int err; + + mlxsw_reg_mbct_pack(linecard->mbct_pl, linecard->slot_index, + MLXSW_REG_MBCT_OP_ACTIVATE, true); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(mbct), linecard->mbct_pl); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to issue linecard INI activation"); + return err; + } + mlxsw_reg_mbct_unpack(linecard->mbct_pl, NULL, &status, &fsm_state); + if (status == MLXSW_REG_MBCT_STATUS_ACTIVATION_FAILED) { + NL_SET_ERR_MSG_MOD(extack, "Failed to activate linecard INI"); + goto fix_fsm_err_out; + } + + return 0; + +fix_fsm_err_out: + mlxsw_linecard_fix_fsm_state(linecard, fsm_state); + return -EINVAL; +} + +#define MLXSW_LINECARD_INI_WAIT_RETRIES 10 
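+/* Poll the MBCT status every MLXSW_LINECARD_INI_WAIT_MS milliseconds while
+ * the FSM still reports the INI as in use, giving up after
+ * MLXSW_LINECARD_INI_WAIT_RETRIES retries.
+ */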
+#define MLXSW_LINECARD_INI_WAIT_MS 500 + +static int +mlxsw_linecard_ini_in_use_wait(struct mlxsw_core *mlxsw_core, + struct mlxsw_linecard *linecard, + struct netlink_ext_ack *extack) +{ + enum mlxsw_reg_mbct_fsm_state fsm_state; + enum mlxsw_reg_mbct_status status; + unsigned int ini_wait_retries = 0; + int err; + +query_ini_status: + err = mlxsw_linecard_query_ini_status(linecard, &status, + &fsm_state, extack); + if (err) + return err; + + switch (fsm_state) { + case MLXSW_REG_MBCT_FSM_STATE_INI_IN_USE: + if (ini_wait_retries++ > MLXSW_LINECARD_INI_WAIT_RETRIES) { + NL_SET_ERR_MSG_MOD(extack, "Failed to wait for linecard INI to be unused"); + return -EINVAL; + } + mdelay(MLXSW_LINECARD_INI_WAIT_MS); + goto query_ini_status; + default: + break; + } + return 0; +} + +static bool mlxsw_linecard_port_selector(void *priv, u16 local_port) +{ + struct mlxsw_linecard *linecard = priv; + struct mlxsw_core *mlxsw_core; + + mlxsw_core = linecard->linecards->mlxsw_core; + return linecard == mlxsw_core_port_linecard_get(mlxsw_core, local_port); +} + +static int mlxsw_linecard_provision(struct devlink_linecard *devlink_linecard, + void *priv, const char *type, + const void *type_priv, + struct netlink_ext_ack *extack) +{ + const struct mlxsw_linecard_ini_file *ini_file = type_priv; + struct mlxsw_linecard *linecard = priv; + struct mlxsw_core *mlxsw_core; + int err; + + mutex_lock(&linecard->lock); + + mlxsw_core = linecard->linecards->mlxsw_core; + + err = mlxsw_linecard_ini_erase(mlxsw_core, linecard, extack); + if (err) + goto err_out; + + err = mlxsw_linecard_ini_transfer(mlxsw_core, linecard, + ini_file, extack); + if (err) + goto err_out; + + mlxsw_linecard_status_event_to_schedule(linecard, + MLXSW_LINECARD_STATUS_EVENT_TYPE_PROVISION); + err = mlxsw_linecard_ini_activate(mlxsw_core, linecard, extack); + if (err) + goto err_out; + + goto out; + +err_out: + mlxsw_linecard_provision_fail(linecard); +out: + mutex_unlock(&linecard->lock); + return err; +} + +static int mlxsw_linecard_unprovision(struct devlink_linecard *devlink_linecard, + void *priv, + struct netlink_ext_ack *extack) +{ + struct mlxsw_linecard *linecard = priv; + struct mlxsw_core *mlxsw_core; + int err; + + mutex_lock(&linecard->lock); + + mlxsw_core = linecard->linecards->mlxsw_core; + + mlxsw_core_ports_remove_selected(mlxsw_core, + mlxsw_linecard_port_selector, + linecard); + + err = mlxsw_linecard_ini_in_use_wait(mlxsw_core, linecard, extack); + if (err) + goto err_out; + + mlxsw_linecard_status_event_to_schedule(linecard, + MLXSW_LINECARD_STATUS_EVENT_TYPE_UNPROVISION); + err = mlxsw_linecard_ini_erase(mlxsw_core, linecard, extack); + if (err) + goto err_out; + + goto out; + +err_out: + mlxsw_linecard_provision_fail(linecard); +out: + mutex_unlock(&linecard->lock); + return err; +} + +static bool mlxsw_linecard_same_provision(struct devlink_linecard *devlink_linecard, + void *priv, const char *type, + const void *type_priv) +{ + const struct mlxsw_linecard_ini_file *ini_file = type_priv; + struct mlxsw_linecard *linecard = priv; + bool ret; + + mutex_lock(&linecard->lock); + ret = linecard->hw_revision == be16_to_cpu(ini_file->format.hw_revision) && + linecard->ini_version == be16_to_cpu(ini_file->format.ini_version); + mutex_unlock(&linecard->lock); + return ret; +} + +static unsigned int +mlxsw_linecard_types_count(struct devlink_linecard *devlink_linecard, + void *priv) +{ + struct mlxsw_linecard *linecard = priv; + + return linecard->linecards->types_info ? 
+ linecard->linecards->types_info->count : 0; +} + +static void mlxsw_linecard_types_get(struct devlink_linecard *devlink_linecard, + void *priv, unsigned int index, + const char **type, const void **type_priv) +{ + struct mlxsw_linecard_types_info *types_info; + struct mlxsw_linecard_ini_file *ini_file; + struct mlxsw_linecard *linecard = priv; + + types_info = linecard->linecards->types_info; + if (WARN_ON_ONCE(!types_info)) + return; + ini_file = types_info->ini_files[index]; + *type = ini_file->format.name; + *type_priv = ini_file; +} + +static const struct devlink_linecard_ops mlxsw_linecard_ops = { + .provision = mlxsw_linecard_provision, + .unprovision = mlxsw_linecard_unprovision, + .same_provision = mlxsw_linecard_same_provision, + .types_count = mlxsw_linecard_types_count, + .types_get = mlxsw_linecard_types_get, +}; + +struct mlxsw_linecard_status_event { + struct mlxsw_core *mlxsw_core; + char mddq_pl[MLXSW_REG_MDDQ_LEN]; + struct work_struct work; +}; + +static void mlxsw_linecard_status_event_work(struct work_struct *work) +{ + struct mlxsw_linecard_status_event *event; + struct mlxsw_linecards *linecards; + struct mlxsw_core *mlxsw_core; + + event = container_of(work, struct mlxsw_linecard_status_event, work); + mlxsw_core = event->mlxsw_core; + linecards = mlxsw_core_linecards(mlxsw_core); + mlxsw_linecard_status_process(linecards, NULL, event->mddq_pl); + kfree(event); +} + +static void +mlxsw_linecard_status_listener_func(const struct mlxsw_reg_info *reg, + char *mddq_pl, void *priv) +{ + struct mlxsw_linecard_status_event *event; + struct mlxsw_core *mlxsw_core = priv; + + event = kmalloc(sizeof(*event), GFP_ATOMIC); + if (!event) + return; + event->mlxsw_core = mlxsw_core; + memcpy(event->mddq_pl, mddq_pl, sizeof(event->mddq_pl)); + INIT_WORK(&event->work, mlxsw_linecard_status_event_work); + mlxsw_core_schedule_work(&event->work); +} + +struct mlxsw_linecard_bct_event { + struct mlxsw_core *mlxsw_core; + char mbct_pl[MLXSW_REG_MBCT_LEN]; + struct work_struct work; +}; + +static void mlxsw_linecard_bct_event_work(struct work_struct *work) +{ + struct mlxsw_linecard_bct_event *event; + struct mlxsw_core *mlxsw_core; + + event = container_of(work, struct mlxsw_linecard_bct_event, work); + mlxsw_core = event->mlxsw_core; + mlxsw_linecard_bct_process(mlxsw_core, event->mbct_pl); + kfree(event); +} + +static void +mlxsw_linecard_bct_listener_func(const struct mlxsw_reg_info *reg, + char *mbct_pl, void *priv) +{ + struct mlxsw_linecard_bct_event *event; + struct mlxsw_core *mlxsw_core = priv; + + event = kmalloc(sizeof(*event), GFP_ATOMIC); + if (!event) + return; + event->mlxsw_core = mlxsw_core; + memcpy(event->mbct_pl, mbct_pl, sizeof(event->mbct_pl)); + INIT_WORK(&event->work, mlxsw_linecard_bct_event_work); + mlxsw_core_schedule_work(&event->work); +} + +static const struct mlxsw_listener mlxsw_linecard_listener[] = { + MLXSW_CORE_EVENTL(mlxsw_linecard_status_listener_func, DSDSC), + MLXSW_CORE_EVENTL(mlxsw_linecard_bct_listener_func, BCTOE), +}; + +static int mlxsw_linecard_event_delivery_set(struct mlxsw_core *mlxsw_core, + struct mlxsw_linecard *linecard, + bool enable) +{ + char mddq_pl[MLXSW_REG_MDDQ_LEN]; + + mlxsw_reg_mddq_slot_info_pack(mddq_pl, linecard->slot_index, enable); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mddq), mddq_pl); +} + +static int mlxsw_linecard_init(struct mlxsw_core *mlxsw_core, + struct mlxsw_linecards *linecards, + u8 slot_index) +{ + struct devlink_linecard *devlink_linecard; + struct mlxsw_linecard *linecard; + + linecard = 
mlxsw_linecard_get(linecards, slot_index); + linecard->slot_index = slot_index; + linecard->linecards = linecards; + mutex_init(&linecard->lock); + + devlink_linecard = devlink_linecard_create(priv_to_devlink(mlxsw_core), + slot_index, &mlxsw_linecard_ops, + linecard); + if (IS_ERR(devlink_linecard)) + return PTR_ERR(devlink_linecard); + + linecard->devlink_linecard = devlink_linecard; + INIT_DELAYED_WORK(&linecard->status_event_to_dw, + &mlxsw_linecard_status_event_to_work); + + return 0; +} + +static void mlxsw_linecard_fini(struct mlxsw_core *mlxsw_core, + struct mlxsw_linecards *linecards, + u8 slot_index) +{ + struct mlxsw_linecard *linecard; + + linecard = mlxsw_linecard_get(linecards, slot_index); + cancel_delayed_work_sync(&linecard->status_event_to_dw); + /* Make sure all scheduled events are processed */ + mlxsw_core_flush_owq(); + if (linecard->active) + mlxsw_linecard_active_clear(linecard); + mlxsw_linecard_bdev_del(linecard); + devlink_linecard_destroy(linecard->devlink_linecard); + mutex_destroy(&linecard->lock); +} + +static int +mlxsw_linecard_event_delivery_init(struct mlxsw_core *mlxsw_core, + struct mlxsw_linecards *linecards, + u8 slot_index) +{ + struct mlxsw_linecard *linecard; + int err; + + linecard = mlxsw_linecard_get(linecards, slot_index); + err = mlxsw_linecard_event_delivery_set(mlxsw_core, linecard, true); + if (err) + return err; + + err = mlxsw_linecard_status_get_and_process(mlxsw_core, linecards, + linecard); + if (err) + goto err_status_get_and_process; + + return 0; + +err_status_get_and_process: + mlxsw_linecard_event_delivery_set(mlxsw_core, linecard, false); + return err; +} + +static void +mlxsw_linecard_event_delivery_fini(struct mlxsw_core *mlxsw_core, + struct mlxsw_linecards *linecards, + u8 slot_index) +{ + struct mlxsw_linecard *linecard; + + linecard = mlxsw_linecard_get(linecards, slot_index); + mlxsw_linecard_event_delivery_set(mlxsw_core, linecard, false); +} + +/* LINECARDS INI BUNDLE FILE + * +----------------------------------+ + * | MAGIC ("NVLCINI+") | + * +----------------------------------+ +--------------------+ + * | INI 0 +---> | __le16 size | + * +----------------------------------+ | __be16 hw_revision | + * | INI 1 | | __be16 ini_version | + * +----------------------------------+ | u8 __dontcare[3] | + * | ... | | u8 type | + * +----------------------------------+ | u8 name[20] | + * | INI N | | ... 
| + * +----------------------------------+ +--------------------+ + */ + +#define MLXSW_LINECARDS_INI_BUNDLE_MAGIC "NVLCINI+" + +static int +mlxsw_linecard_types_file_validate(struct mlxsw_linecards *linecards, + struct mlxsw_linecard_types_info *types_info) +{ + size_t magic_size = strlen(MLXSW_LINECARDS_INI_BUNDLE_MAGIC); + struct mlxsw_linecard_ini_file *ini_file; + size_t size = types_info->data_size; + const u8 *data = types_info->data; + unsigned int count = 0; + u16 ini_file_size; + + if (size < magic_size) { + dev_warn(linecards->bus_info->dev, "Invalid linecards INIs file size, smaller than magic size\n"); + return -EINVAL; + } + if (memcmp(data, MLXSW_LINECARDS_INI_BUNDLE_MAGIC, magic_size)) { + dev_warn(linecards->bus_info->dev, "Invalid linecards INIs file magic pattern\n"); + return -EINVAL; + } + + data += magic_size; + size -= magic_size; + + while (size > 0) { + if (size < sizeof(*ini_file)) { + dev_warn(linecards->bus_info->dev, "Linecards INIs file contains INI which is smaller than bare minimum\n"); + return -EINVAL; + } + ini_file = (struct mlxsw_linecard_ini_file *) data; + ini_file_size = le16_to_cpu(ini_file->size); + if (ini_file_size + sizeof(__le16) > size) { + dev_warn(linecards->bus_info->dev, "Linecards INIs file appears to be truncated\n"); + return -EINVAL; + } + if (ini_file_size % 4) { + dev_warn(linecards->bus_info->dev, "Linecards INIs file contains INI with invalid size\n"); + return -EINVAL; + } + data += ini_file_size + sizeof(__le16); + size -= ini_file_size + sizeof(__le16); + count++; + } + if (!count) { + dev_warn(linecards->bus_info->dev, "Linecards INIs file does not contain any INI\n"); + return -EINVAL; + } + types_info->count = count; + return 0; +} + +static void +mlxsw_linecard_types_file_parse(struct mlxsw_linecard_types_info *types_info) +{ + size_t magic_size = strlen(MLXSW_LINECARDS_INI_BUNDLE_MAGIC); + size_t size = types_info->data_size - magic_size; + const u8 *data = types_info->data + magic_size; + struct mlxsw_linecard_ini_file *ini_file; + unsigned int count = 0; + u16 ini_file_size; + int i; + + while (size) { + ini_file = (struct mlxsw_linecard_ini_file *) data; + ini_file_size = le16_to_cpu(ini_file->size); + for (i = 0; i < ini_file_size / 4; i++) { + u32 *val = &((u32 *) ini_file->data)[i]; + + *val = swab32(*val); + } + types_info->ini_files[count] = ini_file; + data += ini_file_size + sizeof(__le16); + size -= ini_file_size + sizeof(__le16); + count++; + } +} + +#define MLXSW_LINECARDS_INI_BUNDLE_FILENAME_FMT \ + "mellanox/lc_ini_bundle_%u_%u.bin" +#define MLXSW_LINECARDS_INI_BUNDLE_FILENAME_LEN \ + (sizeof(MLXSW_LINECARDS_INI_BUNDLE_FILENAME_FMT) + 4) + +static int mlxsw_linecard_types_init(struct mlxsw_core *mlxsw_core, + struct mlxsw_linecards *linecards) +{ + const struct mlxsw_fw_rev *rev = &linecards->bus_info->fw_rev; + char filename[MLXSW_LINECARDS_INI_BUNDLE_FILENAME_LEN]; + struct mlxsw_linecard_types_info *types_info; + const struct firmware *firmware; + int err; + + err = snprintf(filename, sizeof(filename), + MLXSW_LINECARDS_INI_BUNDLE_FILENAME_FMT, + rev->minor, rev->subminor); + WARN_ON(err >= sizeof(filename)); + + err = request_firmware_direct(&firmware, filename, + linecards->bus_info->dev); + if (err) { + dev_warn(linecards->bus_info->dev, "Could not request linecards INI file \"%s\", provisioning will not be possible\n", + filename); + return 0; + } + + types_info = kzalloc(sizeof(*types_info), GFP_KERNEL); + if (!types_info) { + release_firmware(firmware); + return -ENOMEM; + } + linecards->types_info = 
types_info; + + types_info->data_size = firmware->size; + types_info->data = vmalloc(types_info->data_size); + if (!types_info->data) { + err = -ENOMEM; + release_firmware(firmware); + goto err_data_alloc; + } + memcpy(types_info->data, firmware->data, types_info->data_size); + release_firmware(firmware); + + err = mlxsw_linecard_types_file_validate(linecards, types_info); + if (err) { + err = 0; + goto err_type_file_file_validate; + } + + types_info->ini_files = kmalloc_array(types_info->count, + sizeof(struct mlxsw_linecard_ini_file *), + GFP_KERNEL); + if (!types_info->ini_files) { + err = -ENOMEM; + goto err_ini_files_alloc; + } + + mlxsw_linecard_types_file_parse(types_info); + + return 0; + +err_ini_files_alloc: +err_type_file_file_validate: + vfree(types_info->data); +err_data_alloc: + kfree(types_info); + return err; +} + +static void mlxsw_linecard_types_fini(struct mlxsw_linecards *linecards) +{ + struct mlxsw_linecard_types_info *types_info = linecards->types_info; + + if (!types_info) + return; + kfree(types_info->ini_files); + vfree(types_info->data); + kfree(types_info); +} + +int mlxsw_linecards_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *bus_info) +{ + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + struct mlxsw_linecards *linecards; + u8 slot_count; + int err; + int i; + + mlxsw_reg_mgpir_pack(mgpir_pl, 0); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, + NULL, &slot_count); + if (!slot_count) + return 0; + + linecards = vzalloc(struct_size(linecards, linecards, slot_count)); + if (!linecards) + return -ENOMEM; + linecards->count = slot_count; + linecards->mlxsw_core = mlxsw_core; + linecards->bus_info = bus_info; + INIT_LIST_HEAD(&linecards->event_ops_list); + mutex_init(&linecards->event_ops_list_lock); + + err = mlxsw_linecard_types_init(mlxsw_core, linecards); + if (err) + goto err_types_init; + + err = mlxsw_core_traps_register(mlxsw_core, mlxsw_linecard_listener, + ARRAY_SIZE(mlxsw_linecard_listener), + mlxsw_core); + if (err) + goto err_traps_register; + + err = mlxsw_core_irq_event_handler_register(mlxsw_core, + mlxsw_linecards_irq_event_handler); + if (err) + goto err_irq_event_handler_register; + + mlxsw_core_linecards_set(mlxsw_core, linecards); + + for (i = 0; i < linecards->count; i++) { + err = mlxsw_linecard_init(mlxsw_core, linecards, i + 1); + if (err) + goto err_linecard_init; + } + + for (i = 0; i < linecards->count; i++) { + err = mlxsw_linecard_event_delivery_init(mlxsw_core, linecards, + i + 1); + if (err) + goto err_linecard_event_delivery_init; + } + + return 0; + +err_linecard_event_delivery_init: + for (i--; i >= 0; i--) + mlxsw_linecard_event_delivery_fini(mlxsw_core, linecards, i + 1); + i = linecards->count; +err_linecard_init: + for (i--; i >= 0; i--) + mlxsw_linecard_fini(mlxsw_core, linecards, i + 1); + mlxsw_core_irq_event_handler_unregister(mlxsw_core, + mlxsw_linecards_irq_event_handler); +err_irq_event_handler_register: + mlxsw_core_traps_unregister(mlxsw_core, mlxsw_linecard_listener, + ARRAY_SIZE(mlxsw_linecard_listener), + mlxsw_core); +err_traps_register: + mlxsw_linecard_types_fini(linecards); +err_types_init: + vfree(linecards); + return err; +} + +void mlxsw_linecards_fini(struct mlxsw_core *mlxsw_core) +{ + struct mlxsw_linecards *linecards = mlxsw_core_linecards(mlxsw_core); + int i; + + if (!linecards) + return; + for (i = 0; i < linecards->count; i++) + mlxsw_linecard_event_delivery_fini(mlxsw_core, linecards, i + 1); 
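+	/* Teardown mirrors init in reverse: disable per-slot event delivery
+	 * first, so no new MDDQ status events race with the line card
+	 * destruction below.
+	 */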
+ for (i = 0; i < linecards->count; i++) + mlxsw_linecard_fini(mlxsw_core, linecards, i + 1); + mlxsw_core_irq_event_handler_unregister(mlxsw_core, + mlxsw_linecards_irq_event_handler); + mlxsw_core_traps_unregister(mlxsw_core, mlxsw_linecard_listener, + ARRAY_SIZE(mlxsw_linecard_listener), + mlxsw_core); + mlxsw_linecard_types_fini(linecards); + mutex_destroy(&linecards->event_ops_list_lock); + WARN_ON(!list_empty(&linecards->event_ops_list)); + vfree(linecards); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c new file mode 100644 index 000000000..ef5e61708 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -0,0 +1,1063 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved + * Copyright (c) 2016 Ivan Vecera <cera@cera.cz> + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/device.h> +#include <linux/sysfs.h> +#include <linux/thermal.h> +#include <linux/err.h> +#include <linux/sfp.h> + +#include "core.h" +#include "core_env.h" + +#define MLXSW_THERMAL_POLL_INT 1000 /* ms */ +#define MLXSW_THERMAL_SLOW_POLL_INT 20000 /* ms */ +#define MLXSW_THERMAL_ASIC_TEMP_NORM 75000 /* 75C */ +#define MLXSW_THERMAL_ASIC_TEMP_HIGH 85000 /* 85C */ +#define MLXSW_THERMAL_ASIC_TEMP_HOT 105000 /* 105C */ +#define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */ +#define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2) +#define MLXSW_THERMAL_MAX_STATE 10 +#define MLXSW_THERMAL_MIN_STATE 2 +#define MLXSW_THERMAL_MAX_DUTY 255 + +/* External cooling devices, allowed for binding to mlxsw thermal zones. */ +static char * const mlxsw_thermal_external_allowed_cdev[] = { + "mlxreg_fan", +}; + +enum mlxsw_thermal_trips { + MLXSW_THERMAL_TEMP_TRIP_NORM, + MLXSW_THERMAL_TEMP_TRIP_HIGH, + MLXSW_THERMAL_TEMP_TRIP_HOT, +}; + +struct mlxsw_thermal_trip { + int type; + int temp; + int hyst; + int min_state; + int max_state; +}; + +static const struct mlxsw_thermal_trip default_thermal_trips[] = { + { /* In range - 0-40% PWM */ + .type = THERMAL_TRIP_ACTIVE, + .temp = MLXSW_THERMAL_ASIC_TEMP_NORM, + .hyst = MLXSW_THERMAL_HYSTERESIS_TEMP, + .min_state = 0, + .max_state = (4 * MLXSW_THERMAL_MAX_STATE) / 10, + }, + { + /* In range - 40-100% PWM */ + .type = THERMAL_TRIP_ACTIVE, + .temp = MLXSW_THERMAL_ASIC_TEMP_HIGH, + .hyst = MLXSW_THERMAL_HYSTERESIS_TEMP, + .min_state = (4 * MLXSW_THERMAL_MAX_STATE) / 10, + .max_state = MLXSW_THERMAL_MAX_STATE, + }, + { /* Warning */ + .type = THERMAL_TRIP_HOT, + .temp = MLXSW_THERMAL_ASIC_TEMP_HOT, + .min_state = MLXSW_THERMAL_MAX_STATE, + .max_state = MLXSW_THERMAL_MAX_STATE, + }, +}; + +#define MLXSW_THERMAL_NUM_TRIPS ARRAY_SIZE(default_thermal_trips) + +/* Make sure all trips are writable */ +#define MLXSW_THERMAL_TRIP_MASK (BIT(MLXSW_THERMAL_NUM_TRIPS) - 1) + +struct mlxsw_thermal; + +struct mlxsw_thermal_module { + struct mlxsw_thermal *parent; + struct thermal_zone_device *tzdev; + struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; + int module; /* Module or gearbox number */ + u8 slot_index; +}; + +struct mlxsw_thermal_area { + struct mlxsw_thermal_module *tz_module_arr; + u8 tz_module_num; + struct mlxsw_thermal_module *tz_gearbox_arr; + u8 tz_gearbox_num; + u8 slot_index; + bool active; +}; + +struct mlxsw_thermal { + struct mlxsw_core *core; + const struct mlxsw_bus_info *bus_info; + struct thermal_zone_device *tzdev; + int polling_delay; + struct thermal_cooling_device 
*cdevs[MLXSW_MFCR_PWMS_MAX]; + struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; + struct mlxsw_thermal_area line_cards[]; +}; + +static inline u8 mlxsw_state_to_duty(int state) +{ + return DIV_ROUND_CLOSEST(state * MLXSW_THERMAL_MAX_DUTY, + MLXSW_THERMAL_MAX_STATE); +} + +static inline int mlxsw_duty_to_state(u8 duty) +{ + return DIV_ROUND_CLOSEST(duty * MLXSW_THERMAL_MAX_STATE, + MLXSW_THERMAL_MAX_DUTY); +} + +static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal, + struct thermal_cooling_device *cdev) +{ + int i; + + for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) + if (thermal->cdevs[i] == cdev) + return i; + + /* Allow mlxsw thermal zone binding to an external cooling device */ + for (i = 0; i < ARRAY_SIZE(mlxsw_thermal_external_allowed_cdev); i++) { + if (!strcmp(cdev->type, mlxsw_thermal_external_allowed_cdev[i])) + return 0; + } + + return -ENODEV; +} + +static void +mlxsw_thermal_module_trips_reset(struct mlxsw_thermal_module *tz) +{ + tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = 0; + tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = 0; + tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = 0; +} + +static int +mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, + struct mlxsw_thermal_module *tz, + int crit_temp, int emerg_temp) +{ + int err; + + /* Do not try to query temperature thresholds directly from the module's + * EEPROM if we got valid thresholds from MTMP. + */ + if (!emerg_temp || !crit_temp) { + err = mlxsw_env_module_temp_thresholds_get(core, tz->slot_index, + tz->module, + SFP_TEMP_HIGH_WARN, + &crit_temp); + if (err) + return err; + + err = mlxsw_env_module_temp_thresholds_get(core, tz->slot_index, + tz->module, + SFP_TEMP_HIGH_ALARM, + &emerg_temp); + if (err) + return err; + } + + if (crit_temp > emerg_temp) { + dev_warn(dev, "%s : Critical threshold %d is above emergency threshold %d\n", + tz->tzdev->type, crit_temp, emerg_temp); + return 0; + } + + /* According to the system thermal requirements, the thermal zones are + * defined with three trip points. The critical and emergency + * temperature thresholds, provided by QSFP module are set as "active" + * and "hot" trip points, "normal" trip point is derived from "active" + * by subtracting double hysteresis value. 
+ */ + if (crit_temp >= MLXSW_THERMAL_MODULE_TEMP_SHIFT) + tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp - + MLXSW_THERMAL_MODULE_TEMP_SHIFT; + else + tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp; + tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = crit_temp; + tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = emerg_temp; + + return 0; +} + +static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev, + struct thermal_cooling_device *cdev) +{ + struct mlxsw_thermal *thermal = tzdev->devdata; + struct device *dev = thermal->bus_info->dev; + int i, err; + + /* If the cooling device is one of ours bind it */ + if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0) + return 0; + + for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) { + const struct mlxsw_thermal_trip *trip = &thermal->trips[i]; + + err = thermal_zone_bind_cooling_device(tzdev, i, cdev, + trip->max_state, + trip->min_state, + THERMAL_WEIGHT_DEFAULT); + if (err < 0) { + dev_err(dev, "Failed to bind cooling device to trip %d\n", i); + return err; + } + } + return 0; +} + +static int mlxsw_thermal_unbind(struct thermal_zone_device *tzdev, + struct thermal_cooling_device *cdev) +{ + struct mlxsw_thermal *thermal = tzdev->devdata; + struct device *dev = thermal->bus_info->dev; + int i; + int err; + + /* If the cooling device is our one unbind it */ + if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0) + return 0; + + for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) { + err = thermal_zone_unbind_cooling_device(tzdev, i, cdev); + if (err < 0) { + dev_err(dev, "Failed to unbind cooling device\n"); + return err; + } + } + return 0; +} + +static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev, + int *p_temp) +{ + struct mlxsw_thermal *thermal = tzdev->devdata; + struct device *dev = thermal->bus_info->dev; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + int temp; + int err; + + mlxsw_reg_mtmp_pack(mtmp_pl, 0, 0, false, false); + + err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(dev, "Failed to query temp sensor\n"); + return err; + } + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL); + + *p_temp = temp; + return 0; +} + +static int mlxsw_thermal_get_trip_type(struct thermal_zone_device *tzdev, + int trip, + enum thermal_trip_type *p_type) +{ + struct mlxsw_thermal *thermal = tzdev->devdata; + + if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS) + return -EINVAL; + + *p_type = thermal->trips[trip].type; + return 0; +} + +static int mlxsw_thermal_get_trip_temp(struct thermal_zone_device *tzdev, + int trip, int *p_temp) +{ + struct mlxsw_thermal *thermal = tzdev->devdata; + + if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS) + return -EINVAL; + + *p_temp = thermal->trips[trip].temp; + return 0; +} + +static int mlxsw_thermal_set_trip_temp(struct thermal_zone_device *tzdev, + int trip, int temp) +{ + struct mlxsw_thermal *thermal = tzdev->devdata; + + if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS) + return -EINVAL; + + thermal->trips[trip].temp = temp; + return 0; +} + +static int mlxsw_thermal_get_trip_hyst(struct thermal_zone_device *tzdev, + int trip, int *p_hyst) +{ + struct mlxsw_thermal *thermal = tzdev->devdata; + + *p_hyst = thermal->trips[trip].hyst; + return 0; +} + +static int mlxsw_thermal_set_trip_hyst(struct thermal_zone_device *tzdev, + int trip, int hyst) +{ + struct mlxsw_thermal *thermal = tzdev->devdata; + + thermal->trips[trip].hyst = hyst; + return 0; +} + +static struct thermal_zone_params mlxsw_thermal_params = { + .no_hwmon = true, +}; + +static struct 
thermal_zone_device_ops mlxsw_thermal_ops = { + .bind = mlxsw_thermal_bind, + .unbind = mlxsw_thermal_unbind, + .get_temp = mlxsw_thermal_get_temp, + .get_trip_type = mlxsw_thermal_get_trip_type, + .get_trip_temp = mlxsw_thermal_get_trip_temp, + .set_trip_temp = mlxsw_thermal_set_trip_temp, + .get_trip_hyst = mlxsw_thermal_get_trip_hyst, + .set_trip_hyst = mlxsw_thermal_set_trip_hyst, +}; + +static int mlxsw_thermal_module_bind(struct thermal_zone_device *tzdev, + struct thermal_cooling_device *cdev) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + int i, j, err; + + /* If the cooling device is one of ours bind it */ + if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0) + return 0; + + for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) { + const struct mlxsw_thermal_trip *trip = &tz->trips[i]; + + err = thermal_zone_bind_cooling_device(tzdev, i, cdev, + trip->max_state, + trip->min_state, + THERMAL_WEIGHT_DEFAULT); + if (err < 0) + goto err_thermal_zone_bind_cooling_device; + } + return 0; + +err_thermal_zone_bind_cooling_device: + for (j = i - 1; j >= 0; j--) + thermal_zone_unbind_cooling_device(tzdev, j, cdev); + return err; +} + +static int mlxsw_thermal_module_unbind(struct thermal_zone_device *tzdev, + struct thermal_cooling_device *cdev) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + int i; + int err; + + /* If the cooling device is one of ours unbind it */ + if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0) + return 0; + + for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) { + err = thermal_zone_unbind_cooling_device(tzdev, i, cdev); + WARN_ON(err); + } + return err; +} + +static void +mlxsw_thermal_module_temp_and_thresholds_get(struct mlxsw_core *core, + u8 slot_index, u16 sensor_index, + int *p_temp, int *p_crit_temp, + int *p_emerg_temp) +{ + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + int err; + + /* Read module temperature and thresholds. */ + mlxsw_reg_mtmp_pack(mtmp_pl, slot_index, sensor_index, + false, false); + err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl); + if (err) { + /* Set temperature and thresholds to zero to avoid passing + * uninitialized data back to the caller. + */ + *p_temp = 0; + *p_crit_temp = 0; + *p_emerg_temp = 0; + + return; + } + mlxsw_reg_mtmp_unpack(mtmp_pl, p_temp, NULL, p_crit_temp, p_emerg_temp, + NULL); +} + +static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, + int *p_temp) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + int temp, crit_temp, emerg_temp; + struct device *dev; + u16 sensor_index; + + dev = thermal->bus_info->dev; + sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + tz->module; + + /* Read module temperature and thresholds. */ + mlxsw_thermal_module_temp_and_thresholds_get(thermal->core, + tz->slot_index, + sensor_index, &temp, + &crit_temp, &emerg_temp); + *p_temp = temp; + + if (!temp) + return 0; + + /* Update trip points. 
*/ + mlxsw_thermal_module_trips_update(dev, thermal->core, tz, + crit_temp, emerg_temp); + + return 0; +} + +static int +mlxsw_thermal_module_trip_type_get(struct thermal_zone_device *tzdev, int trip, + enum thermal_trip_type *p_type) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + + if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS) + return -EINVAL; + + *p_type = tz->trips[trip].type; + return 0; +} + +static int +mlxsw_thermal_module_trip_temp_get(struct thermal_zone_device *tzdev, + int trip, int *p_temp) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + + if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS) + return -EINVAL; + + *p_temp = tz->trips[trip].temp; + return 0; +} + +static int +mlxsw_thermal_module_trip_temp_set(struct thermal_zone_device *tzdev, + int trip, int temp) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + + if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS) + return -EINVAL; + + tz->trips[trip].temp = temp; + return 0; +} + +static int +mlxsw_thermal_module_trip_hyst_get(struct thermal_zone_device *tzdev, int trip, + int *p_hyst) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + + *p_hyst = tz->trips[trip].hyst; + return 0; +} + +static int +mlxsw_thermal_module_trip_hyst_set(struct thermal_zone_device *tzdev, int trip, + int hyst) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + + tz->trips[trip].hyst = hyst; + return 0; +} + +static struct thermal_zone_device_ops mlxsw_thermal_module_ops = { + .bind = mlxsw_thermal_module_bind, + .unbind = mlxsw_thermal_module_unbind, + .get_temp = mlxsw_thermal_module_temp_get, + .get_trip_type = mlxsw_thermal_module_trip_type_get, + .get_trip_temp = mlxsw_thermal_module_trip_temp_get, + .set_trip_temp = mlxsw_thermal_module_trip_temp_set, + .get_trip_hyst = mlxsw_thermal_module_trip_hyst_get, + .set_trip_hyst = mlxsw_thermal_module_trip_hyst_set, +}; + +static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev, + int *p_temp) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + u16 index; + int temp; + int err; + + index = MLXSW_REG_MTMP_GBOX_INDEX_MIN + tz->module; + mlxsw_reg_mtmp_pack(mtmp_pl, tz->slot_index, index, false, false); + + err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) + return err; + + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL); + + *p_temp = temp; + return 0; +} + +static struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = { + .bind = mlxsw_thermal_module_bind, + .unbind = mlxsw_thermal_module_unbind, + .get_temp = mlxsw_thermal_gearbox_temp_get, + .get_trip_type = mlxsw_thermal_module_trip_type_get, + .get_trip_temp = mlxsw_thermal_module_trip_temp_get, + .set_trip_temp = mlxsw_thermal_module_trip_temp_set, + .get_trip_hyst = mlxsw_thermal_module_trip_hyst_get, + .set_trip_hyst = mlxsw_thermal_module_trip_hyst_set, +}; + +static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev, + unsigned long *p_state) +{ + *p_state = MLXSW_THERMAL_MAX_STATE; + return 0; +} + +static int mlxsw_thermal_get_cur_state(struct thermal_cooling_device *cdev, + unsigned long *p_state) + +{ + struct mlxsw_thermal *thermal = cdev->devdata; + struct device *dev = thermal->bus_info->dev; + char mfsc_pl[MLXSW_REG_MFSC_LEN]; + int err, idx; + u8 duty; + + idx = mlxsw_get_cooling_device_idx(thermal, cdev); + if (idx < 0) + return idx; + + mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0); + err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), 
mfsc_pl); + if (err) { + dev_err(dev, "Failed to query PWM duty\n"); + return err; + } + + duty = mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl); + *p_state = mlxsw_duty_to_state(duty); + return 0; +} + +static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev, + unsigned long state) + +{ + struct mlxsw_thermal *thermal = cdev->devdata; + struct device *dev = thermal->bus_info->dev; + char mfsc_pl[MLXSW_REG_MFSC_LEN]; + int idx; + int err; + + if (state > MLXSW_THERMAL_MAX_STATE) + return -EINVAL; + + idx = mlxsw_get_cooling_device_idx(thermal, cdev); + if (idx < 0) + return idx; + + /* Normalize the state to the valid speed range. */ + state = max_t(unsigned long, MLXSW_THERMAL_MIN_STATE, state); + mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state)); + err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsc), mfsc_pl); + if (err) { + dev_err(dev, "Failed to write PWM duty\n"); + return err; + } + return 0; +} + +static const struct thermal_cooling_device_ops mlxsw_cooling_ops = { + .get_max_state = mlxsw_thermal_get_max_state, + .get_cur_state = mlxsw_thermal_get_cur_state, + .set_cur_state = mlxsw_thermal_set_cur_state, +}; + +static int +mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz) +{ + char tz_name[THERMAL_NAME_LENGTH]; + int err; + + if (module_tz->slot_index) + snprintf(tz_name, sizeof(tz_name), "mlxsw-lc%d-module%d", + module_tz->slot_index, module_tz->module + 1); + else + snprintf(tz_name, sizeof(tz_name), "mlxsw-module%d", + module_tz->module + 1); + module_tz->tzdev = thermal_zone_device_register(tz_name, + MLXSW_THERMAL_NUM_TRIPS, + MLXSW_THERMAL_TRIP_MASK, + module_tz, + &mlxsw_thermal_module_ops, + &mlxsw_thermal_params, + 0, + module_tz->parent->polling_delay); + if (IS_ERR(module_tz->tzdev)) { + err = PTR_ERR(module_tz->tzdev); + return err; + } + + err = thermal_zone_device_enable(module_tz->tzdev); + if (err) + thermal_zone_device_unregister(module_tz->tzdev); + + return err; +} + +static void mlxsw_thermal_module_tz_fini(struct thermal_zone_device *tzdev) +{ + thermal_zone_device_unregister(tzdev); +} + +static int +mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core, + struct mlxsw_thermal *thermal, + struct mlxsw_thermal_area *area, u8 module) +{ + struct mlxsw_thermal_module *module_tz; + int dummy_temp, crit_temp, emerg_temp; + u16 sensor_index; + + sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + module; + module_tz = &area->tz_module_arr[module]; + /* Skip if parent is already set (case of port split). */ + if (module_tz->parent) + return 0; + module_tz->module = module; + module_tz->slot_index = area->slot_index; + module_tz->parent = thermal; + memcpy(module_tz->trips, default_thermal_trips, + sizeof(thermal->trips)); + /* Initialize all trip points. */ + mlxsw_thermal_module_trips_reset(module_tz); + /* Read module temperature and thresholds. */ + mlxsw_thermal_module_temp_and_thresholds_get(core, area->slot_index, + sensor_index, &dummy_temp, + &crit_temp, &emerg_temp); + /* Update trip points according to the module data.
*/ + return mlxsw_thermal_module_trips_update(dev, core, module_tz, + crit_temp, emerg_temp); +} + +static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz) +{ + if (module_tz && module_tz->tzdev) { + mlxsw_thermal_module_tz_fini(module_tz->tzdev); + module_tz->tzdev = NULL; + module_tz->parent = NULL; + } +} + +static int +mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, + struct mlxsw_thermal *thermal, + struct mlxsw_thermal_area *area) +{ + struct mlxsw_thermal_module *module_tz; + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + int i, err; + + mlxsw_reg_mgpir_pack(mgpir_pl, area->slot_index); + err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, + &area->tz_module_num, NULL); + + /* For modular system module counter could be zero. */ + if (!area->tz_module_num) + return 0; + + area->tz_module_arr = kcalloc(area->tz_module_num, + sizeof(*area->tz_module_arr), + GFP_KERNEL); + if (!area->tz_module_arr) + return -ENOMEM; + + for (i = 0; i < area->tz_module_num; i++) { + err = mlxsw_thermal_module_init(dev, core, thermal, area, i); + if (err) + goto err_thermal_module_init; + } + + for (i = 0; i < area->tz_module_num; i++) { + module_tz = &area->tz_module_arr[i]; + if (!module_tz->parent) + continue; + err = mlxsw_thermal_module_tz_init(module_tz); + if (err) + goto err_thermal_module_tz_init; + } + + return 0; + +err_thermal_module_tz_init: +err_thermal_module_init: + for (i = area->tz_module_num - 1; i >= 0; i--) + mlxsw_thermal_module_fini(&area->tz_module_arr[i]); + kfree(area->tz_module_arr); + return err; +} + +static void +mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal, + struct mlxsw_thermal_area *area) +{ + int i; + + for (i = area->tz_module_num - 1; i >= 0; i--) + mlxsw_thermal_module_fini(&area->tz_module_arr[i]); + kfree(area->tz_module_arr); +} + +static int +mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz) +{ + char tz_name[THERMAL_NAME_LENGTH]; + int ret; + + if (gearbox_tz->slot_index) + snprintf(tz_name, sizeof(tz_name), "mlxsw-lc%d-gearbox%d", + gearbox_tz->slot_index, gearbox_tz->module + 1); + else + snprintf(tz_name, sizeof(tz_name), "mlxsw-gearbox%d", + gearbox_tz->module + 1); + gearbox_tz->tzdev = thermal_zone_device_register(tz_name, + MLXSW_THERMAL_NUM_TRIPS, + MLXSW_THERMAL_TRIP_MASK, + gearbox_tz, + &mlxsw_thermal_gearbox_ops, + &mlxsw_thermal_params, 0, + gearbox_tz->parent->polling_delay); + if (IS_ERR(gearbox_tz->tzdev)) + return PTR_ERR(gearbox_tz->tzdev); + + ret = thermal_zone_device_enable(gearbox_tz->tzdev); + if (ret) + thermal_zone_device_unregister(gearbox_tz->tzdev); + + return ret; +} + +static void +mlxsw_thermal_gearbox_tz_fini(struct mlxsw_thermal_module *gearbox_tz) +{ + thermal_zone_device_unregister(gearbox_tz->tzdev); +} + +static int +mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core, + struct mlxsw_thermal *thermal, + struct mlxsw_thermal_area *area) +{ + enum mlxsw_reg_mgpir_device_type device_type; + struct mlxsw_thermal_module *gearbox_tz; + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + u8 gbox_num; + int i; + int err; + + mlxsw_reg_mgpir_pack(mgpir_pl, area->slot_index); + err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, &device_type, NULL, + NULL, NULL); + if (device_type != MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE || + !gbox_num) + return 0; + + area->tz_gearbox_num = gbox_num; + 
area->tz_gearbox_arr = kcalloc(area->tz_gearbox_num, + sizeof(*area->tz_gearbox_arr), + GFP_KERNEL); + if (!area->tz_gearbox_arr) + return -ENOMEM; + + for (i = 0; i < area->tz_gearbox_num; i++) { + gearbox_tz = &area->tz_gearbox_arr[i]; + memcpy(gearbox_tz->trips, default_thermal_trips, + sizeof(thermal->trips)); + gearbox_tz->module = i; + gearbox_tz->parent = thermal; + gearbox_tz->slot_index = area->slot_index; + err = mlxsw_thermal_gearbox_tz_init(gearbox_tz); + if (err) + goto err_thermal_gearbox_tz_init; + } + + return 0; + +err_thermal_gearbox_tz_init: + for (i--; i >= 0; i--) + mlxsw_thermal_gearbox_tz_fini(&area->tz_gearbox_arr[i]); + kfree(area->tz_gearbox_arr); + return err; +} + +static void +mlxsw_thermal_gearboxes_fini(struct mlxsw_thermal *thermal, + struct mlxsw_thermal_area *area) +{ + int i; + + for (i = area->tz_gearbox_num - 1; i >= 0; i--) + mlxsw_thermal_gearbox_tz_fini(&area->tz_gearbox_arr[i]); + kfree(area->tz_gearbox_arr); +} + +static void +mlxsw_thermal_got_active(struct mlxsw_core *mlxsw_core, u8 slot_index, + void *priv) +{ + struct mlxsw_thermal *thermal = priv; + struct mlxsw_thermal_area *linecard; + int err; + + linecard = &thermal->line_cards[slot_index]; + + if (linecard->active) + return; + + linecard->slot_index = slot_index; + err = mlxsw_thermal_modules_init(thermal->bus_info->dev, thermal->core, + thermal, linecard); + if (err) { + dev_err(thermal->bus_info->dev, "Failed to configure thermal objects for line card modules in slot %d\n", + slot_index); + return; + } + + err = mlxsw_thermal_gearboxes_init(thermal->bus_info->dev, + thermal->core, thermal, linecard); + if (err) { + dev_err(thermal->bus_info->dev, "Failed to configure thermal objects for line card gearboxes in slot %d\n", + slot_index); + goto err_thermal_linecard_gearboxes_init; + } + + linecard->active = true; + + return; + +err_thermal_linecard_gearboxes_init: + mlxsw_thermal_modules_fini(thermal, linecard); +} + +static void +mlxsw_thermal_got_inactive(struct mlxsw_core *mlxsw_core, u8 slot_index, + void *priv) +{ + struct mlxsw_thermal *thermal = priv; + struct mlxsw_thermal_area *linecard; + + linecard = &thermal->line_cards[slot_index]; + if (!linecard->active) + return; + linecard->active = false; + mlxsw_thermal_gearboxes_fini(thermal, linecard); + mlxsw_thermal_modules_fini(thermal, linecard); +} + +static struct mlxsw_linecards_event_ops mlxsw_thermal_event_ops = { + .got_active = mlxsw_thermal_got_active, + .got_inactive = mlxsw_thermal_got_inactive, +}; + +int mlxsw_thermal_init(struct mlxsw_core *core, + const struct mlxsw_bus_info *bus_info, + struct mlxsw_thermal **p_thermal) +{ + char mfcr_pl[MLXSW_REG_MFCR_LEN] = { 0 }; + enum mlxsw_reg_mfcr_pwm_frequency freq; + struct device *dev = bus_info->dev; + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + struct mlxsw_thermal *thermal; + u8 pwm_active, num_of_slots; + u16 tacho_active; + int err, i; + + mlxsw_reg_mgpir_pack(mgpir_pl, 0); + err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, NULL, + &num_of_slots); + + thermal = kzalloc(struct_size(thermal, line_cards, num_of_slots + 1), + GFP_KERNEL); + if (!thermal) + return -ENOMEM; + + thermal->core = core; + thermal->bus_info = bus_info; + memcpy(thermal->trips, default_thermal_trips, sizeof(thermal->trips)); + thermal->line_cards[0].slot_index = 0; + + err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfcr), mfcr_pl); + if (err) { + dev_err(dev, "Failed to probe PWMs\n"); + goto err_reg_query; + } + 
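/* Editorial note: the fan cooling devices registered below expose a cooling
 * state in the range [0, MLXSW_THERMAL_MAX_STATE], which the
 * mlxsw_state_to_duty()/mlxsw_duty_to_state() helpers earlier in this file
 * map linearly onto the MFSC PWM duty range [0, MLXSW_THERMAL_MAX_DUTY].
 * Worked examples with the constants defined above (10 and 255):
 *
 *	mlxsw_state_to_duty(2)   = DIV_ROUND_CLOSEST(2 * 255, 10)   = 51
 *	mlxsw_state_to_duty(10)  = DIV_ROUND_CLOSEST(10 * 255, 10)  = 255
 *	mlxsw_duty_to_state(128) = DIV_ROUND_CLOSEST(128 * 10, 255) = 5
 *
 * Since set_cur_state() clamps requests to MLXSW_THERMAL_MIN_STATE (2),
 * the fans never run below roughly 20% duty.
 */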
mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active); + + for (i = 0; i < MLXSW_MFCR_TACHOS_MAX; i++) { + if (tacho_active & BIT(i)) { + char mfsl_pl[MLXSW_REG_MFSL_LEN]; + + mlxsw_reg_mfsl_pack(mfsl_pl, i, 0, 0); + + /* We need to query the register to preserve maximum */ + err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsl), + mfsl_pl); + if (err) + goto err_reg_query; + + /* set the minimal RPMs to 0 */ + mlxsw_reg_mfsl_tach_min_set(mfsl_pl, 0); + err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsl), + mfsl_pl); + if (err) + goto err_reg_write; + } + } + for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) { + if (pwm_active & BIT(i)) { + struct thermal_cooling_device *cdev; + + cdev = thermal_cooling_device_register("mlxsw_fan", + thermal, + &mlxsw_cooling_ops); + if (IS_ERR(cdev)) { + err = PTR_ERR(cdev); + dev_err(dev, "Failed to register cooling device\n"); + goto err_thermal_cooling_device_register; + } + thermal->cdevs[i] = cdev; + } + } + + thermal->polling_delay = bus_info->low_frequency ? + MLXSW_THERMAL_SLOW_POLL_INT : + MLXSW_THERMAL_POLL_INT; + + thermal->tzdev = thermal_zone_device_register("mlxsw", + MLXSW_THERMAL_NUM_TRIPS, + MLXSW_THERMAL_TRIP_MASK, + thermal, + &mlxsw_thermal_ops, + &mlxsw_thermal_params, 0, + thermal->polling_delay); + if (IS_ERR(thermal->tzdev)) { + err = PTR_ERR(thermal->tzdev); + dev_err(dev, "Failed to register thermal zone\n"); + goto err_thermal_zone_device_register; + } + + err = mlxsw_thermal_modules_init(dev, core, thermal, + &thermal->line_cards[0]); + if (err) + goto err_thermal_modules_init; + + err = mlxsw_thermal_gearboxes_init(dev, core, thermal, + &thermal->line_cards[0]); + if (err) + goto err_thermal_gearboxes_init; + + err = mlxsw_linecards_event_ops_register(core, + &mlxsw_thermal_event_ops, + thermal); + if (err) + goto err_linecards_event_ops_register; + + err = thermal_zone_device_enable(thermal->tzdev); + if (err) + goto err_thermal_zone_device_enable; + + thermal->line_cards[0].active = true; + *p_thermal = thermal; + return 0; + +err_thermal_zone_device_enable: + mlxsw_linecards_event_ops_unregister(thermal->core, + &mlxsw_thermal_event_ops, + thermal); +err_linecards_event_ops_register: + mlxsw_thermal_gearboxes_fini(thermal, &thermal->line_cards[0]); +err_thermal_gearboxes_init: + mlxsw_thermal_modules_fini(thermal, &thermal->line_cards[0]); +err_thermal_modules_init: + if (thermal->tzdev) { + thermal_zone_device_unregister(thermal->tzdev); + thermal->tzdev = NULL; + } +err_thermal_zone_device_register: +err_thermal_cooling_device_register: + for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) + if (thermal->cdevs[i]) + thermal_cooling_device_unregister(thermal->cdevs[i]); +err_reg_write: +err_reg_query: + kfree(thermal); + return err; +} + +void mlxsw_thermal_fini(struct mlxsw_thermal *thermal) +{ + int i; + + thermal->line_cards[0].active = false; + mlxsw_linecards_event_ops_unregister(thermal->core, + &mlxsw_thermal_event_ops, + thermal); + mlxsw_thermal_gearboxes_fini(thermal, &thermal->line_cards[0]); + mlxsw_thermal_modules_fini(thermal, &thermal->line_cards[0]); + if (thermal->tzdev) { + thermal_zone_device_unregister(thermal->tzdev); + thermal->tzdev = NULL; + } + + for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) { + if (thermal->cdevs[i]) { + thermal_cooling_device_unregister(thermal->cdevs[i]); + thermal->cdevs[i] = NULL; + } + } + + kfree(thermal); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/emad.h b/drivers/net/ethernet/mellanox/mlxsw/emad.h new file mode 100644 index 000000000..acfbbec52 --- /dev/null +++ 
b/drivers/net/ethernet/mellanox/mlxsw/emad.h @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_EMAD_H +#define _MLXSW_EMAD_H + +#define MLXSW_EMAD_MAX_FRAME_LEN 1518 /* Length in u8 */ +#define MLXSW_EMAD_MAX_RETRY 5 + +/* EMAD Ethernet header */ +#define MLXSW_EMAD_ETH_HDR_LEN 0x10 /* Length in u8 */ +#define MLXSW_EMAD_EH_DMAC "\x01\x02\xc9\x00\x00\x01" +#define MLXSW_EMAD_EH_SMAC "\x00\x02\xc9\x01\x02\x03" +#define MLXSW_EMAD_EH_ETHERTYPE 0x8932 +#define MLXSW_EMAD_EH_MLX_PROTO 0 +#define MLXSW_EMAD_EH_PROTO_VERSION 0 + +/* EMAD TLV Types */ +enum { + MLXSW_EMAD_TLV_TYPE_END, + MLXSW_EMAD_TLV_TYPE_OP, + MLXSW_EMAD_TLV_TYPE_STRING, + MLXSW_EMAD_TLV_TYPE_REG, +}; + +/* OP TLV */ +#define MLXSW_EMAD_OP_TLV_LEN 4 /* Length in u32 */ + +enum { + MLXSW_EMAD_OP_TLV_CLASS_REG_ACCESS = 1, + MLXSW_EMAD_OP_TLV_CLASS_IPC = 2, +}; + +enum mlxsw_emad_op_tlv_status { + MLXSW_EMAD_OP_TLV_STATUS_SUCCESS, + MLXSW_EMAD_OP_TLV_STATUS_BUSY, + MLXSW_EMAD_OP_TLV_STATUS_VERSION_NOT_SUPPORTED, + MLXSW_EMAD_OP_TLV_STATUS_UNKNOWN_TLV, + MLXSW_EMAD_OP_TLV_STATUS_REGISTER_NOT_SUPPORTED, + MLXSW_EMAD_OP_TLV_STATUS_CLASS_NOT_SUPPORTED, + MLXSW_EMAD_OP_TLV_STATUS_METHOD_NOT_SUPPORTED, + MLXSW_EMAD_OP_TLV_STATUS_BAD_PARAMETER, + MLXSW_EMAD_OP_TLV_STATUS_RESOURCE_NOT_AVAILABLE, + MLXSW_EMAD_OP_TLV_STATUS_MESSAGE_RECEIPT_ACK, + MLXSW_EMAD_OP_TLV_STATUS_INTERNAL_ERROR = 0x70, +}; + +static inline char *mlxsw_emad_op_tlv_status_str(u8 status) +{ + switch (status) { + case MLXSW_EMAD_OP_TLV_STATUS_SUCCESS: + return "operation performed"; + case MLXSW_EMAD_OP_TLV_STATUS_BUSY: + return "device is busy"; + case MLXSW_EMAD_OP_TLV_STATUS_VERSION_NOT_SUPPORTED: + return "version not supported"; + case MLXSW_EMAD_OP_TLV_STATUS_UNKNOWN_TLV: + return "unknown TLV"; + case MLXSW_EMAD_OP_TLV_STATUS_REGISTER_NOT_SUPPORTED: + return "register not supported"; + case MLXSW_EMAD_OP_TLV_STATUS_CLASS_NOT_SUPPORTED: + return "class not supported"; + case MLXSW_EMAD_OP_TLV_STATUS_METHOD_NOT_SUPPORTED: + return "method not supported"; + case MLXSW_EMAD_OP_TLV_STATUS_BAD_PARAMETER: + return "bad parameter"; + case MLXSW_EMAD_OP_TLV_STATUS_RESOURCE_NOT_AVAILABLE: + return "resource not available"; + case MLXSW_EMAD_OP_TLV_STATUS_MESSAGE_RECEIPT_ACK: + return "acknowledged. retransmit"; + case MLXSW_EMAD_OP_TLV_STATUS_INTERNAL_ERROR: + return "internal error"; + default: + return "*UNKNOWN*"; + } +} + +enum { + MLXSW_EMAD_OP_TLV_REQUEST, + MLXSW_EMAD_OP_TLV_RESPONSE +}; + +enum { + MLXSW_EMAD_OP_TLV_METHOD_QUERY = 1, + MLXSW_EMAD_OP_TLV_METHOD_WRITE = 2, + MLXSW_EMAD_OP_TLV_METHOD_SEND = 3, + MLXSW_EMAD_OP_TLV_METHOD_EVENT = 5, +}; + +/* STRING TLV */ +#define MLXSW_EMAD_STRING_TLV_LEN 33 /* Length in u32 */ + +/* END TLV */ +#define MLXSW_EMAD_END_TLV_LEN 1 /* Length in u32 */ + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c new file mode 100644 index 000000000..3beefc167 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -0,0 +1,769 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2016-2018 Mellanox Technologies. 
All rights reserved */ + +#include <linux/err.h> +#include <linux/i2c.h> +#include <linux/init.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/mutex.h> +#include <linux/module.h> +#include <linux/mod_devicetable.h> +#include <linux/platform_data/mlxreg.h> +#include <linux/slab.h> + +#include "cmd.h" +#include "core.h" +#include "i2c.h" +#include "resources.h" + +#define MLXSW_I2C_CIR2_BASE 0x72000 +#define MLXSW_I2C_CIR_STATUS_OFF 0x18 +#define MLXSW_I2C_CIR2_OFF_STATUS (MLXSW_I2C_CIR2_BASE + \ + MLXSW_I2C_CIR_STATUS_OFF) +#define MLXSW_I2C_OPMOD_SHIFT 12 +#define MLXSW_I2C_EVENT_BIT_SHIFT 22 +#define MLXSW_I2C_GO_BIT_SHIFT 23 +#define MLXSW_I2C_CIR_CTRL_STATUS_SHIFT 24 +#define MLXSW_I2C_EVENT_BIT BIT(MLXSW_I2C_EVENT_BIT_SHIFT) +#define MLXSW_I2C_GO_BIT BIT(MLXSW_I2C_GO_BIT_SHIFT) +#define MLXSW_I2C_GO_OPMODE BIT(MLXSW_I2C_OPMOD_SHIFT) +#define MLXSW_I2C_SET_IMM_CMD (MLXSW_I2C_GO_OPMODE | \ + MLXSW_CMD_OPCODE_QUERY_FW) +#define MLXSW_I2C_PUSH_IMM_CMD (MLXSW_I2C_GO_BIT | \ + MLXSW_I2C_SET_IMM_CMD) +#define MLXSW_I2C_SET_CMD (MLXSW_CMD_OPCODE_ACCESS_REG) +#define MLXSW_I2C_PUSH_CMD (MLXSW_I2C_GO_BIT | MLXSW_I2C_SET_CMD) +#define MLXSW_I2C_TLV_HDR_SIZE 0x10 +#define MLXSW_I2C_ADDR_WIDTH 4 +#define MLXSW_I2C_PUSH_CMD_SIZE (MLXSW_I2C_ADDR_WIDTH + 4) +#define MLXSW_I2C_SET_EVENT_CMD (MLXSW_I2C_EVENT_BIT) +#define MLXSW_I2C_PUSH_EVENT_CMD (MLXSW_I2C_GO_BIT | \ + MLXSW_I2C_SET_EVENT_CMD) +#define MLXSW_I2C_READ_SEMA_SIZE 4 +#define MLXSW_I2C_PREP_SIZE (MLXSW_I2C_ADDR_WIDTH + 28) +#define MLXSW_I2C_MBOX_SIZE 20 +#define MLXSW_I2C_MBOX_OUT_PARAM_OFF 12 +#define MLXSW_I2C_MBOX_OFFSET_BITS 20 +#define MLXSW_I2C_MBOX_SIZE_BITS 12 +#define MLXSW_I2C_ADDR_BUF_SIZE 4 +#define MLXSW_I2C_BLK_DEF 32 +#define MLXSW_I2C_BLK_MAX 100 +#define MLXSW_I2C_RETRY 5 +#define MLXSW_I2C_TIMEOUT_MSECS 5000 +#define MLXSW_I2C_MAX_DATA_SIZE 256 + +/* Driver can be initialized by kernel platform driver or from the user + * space. In the first case IRQ line number is passed through the platform + * data, otherwise default IRQ line is to be used. Default IRQ is relevant + * only for specific I2C slave address, allowing 3.4 MHz I2C path to the chip + * (special hardware feature for I2C acceleration). 
+ */ +#define MLXSW_I2C_DEFAULT_IRQ 17 +#define MLXSW_FAST_I2C_SLAVE 0x37 + +/** + * struct mlxsw_i2c - device private data: + * @cmd: command attributes; + * @cmd.mb_size_in: input mailbox size; + * @cmd.mb_off_in: input mailbox offset in register space; + * @cmd.mb_size_out: output mailbox size; + * @cmd.mb_off_out: output mailbox offset in register space; + * @cmd.lock: command execution lock; + * @dev: I2C device; + * @core: switch core pointer; + * @bus_info: bus info block; + * @block_size: maximum block size allowed to pass to under layer; + * @pdata: device platform data; + * @irq_work: interrupts work item; + * @irq: IRQ line number; + */ +struct mlxsw_i2c { + struct { + u32 mb_size_in; + u32 mb_off_in; + u32 mb_size_out; + u32 mb_off_out; + struct mutex lock; + } cmd; + struct device *dev; + struct mlxsw_core *core; + struct mlxsw_bus_info bus_info; + u16 block_size; + struct mlxreg_core_hotplug_platform_data *pdata; + struct work_struct irq_work; + int irq; +}; + +#define MLXSW_I2C_READ_MSG(_client, _addr_buf, _buf, _len) { \ + { .addr = (_client)->addr, \ + .buf = (_addr_buf), \ + .len = MLXSW_I2C_ADDR_BUF_SIZE, \ + .flags = 0 }, \ + { .addr = (_client)->addr, \ + .buf = (_buf), \ + .len = (_len), \ + .flags = I2C_M_RD } } + +#define MLXSW_I2C_WRITE_MSG(_client, _buf, _len) \ + { .addr = (_client)->addr, \ + .buf = (u8 *)(_buf), \ + .len = (_len), \ + .flags = 0 } + +/* Routine converts in and out mail boxes offset and size. */ +static inline void +mlxsw_i2c_convert_mbox(struct mlxsw_i2c *mlxsw_i2c, u8 *buf) +{ + u32 tmp; + + /* Local in/out mailboxes: 20 bits for offset, 12 for size */ + tmp = be32_to_cpup((__be32 *) buf); + mlxsw_i2c->cmd.mb_off_in = tmp & + GENMASK(MLXSW_I2C_MBOX_OFFSET_BITS - 1, 0); + mlxsw_i2c->cmd.mb_size_in = (tmp & GENMASK(31, + MLXSW_I2C_MBOX_OFFSET_BITS)) >> + MLXSW_I2C_MBOX_OFFSET_BITS; + + tmp = be32_to_cpup((__be32 *) (buf + MLXSW_I2C_ADDR_WIDTH)); + mlxsw_i2c->cmd.mb_off_out = tmp & + GENMASK(MLXSW_I2C_MBOX_OFFSET_BITS - 1, 0); + mlxsw_i2c->cmd.mb_size_out = (tmp & GENMASK(31, + MLXSW_I2C_MBOX_OFFSET_BITS)) >> + MLXSW_I2C_MBOX_OFFSET_BITS; +} + +/* Routine obtains register size from mail box buffer. */ +static inline int mlxsw_i2c_get_reg_size(u8 *in_mbox) +{ + u16 tmp = be16_to_cpup((__be16 *) (in_mbox + MLXSW_I2C_TLV_HDR_SIZE)); + + return (tmp & 0x7ff) * 4 + MLXSW_I2C_TLV_HDR_SIZE; +} + +/* Routine sets I2C device internal offset in the transaction buffer. */ +static inline void mlxsw_i2c_set_slave_addr(u8 *buf, u32 off) +{ + __be32 *val = (__be32 *) buf; + + *val = htonl(off); +} + +/* Routine waits until go bit is cleared. 
*/ +static int mlxsw_i2c_wait_go_bit(struct i2c_client *client, + struct mlxsw_i2c *mlxsw_i2c, u8 *p_status) +{ + u8 addr_buf[MLXSW_I2C_ADDR_BUF_SIZE]; + u8 buf[MLXSW_I2C_READ_SEMA_SIZE]; + int len = MLXSW_I2C_READ_SEMA_SIZE; + struct i2c_msg read_sema[] = + MLXSW_I2C_READ_MSG(client, addr_buf, buf, len); + bool wait_done = false; + unsigned long end; + int i = 0, err; + + mlxsw_i2c_set_slave_addr(addr_buf, MLXSW_I2C_CIR2_OFF_STATUS); + + end = jiffies + msecs_to_jiffies(MLXSW_I2C_TIMEOUT_MSECS); + do { + u32 ctrl; + + err = i2c_transfer(client->adapter, read_sema, + ARRAY_SIZE(read_sema)); + + ctrl = be32_to_cpu(*(__be32 *) buf); + if (err == ARRAY_SIZE(read_sema)) { + if (!(ctrl & MLXSW_I2C_GO_BIT)) { + wait_done = true; + *p_status = ctrl >> + MLXSW_I2C_CIR_CTRL_STATUS_SHIFT; + break; + } + } + cond_resched(); + } while ((time_before(jiffies, end)) || (i++ < MLXSW_I2C_RETRY)); + + if (wait_done) { + if (*p_status) + err = -EIO; + } else { + return -ETIMEDOUT; + } + + return err > 0 ? 0 : err; +} + +/* Routine posts a command to ASIC through mail box. */ +static int mlxsw_i2c_write_cmd(struct i2c_client *client, + struct mlxsw_i2c *mlxsw_i2c, + int immediate) +{ + __be32 push_cmd_buf[MLXSW_I2C_PUSH_CMD_SIZE / 4] = { + 0, cpu_to_be32(MLXSW_I2C_PUSH_IMM_CMD) + }; + __be32 prep_cmd_buf[MLXSW_I2C_PREP_SIZE / 4] = { + 0, 0, 0, 0, 0, 0, + cpu_to_be32(client->adapter->nr & 0xffff), + cpu_to_be32(MLXSW_I2C_SET_IMM_CMD) + }; + struct i2c_msg push_cmd = + MLXSW_I2C_WRITE_MSG(client, push_cmd_buf, + MLXSW_I2C_PUSH_CMD_SIZE); + struct i2c_msg prep_cmd = + MLXSW_I2C_WRITE_MSG(client, prep_cmd_buf, MLXSW_I2C_PREP_SIZE); + int err; + + if (!immediate) { + push_cmd_buf[1] = cpu_to_be32(MLXSW_I2C_PUSH_CMD); + prep_cmd_buf[7] = cpu_to_be32(MLXSW_I2C_SET_CMD); + } + mlxsw_i2c_set_slave_addr((u8 *)prep_cmd_buf, + MLXSW_I2C_CIR2_BASE); + mlxsw_i2c_set_slave_addr((u8 *)push_cmd_buf, + MLXSW_I2C_CIR2_OFF_STATUS); + + /* Prepare Command Interface Register for transaction */ + err = i2c_transfer(client->adapter, &prep_cmd, 1); + if (err < 0) + return err; + else if (err != 1) + return -EIO; + + /* Write out Command Interface Register GO bit to push transaction */ + err = i2c_transfer(client->adapter, &push_cmd, 1); + if (err < 0) + return err; + else if (err != 1) + return -EIO; + + return 0; +} + +/* Routine posts initialization command to ASIC through mail box. 
*/ +static int +mlxsw_i2c_write_init_cmd(struct i2c_client *client, + struct mlxsw_i2c *mlxsw_i2c, u16 opcode, u32 in_mod) +{ + __be32 push_cmd_buf[MLXSW_I2C_PUSH_CMD_SIZE / 4] = { + 0, cpu_to_be32(MLXSW_I2C_PUSH_EVENT_CMD) + }; + __be32 prep_cmd_buf[MLXSW_I2C_PREP_SIZE / 4] = { + 0, 0, 0, 0, 0, 0, + cpu_to_be32(client->adapter->nr & 0xffff), + cpu_to_be32(MLXSW_I2C_SET_EVENT_CMD) + }; + struct i2c_msg push_cmd = + MLXSW_I2C_WRITE_MSG(client, push_cmd_buf, + MLXSW_I2C_PUSH_CMD_SIZE); + struct i2c_msg prep_cmd = + MLXSW_I2C_WRITE_MSG(client, prep_cmd_buf, MLXSW_I2C_PREP_SIZE); + u8 status; + int err; + + push_cmd_buf[1] = cpu_to_be32(MLXSW_I2C_PUSH_EVENT_CMD | opcode); + prep_cmd_buf[3] = cpu_to_be32(in_mod); + prep_cmd_buf[7] = cpu_to_be32(MLXSW_I2C_GO_BIT | opcode); + mlxsw_i2c_set_slave_addr((u8 *)prep_cmd_buf, + MLXSW_I2C_CIR2_BASE); + mlxsw_i2c_set_slave_addr((u8 *)push_cmd_buf, + MLXSW_I2C_CIR2_OFF_STATUS); + + /* Prepare Command Interface Register for transaction */ + err = i2c_transfer(client->adapter, &prep_cmd, 1); + if (err < 0) + return err; + else if (err != 1) + return -EIO; + + /* Write out Command Interface Register GO bit to push transaction */ + err = i2c_transfer(client->adapter, &push_cmd, 1); + if (err < 0) + return err; + else if (err != 1) + return -EIO; + + /* Wait until go bit is cleared. */ + err = mlxsw_i2c_wait_go_bit(client, mlxsw_i2c, &status); + if (err) { + dev_err(&client->dev, "HW semaphore is not released"); + return err; + } + + /* Validate transaction completion status. */ + if (status) { + dev_err(&client->dev, "Bad transaction completion status %x\n", + status); + return -EIO; + } + + return 0; +} + +/* Routine obtains mail box offsets from ASIC register space. */ +static int mlxsw_i2c_get_mbox(struct i2c_client *client, + struct mlxsw_i2c *mlxsw_i2c) +{ + u8 addr_buf[MLXSW_I2C_ADDR_BUF_SIZE]; + u8 buf[MLXSW_I2C_MBOX_SIZE]; + struct i2c_msg mbox_cmd[] = + MLXSW_I2C_READ_MSG(client, addr_buf, buf, MLXSW_I2C_MBOX_SIZE); + int err; + + /* Read mail boxes offsets. */ + mlxsw_i2c_set_slave_addr(addr_buf, MLXSW_I2C_CIR2_BASE); + err = i2c_transfer(client->adapter, mbox_cmd, 2); + if (err != 2) { + dev_err(&client->dev, "Could not obtain mail boxes\n"); + if (!err) + return -EIO; + else + return err; + } + + /* Convert mail boxes. */ + mlxsw_i2c_convert_mbox(mlxsw_i2c, &buf[MLXSW_I2C_MBOX_OUT_PARAM_OFF]); + + return err; +} + +/* Routine sends I2C write transaction to ASIC device. */ +static int +mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num, + u8 *p_status) +{ + struct i2c_client *client = to_i2c_client(dev); + struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client); + unsigned long timeout = msecs_to_jiffies(MLXSW_I2C_TIMEOUT_MSECS); + int off = mlxsw_i2c->cmd.mb_off_in, chunk_size, i, j; + unsigned long end; + u8 *tran_buf; + struct i2c_msg write_tran = + MLXSW_I2C_WRITE_MSG(client, NULL, MLXSW_I2C_PUSH_CMD_SIZE); + int err; + + tran_buf = kmalloc(mlxsw_i2c->block_size + MLXSW_I2C_ADDR_BUF_SIZE, + GFP_KERNEL); + if (!tran_buf) + return -ENOMEM; + + write_tran.buf = tran_buf; + for (i = 0; i < num; i++) { + chunk_size = (in_mbox_size > mlxsw_i2c->block_size) ? 
+ mlxsw_i2c->block_size : in_mbox_size; + write_tran.len = MLXSW_I2C_ADDR_WIDTH + chunk_size; + mlxsw_i2c_set_slave_addr(tran_buf, off); + memcpy(&tran_buf[MLXSW_I2C_ADDR_BUF_SIZE], in_mbox + + mlxsw_i2c->block_size * i, chunk_size); + + j = 0; + end = jiffies + timeout; + do { + err = i2c_transfer(client->adapter, &write_tran, 1); + if (err == 1) + break; + + cond_resched(); + } while ((time_before(jiffies, end)) || + (j++ < MLXSW_I2C_RETRY)); + + if (err != 1) { + if (!err) { + err = -EIO; + goto mlxsw_i2c_write_exit; + } + } + + off += chunk_size; + in_mbox_size -= chunk_size; + } + + /* Prepare and write out Command Interface Register for transaction. */ + err = mlxsw_i2c_write_cmd(client, mlxsw_i2c, 0); + if (err) { + dev_err(&client->dev, "Could not start transaction"); + err = -EIO; + goto mlxsw_i2c_write_exit; + } + + /* Wait until go bit is cleared. */ + err = mlxsw_i2c_wait_go_bit(client, mlxsw_i2c, p_status); + if (err) { + dev_err(&client->dev, "HW semaphore is not released"); + goto mlxsw_i2c_write_exit; + } + + /* Validate transaction completion status. */ + if (*p_status) { + dev_err(&client->dev, "Bad transaction completion status %x\n", + *p_status); + err = -EIO; + } + +mlxsw_i2c_write_exit: + kfree(tran_buf); + return err; +} + +/* Routine executes I2C command. */ +static int +mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size, + u8 *in_mbox, size_t out_mbox_size, u8 *out_mbox, u8 *status) +{ + struct i2c_client *client = to_i2c_client(dev); + struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client); + unsigned long timeout = msecs_to_jiffies(MLXSW_I2C_TIMEOUT_MSECS); + u8 tran_buf[MLXSW_I2C_ADDR_BUF_SIZE]; + int num, chunk_size, reg_size, i, j; + int off = mlxsw_i2c->cmd.mb_off_out; + unsigned long end; + struct i2c_msg read_tran[] = + MLXSW_I2C_READ_MSG(client, tran_buf, NULL, 0); + int err; + + WARN_ON(in_mbox_size % sizeof(u32) || out_mbox_size % sizeof(u32)); + + if (in_mbox) { + reg_size = mlxsw_i2c_get_reg_size(in_mbox); + num = reg_size / mlxsw_i2c->block_size; + if (reg_size % mlxsw_i2c->block_size) + num++; + + if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) { + dev_err(&client->dev, "Could not acquire lock"); + return -EINVAL; + } + + err = mlxsw_i2c_write(dev, reg_size, in_mbox, num, status); + if (err) + goto cmd_fail; + + /* No out mailbox is case of write transaction. */ + if (!out_mbox) { + mutex_unlock(&mlxsw_i2c->cmd.lock); + return 0; + } + } else { + /* No input mailbox is case of initialization query command. */ + reg_size = MLXSW_I2C_MAX_DATA_SIZE; + num = DIV_ROUND_UP(reg_size, mlxsw_i2c->block_size); + + if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) { + dev_err(&client->dev, "Could not acquire lock"); + return -EINVAL; + } + + err = mlxsw_i2c_write_init_cmd(client, mlxsw_i2c, opcode, + in_mod); + if (err) + goto cmd_fail; + } + + /* Send read transaction to get output mailbox content. */ + read_tran[1].buf = out_mbox; + for (i = 0; i < num; i++) { + chunk_size = (reg_size > mlxsw_i2c->block_size) ? 
+ mlxsw_i2c->block_size : reg_size; + read_tran[1].len = chunk_size; + mlxsw_i2c_set_slave_addr(tran_buf, off); + + j = 0; + end = jiffies + timeout; + do { + err = i2c_transfer(client->adapter, read_tran, + ARRAY_SIZE(read_tran)); + if (err == ARRAY_SIZE(read_tran)) + break; + + cond_resched(); + } while ((time_before(jiffies, end)) || + (j++ < MLXSW_I2C_RETRY)); + + if (err != ARRAY_SIZE(read_tran)) { + if (!err) + err = -EIO; + + goto cmd_fail; + } + + off += chunk_size; + reg_size -= chunk_size; + read_tran[1].buf += chunk_size; + } + + mutex_unlock(&mlxsw_i2c->cmd.lock); + + return 0; + +cmd_fail: + mutex_unlock(&mlxsw_i2c->cmd.lock); + return err; +} + +static int mlxsw_i2c_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod, + u32 in_mod, bool out_mbox_direct, + char *in_mbox, size_t in_mbox_size, + char *out_mbox, size_t out_mbox_size, + u8 *status) +{ + struct mlxsw_i2c *mlxsw_i2c = bus_priv; + + return mlxsw_i2c_cmd(mlxsw_i2c->dev, opcode, in_mod, in_mbox_size, + in_mbox, out_mbox_size, out_mbox, status); +} + +static bool mlxsw_i2c_skb_transmit_busy(void *bus_priv, + const struct mlxsw_tx_info *tx_info) +{ + return false; +} + +static int mlxsw_i2c_skb_transmit(void *bus_priv, struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + return 0; +} + +static int +mlxsw_i2c_init(void *bus_priv, struct mlxsw_core *mlxsw_core, + const struct mlxsw_config_profile *profile, + struct mlxsw_res *res) +{ + struct mlxsw_i2c *mlxsw_i2c = bus_priv; + char *mbox; + int err; + + mlxsw_i2c->core = mlxsw_core; + + mbox = mlxsw_cmd_mbox_alloc(); + if (!mbox) + return -ENOMEM; + + err = mlxsw_cmd_query_fw(mlxsw_core, mbox); + if (err) + goto mbox_put; + + mlxsw_i2c->bus_info.fw_rev.major = + mlxsw_cmd_mbox_query_fw_fw_rev_major_get(mbox); + mlxsw_i2c->bus_info.fw_rev.minor = + mlxsw_cmd_mbox_query_fw_fw_rev_minor_get(mbox); + mlxsw_i2c->bus_info.fw_rev.subminor = + mlxsw_cmd_mbox_query_fw_fw_rev_subminor_get(mbox); + + err = mlxsw_core_resources_query(mlxsw_core, mbox, res); + +mbox_put: + mlxsw_cmd_mbox_free(mbox); + return err; +} + +static void mlxsw_i2c_fini(void *bus_priv) +{ + struct mlxsw_i2c *mlxsw_i2c = bus_priv; + + mlxsw_i2c->core = NULL; +} + +static void mlxsw_i2c_work_handler(struct work_struct *work) +{ + struct mlxsw_i2c *mlxsw_i2c; + + mlxsw_i2c = container_of(work, struct mlxsw_i2c, irq_work); + mlxsw_core_irq_event_handlers_call(mlxsw_i2c->core); +} + +static irqreturn_t mlxsw_i2c_irq_handler(int irq, void *dev) +{ + struct mlxsw_i2c *mlxsw_i2c = dev; + + mlxsw_core_schedule_work(&mlxsw_i2c->irq_work); + + /* Interrupt handler shares IRQ line with 'main' interrupt handler. + * Return here IRQ_NONE, while main handler will return IRQ_HANDLED. + */ + return IRQ_NONE; +} + +static int mlxsw_i2c_irq_init(struct mlxsw_i2c *mlxsw_i2c, u8 addr) +{ + int err; + + /* Initialize interrupt handler if system hotplug driver is reachable, + * otherwise interrupt line is not enabled and interrupts will not be + * raised to CPU. Also request_irq() call will be not valid. + */ + if (!IS_REACHABLE(CONFIG_MLXREG_HOTPLUG)) + return 0; + + /* Set default interrupt line. 
*/ + if (mlxsw_i2c->pdata && mlxsw_i2c->pdata->irq) + mlxsw_i2c->irq = mlxsw_i2c->pdata->irq; + else if (addr == MLXSW_FAST_I2C_SLAVE) + mlxsw_i2c->irq = MLXSW_I2C_DEFAULT_IRQ; + + if (!mlxsw_i2c->irq) + return 0; + + INIT_WORK(&mlxsw_i2c->irq_work, mlxsw_i2c_work_handler); + err = request_irq(mlxsw_i2c->irq, mlxsw_i2c_irq_handler, + IRQF_TRIGGER_FALLING | IRQF_SHARED, "mlxsw-i2c", + mlxsw_i2c); + if (err) { + dev_err(mlxsw_i2c->bus_info.dev, "Failed to request irq: %d\n", + err); + return err; + } + + return 0; +} + +static void mlxsw_i2c_irq_fini(struct mlxsw_i2c *mlxsw_i2c) +{ + if (!IS_REACHABLE(CONFIG_MLXREG_HOTPLUG) || !mlxsw_i2c->irq) + return; + cancel_work_sync(&mlxsw_i2c->irq_work); + free_irq(mlxsw_i2c->irq, mlxsw_i2c); +} + +static const struct mlxsw_bus mlxsw_i2c_bus = { + .kind = "i2c", + .init = mlxsw_i2c_init, + .fini = mlxsw_i2c_fini, + .skb_transmit_busy = mlxsw_i2c_skb_transmit_busy, + .skb_transmit = mlxsw_i2c_skb_transmit, + .cmd_exec = mlxsw_i2c_cmd_exec, +}; + +static int mlxsw_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + const struct i2c_adapter_quirks *quirks = client->adapter->quirks; + struct mlxsw_i2c *mlxsw_i2c; + u8 status; + int err; + + mlxsw_i2c = devm_kzalloc(&client->dev, sizeof(*mlxsw_i2c), GFP_KERNEL); + if (!mlxsw_i2c) + return -ENOMEM; + + if (quirks) { + if ((quirks->max_read_len && + quirks->max_read_len < MLXSW_I2C_BLK_DEF) || + (quirks->max_write_len && + quirks->max_write_len < MLXSW_I2C_BLK_DEF)) { + dev_err(&client->dev, "Insufficient transaction buffer length\n"); + return -EOPNOTSUPP; + } + + mlxsw_i2c->block_size = min_t(u16, MLXSW_I2C_BLK_MAX, + min_t(u16, quirks->max_read_len, + quirks->max_write_len)); + } else { + mlxsw_i2c->block_size = MLXSW_I2C_BLK_DEF; + } + + i2c_set_clientdata(client, mlxsw_i2c); + mutex_init(&mlxsw_i2c->cmd.lock); + + /* In order to use mailboxes through the i2c, a special area is reserved + * on the i2c address space that can be used for input and output + * mailboxes. Such mailboxes are called local mailboxes. When using a + * local mailbox, software should specify 0 as the Input/Output + * parameters. The location of the Local Mailbox addresses on the i2c + * space can be retrieved through the QUERY_FW command. + * For this purpose QUERY_FW is to be issued with opcode modifier equal + * to 0x01. For such a command the output parameter is an immediate value. + * Here the QUERY_FW command is invoked for ASIC probing and for getting + * the local mailbox addresses from the immediate output parameters. + */ + + /* Prepare and write out Command Interface Register for transaction */ + err = mlxsw_i2c_write_cmd(client, mlxsw_i2c, 1); + if (err) { + dev_err(&client->dev, "Could not start transaction"); + goto errout; + } + + /* Wait until go bit is cleared. */ + err = mlxsw_i2c_wait_go_bit(client, mlxsw_i2c, &status); + if (err) { + dev_err(&client->dev, "HW semaphore is not released"); + goto errout; + } + + /* Validate transaction completion status. */ + if (status) { + dev_err(&client->dev, "Bad transaction completion status %x\n", + status); + err = -EIO; + goto errout; + } + + /* Get mailbox offsets. */ + err = mlxsw_i2c_get_mbox(client, mlxsw_i2c); + if (err < 0) { + dev_err(&client->dev, "Fail to get mailboxes\n"); + goto errout; + } + + dev_info(&client->dev, "%s mb size=%x off=0x%08x out mb size=%x off=0x%08x\n", + id->name, mlxsw_i2c->cmd.mb_size_in, + mlxsw_i2c->cmd.mb_off_in, mlxsw_i2c->cmd.mb_size_out, + mlxsw_i2c->cmd.mb_off_out); + + /* Register device bus.
*/ + mlxsw_i2c->bus_info.device_kind = id->name; + mlxsw_i2c->bus_info.device_name = client->name; + mlxsw_i2c->bus_info.dev = &client->dev; + mlxsw_i2c->bus_info.low_frequency = true; + mlxsw_i2c->dev = &client->dev; + mlxsw_i2c->pdata = client->dev.platform_data; + + err = mlxsw_i2c_irq_init(mlxsw_i2c, client->addr); + if (err) + goto errout; + + err = mlxsw_core_bus_device_register(&mlxsw_i2c->bus_info, + &mlxsw_i2c_bus, mlxsw_i2c, false, + NULL, NULL); + if (err) { + dev_err(&client->dev, "Fail to register core bus\n"); + goto err_bus_device_register; + } + + return 0; + +err_bus_device_register: + mlxsw_i2c_irq_fini(mlxsw_i2c); +errout: + mutex_destroy(&mlxsw_i2c->cmd.lock); + i2c_set_clientdata(client, NULL); + + return err; +} + +static void mlxsw_i2c_remove(struct i2c_client *client) +{ + struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client); + + mlxsw_core_bus_device_unregister(mlxsw_i2c->core, false); + mlxsw_i2c_irq_fini(mlxsw_i2c); + mutex_destroy(&mlxsw_i2c->cmd.lock); +} + +int mlxsw_i2c_driver_register(struct i2c_driver *i2c_driver) +{ + i2c_driver->probe = mlxsw_i2c_probe; + i2c_driver->remove = mlxsw_i2c_remove; + return i2c_add_driver(i2c_driver); +} +EXPORT_SYMBOL(mlxsw_i2c_driver_register); + +void mlxsw_i2c_driver_unregister(struct i2c_driver *i2c_driver) +{ + i2c_del_driver(i2c_driver); +} +EXPORT_SYMBOL(mlxsw_i2c_driver_unregister); + +MODULE_AUTHOR("Vadim Pasternak <vadimp@mellanox.com>"); +MODULE_DESCRIPTION("Mellanox switch I2C interface driver"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.h b/drivers/net/ethernet/mellanox/mlxsw/i2c.h new file mode 100644 index 000000000..17e059d47 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_I2C_H +#define _MLXSW_I2C_H + +#include <linux/i2c.h> + +#if IS_ENABLED(CONFIG_MLXSW_I2C) + +int mlxsw_i2c_driver_register(struct i2c_driver *i2c_driver); +void mlxsw_i2c_driver_unregister(struct i2c_driver *i2c_driver); + +#else + +static inline int +mlxsw_i2c_driver_register(struct i2c_driver *i2c_driver) +{ + return -ENODEV; +} + +static inline void +mlxsw_i2c_driver_unregister(struct i2c_driver *i2c_driver) +{ +} + +#endif + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/item.h b/drivers/net/ethernet/mellanox/mlxsw/item.h new file mode 100644 index 000000000..cfafbeb42 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/item.h @@ -0,0 +1,553 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2015-2018 Mellanox Technologies. 
All rights reserved */ + +#ifndef _MLXSW_ITEM_H +#define _MLXSW_ITEM_H + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/bitops.h> + +struct mlxsw_item { + unsigned short offset; /* bytes in container */ + short step; /* step in bytes for indexed items */ + unsigned short in_step_offset; /* offset within one step */ + unsigned char shift; /* shift in bits */ + unsigned char element_size; /* size of element in bit array */ + bool no_real_shift; + union { + unsigned char bits; + unsigned short bytes; + } size; + const char *name; +}; + +static inline unsigned int +__mlxsw_item_offset(const struct mlxsw_item *item, unsigned short index, + size_t typesize) +{ + BUG_ON(index && !item->step); + if (item->offset % typesize != 0 || + item->step % typesize != 0 || + item->in_step_offset % typesize != 0) { + pr_err("mlxsw: item bug (name=%s,offset=%x,step=%x,in_step_offset=%x,typesize=%zx)\n", + item->name, item->offset, item->step, + item->in_step_offset, typesize); + BUG(); + } + + return ((item->offset + item->step * index + item->in_step_offset) / + typesize); +} + +static inline u8 __mlxsw_item_get8(const char *buf, + const struct mlxsw_item *item, + unsigned short index) +{ + unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u8)); + u8 *b = (u8 *) buf; + u8 tmp; + + tmp = b[offset]; + tmp >>= item->shift; + tmp &= GENMASK(item->size.bits - 1, 0); + if (item->no_real_shift) + tmp <<= item->shift; + return tmp; +} + +static inline void __mlxsw_item_set8(char *buf, const struct mlxsw_item *item, + unsigned short index, u8 val) +{ + unsigned int offset = __mlxsw_item_offset(item, index, + sizeof(u8)); + u8 *b = (u8 *) buf; + u8 mask = GENMASK(item->size.bits - 1, 0) << item->shift; + u8 tmp; + + if (!item->no_real_shift) + val <<= item->shift; + val &= mask; + tmp = b[offset]; + tmp &= ~mask; + tmp |= val; + b[offset] = tmp; +} + +static inline u16 __mlxsw_item_get16(const char *buf, + const struct mlxsw_item *item, + unsigned short index) +{ + unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u16)); + __be16 *b = (__be16 *) buf; + u16 tmp; + + tmp = be16_to_cpu(b[offset]); + tmp >>= item->shift; + tmp &= GENMASK(item->size.bits - 1, 0); + if (item->no_real_shift) + tmp <<= item->shift; + return tmp; +} + +static inline void __mlxsw_item_set16(char *buf, const struct mlxsw_item *item, + unsigned short index, u16 val) +{ + unsigned int offset = __mlxsw_item_offset(item, index, + sizeof(u16)); + __be16 *b = (__be16 *) buf; + u16 mask = GENMASK(item->size.bits - 1, 0) << item->shift; + u16 tmp; + + if (!item->no_real_shift) + val <<= item->shift; + val &= mask; + tmp = be16_to_cpu(b[offset]); + tmp &= ~mask; + tmp |= val; + b[offset] = cpu_to_be16(tmp); +} + +static inline u32 __mlxsw_item_get32(const char *buf, + const struct mlxsw_item *item, + unsigned short index) +{ + unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u32)); + __be32 *b = (__be32 *) buf; + u32 tmp; + + tmp = be32_to_cpu(b[offset]); + tmp >>= item->shift; + tmp &= GENMASK(item->size.bits - 1, 0); + if (item->no_real_shift) + tmp <<= item->shift; + return tmp; +} + +static inline void __mlxsw_item_set32(char *buf, const struct mlxsw_item *item, + unsigned short index, u32 val) +{ + unsigned int offset = __mlxsw_item_offset(item, index, + sizeof(u32)); + __be32 *b = (__be32 *) buf; + u32 mask = GENMASK(item->size.bits - 1, 0) << item->shift; + u32 tmp; + + if (!item->no_real_shift) + val <<= item->shift; + val &= mask; + tmp = be32_to_cpu(b[offset]); + tmp &= ~mask; + tmp |= 
val; + b[offset] = cpu_to_be32(tmp); +} + +static inline u64 __mlxsw_item_get64(const char *buf, + const struct mlxsw_item *item, + unsigned short index) +{ + unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u64)); + __be64 *b = (__be64 *) buf; + u64 tmp; + + tmp = be64_to_cpu(b[offset]); + tmp >>= item->shift; + tmp &= GENMASK_ULL(item->size.bits - 1, 0); + if (item->no_real_shift) + tmp <<= item->shift; + return tmp; +} + +static inline void __mlxsw_item_set64(char *buf, const struct mlxsw_item *item, + unsigned short index, u64 val) +{ + unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u64)); + __be64 *b = (__be64 *) buf; + u64 mask = GENMASK_ULL(item->size.bits - 1, 0) << item->shift; + u64 tmp; + + if (!item->no_real_shift) + val <<= item->shift; + val &= mask; + tmp = be64_to_cpu(b[offset]); + tmp &= ~mask; + tmp |= val; + b[offset] = cpu_to_be64(tmp); +} + +static inline void __mlxsw_item_memcpy_from(const char *buf, char *dst, + const struct mlxsw_item *item, + unsigned short index) +{ + unsigned int offset = __mlxsw_item_offset(item, index, sizeof(char)); + + memcpy(dst, &buf[offset], item->size.bytes); +} + +static inline void __mlxsw_item_memcpy_to(char *buf, const char *src, + const struct mlxsw_item *item, + unsigned short index) +{ + unsigned int offset = __mlxsw_item_offset(item, index, sizeof(char)); + + memcpy(&buf[offset], src, item->size.bytes); +} + +static inline char *__mlxsw_item_data(char *buf, const struct mlxsw_item *item, + unsigned short index) +{ + unsigned int offset = __mlxsw_item_offset(item, index, sizeof(char)); + + return &buf[offset]; +} + +static inline u16 +__mlxsw_item_bit_array_offset(const struct mlxsw_item *item, + u16 index, u8 *shift) +{ + u16 max_index, be_index; + u16 offset; /* byte offset inside the array */ + u8 in_byte_index; + + BUG_ON(index && !item->element_size); + if (item->offset % sizeof(u32) != 0 || + BITS_PER_BYTE % item->element_size != 0) { + pr_err("mlxsw: item bug (name=%s,offset=%x,element_size=%x)\n", + item->name, item->offset, item->element_size); + BUG(); + } + + max_index = (item->size.bytes << 3) / item->element_size - 1; + be_index = max_index - index; + offset = be_index * item->element_size >> 3; + in_byte_index = index % (BITS_PER_BYTE / item->element_size); + *shift = in_byte_index * item->element_size; + + return item->offset + offset; +} + +static inline u8 __mlxsw_item_bit_array_get(const char *buf, + const struct mlxsw_item *item, + u16 index) +{ + u8 shift, tmp; + u16 offset = __mlxsw_item_bit_array_offset(item, index, &shift); + + tmp = buf[offset]; + tmp >>= shift; + tmp &= GENMASK(item->element_size - 1, 0); + return tmp; +} + +static inline void __mlxsw_item_bit_array_set(char *buf, + const struct mlxsw_item *item, + u16 index, u8 val) +{ + u8 shift, tmp; + u16 offset = __mlxsw_item_bit_array_offset(item, index, &shift); + u8 mask = GENMASK(item->element_size - 1, 0) << shift; + + val <<= shift; + val &= mask; + tmp = buf[offset]; + tmp &= ~mask; + tmp |= val; + buf[offset] = tmp; +} + +#define __ITEM_NAME(_type, _cname, _iname) \ + mlxsw_##_type##_##_cname##_##_iname##_item + +/* _type: cmd_mbox, reg, etc. + * _cname: container name (e.g.
command name, register name) + * _iname: item name within the container + */ + +#define MLXSW_ITEM8(_type, _cname, _iname, _offset, _shift, _sizebits) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ + .offset = _offset, \ + .shift = _shift, \ + .size = {.bits = _sizebits,}, \ + .name = #_type "_" #_cname "_" #_iname, \ +}; \ +static inline u8 __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf) \ +{ \ + return __mlxsw_item_get8(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \ +} \ +static inline void __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u8 val) \ +{ \ + __mlxsw_item_set8(buf, &__ITEM_NAME(_type, _cname, _iname), 0, val); \ +} + +#define MLXSW_ITEM8_INDEXED(_type, _cname, _iname, _offset, _shift, _sizebits, \ + _step, _instepoffset, _norealshift) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ + .offset = _offset, \ + .step = _step, \ + .in_step_offset = _instepoffset, \ + .shift = _shift, \ + .no_real_shift = _norealshift, \ + .size = {.bits = _sizebits,}, \ + .name = #_type "_" #_cname "_" #_iname, \ +}; \ +static inline u8 __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, unsigned short index)\ +{ \ + return __mlxsw_item_get8(buf, &__ITEM_NAME(_type, _cname, _iname), \ + index); \ +} \ +static inline void __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, unsigned short index, \ + u8 val) \ +{ \ + __mlxsw_item_set8(buf, &__ITEM_NAME(_type, _cname, _iname), \ + index, val); \ +} + +#define MLXSW_ITEM16(_type, _cname, _iname, _offset, _shift, _sizebits) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ + .offset = _offset, \ + .shift = _shift, \ + .size = {.bits = _sizebits,}, \ + .name = #_type "_" #_cname "_" #_iname, \ +}; \ +static inline u16 __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf) \ +{ \ + return __mlxsw_item_get16(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \ +} \ +static inline void __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u16 val) \ +{ \ + __mlxsw_item_set16(buf, &__ITEM_NAME(_type, _cname, _iname), 0, val); \ +} + +#define MLXSW_ITEM16_INDEXED(_type, _cname, _iname, _offset, _shift, _sizebits, \ + _step, _instepoffset, _norealshift) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ + .offset = _offset, \ + .step = _step, \ + .in_step_offset = _instepoffset, \ + .shift = _shift, \ + .no_real_shift = _norealshift, \ + .size = {.bits = _sizebits,}, \ + .name = #_type "_" #_cname "_" #_iname, \ +}; \ +static inline u16 __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, unsigned short index)\ +{ \ + return __mlxsw_item_get16(buf, &__ITEM_NAME(_type, _cname, _iname), \ + index); \ +} \ +static inline void __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, unsigned short index, \ + u16 val) \ +{ \ + __mlxsw_item_set16(buf, &__ITEM_NAME(_type, _cname, _iname), \ + index, val); \ +} + +#define MLXSW_ITEM32(_type, _cname, _iname, _offset, _shift, _sizebits) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ + .offset = _offset, \ + .shift = _shift, \ + .size = {.bits = _sizebits,}, \ + .name = #_type "_" #_cname "_" #_iname, \ +}; \ +static inline u32 __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf) \ +{ \ + return __mlxsw_item_get32(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \ +} \ +static inline void __maybe_unused \ 
+mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u32 val) \ +{ \ + __mlxsw_item_set32(buf, &__ITEM_NAME(_type, _cname, _iname), 0, val); \ +} + +#define LOCAL_PORT_LSB_SIZE 8 +#define LOCAL_PORT_MSB_SIZE 2 + +#define MLXSW_ITEM32_LP(_type, _cname, _offset1, _shift1, _offset2, _shift2) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, local_port) = { \ + .offset = _offset1, \ + .shift = _shift1, \ + .size = {.bits = LOCAL_PORT_LSB_SIZE,}, \ + .name = #_type "_" #_cname "_local_port", \ +}; \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, lp_msb) = { \ + .offset = _offset2, \ + .shift = _shift2, \ + .size = {.bits = LOCAL_PORT_MSB_SIZE,}, \ + .name = #_type "_" #_cname "_lp_msb", \ +}; \ +static inline u32 __maybe_unused \ +mlxsw_##_type##_##_cname##_local_port_get(const char *buf) \ +{ \ + u32 local_port, lp_msb; \ + \ + local_port = __mlxsw_item_get32(buf, &__ITEM_NAME(_type, _cname, \ + local_port), 0); \ + lp_msb = __mlxsw_item_get32(buf, &__ITEM_NAME(_type, _cname, lp_msb), \ + 0); \ + return (lp_msb << LOCAL_PORT_LSB_SIZE) + local_port; \ +} \ +static inline void __maybe_unused \ +mlxsw_##_type##_##_cname##_local_port_set(char *buf, u32 val) \ +{ \ + __mlxsw_item_set32(buf, &__ITEM_NAME(_type, _cname, local_port), 0, \ + val & ((1 << LOCAL_PORT_LSB_SIZE) - 1)); \ + __mlxsw_item_set32(buf, &__ITEM_NAME(_type, _cname, lp_msb), 0, \ + val >> LOCAL_PORT_LSB_SIZE); \ +} + +#define MLXSW_ITEM32_INDEXED(_type, _cname, _iname, _offset, _shift, _sizebits, \ + _step, _instepoffset, _norealshift) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ + .offset = _offset, \ + .step = _step, \ + .in_step_offset = _instepoffset, \ + .shift = _shift, \ + .no_real_shift = _norealshift, \ + .size = {.bits = _sizebits,}, \ + .name = #_type "_" #_cname "_" #_iname, \ +}; \ +static inline u32 __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, unsigned short index)\ +{ \ + return __mlxsw_item_get32(buf, &__ITEM_NAME(_type, _cname, _iname), \ + index); \ +} \ +static inline void __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, unsigned short index, \ + u32 val) \ +{ \ + __mlxsw_item_set32(buf, &__ITEM_NAME(_type, _cname, _iname), \ + index, val); \ +} + +#define MLXSW_ITEM64(_type, _cname, _iname, _offset, _shift, _sizebits) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ + .offset = _offset, \ + .shift = _shift, \ + .size = {.bits = _sizebits,}, \ + .name = #_type "_" #_cname "_" #_iname, \ +}; \ +static inline u64 __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf) \ +{ \ + return __mlxsw_item_get64(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \ +} \ +static inline void __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u64 val) \ +{ \ + __mlxsw_item_set64(buf, &__ITEM_NAME(_type, _cname, _iname), 0, val); \ +} + +#define MLXSW_ITEM64_INDEXED(_type, _cname, _iname, _offset, _shift, \ + _sizebits, _step, _instepoffset, _norealshift) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ + .offset = _offset, \ + .step = _step, \ + .in_step_offset = _instepoffset, \ + .shift = _shift, \ + .no_real_shift = _norealshift, \ + .size = {.bits = _sizebits,}, \ + .name = #_type "_" #_cname "_" #_iname, \ +}; \ +static inline u64 __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, unsigned short index)\ +{ \ + return __mlxsw_item_get64(buf, &__ITEM_NAME(_type, _cname, _iname), \ + index); \ +} \ +static inline void __maybe_unused \ 
+mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, unsigned short index, \ + u64 val) \ +{ \ + __mlxsw_item_set64(buf, &__ITEM_NAME(_type, _cname, _iname), \ + index, val); \ +} + +#define MLXSW_ITEM_BUF(_type, _cname, _iname, _offset, _sizebytes) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ + .offset = _offset, \ + .size = {.bytes = _sizebytes,}, \ + .name = #_type "_" #_cname "_" #_iname, \ +}; \ +static inline void __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_memcpy_from(const char *buf, char *dst) \ +{ \ + __mlxsw_item_memcpy_from(buf, dst, \ + &__ITEM_NAME(_type, _cname, _iname), 0); \ +} \ +static inline void __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_memcpy_to(char *buf, const char *src) \ +{ \ + __mlxsw_item_memcpy_to(buf, src, \ + &__ITEM_NAME(_type, _cname, _iname), 0); \ +} \ +static inline char * __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_data(char *buf) \ +{ \ + return __mlxsw_item_data(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \ +} + +#define MLXSW_ITEM_BUF_INDEXED(_type, _cname, _iname, _offset, _sizebytes, \ + _step, _instepoffset) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ + .offset = _offset, \ + .step = _step, \ + .in_step_offset = _instepoffset, \ + .size = {.bytes = _sizebytes,}, \ + .name = #_type "_" #_cname "_" #_iname, \ +}; \ +static inline void __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_memcpy_from(const char *buf, \ + unsigned short index, \ + char *dst) \ +{ \ + __mlxsw_item_memcpy_from(buf, dst, \ + &__ITEM_NAME(_type, _cname, _iname), index); \ +} \ +static inline void __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_memcpy_to(char *buf, \ + unsigned short index, \ + const char *src) \ +{ \ + __mlxsw_item_memcpy_to(buf, src, \ + &__ITEM_NAME(_type, _cname, _iname), index); \ +} \ +static inline char * __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_data(char *buf, unsigned short index) \ +{ \ + return __mlxsw_item_data(buf, \ + &__ITEM_NAME(_type, _cname, _iname), index); \ +} + +#define MLXSW_ITEM_BIT_ARRAY(_type, _cname, _iname, _offset, _sizebytes, \ + _element_size) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ + .offset = _offset, \ + .element_size = _element_size, \ + .size = {.bytes = _sizebytes,}, \ + .name = #_type "_" #_cname "_" #_iname, \ +}; \ +static inline u8 __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, u16 index) \ +{ \ + return __mlxsw_item_bit_array_get(buf, \ + &__ITEM_NAME(_type, _cname, _iname), \ + index); \ +} \ +static inline void __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u16 index, u8 val) \ +{ \ + return __mlxsw_item_bit_array_set(buf, \ + &__ITEM_NAME(_type, _cname, _iname), \ + index, val); \ +} \ + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c new file mode 100644 index 000000000..15116d930 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -0,0 +1,760 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2016-2019 Mellanox Technologies. 
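 */

/*
 * Editorial note, not part of the original patch: the MLXSW_ITEM*() macros
 * in item.h above generate typed get/set accessors for fields of a raw
 * register or mailbox payload. A minimal usage sketch, with a hypothetical
 * 4-bit "status" field at byte offset 0x08 of a hypothetical "foo" register
 * (names chosen for illustration only, they do not exist in the driver):
 *
 *	MLXSW_ITEM32(reg, foo, status, 0x08, 0, 4);
 *
 *	static void foo_status_example(char *payload)
 *	{
 *		mlxsw_reg_foo_status_set(payload, 2);
 *		WARN_ON(mlxsw_reg_foo_status_get(payload) != 2);
 *	}
 *
 * The generated helpers call __mlxsw_item_set32()/__mlxsw_item_get32(),
 * which apply the big-endian conversion plus the shift and GENMASK() mask
 * derived from the item description.
 */

/*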
All rights reserved */ + +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/i2c.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mod_devicetable.h> +#include <linux/types.h> + +#include "core.h" +#include "core_env.h" +#include "i2c.h" + +static const char mlxsw_m_driver_name[] = "mlxsw_minimal"; + +#define MLXSW_M_FWREV_MINOR 2000 +#define MLXSW_M_FWREV_SUBMINOR 1886 + +static const struct mlxsw_fw_rev mlxsw_m_fw_rev = { + .minor = MLXSW_M_FWREV_MINOR, + .subminor = MLXSW_M_FWREV_SUBMINOR, +}; + +struct mlxsw_m_port; + +struct mlxsw_m_line_card { + bool active; + int module_to_port[]; +}; + +struct mlxsw_m { + struct mlxsw_m_port **ports; + struct mlxsw_core *core; + const struct mlxsw_bus_info *bus_info; + u8 base_mac[ETH_ALEN]; + u8 max_ports; + u8 max_modules_per_slot; /* Maximum number of modules per-slot. */ + u8 num_of_slots; /* Including the main board. */ + struct mlxsw_m_line_card **line_cards; +}; + +struct mlxsw_m_port { + struct net_device *dev; + struct mlxsw_m *mlxsw_m; + u16 local_port; + u8 slot_index; + u8 module; + u8 module_offset; +}; + +static int mlxsw_m_base_mac_get(struct mlxsw_m *mlxsw_m) +{ + char spad_pl[MLXSW_REG_SPAD_LEN] = {0}; + int err; + + err = mlxsw_reg_query(mlxsw_m->core, MLXSW_REG(spad), spad_pl); + if (err) + return err; + mlxsw_reg_spad_base_mac_memcpy_from(spad_pl, mlxsw_m->base_mac); + return 0; +} + +static int mlxsw_m_port_open(struct net_device *dev) +{ + struct mlxsw_m_port *mlxsw_m_port = netdev_priv(dev); + struct mlxsw_m *mlxsw_m = mlxsw_m_port->mlxsw_m; + + return mlxsw_env_module_port_up(mlxsw_m->core, 0, + mlxsw_m_port->module); +} + +static int mlxsw_m_port_stop(struct net_device *dev) +{ + struct mlxsw_m_port *mlxsw_m_port = netdev_priv(dev); + struct mlxsw_m *mlxsw_m = mlxsw_m_port->mlxsw_m; + + mlxsw_env_module_port_down(mlxsw_m->core, 0, mlxsw_m_port->module); + return 0; +} + +static struct devlink_port * +mlxsw_m_port_get_devlink_port(struct net_device *dev) +{ + struct mlxsw_m_port *mlxsw_m_port = netdev_priv(dev); + struct mlxsw_m *mlxsw_m = mlxsw_m_port->mlxsw_m; + + return mlxsw_core_port_devlink_port_get(mlxsw_m->core, + mlxsw_m_port->local_port); +} + +static const struct net_device_ops mlxsw_m_port_netdev_ops = { + .ndo_open = mlxsw_m_port_open, + .ndo_stop = mlxsw_m_port_stop, + .ndo_get_devlink_port = mlxsw_m_port_get_devlink_port, +}; + +static void mlxsw_m_module_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlxsw_m_port *mlxsw_m_port = netdev_priv(dev); + struct mlxsw_m *mlxsw_m = mlxsw_m_port->mlxsw_m; + + strscpy(drvinfo->driver, mlxsw_m->bus_info->device_kind, + sizeof(drvinfo->driver)); + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%d", + mlxsw_m->bus_info->fw_rev.major, + mlxsw_m->bus_info->fw_rev.minor, + mlxsw_m->bus_info->fw_rev.subminor); + strscpy(drvinfo->bus_info, mlxsw_m->bus_info->device_name, + sizeof(drvinfo->bus_info)); +} + +static int mlxsw_m_get_module_info(struct net_device *netdev, + struct ethtool_modinfo *modinfo) +{ + struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev); + struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core; + + return mlxsw_env_get_module_info(netdev, core, + mlxsw_m_port->slot_index, + mlxsw_m_port->module, modinfo); +} + +static int +mlxsw_m_get_module_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee, + u8 *data) +{ + struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev); + struct mlxsw_core *core = 
mlxsw_m_port->mlxsw_m->core; + + return mlxsw_env_get_module_eeprom(netdev, core, + mlxsw_m_port->slot_index, + mlxsw_m_port->module, ee, data); +} + +static int +mlxsw_m_get_module_eeprom_by_page(struct net_device *netdev, + const struct ethtool_module_eeprom *page, + struct netlink_ext_ack *extack) +{ + struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev); + struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core; + + return mlxsw_env_get_module_eeprom_by_page(core, + mlxsw_m_port->slot_index, + mlxsw_m_port->module, + page, extack); +} + +static int mlxsw_m_reset(struct net_device *netdev, u32 *flags) +{ + struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev); + struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core; + + return mlxsw_env_reset_module(netdev, core, mlxsw_m_port->slot_index, + mlxsw_m_port->module, + flags); +} + +static int +mlxsw_m_get_module_power_mode(struct net_device *netdev, + struct ethtool_module_power_mode_params *params, + struct netlink_ext_ack *extack) +{ + struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev); + struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core; + + return mlxsw_env_get_module_power_mode(core, mlxsw_m_port->slot_index, + mlxsw_m_port->module, + params, extack); +} + +static int +mlxsw_m_set_module_power_mode(struct net_device *netdev, + const struct ethtool_module_power_mode_params *params, + struct netlink_ext_ack *extack) +{ + struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev); + struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core; + + return mlxsw_env_set_module_power_mode(core, mlxsw_m_port->slot_index, + mlxsw_m_port->module, + params->policy, extack); +} + +static const struct ethtool_ops mlxsw_m_port_ethtool_ops = { + .get_drvinfo = mlxsw_m_module_get_drvinfo, + .get_module_info = mlxsw_m_get_module_info, + .get_module_eeprom = mlxsw_m_get_module_eeprom, + .get_module_eeprom_by_page = mlxsw_m_get_module_eeprom_by_page, + .reset = mlxsw_m_reset, + .get_module_power_mode = mlxsw_m_get_module_power_mode, + .set_module_power_mode = mlxsw_m_set_module_power_mode, +}; + +static int +mlxsw_m_port_module_info_get(struct mlxsw_m *mlxsw_m, u16 local_port, + u8 *p_module, u8 *p_width, u8 *p_slot_index) +{ + char pmlp_pl[MLXSW_REG_PMLP_LEN]; + int err; + + mlxsw_reg_pmlp_pack(pmlp_pl, local_port); + err = mlxsw_reg_query(mlxsw_m->core, MLXSW_REG(pmlp), pmlp_pl); + if (err) + return err; + *p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0); + *p_width = mlxsw_reg_pmlp_width_get(pmlp_pl); + *p_slot_index = mlxsw_reg_pmlp_slot_index_get(pmlp_pl, 0); + + return 0; +} + +static int +mlxsw_m_port_dev_addr_get(struct mlxsw_m_port *mlxsw_m_port) +{ + struct mlxsw_m *mlxsw_m = mlxsw_m_port->mlxsw_m; + char ppad_pl[MLXSW_REG_PPAD_LEN]; + u8 addr[ETH_ALEN]; + int err; + + mlxsw_reg_ppad_pack(ppad_pl, false, 0); + err = mlxsw_reg_query(mlxsw_m->core, MLXSW_REG(ppad), ppad_pl); + if (err) + return err; + mlxsw_reg_ppad_mac_memcpy_from(ppad_pl, addr); + eth_hw_addr_gen(mlxsw_m_port->dev, addr, mlxsw_m_port->module + 1 + + mlxsw_m_port->module_offset); + return 0; +} + +static bool mlxsw_m_port_created(struct mlxsw_m *mlxsw_m, u16 local_port) +{ + return mlxsw_m->ports[local_port]; +} + +static int +mlxsw_m_port_create(struct mlxsw_m *mlxsw_m, u16 local_port, u8 slot_index, + u8 module) +{ + struct mlxsw_m_port *mlxsw_m_port; + struct net_device *dev; + int err; + + err = mlxsw_core_port_init(mlxsw_m->core, local_port, slot_index, + module + 1, false, 0, false, + 0, mlxsw_m->base_mac, + sizeof(mlxsw_m->base_mac)); + if (err) { + 
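		/* Core port init is the first step of port creation, so there
		 * is nothing to unwind yet; just report and return.
		 */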
dev_err(mlxsw_m->bus_info->dev, "Port %d: Failed to init core port\n", + local_port); + return err; + } + + dev = alloc_etherdev(sizeof(struct mlxsw_m_port)); + if (!dev) { + err = -ENOMEM; + goto err_alloc_etherdev; + } + + SET_NETDEV_DEV(dev, mlxsw_m->bus_info->dev); + dev_net_set(dev, mlxsw_core_net(mlxsw_m->core)); + mlxsw_m_port = netdev_priv(dev); + mlxsw_m_port->dev = dev; + mlxsw_m_port->mlxsw_m = mlxsw_m; + mlxsw_m_port->local_port = local_port; + mlxsw_m_port->module = module; + mlxsw_m_port->slot_index = slot_index; + /* Add module offset for line card. Offset for main board iz zero. + * For line card in slot #n offset is calculated as (#n - 1) + * multiplied by maximum modules number, which could be found on a line + * card. + */ + mlxsw_m_port->module_offset = mlxsw_m_port->slot_index ? + (mlxsw_m_port->slot_index - 1) * + mlxsw_m->max_modules_per_slot : 0; + + dev->netdev_ops = &mlxsw_m_port_netdev_ops; + dev->ethtool_ops = &mlxsw_m_port_ethtool_ops; + + err = mlxsw_m_port_dev_addr_get(mlxsw_m_port); + if (err) { + dev_err(mlxsw_m->bus_info->dev, "Port %d: Unable to get port mac address\n", + mlxsw_m_port->local_port); + goto err_dev_addr_get; + } + + netif_carrier_off(dev); + mlxsw_m->ports[local_port] = mlxsw_m_port; + err = register_netdev(dev); + if (err) { + dev_err(mlxsw_m->bus_info->dev, "Port %d: Failed to register netdev\n", + mlxsw_m_port->local_port); + goto err_register_netdev; + } + + mlxsw_core_port_eth_set(mlxsw_m->core, mlxsw_m_port->local_port, + mlxsw_m_port, dev); + + return 0; + +err_register_netdev: + mlxsw_m->ports[local_port] = NULL; +err_dev_addr_get: + free_netdev(dev); +err_alloc_etherdev: + mlxsw_core_port_fini(mlxsw_m->core, local_port); + return err; +} + +static void mlxsw_m_port_remove(struct mlxsw_m *mlxsw_m, u16 local_port) +{ + struct mlxsw_m_port *mlxsw_m_port = mlxsw_m->ports[local_port]; + + mlxsw_core_port_clear(mlxsw_m->core, local_port, mlxsw_m); + unregister_netdev(mlxsw_m_port->dev); /* This calls ndo_stop */ + mlxsw_m->ports[local_port] = NULL; + free_netdev(mlxsw_m_port->dev); + mlxsw_core_port_fini(mlxsw_m->core, local_port); +} + +static int* +mlxsw_m_port_mapping_get(struct mlxsw_m *mlxsw_m, u8 slot_index, u8 module) +{ + return &mlxsw_m->line_cards[slot_index]->module_to_port[module]; +} + +static int mlxsw_m_port_module_map(struct mlxsw_m *mlxsw_m, u16 local_port, + u8 *last_module) +{ + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_m->core); + u8 module, width, slot_index; + int *module_to_port; + int err; + + /* Fill out to local port mapping array */ + err = mlxsw_m_port_module_info_get(mlxsw_m, local_port, &module, + &width, &slot_index); + if (err) + return err; + + /* Skip if line card has been already configured */ + if (mlxsw_m->line_cards[slot_index]->active) + return 0; + if (!width) + return 0; + /* Skip, if port belongs to the cluster */ + if (module == *last_module) + return 0; + *last_module = module; + + if (WARN_ON_ONCE(module >= max_ports)) + return -EINVAL; + mlxsw_env_module_port_map(mlxsw_m->core, slot_index, module); + module_to_port = mlxsw_m_port_mapping_get(mlxsw_m, slot_index, module); + *module_to_port = local_port; + + return 0; +} + +static void +mlxsw_m_port_module_unmap(struct mlxsw_m *mlxsw_m, u8 slot_index, u8 module) +{ + int *module_to_port = mlxsw_m_port_mapping_get(mlxsw_m, slot_index, + module); + *module_to_port = -1; + mlxsw_env_module_port_unmap(mlxsw_m->core, slot_index, module); +} + +static int mlxsw_m_linecards_init(struct mlxsw_m *mlxsw_m) +{ + unsigned int max_ports = 
mlxsw_core_max_ports(mlxsw_m->core); + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + u8 num_of_modules; + int i, j, err; + + mlxsw_reg_mgpir_pack(mgpir_pl, 0); + err = mlxsw_reg_query(mlxsw_m->core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, &num_of_modules, + &mlxsw_m->num_of_slots); + /* If the system is modular, get the maximum number of modules per-slot. + * Otherwise, get the maximum number of modules on the main board. + */ + if (mlxsw_m->num_of_slots) + mlxsw_m->max_modules_per_slot = + mlxsw_reg_mgpir_max_modules_per_slot_get(mgpir_pl); + else + mlxsw_m->max_modules_per_slot = num_of_modules; + /* Add slot for main board. */ + mlxsw_m->num_of_slots += 1; + + mlxsw_m->ports = kcalloc(max_ports, sizeof(*mlxsw_m->ports), + GFP_KERNEL); + if (!mlxsw_m->ports) + return -ENOMEM; + + mlxsw_m->line_cards = kcalloc(mlxsw_m->num_of_slots, + sizeof(*mlxsw_m->line_cards), + GFP_KERNEL); + if (!mlxsw_m->line_cards) { + err = -ENOMEM; + goto err_kcalloc; + } + + for (i = 0; i < mlxsw_m->num_of_slots; i++) { + mlxsw_m->line_cards[i] = + kzalloc(struct_size(mlxsw_m->line_cards[i], + module_to_port, + mlxsw_m->max_modules_per_slot), + GFP_KERNEL); + if (!mlxsw_m->line_cards[i]) { + err = -ENOMEM; + goto err_kmalloc_array; + } + + /* Invalidate the entries of module to local port mapping array. */ + for (j = 0; j < mlxsw_m->max_modules_per_slot; j++) + mlxsw_m->line_cards[i]->module_to_port[j] = -1; + } + + return 0; + +err_kmalloc_array: + for (i--; i >= 0; i--) + kfree(mlxsw_m->line_cards[i]); + kfree(mlxsw_m->line_cards); +err_kcalloc: + kfree(mlxsw_m->ports); + return err; +} + +static void mlxsw_m_linecards_fini(struct mlxsw_m *mlxsw_m) +{ + int i = mlxsw_m->num_of_slots; + + for (i--; i >= 0; i--) + kfree(mlxsw_m->line_cards[i]); + kfree(mlxsw_m->line_cards); + kfree(mlxsw_m->ports); +} + +static void +mlxsw_m_linecard_port_module_unmap(struct mlxsw_m *mlxsw_m, u8 slot_index) +{ + int i; + + for (i = mlxsw_m->max_modules_per_slot - 1; i >= 0; i--) { + int *module_to_port; + + module_to_port = mlxsw_m_port_mapping_get(mlxsw_m, slot_index, i); + if (*module_to_port > 0) + mlxsw_m_port_module_unmap(mlxsw_m, slot_index, i); + } +} + +static int +mlxsw_m_linecard_ports_create(struct mlxsw_m *mlxsw_m, u8 slot_index) +{ + int *module_to_port; + int i, err; + + for (i = 0; i < mlxsw_m->max_modules_per_slot; i++) { + module_to_port = mlxsw_m_port_mapping_get(mlxsw_m, slot_index, i); + if (*module_to_port > 0) { + err = mlxsw_m_port_create(mlxsw_m, *module_to_port, + slot_index, i); + if (err) + goto err_port_create; + /* Mark slot as active */ + if (!mlxsw_m->line_cards[slot_index]->active) + mlxsw_m->line_cards[slot_index]->active = true; + } + } + return 0; + +err_port_create: + for (i--; i >= 0; i--) { + module_to_port = mlxsw_m_port_mapping_get(mlxsw_m, slot_index, i); + if (*module_to_port > 0 && + mlxsw_m_port_created(mlxsw_m, *module_to_port)) { + mlxsw_m_port_remove(mlxsw_m, *module_to_port); + /* Mark slot as inactive */ + if (mlxsw_m->line_cards[slot_index]->active) + mlxsw_m->line_cards[slot_index]->active = false; + } + } + return err; +} + +static void +mlxsw_m_linecard_ports_remove(struct mlxsw_m *mlxsw_m, u8 slot_index) +{ + int i; + + for (i = 0; i < mlxsw_m->max_modules_per_slot; i++) { + int *module_to_port = mlxsw_m_port_mapping_get(mlxsw_m, + slot_index, i); + + if (*module_to_port > 0 && + mlxsw_m_port_created(mlxsw_m, *module_to_port)) { + mlxsw_m_port_remove(mlxsw_m, *module_to_port); + mlxsw_m_port_module_unmap(mlxsw_m, 
slot_index, i); + } + } +} + +static int mlxsw_m_ports_module_map(struct mlxsw_m *mlxsw_m) +{ + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_m->core); + u8 last_module = max_ports; + int i, err; + + for (i = 1; i < max_ports; i++) { + err = mlxsw_m_port_module_map(mlxsw_m, i, &last_module); + if (err) + return err; + } + + return 0; +} + +static int mlxsw_m_ports_create(struct mlxsw_m *mlxsw_m) +{ + int err; + + /* Fill out module to local port mapping array */ + err = mlxsw_m_ports_module_map(mlxsw_m); + if (err) + goto err_ports_module_map; + + /* Create port objects for each valid entry */ + err = mlxsw_m_linecard_ports_create(mlxsw_m, 0); + if (err) + goto err_linecard_ports_create; + + return 0; + +err_linecard_ports_create: +err_ports_module_map: + mlxsw_m_linecard_port_module_unmap(mlxsw_m, 0); + + return err; +} + +static void mlxsw_m_ports_remove(struct mlxsw_m *mlxsw_m) +{ + mlxsw_m_linecard_ports_remove(mlxsw_m, 0); +} + +static void +mlxsw_m_ports_remove_selected(struct mlxsw_core *mlxsw_core, + bool (*selector)(void *priv, u16 local_port), + void *priv) +{ + struct mlxsw_m *mlxsw_m = mlxsw_core_driver_priv(mlxsw_core); + struct mlxsw_linecard *linecard_priv = priv; + struct mlxsw_m_line_card *linecard; + + linecard = mlxsw_m->line_cards[linecard_priv->slot_index]; + + if (WARN_ON(!linecard->active)) + return; + + mlxsw_m_linecard_ports_remove(mlxsw_m, linecard_priv->slot_index); + linecard->active = false; +} + +static int mlxsw_m_fw_rev_validate(struct mlxsw_m *mlxsw_m) +{ + const struct mlxsw_fw_rev *rev = &mlxsw_m->bus_info->fw_rev; + + /* Validate driver and FW are compatible. + * Do not check major version, since it defines chip type, while + * driver is supposed to support any type. + */ + if (mlxsw_core_fw_rev_minor_subminor_validate(rev, &mlxsw_m_fw_rev)) + return 0; + + dev_err(mlxsw_m->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver (required >= %d.%d.%d)\n", + rev->major, rev->minor, rev->subminor, rev->major, + mlxsw_m_fw_rev.minor, mlxsw_m_fw_rev.subminor); + + return -EINVAL; +} + +static void +mlxsw_m_got_active(struct mlxsw_core *mlxsw_core, u8 slot_index, void *priv) +{ + struct mlxsw_m_line_card *linecard; + struct mlxsw_m *mlxsw_m = priv; + int err; + + linecard = mlxsw_m->line_cards[slot_index]; + /* Skip if line card has been already configured during init */ + if (linecard->active) + return; + + /* Fill out module to local port mapping array */ + err = mlxsw_m_ports_module_map(mlxsw_m); + if (err) + goto err_ports_module_map; + + /* Create port objects for each valid entry */ + err = mlxsw_m_linecard_ports_create(mlxsw_m, slot_index); + if (err) { + dev_err(mlxsw_m->bus_info->dev, "Failed to create port for line card at slot %d\n", + slot_index); + goto err_linecard_ports_create; + } + + linecard->active = true; + + return; + +err_linecard_ports_create: +err_ports_module_map: + mlxsw_m_linecard_port_module_unmap(mlxsw_m, slot_index); +} + +static void +mlxsw_m_got_inactive(struct mlxsw_core *mlxsw_core, u8 slot_index, void *priv) +{ + struct mlxsw_m_line_card *linecard; + struct mlxsw_m *mlxsw_m = priv; + + linecard = mlxsw_m->line_cards[slot_index]; + + if (WARN_ON(!linecard->active)) + return; + + mlxsw_m_linecard_ports_remove(mlxsw_m, slot_index); + linecard->active = false; +} + +static struct mlxsw_linecards_event_ops mlxsw_m_event_ops = { + .got_active = mlxsw_m_got_active, + .got_inactive = mlxsw_m_got_inactive, +}; + +static int mlxsw_m_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info 
*mlxsw_bus_info, + struct netlink_ext_ack *extack) +{ + struct mlxsw_m *mlxsw_m = mlxsw_core_driver_priv(mlxsw_core); + int err; + + mlxsw_m->core = mlxsw_core; + mlxsw_m->bus_info = mlxsw_bus_info; + + err = mlxsw_m_fw_rev_validate(mlxsw_m); + if (err) + return err; + + err = mlxsw_m_base_mac_get(mlxsw_m); + if (err) { + dev_err(mlxsw_m->bus_info->dev, "Failed to get base mac\n"); + return err; + } + + err = mlxsw_m_linecards_init(mlxsw_m); + if (err) { + dev_err(mlxsw_m->bus_info->dev, "Failed to create line cards\n"); + return err; + } + + err = mlxsw_linecards_event_ops_register(mlxsw_core, + &mlxsw_m_event_ops, mlxsw_m); + if (err) { + dev_err(mlxsw_m->bus_info->dev, "Failed to register line cards operations\n"); + goto linecards_event_ops_register; + } + + err = mlxsw_m_ports_create(mlxsw_m); + if (err) { + dev_err(mlxsw_m->bus_info->dev, "Failed to create ports\n"); + goto err_ports_create; + } + + return 0; + +err_ports_create: + mlxsw_linecards_event_ops_unregister(mlxsw_core, + &mlxsw_m_event_ops, mlxsw_m); +linecards_event_ops_register: + mlxsw_m_linecards_fini(mlxsw_m); + return err; +} + +static void mlxsw_m_fini(struct mlxsw_core *mlxsw_core) +{ + struct mlxsw_m *mlxsw_m = mlxsw_core_driver_priv(mlxsw_core); + + mlxsw_m_ports_remove(mlxsw_m); + mlxsw_linecards_event_ops_unregister(mlxsw_core, + &mlxsw_m_event_ops, mlxsw_m); + mlxsw_m_linecards_fini(mlxsw_m); +} + +static const struct mlxsw_config_profile mlxsw_m_config_profile; + +static struct mlxsw_driver mlxsw_m_driver = { + .kind = mlxsw_m_driver_name, + .priv_size = sizeof(struct mlxsw_m), + .init = mlxsw_m_init, + .fini = mlxsw_m_fini, + .ports_remove_selected = mlxsw_m_ports_remove_selected, + .profile = &mlxsw_m_config_profile, +}; + +static const struct i2c_device_id mlxsw_m_i2c_id[] = { + { "mlxsw_minimal", 0}, + { }, +}; + +static struct i2c_driver mlxsw_m_i2c_driver = { + .driver.name = "mlxsw_minimal", + .class = I2C_CLASS_HWMON, + .id_table = mlxsw_m_i2c_id, +}; + +static int __init mlxsw_m_module_init(void) +{ + int err; + + err = mlxsw_core_driver_register(&mlxsw_m_driver); + if (err) + return err; + + err = mlxsw_i2c_driver_register(&mlxsw_m_i2c_driver); + if (err) + goto err_i2c_driver_register; + + return 0; + +err_i2c_driver_register: + mlxsw_core_driver_unregister(&mlxsw_m_driver); + + return err; +} + +static void __exit mlxsw_m_module_exit(void) +{ + mlxsw_i2c_driver_unregister(&mlxsw_m_i2c_driver); + mlxsw_core_driver_unregister(&mlxsw_m_driver); +} + +module_init(mlxsw_m_module_init); +module_exit(mlxsw_m_module_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Vadim Pasternak <vadimp@mellanox.com>"); +MODULE_DESCRIPTION("Mellanox minimal driver"); +MODULE_DEVICE_TABLE(i2c, mlxsw_m_i2c_id); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c new file mode 100644 index 000000000..51eea1f05 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -0,0 +1,2068 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2015-2018 Mellanox Technologies. 
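 */

/*
 * Editorial note, not part of the original patch: mlxsw_m_module_init()
 * above registers the generic mlxsw core driver first and the I2C bus
 * driver second, and mlxsw_m_module_exit() unwinds in the reverse order;
 * the error path likewise unregisters the core driver if the I2C
 * registration fails. The "minimal" driver registers no transmit path -
 * its netdevs only expose transceiver module EEPROM, reset and power-mode
 * controls through ethtool.
 */

/*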
All rights reserved */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/export.h> +#include <linux/err.h> +#include <linux/device.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/wait.h> +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/if_vlan.h> +#include <linux/log2.h> +#include <linux/string.h> + +#include "pci_hw.h" +#include "pci.h" +#include "core.h" +#include "cmd.h" +#include "port.h" +#include "resources.h" + +#define mlxsw_pci_write32(mlxsw_pci, reg, val) \ + iowrite32be(val, (mlxsw_pci)->hw_addr + (MLXSW_PCI_ ## reg)) +#define mlxsw_pci_read32(mlxsw_pci, reg) \ + ioread32be((mlxsw_pci)->hw_addr + (MLXSW_PCI_ ## reg)) + +enum mlxsw_pci_queue_type { + MLXSW_PCI_QUEUE_TYPE_SDQ, + MLXSW_PCI_QUEUE_TYPE_RDQ, + MLXSW_PCI_QUEUE_TYPE_CQ, + MLXSW_PCI_QUEUE_TYPE_EQ, +}; + +#define MLXSW_PCI_QUEUE_TYPE_COUNT 4 + +static const u16 mlxsw_pci_doorbell_type_offset[] = { + MLXSW_PCI_DOORBELL_SDQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_SDQ */ + MLXSW_PCI_DOORBELL_RDQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_RDQ */ + MLXSW_PCI_DOORBELL_CQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_CQ */ + MLXSW_PCI_DOORBELL_EQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_EQ */ +}; + +static const u16 mlxsw_pci_doorbell_arm_type_offset[] = { + 0, /* unused */ + 0, /* unused */ + MLXSW_PCI_DOORBELL_ARM_CQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_CQ */ + MLXSW_PCI_DOORBELL_ARM_EQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_EQ */ +}; + +struct mlxsw_pci_mem_item { + char *buf; + dma_addr_t mapaddr; + size_t size; +}; + +struct mlxsw_pci_queue_elem_info { + char *elem; /* pointer to actual dma mapped element mem chunk */ + union { + struct { + struct sk_buff *skb; + } sdq; + struct { + struct sk_buff *skb; + } rdq; + } u; +}; + +struct mlxsw_pci_queue { + spinlock_t lock; /* for queue accesses */ + struct mlxsw_pci_mem_item mem_item; + struct mlxsw_pci_queue_elem_info *elem_info; + u16 producer_counter; + u16 consumer_counter; + u16 count; /* number of elements in queue */ + u8 num; /* queue number */ + u8 elem_size; /* size of one element */ + enum mlxsw_pci_queue_type type; + struct tasklet_struct tasklet; /* queue processing tasklet */ + struct mlxsw_pci *pci; + union { + struct { + u32 comp_sdq_count; + u32 comp_rdq_count; + enum mlxsw_pci_cqe_v v; + } cq; + struct { + u32 ev_cmd_count; + u32 ev_comp_count; + u32 ev_other_count; + } eq; + } u; +}; + +struct mlxsw_pci_queue_type_group { + struct mlxsw_pci_queue *q; + u8 count; /* number of queues in group */ +}; + +struct mlxsw_pci { + struct pci_dev *pdev; + u8 __iomem *hw_addr; + u64 free_running_clock_offset; + u64 utc_sec_offset; + u64 utc_nsec_offset; + struct mlxsw_pci_queue_type_group queues[MLXSW_PCI_QUEUE_TYPE_COUNT]; + u32 doorbell_offset; + struct mlxsw_core *core; + struct { + struct mlxsw_pci_mem_item *items; + unsigned int count; + } fw_area; + struct { + struct mlxsw_pci_mem_item out_mbox; + struct mlxsw_pci_mem_item in_mbox; + struct mutex lock; /* Lock access to command registers */ + bool nopoll; + wait_queue_head_t wait; + bool wait_done; + struct { + u8 status; + u64 out_param; + } comp; + } cmd; + struct mlxsw_bus_info bus_info; + const struct pci_device_id *id; + enum mlxsw_pci_cqe_v max_cqe_ver; /* Maximal supported CQE version */ + u8 num_sdq_cqs; /* Number of CQs used for SDQs */ +}; + +static void mlxsw_pci_queue_tasklet_schedule(struct mlxsw_pci_queue *q) +{ + tasklet_schedule(&q->tasklet); +} + +static char *__mlxsw_pci_queue_elem_get(struct mlxsw_pci_queue *q, + size_t 
elem_size, int elem_index) +{ + return q->mem_item.buf + (elem_size * elem_index); +} + +static struct mlxsw_pci_queue_elem_info * +mlxsw_pci_queue_elem_info_get(struct mlxsw_pci_queue *q, int elem_index) +{ + return &q->elem_info[elem_index]; +} + +static struct mlxsw_pci_queue_elem_info * +mlxsw_pci_queue_elem_info_producer_get(struct mlxsw_pci_queue *q) +{ + int index = q->producer_counter & (q->count - 1); + + if ((u16) (q->producer_counter - q->consumer_counter) == q->count) + return NULL; + return mlxsw_pci_queue_elem_info_get(q, index); +} + +static struct mlxsw_pci_queue_elem_info * +mlxsw_pci_queue_elem_info_consumer_get(struct mlxsw_pci_queue *q) +{ + int index = q->consumer_counter & (q->count - 1); + + return mlxsw_pci_queue_elem_info_get(q, index); +} + +static char *mlxsw_pci_queue_elem_get(struct mlxsw_pci_queue *q, int elem_index) +{ + return mlxsw_pci_queue_elem_info_get(q, elem_index)->elem; +} + +static bool mlxsw_pci_elem_hw_owned(struct mlxsw_pci_queue *q, bool owner_bit) +{ + return owner_bit != !!(q->consumer_counter & q->count); +} + +static struct mlxsw_pci_queue_type_group * +mlxsw_pci_queue_type_group_get(struct mlxsw_pci *mlxsw_pci, + enum mlxsw_pci_queue_type q_type) +{ + return &mlxsw_pci->queues[q_type]; +} + +static u8 __mlxsw_pci_queue_count(struct mlxsw_pci *mlxsw_pci, + enum mlxsw_pci_queue_type q_type) +{ + struct mlxsw_pci_queue_type_group *queue_group; + + queue_group = mlxsw_pci_queue_type_group_get(mlxsw_pci, q_type); + return queue_group->count; +} + +static u8 mlxsw_pci_sdq_count(struct mlxsw_pci *mlxsw_pci) +{ + return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_SDQ); +} + +static u8 mlxsw_pci_cq_count(struct mlxsw_pci *mlxsw_pci) +{ + return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_CQ); +} + +static struct mlxsw_pci_queue * +__mlxsw_pci_queue_get(struct mlxsw_pci *mlxsw_pci, + enum mlxsw_pci_queue_type q_type, u8 q_num) +{ + return &mlxsw_pci->queues[q_type].q[q_num]; +} + +static struct mlxsw_pci_queue *mlxsw_pci_sdq_get(struct mlxsw_pci *mlxsw_pci, + u8 q_num) +{ + return __mlxsw_pci_queue_get(mlxsw_pci, + MLXSW_PCI_QUEUE_TYPE_SDQ, q_num); +} + +static struct mlxsw_pci_queue *mlxsw_pci_rdq_get(struct mlxsw_pci *mlxsw_pci, + u8 q_num) +{ + return __mlxsw_pci_queue_get(mlxsw_pci, + MLXSW_PCI_QUEUE_TYPE_RDQ, q_num); +} + +static struct mlxsw_pci_queue *mlxsw_pci_cq_get(struct mlxsw_pci *mlxsw_pci, + u8 q_num) +{ + return __mlxsw_pci_queue_get(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_CQ, q_num); +} + +static struct mlxsw_pci_queue *mlxsw_pci_eq_get(struct mlxsw_pci *mlxsw_pci, + u8 q_num) +{ + return __mlxsw_pci_queue_get(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_EQ, q_num); +} + +static void __mlxsw_pci_queue_doorbell_set(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q, + u16 val) +{ + mlxsw_pci_write32(mlxsw_pci, + DOORBELL(mlxsw_pci->doorbell_offset, + mlxsw_pci_doorbell_type_offset[q->type], + q->num), val); +} + +static void __mlxsw_pci_queue_doorbell_arm_set(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q, + u16 val) +{ + mlxsw_pci_write32(mlxsw_pci, + DOORBELL(mlxsw_pci->doorbell_offset, + mlxsw_pci_doorbell_arm_type_offset[q->type], + q->num), val); +} + +static void mlxsw_pci_queue_doorbell_producer_ring(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q) +{ + wmb(); /* ensure all writes are done before we ring a bell */ + __mlxsw_pci_queue_doorbell_set(mlxsw_pci, q, q->producer_counter); +} + +static void mlxsw_pci_queue_doorbell_consumer_ring(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q) +{ + 
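	/* Publish the consumer counter, offset by the queue size, to the
	 * device; the wmb() below ensures all prior descriptor writes are
	 * visible before the doorbell register is written.
	 */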
wmb(); /* ensure all writes are done before we ring a bell */ + __mlxsw_pci_queue_doorbell_set(mlxsw_pci, q, + q->consumer_counter + q->count); +} + +static void +mlxsw_pci_queue_doorbell_arm_consumer_ring(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q) +{ + wmb(); /* ensure all writes are done before we ring a bell */ + __mlxsw_pci_queue_doorbell_arm_set(mlxsw_pci, q, q->consumer_counter); +} + +static dma_addr_t __mlxsw_pci_queue_page_get(struct mlxsw_pci_queue *q, + int page_index) +{ + return q->mem_item.mapaddr + MLXSW_PCI_PAGE_SIZE * page_index; +} + +static int mlxsw_pci_sdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, + struct mlxsw_pci_queue *q) +{ + int tclass; + int lp; + int i; + int err; + + q->producer_counter = 0; + q->consumer_counter = 0; + tclass = q->num == MLXSW_PCI_SDQ_EMAD_INDEX ? MLXSW_PCI_SDQ_EMAD_TC : + MLXSW_PCI_SDQ_CTL_TC; + lp = q->num == MLXSW_PCI_SDQ_EMAD_INDEX ? MLXSW_CMD_MBOX_SW2HW_DQ_SDQ_LP_IGNORE_WQE : + MLXSW_CMD_MBOX_SW2HW_DQ_SDQ_LP_WQE; + + /* Set CQ of same number of this SDQ. */ + mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, q->num); + mlxsw_cmd_mbox_sw2hw_dq_sdq_lp_set(mbox, lp); + mlxsw_cmd_mbox_sw2hw_dq_sdq_tclass_set(mbox, tclass); + mlxsw_cmd_mbox_sw2hw_dq_log2_dq_sz_set(mbox, 3); /* 8 pages */ + for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) { + dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i); + + mlxsw_cmd_mbox_sw2hw_dq_pa_set(mbox, i, mapaddr); + } + + err = mlxsw_cmd_sw2hw_sdq(mlxsw_pci->core, mbox, q->num); + if (err) + return err; + mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); + return 0; +} + +static void mlxsw_pci_sdq_fini(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q) +{ + mlxsw_cmd_hw2sw_sdq(mlxsw_pci->core, q->num); +} + +static int mlxsw_pci_wqe_frag_map(struct mlxsw_pci *mlxsw_pci, char *wqe, + int index, char *frag_data, size_t frag_len, + int direction) +{ + struct pci_dev *pdev = mlxsw_pci->pdev; + dma_addr_t mapaddr; + + mapaddr = dma_map_single(&pdev->dev, frag_data, frag_len, direction); + if (unlikely(dma_mapping_error(&pdev->dev, mapaddr))) { + dev_err_ratelimited(&pdev->dev, "failed to dma map tx frag\n"); + return -EIO; + } + mlxsw_pci_wqe_address_set(wqe, index, mapaddr); + mlxsw_pci_wqe_byte_count_set(wqe, index, frag_len); + return 0; +} + +static void mlxsw_pci_wqe_frag_unmap(struct mlxsw_pci *mlxsw_pci, char *wqe, + int index, int direction) +{ + struct pci_dev *pdev = mlxsw_pci->pdev; + size_t frag_len = mlxsw_pci_wqe_byte_count_get(wqe, index); + dma_addr_t mapaddr = mlxsw_pci_wqe_address_get(wqe, index); + + if (!frag_len) + return; + dma_unmap_single(&pdev->dev, mapaddr, frag_len, direction); +} + +static int mlxsw_pci_rdq_skb_alloc(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue_elem_info *elem_info) +{ + size_t buf_len = MLXSW_PORT_MAX_MTU; + char *wqe = elem_info->elem; + struct sk_buff *skb; + int err; + + skb = netdev_alloc_skb_ip_align(NULL, buf_len); + if (!skb) + return -ENOMEM; + + err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, 0, skb->data, + buf_len, DMA_FROM_DEVICE); + if (err) + goto err_frag_map; + + elem_info->u.rdq.skb = skb; + return 0; + +err_frag_map: + dev_kfree_skb_any(skb); + return err; +} + +static void mlxsw_pci_rdq_skb_free(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue_elem_info *elem_info) +{ + struct sk_buff *skb; + char *wqe; + + skb = elem_info->u.rdq.skb; + wqe = elem_info->elem; + + mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE); + dev_kfree_skb_any(skb); +} + +static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, + 
struct mlxsw_pci_queue *q) +{ + struct mlxsw_pci_queue_elem_info *elem_info; + u8 sdq_count = mlxsw_pci_sdq_count(mlxsw_pci); + int i; + int err; + + q->producer_counter = 0; + q->consumer_counter = 0; + + /* Set CQ of same number of this RDQ with base + * above SDQ count as the lower ones are assigned to SDQs. + */ + mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, sdq_count + q->num); + mlxsw_cmd_mbox_sw2hw_dq_log2_dq_sz_set(mbox, 3); /* 8 pages */ + for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) { + dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i); + + mlxsw_cmd_mbox_sw2hw_dq_pa_set(mbox, i, mapaddr); + } + + err = mlxsw_cmd_sw2hw_rdq(mlxsw_pci->core, mbox, q->num); + if (err) + return err; + + mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); + + for (i = 0; i < q->count; i++) { + elem_info = mlxsw_pci_queue_elem_info_producer_get(q); + BUG_ON(!elem_info); + err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info); + if (err) + goto rollback; + /* Everything is set up, ring doorbell to pass elem to HW */ + q->producer_counter++; + mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); + } + + return 0; + +rollback: + for (i--; i >= 0; i--) { + elem_info = mlxsw_pci_queue_elem_info_get(q, i); + mlxsw_pci_rdq_skb_free(mlxsw_pci, elem_info); + } + mlxsw_cmd_hw2sw_rdq(mlxsw_pci->core, q->num); + + return err; +} + +static void mlxsw_pci_rdq_fini(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q) +{ + struct mlxsw_pci_queue_elem_info *elem_info; + int i; + + mlxsw_cmd_hw2sw_rdq(mlxsw_pci->core, q->num); + for (i = 0; i < q->count; i++) { + elem_info = mlxsw_pci_queue_elem_info_get(q, i); + mlxsw_pci_rdq_skb_free(mlxsw_pci, elem_info); + } +} + +static void mlxsw_pci_cq_pre_init(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q) +{ + q->u.cq.v = mlxsw_pci->max_cqe_ver; + + if (q->u.cq.v == MLXSW_PCI_CQE_V2 && + q->num < mlxsw_pci->num_sdq_cqs && + !mlxsw_core_sdq_supports_cqe_v2(mlxsw_pci->core)) + q->u.cq.v = MLXSW_PCI_CQE_V1; +} + +static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, + struct mlxsw_pci_queue *q) +{ + int i; + int err; + + q->consumer_counter = 0; + + for (i = 0; i < q->count; i++) { + char *elem = mlxsw_pci_queue_elem_get(q, i); + + mlxsw_pci_cqe_owner_set(q->u.cq.v, elem, 1); + } + + if (q->u.cq.v == MLXSW_PCI_CQE_V1) + mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox, + MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_1); + else if (q->u.cq.v == MLXSW_PCI_CQE_V2) + mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox, + MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_2); + + mlxsw_cmd_mbox_sw2hw_cq_c_eqn_set(mbox, MLXSW_PCI_EQ_COMP_NUM); + mlxsw_cmd_mbox_sw2hw_cq_st_set(mbox, 0); + mlxsw_cmd_mbox_sw2hw_cq_log_cq_size_set(mbox, ilog2(q->count)); + for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) { + dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i); + + mlxsw_cmd_mbox_sw2hw_cq_pa_set(mbox, i, mapaddr); + } + err = mlxsw_cmd_sw2hw_cq(mlxsw_pci->core, mbox, q->num); + if (err) + return err; + mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); + mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); + return 0; +} + +static void mlxsw_pci_cq_fini(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q) +{ + mlxsw_cmd_hw2sw_cq(mlxsw_pci->core, q->num); +} + +static unsigned int mlxsw_pci_read32_off(struct mlxsw_pci *mlxsw_pci, + ptrdiff_t off) +{ + return ioread32be(mlxsw_pci->hw_addr + off); +} + +static void mlxsw_pci_skb_cb_ts_set(struct mlxsw_pci *mlxsw_pci, + struct sk_buff *skb, + enum mlxsw_pci_cqe_v cqe_v, char *cqe) +{ + u8 ts_type; + + if (cqe_v != MLXSW_PCI_CQE_V2) + return; + + ts_type = 
mlxsw_pci_cqe2_time_stamp_type_get(cqe); + + if (ts_type != MLXSW_PCI_CQE_TIME_STAMP_TYPE_UTC && + ts_type != MLXSW_PCI_CQE_TIME_STAMP_TYPE_MIRROR_UTC) + return; + + mlxsw_skb_cb(skb)->cqe_ts.sec = mlxsw_pci_cqe2_time_stamp_sec_get(cqe); + mlxsw_skb_cb(skb)->cqe_ts.nsec = + mlxsw_pci_cqe2_time_stamp_nsec_get(cqe); +} + +static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q, + u16 consumer_counter_limit, + enum mlxsw_pci_cqe_v cqe_v, + char *cqe) +{ + struct pci_dev *pdev = mlxsw_pci->pdev; + struct mlxsw_pci_queue_elem_info *elem_info; + struct mlxsw_tx_info tx_info; + char *wqe; + struct sk_buff *skb; + int i; + + spin_lock(&q->lock); + elem_info = mlxsw_pci_queue_elem_info_consumer_get(q); + tx_info = mlxsw_skb_cb(elem_info->u.sdq.skb)->tx_info; + skb = elem_info->u.sdq.skb; + wqe = elem_info->elem; + for (i = 0; i < MLXSW_PCI_WQE_SG_ENTRIES; i++) + mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, i, DMA_TO_DEVICE); + + if (unlikely(!tx_info.is_emad && + skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + mlxsw_pci_skb_cb_ts_set(mlxsw_pci, skb, cqe_v, cqe); + mlxsw_core_ptp_transmitted(mlxsw_pci->core, skb, + tx_info.local_port); + skb = NULL; + } + + if (skb) + dev_kfree_skb_any(skb); + elem_info->u.sdq.skb = NULL; + + if (q->consumer_counter++ != consumer_counter_limit) + dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in SDQ\n"); + spin_unlock(&q->lock); +} + +static void mlxsw_pci_cqe_rdq_md_tx_port_init(struct sk_buff *skb, + const char *cqe) +{ + struct mlxsw_skb_cb *cb = mlxsw_skb_cb(skb); + + if (mlxsw_pci_cqe2_tx_lag_get(cqe)) { + cb->rx_md_info.tx_port_is_lag = true; + cb->rx_md_info.tx_lag_id = mlxsw_pci_cqe2_tx_lag_id_get(cqe); + cb->rx_md_info.tx_lag_port_index = + mlxsw_pci_cqe2_tx_lag_subport_get(cqe); + } else { + cb->rx_md_info.tx_port_is_lag = false; + cb->rx_md_info.tx_sys_port = + mlxsw_pci_cqe2_tx_system_port_get(cqe); + } + + if (cb->rx_md_info.tx_sys_port != MLXSW_PCI_CQE2_TX_PORT_MULTI_PORT && + cb->rx_md_info.tx_sys_port != MLXSW_PCI_CQE2_TX_PORT_INVALID) + cb->rx_md_info.tx_port_valid = 1; + else + cb->rx_md_info.tx_port_valid = 0; +} + +static void mlxsw_pci_cqe_rdq_md_init(struct sk_buff *skb, const char *cqe) +{ + struct mlxsw_skb_cb *cb = mlxsw_skb_cb(skb); + + cb->rx_md_info.tx_congestion = mlxsw_pci_cqe2_mirror_cong_get(cqe); + if (cb->rx_md_info.tx_congestion != MLXSW_PCI_CQE2_MIRROR_CONG_INVALID) + cb->rx_md_info.tx_congestion_valid = 1; + else + cb->rx_md_info.tx_congestion_valid = 0; + cb->rx_md_info.tx_congestion <<= MLXSW_PCI_CQE2_MIRROR_CONG_SHIFT; + + cb->rx_md_info.latency = mlxsw_pci_cqe2_mirror_latency_get(cqe); + if (cb->rx_md_info.latency != MLXSW_PCI_CQE2_MIRROR_LATENCY_INVALID) + cb->rx_md_info.latency_valid = 1; + else + cb->rx_md_info.latency_valid = 0; + + cb->rx_md_info.tx_tc = mlxsw_pci_cqe2_mirror_tclass_get(cqe); + if (cb->rx_md_info.tx_tc != MLXSW_PCI_CQE2_MIRROR_TCLASS_INVALID) + cb->rx_md_info.tx_tc_valid = 1; + else + cb->rx_md_info.tx_tc_valid = 0; + + mlxsw_pci_cqe_rdq_md_tx_port_init(skb, cqe); +} + +static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q, + u16 consumer_counter_limit, + enum mlxsw_pci_cqe_v cqe_v, char *cqe) +{ + struct pci_dev *pdev = mlxsw_pci->pdev; + struct mlxsw_pci_queue_elem_info *elem_info; + struct mlxsw_rx_info rx_info = {}; + char wqe[MLXSW_PCI_WQE_SIZE]; + struct sk_buff *skb; + u16 byte_count; + int err; + + elem_info = mlxsw_pci_queue_elem_info_consumer_get(q); + skb = elem_info->u.rdq.skb; + memcpy(wqe, 
elem_info->elem, MLXSW_PCI_WQE_SIZE); + + if (q->consumer_counter++ != consumer_counter_limit) + dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n"); + + err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info); + if (err) { + dev_err_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n"); + goto out; + } + + mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE); + + if (mlxsw_pci_cqe_lag_get(cqe_v, cqe)) { + rx_info.is_lag = true; + rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe_v, cqe); + rx_info.lag_port_index = + mlxsw_pci_cqe_lag_subport_get(cqe_v, cqe); + } else { + rx_info.is_lag = false; + rx_info.u.sys_port = mlxsw_pci_cqe_system_port_get(cqe); + } + + rx_info.trap_id = mlxsw_pci_cqe_trap_id_get(cqe); + + if (rx_info.trap_id == MLXSW_TRAP_ID_DISCARD_INGRESS_ACL || + rx_info.trap_id == MLXSW_TRAP_ID_DISCARD_EGRESS_ACL) { + u32 cookie_index = 0; + + if (mlxsw_pci->max_cqe_ver >= MLXSW_PCI_CQE_V2) + cookie_index = mlxsw_pci_cqe2_user_def_val_orig_pkt_len_get(cqe); + mlxsw_skb_cb(skb)->rx_md_info.cookie_index = cookie_index; + } else if (rx_info.trap_id >= MLXSW_TRAP_ID_MIRROR_SESSION0 && + rx_info.trap_id <= MLXSW_TRAP_ID_MIRROR_SESSION7 && + mlxsw_pci->max_cqe_ver >= MLXSW_PCI_CQE_V2) { + rx_info.mirror_reason = mlxsw_pci_cqe2_mirror_reason_get(cqe); + mlxsw_pci_cqe_rdq_md_init(skb, cqe); + } else if (rx_info.trap_id == MLXSW_TRAP_ID_PKT_SAMPLE && + mlxsw_pci->max_cqe_ver >= MLXSW_PCI_CQE_V2) { + mlxsw_pci_cqe_rdq_md_tx_port_init(skb, cqe); + } + + mlxsw_pci_skb_cb_ts_set(mlxsw_pci, skb, cqe_v, cqe); + + byte_count = mlxsw_pci_cqe_byte_count_get(cqe); + if (mlxsw_pci_cqe_crc_get(cqe_v, cqe)) + byte_count -= ETH_FCS_LEN; + skb_put(skb, byte_count); + mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info); + +out: + /* Everything is set up, ring doorbell to pass elem to HW */ + q->producer_counter++; + mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); + return; +} + +static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q) +{ + struct mlxsw_pci_queue_elem_info *elem_info; + char *elem; + bool owner_bit; + + elem_info = mlxsw_pci_queue_elem_info_consumer_get(q); + elem = elem_info->elem; + owner_bit = mlxsw_pci_cqe_owner_get(q->u.cq.v, elem); + if (mlxsw_pci_elem_hw_owned(q, owner_bit)) + return NULL; + q->consumer_counter++; + rmb(); /* make sure we read owned bit before the rest of elem */ + return elem; +} + +static void mlxsw_pci_cq_tasklet(struct tasklet_struct *t) +{ + struct mlxsw_pci_queue *q = from_tasklet(q, t, tasklet); + struct mlxsw_pci *mlxsw_pci = q->pci; + char *cqe; + int items = 0; + int credits = q->count >> 1; + + while ((cqe = mlxsw_pci_cq_sw_cqe_get(q))) { + u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe); + u8 sendq = mlxsw_pci_cqe_sr_get(q->u.cq.v, cqe); + u8 dqn = mlxsw_pci_cqe_dqn_get(q->u.cq.v, cqe); + char ncqe[MLXSW_PCI_CQE_SIZE_MAX]; + + memcpy(ncqe, cqe, q->elem_size); + mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); + + if (sendq) { + struct mlxsw_pci_queue *sdq; + + sdq = mlxsw_pci_sdq_get(mlxsw_pci, dqn); + mlxsw_pci_cqe_sdq_handle(mlxsw_pci, sdq, + wqe_counter, q->u.cq.v, ncqe); + q->u.cq.comp_sdq_count++; + } else { + struct mlxsw_pci_queue *rdq; + + rdq = mlxsw_pci_rdq_get(mlxsw_pci, dqn); + mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq, + wqe_counter, q->u.cq.v, ncqe); + q->u.cq.comp_rdq_count++; + } + if (++items == credits) + break; + } + if (items) + mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); +} + +static u16 mlxsw_pci_cq_elem_count(const struct mlxsw_pci_queue *q) +{ + return 
q->u.cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_COUNT : + MLXSW_PCI_CQE01_COUNT; +} + +static u8 mlxsw_pci_cq_elem_size(const struct mlxsw_pci_queue *q) +{ + return q->u.cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_SIZE : + MLXSW_PCI_CQE01_SIZE; +} + +static int mlxsw_pci_eq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, + struct mlxsw_pci_queue *q) +{ + int i; + int err; + + q->consumer_counter = 0; + + for (i = 0; i < q->count; i++) { + char *elem = mlxsw_pci_queue_elem_get(q, i); + + mlxsw_pci_eqe_owner_set(elem, 1); + } + + mlxsw_cmd_mbox_sw2hw_eq_int_msix_set(mbox, 1); /* MSI-X used */ + mlxsw_cmd_mbox_sw2hw_eq_st_set(mbox, 1); /* armed */ + mlxsw_cmd_mbox_sw2hw_eq_log_eq_size_set(mbox, ilog2(q->count)); + for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) { + dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i); + + mlxsw_cmd_mbox_sw2hw_eq_pa_set(mbox, i, mapaddr); + } + err = mlxsw_cmd_sw2hw_eq(mlxsw_pci->core, mbox, q->num); + if (err) + return err; + mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); + mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); + return 0; +} + +static void mlxsw_pci_eq_fini(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q) +{ + mlxsw_cmd_hw2sw_eq(mlxsw_pci->core, q->num); +} + +static void mlxsw_pci_eq_cmd_event(struct mlxsw_pci *mlxsw_pci, char *eqe) +{ + mlxsw_pci->cmd.comp.status = mlxsw_pci_eqe_cmd_status_get(eqe); + mlxsw_pci->cmd.comp.out_param = + ((u64) mlxsw_pci_eqe_cmd_out_param_h_get(eqe)) << 32 | + mlxsw_pci_eqe_cmd_out_param_l_get(eqe); + mlxsw_pci->cmd.wait_done = true; + wake_up(&mlxsw_pci->cmd.wait); +} + +static char *mlxsw_pci_eq_sw_eqe_get(struct mlxsw_pci_queue *q) +{ + struct mlxsw_pci_queue_elem_info *elem_info; + char *elem; + bool owner_bit; + + elem_info = mlxsw_pci_queue_elem_info_consumer_get(q); + elem = elem_info->elem; + owner_bit = mlxsw_pci_eqe_owner_get(elem); + if (mlxsw_pci_elem_hw_owned(q, owner_bit)) + return NULL; + q->consumer_counter++; + rmb(); /* make sure we read owned bit before the rest of elem */ + return elem; +} + +static void mlxsw_pci_eq_tasklet(struct tasklet_struct *t) +{ + struct mlxsw_pci_queue *q = from_tasklet(q, t, tasklet); + struct mlxsw_pci *mlxsw_pci = q->pci; + u8 cq_count = mlxsw_pci_cq_count(mlxsw_pci); + unsigned long active_cqns[BITS_TO_LONGS(MLXSW_PCI_CQS_MAX)]; + char *eqe; + u8 cqn; + bool cq_handle = false; + int items = 0; + int credits = q->count >> 1; + + memset(&active_cqns, 0, sizeof(active_cqns)); + + while ((eqe = mlxsw_pci_eq_sw_eqe_get(q))) { + + /* Command interface completion events are always received on + * queue MLXSW_PCI_EQ_ASYNC_NUM (EQ0) and completion events + * are mapped to queue MLXSW_PCI_EQ_COMP_NUM (EQ1). 
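	 * Up to half of the queue entries (the "credits") are drained per
	 * tasklet run; the consumer doorbell is then rung and re-armed, and
	 * a tasklet is scheduled for every completion queue collected in
	 * active_cqns.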
+ */ + switch (q->num) { + case MLXSW_PCI_EQ_ASYNC_NUM: + mlxsw_pci_eq_cmd_event(mlxsw_pci, eqe); + q->u.eq.ev_cmd_count++; + break; + case MLXSW_PCI_EQ_COMP_NUM: + cqn = mlxsw_pci_eqe_cqn_get(eqe); + set_bit(cqn, active_cqns); + cq_handle = true; + q->u.eq.ev_comp_count++; + break; + default: + q->u.eq.ev_other_count++; + } + if (++items == credits) + break; + } + if (items) { + mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); + mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); + } + + if (!cq_handle) + return; + for_each_set_bit(cqn, active_cqns, cq_count) { + q = mlxsw_pci_cq_get(mlxsw_pci, cqn); + mlxsw_pci_queue_tasklet_schedule(q); + } +} + +struct mlxsw_pci_queue_ops { + const char *name; + enum mlxsw_pci_queue_type type; + void (*pre_init)(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q); + int (*init)(struct mlxsw_pci *mlxsw_pci, char *mbox, + struct mlxsw_pci_queue *q); + void (*fini)(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q); + void (*tasklet)(struct tasklet_struct *t); + u16 (*elem_count_f)(const struct mlxsw_pci_queue *q); + u8 (*elem_size_f)(const struct mlxsw_pci_queue *q); + u16 elem_count; + u8 elem_size; +}; + +static const struct mlxsw_pci_queue_ops mlxsw_pci_sdq_ops = { + .type = MLXSW_PCI_QUEUE_TYPE_SDQ, + .init = mlxsw_pci_sdq_init, + .fini = mlxsw_pci_sdq_fini, + .elem_count = MLXSW_PCI_WQE_COUNT, + .elem_size = MLXSW_PCI_WQE_SIZE, +}; + +static const struct mlxsw_pci_queue_ops mlxsw_pci_rdq_ops = { + .type = MLXSW_PCI_QUEUE_TYPE_RDQ, + .init = mlxsw_pci_rdq_init, + .fini = mlxsw_pci_rdq_fini, + .elem_count = MLXSW_PCI_WQE_COUNT, + .elem_size = MLXSW_PCI_WQE_SIZE +}; + +static const struct mlxsw_pci_queue_ops mlxsw_pci_cq_ops = { + .type = MLXSW_PCI_QUEUE_TYPE_CQ, + .pre_init = mlxsw_pci_cq_pre_init, + .init = mlxsw_pci_cq_init, + .fini = mlxsw_pci_cq_fini, + .tasklet = mlxsw_pci_cq_tasklet, + .elem_count_f = mlxsw_pci_cq_elem_count, + .elem_size_f = mlxsw_pci_cq_elem_size +}; + +static const struct mlxsw_pci_queue_ops mlxsw_pci_eq_ops = { + .type = MLXSW_PCI_QUEUE_TYPE_EQ, + .init = mlxsw_pci_eq_init, + .fini = mlxsw_pci_eq_fini, + .tasklet = mlxsw_pci_eq_tasklet, + .elem_count = MLXSW_PCI_EQE_COUNT, + .elem_size = MLXSW_PCI_EQE_SIZE +}; + +static int mlxsw_pci_queue_init(struct mlxsw_pci *mlxsw_pci, char *mbox, + const struct mlxsw_pci_queue_ops *q_ops, + struct mlxsw_pci_queue *q, u8 q_num) +{ + struct mlxsw_pci_mem_item *mem_item = &q->mem_item; + int i; + int err; + + q->num = q_num; + if (q_ops->pre_init) + q_ops->pre_init(mlxsw_pci, q); + + spin_lock_init(&q->lock); + q->count = q_ops->elem_count_f ? q_ops->elem_count_f(q) : + q_ops->elem_count; + q->elem_size = q_ops->elem_size_f ? q_ops->elem_size_f(q) : + q_ops->elem_size; + q->type = q_ops->type; + q->pci = mlxsw_pci; + + if (q_ops->tasklet) + tasklet_setup(&q->tasklet, q_ops->tasklet); + + mem_item->size = MLXSW_PCI_AQ_SIZE; + mem_item->buf = dma_alloc_coherent(&mlxsw_pci->pdev->dev, + mem_item->size, &mem_item->mapaddr, + GFP_KERNEL); + if (!mem_item->buf) + return -ENOMEM; + + q->elem_info = kcalloc(q->count, sizeof(*q->elem_info), GFP_KERNEL); + if (!q->elem_info) { + err = -ENOMEM; + goto err_elem_info_alloc; + } + + /* Initialize dma mapped elements info elem_info for + * future easy access. 
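	 * Each elem_info[i].elem is set to point at the i-th fixed-size slot
	 * of the single coherent DMA buffer allocated above, so producers and
	 * consumers can reach an element by index alone.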
+ */ + for (i = 0; i < q->count; i++) { + struct mlxsw_pci_queue_elem_info *elem_info; + + elem_info = mlxsw_pci_queue_elem_info_get(q, i); + elem_info->elem = + __mlxsw_pci_queue_elem_get(q, q->elem_size, i); + } + + mlxsw_cmd_mbox_zero(mbox); + err = q_ops->init(mlxsw_pci, mbox, q); + if (err) + goto err_q_ops_init; + return 0; + +err_q_ops_init: + kfree(q->elem_info); +err_elem_info_alloc: + dma_free_coherent(&mlxsw_pci->pdev->dev, mem_item->size, + mem_item->buf, mem_item->mapaddr); + return err; +} + +static void mlxsw_pci_queue_fini(struct mlxsw_pci *mlxsw_pci, + const struct mlxsw_pci_queue_ops *q_ops, + struct mlxsw_pci_queue *q) +{ + struct mlxsw_pci_mem_item *mem_item = &q->mem_item; + + q_ops->fini(mlxsw_pci, q); + kfree(q->elem_info); + dma_free_coherent(&mlxsw_pci->pdev->dev, mem_item->size, + mem_item->buf, mem_item->mapaddr); +} + +static int mlxsw_pci_queue_group_init(struct mlxsw_pci *mlxsw_pci, char *mbox, + const struct mlxsw_pci_queue_ops *q_ops, + u8 num_qs) +{ + struct mlxsw_pci_queue_type_group *queue_group; + int i; + int err; + + queue_group = mlxsw_pci_queue_type_group_get(mlxsw_pci, q_ops->type); + queue_group->q = kcalloc(num_qs, sizeof(*queue_group->q), GFP_KERNEL); + if (!queue_group->q) + return -ENOMEM; + + for (i = 0; i < num_qs; i++) { + err = mlxsw_pci_queue_init(mlxsw_pci, mbox, q_ops, + &queue_group->q[i], i); + if (err) + goto err_queue_init; + } + queue_group->count = num_qs; + + return 0; + +err_queue_init: + for (i--; i >= 0; i--) + mlxsw_pci_queue_fini(mlxsw_pci, q_ops, &queue_group->q[i]); + kfree(queue_group->q); + return err; +} + +static void mlxsw_pci_queue_group_fini(struct mlxsw_pci *mlxsw_pci, + const struct mlxsw_pci_queue_ops *q_ops) +{ + struct mlxsw_pci_queue_type_group *queue_group; + int i; + + queue_group = mlxsw_pci_queue_type_group_get(mlxsw_pci, q_ops->type); + for (i = 0; i < queue_group->count; i++) + mlxsw_pci_queue_fini(mlxsw_pci, q_ops, &queue_group->q[i]); + kfree(queue_group->q); +} + +static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox) +{ + struct pci_dev *pdev = mlxsw_pci->pdev; + u8 num_sdqs; + u8 sdq_log2sz; + u8 num_rdqs; + u8 rdq_log2sz; + u8 num_cqs; + u8 cq_log2sz; + u8 cqv2_log2sz; + u8 num_eqs; + u8 eq_log2sz; + int err; + + mlxsw_cmd_mbox_zero(mbox); + err = mlxsw_cmd_query_aq_cap(mlxsw_pci->core, mbox); + if (err) + return err; + + num_sdqs = mlxsw_cmd_mbox_query_aq_cap_max_num_sdqs_get(mbox); + sdq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_sdq_sz_get(mbox); + num_rdqs = mlxsw_cmd_mbox_query_aq_cap_max_num_rdqs_get(mbox); + rdq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_rdq_sz_get(mbox); + num_cqs = mlxsw_cmd_mbox_query_aq_cap_max_num_cqs_get(mbox); + cq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_cq_sz_get(mbox); + cqv2_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_cqv2_sz_get(mbox); + num_eqs = mlxsw_cmd_mbox_query_aq_cap_max_num_eqs_get(mbox); + eq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_eq_sz_get(mbox); + + if (num_sdqs + num_rdqs > num_cqs || + num_sdqs < MLXSW_PCI_SDQS_MIN || + num_cqs > MLXSW_PCI_CQS_MAX || num_eqs != MLXSW_PCI_EQS_COUNT) { + dev_err(&pdev->dev, "Unsupported number of queues\n"); + return -EINVAL; + } + + if ((1 << sdq_log2sz != MLXSW_PCI_WQE_COUNT) || + (1 << rdq_log2sz != MLXSW_PCI_WQE_COUNT) || + (1 << cq_log2sz != MLXSW_PCI_CQE01_COUNT) || + (mlxsw_pci->max_cqe_ver == MLXSW_PCI_CQE_V2 && + (1 << cqv2_log2sz != MLXSW_PCI_CQE2_COUNT)) || + (1 << eq_log2sz != MLXSW_PCI_EQE_COUNT)) { + dev_err(&pdev->dev, "Unsupported number of async queue descriptors\n"); + 
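		/* With the fixed MLXSW_PCI_AQ_SIZE of 32 KB per queue, the
		 * expected log2 sizes are 10 for SDQs/RDQs (1024 WQEs),
		 * 11 for CQ v0/v1 (2048 CQEs), 10 for CQ v2 (1024 CQEs) and
		 * 11 for EQs (2048 EQEs); any other value reported by
		 * QUERY_AQ_CAP is rejected here.
		 */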
return -EINVAL; + } + + mlxsw_pci->num_sdq_cqs = num_sdqs; + + err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_eq_ops, + num_eqs); + if (err) { + dev_err(&pdev->dev, "Failed to initialize event queues\n"); + return err; + } + + err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_cq_ops, + num_cqs); + if (err) { + dev_err(&pdev->dev, "Failed to initialize completion queues\n"); + goto err_cqs_init; + } + + err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_sdq_ops, + num_sdqs); + if (err) { + dev_err(&pdev->dev, "Failed to initialize send descriptor queues\n"); + goto err_sdqs_init; + } + + err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_rdq_ops, + num_rdqs); + if (err) { + dev_err(&pdev->dev, "Failed to initialize receive descriptor queues\n"); + goto err_rdqs_init; + } + + /* We have to poll in command interface until queues are initialized */ + mlxsw_pci->cmd.nopoll = true; + return 0; + +err_rdqs_init: + mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_sdq_ops); +err_sdqs_init: + mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_cq_ops); +err_cqs_init: + mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_eq_ops); + return err; +} + +static void mlxsw_pci_aqs_fini(struct mlxsw_pci *mlxsw_pci) +{ + mlxsw_pci->cmd.nopoll = false; + mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_rdq_ops); + mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_sdq_ops); + mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_cq_ops); + mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_eq_ops); +} + +static void +mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci, + char *mbox, int index, + const struct mlxsw_swid_config *swid) +{ + u8 mask = 0; + + if (swid->used_type) { + mlxsw_cmd_mbox_config_profile_swid_config_type_set( + mbox, index, swid->type); + mask |= 1; + } + if (swid->used_properties) { + mlxsw_cmd_mbox_config_profile_swid_config_properties_set( + mbox, index, swid->properties); + mask |= 2; + } + mlxsw_cmd_mbox_config_profile_swid_config_mask_set(mbox, index, mask); +} + +static int +mlxsw_pci_profile_get_kvd_sizes(const struct mlxsw_pci *mlxsw_pci, + const struct mlxsw_config_profile *profile, + struct mlxsw_res *res) +{ + u64 single_size, double_size, linear_size; + int err; + + err = mlxsw_core_kvd_sizes_get(mlxsw_pci->core, profile, + &single_size, &double_size, + &linear_size); + if (err) + return err; + + MLXSW_RES_SET(res, KVD_SINGLE_SIZE, single_size); + MLXSW_RES_SET(res, KVD_DOUBLE_SIZE, double_size); + MLXSW_RES_SET(res, KVD_LINEAR_SIZE, linear_size); + + return 0; +} + +static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, + const struct mlxsw_config_profile *profile, + struct mlxsw_res *res) +{ + int i; + int err; + + mlxsw_cmd_mbox_zero(mbox); + + if (profile->used_max_vepa_channels) { + mlxsw_cmd_mbox_config_profile_set_max_vepa_channels_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_max_vepa_channels_set( + mbox, profile->max_vepa_channels); + } + if (profile->used_max_lag) { + mlxsw_cmd_mbox_config_profile_set_max_lag_set(mbox, 1); + mlxsw_cmd_mbox_config_profile_max_lag_set(mbox, + profile->max_lag); + } + if (profile->used_max_mid) { + mlxsw_cmd_mbox_config_profile_set_max_mid_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_max_mid_set( + mbox, profile->max_mid); + } + if (profile->used_max_pgt) { + mlxsw_cmd_mbox_config_profile_set_max_pgt_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_max_pgt_set( + mbox, profile->max_pgt); + } + if (profile->used_max_system_port) { + 
mlxsw_cmd_mbox_config_profile_set_max_system_port_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_max_system_port_set( + mbox, profile->max_system_port); + } + if (profile->used_max_vlan_groups) { + mlxsw_cmd_mbox_config_profile_set_max_vlan_groups_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_max_vlan_groups_set( + mbox, profile->max_vlan_groups); + } + if (profile->used_max_regions) { + mlxsw_cmd_mbox_config_profile_set_max_regions_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_max_regions_set( + mbox, profile->max_regions); + } + if (profile->used_flood_tables) { + mlxsw_cmd_mbox_config_profile_set_flood_tables_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_max_flood_tables_set( + mbox, profile->max_flood_tables); + mlxsw_cmd_mbox_config_profile_max_vid_flood_tables_set( + mbox, profile->max_vid_flood_tables); + mlxsw_cmd_mbox_config_profile_max_fid_offset_flood_tables_set( + mbox, profile->max_fid_offset_flood_tables); + mlxsw_cmd_mbox_config_profile_fid_offset_flood_table_size_set( + mbox, profile->fid_offset_flood_table_size); + mlxsw_cmd_mbox_config_profile_max_fid_flood_tables_set( + mbox, profile->max_fid_flood_tables); + mlxsw_cmd_mbox_config_profile_fid_flood_table_size_set( + mbox, profile->fid_flood_table_size); + } + if (profile->used_flood_mode) { + mlxsw_cmd_mbox_config_profile_set_flood_mode_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_flood_mode_set( + mbox, profile->flood_mode); + } + if (profile->used_max_ib_mc) { + mlxsw_cmd_mbox_config_profile_set_max_ib_mc_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_max_ib_mc_set( + mbox, profile->max_ib_mc); + } + if (profile->used_max_pkey) { + mlxsw_cmd_mbox_config_profile_set_max_pkey_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_max_pkey_set( + mbox, profile->max_pkey); + } + if (profile->used_ar_sec) { + mlxsw_cmd_mbox_config_profile_set_ar_sec_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_ar_sec_set( + mbox, profile->ar_sec); + } + if (profile->used_adaptive_routing_group_cap) { + mlxsw_cmd_mbox_config_profile_set_adaptive_routing_group_cap_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_adaptive_routing_group_cap_set( + mbox, profile->adaptive_routing_group_cap); + } + if (profile->used_ubridge) { + mlxsw_cmd_mbox_config_profile_set_ubridge_set(mbox, 1); + mlxsw_cmd_mbox_config_profile_ubridge_set(mbox, + profile->ubridge); + } + if (profile->used_kvd_sizes && MLXSW_RES_VALID(res, KVD_SIZE)) { + err = mlxsw_pci_profile_get_kvd_sizes(mlxsw_pci, profile, res); + if (err) + return err; + + mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set(mbox, 1); + mlxsw_cmd_mbox_config_profile_kvd_linear_size_set(mbox, + MLXSW_RES_GET(res, KVD_LINEAR_SIZE)); + mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set(mbox, + 1); + mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set(mbox, + MLXSW_RES_GET(res, KVD_SINGLE_SIZE)); + mlxsw_cmd_mbox_config_profile_set_kvd_hash_double_size_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set(mbox, + MLXSW_RES_GET(res, KVD_DOUBLE_SIZE)); + } + + for (i = 0; i < MLXSW_CONFIG_PROFILE_SWID_COUNT; i++) + mlxsw_pci_config_profile_swid_config(mlxsw_pci, mbox, i, + &profile->swid_config[i]); + + if (mlxsw_pci->max_cqe_ver > MLXSW_PCI_CQE_V0) { + mlxsw_cmd_mbox_config_profile_set_cqe_version_set(mbox, 1); + mlxsw_cmd_mbox_config_profile_cqe_version_set(mbox, 1); + } + + if (profile->used_cqe_time_stamp_type) { + mlxsw_cmd_mbox_config_profile_set_cqe_time_stamp_type_set(mbox, + 1); + mlxsw_cmd_mbox_config_profile_cqe_time_stamp_type_set(mbox, + 
profile->cqe_time_stamp_type); + } + + return mlxsw_cmd_config_profile_set(mlxsw_pci->core, mbox); +} + +static int mlxsw_pci_boardinfo(struct mlxsw_pci *mlxsw_pci, char *mbox) +{ + struct mlxsw_bus_info *bus_info = &mlxsw_pci->bus_info; + int err; + + mlxsw_cmd_mbox_zero(mbox); + err = mlxsw_cmd_boardinfo(mlxsw_pci->core, mbox); + if (err) + return err; + mlxsw_cmd_mbox_boardinfo_vsd_memcpy_from(mbox, bus_info->vsd); + mlxsw_cmd_mbox_boardinfo_psid_memcpy_from(mbox, bus_info->psid); + return 0; +} + +static int mlxsw_pci_fw_area_init(struct mlxsw_pci *mlxsw_pci, char *mbox, + u16 num_pages) +{ + struct mlxsw_pci_mem_item *mem_item; + int nent = 0; + int i; + int err; + + mlxsw_pci->fw_area.items = kcalloc(num_pages, sizeof(*mem_item), + GFP_KERNEL); + if (!mlxsw_pci->fw_area.items) + return -ENOMEM; + mlxsw_pci->fw_area.count = num_pages; + + mlxsw_cmd_mbox_zero(mbox); + for (i = 0; i < num_pages; i++) { + mem_item = &mlxsw_pci->fw_area.items[i]; + + mem_item->size = MLXSW_PCI_PAGE_SIZE; + mem_item->buf = dma_alloc_coherent(&mlxsw_pci->pdev->dev, + mem_item->size, + &mem_item->mapaddr, GFP_KERNEL); + if (!mem_item->buf) { + err = -ENOMEM; + goto err_alloc; + } + mlxsw_cmd_mbox_map_fa_pa_set(mbox, nent, mem_item->mapaddr); + mlxsw_cmd_mbox_map_fa_log2size_set(mbox, nent, 0); /* 1 page */ + if (++nent == MLXSW_CMD_MAP_FA_VPM_ENTRIES_MAX) { + err = mlxsw_cmd_map_fa(mlxsw_pci->core, mbox, nent); + if (err) + goto err_cmd_map_fa; + nent = 0; + mlxsw_cmd_mbox_zero(mbox); + } + } + + if (nent) { + err = mlxsw_cmd_map_fa(mlxsw_pci->core, mbox, nent); + if (err) + goto err_cmd_map_fa; + } + + return 0; + +err_cmd_map_fa: +err_alloc: + for (i--; i >= 0; i--) { + mem_item = &mlxsw_pci->fw_area.items[i]; + + dma_free_coherent(&mlxsw_pci->pdev->dev, mem_item->size, + mem_item->buf, mem_item->mapaddr); + } + kfree(mlxsw_pci->fw_area.items); + return err; +} + +static void mlxsw_pci_fw_area_fini(struct mlxsw_pci *mlxsw_pci) +{ + struct mlxsw_pci_mem_item *mem_item; + int i; + + mlxsw_cmd_unmap_fa(mlxsw_pci->core); + + for (i = 0; i < mlxsw_pci->fw_area.count; i++) { + mem_item = &mlxsw_pci->fw_area.items[i]; + + dma_free_coherent(&mlxsw_pci->pdev->dev, mem_item->size, + mem_item->buf, mem_item->mapaddr); + } + kfree(mlxsw_pci->fw_area.items); +} + +static irqreturn_t mlxsw_pci_eq_irq_handler(int irq, void *dev_id) +{ + struct mlxsw_pci *mlxsw_pci = dev_id; + struct mlxsw_pci_queue *q; + int i; + + for (i = 0; i < MLXSW_PCI_EQS_COUNT; i++) { + q = mlxsw_pci_eq_get(mlxsw_pci, i); + mlxsw_pci_queue_tasklet_schedule(q); + } + return IRQ_HANDLED; +} + +static int mlxsw_pci_mbox_alloc(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_mem_item *mbox) +{ + struct pci_dev *pdev = mlxsw_pci->pdev; + int err = 0; + + mbox->size = MLXSW_CMD_MBOX_SIZE; + mbox->buf = dma_alloc_coherent(&pdev->dev, MLXSW_CMD_MBOX_SIZE, + &mbox->mapaddr, GFP_KERNEL); + if (!mbox->buf) { + dev_err(&pdev->dev, "Failed allocating memory for mailbox\n"); + err = -ENOMEM; + } + + return err; +} + +static void mlxsw_pci_mbox_free(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_mem_item *mbox) +{ + struct pci_dev *pdev = mlxsw_pci->pdev; + + dma_free_coherent(&pdev->dev, MLXSW_CMD_MBOX_SIZE, mbox->buf, + mbox->mapaddr); +} + +static int mlxsw_pci_sys_ready_wait(struct mlxsw_pci *mlxsw_pci, + const struct pci_device_id *id, + u32 *p_sys_status) +{ + unsigned long end; + u32 val; + + /* We must wait for the HW to become responsive. 
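	 * The wait below first sleeps MLXSW_PCI_SW_RESET_WAIT_MSECS (400 ms)
	 * and then polls the FW_READY register (offset 0xA1844) until its
	 * low 16 bits read MLXSW_PCI_FW_READY_MAGIC (0x5E), giving up after
	 * MLXSW_PCI_SW_RESET_TIMEOUT_MSECS (900000 ms, i.e. 15 minutes).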
*/ + msleep(MLXSW_PCI_SW_RESET_WAIT_MSECS); + + end = jiffies + msecs_to_jiffies(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS); + do { + val = mlxsw_pci_read32(mlxsw_pci, FW_READY); + if ((val & MLXSW_PCI_FW_READY_MASK) == MLXSW_PCI_FW_READY_MAGIC) + return 0; + cond_resched(); + } while (time_before(jiffies, end)); + + *p_sys_status = val & MLXSW_PCI_FW_READY_MASK; + + return -EBUSY; +} + +static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci, + const struct pci_device_id *id) +{ + struct pci_dev *pdev = mlxsw_pci->pdev; + char mrsr_pl[MLXSW_REG_MRSR_LEN]; + u32 sys_status; + int err; + + err = mlxsw_pci_sys_ready_wait(mlxsw_pci, id, &sys_status); + if (err) { + dev_err(&pdev->dev, "Failed to reach system ready status before reset. Status is 0x%x\n", + sys_status); + return err; + } + + mlxsw_reg_mrsr_pack(mrsr_pl); + err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl); + if (err) + return err; + + err = mlxsw_pci_sys_ready_wait(mlxsw_pci, id, &sys_status); + if (err) { + dev_err(&pdev->dev, "Failed to reach system ready status after reset. Status is 0x%x\n", + sys_status); + return err; + } + + return 0; +} + +static int mlxsw_pci_alloc_irq_vectors(struct mlxsw_pci *mlxsw_pci) +{ + int err; + + err = pci_alloc_irq_vectors(mlxsw_pci->pdev, 1, 1, PCI_IRQ_MSIX); + if (err < 0) + dev_err(&mlxsw_pci->pdev->dev, "MSI-X init failed\n"); + return err; +} + +static void mlxsw_pci_free_irq_vectors(struct mlxsw_pci *mlxsw_pci) +{ + pci_free_irq_vectors(mlxsw_pci->pdev); +} + +static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, + const struct mlxsw_config_profile *profile, + struct mlxsw_res *res) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + struct pci_dev *pdev = mlxsw_pci->pdev; + char *mbox; + u16 num_pages; + int err; + + mlxsw_pci->core = mlxsw_core; + + mbox = mlxsw_cmd_mbox_alloc(); + if (!mbox) + return -ENOMEM; + + err = mlxsw_pci_sw_reset(mlxsw_pci, mlxsw_pci->id); + if (err) + goto err_sw_reset; + + err = mlxsw_pci_alloc_irq_vectors(mlxsw_pci); + if (err < 0) { + dev_err(&pdev->dev, "MSI-X init failed\n"); + goto err_alloc_irq; + } + + err = mlxsw_cmd_query_fw(mlxsw_core, mbox); + if (err) + goto err_query_fw; + + mlxsw_pci->bus_info.fw_rev.major = + mlxsw_cmd_mbox_query_fw_fw_rev_major_get(mbox); + mlxsw_pci->bus_info.fw_rev.minor = + mlxsw_cmd_mbox_query_fw_fw_rev_minor_get(mbox); + mlxsw_pci->bus_info.fw_rev.subminor = + mlxsw_cmd_mbox_query_fw_fw_rev_subminor_get(mbox); + + if (mlxsw_cmd_mbox_query_fw_cmd_interface_rev_get(mbox) != 1) { + dev_err(&pdev->dev, "Unsupported cmd interface revision ID queried from hw\n"); + err = -EINVAL; + goto err_iface_rev; + } + if (mlxsw_cmd_mbox_query_fw_doorbell_page_bar_get(mbox) != 0) { + dev_err(&pdev->dev, "Unsupported doorbell page bar queried from hw\n"); + err = -EINVAL; + goto err_doorbell_page_bar; + } + + mlxsw_pci->doorbell_offset = + mlxsw_cmd_mbox_query_fw_doorbell_page_offset_get(mbox); + + if (mlxsw_cmd_mbox_query_fw_fr_rn_clk_bar_get(mbox) != 0) { + dev_err(&pdev->dev, "Unsupported free running clock BAR queried from hw\n"); + err = -EINVAL; + goto err_fr_rn_clk_bar; + } + + mlxsw_pci->free_running_clock_offset = + mlxsw_cmd_mbox_query_fw_free_running_clock_offset_get(mbox); + + if (mlxsw_cmd_mbox_query_fw_utc_sec_bar_get(mbox) != 0) { + dev_err(&pdev->dev, "Unsupported UTC sec BAR queried from hw\n"); + err = -EINVAL; + goto err_utc_sec_bar; + } + + mlxsw_pci->utc_sec_offset = + mlxsw_cmd_mbox_query_fw_utc_sec_offset_get(mbox); + + if (mlxsw_cmd_mbox_query_fw_utc_nsec_bar_get(mbox) != 0) { + 
dev_err(&pdev->dev, "Unsupported UTC nsec BAR queried from hw\n"); + err = -EINVAL; + goto err_utc_nsec_bar; + } + + mlxsw_pci->utc_nsec_offset = + mlxsw_cmd_mbox_query_fw_utc_nsec_offset_get(mbox); + + num_pages = mlxsw_cmd_mbox_query_fw_fw_pages_get(mbox); + err = mlxsw_pci_fw_area_init(mlxsw_pci, mbox, num_pages); + if (err) + goto err_fw_area_init; + + err = mlxsw_pci_boardinfo(mlxsw_pci, mbox); + if (err) + goto err_boardinfo; + + err = mlxsw_core_resources_query(mlxsw_core, mbox, res); + if (err) + goto err_query_resources; + + if (MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V2) && + MLXSW_CORE_RES_GET(mlxsw_core, CQE_V2)) + mlxsw_pci->max_cqe_ver = MLXSW_PCI_CQE_V2; + else if (MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V1) && + MLXSW_CORE_RES_GET(mlxsw_core, CQE_V1)) + mlxsw_pci->max_cqe_ver = MLXSW_PCI_CQE_V1; + else if ((MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V0) && + MLXSW_CORE_RES_GET(mlxsw_core, CQE_V0)) || + !MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V0)) { + mlxsw_pci->max_cqe_ver = MLXSW_PCI_CQE_V0; + } else { + dev_err(&pdev->dev, "Invalid supported CQE version combination reported\n"); + goto err_cqe_v_check; + } + + err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile, res); + if (err) + goto err_config_profile; + + /* Some resources depend on unified bridge model, which is configured + * as part of config_profile. Query the resources again to get correct + * values. + */ + err = mlxsw_core_resources_query(mlxsw_core, mbox, res); + if (err) + goto err_requery_resources; + + err = mlxsw_pci_aqs_init(mlxsw_pci, mbox); + if (err) + goto err_aqs_init; + + err = request_irq(pci_irq_vector(pdev, 0), + mlxsw_pci_eq_irq_handler, 0, + mlxsw_pci->bus_info.device_kind, mlxsw_pci); + if (err) { + dev_err(&pdev->dev, "IRQ request failed\n"); + goto err_request_eq_irq; + } + + goto mbox_put; + +err_request_eq_irq: + mlxsw_pci_aqs_fini(mlxsw_pci); +err_aqs_init: +err_requery_resources: +err_config_profile: +err_cqe_v_check: +err_query_resources: +err_boardinfo: + mlxsw_pci_fw_area_fini(mlxsw_pci); +err_fw_area_init: +err_utc_nsec_bar: +err_utc_sec_bar: +err_fr_rn_clk_bar: +err_doorbell_page_bar: +err_iface_rev: +err_query_fw: + mlxsw_pci_free_irq_vectors(mlxsw_pci); +err_alloc_irq: +err_sw_reset: +mbox_put: + mlxsw_cmd_mbox_free(mbox); + return err; +} + +static void mlxsw_pci_fini(void *bus_priv) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + + free_irq(pci_irq_vector(mlxsw_pci->pdev, 0), mlxsw_pci); + mlxsw_pci_aqs_fini(mlxsw_pci); + mlxsw_pci_fw_area_fini(mlxsw_pci); + mlxsw_pci_free_irq_vectors(mlxsw_pci); +} + +static struct mlxsw_pci_queue * +mlxsw_pci_sdq_pick(struct mlxsw_pci *mlxsw_pci, + const struct mlxsw_tx_info *tx_info) +{ + u8 ctl_sdq_count = mlxsw_pci_sdq_count(mlxsw_pci) - 1; + u8 sdqn; + + if (tx_info->is_emad) { + sdqn = MLXSW_PCI_SDQ_EMAD_INDEX; + } else { + BUILD_BUG_ON(MLXSW_PCI_SDQ_EMAD_INDEX != 0); + sdqn = 1 + (tx_info->local_port % ctl_sdq_count); + } + + return mlxsw_pci_sdq_get(mlxsw_pci, sdqn); +} + +static bool mlxsw_pci_skb_transmit_busy(void *bus_priv, + const struct mlxsw_tx_info *tx_info) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + struct mlxsw_pci_queue *q = mlxsw_pci_sdq_pick(mlxsw_pci, tx_info); + + return !mlxsw_pci_queue_elem_info_producer_get(q); +} + +static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + struct mlxsw_pci_queue *q; + struct mlxsw_pci_queue_elem_info *elem_info; + char *wqe; + int i; + int err; + + if (skb_shinfo(skb)->nr_frags > 
MLXSW_PCI_WQE_SG_ENTRIES - 1) { + err = skb_linearize(skb); + if (err) + return err; + } + + q = mlxsw_pci_sdq_pick(mlxsw_pci, tx_info); + spin_lock_bh(&q->lock); + elem_info = mlxsw_pci_queue_elem_info_producer_get(q); + if (!elem_info) { + /* queue is full */ + err = -EAGAIN; + goto unlock; + } + mlxsw_skb_cb(skb)->tx_info = *tx_info; + elem_info->u.sdq.skb = skb; + + wqe = elem_info->elem; + mlxsw_pci_wqe_c_set(wqe, 1); /* always report completion */ + mlxsw_pci_wqe_lp_set(wqe, 0); + mlxsw_pci_wqe_type_set(wqe, MLXSW_PCI_WQE_TYPE_ETHERNET); + + err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, 0, skb->data, + skb_headlen(skb), DMA_TO_DEVICE); + if (err) + goto unlock; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, i + 1, + skb_frag_address(frag), + skb_frag_size(frag), + DMA_TO_DEVICE); + if (err) + goto unmap_frags; + } + + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + + /* Set unused sq entries byte count to zero. */ + for (i++; i < MLXSW_PCI_WQE_SG_ENTRIES; i++) + mlxsw_pci_wqe_byte_count_set(wqe, i, 0); + + /* Everything is set up, ring producer doorbell to get HW going */ + q->producer_counter++; + mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); + + goto unlock; + +unmap_frags: + for (; i >= 0; i--) + mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, i, DMA_TO_DEVICE); +unlock: + spin_unlock_bh(&q->lock); + return err; +} + +static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod, + u32 in_mod, bool out_mbox_direct, + char *in_mbox, size_t in_mbox_size, + char *out_mbox, size_t out_mbox_size, + u8 *p_status) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + dma_addr_t in_mapaddr = 0, out_mapaddr = 0; + bool evreq = mlxsw_pci->cmd.nopoll; + unsigned long timeout = msecs_to_jiffies(MLXSW_PCI_CIR_TIMEOUT_MSECS); + bool *p_wait_done = &mlxsw_pci->cmd.wait_done; + int err; + + *p_status = MLXSW_CMD_STATUS_OK; + + err = mutex_lock_interruptible(&mlxsw_pci->cmd.lock); + if (err) + return err; + + if (in_mbox) { + memcpy(mlxsw_pci->cmd.in_mbox.buf, in_mbox, in_mbox_size); + in_mapaddr = mlxsw_pci->cmd.in_mbox.mapaddr; + } + mlxsw_pci_write32(mlxsw_pci, CIR_IN_PARAM_HI, upper_32_bits(in_mapaddr)); + mlxsw_pci_write32(mlxsw_pci, CIR_IN_PARAM_LO, lower_32_bits(in_mapaddr)); + + if (out_mbox) + out_mapaddr = mlxsw_pci->cmd.out_mbox.mapaddr; + mlxsw_pci_write32(mlxsw_pci, CIR_OUT_PARAM_HI, upper_32_bits(out_mapaddr)); + mlxsw_pci_write32(mlxsw_pci, CIR_OUT_PARAM_LO, lower_32_bits(out_mapaddr)); + + mlxsw_pci_write32(mlxsw_pci, CIR_IN_MODIFIER, in_mod); + mlxsw_pci_write32(mlxsw_pci, CIR_TOKEN, 0); + + *p_wait_done = false; + + wmb(); /* all needs to be written before we write control register */ + mlxsw_pci_write32(mlxsw_pci, CIR_CTRL, + MLXSW_PCI_CIR_CTRL_GO_BIT | + (evreq ? 
MLXSW_PCI_CIR_CTRL_EVREQ_BIT : 0) | + (opcode_mod << MLXSW_PCI_CIR_CTRL_OPCODE_MOD_SHIFT) | + opcode); + + if (!evreq) { + unsigned long end; + + end = jiffies + timeout; + do { + u32 ctrl = mlxsw_pci_read32(mlxsw_pci, CIR_CTRL); + + if (!(ctrl & MLXSW_PCI_CIR_CTRL_GO_BIT)) { + *p_wait_done = true; + *p_status = ctrl >> MLXSW_PCI_CIR_CTRL_STATUS_SHIFT; + break; + } + cond_resched(); + } while (time_before(jiffies, end)); + } else { + wait_event_timeout(mlxsw_pci->cmd.wait, *p_wait_done, timeout); + *p_status = mlxsw_pci->cmd.comp.status; + } + + err = 0; + if (*p_wait_done) { + if (*p_status) + err = -EIO; + } else { + err = -ETIMEDOUT; + } + + if (!err && out_mbox && out_mbox_direct) { + /* Some commands don't use output param as address to mailbox + * but they store output directly into registers. In that case, + * copy registers into mbox buffer. + */ + __be32 tmp; + + if (!evreq) { + tmp = cpu_to_be32(mlxsw_pci_read32(mlxsw_pci, + CIR_OUT_PARAM_HI)); + memcpy(out_mbox, &tmp, sizeof(tmp)); + tmp = cpu_to_be32(mlxsw_pci_read32(mlxsw_pci, + CIR_OUT_PARAM_LO)); + memcpy(out_mbox + sizeof(tmp), &tmp, sizeof(tmp)); + } + } else if (!err && out_mbox) { + memcpy(out_mbox, mlxsw_pci->cmd.out_mbox.buf, out_mbox_size); + } + + mutex_unlock(&mlxsw_pci->cmd.lock); + + return err; +} + +static u32 mlxsw_pci_read_frc_h(void *bus_priv) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + u64 frc_offset_h; + + frc_offset_h = mlxsw_pci->free_running_clock_offset; + return mlxsw_pci_read32_off(mlxsw_pci, frc_offset_h); +} + +static u32 mlxsw_pci_read_frc_l(void *bus_priv) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + u64 frc_offset_l; + + frc_offset_l = mlxsw_pci->free_running_clock_offset + 4; + return mlxsw_pci_read32_off(mlxsw_pci, frc_offset_l); +} + +static u32 mlxsw_pci_read_utc_sec(void *bus_priv) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + + return mlxsw_pci_read32_off(mlxsw_pci, mlxsw_pci->utc_sec_offset); +} + +static u32 mlxsw_pci_read_utc_nsec(void *bus_priv) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + + return mlxsw_pci_read32_off(mlxsw_pci, mlxsw_pci->utc_nsec_offset); +} + +static const struct mlxsw_bus mlxsw_pci_bus = { + .kind = "pci", + .init = mlxsw_pci_init, + .fini = mlxsw_pci_fini, + .skb_transmit_busy = mlxsw_pci_skb_transmit_busy, + .skb_transmit = mlxsw_pci_skb_transmit, + .cmd_exec = mlxsw_pci_cmd_exec, + .read_frc_h = mlxsw_pci_read_frc_h, + .read_frc_l = mlxsw_pci_read_frc_l, + .read_utc_sec = mlxsw_pci_read_utc_sec, + .read_utc_nsec = mlxsw_pci_read_utc_nsec, + .features = MLXSW_BUS_F_TXRX | MLXSW_BUS_F_RESET, +}; + +static int mlxsw_pci_cmd_init(struct mlxsw_pci *mlxsw_pci) +{ + int err; + + mutex_init(&mlxsw_pci->cmd.lock); + init_waitqueue_head(&mlxsw_pci->cmd.wait); + + err = mlxsw_pci_mbox_alloc(mlxsw_pci, &mlxsw_pci->cmd.in_mbox); + if (err) + goto err_in_mbox_alloc; + + err = mlxsw_pci_mbox_alloc(mlxsw_pci, &mlxsw_pci->cmd.out_mbox); + if (err) + goto err_out_mbox_alloc; + + return 0; + +err_out_mbox_alloc: + mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.in_mbox); +err_in_mbox_alloc: + mutex_destroy(&mlxsw_pci->cmd.lock); + return err; +} + +static void mlxsw_pci_cmd_fini(struct mlxsw_pci *mlxsw_pci) +{ + mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.out_mbox); + mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.in_mbox); + mutex_destroy(&mlxsw_pci->cmd.lock); +} + +static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + const char *driver_name = dev_driver_string(&pdev->dev); + struct mlxsw_pci *mlxsw_pci; + int err; + + mlxsw_pci = 
kzalloc(sizeof(*mlxsw_pci), GFP_KERNEL); + if (!mlxsw_pci) + return -ENOMEM; + + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "pci_enable_device failed\n"); + goto err_pci_enable_device; + } + + err = pci_request_regions(pdev, driver_name); + if (err) { + dev_err(&pdev->dev, "pci_request_regions failed\n"); + goto err_pci_request_regions; + } + + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, "dma_set_mask failed\n"); + goto err_pci_set_dma_mask; + } + } + + if (pci_resource_len(pdev, 0) < MLXSW_PCI_BAR0_SIZE) { + dev_err(&pdev->dev, "invalid PCI region size\n"); + err = -EINVAL; + goto err_pci_resource_len_check; + } + + mlxsw_pci->hw_addr = ioremap(pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + if (!mlxsw_pci->hw_addr) { + dev_err(&pdev->dev, "ioremap failed\n"); + err = -EIO; + goto err_ioremap; + } + pci_set_master(pdev); + + mlxsw_pci->pdev = pdev; + pci_set_drvdata(pdev, mlxsw_pci); + + err = mlxsw_pci_cmd_init(mlxsw_pci); + if (err) + goto err_pci_cmd_init; + + mlxsw_pci->bus_info.device_kind = driver_name; + mlxsw_pci->bus_info.device_name = pci_name(mlxsw_pci->pdev); + mlxsw_pci->bus_info.dev = &pdev->dev; + mlxsw_pci->bus_info.read_clock_capable = true; + mlxsw_pci->id = id; + + err = mlxsw_core_bus_device_register(&mlxsw_pci->bus_info, + &mlxsw_pci_bus, mlxsw_pci, false, + NULL, NULL); + if (err) { + dev_err(&pdev->dev, "cannot register bus device\n"); + goto err_bus_device_register; + } + + return 0; + +err_bus_device_register: + mlxsw_pci_cmd_fini(mlxsw_pci); +err_pci_cmd_init: + iounmap(mlxsw_pci->hw_addr); +err_ioremap: +err_pci_resource_len_check: +err_pci_set_dma_mask: + pci_release_regions(pdev); +err_pci_request_regions: + pci_disable_device(pdev); +err_pci_enable_device: + kfree(mlxsw_pci); + return err; +} + +static void mlxsw_pci_remove(struct pci_dev *pdev) +{ + struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev); + + mlxsw_core_bus_device_unregister(mlxsw_pci->core, false); + mlxsw_pci_cmd_fini(mlxsw_pci); + iounmap(mlxsw_pci->hw_addr); + pci_release_regions(mlxsw_pci->pdev); + pci_disable_device(mlxsw_pci->pdev); + kfree(mlxsw_pci); +} + +int mlxsw_pci_driver_register(struct pci_driver *pci_driver) +{ + pci_driver->probe = mlxsw_pci_probe; + pci_driver->remove = mlxsw_pci_remove; + pci_driver->shutdown = mlxsw_pci_remove; + return pci_register_driver(pci_driver); +} +EXPORT_SYMBOL(mlxsw_pci_driver_register); + +void mlxsw_pci_driver_unregister(struct pci_driver *pci_driver) +{ + pci_unregister_driver(pci_driver); +} +EXPORT_SYMBOL(mlxsw_pci_driver_unregister); + +static int __init mlxsw_pci_module_init(void) +{ + return 0; +} + +static void __exit mlxsw_pci_module_exit(void) +{ +} + +module_init(mlxsw_pci_module_init); +module_exit(mlxsw_pci_module_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>"); +MODULE_DESCRIPTION("Mellanox switch PCI interface driver"); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.h b/drivers/net/ethernet/mellanox/mlxsw/pci.h new file mode 100644 index 000000000..cacc2f9fa --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2016-2018 Mellanox Technologies. 
All rights reserved */ + +#ifndef _MLXSW_PCI_H +#define _MLXSW_PCI_H + +#include <linux/pci.h> + +#define PCI_DEVICE_ID_MELLANOX_SPECTRUM 0xcb84 +#define PCI_DEVICE_ID_MELLANOX_SPECTRUM2 0xcf6c +#define PCI_DEVICE_ID_MELLANOX_SPECTRUM3 0xcf70 +#define PCI_DEVICE_ID_MELLANOX_SPECTRUM4 0xcf80 + +#if IS_ENABLED(CONFIG_MLXSW_PCI) + +int mlxsw_pci_driver_register(struct pci_driver *pci_driver); +void mlxsw_pci_driver_unregister(struct pci_driver *pci_driver); + +#else + +static inline int +mlxsw_pci_driver_register(struct pci_driver *pci_driver) +{ + return 0; +} + +static inline void +mlxsw_pci_driver_unregister(struct pci_driver *pci_driver) +{ +} + +#endif + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h new file mode 100644 index 000000000..7cdf0ce24 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -0,0 +1,416 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_PCI_HW_H +#define _MLXSW_PCI_HW_H + +#include <linux/bitops.h> + +#include "item.h" + +#define MLXSW_PCI_BAR0_SIZE (1024 * 1024) /* 1MB */ +#define MLXSW_PCI_PAGE_SIZE 4096 + +#define MLXSW_PCI_CIR_BASE 0x71000 +#define MLXSW_PCI_CIR_IN_PARAM_HI MLXSW_PCI_CIR_BASE +#define MLXSW_PCI_CIR_IN_PARAM_LO (MLXSW_PCI_CIR_BASE + 0x04) +#define MLXSW_PCI_CIR_IN_MODIFIER (MLXSW_PCI_CIR_BASE + 0x08) +#define MLXSW_PCI_CIR_OUT_PARAM_HI (MLXSW_PCI_CIR_BASE + 0x0C) +#define MLXSW_PCI_CIR_OUT_PARAM_LO (MLXSW_PCI_CIR_BASE + 0x10) +#define MLXSW_PCI_CIR_TOKEN (MLXSW_PCI_CIR_BASE + 0x14) +#define MLXSW_PCI_CIR_CTRL (MLXSW_PCI_CIR_BASE + 0x18) +#define MLXSW_PCI_CIR_CTRL_GO_BIT BIT(23) +#define MLXSW_PCI_CIR_CTRL_EVREQ_BIT BIT(22) +#define MLXSW_PCI_CIR_CTRL_OPCODE_MOD_SHIFT 12 +#define MLXSW_PCI_CIR_CTRL_STATUS_SHIFT 24 +#define MLXSW_PCI_CIR_TIMEOUT_MSECS 1000 + +#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 900000 +#define MLXSW_PCI_SW_RESET_WAIT_MSECS 400 +#define MLXSW_PCI_FW_READY 0xA1844 +#define MLXSW_PCI_FW_READY_MASK 0xFFFF +#define MLXSW_PCI_FW_READY_MAGIC 0x5E + +#define MLXSW_PCI_DOORBELL_SDQ_OFFSET 0x000 +#define MLXSW_PCI_DOORBELL_RDQ_OFFSET 0x200 +#define MLXSW_PCI_DOORBELL_CQ_OFFSET 0x400 +#define MLXSW_PCI_DOORBELL_EQ_OFFSET 0x600 +#define MLXSW_PCI_DOORBELL_ARM_CQ_OFFSET 0x800 +#define MLXSW_PCI_DOORBELL_ARM_EQ_OFFSET 0xA00 + +#define MLXSW_PCI_DOORBELL(offset, type_offset, num) \ + ((offset) + (type_offset) + (num) * 4) + +#define MLXSW_PCI_CQS_MAX 96 +#define MLXSW_PCI_EQS_COUNT 2 +#define MLXSW_PCI_EQ_ASYNC_NUM 0 +#define MLXSW_PCI_EQ_COMP_NUM 1 + +#define MLXSW_PCI_SDQS_MIN 2 /* EMAD and control traffic */ +#define MLXSW_PCI_SDQ_EMAD_INDEX 0 +#define MLXSW_PCI_SDQ_EMAD_TC 0 +#define MLXSW_PCI_SDQ_CTL_TC 3 + +#define MLXSW_PCI_AQ_PAGES 8 +#define MLXSW_PCI_AQ_SIZE (MLXSW_PCI_PAGE_SIZE * MLXSW_PCI_AQ_PAGES) +#define MLXSW_PCI_WQE_SIZE 32 /* 32 bytes per element */ +#define MLXSW_PCI_CQE01_SIZE 16 /* 16 bytes per element */ +#define MLXSW_PCI_CQE2_SIZE 32 /* 32 bytes per element */ +#define MLXSW_PCI_CQE_SIZE_MAX MLXSW_PCI_CQE2_SIZE +#define MLXSW_PCI_EQE_SIZE 16 /* 16 bytes per element */ +#define MLXSW_PCI_WQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_WQE_SIZE) +#define MLXSW_PCI_CQE01_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE01_SIZE) +#define MLXSW_PCI_CQE2_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE2_SIZE) +#define MLXSW_PCI_EQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_EQE_SIZE) +#define MLXSW_PCI_EQE_UPDATE_COUNT 0x80 + +#define MLXSW_PCI_WQE_SG_ENTRIES 3 +#define 
MLXSW_PCI_WQE_TYPE_ETHERNET 0xA + +/* pci_wqe_c + * If set it indicates that a completion should be reported upon + * execution of this descriptor. + */ +MLXSW_ITEM32(pci, wqe, c, 0x00, 31, 1); + +/* pci_wqe_lp + * Local Processing, set if packet should be processed by the local + * switch hardware: + * For Ethernet EMAD (Direct Route and non Direct Route) - + * must be set if packet destination is local device + * For InfiniBand CTL - must be set if packet destination is local device + * Otherwise it must be clear + * Local Process packets must not exceed the size of 2K (including payload + * and headers). + */ +MLXSW_ITEM32(pci, wqe, lp, 0x00, 30, 1); + +/* pci_wqe_type + * Packet type. + */ +MLXSW_ITEM32(pci, wqe, type, 0x00, 23, 4); + +/* pci_wqe_byte_count + * Size of i-th scatter/gather entry, 0 if entry is unused. + */ +MLXSW_ITEM16_INDEXED(pci, wqe, byte_count, 0x02, 0, 14, 0x02, 0x00, false); + +/* pci_wqe_address + * Physical address of i-th scatter/gather entry. + * Gather Entries must be 2Byte aligned. + */ +MLXSW_ITEM64_INDEXED(pci, wqe, address, 0x08, 0, 64, 0x8, 0x0, false); + +enum mlxsw_pci_cqe_v { + MLXSW_PCI_CQE_V0, + MLXSW_PCI_CQE_V1, + MLXSW_PCI_CQE_V2, +}; + +#define mlxsw_pci_cqe_item_helpers(name, v0, v1, v2) \ +static inline u32 mlxsw_pci_cqe_##name##_get(enum mlxsw_pci_cqe_v v, char *cqe) \ +{ \ + switch (v) { \ + default: \ + case MLXSW_PCI_CQE_V0: \ + return mlxsw_pci_cqe##v0##_##name##_get(cqe); \ + case MLXSW_PCI_CQE_V1: \ + return mlxsw_pci_cqe##v1##_##name##_get(cqe); \ + case MLXSW_PCI_CQE_V2: \ + return mlxsw_pci_cqe##v2##_##name##_get(cqe); \ + } \ +} \ +static inline void mlxsw_pci_cqe_##name##_set(enum mlxsw_pci_cqe_v v, \ + char *cqe, u32 val) \ +{ \ + switch (v) { \ + default: \ + case MLXSW_PCI_CQE_V0: \ + mlxsw_pci_cqe##v0##_##name##_set(cqe, val); \ + break; \ + case MLXSW_PCI_CQE_V1: \ + mlxsw_pci_cqe##v1##_##name##_set(cqe, val); \ + break; \ + case MLXSW_PCI_CQE_V2: \ + mlxsw_pci_cqe##v2##_##name##_set(cqe, val); \ + break; \ + } \ +} + +/* pci_cqe_lag + * Packet arrives from a port which is a LAG + */ +MLXSW_ITEM32(pci, cqe0, lag, 0x00, 23, 1); +MLXSW_ITEM32(pci, cqe12, lag, 0x00, 24, 1); +mlxsw_pci_cqe_item_helpers(lag, 0, 12, 12); + +/* pci_cqe_system_port/lag_id + * When lag=0: System port on which the packet was received + * When lag=1: + * bits [15:4] LAG ID on which the packet was received + * bits [3:0] sub_port on which the packet was received + */ +MLXSW_ITEM32(pci, cqe, system_port, 0x00, 0, 16); +MLXSW_ITEM32(pci, cqe0, lag_id, 0x00, 4, 12); +MLXSW_ITEM32(pci, cqe12, lag_id, 0x00, 0, 16); +mlxsw_pci_cqe_item_helpers(lag_id, 0, 12, 12); +MLXSW_ITEM32(pci, cqe0, lag_subport, 0x00, 0, 4); +MLXSW_ITEM32(pci, cqe12, lag_subport, 0x00, 16, 8); +mlxsw_pci_cqe_item_helpers(lag_subport, 0, 12, 12); + +/* pci_cqe_wqe_counter + * WQE count of the WQEs completed on the associated dqn + */ +MLXSW_ITEM32(pci, cqe, wqe_counter, 0x04, 16, 16); + +/* pci_cqe_byte_count + * Byte count of received packets including additional two + * Reserved Bytes that are append to the end of the frame. + * Reserved for Send CQE. + */ +MLXSW_ITEM32(pci, cqe, byte_count, 0x04, 0, 14); + +#define MLXSW_PCI_CQE2_MIRROR_CONG_INVALID 0xFFFF + +/* pci_cqe_mirror_cong_high + * Congestion level in units of 8KB of the egress traffic class of the original + * packet that does mirroring to the CPU. Value of 0xFFFF means that the + * congestion level is invalid. 
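 * The full 16-bit congestion value is split between this field and
 * pci_cqe_mirror_cong_low and is reassembled as (high << 12) | low by
 * mlxsw_pci_cqe2_mirror_cong_get() below. One unit is
 * 1 << MLXSW_PCI_CQE2_MIRROR_CONG_SHIFT = 8192 bytes, so e.g. a value
 * of 3 corresponds to 24 KB of congestion.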
+ */ +MLXSW_ITEM32(pci, cqe2, mirror_cong_high, 0x08, 16, 4); + +/* pci_cqe_trap_id + * Trap ID that captured the packet. + */ +MLXSW_ITEM32(pci, cqe, trap_id, 0x08, 0, 10); + +/* pci_cqe_crc + * Length include CRC. Indicates the length field includes + * the packet's CRC. + */ +MLXSW_ITEM32(pci, cqe0, crc, 0x0C, 8, 1); +MLXSW_ITEM32(pci, cqe12, crc, 0x0C, 9, 1); +mlxsw_pci_cqe_item_helpers(crc, 0, 12, 12); + +/* pci_cqe_e + * CQE with Error. + */ +MLXSW_ITEM32(pci, cqe0, e, 0x0C, 7, 1); +MLXSW_ITEM32(pci, cqe12, e, 0x00, 27, 1); +mlxsw_pci_cqe_item_helpers(e, 0, 12, 12); + +/* pci_cqe_sr + * 1 - Send Queue + * 0 - Receive Queue + */ +MLXSW_ITEM32(pci, cqe0, sr, 0x0C, 6, 1); +MLXSW_ITEM32(pci, cqe12, sr, 0x00, 26, 1); +mlxsw_pci_cqe_item_helpers(sr, 0, 12, 12); + +/* pci_cqe_dqn + * Descriptor Queue (DQ) Number. + */ +MLXSW_ITEM32(pci, cqe0, dqn, 0x0C, 1, 5); +MLXSW_ITEM32(pci, cqe12, dqn, 0x0C, 1, 6); +mlxsw_pci_cqe_item_helpers(dqn, 0, 12, 12); + +/* pci_cqe_time_stamp_low + * Time stamp of the CQE + * Format according to time_stamp_type: + * 0: uSec - 1.024uSec (default for devices which do not support + * time_stamp_type). Only bits 15:0 are valid + * 1: FRC - Free Running Clock - units of 1nSec + * 2: UTC - time_stamp[37:30] = Sec + * - time_stamp[29:0] = nSec + * 3: Mirror_UTC. UTC time stamp of the original packet that has + * MIRROR_SESSION traps + * - time_stamp[37:30] = Sec + * - time_stamp[29:0] = nSec + * Formats 0..2 are configured by + * CONFIG_PROFILE.cqe_time_stamp_type for PTP traps + * Format 3 is used for MIRROR_SESSION traps + * Note that Spectrum does not reveal FRC, UTC and Mirror_UTC + */ +MLXSW_ITEM32(pci, cqe2, time_stamp_low, 0x0C, 16, 16); + +#define MLXSW_PCI_CQE2_MIRROR_TCLASS_INVALID 0x1F + +/* pci_cqe_mirror_tclass + * The egress traffic class of the original packet that does mirroring to the + * CPU. Value of 0x1F means that the traffic class is invalid. + */ +MLXSW_ITEM32(pci, cqe2, mirror_tclass, 0x10, 27, 5); + +/* pci_cqe_tx_lag + * The Tx port of a packet that is mirrored / sampled to the CPU is a LAG. + */ +MLXSW_ITEM32(pci, cqe2, tx_lag, 0x10, 24, 1); + +/* pci_cqe_tx_lag_subport + * The port index within the LAG of a packet that is mirrored / sampled to the + * CPU. Reserved when tx_lag is 0. + */ +MLXSW_ITEM32(pci, cqe2, tx_lag_subport, 0x10, 16, 8); + +#define MLXSW_PCI_CQE2_TX_PORT_MULTI_PORT 0xFFFE +#define MLXSW_PCI_CQE2_TX_PORT_INVALID 0xFFFF + +/* pci_cqe_tx_lag_id + * The Tx LAG ID of the original packet that is mirrored / sampled to the CPU. + * Value of 0xFFFE means multi-port. Value fo 0xFFFF means that the Tx LAG ID + * is invalid. Reserved when tx_lag is 0. + */ +MLXSW_ITEM32(pci, cqe2, tx_lag_id, 0x10, 0, 16); + +/* pci_cqe_tx_system_port + * The Tx port of the original packet that is mirrored / sampled to the CPU. + * Value of 0xFFFE means multi-port. Value fo 0xFFFF means that the Tx port is + * invalid. Reserved when tx_lag is 1. + */ +MLXSW_ITEM32(pci, cqe2, tx_system_port, 0x10, 0, 16); + +/* pci_cqe_mirror_cong_low + * Congestion level in units of 8KB of the egress traffic class of the original + * packet that does mirroring to the CPU. Value of 0xFFFF means that the + * congestion level is invalid. + */ +MLXSW_ITEM32(pci, cqe2, mirror_cong_low, 0x14, 20, 12); + +#define MLXSW_PCI_CQE2_MIRROR_CONG_SHIFT 13 /* Units of 8KB. 
*/ + +static inline u16 mlxsw_pci_cqe2_mirror_cong_get(const char *cqe) +{ + u16 cong_high = mlxsw_pci_cqe2_mirror_cong_high_get(cqe); + u16 cong_low = mlxsw_pci_cqe2_mirror_cong_low_get(cqe); + + return cong_high << 12 | cong_low; +} + +/* pci_cqe_user_def_val_orig_pkt_len + * When trap_id is an ACL: User defined value from policy engine action. + */ +MLXSW_ITEM32(pci, cqe2, user_def_val_orig_pkt_len, 0x14, 0, 20); + +/* pci_cqe_mirror_reason + * Mirror reason. + */ +MLXSW_ITEM32(pci, cqe2, mirror_reason, 0x18, 24, 8); + +enum mlxsw_pci_cqe_time_stamp_type { + MLXSW_PCI_CQE_TIME_STAMP_TYPE_USEC, + MLXSW_PCI_CQE_TIME_STAMP_TYPE_FRC, + MLXSW_PCI_CQE_TIME_STAMP_TYPE_UTC, + MLXSW_PCI_CQE_TIME_STAMP_TYPE_MIRROR_UTC, +}; + +/* pci_cqe_time_stamp_type + * Time stamp type: + * 0: uSec - 1.024uSec (default for devices which do not support + * time_stamp_type) + * 1: FRC - Free Running Clock - units of 1nSec + * 2: UTC + * 3: Mirror_UTC. UTC time stamp of the original packet that has + * MIRROR_SESSION traps + */ +MLXSW_ITEM32(pci, cqe2, time_stamp_type, 0x18, 22, 2); + +#define MLXSW_PCI_CQE2_MIRROR_LATENCY_INVALID 0xFFFFFF + +/* pci_cqe_time_stamp_high + * Time stamp of the CQE + * Format according to time_stamp_type: + * 0: uSec - 1.024uSec (default for devices which do not support + * time_stamp_type). Only bits 15:0 are valid + * 1: FRC - Free Running Clock - units of 1nSec + * 2: UTC - time_stamp[37:30] = Sec + * - time_stamp[29:0] = nSec + * 3: Mirror_UTC. UTC time stamp of the original packet that has + * MIRROR_SESSION traps + * - time_stamp[37:30] = Sec + * - time_stamp[29:0] = nSec + * Formats 0..2 are configured by + * CONFIG_PROFILE.cqe_time_stamp_type for PTP traps + * Format 3 is used for MIRROR_SESSION traps + * Note that Spectrum does not reveal FRC, UTC and Mirror_UTC + */ +MLXSW_ITEM32(pci, cqe2, time_stamp_high, 0x18, 0, 22); + +static inline u64 mlxsw_pci_cqe2_time_stamp_get(const char *cqe) +{ + u64 ts_high = mlxsw_pci_cqe2_time_stamp_high_get(cqe); + u64 ts_low = mlxsw_pci_cqe2_time_stamp_low_get(cqe); + + return ts_high << 16 | ts_low; +} + +static inline u8 mlxsw_pci_cqe2_time_stamp_sec_get(const char *cqe) +{ + u64 full_ts = mlxsw_pci_cqe2_time_stamp_get(cqe); + + return full_ts >> 30 & 0xFF; +} + +static inline u32 mlxsw_pci_cqe2_time_stamp_nsec_get(const char *cqe) +{ + u64 full_ts = mlxsw_pci_cqe2_time_stamp_get(cqe); + + return full_ts & 0x3FFFFFFF; +} + +/* pci_cqe_mirror_latency + * End-to-end latency of the original packet that does mirroring to the CPU. + * Value of 0xFFFFFF means that the latency is invalid. Units are according to + * MOGCR.mirror_latency_units. + */ +MLXSW_ITEM32(pci, cqe2, mirror_latency, 0x1C, 8, 24); + +/* pci_cqe_owner + * Ownership bit. + */ +MLXSW_ITEM32(pci, cqe01, owner, 0x0C, 0, 1); +MLXSW_ITEM32(pci, cqe2, owner, 0x1C, 0, 1); +mlxsw_pci_cqe_item_helpers(owner, 01, 01, 2); + +/* pci_eqe_event_type + * Event type. + */ +MLXSW_ITEM32(pci, eqe, event_type, 0x0C, 24, 8); +#define MLXSW_PCI_EQE_EVENT_TYPE_COMP 0x00 +#define MLXSW_PCI_EQE_EVENT_TYPE_CMD 0x0A + +/* pci_eqe_event_sub_type + * Event type. + */ +MLXSW_ITEM32(pci, eqe, event_sub_type, 0x0C, 16, 8); + +/* pci_eqe_cqn + * Completion Queue that triggered this EQE. + */ +MLXSW_ITEM32(pci, eqe, cqn, 0x0C, 8, 7); + +/* pci_eqe_owner + * Ownership bit. 
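 * mlxsw_pci_eq_init() above sets the owner bit of every EQE to 1 when
 * the queue is created, and mlxsw_pci_eq_sw_eqe_get() stops consuming
 * as soon as mlxsw_pci_elem_hw_owned() reports that the next element
 * still belongs to the hardware.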
+ */ +MLXSW_ITEM32(pci, eqe, owner, 0x0C, 0, 1); + +/* pci_eqe_cmd_token + * Command completion event - token + */ +MLXSW_ITEM32(pci, eqe, cmd_token, 0x00, 16, 16); + +/* pci_eqe_cmd_status + * Command completion event - status + */ +MLXSW_ITEM32(pci, eqe, cmd_status, 0x00, 0, 8); + +/* pci_eqe_cmd_out_param_h + * Command completion event - output parameter - higher part + */ +MLXSW_ITEM32(pci, eqe, cmd_out_param_h, 0x04, 0, 32); + +/* pci_eqe_cmd_out_param_l + * Command completion event - output parameter - lower part + */ +MLXSW_ITEM32(pci, eqe, cmd_out_param_l, 0x08, 0, 32); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/port.h b/drivers/net/ethernet/mellanox/mlxsw/port.h new file mode 100644 index 000000000..ac4d4ea51 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/port.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_PORT_H +#define _MLXSW_PORT_H + +#include <linux/types.h> + +#define MLXSW_PORT_MAX_MTU 10000 + +#define MLXSW_PORT_DEFAULT_VID 1 + +#define MLXSW_PORT_SWID_DISABLED_PORT 255 +#define MLXSW_PORT_SWID_ALL_SWIDS 254 +#define MLXSW_PORT_SWID_TYPE_IB 1 +#define MLXSW_PORT_SWID_TYPE_ETH 2 + +#define MLXSW_PORT_MAX_IB_PHY_PORTS 36 +#define MLXSW_PORT_MAX_IB_PORTS (MLXSW_PORT_MAX_IB_PHY_PORTS + 1) + +#define MLXSW_PORT_CPU_PORT 0x0 + +#define MLXSW_PORT_DONT_CARE 0xFF + +enum mlxsw_port_admin_status { + MLXSW_PORT_ADMIN_STATUS_UP = 1, + MLXSW_PORT_ADMIN_STATUS_DOWN = 2, + MLXSW_PORT_ADMIN_STATUS_UP_ONCE = 3, + MLXSW_PORT_ADMIN_STATUS_DISABLED = 4, +}; + +enum mlxsw_reg_pude_oper_status { + MLXSW_PORT_OPER_STATUS_UP = 1, + MLXSW_PORT_OPER_STATUS_DOWN = 2, + MLXSW_PORT_OPER_STATUS_FAILURE = 4, /* Can be set to up again. */ +}; + +#endif /* _MLXSW_PORT_H */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h new file mode 100644 index 000000000..a34ff19c5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -0,0 +1,12937 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_REG_H +#define _MLXSW_REG_H + +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/bitops.h> +#include <linux/if_vlan.h> + +#include "item.h" +#include "port.h" + +struct mlxsw_reg_info { + u16 id; + u16 len; /* In u8 */ + const char *name; +}; + +#define MLXSW_REG_DEFINE(_name, _id, _len) \ +static const struct mlxsw_reg_info mlxsw_reg_##_name = { \ + .id = _id, \ + .len = _len, \ + .name = #_name, \ +} + +#define MLXSW_REG(type) (&mlxsw_reg_##type) +#define MLXSW_REG_LEN(type) MLXSW_REG(type)->len +#define MLXSW_REG_ZERO(type, payload) memset(payload, 0, MLXSW_REG(type)->len) + +/* SGCR - Switch General Configuration Register + * -------------------------------------------- + * This register is used for configuration of the switch capabilities. + */ +#define MLXSW_REG_SGCR_ID 0x2000 +#define MLXSW_REG_SGCR_LEN 0x10 + +MLXSW_REG_DEFINE(sgcr, MLXSW_REG_SGCR_ID, MLXSW_REG_SGCR_LEN); + +/* reg_sgcr_llb + * Link Local Broadcast (Default=0) + * When set, all Link Local packets (224.0.0.X) will be treated as broadcast + * packets and ignore the IGMP snooping entries. 
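 * A minimal usage sketch (illustration only; the pack helper is defined
 * right below, mlxsw_reg_write() is used the same way for MRSR in
 * mlxsw_pci_sw_reset() above, and mlxsw_core is assumed to be the
 * struct mlxsw_core pointer):
 *
 *	char sgcr_pl[MLXSW_REG_SGCR_LEN];
 *	int err;
 *
 *	mlxsw_reg_sgcr_pack(sgcr_pl, false);
 *	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(sgcr), sgcr_pl);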
+ * Access: RW + */ +MLXSW_ITEM32(reg, sgcr, llb, 0x04, 0, 1); + +static inline void mlxsw_reg_sgcr_pack(char *payload, bool llb) +{ + MLXSW_REG_ZERO(sgcr, payload); + mlxsw_reg_sgcr_llb_set(payload, !!llb); +} + +/* SPAD - Switch Physical Address Register + * --------------------------------------- + * The SPAD register configures the switch physical MAC address. + */ +#define MLXSW_REG_SPAD_ID 0x2002 +#define MLXSW_REG_SPAD_LEN 0x10 + +MLXSW_REG_DEFINE(spad, MLXSW_REG_SPAD_ID, MLXSW_REG_SPAD_LEN); + +/* reg_spad_base_mac + * Base MAC address for the switch partitions. + * Per switch partition MAC address is equal to: + * base_mac + swid + * Access: RW + */ +MLXSW_ITEM_BUF(reg, spad, base_mac, 0x02, 6); + +/* SSPR - Switch System Port Record Register + * ----------------------------------------- + * Configures the system port to local port mapping. + */ +#define MLXSW_REG_SSPR_ID 0x2008 +#define MLXSW_REG_SSPR_LEN 0x8 + +MLXSW_REG_DEFINE(sspr, MLXSW_REG_SSPR_ID, MLXSW_REG_SSPR_LEN); + +/* reg_sspr_m + * Master - if set, then the record describes the master system port. + * This is needed in case a local port is mapped into several system ports + * (for multipathing). That number will be reported as the source system + * port when packets are forwarded to the CPU. Only one master port is allowed + * per local port. + * + * Note: Must be set for Spectrum. + * Access: RW + */ +MLXSW_ITEM32(reg, sspr, m, 0x00, 31, 1); + +/* reg_sspr_local_port + * Local port number. + * + * Access: RW + */ +MLXSW_ITEM32_LP(reg, sspr, 0x00, 16, 0x00, 12); + +/* reg_sspr_system_port + * Unique identifier within the stacking domain that represents all the ports + * that are available in the system (external ports). + * + * Currently, only single-ASIC configurations are supported, so we default to + * 1:1 mapping between system ports and local ports. + * Access: Index + */ +MLXSW_ITEM32(reg, sspr, system_port, 0x04, 0, 16); + +static inline void mlxsw_reg_sspr_pack(char *payload, u16 local_port) +{ + MLXSW_REG_ZERO(sspr, payload); + mlxsw_reg_sspr_m_set(payload, 1); + mlxsw_reg_sspr_local_port_set(payload, local_port); + mlxsw_reg_sspr_system_port_set(payload, local_port); +} + +/* SFDAT - Switch Filtering Database Aging Time + * -------------------------------------------- + * Controls the Switch aging time. Aging time is able to be set per Switch + * Partition. + */ +#define MLXSW_REG_SFDAT_ID 0x2009 +#define MLXSW_REG_SFDAT_LEN 0x8 + +MLXSW_REG_DEFINE(sfdat, MLXSW_REG_SFDAT_ID, MLXSW_REG_SFDAT_LEN); + +/* reg_sfdat_swid + * Switch partition ID. + * Access: Index + */ +MLXSW_ITEM32(reg, sfdat, swid, 0x00, 24, 8); + +/* reg_sfdat_age_time + * Aging time in seconds + * Min - 10 seconds + * Max - 1,000,000 seconds + * Default is 300 seconds. + * Access: RW + */ +MLXSW_ITEM32(reg, sfdat, age_time, 0x04, 0, 20); + +static inline void mlxsw_reg_sfdat_pack(char *payload, u32 age_time) +{ + MLXSW_REG_ZERO(sfdat, payload); + mlxsw_reg_sfdat_swid_set(payload, 0); + mlxsw_reg_sfdat_age_time_set(payload, age_time); +} + +/* SFD - Switch Filtering Database + * ------------------------------- + * The following register defines the access to the filtering database. + * The register supports querying, adding, removing and modifying the database. + * The access is optimized for bulk updates in which case more than one + * FDB record is present in the same command. 
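 * With MLXSW_REG_SFD_REC_MAX_COUNT records of MLXSW_REG_SFD_REC_LEN
 * bytes each, the full payload is 0x10 + 64 * 0x10 = 0x410 bytes;
 * num_rec selects how many of the 64 record slots are actually valid
 * in a given request.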
+ */ +#define MLXSW_REG_SFD_ID 0x200A +#define MLXSW_REG_SFD_BASE_LEN 0x10 /* base length, without records */ +#define MLXSW_REG_SFD_REC_LEN 0x10 /* record length */ +#define MLXSW_REG_SFD_REC_MAX_COUNT 64 +#define MLXSW_REG_SFD_LEN (MLXSW_REG_SFD_BASE_LEN + \ + MLXSW_REG_SFD_REC_LEN * MLXSW_REG_SFD_REC_MAX_COUNT) + +MLXSW_REG_DEFINE(sfd, MLXSW_REG_SFD_ID, MLXSW_REG_SFD_LEN); + +/* reg_sfd_swid + * Switch partition ID for queries. Reserved on Write. + * Access: Index + */ +MLXSW_ITEM32(reg, sfd, swid, 0x00, 24, 8); + +enum mlxsw_reg_sfd_op { + /* Dump entire FDB a (process according to record_locator) */ + MLXSW_REG_SFD_OP_QUERY_DUMP = 0, + /* Query records by {MAC, VID/FID} value */ + MLXSW_REG_SFD_OP_QUERY_QUERY = 1, + /* Query and clear activity. Query records by {MAC, VID/FID} value */ + MLXSW_REG_SFD_OP_QUERY_QUERY_AND_CLEAR_ACTIVITY = 2, + /* Test. Response indicates if each of the records could be + * added to the FDB. + */ + MLXSW_REG_SFD_OP_WRITE_TEST = 0, + /* Add/modify. Aged-out records cannot be added. This command removes + * the learning notification of the {MAC, VID/FID}. Response includes + * the entries that were added to the FDB. + */ + MLXSW_REG_SFD_OP_WRITE_EDIT = 1, + /* Remove record by {MAC, VID/FID}. This command also removes + * the learning notification and aged-out notifications + * of the {MAC, VID/FID}. The response provides current (pre-removal) + * entries as non-aged-out. + */ + MLXSW_REG_SFD_OP_WRITE_REMOVE = 2, + /* Remove learned notification by {MAC, VID/FID}. The response provides + * the removed learning notification. + */ + MLXSW_REG_SFD_OP_WRITE_REMOVE_NOTIFICATION = 2, +}; + +/* reg_sfd_op + * Operation. + * Access: OP + */ +MLXSW_ITEM32(reg, sfd, op, 0x04, 30, 2); + +/* reg_sfd_record_locator + * Used for querying the FDB. Use record_locator=0 to initiate the + * query. When a record is returned, a new record_locator is + * returned to be used in the subsequent query. + * Reserved for database update. + * Access: Index + */ +MLXSW_ITEM32(reg, sfd, record_locator, 0x04, 0, 30); + +/* reg_sfd_num_rec + * Request: Number of records to read/add/modify/remove + * Response: Number of records read/added/replaced/removed + * See above description for more details. + * Ranges 0..64 + * Access: RW + */ +MLXSW_ITEM32(reg, sfd, num_rec, 0x08, 0, 8); + +static inline void mlxsw_reg_sfd_pack(char *payload, enum mlxsw_reg_sfd_op op, + u32 record_locator) +{ + MLXSW_REG_ZERO(sfd, payload); + mlxsw_reg_sfd_op_set(payload, op); + mlxsw_reg_sfd_record_locator_set(payload, record_locator); +} + +/* reg_sfd_rec_swid + * Switch partition ID. + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, sfd, rec_swid, MLXSW_REG_SFD_BASE_LEN, 24, 8, + MLXSW_REG_SFD_REC_LEN, 0x00, false); + +enum mlxsw_reg_sfd_rec_type { + MLXSW_REG_SFD_REC_TYPE_UNICAST = 0x0, + MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG = 0x1, + MLXSW_REG_SFD_REC_TYPE_MULTICAST = 0x2, + MLXSW_REG_SFD_REC_TYPE_UNICAST_TUNNEL = 0xC, +}; + +/* reg_sfd_rec_type + * FDB record type. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, rec_type, MLXSW_REG_SFD_BASE_LEN, 20, 4, + MLXSW_REG_SFD_REC_LEN, 0x00, false); + +enum mlxsw_reg_sfd_rec_policy { + /* Replacement disabled, aging disabled. */ + MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY = 0, + /* (mlag remote): Replacement enabled, aging disabled, + * learning notification enabled on this port. + */ + MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_MLAG = 1, + /* (ingress device): Replacement enabled, aging enabled. 
*/ + MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS = 3, +}; + +/* reg_sfd_rec_policy + * Policy. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, rec_policy, MLXSW_REG_SFD_BASE_LEN, 18, 2, + MLXSW_REG_SFD_REC_LEN, 0x00, false); + +/* reg_sfd_rec_a + * Activity. Set for new static entries. Set for static entries if a frame SMAC + * lookup hits on the entry. + * To clear the a bit, use "query and clear activity" op. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfd, rec_a, MLXSW_REG_SFD_BASE_LEN, 16, 1, + MLXSW_REG_SFD_REC_LEN, 0x00, false); + +/* reg_sfd_rec_mac + * MAC address. + * Access: Index + */ +MLXSW_ITEM_BUF_INDEXED(reg, sfd, rec_mac, MLXSW_REG_SFD_BASE_LEN, 6, + MLXSW_REG_SFD_REC_LEN, 0x02); + +enum mlxsw_reg_sfd_rec_action { + /* forward */ + MLXSW_REG_SFD_REC_ACTION_NOP = 0, + /* forward and trap, trap_id is FDB_TRAP */ + MLXSW_REG_SFD_REC_ACTION_MIRROR_TO_CPU = 1, + /* trap and do not forward, trap_id is FDB_TRAP */ + MLXSW_REG_SFD_REC_ACTION_TRAP = 2, + /* forward to IP router */ + MLXSW_REG_SFD_REC_ACTION_FORWARD_IP_ROUTER = 3, + MLXSW_REG_SFD_REC_ACTION_DISCARD_ERROR = 15, +}; + +/* reg_sfd_rec_action + * Action to apply on the packet. + * Note: Dynamic entries can only be configured with NOP action. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, rec_action, MLXSW_REG_SFD_BASE_LEN, 28, 4, + MLXSW_REG_SFD_REC_LEN, 0x0C, false); + +/* reg_sfd_uc_sub_port + * VEPA channel on local port. + * Valid only if local port is a non-stacking port. Must be 0 if multichannel + * VEPA is not enabled. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_sub_port, MLXSW_REG_SFD_BASE_LEN, 16, 8, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_uc_set_vid + * Set VID. + * 0 - Do not update VID. + * 1 - Set VID. + * For Spectrum-2 when set_vid=0 and smpe_valid=1, the smpe will modify the vid. + * Access: RW + * + * Note: Reserved when legacy bridge model is used. + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_set_vid, MLXSW_REG_SFD_BASE_LEN, 31, 1, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_uc_fid_vid + * Filtering ID or VLAN ID + * For SwitchX and SwitchX-2: + * - Dynamic entries (policy 2,3) use FID + * - Static entries (policy 0) use VID + * - When independent learning is configured, VID=FID + * For Spectrum: use FID for both Dynamic and Static entries. + * VID should not be used. + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_uc_vid + * New VID when set_vid=1. + * Access: RW + * + * Note: Reserved when legacy bridge model is used and when set_vid=0. + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_vid, MLXSW_REG_SFD_BASE_LEN, 16, 12, + MLXSW_REG_SFD_REC_LEN, 0x0C, false); + +/* reg_sfd_uc_system_port + * Unique port identifier for the final destination of the packet. 
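 * A minimal sketch of adding one static unicast FDB record
 * (illustration only, using the pack helpers defined below; mac, fid,
 * local_port and mlxsw_core are placeholders, and vid=0 leaves the VID
 * unchanged per reg_sfd_uc_set_vid):
 *
 *	char *sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
 *	int err;
 *
 *	if (!sfd_pl)
 *		return -ENOMEM;
 *	mlxsw_reg_sfd_pack(sfd_pl, MLXSW_REG_SFD_OP_WRITE_EDIT, 0);
 *	mlxsw_reg_sfd_uc_pack(sfd_pl, 0, MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY,
 *			      mac, fid, 0, MLXSW_REG_SFD_REC_ACTION_NOP,
 *			      local_port);
 *	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(sfd), sfd_pl);
 *	kfree(sfd_pl);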
+ * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_system_port, MLXSW_REG_SFD_BASE_LEN, 0, 16, + MLXSW_REG_SFD_REC_LEN, 0x0C, false); + +static inline void mlxsw_reg_sfd_rec_pack(char *payload, int rec_index, + enum mlxsw_reg_sfd_rec_type rec_type, + const char *mac, + enum mlxsw_reg_sfd_rec_action action) +{ + u8 num_rec = mlxsw_reg_sfd_num_rec_get(payload); + + if (rec_index >= num_rec) + mlxsw_reg_sfd_num_rec_set(payload, rec_index + 1); + mlxsw_reg_sfd_rec_swid_set(payload, rec_index, 0); + mlxsw_reg_sfd_rec_type_set(payload, rec_index, rec_type); + mlxsw_reg_sfd_rec_mac_memcpy_to(payload, rec_index, mac); + mlxsw_reg_sfd_rec_action_set(payload, rec_index, action); +} + +static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index, + enum mlxsw_reg_sfd_rec_policy policy, + const char *mac, u16 fid_vid, u16 vid, + enum mlxsw_reg_sfd_rec_action action, + u16 local_port) +{ + mlxsw_reg_sfd_rec_pack(payload, rec_index, + MLXSW_REG_SFD_REC_TYPE_UNICAST, mac, action); + mlxsw_reg_sfd_rec_policy_set(payload, rec_index, policy); + mlxsw_reg_sfd_uc_sub_port_set(payload, rec_index, 0); + mlxsw_reg_sfd_uc_fid_vid_set(payload, rec_index, fid_vid); + mlxsw_reg_sfd_uc_set_vid_set(payload, rec_index, vid ? true : false); + mlxsw_reg_sfd_uc_vid_set(payload, rec_index, vid); + mlxsw_reg_sfd_uc_system_port_set(payload, rec_index, local_port); +} + +/* reg_sfd_uc_lag_sub_port + * LAG sub port. + * Must be 0 if multichannel VEPA is not enabled. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_sub_port, MLXSW_REG_SFD_BASE_LEN, 16, 8, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_uc_lag_set_vid + * Set VID. + * 0 - Do not update VID. + * 1 - Set VID. + * For Spectrum-2 when set_vid=0 and smpe_valid=1, the smpe will modify the vid. + * Access: RW + * + * Note: Reserved when legacy bridge model is used. + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_set_vid, MLXSW_REG_SFD_BASE_LEN, 31, 1, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_uc_lag_fid_vid + * Filtering ID or VLAN ID + * For SwitchX and SwitchX-2: + * - Dynamic entries (policy 2,3) use FID + * - Static entries (policy 0) use VID + * - When independent learning is configured, VID=FID + * For Spectrum: use FID for both Dynamic and Static entries. + * VID should not be used. + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_uc_lag_lag_vid + * New vlan ID. + * Access: RW + * + * Note: Reserved when legacy bridge model is used and set_vid=0. + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_lag_vid, MLXSW_REG_SFD_BASE_LEN, 16, 12, + MLXSW_REG_SFD_REC_LEN, 0x0C, false); + +/* reg_sfd_uc_lag_lag_id + * LAG Identifier - pointer into the LAG descriptor table. 
+ * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_lag_id, MLXSW_REG_SFD_BASE_LEN, 0, 10, + MLXSW_REG_SFD_REC_LEN, 0x0C, false); + +static inline void +mlxsw_reg_sfd_uc_lag_pack(char *payload, int rec_index, + enum mlxsw_reg_sfd_rec_policy policy, + const char *mac, u16 fid_vid, + enum mlxsw_reg_sfd_rec_action action, u16 lag_vid, + u16 lag_id) +{ + mlxsw_reg_sfd_rec_pack(payload, rec_index, + MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG, + mac, action); + mlxsw_reg_sfd_rec_policy_set(payload, rec_index, policy); + mlxsw_reg_sfd_uc_lag_sub_port_set(payload, rec_index, 0); + mlxsw_reg_sfd_uc_lag_fid_vid_set(payload, rec_index, fid_vid); + mlxsw_reg_sfd_uc_lag_set_vid_set(payload, rec_index, true); + mlxsw_reg_sfd_uc_lag_lag_vid_set(payload, rec_index, lag_vid); + mlxsw_reg_sfd_uc_lag_lag_id_set(payload, rec_index, lag_id); +} + +/* reg_sfd_mc_pgi + * + * Multicast port group index - index into the port group table. + * Value 0x1FFF indicates the pgi should point to the MID entry. + * For Spectrum this value must be set to 0x1FFF + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, mc_pgi, MLXSW_REG_SFD_BASE_LEN, 16, 13, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_mc_fid_vid + * + * Filtering ID or VLAN ID + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, sfd, mc_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_mc_mid + * + * Multicast identifier - global identifier that represents the multicast + * group across all devices. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, mc_mid, MLXSW_REG_SFD_BASE_LEN, 0, 16, + MLXSW_REG_SFD_REC_LEN, 0x0C, false); + +static inline void +mlxsw_reg_sfd_mc_pack(char *payload, int rec_index, + const char *mac, u16 fid_vid, + enum mlxsw_reg_sfd_rec_action action, u16 mid) +{ + mlxsw_reg_sfd_rec_pack(payload, rec_index, + MLXSW_REG_SFD_REC_TYPE_MULTICAST, mac, action); + mlxsw_reg_sfd_mc_pgi_set(payload, rec_index, 0x1FFF); + mlxsw_reg_sfd_mc_fid_vid_set(payload, rec_index, fid_vid); + mlxsw_reg_sfd_mc_mid_set(payload, rec_index, mid); +} + +/* reg_sfd_uc_tunnel_uip_msb + * When protocol is IPv4, the most significant byte of the underlay IPv4 + * destination IP. + * When protocol is IPv6, reserved. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_tunnel_uip_msb, MLXSW_REG_SFD_BASE_LEN, 24, + 8, MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_uc_tunnel_fid + * Filtering ID. + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_tunnel_fid, MLXSW_REG_SFD_BASE_LEN, 0, 16, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +enum mlxsw_reg_sfd_uc_tunnel_protocol { + MLXSW_REG_SFD_UC_TUNNEL_PROTOCOL_IPV4, + MLXSW_REG_SFD_UC_TUNNEL_PROTOCOL_IPV6, +}; + +/* reg_sfd_uc_tunnel_protocol + * IP protocol. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_tunnel_protocol, MLXSW_REG_SFD_BASE_LEN, 27, + 1, MLXSW_REG_SFD_REC_LEN, 0x0C, false); + +/* reg_sfd_uc_tunnel_uip_lsb + * When protocol is IPv4, the least significant bytes of the underlay + * IPv4 destination IP. + * When protocol is IPv6, pointer to the underlay IPv6 destination IP + * which is configured by RIPS. 
+ * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_tunnel_uip_lsb, MLXSW_REG_SFD_BASE_LEN, 0, + 24, MLXSW_REG_SFD_REC_LEN, 0x0C, false); + +static inline void +mlxsw_reg_sfd_uc_tunnel_pack(char *payload, int rec_index, + enum mlxsw_reg_sfd_rec_policy policy, + const char *mac, u16 fid, + enum mlxsw_reg_sfd_rec_action action, + enum mlxsw_reg_sfd_uc_tunnel_protocol proto) +{ + mlxsw_reg_sfd_rec_pack(payload, rec_index, + MLXSW_REG_SFD_REC_TYPE_UNICAST_TUNNEL, mac, + action); + mlxsw_reg_sfd_rec_policy_set(payload, rec_index, policy); + mlxsw_reg_sfd_uc_tunnel_fid_set(payload, rec_index, fid); + mlxsw_reg_sfd_uc_tunnel_protocol_set(payload, rec_index, proto); +} + +static inline void +mlxsw_reg_sfd_uc_tunnel_pack4(char *payload, int rec_index, + enum mlxsw_reg_sfd_rec_policy policy, + const char *mac, u16 fid, + enum mlxsw_reg_sfd_rec_action action, u32 uip) +{ + mlxsw_reg_sfd_uc_tunnel_uip_msb_set(payload, rec_index, uip >> 24); + mlxsw_reg_sfd_uc_tunnel_uip_lsb_set(payload, rec_index, uip); + mlxsw_reg_sfd_uc_tunnel_pack(payload, rec_index, policy, mac, fid, + action, + MLXSW_REG_SFD_UC_TUNNEL_PROTOCOL_IPV4); +} + +static inline void +mlxsw_reg_sfd_uc_tunnel_pack6(char *payload, int rec_index, const char *mac, + u16 fid, enum mlxsw_reg_sfd_rec_action action, + u32 uip_ptr) +{ + mlxsw_reg_sfd_uc_tunnel_uip_lsb_set(payload, rec_index, uip_ptr); + /* Only static policy is supported for IPv6 unicast tunnel entry. */ + mlxsw_reg_sfd_uc_tunnel_pack(payload, rec_index, + MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY, + mac, fid, action, + MLXSW_REG_SFD_UC_TUNNEL_PROTOCOL_IPV6); +} + +enum mlxsw_reg_tunnel_port { + MLXSW_REG_TUNNEL_PORT_NVE, + MLXSW_REG_TUNNEL_PORT_VPLS, + MLXSW_REG_TUNNEL_PORT_FLEX_TUNNEL0, + MLXSW_REG_TUNNEL_PORT_FLEX_TUNNEL1, +}; + +/* SFN - Switch FDB Notification Register + * ------------------------------------------- + * The switch provides notifications on newly learned FDB entries and + * aged out entries. The notifications can be polled by software. + */ +#define MLXSW_REG_SFN_ID 0x200B +#define MLXSW_REG_SFN_BASE_LEN 0x10 /* base length, without records */ +#define MLXSW_REG_SFN_REC_LEN 0x10 /* record length */ +#define MLXSW_REG_SFN_REC_MAX_COUNT 64 +#define MLXSW_REG_SFN_LEN (MLXSW_REG_SFN_BASE_LEN + \ + MLXSW_REG_SFN_REC_LEN * MLXSW_REG_SFN_REC_MAX_COUNT) + +MLXSW_REG_DEFINE(sfn, MLXSW_REG_SFN_ID, MLXSW_REG_SFN_LEN); + +/* reg_sfn_swid + * Switch partition ID. + * Access: Index + */ +MLXSW_ITEM32(reg, sfn, swid, 0x00, 24, 8); + +/* reg_sfn_end + * Forces the current session to end. + * Access: OP + */ +MLXSW_ITEM32(reg, sfn, end, 0x04, 20, 1); + +/* reg_sfn_num_rec + * Request: Number of learned notifications and aged-out notification + * records requested. + * Response: Number of notification records returned (must be smaller + * than or equal to the value requested) + * Ranges 0..64 + * Access: OP + */ +MLXSW_ITEM32(reg, sfn, num_rec, 0x04, 0, 8); + +static inline void mlxsw_reg_sfn_pack(char *payload) +{ + MLXSW_REG_ZERO(sfn, payload); + mlxsw_reg_sfn_swid_set(payload, 0); + mlxsw_reg_sfn_end_set(payload, 0); + mlxsw_reg_sfn_num_rec_set(payload, MLXSW_REG_SFN_REC_MAX_COUNT); +} + +/* reg_sfn_rec_swid + * Switch partition ID. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, rec_swid, MLXSW_REG_SFN_BASE_LEN, 24, 8, + MLXSW_REG_SFN_REC_LEN, 0x00, false); + +enum mlxsw_reg_sfn_rec_type { + /* MAC addresses learned on a regular port. */ + MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC = 0x5, + /* MAC addresses learned on a LAG port. 
*/ + MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC_LAG = 0x6, + /* Aged-out MAC address on a regular port. */ + MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC = 0x7, + /* Aged-out MAC address on a LAG port. */ + MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC_LAG = 0x8, + /* Learned unicast tunnel record. */ + MLXSW_REG_SFN_REC_TYPE_LEARNED_UNICAST_TUNNEL = 0xD, + /* Aged-out unicast tunnel record. */ + MLXSW_REG_SFN_REC_TYPE_AGED_OUT_UNICAST_TUNNEL = 0xE, +}; + +/* reg_sfn_rec_type + * Notification record type. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, rec_type, MLXSW_REG_SFN_BASE_LEN, 20, 4, + MLXSW_REG_SFN_REC_LEN, 0x00, false); + +/* reg_sfn_rec_mac + * MAC address. + * Access: RO + */ +MLXSW_ITEM_BUF_INDEXED(reg, sfn, rec_mac, MLXSW_REG_SFN_BASE_LEN, 6, + MLXSW_REG_SFN_REC_LEN, 0x02); + +/* reg_sfn_mac_sub_port + * VEPA channel on the local port. + * 0 if multichannel VEPA is not enabled. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, mac_sub_port, MLXSW_REG_SFN_BASE_LEN, 16, 8, + MLXSW_REG_SFN_REC_LEN, 0x08, false); + +/* reg_sfn_mac_fid + * Filtering identifier. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, mac_fid, MLXSW_REG_SFN_BASE_LEN, 0, 16, + MLXSW_REG_SFN_REC_LEN, 0x08, false); + +/* reg_sfn_mac_system_port + * Unique port identifier for the final destination of the packet. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, mac_system_port, MLXSW_REG_SFN_BASE_LEN, 0, 16, + MLXSW_REG_SFN_REC_LEN, 0x0C, false); + +static inline void mlxsw_reg_sfn_mac_unpack(char *payload, int rec_index, + char *mac, u16 *p_vid, + u16 *p_local_port) +{ + mlxsw_reg_sfn_rec_mac_memcpy_from(payload, rec_index, mac); + *p_vid = mlxsw_reg_sfn_mac_fid_get(payload, rec_index); + *p_local_port = mlxsw_reg_sfn_mac_system_port_get(payload, rec_index); +} + +/* reg_sfn_mac_lag_lag_id + * LAG ID (pointer into the LAG descriptor table). + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, mac_lag_lag_id, MLXSW_REG_SFN_BASE_LEN, 0, 10, + MLXSW_REG_SFN_REC_LEN, 0x0C, false); + +static inline void mlxsw_reg_sfn_mac_lag_unpack(char *payload, int rec_index, + char *mac, u16 *p_vid, + u16 *p_lag_id) +{ + mlxsw_reg_sfn_rec_mac_memcpy_from(payload, rec_index, mac); + *p_vid = mlxsw_reg_sfn_mac_fid_get(payload, rec_index); + *p_lag_id = mlxsw_reg_sfn_mac_lag_lag_id_get(payload, rec_index); +} + +/* reg_sfn_uc_tunnel_uip_msb + * When protocol is IPv4, the most significant byte of the underlay IPv4 + * address of the remote VTEP. + * When protocol is IPv6, reserved. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, uc_tunnel_uip_msb, MLXSW_REG_SFN_BASE_LEN, 24, + 8, MLXSW_REG_SFN_REC_LEN, 0x08, false); + +enum mlxsw_reg_sfn_uc_tunnel_protocol { + MLXSW_REG_SFN_UC_TUNNEL_PROTOCOL_IPV4, + MLXSW_REG_SFN_UC_TUNNEL_PROTOCOL_IPV6, +}; + +/* reg_sfn_uc_tunnel_protocol + * IP protocol. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, uc_tunnel_protocol, MLXSW_REG_SFN_BASE_LEN, 27, + 1, MLXSW_REG_SFN_REC_LEN, 0x0C, false); + +/* reg_sfn_uc_tunnel_uip_lsb + * When protocol is IPv4, the least significant bytes of the underlay + * IPv4 address of the remote VTEP. + * When protocol is IPv6, ipv6_id to be queried from TNIPSD. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, uc_tunnel_uip_lsb, MLXSW_REG_SFN_BASE_LEN, 0, + 24, MLXSW_REG_SFN_REC_LEN, 0x0C, false); + +/* reg_sfn_uc_tunnel_port + * Tunnel port. + * Reserved on Spectrum. 
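+ *
+ * Notification records of the types listed above are consumed by polling the
+ * register. An illustrative polling sketch only, assuming the generic
+ * mlxsw_reg_query() helper and MLXSW_REG() macro of the mlxsw core:
+ *
+ *	char sfn_pl[MLXSW_REG_SFN_LEN];
+ *	char mac[ETH_ALEN];
+ *	u16 fid, local_port;
+ *	int i, num_rec, err;
+ *
+ *	mlxsw_reg_sfn_pack(sfn_pl);
+ *	err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(sfn), sfn_pl);
+ *	if (err)
+ *		return err;
+ *	num_rec = mlxsw_reg_sfn_num_rec_get(sfn_pl);
+ *	for (i = 0; i < num_rec; i++)
+ *		if (mlxsw_reg_sfn_rec_type_get(sfn_pl, i) ==
+ *		    MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC)
+ *			mlxsw_reg_sfn_mac_unpack(sfn_pl, i, mac, &fid,
+ *						 &local_port);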
+ * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, tunnel_port, MLXSW_REG_SFN_BASE_LEN, 0, 4, + MLXSW_REG_SFN_REC_LEN, 0x10, false); + +static inline void +mlxsw_reg_sfn_uc_tunnel_unpack(char *payload, int rec_index, char *mac, + u16 *p_fid, u32 *p_uip, + enum mlxsw_reg_sfn_uc_tunnel_protocol *p_proto) +{ + u32 uip_msb, uip_lsb; + + mlxsw_reg_sfn_rec_mac_memcpy_from(payload, rec_index, mac); + *p_fid = mlxsw_reg_sfn_mac_fid_get(payload, rec_index); + uip_msb = mlxsw_reg_sfn_uc_tunnel_uip_msb_get(payload, rec_index); + uip_lsb = mlxsw_reg_sfn_uc_tunnel_uip_lsb_get(payload, rec_index); + *p_uip = uip_msb << 24 | uip_lsb; + *p_proto = mlxsw_reg_sfn_uc_tunnel_protocol_get(payload, rec_index); +} + +/* SPMS - Switch Port MSTP/RSTP State Register + * ------------------------------------------- + * Configures the spanning tree state of a physical port. + */ +#define MLXSW_REG_SPMS_ID 0x200D +#define MLXSW_REG_SPMS_LEN 0x404 + +MLXSW_REG_DEFINE(spms, MLXSW_REG_SPMS_ID, MLXSW_REG_SPMS_LEN); + +/* reg_spms_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, spms, 0x00, 16, 0x00, 12); + +enum mlxsw_reg_spms_state { + MLXSW_REG_SPMS_STATE_NO_CHANGE, + MLXSW_REG_SPMS_STATE_DISCARDING, + MLXSW_REG_SPMS_STATE_LEARNING, + MLXSW_REG_SPMS_STATE_FORWARDING, +}; + +/* reg_spms_state + * Spanning tree state of each VLAN ID (VID) of the local port. + * 0 - Do not change spanning tree state (used only when writing). + * 1 - Discarding. No learning or forwarding to/from this port (default). + * 2 - Learning. Port is learning, but not forwarding. + * 3 - Forwarding. Port is learning and forwarding. + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, spms, state, 0x04, 0x400, 2); + +static inline void mlxsw_reg_spms_pack(char *payload, u16 local_port) +{ + MLXSW_REG_ZERO(spms, payload); + mlxsw_reg_spms_local_port_set(payload, local_port); +} + +static inline void mlxsw_reg_spms_vid_pack(char *payload, u16 vid, + enum mlxsw_reg_spms_state state) +{ + mlxsw_reg_spms_state_set(payload, vid, state); +} + +/* SPVID - Switch Port VID + * ----------------------- + * The switch port VID configures the default VID for a port. + */ +#define MLXSW_REG_SPVID_ID 0x200E +#define MLXSW_REG_SPVID_LEN 0x08 + +MLXSW_REG_DEFINE(spvid, MLXSW_REG_SPVID_ID, MLXSW_REG_SPVID_LEN); + +/* reg_spvid_tport + * Port is tunnel port. + * Reserved when SwitchX/-2 or Spectrum-1. + * Access: Index + */ +MLXSW_ITEM32(reg, spvid, tport, 0x00, 24, 1); + +/* reg_spvid_local_port + * When tport = 0: Local port number. Not supported for CPU port. + * When tport = 1: Tunnel port. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, spvid, 0x00, 16, 0x00, 12); + +/* reg_spvid_sub_port + * Virtual port within the physical port. + * Should be set to 0 when virtual ports are not enabled on the port. + * Access: Index + */ +MLXSW_ITEM32(reg, spvid, sub_port, 0x00, 8, 8); + +/* reg_spvid_egr_et_set + * When VLAN is pushed at ingress (for untagged packets or for + * QinQ push mode) then the EtherType is decided at the egress port. + * Reserved when Spectrum-1. + * Access: RW + */ +MLXSW_ITEM32(reg, spvid, egr_et_set, 0x04, 24, 1); + +/* reg_spvid_et_vlan + * EtherType used for when VLAN is pushed at ingress (for untagged + * packets or for QinQ push mode). + * 0: ether_type0 - (default) + * 1: ether_type1 + * 2: ether_type2 - Reserved when Spectrum-1, supported by Spectrum-2 + * Ethertype IDs are configured by SVER. + * Reserved when egr_et_set = 1. 
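+ *
+ * In the common case only the PVID is changed and ether_type0 is kept, via
+ * the mlxsw_reg_spvid_pack() helper below. Illustrative sketch only, assuming
+ * the generic mlxsw_reg_write() helper and caller-provided local_port/vid:
+ *
+ *	char spvid_pl[MLXSW_REG_SPVID_LEN];
+ *	int err;
+ *
+ *	mlxsw_reg_spvid_pack(spvid_pl, local_port, vid, 0);
+ *	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(spvid), spvid_pl);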
+ * Access: RW + */ +MLXSW_ITEM32(reg, spvid, et_vlan, 0x04, 16, 2); + +/* reg_spvid_pvid + * Port default VID + * Access: RW + */ +MLXSW_ITEM32(reg, spvid, pvid, 0x04, 0, 12); + +static inline void mlxsw_reg_spvid_pack(char *payload, u16 local_port, u16 pvid, + u8 et_vlan) +{ + MLXSW_REG_ZERO(spvid, payload); + mlxsw_reg_spvid_local_port_set(payload, local_port); + mlxsw_reg_spvid_pvid_set(payload, pvid); + mlxsw_reg_spvid_et_vlan_set(payload, et_vlan); +} + +/* SPVM - Switch Port VLAN Membership + * ---------------------------------- + * The Switch Port VLAN Membership register configures the VLAN membership + * of a port in a VLAN denoted by VID. VLAN membership is managed per + * virtual port. The register can be used to add and remove VID(s) from a port. + */ +#define MLXSW_REG_SPVM_ID 0x200F +#define MLXSW_REG_SPVM_BASE_LEN 0x04 /* base length, without records */ +#define MLXSW_REG_SPVM_REC_LEN 0x04 /* record length */ +#define MLXSW_REG_SPVM_REC_MAX_COUNT 255 +#define MLXSW_REG_SPVM_LEN (MLXSW_REG_SPVM_BASE_LEN + \ + MLXSW_REG_SPVM_REC_LEN * MLXSW_REG_SPVM_REC_MAX_COUNT) + +MLXSW_REG_DEFINE(spvm, MLXSW_REG_SPVM_ID, MLXSW_REG_SPVM_LEN); + +/* reg_spvm_pt + * Priority tagged. If this bit is set, packets forwarded to the port with + * untagged VLAN membership (u bit is set) will be tagged with priority tag + * (VID=0) + * Access: RW + */ +MLXSW_ITEM32(reg, spvm, pt, 0x00, 31, 1); + +/* reg_spvm_pte + * Priority Tagged Update Enable. On Write operations, if this bit is cleared, + * the pt bit will NOT be updated. To update the pt bit, pte must be set. + * Access: WO + */ +MLXSW_ITEM32(reg, spvm, pte, 0x00, 30, 1); + +/* reg_spvm_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, spvm, 0x00, 16, 0x00, 12); + +/* reg_spvm_sub_port + * Virtual port within the physical port. + * Should be set to 0 when virtual ports are not enabled on the port. + * Access: Index + */ +MLXSW_ITEM32(reg, spvm, sub_port, 0x00, 8, 8); + +/* reg_spvm_num_rec + * Number of records to update. Each record contains: i, e, u, vid. + * Access: OP + */ +MLXSW_ITEM32(reg, spvm, num_rec, 0x00, 0, 8); + +/* reg_spvm_rec_i + * Ingress membership in VLAN ID. + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, spvm, rec_i, + MLXSW_REG_SPVM_BASE_LEN, 14, 1, + MLXSW_REG_SPVM_REC_LEN, 0, false); + +/* reg_spvm_rec_e + * Egress membership in VLAN ID. + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, spvm, rec_e, + MLXSW_REG_SPVM_BASE_LEN, 13, 1, + MLXSW_REG_SPVM_REC_LEN, 0, false); + +/* reg_spvm_rec_u + * Untagged - port is an untagged member - egress transmission uses untagged + * frames on VID<n> + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, spvm, rec_u, + MLXSW_REG_SPVM_BASE_LEN, 12, 1, + MLXSW_REG_SPVM_REC_LEN, 0, false); + +/* reg_spvm_rec_vid + * Egress membership in VLAN ID. 
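+ *
+ * The mlxsw_reg_spvm_pack() helper below fills one record per VID in the
+ * requested [vid_begin, vid_end] range. Illustrative sketch only, assuming
+ * the generic mlxsw_reg_write() helper; the register is large, so a heap
+ * buffer is assumed:
+ *
+ *	char *spvm_pl;
+ *	int err;
+ *
+ *	spvm_pl = kmalloc(MLXSW_REG_SPVM_LEN, GFP_KERNEL);
+ *	if (!spvm_pl)
+ *		return -ENOMEM;
+ *	mlxsw_reg_spvm_pack(spvm_pl, local_port, vid_begin, vid_end,
+ *			    true, untagged);
+ *	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(spvm), spvm_pl);
+ *	kfree(spvm_pl);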
+ * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, spvm, rec_vid, + MLXSW_REG_SPVM_BASE_LEN, 0, 12, + MLXSW_REG_SPVM_REC_LEN, 0, false); + +static inline void mlxsw_reg_spvm_pack(char *payload, u16 local_port, + u16 vid_begin, u16 vid_end, + bool is_member, bool untagged) +{ + int size = vid_end - vid_begin + 1; + int i; + + MLXSW_REG_ZERO(spvm, payload); + mlxsw_reg_spvm_local_port_set(payload, local_port); + mlxsw_reg_spvm_num_rec_set(payload, size); + + for (i = 0; i < size; i++) { + mlxsw_reg_spvm_rec_i_set(payload, i, is_member); + mlxsw_reg_spvm_rec_e_set(payload, i, is_member); + mlxsw_reg_spvm_rec_u_set(payload, i, untagged); + mlxsw_reg_spvm_rec_vid_set(payload, i, vid_begin + i); + } +} + +/* SPAFT - Switch Port Acceptable Frame Types + * ------------------------------------------ + * The Switch Port Acceptable Frame Types register configures the frame + * admittance of the port. + */ +#define MLXSW_REG_SPAFT_ID 0x2010 +#define MLXSW_REG_SPAFT_LEN 0x08 + +MLXSW_REG_DEFINE(spaft, MLXSW_REG_SPAFT_ID, MLXSW_REG_SPAFT_LEN); + +/* reg_spaft_local_port + * Local port number. + * Access: Index + * + * Note: CPU port is not supported (all tag types are allowed). + */ +MLXSW_ITEM32_LP(reg, spaft, 0x00, 16, 0x00, 12); + +/* reg_spaft_sub_port + * Virtual port within the physical port. + * Should be set to 0 when virtual ports are not enabled on the port. + * Access: RW + */ +MLXSW_ITEM32(reg, spaft, sub_port, 0x00, 8, 8); + +/* reg_spaft_allow_untagged + * When set, untagged frames on the ingress are allowed (default). + * Access: RW + */ +MLXSW_ITEM32(reg, spaft, allow_untagged, 0x04, 31, 1); + +/* reg_spaft_allow_prio_tagged + * When set, priority tagged frames on the ingress are allowed (default). + * Access: RW + */ +MLXSW_ITEM32(reg, spaft, allow_prio_tagged, 0x04, 30, 1); + +/* reg_spaft_allow_tagged + * When set, tagged frames on the ingress are allowed (default). + * Access: RW + */ +MLXSW_ITEM32(reg, spaft, allow_tagged, 0x04, 29, 1); + +static inline void mlxsw_reg_spaft_pack(char *payload, u16 local_port, + bool allow_untagged) +{ + MLXSW_REG_ZERO(spaft, payload); + mlxsw_reg_spaft_local_port_set(payload, local_port); + mlxsw_reg_spaft_allow_untagged_set(payload, allow_untagged); + mlxsw_reg_spaft_allow_prio_tagged_set(payload, allow_untagged); + mlxsw_reg_spaft_allow_tagged_set(payload, true); +} + +/* SFGC - Switch Flooding Group Configuration + * ------------------------------------------ + * The following register controls the association of flooding tables and MIDs + * to packet types used for flooding. + */ +#define MLXSW_REG_SFGC_ID 0x2011 +#define MLXSW_REG_SFGC_LEN 0x14 + +MLXSW_REG_DEFINE(sfgc, MLXSW_REG_SFGC_ID, MLXSW_REG_SFGC_LEN); + +enum mlxsw_reg_sfgc_type { + MLXSW_REG_SFGC_TYPE_BROADCAST, + MLXSW_REG_SFGC_TYPE_UNKNOWN_UNICAST, + MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4, + MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6, + MLXSW_REG_SFGC_TYPE_RESERVED, + MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_NON_IP, + MLXSW_REG_SFGC_TYPE_IPV4_LINK_LOCAL, + MLXSW_REG_SFGC_TYPE_IPV6_ALL_HOST, + MLXSW_REG_SFGC_TYPE_MAX, +}; + +/* reg_sfgc_type + * The traffic type to reach the flooding table. + * Access: Index + */ +MLXSW_ITEM32(reg, sfgc, type, 0x00, 0, 4); + +/* bridge_type is used in SFGC and SFMR. */ +enum mlxsw_reg_bridge_type { + MLXSW_REG_BRIDGE_TYPE_0 = 0, /* Used for .1q FIDs. */ + MLXSW_REG_BRIDGE_TYPE_1 = 1, /* Used for .1d FIDs. */ +}; + +/* reg_sfgc_bridge_type + * Access: Index + * + * Note: SwitchX-2 only supports 802.1Q mode. 
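+ *
+ * The bridge type is set together with the traffic type and flood table by
+ * the mlxsw_reg_sfgc_pack() helper below. Illustrative sketch only, assuming
+ * the generic mlxsw_reg_write() helper and a caller-managed flood_table
+ * index:
+ *
+ *	char sfgc_pl[MLXSW_REG_SFGC_LEN];
+ *	int err;
+ *
+ *	mlxsw_reg_sfgc_pack(sfgc_pl, MLXSW_REG_SFGC_TYPE_BROADCAST,
+ *			    MLXSW_REG_BRIDGE_TYPE_0,
+ *			    MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFSET,
+ *			    flood_table, 0);
+ *	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(sfgc), sfgc_pl);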
+ */ +MLXSW_ITEM32(reg, sfgc, bridge_type, 0x04, 24, 3); + +enum mlxsw_flood_table_type { + MLXSW_REG_SFGC_TABLE_TYPE_VID = 1, + MLXSW_REG_SFGC_TABLE_TYPE_SINGLE = 2, + MLXSW_REG_SFGC_TABLE_TYPE_ANY = 0, + MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFSET = 3, + MLXSW_REG_SFGC_TABLE_TYPE_FID = 4, +}; + +/* reg_sfgc_table_type + * See mlxsw_flood_table_type + * Access: RW + * + * Note: FID offset and FID types are not supported in SwitchX-2. + */ +MLXSW_ITEM32(reg, sfgc, table_type, 0x04, 16, 3); + +/* reg_sfgc_flood_table + * Flooding table index to associate with the specific type on the specific + * switch partition. + * Access: RW + */ +MLXSW_ITEM32(reg, sfgc, flood_table, 0x04, 0, 6); + +/* reg_sfgc_counter_set_type + * Counter Set Type for flow counters. + * Access: RW + */ +MLXSW_ITEM32(reg, sfgc, counter_set_type, 0x0C, 24, 8); + +/* reg_sfgc_counter_index + * Counter Index for flow counters. + * Access: RW + */ +MLXSW_ITEM32(reg, sfgc, counter_index, 0x0C, 0, 24); + +/* reg_sfgc_mid_base + * MID Base. + * Access: RW + * + * Note: Reserved when legacy bridge model is used. + */ +MLXSW_ITEM32(reg, sfgc, mid_base, 0x10, 0, 16); + +static inline void +mlxsw_reg_sfgc_pack(char *payload, enum mlxsw_reg_sfgc_type type, + enum mlxsw_reg_bridge_type bridge_type, + enum mlxsw_flood_table_type table_type, + unsigned int flood_table, u16 mid_base) +{ + MLXSW_REG_ZERO(sfgc, payload); + mlxsw_reg_sfgc_type_set(payload, type); + mlxsw_reg_sfgc_bridge_type_set(payload, bridge_type); + mlxsw_reg_sfgc_table_type_set(payload, table_type); + mlxsw_reg_sfgc_flood_table_set(payload, flood_table); + mlxsw_reg_sfgc_mid_base_set(payload, mid_base); +} + +/* SFDF - Switch Filtering DB Flush + * -------------------------------- + * The switch filtering DB flush register is used to flush the FDB. + * Note that FDB notifications are flushed as well. + */ +#define MLXSW_REG_SFDF_ID 0x2013 +#define MLXSW_REG_SFDF_LEN 0x14 + +MLXSW_REG_DEFINE(sfdf, MLXSW_REG_SFDF_ID, MLXSW_REG_SFDF_LEN); + +/* reg_sfdf_swid + * Switch partition ID. + * Access: Index + */ +MLXSW_ITEM32(reg, sfdf, swid, 0x00, 24, 8); + +enum mlxsw_reg_sfdf_flush_type { + MLXSW_REG_SFDF_FLUSH_PER_SWID, + MLXSW_REG_SFDF_FLUSH_PER_FID, + MLXSW_REG_SFDF_FLUSH_PER_PORT, + MLXSW_REG_SFDF_FLUSH_PER_PORT_AND_FID, + MLXSW_REG_SFDF_FLUSH_PER_LAG, + MLXSW_REG_SFDF_FLUSH_PER_LAG_AND_FID, + MLXSW_REG_SFDF_FLUSH_PER_NVE, + MLXSW_REG_SFDF_FLUSH_PER_NVE_AND_FID, +}; + +/* reg_sfdf_flush_type + * Flush type. + * 0 - All SWID dynamic entries are flushed. + * 1 - All FID dynamic entries are flushed. + * 2 - All dynamic entries pointing to port are flushed. + * 3 - All FID dynamic entries pointing to port are flushed. + * 4 - All dynamic entries pointing to LAG are flushed. + * 5 - All FID dynamic entries pointing to LAG are flushed. + * 6 - All entries of type "Unicast Tunnel" or "Multicast Tunnel" are + * flushed. + * 7 - All entries of type "Unicast Tunnel" or "Multicast Tunnel" are + * flushed, per FID. + * Access: RW + */ +MLXSW_ITEM32(reg, sfdf, flush_type, 0x04, 28, 4); + +/* reg_sfdf_flush_static + * Static. + * 0 - Flush only dynamic entries. + * 1 - Flush both dynamic and static entries. + * Access: RW + */ +MLXSW_ITEM32(reg, sfdf, flush_static, 0x04, 24, 1); + +static inline void mlxsw_reg_sfdf_pack(char *payload, + enum mlxsw_reg_sfdf_flush_type type) +{ + MLXSW_REG_ZERO(sfdf, payload); + mlxsw_reg_sfdf_flush_type_set(payload, type); + mlxsw_reg_sfdf_flush_static_set(payload, true); +} + +/* reg_sfdf_fid + * FID to flush. 
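+ *
+ * Used when flush_type selects a per-FID flush. Illustrative sketch only,
+ * assuming the generic mlxsw_reg_write() helper and a caller-provided fid:
+ *
+ *	char sfdf_pl[MLXSW_REG_SFDF_LEN];
+ *	int err;
+ *
+ *	mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_FID);
+ *	mlxsw_reg_sfdf_fid_set(sfdf_pl, fid);
+ *	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(sfdf), sfdf_pl);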
+ * Access: RW + */ +MLXSW_ITEM32(reg, sfdf, fid, 0x0C, 0, 16); + +/* reg_sfdf_system_port + * Port to flush. + * Access: RW + */ +MLXSW_ITEM32(reg, sfdf, system_port, 0x0C, 0, 16); + +/* reg_sfdf_port_fid_system_port + * Port to flush, pointed to by FID. + * Access: RW + */ +MLXSW_ITEM32(reg, sfdf, port_fid_system_port, 0x08, 0, 16); + +/* reg_sfdf_lag_id + * LAG ID to flush. + * Access: RW + */ +MLXSW_ITEM32(reg, sfdf, lag_id, 0x0C, 0, 10); + +/* reg_sfdf_lag_fid_lag_id + * LAG ID to flush, pointed to by FID. + * Access: RW + */ +MLXSW_ITEM32(reg, sfdf, lag_fid_lag_id, 0x08, 0, 10); + +/* SLDR - Switch LAG Descriptor Register + * ----------------------------------------- + * The switch LAG descriptor register is populated by LAG descriptors. + * Each LAG descriptor is indexed by lag_id. The LAG ID runs from 0 to + * max_lag-1. + */ +#define MLXSW_REG_SLDR_ID 0x2014 +#define MLXSW_REG_SLDR_LEN 0x0C /* counting in only one port in list */ + +MLXSW_REG_DEFINE(sldr, MLXSW_REG_SLDR_ID, MLXSW_REG_SLDR_LEN); + +enum mlxsw_reg_sldr_op { + /* Indicates a creation of a new LAG-ID, lag_id must be valid */ + MLXSW_REG_SLDR_OP_LAG_CREATE, + MLXSW_REG_SLDR_OP_LAG_DESTROY, + /* Ports that appear in the list have the Distributor enabled */ + MLXSW_REG_SLDR_OP_LAG_ADD_PORT_LIST, + /* Removes ports from the disributor list */ + MLXSW_REG_SLDR_OP_LAG_REMOVE_PORT_LIST, +}; + +/* reg_sldr_op + * Operation. + * Access: RW + */ +MLXSW_ITEM32(reg, sldr, op, 0x00, 29, 3); + +/* reg_sldr_lag_id + * LAG identifier. The lag_id is the index into the LAG descriptor table. + * Access: Index + */ +MLXSW_ITEM32(reg, sldr, lag_id, 0x00, 0, 10); + +static inline void mlxsw_reg_sldr_lag_create_pack(char *payload, u8 lag_id) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_CREATE); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); +} + +static inline void mlxsw_reg_sldr_lag_destroy_pack(char *payload, u8 lag_id) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_DESTROY); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); +} + +/* reg_sldr_num_ports + * The number of member ports of the LAG. + * Reserved for Create / Destroy operations + * For Add / Remove operations - indicates the number of ports in the list. + * Access: RW + */ +MLXSW_ITEM32(reg, sldr, num_ports, 0x04, 24, 8); + +/* reg_sldr_system_port + * System port. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sldr, system_port, 0x08, 0, 16, 4, 0, false); + +static inline void mlxsw_reg_sldr_lag_add_port_pack(char *payload, u8 lag_id, + u16 local_port) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_ADD_PORT_LIST); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); + mlxsw_reg_sldr_num_ports_set(payload, 1); + mlxsw_reg_sldr_system_port_set(payload, 0, local_port); +} + +static inline void mlxsw_reg_sldr_lag_remove_port_pack(char *payload, u8 lag_id, + u16 local_port) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_REMOVE_PORT_LIST); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); + mlxsw_reg_sldr_num_ports_set(payload, 1); + mlxsw_reg_sldr_system_port_set(payload, 0, local_port); +} + +/* SLCR - Switch LAG Configuration 2 Register + * ------------------------------------------- + * The Switch LAG Configuration register is used for configuring the + * LAG properties of the switch. 
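+ *
+ * Illustrative configuration sketch only, using the hash field defines and
+ * the mlxsw_reg_slcr_pack() helper below and assuming the generic
+ * mlxsw_reg_write() helper with a caller-chosen seed:
+ *
+ *	char slcr_pl[MLXSW_REG_SLCR_LEN];
+ *	int err;
+ *
+ *	mlxsw_reg_slcr_pack(slcr_pl, MLXSW_REG_SLCR_LAG_HASH_SMAC |
+ *				     MLXSW_REG_SLCR_LAG_HASH_DMAC |
+ *				     MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE |
+ *				     MLXSW_REG_SLCR_LAG_HASH_VLANID,
+ *			    seed);
+ *	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(slcr), slcr_pl);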
+ */ +#define MLXSW_REG_SLCR_ID 0x2015 +#define MLXSW_REG_SLCR_LEN 0x10 + +MLXSW_REG_DEFINE(slcr, MLXSW_REG_SLCR_ID, MLXSW_REG_SLCR_LEN); + +enum mlxsw_reg_slcr_pp { + /* Global Configuration (for all ports) */ + MLXSW_REG_SLCR_PP_GLOBAL, + /* Per port configuration, based on local_port field */ + MLXSW_REG_SLCR_PP_PER_PORT, +}; + +/* reg_slcr_pp + * Per Port Configuration + * Note: Reading at Global mode results in reading port 1 configuration. + * Access: Index + */ +MLXSW_ITEM32(reg, slcr, pp, 0x00, 24, 1); + +/* reg_slcr_local_port + * Local port number + * Supported from CPU port + * Not supported from router port + * Reserved when pp = Global Configuration + * Access: Index + */ +MLXSW_ITEM32_LP(reg, slcr, 0x00, 16, 0x00, 12); + +enum mlxsw_reg_slcr_type { + MLXSW_REG_SLCR_TYPE_CRC, /* default */ + MLXSW_REG_SLCR_TYPE_XOR, + MLXSW_REG_SLCR_TYPE_RANDOM, +}; + +/* reg_slcr_type + * Hash type + * Access: RW + */ +MLXSW_ITEM32(reg, slcr, type, 0x00, 0, 4); + +/* Ingress port */ +#define MLXSW_REG_SLCR_LAG_HASH_IN_PORT BIT(0) +/* SMAC - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_SMAC_IP BIT(1) +/* SMAC - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_SMAC_NONIP BIT(2) +#define MLXSW_REG_SLCR_LAG_HASH_SMAC \ + (MLXSW_REG_SLCR_LAG_HASH_SMAC_IP | \ + MLXSW_REG_SLCR_LAG_HASH_SMAC_NONIP) +/* DMAC - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_DMAC_IP BIT(3) +/* DMAC - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_DMAC_NONIP BIT(4) +#define MLXSW_REG_SLCR_LAG_HASH_DMAC \ + (MLXSW_REG_SLCR_LAG_HASH_DMAC_IP | \ + MLXSW_REG_SLCR_LAG_HASH_DMAC_NONIP) +/* Ethertype - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_IP BIT(5) +/* Ethertype - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_NONIP BIT(6) +#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE \ + (MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_IP | \ + MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_NONIP) +/* VLAN ID - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_VLANID_IP BIT(7) +/* VLAN ID - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_VLANID_NONIP BIT(8) +#define MLXSW_REG_SLCR_LAG_HASH_VLANID \ + (MLXSW_REG_SLCR_LAG_HASH_VLANID_IP | \ + MLXSW_REG_SLCR_LAG_HASH_VLANID_NONIP) +/* Source IP address (can be IPv4 or IPv6) */ +#define MLXSW_REG_SLCR_LAG_HASH_SIP BIT(9) +/* Destination IP address (can be IPv4 or IPv6) */ +#define MLXSW_REG_SLCR_LAG_HASH_DIP BIT(10) +/* TCP/UDP source port */ +#define MLXSW_REG_SLCR_LAG_HASH_SPORT BIT(11) +/* TCP/UDP destination port*/ +#define MLXSW_REG_SLCR_LAG_HASH_DPORT BIT(12) +/* IPv4 Protocol/IPv6 Next Header */ +#define MLXSW_REG_SLCR_LAG_HASH_IPPROTO BIT(13) +/* IPv6 Flow label */ +#define MLXSW_REG_SLCR_LAG_HASH_FLOWLABEL BIT(14) +/* SID - FCoE source ID */ +#define MLXSW_REG_SLCR_LAG_HASH_FCOE_SID BIT(15) +/* DID - FCoE destination ID */ +#define MLXSW_REG_SLCR_LAG_HASH_FCOE_DID BIT(16) +/* OXID - FCoE originator exchange ID */ +#define MLXSW_REG_SLCR_LAG_HASH_FCOE_OXID BIT(17) +/* Destination QP number - for RoCE packets */ +#define MLXSW_REG_SLCR_LAG_HASH_ROCE_DQP BIT(19) + +/* reg_slcr_lag_hash + * LAG hashing configuration. This is a bitmask, in which each set + * bit includes the corresponding item in the LAG hash calculation. + * The default lag_hash contains SMAC, DMAC, VLANID and + * Ethertype (for all packet types). + * Access: RW + */ +MLXSW_ITEM32(reg, slcr, lag_hash, 0x04, 0, 20); + +/* reg_slcr_seed + * LAG seed value. The seed is the same for all ports. 
+ * Access: RW + */ +MLXSW_ITEM32(reg, slcr, seed, 0x08, 0, 32); + +static inline void mlxsw_reg_slcr_pack(char *payload, u16 lag_hash, u32 seed) +{ + MLXSW_REG_ZERO(slcr, payload); + mlxsw_reg_slcr_pp_set(payload, MLXSW_REG_SLCR_PP_GLOBAL); + mlxsw_reg_slcr_type_set(payload, MLXSW_REG_SLCR_TYPE_CRC); + mlxsw_reg_slcr_lag_hash_set(payload, lag_hash); + mlxsw_reg_slcr_seed_set(payload, seed); +} + +/* SLCOR - Switch LAG Collector Register + * ------------------------------------- + * The Switch LAG Collector register controls the Local Port membership + * in a LAG and enablement of the collector. + */ +#define MLXSW_REG_SLCOR_ID 0x2016 +#define MLXSW_REG_SLCOR_LEN 0x10 + +MLXSW_REG_DEFINE(slcor, MLXSW_REG_SLCOR_ID, MLXSW_REG_SLCOR_LEN); + +enum mlxsw_reg_slcor_col { + /* Port is added with collector disabled */ + MLXSW_REG_SLCOR_COL_LAG_ADD_PORT, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_DISABLED, + MLXSW_REG_SLCOR_COL_LAG_REMOVE_PORT, +}; + +/* reg_slcor_col + * Collector configuration + * Access: RW + */ +MLXSW_ITEM32(reg, slcor, col, 0x00, 30, 2); + +/* reg_slcor_local_port + * Local port number + * Not supported for CPU port + * Access: Index + */ +MLXSW_ITEM32_LP(reg, slcor, 0x00, 16, 0x00, 12); + +/* reg_slcor_lag_id + * LAG Identifier. Index into the LAG descriptor table. + * Access: Index + */ +MLXSW_ITEM32(reg, slcor, lag_id, 0x00, 0, 10); + +/* reg_slcor_port_index + * Port index in the LAG list. Only valid on Add Port to LAG col. + * Valid range is from 0 to cap_max_lag_members-1 + * Access: RW + */ +MLXSW_ITEM32(reg, slcor, port_index, 0x04, 0, 10); + +static inline void mlxsw_reg_slcor_pack(char *payload, + u16 local_port, u16 lag_id, + enum mlxsw_reg_slcor_col col) +{ + MLXSW_REG_ZERO(slcor, payload); + mlxsw_reg_slcor_col_set(payload, col); + mlxsw_reg_slcor_local_port_set(payload, local_port); + mlxsw_reg_slcor_lag_id_set(payload, lag_id); +} + +static inline void mlxsw_reg_slcor_port_add_pack(char *payload, + u16 local_port, u16 lag_id, + u8 port_index) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_ADD_PORT); + mlxsw_reg_slcor_port_index_set(payload, port_index); +} + +static inline void mlxsw_reg_slcor_port_remove_pack(char *payload, + u16 local_port, u16 lag_id) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_REMOVE_PORT); +} + +static inline void mlxsw_reg_slcor_col_enable_pack(char *payload, + u16 local_port, u16 lag_id) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED); +} + +static inline void mlxsw_reg_slcor_col_disable_pack(char *payload, + u16 local_port, u16 lag_id) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED); +} + +/* SPMLR - Switch Port MAC Learning Register + * ----------------------------------------- + * Controls the Switch MAC learning policy per port. + */ +#define MLXSW_REG_SPMLR_ID 0x2018 +#define MLXSW_REG_SPMLR_LEN 0x8 + +MLXSW_REG_DEFINE(spmlr, MLXSW_REG_SPMLR_ID, MLXSW_REG_SPMLR_LEN); + +/* reg_spmlr_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, spmlr, 0x00, 16, 0x00, 12); + +/* reg_spmlr_sub_port + * Virtual port within the physical port. + * Should be set to 0 when virtual ports are not enabled on the port. 
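+ *
+ * The mlxsw_reg_spmlr_pack() helper below always clears this field.
+ * Illustrative sketch only for enabling learning on a port, assuming the
+ * generic mlxsw_reg_write() helper:
+ *
+ *	char spmlr_pl[MLXSW_REG_SPMLR_LEN];
+ *	int err;
+ *
+ *	mlxsw_reg_spmlr_pack(spmlr_pl, local_port,
+ *			     MLXSW_REG_SPMLR_LEARN_MODE_ENABLE);
+ *	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(spmlr), spmlr_pl);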
+ * Access: Index + */ +MLXSW_ITEM32(reg, spmlr, sub_port, 0x00, 8, 8); + +enum mlxsw_reg_spmlr_learn_mode { + MLXSW_REG_SPMLR_LEARN_MODE_DISABLE = 0, + MLXSW_REG_SPMLR_LEARN_MODE_ENABLE = 2, + MLXSW_REG_SPMLR_LEARN_MODE_SEC = 3, +}; + +/* reg_spmlr_learn_mode + * Learning mode on the port. + * 0 - Learning disabled. + * 2 - Learning enabled. + * 3 - Security mode. + * + * In security mode the switch does not learn MACs on the port, but uses the + * SMAC to see if it exists on another ingress port. If so, the packet is + * classified as a bad packet and is discarded unless the software registers + * to receive port security error packets usign HPKT. + */ +MLXSW_ITEM32(reg, spmlr, learn_mode, 0x04, 30, 2); + +static inline void mlxsw_reg_spmlr_pack(char *payload, u16 local_port, + enum mlxsw_reg_spmlr_learn_mode mode) +{ + MLXSW_REG_ZERO(spmlr, payload); + mlxsw_reg_spmlr_local_port_set(payload, local_port); + mlxsw_reg_spmlr_sub_port_set(payload, 0); + mlxsw_reg_spmlr_learn_mode_set(payload, mode); +} + +/* SVFA - Switch VID to FID Allocation Register + * -------------------------------------------- + * Controls the VID to FID mapping and {Port, VID} to FID mapping for + * virtualized ports. + */ +#define MLXSW_REG_SVFA_ID 0x201C +#define MLXSW_REG_SVFA_LEN 0x18 + +MLXSW_REG_DEFINE(svfa, MLXSW_REG_SVFA_ID, MLXSW_REG_SVFA_LEN); + +/* reg_svfa_swid + * Switch partition ID. + * Access: Index + */ +MLXSW_ITEM32(reg, svfa, swid, 0x00, 24, 8); + +/* reg_svfa_local_port + * Local port number. + * Access: Index + * + * Note: Reserved for 802.1Q FIDs. + */ +MLXSW_ITEM32_LP(reg, svfa, 0x00, 16, 0x00, 12); + +enum mlxsw_reg_svfa_mt { + MLXSW_REG_SVFA_MT_VID_TO_FID, + MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, + MLXSW_REG_SVFA_MT_VNI_TO_FID, +}; + +/* reg_svfa_mapping_table + * Mapping table: + * 0 - VID to FID + * 1 - {Port, VID} to FID + * Access: Index + * + * Note: Reserved for SwitchX-2. + */ +MLXSW_ITEM32(reg, svfa, mapping_table, 0x00, 8, 3); + +/* reg_svfa_v + * Valid. + * Valid if set. + * Access: RW + * + * Note: Reserved for SwitchX-2. + */ +MLXSW_ITEM32(reg, svfa, v, 0x00, 0, 1); + +/* reg_svfa_fid + * Filtering ID. + * Access: RW + */ +MLXSW_ITEM32(reg, svfa, fid, 0x04, 16, 16); + +/* reg_svfa_vid + * VLAN ID. + * Access: Index + */ +MLXSW_ITEM32(reg, svfa, vid, 0x04, 0, 12); + +/* reg_svfa_counter_set_type + * Counter set type for flow counters. + * Access: RW + * + * Note: Reserved for SwitchX-2. + */ +MLXSW_ITEM32(reg, svfa, counter_set_type, 0x08, 24, 8); + +/* reg_svfa_counter_index + * Counter index for flow counters. + * Access: RW + * + * Note: Reserved for SwitchX-2. + */ +MLXSW_ITEM32(reg, svfa, counter_index, 0x08, 0, 24); + +/* reg_svfa_vni + * Virtual Network Identifier. + * Access: Index + * + * Note: Reserved when mapping_table is not 2 (VNI mapping table). + */ +MLXSW_ITEM32(reg, svfa, vni, 0x10, 0, 24); + +/* reg_svfa_irif_v + * Ingress RIF valid. + * 0 - Ingress RIF is not valid, no ingress RIF assigned. + * 1 - Ingress RIF valid. + * Must not be set for a non enabled RIF. + * Access: RW + * + * Note: Reserved when legacy bridge model is used. + */ +MLXSW_ITEM32(reg, svfa, irif_v, 0x14, 24, 1); + +/* reg_svfa_irif + * Ingress RIF (Router Interface). + * Range is 0..cap_max_router_interfaces-1. + * Access: RW + * + * Note: Reserved when legacy bridge model is used and when irif_v=0. 
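+ *
+ * Illustrative sketch only for mapping {Port, VID} to a FID without an
+ * ingress RIF, using the mlxsw_reg_svfa_port_vid_pack() helper below and
+ * assuming the generic mlxsw_reg_write() helper:
+ *
+ *	char svfa_pl[MLXSW_REG_SVFA_LEN];
+ *	int err;
+ *
+ *	mlxsw_reg_svfa_port_vid_pack(svfa_pl, local_port, true, fid, vid,
+ *				     false, 0);
+ *	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(svfa), svfa_pl);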
+ */ +MLXSW_ITEM32(reg, svfa, irif, 0x14, 0, 16); + +static inline void __mlxsw_reg_svfa_pack(char *payload, + enum mlxsw_reg_svfa_mt mt, bool valid, + u16 fid, bool irif_v, u16 irif) +{ + MLXSW_REG_ZERO(svfa, payload); + mlxsw_reg_svfa_swid_set(payload, 0); + mlxsw_reg_svfa_mapping_table_set(payload, mt); + mlxsw_reg_svfa_v_set(payload, valid); + mlxsw_reg_svfa_fid_set(payload, fid); + mlxsw_reg_svfa_irif_v_set(payload, irif_v); + mlxsw_reg_svfa_irif_set(payload, irif_v ? irif : 0); +} + +static inline void mlxsw_reg_svfa_port_vid_pack(char *payload, u16 local_port, + bool valid, u16 fid, u16 vid, + bool irif_v, u16 irif) +{ + enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; + + __mlxsw_reg_svfa_pack(payload, mt, valid, fid, irif_v, irif); + mlxsw_reg_svfa_local_port_set(payload, local_port); + mlxsw_reg_svfa_vid_set(payload, vid); +} + +static inline void mlxsw_reg_svfa_vid_pack(char *payload, bool valid, u16 fid, + u16 vid, bool irif_v, u16 irif) +{ + enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_VID_TO_FID; + + __mlxsw_reg_svfa_pack(payload, mt, valid, fid, irif_v, irif); + mlxsw_reg_svfa_vid_set(payload, vid); +} + +static inline void mlxsw_reg_svfa_vni_pack(char *payload, bool valid, u16 fid, + u32 vni, bool irif_v, u16 irif) +{ + enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_VNI_TO_FID; + + __mlxsw_reg_svfa_pack(payload, mt, valid, fid, irif_v, irif); + mlxsw_reg_svfa_vni_set(payload, vni); +} + +/* SPVTR - Switch Port VLAN Stacking Register + * ------------------------------------------ + * The Switch Port VLAN Stacking register configures the VLAN mode of the port + * to enable VLAN stacking. + */ +#define MLXSW_REG_SPVTR_ID 0x201D +#define MLXSW_REG_SPVTR_LEN 0x10 + +MLXSW_REG_DEFINE(spvtr, MLXSW_REG_SPVTR_ID, MLXSW_REG_SPVTR_LEN); + +/* reg_spvtr_tport + * Port is tunnel port. + * Access: Index + * + * Note: Reserved when SwitchX/-2 or Spectrum-1. + */ +MLXSW_ITEM32(reg, spvtr, tport, 0x00, 24, 1); + +/* reg_spvtr_local_port + * When tport = 0: local port number (Not supported from/to CPU). + * When tport = 1: tunnel port. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, spvtr, 0x00, 16, 0x00, 12); + +/* reg_spvtr_ippe + * Ingress Port Prio Mode Update Enable. + * When set, the Port Prio Mode is updated with the provided ipprio_mode field. + * Reserved on Get operations. + * Access: OP + */ +MLXSW_ITEM32(reg, spvtr, ippe, 0x04, 31, 1); + +/* reg_spvtr_ipve + * Ingress Port VID Mode Update Enable. + * When set, the Ingress Port VID Mode is updated with the provided ipvid_mode + * field. + * Reserved on Get operations. + * Access: OP + */ +MLXSW_ITEM32(reg, spvtr, ipve, 0x04, 30, 1); + +/* reg_spvtr_epve + * Egress Port VID Mode Update Enable. + * When set, the Egress Port VID Mode is updated with the provided epvid_mode + * field. + * Access: OP + */ +MLXSW_ITEM32(reg, spvtr, epve, 0x04, 29, 1); + +/* reg_spvtr_ipprio_mode + * Ingress Port Priority Mode. + * This controls the PCP and DEI of the new outer VLAN + * Note: for SwitchX/-2 the DEI is not affected. + * 0: use port default PCP and DEI (configured by QPDPC). + * 1: use C-VLAN PCP and DEI. + * Has no effect when ipvid_mode = 0. + * Reserved when tport = 1. 
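+ *
+ * Ingress VLAN push is typically enabled through the mlxsw_reg_spvtr_pack()
+ * helper below. Illustrative sketch only for a regular (non-tunnel) port,
+ * assuming the generic mlxsw_reg_write() helper:
+ *
+ *	char spvtr_pl[MLXSW_REG_SPVTR_LEN];
+ *	int err;
+ *
+ *	mlxsw_reg_spvtr_pack(spvtr_pl, false, local_port,
+ *			     MLXSW_REG_SPVTR_IPVID_MODE_ALWAYS_PUSH_VLAN);
+ *	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(spvtr), spvtr_pl);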
+ * Access: RW + */ +MLXSW_ITEM32(reg, spvtr, ipprio_mode, 0x04, 20, 4); + +enum mlxsw_reg_spvtr_ipvid_mode { + /* IEEE Compliant PVID (default) */ + MLXSW_REG_SPVTR_IPVID_MODE_IEEE_COMPLIANT_PVID, + /* Push VLAN (for VLAN stacking, except prio tagged packets) */ + MLXSW_REG_SPVTR_IPVID_MODE_PUSH_VLAN_FOR_UNTAGGED_PACKET, + /* Always push VLAN (also for prio tagged packets) */ + MLXSW_REG_SPVTR_IPVID_MODE_ALWAYS_PUSH_VLAN, +}; + +/* reg_spvtr_ipvid_mode + * Ingress Port VLAN-ID Mode. + * For Spectrum family, this affects the values of SPVM.i + * Access: RW + */ +MLXSW_ITEM32(reg, spvtr, ipvid_mode, 0x04, 16, 4); + +enum mlxsw_reg_spvtr_epvid_mode { + /* IEEE Compliant VLAN membership */ + MLXSW_REG_SPVTR_EPVID_MODE_IEEE_COMPLIANT_VLAN_MEMBERSHIP, + /* Pop VLAN (for VLAN stacking) */ + MLXSW_REG_SPVTR_EPVID_MODE_POP_VLAN, +}; + +/* reg_spvtr_epvid_mode + * Egress Port VLAN-ID Mode. + * For Spectrum family, this affects the values of SPVM.e,u,pt. + * Access: WO + */ +MLXSW_ITEM32(reg, spvtr, epvid_mode, 0x04, 0, 4); + +static inline void mlxsw_reg_spvtr_pack(char *payload, bool tport, + u16 local_port, + enum mlxsw_reg_spvtr_ipvid_mode ipvid_mode) +{ + MLXSW_REG_ZERO(spvtr, payload); + mlxsw_reg_spvtr_tport_set(payload, tport); + mlxsw_reg_spvtr_local_port_set(payload, local_port); + mlxsw_reg_spvtr_ipvid_mode_set(payload, ipvid_mode); + mlxsw_reg_spvtr_ipve_set(payload, true); +} + +/* SVPE - Switch Virtual-Port Enabling Register + * -------------------------------------------- + * Enables port virtualization. + */ +#define MLXSW_REG_SVPE_ID 0x201E +#define MLXSW_REG_SVPE_LEN 0x4 + +MLXSW_REG_DEFINE(svpe, MLXSW_REG_SVPE_ID, MLXSW_REG_SVPE_LEN); + +/* reg_svpe_local_port + * Local port number + * Access: Index + * + * Note: CPU port is not supported (uses VLAN mode only). + */ +MLXSW_ITEM32_LP(reg, svpe, 0x00, 16, 0x00, 12); + +/* reg_svpe_vp_en + * Virtual port enable. + * 0 - Disable, VLAN mode (VID to FID). + * 1 - Enable, Virtual port mode ({Port, VID} to FID). + * Access: RW + */ +MLXSW_ITEM32(reg, svpe, vp_en, 0x00, 8, 1); + +static inline void mlxsw_reg_svpe_pack(char *payload, u16 local_port, + bool enable) +{ + MLXSW_REG_ZERO(svpe, payload); + mlxsw_reg_svpe_local_port_set(payload, local_port); + mlxsw_reg_svpe_vp_en_set(payload, enable); +} + +/* SFMR - Switch FID Management Register + * ------------------------------------- + * Creates and configures FIDs. + */ +#define MLXSW_REG_SFMR_ID 0x201F +#define MLXSW_REG_SFMR_LEN 0x30 + +MLXSW_REG_DEFINE(sfmr, MLXSW_REG_SFMR_ID, MLXSW_REG_SFMR_LEN); + +enum mlxsw_reg_sfmr_op { + MLXSW_REG_SFMR_OP_CREATE_FID, + MLXSW_REG_SFMR_OP_DESTROY_FID, +}; + +/* reg_sfmr_op + * Operation. + * 0 - Create or edit FID. + * 1 - Destroy FID. + * Access: WO + */ +MLXSW_ITEM32(reg, sfmr, op, 0x00, 24, 4); + +/* reg_sfmr_fid + * Filtering ID. + * Access: Index + */ +MLXSW_ITEM32(reg, sfmr, fid, 0x00, 0, 16); + +/* reg_sfmr_flood_rsp + * Router sub-port flooding table. + * 0 - Regular flooding table. + * 1 - Router sub-port flooding table. For this FID the flooding is per + * router-sub-port local_port. Must not be set for a FID which is not a + * router-sub-port and must be set prior to enabling the relevant RIF. + * Access: RW + * + * Note: Reserved when legacy bridge model is used. + */ +MLXSW_ITEM32(reg, sfmr, flood_rsp, 0x08, 31, 1); + +/* reg_sfmr_flood_bridge_type + * Flood bridge type (see SFGC.bridge_type). + * 0 - type_0. + * 1 - type_1. + * Access: RW + * + * Note: Reserved when legacy bridge model is used and when flood_rsp=1. 
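+ *
+ * FIDs are created through the mlxsw_reg_sfmr_pack() helper below.
+ * Illustrative sketch only for a regular (non-RSP) FID of bridge type 0
+ * without SMPE, assuming the generic mlxsw_reg_write() helper:
+ *
+ *	char sfmr_pl[MLXSW_REG_SFMR_LEN];
+ *	int err;
+ *
+ *	mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, fid,
+ *			    fid_offset, false, MLXSW_REG_BRIDGE_TYPE_0,
+ *			    false, 0);
+ *	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(sfmr), sfmr_pl);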
+ */ +MLXSW_ITEM32(reg, sfmr, flood_bridge_type, 0x08, 28, 1); + +/* reg_sfmr_fid_offset + * FID offset. + * Used to point into the flooding table selected by SFGC register if + * the table is of type FID-Offset. Otherwise, this field is reserved. + * Access: RW + */ +MLXSW_ITEM32(reg, sfmr, fid_offset, 0x08, 0, 16); + +/* reg_sfmr_vtfp + * Valid Tunnel Flood Pointer. + * If not set, then nve_tunnel_flood_ptr is reserved and considered NULL. + * Access: RW + * + * Note: Reserved for 802.1Q FIDs. + */ +MLXSW_ITEM32(reg, sfmr, vtfp, 0x0C, 31, 1); + +/* reg_sfmr_nve_tunnel_flood_ptr + * Underlay Flooding and BC Pointer. + * Used as a pointer to the first entry of the group based link lists of + * flooding or BC entries (for NVE tunnels). + * Access: RW + */ +MLXSW_ITEM32(reg, sfmr, nve_tunnel_flood_ptr, 0x0C, 0, 24); + +/* reg_sfmr_vv + * VNI Valid. + * If not set, then vni is reserved. + * Access: RW + * + * Note: Reserved for 802.1Q FIDs. + */ +MLXSW_ITEM32(reg, sfmr, vv, 0x10, 31, 1); + +/* reg_sfmr_vni + * Virtual Network Identifier. + * When legacy bridge model is used, a given VNI can only be assigned to one + * FID. When unified bridge model is used, it configures only the FID->VNI, + * the VNI->FID is done by SVFA. + * Access: RW + */ +MLXSW_ITEM32(reg, sfmr, vni, 0x10, 0, 24); + +/* reg_sfmr_irif_v + * Ingress RIF valid. + * 0 - Ingress RIF is not valid, no ingress RIF assigned. + * 1 - Ingress RIF valid. + * Must not be set for a non valid RIF. + * Access: RW + * + * Note: Reserved when legacy bridge model is used. + */ +MLXSW_ITEM32(reg, sfmr, irif_v, 0x14, 24, 1); + +/* reg_sfmr_irif + * Ingress RIF (Router Interface). + * Range is 0..cap_max_router_interfaces-1. + * Access: RW + * + * Note: Reserved when legacy bridge model is used and when irif_v=0. + */ +MLXSW_ITEM32(reg, sfmr, irif, 0x14, 0, 16); + +/* reg_sfmr_smpe_valid + * SMPE is valid. + * Access: RW + * + * Note: Reserved when legacy bridge model is used, when flood_rsp=1 and on + * Spectrum-1. + */ +MLXSW_ITEM32(reg, sfmr, smpe_valid, 0x28, 20, 1); + +/* reg_sfmr_smpe + * Switch multicast port to egress VID. + * Range is 0..cap_max_rmpe-1 + * Access: RW + * + * Note: Reserved when legacy bridge model is used, when flood_rsp=1 and on + * Spectrum-1. + */ +MLXSW_ITEM32(reg, sfmr, smpe, 0x28, 0, 16); + +static inline void mlxsw_reg_sfmr_pack(char *payload, + enum mlxsw_reg_sfmr_op op, u16 fid, + u16 fid_offset, bool flood_rsp, + enum mlxsw_reg_bridge_type bridge_type, + bool smpe_valid, u16 smpe) +{ + MLXSW_REG_ZERO(sfmr, payload); + mlxsw_reg_sfmr_op_set(payload, op); + mlxsw_reg_sfmr_fid_set(payload, fid); + mlxsw_reg_sfmr_fid_offset_set(payload, fid_offset); + mlxsw_reg_sfmr_vtfp_set(payload, false); + mlxsw_reg_sfmr_vv_set(payload, false); + mlxsw_reg_sfmr_flood_rsp_set(payload, flood_rsp); + mlxsw_reg_sfmr_flood_bridge_type_set(payload, bridge_type); + mlxsw_reg_sfmr_smpe_valid_set(payload, smpe_valid); + mlxsw_reg_sfmr_smpe_set(payload, smpe); +} + +/* SPVMLR - Switch Port VLAN MAC Learning Register + * ----------------------------------------------- + * Controls the switch MAC learning policy per {Port, VID}. 
+ */ +#define MLXSW_REG_SPVMLR_ID 0x2020 +#define MLXSW_REG_SPVMLR_BASE_LEN 0x04 /* base length, without records */ +#define MLXSW_REG_SPVMLR_REC_LEN 0x04 /* record length */ +#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 255 +#define MLXSW_REG_SPVMLR_LEN (MLXSW_REG_SPVMLR_BASE_LEN + \ + MLXSW_REG_SPVMLR_REC_LEN * \ + MLXSW_REG_SPVMLR_REC_MAX_COUNT) + +MLXSW_REG_DEFINE(spvmlr, MLXSW_REG_SPVMLR_ID, MLXSW_REG_SPVMLR_LEN); + +/* reg_spvmlr_local_port + * Local ingress port. + * Access: Index + * + * Note: CPU port is not supported. + */ +MLXSW_ITEM32_LP(reg, spvmlr, 0x00, 16, 0x00, 12); + +/* reg_spvmlr_num_rec + * Number of records to update. + * Access: OP + */ +MLXSW_ITEM32(reg, spvmlr, num_rec, 0x00, 0, 8); + +/* reg_spvmlr_rec_learn_enable + * 0 - Disable learning for {Port, VID}. + * 1 - Enable learning for {Port, VID}. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, spvmlr, rec_learn_enable, MLXSW_REG_SPVMLR_BASE_LEN, + 31, 1, MLXSW_REG_SPVMLR_REC_LEN, 0x00, false); + +/* reg_spvmlr_rec_vid + * VLAN ID to be added/removed from port or for querying. + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, spvmlr, rec_vid, MLXSW_REG_SPVMLR_BASE_LEN, 0, 12, + MLXSW_REG_SPVMLR_REC_LEN, 0x00, false); + +static inline void mlxsw_reg_spvmlr_pack(char *payload, u16 local_port, + u16 vid_begin, u16 vid_end, + bool learn_enable) +{ + int num_rec = vid_end - vid_begin + 1; + int i; + + WARN_ON(num_rec < 1 || num_rec > MLXSW_REG_SPVMLR_REC_MAX_COUNT); + + MLXSW_REG_ZERO(spvmlr, payload); + mlxsw_reg_spvmlr_local_port_set(payload, local_port); + mlxsw_reg_spvmlr_num_rec_set(payload, num_rec); + + for (i = 0; i < num_rec; i++) { + mlxsw_reg_spvmlr_rec_learn_enable_set(payload, i, learn_enable); + mlxsw_reg_spvmlr_rec_vid_set(payload, i, vid_begin + i); + } +} + +/* SPVC - Switch Port VLAN Classification Register + * ----------------------------------------------- + * Configures the port to identify packets as untagged / single tagged / + * double packets based on the packet EtherTypes. + * Ethertype IDs are configured by SVER. + */ +#define MLXSW_REG_SPVC_ID 0x2026 +#define MLXSW_REG_SPVC_LEN 0x0C + +MLXSW_REG_DEFINE(spvc, MLXSW_REG_SPVC_ID, MLXSW_REG_SPVC_LEN); + +/* reg_spvc_local_port + * Local port. + * Access: Index + * + * Note: applies both to Rx port and Tx port, so if a packet traverses + * through Rx port i and a Tx port j then port i and port j must have the + * same configuration. + */ +MLXSW_ITEM32_LP(reg, spvc, 0x00, 16, 0x00, 12); + +/* reg_spvc_inner_et2 + * Vlan Tag1 EtherType2 enable. + * Packet is initially classified as double VLAN Tag if in addition to + * being classified with a tag0 VLAN Tag its tag1 EtherType value is + * equal to ether_type2. + * 0: disable (default) + * 1: enable + * Access: RW + */ +MLXSW_ITEM32(reg, spvc, inner_et2, 0x08, 17, 1); + +/* reg_spvc_et2 + * Vlan Tag0 EtherType2 enable. + * Packet is initially classified as VLAN Tag if its tag0 EtherType is + * equal to ether_type2. + * 0: disable (default) + * 1: enable + * Access: RW + */ +MLXSW_ITEM32(reg, spvc, et2, 0x08, 16, 1); + +/* reg_spvc_inner_et1 + * Vlan Tag1 EtherType1 enable. + * Packet is initially classified as double VLAN Tag if in addition to + * being classified with a tag0 VLAN Tag its tag1 EtherType value is + * equal to ether_type1. + * 0: disable + * 1: enable (default) + * Access: RW + */ +MLXSW_ITEM32(reg, spvc, inner_et1, 0x08, 9, 1); + +/* reg_spvc_et1 + * Vlan Tag0 EtherType1 enable. + * Packet is initially classified as VLAN Tag if its tag0 EtherType is + * equal to ether_type1. 
+ * 0: disable + * 1: enable (default) + * Access: RW + */ +MLXSW_ITEM32(reg, spvc, et1, 0x08, 8, 1); + +/* reg_inner_et0 + * Vlan Tag1 EtherType0 enable. + * Packet is initially classified as double VLAN Tag if in addition to + * being classified with a tag0 VLAN Tag its tag1 EtherType value is + * equal to ether_type0. + * 0: disable + * 1: enable (default) + * Access: RW + */ +MLXSW_ITEM32(reg, spvc, inner_et0, 0x08, 1, 1); + +/* reg_et0 + * Vlan Tag0 EtherType0 enable. + * Packet is initially classified as VLAN Tag if its tag0 EtherType is + * equal to ether_type0. + * 0: disable + * 1: enable (default) + * Access: RW + */ +MLXSW_ITEM32(reg, spvc, et0, 0x08, 0, 1); + +static inline void mlxsw_reg_spvc_pack(char *payload, u16 local_port, bool et1, + bool et0) +{ + MLXSW_REG_ZERO(spvc, payload); + mlxsw_reg_spvc_local_port_set(payload, local_port); + /* Enable inner_et1 and inner_et0 to enable identification of double + * tagged packets. + */ + mlxsw_reg_spvc_inner_et1_set(payload, 1); + mlxsw_reg_spvc_inner_et0_set(payload, 1); + mlxsw_reg_spvc_et1_set(payload, et1); + mlxsw_reg_spvc_et0_set(payload, et0); +} + +/* SPEVET - Switch Port Egress VLAN EtherType + * ------------------------------------------ + * The switch port egress VLAN EtherType configures which EtherType to push at + * egress for packets incoming through a local port for which 'SPVID.egr_et_set' + * is set. + */ +#define MLXSW_REG_SPEVET_ID 0x202A +#define MLXSW_REG_SPEVET_LEN 0x08 + +MLXSW_REG_DEFINE(spevet, MLXSW_REG_SPEVET_ID, MLXSW_REG_SPEVET_LEN); + +/* reg_spevet_local_port + * Egress Local port number. + * Not supported to CPU port. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, spevet, 0x00, 16, 0x00, 12); + +/* reg_spevet_et_vlan + * Egress EtherType VLAN to push when SPVID.egr_et_set field set for the packet: + * 0: ether_type0 - (default) + * 1: ether_type1 + * 2: ether_type2 + * Access: RW + */ +MLXSW_ITEM32(reg, spevet, et_vlan, 0x04, 16, 2); + +static inline void mlxsw_reg_spevet_pack(char *payload, u16 local_port, + u8 et_vlan) +{ + MLXSW_REG_ZERO(spevet, payload); + mlxsw_reg_spevet_local_port_set(payload, local_port); + mlxsw_reg_spevet_et_vlan_set(payload, et_vlan); +} + +/* SMPE - Switch Multicast Port to Egress VID + * ------------------------------------------ + * The switch multicast port to egress VID maps + * {egress_port, SMPE index} -> {VID}. + */ +#define MLXSW_REG_SMPE_ID 0x202B +#define MLXSW_REG_SMPE_LEN 0x0C + +MLXSW_REG_DEFINE(smpe, MLXSW_REG_SMPE_ID, MLXSW_REG_SMPE_LEN); + +/* reg_smpe_local_port + * Local port number. + * CPU port is not supported. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, smpe, 0x00, 16, 0x00, 12); + +/* reg_smpe_smpe_index + * Switch multicast port to egress VID. + * Range is 0..cap_max_rmpe-1. + * Access: Index + */ +MLXSW_ITEM32(reg, smpe, smpe_index, 0x04, 0, 16); + +/* reg_smpe_evid + * Egress VID. + * Access: RW + */ +MLXSW_ITEM32(reg, smpe, evid, 0x08, 0, 12); + +static inline void mlxsw_reg_smpe_pack(char *payload, u16 local_port, + u16 smpe_index, u16 evid) +{ + MLXSW_REG_ZERO(smpe, payload); + mlxsw_reg_smpe_local_port_set(payload, local_port); + mlxsw_reg_smpe_smpe_index_set(payload, smpe_index); + mlxsw_reg_smpe_evid_set(payload, evid); +} + +/* SMID-V2 - Switch Multicast ID Version 2 Register + * ------------------------------------------------ + * The MID record maps from a MID (Multicast ID), which is a unique identifier + * of the multicast group within the stacking domain, into a list of local + * ports into which the packet is replicated. 
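+ *
+ * Illustrative sketch only for adding a single local port to a MID without
+ * SMPE, using the mlxsw_reg_smid2_pack() helper below and assuming the
+ * generic mlxsw_reg_write() helper; the register is large, so a heap buffer
+ * is assumed:
+ *
+ *	char *smid2_pl;
+ *	int err;
+ *
+ *	smid2_pl = kmalloc(MLXSW_REG_SMID2_LEN, GFP_KERNEL);
+ *	if (!smid2_pl)
+ *		return -ENOMEM;
+ *	mlxsw_reg_smid2_pack(smid2_pl, mid, local_port, true, false, 0);
+ *	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(smid2), smid2_pl);
+ *	kfree(smid2_pl);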
+ */ +#define MLXSW_REG_SMID2_ID 0x2034 +#define MLXSW_REG_SMID2_LEN 0x120 + +MLXSW_REG_DEFINE(smid2, MLXSW_REG_SMID2_ID, MLXSW_REG_SMID2_LEN); + +/* reg_smid2_swid + * Switch partition ID. + * Access: Index + */ +MLXSW_ITEM32(reg, smid2, swid, 0x00, 24, 8); + +/* reg_smid2_mid + * Multicast identifier - global identifier that represents the multicast group + * across all devices. + * Access: Index + */ +MLXSW_ITEM32(reg, smid2, mid, 0x00, 0, 16); + +/* reg_smid2_smpe_valid + * SMPE is valid. + * When not valid, the egress VID will not be modified by the SMPE table. + * Access: RW + * + * Note: Reserved when legacy bridge model is used and on Spectrum-2. + */ +MLXSW_ITEM32(reg, smid2, smpe_valid, 0x08, 20, 1); + +/* reg_smid2_smpe + * Switch multicast port to egress VID. + * Access: RW + * + * Note: Reserved when legacy bridge model is used and on Spectrum-2. + */ +MLXSW_ITEM32(reg, smid2, smpe, 0x08, 0, 16); + +/* reg_smid2_port + * Local port memebership (1 bit per port). + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, smid2, port, 0x20, 0x80, 1); + +/* reg_smid2_port_mask + * Local port mask (1 bit per port). + * Access: WO + */ +MLXSW_ITEM_BIT_ARRAY(reg, smid2, port_mask, 0xA0, 0x80, 1); + +static inline void mlxsw_reg_smid2_pack(char *payload, u16 mid, u16 port, + bool set, bool smpe_valid, u16 smpe) +{ + MLXSW_REG_ZERO(smid2, payload); + mlxsw_reg_smid2_swid_set(payload, 0); + mlxsw_reg_smid2_mid_set(payload, mid); + mlxsw_reg_smid2_port_set(payload, port, set); + mlxsw_reg_smid2_port_mask_set(payload, port, 1); + mlxsw_reg_smid2_smpe_valid_set(payload, smpe_valid); + mlxsw_reg_smid2_smpe_set(payload, smpe_valid ? smpe : 0); +} + +/* CWTP - Congetion WRED ECN TClass Profile + * ---------------------------------------- + * Configures the profiles for queues of egress port and traffic class + */ +#define MLXSW_REG_CWTP_ID 0x2802 +#define MLXSW_REG_CWTP_BASE_LEN 0x28 +#define MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN 0x08 +#define MLXSW_REG_CWTP_LEN 0x40 + +MLXSW_REG_DEFINE(cwtp, MLXSW_REG_CWTP_ID, MLXSW_REG_CWTP_LEN); + +/* reg_cwtp_local_port + * Local port number + * Not supported for CPU port + * Access: Index + */ +MLXSW_ITEM32_LP(reg, cwtp, 0x00, 16, 0x00, 12); + +/* reg_cwtp_traffic_class + * Traffic Class to configure + * Access: Index + */ +MLXSW_ITEM32(reg, cwtp, traffic_class, 32, 0, 8); + +/* reg_cwtp_profile_min + * Minimum Average Queue Size of the profile in cells. 
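+ *
+ * The per-profile min/max (in cells) and marking probability are programmed
+ * with the helpers below; the matching CWTPM register then maps a profile to
+ * a {port, traffic class}. Illustrative sketch only, assuming the generic
+ * mlxsw_reg_write() helper and caller-provided min/max/probability values:
+ *
+ *	char cwtp_pl[MLXSW_REG_CWTP_LEN];
+ *	int err;
+ *
+ *	mlxsw_reg_cwtp_pack(cwtp_pl, local_port, tclass);
+ *	mlxsw_reg_cwtp_profile_pack(cwtp_pl, MLXSW_REG_CWTP_DEFAULT_PROFILE,
+ *				    min, max, probability);
+ *	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(cwtp), cwtp_pl);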
+ * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, cwtp, profile_min, MLXSW_REG_CWTP_BASE_LEN, + 0, 20, MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN, 0, false); + +/* reg_cwtp_profile_percent + * Percentage of WRED and ECN marking for maximum Average Queue size + * Range is 0 to 100, units of integer percentage + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, cwtp, profile_percent, MLXSW_REG_CWTP_BASE_LEN, + 24, 7, MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN, 4, false); + +/* reg_cwtp_profile_max + * Maximum Average Queue size of the profile in cells + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, cwtp, profile_max, MLXSW_REG_CWTP_BASE_LEN, + 0, 20, MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN, 4, false); + +#define MLXSW_REG_CWTP_MIN_VALUE 64 +#define MLXSW_REG_CWTP_MAX_PROFILE 2 +#define MLXSW_REG_CWTP_DEFAULT_PROFILE 1 + +static inline void mlxsw_reg_cwtp_pack(char *payload, u16 local_port, + u8 traffic_class) +{ + int i; + + MLXSW_REG_ZERO(cwtp, payload); + mlxsw_reg_cwtp_local_port_set(payload, local_port); + mlxsw_reg_cwtp_traffic_class_set(payload, traffic_class); + + for (i = 0; i <= MLXSW_REG_CWTP_MAX_PROFILE; i++) { + mlxsw_reg_cwtp_profile_min_set(payload, i, + MLXSW_REG_CWTP_MIN_VALUE); + mlxsw_reg_cwtp_profile_max_set(payload, i, + MLXSW_REG_CWTP_MIN_VALUE); + } +} + +#define MLXSW_REG_CWTP_PROFILE_TO_INDEX(profile) (profile - 1) + +static inline void +mlxsw_reg_cwtp_profile_pack(char *payload, u8 profile, u32 min, u32 max, + u32 probability) +{ + u8 index = MLXSW_REG_CWTP_PROFILE_TO_INDEX(profile); + + mlxsw_reg_cwtp_profile_min_set(payload, index, min); + mlxsw_reg_cwtp_profile_max_set(payload, index, max); + mlxsw_reg_cwtp_profile_percent_set(payload, index, probability); +} + +/* CWTPM - Congestion WRED ECN TClass and Pool Mapping + * --------------------------------------------------- + * The CWTPM register maps each egress port and traffic class to profile num. + */ +#define MLXSW_REG_CWTPM_ID 0x2803 +#define MLXSW_REG_CWTPM_LEN 0x44 + +MLXSW_REG_DEFINE(cwtpm, MLXSW_REG_CWTPM_ID, MLXSW_REG_CWTPM_LEN); + +/* reg_cwtpm_local_port + * Local port number + * Not supported for CPU port + * Access: Index + */ +MLXSW_ITEM32_LP(reg, cwtpm, 0x00, 16, 0x00, 12); + +/* reg_cwtpm_traffic_class + * Traffic Class to configure + * Access: Index + */ +MLXSW_ITEM32(reg, cwtpm, traffic_class, 32, 0, 8); + +/* reg_cwtpm_ew + * Control enablement of WRED for traffic class: + * 0 - Disable + * 1 - Enable + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, ew, 36, 1, 1); + +/* reg_cwtpm_ee + * Control enablement of ECN for traffic class: + * 0 - Disable + * 1 - Enable + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, ee, 36, 0, 1); + +/* reg_cwtpm_tcp_g + * TCP Green Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, tcp_g, 52, 0, 2); + +/* reg_cwtpm_tcp_y + * TCP Yellow Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, tcp_y, 56, 16, 2); + +/* reg_cwtpm_tcp_r + * TCP Red Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, tcp_r, 56, 0, 2); + +/* reg_cwtpm_ntcp_g + * Non-TCP Green Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. 
+ * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, ntcp_g, 60, 0, 2); + +/* reg_cwtpm_ntcp_y + * Non-TCP Yellow Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, ntcp_y, 64, 16, 2); + +/* reg_cwtpm_ntcp_r + * Non-TCP Red Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, ntcp_r, 64, 0, 2); + +#define MLXSW_REG_CWTPM_RESET_PROFILE 0 + +static inline void mlxsw_reg_cwtpm_pack(char *payload, u16 local_port, + u8 traffic_class, u8 profile, + bool wred, bool ecn) +{ + MLXSW_REG_ZERO(cwtpm, payload); + mlxsw_reg_cwtpm_local_port_set(payload, local_port); + mlxsw_reg_cwtpm_traffic_class_set(payload, traffic_class); + mlxsw_reg_cwtpm_ew_set(payload, wred); + mlxsw_reg_cwtpm_ee_set(payload, ecn); + mlxsw_reg_cwtpm_tcp_g_set(payload, profile); + mlxsw_reg_cwtpm_tcp_y_set(payload, profile); + mlxsw_reg_cwtpm_tcp_r_set(payload, profile); + mlxsw_reg_cwtpm_ntcp_g_set(payload, profile); + mlxsw_reg_cwtpm_ntcp_y_set(payload, profile); + mlxsw_reg_cwtpm_ntcp_r_set(payload, profile); +} + +/* PGCR - Policy-Engine General Configuration Register + * --------------------------------------------------- + * This register configures general Policy-Engine settings. + */ +#define MLXSW_REG_PGCR_ID 0x3001 +#define MLXSW_REG_PGCR_LEN 0x20 + +MLXSW_REG_DEFINE(pgcr, MLXSW_REG_PGCR_ID, MLXSW_REG_PGCR_LEN); + +/* reg_pgcr_default_action_pointer_base + * Default action pointer base. Each region has a default action pointer + * which is equal to default_action_pointer_base + region_id. + * Access: RW + */ +MLXSW_ITEM32(reg, pgcr, default_action_pointer_base, 0x1C, 0, 24); + +static inline void mlxsw_reg_pgcr_pack(char *payload, u32 pointer_base) +{ + MLXSW_REG_ZERO(pgcr, payload); + mlxsw_reg_pgcr_default_action_pointer_base_set(payload, pointer_base); +} + +/* PPBT - Policy-Engine Port Binding Table + * --------------------------------------- + * This register is used for configuration of the Port Binding Table. + */ +#define MLXSW_REG_PPBT_ID 0x3002 +#define MLXSW_REG_PPBT_LEN 0x14 + +MLXSW_REG_DEFINE(ppbt, MLXSW_REG_PPBT_ID, MLXSW_REG_PPBT_LEN); + +enum mlxsw_reg_pxbt_e { + MLXSW_REG_PXBT_E_IACL, + MLXSW_REG_PXBT_E_EACL, +}; + +/* reg_ppbt_e + * Access: Index + */ +MLXSW_ITEM32(reg, ppbt, e, 0x00, 31, 1); + +enum mlxsw_reg_pxbt_op { + MLXSW_REG_PXBT_OP_BIND, + MLXSW_REG_PXBT_OP_UNBIND, +}; + +/* reg_ppbt_op + * Access: RW + */ +MLXSW_ITEM32(reg, ppbt, op, 0x00, 28, 3); + +/* reg_ppbt_local_port + * Local port. Not including CPU port. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, ppbt, 0x00, 16, 0x00, 12); + +/* reg_ppbt_g + * group - When set, the binding is of an ACL group. When cleared, + * the binding is of an ACL. + * Must be set to 1 for Spectrum. + * Access: RW + */ +MLXSW_ITEM32(reg, ppbt, g, 0x10, 31, 1); + +/* reg_ppbt_acl_info + * ACL/ACL group identifier. If the g bit is set, this field should hold + * the acl_group_id, else it should hold the acl_id. 
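+ * Note: mlxsw_reg_ppbt_pack() below always sets the g bit, so in practice
+ * this field carries an ACL group identifier.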
+ * Access: RW + */ +MLXSW_ITEM32(reg, ppbt, acl_info, 0x10, 0, 16); + +static inline void mlxsw_reg_ppbt_pack(char *payload, enum mlxsw_reg_pxbt_e e, + enum mlxsw_reg_pxbt_op op, + u16 local_port, u16 acl_info) +{ + MLXSW_REG_ZERO(ppbt, payload); + mlxsw_reg_ppbt_e_set(payload, e); + mlxsw_reg_ppbt_op_set(payload, op); + mlxsw_reg_ppbt_local_port_set(payload, local_port); + mlxsw_reg_ppbt_g_set(payload, true); + mlxsw_reg_ppbt_acl_info_set(payload, acl_info); +} + +/* PACL - Policy-Engine ACL Register + * --------------------------------- + * This register is used for configuration of the ACL. + */ +#define MLXSW_REG_PACL_ID 0x3004 +#define MLXSW_REG_PACL_LEN 0x70 + +MLXSW_REG_DEFINE(pacl, MLXSW_REG_PACL_ID, MLXSW_REG_PACL_LEN); + +/* reg_pacl_v + * Valid. Setting the v bit makes the ACL valid. It should not be cleared + * while the ACL is bounded to either a port, VLAN or ACL rule. + * Access: RW + */ +MLXSW_ITEM32(reg, pacl, v, 0x00, 24, 1); + +/* reg_pacl_acl_id + * An identifier representing the ACL (managed by software) + * Range 0 .. cap_max_acl_regions - 1 + * Access: Index + */ +MLXSW_ITEM32(reg, pacl, acl_id, 0x08, 0, 16); + +#define MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN 16 + +/* reg_pacl_tcam_region_info + * Opaque object that represents a TCAM region. + * Obtained through PTAR register. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, pacl, tcam_region_info, 0x30, + MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN); + +static inline void mlxsw_reg_pacl_pack(char *payload, u16 acl_id, + bool valid, const char *tcam_region_info) +{ + MLXSW_REG_ZERO(pacl, payload); + mlxsw_reg_pacl_acl_id_set(payload, acl_id); + mlxsw_reg_pacl_v_set(payload, valid); + mlxsw_reg_pacl_tcam_region_info_memcpy_to(payload, tcam_region_info); +} + +/* PAGT - Policy-Engine ACL Group Table + * ------------------------------------ + * This register is used for configuration of the ACL Group Table. + */ +#define MLXSW_REG_PAGT_ID 0x3005 +#define MLXSW_REG_PAGT_BASE_LEN 0x30 +#define MLXSW_REG_PAGT_ACL_LEN 4 +#define MLXSW_REG_PAGT_ACL_MAX_NUM 16 +#define MLXSW_REG_PAGT_LEN (MLXSW_REG_PAGT_BASE_LEN + \ + MLXSW_REG_PAGT_ACL_MAX_NUM * MLXSW_REG_PAGT_ACL_LEN) + +MLXSW_REG_DEFINE(pagt, MLXSW_REG_PAGT_ID, MLXSW_REG_PAGT_LEN); + +/* reg_pagt_size + * Number of ACLs in the group. + * Size 0 invalidates a group. + * Range 0 .. cap_max_acl_group_size (hard coded to 16 for now) + * Total number of ACLs in all groups must be lower or equal + * to cap_max_acl_tot_groups + * Note: a group which is binded must not be invalidated + * Access: Index + */ +MLXSW_ITEM32(reg, pagt, size, 0x00, 0, 8); + +/* reg_pagt_acl_group_id + * An identifier (numbered from 0..cap_max_acl_groups-1) representing + * the ACL Group identifier (managed by software). 
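+ *
+ * A group is typically built by packing the group ID and then appending its
+ * member ACLs, e.g. (sketch; pagt_pl is a MLXSW_REG_PAGT_LEN buffer and
+ * acl_id is caller-supplied):
+ *
+ *   mlxsw_reg_pagt_pack(pagt_pl, acl_group_id);
+ *   mlxsw_reg_pagt_acl_id_pack(pagt_pl, 0, acl_id, false);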
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pagt, acl_group_id, 0x08, 0, 16);
+
+/* reg_pagt_multi
+ * Multi-ACL
+ * 0 - This ACL is the last ACL in the multi-ACL
+ * 1 - This ACL is part of a multi-ACL
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, pagt, multi, 0x30, 31, 1, 0x04, 0x00, false);
+
+/* reg_pagt_acl_id
+ * ACL identifier
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, pagt, acl_id, 0x30, 0, 16, 0x04, 0x00, false);
+
+static inline void mlxsw_reg_pagt_pack(char *payload, u16 acl_group_id)
+{
+ MLXSW_REG_ZERO(pagt, payload);
+ mlxsw_reg_pagt_acl_group_id_set(payload, acl_group_id);
+}
+
+static inline void mlxsw_reg_pagt_acl_id_pack(char *payload, int index,
+ u16 acl_id, bool multi)
+{
+ u8 size = mlxsw_reg_pagt_size_get(payload);
+
+ if (index >= size)
+ mlxsw_reg_pagt_size_set(payload, index + 1);
+ mlxsw_reg_pagt_multi_set(payload, index, multi);
+ mlxsw_reg_pagt_acl_id_set(payload, index, acl_id);
+}
+
+/* PTAR - Policy-Engine TCAM Allocation Register
+ * ---------------------------------------------
+ * This register is used for allocation of regions in the TCAM.
+ * Note: Query method is not supported on this register.
+ */
+#define MLXSW_REG_PTAR_ID 0x3006
+#define MLXSW_REG_PTAR_BASE_LEN 0x20
+#define MLXSW_REG_PTAR_KEY_ID_LEN 1
+#define MLXSW_REG_PTAR_KEY_ID_MAX_NUM 16
+#define MLXSW_REG_PTAR_LEN (MLXSW_REG_PTAR_BASE_LEN + \
+ MLXSW_REG_PTAR_KEY_ID_MAX_NUM * MLXSW_REG_PTAR_KEY_ID_LEN)
+
+MLXSW_REG_DEFINE(ptar, MLXSW_REG_PTAR_ID, MLXSW_REG_PTAR_LEN);
+
+enum mlxsw_reg_ptar_op {
+ /* allocate a TCAM region */
+ MLXSW_REG_PTAR_OP_ALLOC,
+ /* resize a TCAM region */
+ MLXSW_REG_PTAR_OP_RESIZE,
+ /* deallocate TCAM region */
+ MLXSW_REG_PTAR_OP_FREE,
+ /* test allocation */
+ MLXSW_REG_PTAR_OP_TEST,
+};
+
+/* reg_ptar_op
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, ptar, op, 0x00, 28, 4);
+
+/* reg_ptar_action_set_type
+ * Type of action set to be used on this region.
+ * For Spectrum and Spectrum-2, this is always type 2 - "flexible"
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, ptar, action_set_type, 0x00, 16, 8);
+
+enum mlxsw_reg_ptar_key_type {
+ MLXSW_REG_PTAR_KEY_TYPE_FLEX = 0x50, /* Spectrum */
+ MLXSW_REG_PTAR_KEY_TYPE_FLEX2 = 0x51, /* Spectrum-2 */
+};
+
+/* reg_ptar_key_type
+ * TCAM key type for the region.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, ptar, key_type, 0x00, 0, 8);
+
+/* reg_ptar_region_size
+ * TCAM region size. When allocating/resizing this is the requested size,
+ * the response is the actual size. Note that actual size may be
+ * larger than requested.
+ * Allowed range 1 .. cap_max_rules-1
+ * Reserved during op deallocate.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, ptar, region_size, 0x04, 0, 16);
+
+/* reg_ptar_region_id
+ * Region identifier
+ * Range 0 .. cap_max_regions-1
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ptar, region_id, 0x08, 0, 16);
+
+/* reg_ptar_tcam_region_info
+ * Opaque object that represents the TCAM region.
+ * Returned when allocating a region.
+ * Provided by software for ACL generation and region deallocation and resize.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, ptar, tcam_region_info, 0x10,
+ MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN);
+
+/* reg_ptar_flexible_key_id
+ * Identifier of the Flexible Key.
+ * Only valid if key_type == "FLEX_KEY"
+ * The key size will be rounded up to one of the following values:
+ * 9B, 18B, 36B, 54B.
+ * This field is reserved in resize operations.
+ * Access: WO + */ +MLXSW_ITEM8_INDEXED(reg, ptar, flexible_key_id, 0x20, 0, 8, + MLXSW_REG_PTAR_KEY_ID_LEN, 0x00, false); + +static inline void mlxsw_reg_ptar_pack(char *payload, enum mlxsw_reg_ptar_op op, + enum mlxsw_reg_ptar_key_type key_type, + u16 region_size, u16 region_id, + const char *tcam_region_info) +{ + MLXSW_REG_ZERO(ptar, payload); + mlxsw_reg_ptar_op_set(payload, op); + mlxsw_reg_ptar_action_set_type_set(payload, 2); /* "flexible" */ + mlxsw_reg_ptar_key_type_set(payload, key_type); + mlxsw_reg_ptar_region_size_set(payload, region_size); + mlxsw_reg_ptar_region_id_set(payload, region_id); + mlxsw_reg_ptar_tcam_region_info_memcpy_to(payload, tcam_region_info); +} + +static inline void mlxsw_reg_ptar_key_id_pack(char *payload, int index, + u16 key_id) +{ + mlxsw_reg_ptar_flexible_key_id_set(payload, index, key_id); +} + +static inline void mlxsw_reg_ptar_unpack(char *payload, char *tcam_region_info) +{ + mlxsw_reg_ptar_tcam_region_info_memcpy_from(payload, tcam_region_info); +} + +/* PPBS - Policy-Engine Policy Based Switching Register + * ---------------------------------------------------- + * This register retrieves and sets Policy Based Switching Table entries. + */ +#define MLXSW_REG_PPBS_ID 0x300C +#define MLXSW_REG_PPBS_LEN 0x14 + +MLXSW_REG_DEFINE(ppbs, MLXSW_REG_PPBS_ID, MLXSW_REG_PPBS_LEN); + +/* reg_ppbs_pbs_ptr + * Index into the PBS table. + * For Spectrum, the index points to the KVD Linear. + * Access: Index + */ +MLXSW_ITEM32(reg, ppbs, pbs_ptr, 0x08, 0, 24); + +/* reg_ppbs_system_port + * Unique port identifier for the final destination of the packet. + * Access: RW + */ +MLXSW_ITEM32(reg, ppbs, system_port, 0x10, 0, 16); + +static inline void mlxsw_reg_ppbs_pack(char *payload, u32 pbs_ptr, + u16 system_port) +{ + MLXSW_REG_ZERO(ppbs, payload); + mlxsw_reg_ppbs_pbs_ptr_set(payload, pbs_ptr); + mlxsw_reg_ppbs_system_port_set(payload, system_port); +} + +/* PRCR - Policy-Engine Rules Copy Register + * ---------------------------------------- + * This register is used for accessing rules within a TCAM region. + */ +#define MLXSW_REG_PRCR_ID 0x300D +#define MLXSW_REG_PRCR_LEN 0x40 + +MLXSW_REG_DEFINE(prcr, MLXSW_REG_PRCR_ID, MLXSW_REG_PRCR_LEN); + +enum mlxsw_reg_prcr_op { + /* Move rules. Moves the rules from "tcam_region_info" starting + * at offset "offset" to "dest_tcam_region_info" + * at offset "dest_offset." + */ + MLXSW_REG_PRCR_OP_MOVE, + /* Copy rules. Copies the rules from "tcam_region_info" starting + * at offset "offset" to "dest_tcam_region_info" + * at offset "dest_offset." + */ + MLXSW_REG_PRCR_OP_COPY, +}; + +/* reg_prcr_op + * Access: OP + */ +MLXSW_ITEM32(reg, prcr, op, 0x00, 28, 4); + +/* reg_prcr_offset + * Offset within the source region to copy/move from. + * Access: Index + */ +MLXSW_ITEM32(reg, prcr, offset, 0x00, 0, 16); + +/* reg_prcr_size + * The number of rules to copy/move. + * Access: WO + */ +MLXSW_ITEM32(reg, prcr, size, 0x04, 0, 16); + +/* reg_prcr_tcam_region_info + * Opaque object that represents the source TCAM region. + * Access: Index + */ +MLXSW_ITEM_BUF(reg, prcr, tcam_region_info, 0x10, + MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN); + +/* reg_prcr_dest_offset + * Offset within the source region to copy/move to. + * Access: Index + */ +MLXSW_ITEM32(reg, prcr, dest_offset, 0x20, 0, 16); + +/* reg_prcr_dest_tcam_region_info + * Opaque object that represents the destination TCAM region. 
+ * Access: Index + */ +MLXSW_ITEM_BUF(reg, prcr, dest_tcam_region_info, 0x30, + MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN); + +static inline void mlxsw_reg_prcr_pack(char *payload, enum mlxsw_reg_prcr_op op, + const char *src_tcam_region_info, + u16 src_offset, + const char *dest_tcam_region_info, + u16 dest_offset, u16 size) +{ + MLXSW_REG_ZERO(prcr, payload); + mlxsw_reg_prcr_op_set(payload, op); + mlxsw_reg_prcr_offset_set(payload, src_offset); + mlxsw_reg_prcr_size_set(payload, size); + mlxsw_reg_prcr_tcam_region_info_memcpy_to(payload, + src_tcam_region_info); + mlxsw_reg_prcr_dest_offset_set(payload, dest_offset); + mlxsw_reg_prcr_dest_tcam_region_info_memcpy_to(payload, + dest_tcam_region_info); +} + +/* PEFA - Policy-Engine Extended Flexible Action Register + * ------------------------------------------------------ + * This register is used for accessing an extended flexible action entry + * in the central KVD Linear Database. + */ +#define MLXSW_REG_PEFA_ID 0x300F +#define MLXSW_REG_PEFA_LEN 0xB0 + +MLXSW_REG_DEFINE(pefa, MLXSW_REG_PEFA_ID, MLXSW_REG_PEFA_LEN); + +/* reg_pefa_index + * Index in the KVD Linear Centralized Database. + * Access: Index + */ +MLXSW_ITEM32(reg, pefa, index, 0x00, 0, 24); + +/* reg_pefa_a + * Index in the KVD Linear Centralized Database. + * Activity + * For a new entry: set if ca=0, clear if ca=1 + * Set if a packet lookup has hit on the specific entry + * Access: RO + */ +MLXSW_ITEM32(reg, pefa, a, 0x04, 29, 1); + +/* reg_pefa_ca + * Clear activity + * When write: activity is according to this field + * When read: after reading the activity is cleared according to ca + * Access: OP + */ +MLXSW_ITEM32(reg, pefa, ca, 0x04, 24, 1); + +#define MLXSW_REG_FLEX_ACTION_SET_LEN 0xA8 + +/* reg_pefa_flex_action_set + * Action-set to perform when rule is matched. + * Must be zero padded if action set is shorter. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, pefa, flex_action_set, 0x08, MLXSW_REG_FLEX_ACTION_SET_LEN); + +static inline void mlxsw_reg_pefa_pack(char *payload, u32 index, bool ca, + const char *flex_action_set) +{ + MLXSW_REG_ZERO(pefa, payload); + mlxsw_reg_pefa_index_set(payload, index); + mlxsw_reg_pefa_ca_set(payload, ca); + if (flex_action_set) + mlxsw_reg_pefa_flex_action_set_memcpy_to(payload, + flex_action_set); +} + +static inline void mlxsw_reg_pefa_unpack(char *payload, bool *p_a) +{ + *p_a = mlxsw_reg_pefa_a_get(payload); +} + +/* PEMRBT - Policy-Engine Multicast Router Binding Table Register + * -------------------------------------------------------------- + * This register is used for binding Multicast router to an ACL group + * that serves the MC router. + * This register is not supported by SwitchX/-2 and Spectrum. + */ +#define MLXSW_REG_PEMRBT_ID 0x3014 +#define MLXSW_REG_PEMRBT_LEN 0x14 + +MLXSW_REG_DEFINE(pemrbt, MLXSW_REG_PEMRBT_ID, MLXSW_REG_PEMRBT_LEN); + +enum mlxsw_reg_pemrbt_protocol { + MLXSW_REG_PEMRBT_PROTO_IPV4, + MLXSW_REG_PEMRBT_PROTO_IPV6, +}; + +/* reg_pemrbt_protocol + * Access: Index + */ +MLXSW_ITEM32(reg, pemrbt, protocol, 0x00, 0, 1); + +/* reg_pemrbt_group_id + * ACL group identifier. 
+ * Range 0..cap_max_acl_groups-1 + * Access: RW + */ +MLXSW_ITEM32(reg, pemrbt, group_id, 0x10, 0, 16); + +static inline void +mlxsw_reg_pemrbt_pack(char *payload, enum mlxsw_reg_pemrbt_protocol protocol, + u16 group_id) +{ + MLXSW_REG_ZERO(pemrbt, payload); + mlxsw_reg_pemrbt_protocol_set(payload, protocol); + mlxsw_reg_pemrbt_group_id_set(payload, group_id); +} + +/* PTCE-V2 - Policy-Engine TCAM Entry Register Version 2 + * ----------------------------------------------------- + * This register is used for accessing rules within a TCAM region. + * It is a new version of PTCE in order to support wider key, + * mask and action within a TCAM region. This register is not supported + * by SwitchX and SwitchX-2. + */ +#define MLXSW_REG_PTCE2_ID 0x3017 +#define MLXSW_REG_PTCE2_LEN 0x1D8 + +MLXSW_REG_DEFINE(ptce2, MLXSW_REG_PTCE2_ID, MLXSW_REG_PTCE2_LEN); + +/* reg_ptce2_v + * Valid. + * Access: RW + */ +MLXSW_ITEM32(reg, ptce2, v, 0x00, 31, 1); + +/* reg_ptce2_a + * Activity. Set if a packet lookup has hit on the specific entry. + * To clear the "a" bit, use "clear activity" op or "clear on read" op. + * Access: RO + */ +MLXSW_ITEM32(reg, ptce2, a, 0x00, 30, 1); + +enum mlxsw_reg_ptce2_op { + /* Read operation. */ + MLXSW_REG_PTCE2_OP_QUERY_READ = 0, + /* clear on read operation. Used to read entry + * and clear Activity bit. + */ + MLXSW_REG_PTCE2_OP_QUERY_CLEAR_ON_READ = 1, + /* Write operation. Used to write a new entry to the table. + * All R/W fields are relevant for new entry. Activity bit is set + * for new entries - Note write with v = 0 will delete the entry. + */ + MLXSW_REG_PTCE2_OP_WRITE_WRITE = 0, + /* Update action. Only action set will be updated. */ + MLXSW_REG_PTCE2_OP_WRITE_UPDATE = 1, + /* Clear activity. A bit is cleared for the entry. */ + MLXSW_REG_PTCE2_OP_WRITE_CLEAR_ACTIVITY = 2, +}; + +/* reg_ptce2_op + * Access: OP + */ +MLXSW_ITEM32(reg, ptce2, op, 0x00, 20, 3); + +/* reg_ptce2_offset + * Access: Index + */ +MLXSW_ITEM32(reg, ptce2, offset, 0x00, 0, 16); + +/* reg_ptce2_priority + * Priority of the rule, higher values win. The range is 1..cap_kvd_size-1. + * Note: priority does not have to be unique per rule. + * Within a region, higher priority should have lower offset (no limitation + * between regions in a multi-region). + * Access: RW + */ +MLXSW_ITEM32(reg, ptce2, priority, 0x04, 0, 24); + +/* reg_ptce2_tcam_region_info + * Opaque object that represents the TCAM region. + * Access: Index + */ +MLXSW_ITEM_BUF(reg, ptce2, tcam_region_info, 0x10, + MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN); + +#define MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN 96 + +/* reg_ptce2_flex_key_blocks + * ACL Key. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, ptce2, flex_key_blocks, 0x20, + MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN); + +/* reg_ptce2_mask + * mask- in the same size as key. A bit that is set directs the TCAM + * to compare the corresponding bit in key. A bit that is clear directs + * the TCAM to ignore the corresponding bit in key. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, ptce2, mask, 0x80, + MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN); + +/* reg_ptce2_flex_action_set + * ACL action set. 
+ * Access: RW + */ +MLXSW_ITEM_BUF(reg, ptce2, flex_action_set, 0xE0, + MLXSW_REG_FLEX_ACTION_SET_LEN); + +static inline void mlxsw_reg_ptce2_pack(char *payload, bool valid, + enum mlxsw_reg_ptce2_op op, + const char *tcam_region_info, + u16 offset, u32 priority) +{ + MLXSW_REG_ZERO(ptce2, payload); + mlxsw_reg_ptce2_v_set(payload, valid); + mlxsw_reg_ptce2_op_set(payload, op); + mlxsw_reg_ptce2_offset_set(payload, offset); + mlxsw_reg_ptce2_priority_set(payload, priority); + mlxsw_reg_ptce2_tcam_region_info_memcpy_to(payload, tcam_region_info); +} + +/* PERPT - Policy-Engine ERP Table Register + * ---------------------------------------- + * This register adds and removes eRPs from the eRP table. + */ +#define MLXSW_REG_PERPT_ID 0x3021 +#define MLXSW_REG_PERPT_LEN 0x80 + +MLXSW_REG_DEFINE(perpt, MLXSW_REG_PERPT_ID, MLXSW_REG_PERPT_LEN); + +/* reg_perpt_erpt_bank + * eRP table bank. + * Range 0 .. cap_max_erp_table_banks - 1 + * Access: Index + */ +MLXSW_ITEM32(reg, perpt, erpt_bank, 0x00, 16, 4); + +/* reg_perpt_erpt_index + * Index to eRP table within the eRP bank. + * Range is 0 .. cap_max_erp_table_bank_size - 1 + * Access: Index + */ +MLXSW_ITEM32(reg, perpt, erpt_index, 0x00, 0, 8); + +enum mlxsw_reg_perpt_key_size { + MLXSW_REG_PERPT_KEY_SIZE_2KB, + MLXSW_REG_PERPT_KEY_SIZE_4KB, + MLXSW_REG_PERPT_KEY_SIZE_8KB, + MLXSW_REG_PERPT_KEY_SIZE_12KB, +}; + +/* reg_perpt_key_size + * Access: OP + */ +MLXSW_ITEM32(reg, perpt, key_size, 0x04, 0, 4); + +/* reg_perpt_bf_bypass + * 0 - The eRP is used only if bloom filter state is set for the given + * rule. + * 1 - The eRP is used regardless of bloom filter state. + * The bypass is an OR condition of region_id or eRP. See PERCR.bf_bypass + * Access: RW + */ +MLXSW_ITEM32(reg, perpt, bf_bypass, 0x08, 8, 1); + +/* reg_perpt_erp_id + * eRP ID for use by the rules. + * Access: RW + */ +MLXSW_ITEM32(reg, perpt, erp_id, 0x08, 0, 4); + +/* reg_perpt_erpt_base_bank + * Base eRP table bank, points to head of erp_vector + * Range is 0 .. cap_max_erp_table_banks - 1 + * Access: OP + */ +MLXSW_ITEM32(reg, perpt, erpt_base_bank, 0x0C, 16, 4); + +/* reg_perpt_erpt_base_index + * Base index to eRP table within the eRP bank + * Range is 0 .. cap_max_erp_table_bank_size - 1 + * Access: OP + */ +MLXSW_ITEM32(reg, perpt, erpt_base_index, 0x0C, 0, 8); + +/* reg_perpt_erp_index_in_vector + * eRP index in the vector. + * Access: OP + */ +MLXSW_ITEM32(reg, perpt, erp_index_in_vector, 0x10, 0, 4); + +/* reg_perpt_erp_vector + * eRP vector. 
+ * Access: OP + */ +MLXSW_ITEM_BIT_ARRAY(reg, perpt, erp_vector, 0x14, 4, 1); + +/* reg_perpt_mask + * Mask + * 0 - A-TCAM will ignore the bit in key + * 1 - A-TCAM will compare the bit in key + * Access: RW + */ +MLXSW_ITEM_BUF(reg, perpt, mask, 0x20, MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN); + +static inline void mlxsw_reg_perpt_erp_vector_pack(char *payload, + unsigned long *erp_vector, + unsigned long size) +{ + unsigned long bit; + + for_each_set_bit(bit, erp_vector, size) + mlxsw_reg_perpt_erp_vector_set(payload, bit, true); +} + +static inline void +mlxsw_reg_perpt_pack(char *payload, u8 erpt_bank, u8 erpt_index, + enum mlxsw_reg_perpt_key_size key_size, u8 erp_id, + u8 erpt_base_bank, u8 erpt_base_index, u8 erp_index, + char *mask) +{ + MLXSW_REG_ZERO(perpt, payload); + mlxsw_reg_perpt_erpt_bank_set(payload, erpt_bank); + mlxsw_reg_perpt_erpt_index_set(payload, erpt_index); + mlxsw_reg_perpt_key_size_set(payload, key_size); + mlxsw_reg_perpt_bf_bypass_set(payload, false); + mlxsw_reg_perpt_erp_id_set(payload, erp_id); + mlxsw_reg_perpt_erpt_base_bank_set(payload, erpt_base_bank); + mlxsw_reg_perpt_erpt_base_index_set(payload, erpt_base_index); + mlxsw_reg_perpt_erp_index_in_vector_set(payload, erp_index); + mlxsw_reg_perpt_mask_memcpy_to(payload, mask); +} + +/* PERAR - Policy-Engine Region Association Register + * ------------------------------------------------- + * This register associates a hw region for region_id's. Changing on the fly + * is supported by the device. + */ +#define MLXSW_REG_PERAR_ID 0x3026 +#define MLXSW_REG_PERAR_LEN 0x08 + +MLXSW_REG_DEFINE(perar, MLXSW_REG_PERAR_ID, MLXSW_REG_PERAR_LEN); + +/* reg_perar_region_id + * Region identifier + * Range 0 .. cap_max_regions-1 + * Access: Index + */ +MLXSW_ITEM32(reg, perar, region_id, 0x00, 0, 16); + +static inline unsigned int +mlxsw_reg_perar_hw_regions_needed(unsigned int block_num) +{ + return DIV_ROUND_UP(block_num, 4); +} + +/* reg_perar_hw_region + * HW Region + * Range 0 .. cap_max_regions-1 + * Default: hw_region = region_id + * For a 8 key block region, 2 consecutive regions are used + * For a 12 key block region, 3 consecutive regions are used + * Access: RW + */ +MLXSW_ITEM32(reg, perar, hw_region, 0x04, 0, 16); + +static inline void mlxsw_reg_perar_pack(char *payload, u16 region_id, + u16 hw_region) +{ + MLXSW_REG_ZERO(perar, payload); + mlxsw_reg_perar_region_id_set(payload, region_id); + mlxsw_reg_perar_hw_region_set(payload, hw_region); +} + +/* PTCE-V3 - Policy-Engine TCAM Entry Register Version 3 + * ----------------------------------------------------- + * This register is a new version of PTCE-V2 in order to support the + * A-TCAM. This register is not supported by SwitchX/-2 and Spectrum. + */ +#define MLXSW_REG_PTCE3_ID 0x3027 +#define MLXSW_REG_PTCE3_LEN 0xF0 + +MLXSW_REG_DEFINE(ptce3, MLXSW_REG_PTCE3_ID, MLXSW_REG_PTCE3_LEN); + +/* reg_ptce3_v + * Valid. + * Access: RW + */ +MLXSW_ITEM32(reg, ptce3, v, 0x00, 31, 1); + +enum mlxsw_reg_ptce3_op { + /* Write operation. Used to write a new entry to the table. + * All R/W fields are relevant for new entry. Activity bit is set + * for new entries. Write with v = 0 will delete the entry. Must + * not be used if an entry exists. + */ + MLXSW_REG_PTCE3_OP_WRITE_WRITE = 0, + /* Update operation */ + MLXSW_REG_PTCE3_OP_WRITE_UPDATE = 1, + /* Read operation */ + MLXSW_REG_PTCE3_OP_QUERY_READ = 0, +}; + +/* reg_ptce3_op + * Access: OP + */ +MLXSW_ITEM32(reg, ptce3, op, 0x00, 20, 3); + +/* reg_ptce3_priority + * Priority of the rule. Higher values win. 
+ * For Spectrum-2 range is 1..cap_kvd_size - 1 + * Note: Priority does not have to be unique per rule. + * Access: RW + */ +MLXSW_ITEM32(reg, ptce3, priority, 0x04, 0, 24); + +/* reg_ptce3_tcam_region_info + * Opaque object that represents the TCAM region. + * Access: Index + */ +MLXSW_ITEM_BUF(reg, ptce3, tcam_region_info, 0x10, + MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN); + +/* reg_ptce3_flex2_key_blocks + * ACL key. The key must be masked according to eRP (if exists) or + * according to master mask. + * Access: Index + */ +MLXSW_ITEM_BUF(reg, ptce3, flex2_key_blocks, 0x20, + MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN); + +/* reg_ptce3_erp_id + * eRP ID. + * Access: Index + */ +MLXSW_ITEM32(reg, ptce3, erp_id, 0x80, 0, 4); + +/* reg_ptce3_delta_start + * Start point of delta_value and delta_mask, in bits. Must not exceed + * num_key_blocks * 36 - 8. Reserved when delta_mask = 0. + * Access: Index + */ +MLXSW_ITEM32(reg, ptce3, delta_start, 0x84, 0, 10); + +/* reg_ptce3_delta_mask + * Delta mask. + * 0 - Ignore relevant bit in delta_value + * 1 - Compare relevant bit in delta_value + * Delta mask must not be set for reserved fields in the key blocks. + * Note: No delta when no eRPs. Thus, for regions with + * PERERP.erpt_pointer_valid = 0 the delta mask must be 0. + * Access: Index + */ +MLXSW_ITEM32(reg, ptce3, delta_mask, 0x88, 16, 8); + +/* reg_ptce3_delta_value + * Delta value. + * Bits which are masked by delta_mask must be 0. + * Access: Index + */ +MLXSW_ITEM32(reg, ptce3, delta_value, 0x88, 0, 8); + +/* reg_ptce3_prune_vector + * Pruning vector relative to the PERPT.erp_id. + * Used for reducing lookups. + * 0 - NEED: Do a lookup using the eRP. + * 1 - PRUNE: Do not perform a lookup using the eRP. + * Maybe be modified by PEAPBL and PEAPBM. + * Note: In Spectrum-2, a region of 8 key blocks must be set to either + * all 1's or all 0's. + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, ptce3, prune_vector, 0x90, 4, 1); + +/* reg_ptce3_prune_ctcam + * Pruning on C-TCAM. Used for reducing lookups. + * 0 - NEED: Do a lookup in the C-TCAM. + * 1 - PRUNE: Do not perform a lookup in the C-TCAM. + * Access: RW + */ +MLXSW_ITEM32(reg, ptce3, prune_ctcam, 0x94, 31, 1); + +/* reg_ptce3_large_exists + * Large entry key ID exists. + * Within the region: + * 0 - SINGLE: The large_entry_key_id is not currently in use. + * For rule insert: The MSB of the key (blocks 6..11) will be added. + * For rule delete: The MSB of the key will be removed. + * 1 - NON_SINGLE: The large_entry_key_id is currently in use. + * For rule insert: The MSB of the key (blocks 6..11) will not be added. + * For rule delete: The MSB of the key will not be removed. + * Access: WO + */ +MLXSW_ITEM32(reg, ptce3, large_exists, 0x98, 31, 1); + +/* reg_ptce3_large_entry_key_id + * Large entry key ID. + * A key for 12 key blocks rules. Reserved when region has less than 12 key + * blocks. Must be different for different keys which have the same common + * 6 key blocks (MSB, blocks 6..11) key within a region. + * Range is 0..cap_max_pe_large_key_id - 1 + * Access: RW + */ +MLXSW_ITEM32(reg, ptce3, large_entry_key_id, 0x98, 0, 24); + +/* reg_ptce3_action_pointer + * Pointer to action. 
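+ * Flexible action sets themselves are written into the KVD linear database,
+ * e.g. via the PEFA register above.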
+ * Range is 0..cap_max_kvd_action_sets - 1 + * Access: RW + */ +MLXSW_ITEM32(reg, ptce3, action_pointer, 0xA0, 0, 24); + +static inline void mlxsw_reg_ptce3_pack(char *payload, bool valid, + enum mlxsw_reg_ptce3_op op, + u32 priority, + const char *tcam_region_info, + const char *key, u8 erp_id, + u16 delta_start, u8 delta_mask, + u8 delta_value, bool large_exists, + u32 lkey_id, u32 action_pointer) +{ + MLXSW_REG_ZERO(ptce3, payload); + mlxsw_reg_ptce3_v_set(payload, valid); + mlxsw_reg_ptce3_op_set(payload, op); + mlxsw_reg_ptce3_priority_set(payload, priority); + mlxsw_reg_ptce3_tcam_region_info_memcpy_to(payload, tcam_region_info); + mlxsw_reg_ptce3_flex2_key_blocks_memcpy_to(payload, key); + mlxsw_reg_ptce3_erp_id_set(payload, erp_id); + mlxsw_reg_ptce3_delta_start_set(payload, delta_start); + mlxsw_reg_ptce3_delta_mask_set(payload, delta_mask); + mlxsw_reg_ptce3_delta_value_set(payload, delta_value); + mlxsw_reg_ptce3_large_exists_set(payload, large_exists); + mlxsw_reg_ptce3_large_entry_key_id_set(payload, lkey_id); + mlxsw_reg_ptce3_action_pointer_set(payload, action_pointer); +} + +/* PERCR - Policy-Engine Region Configuration Register + * --------------------------------------------------- + * This register configures the region parameters. The region_id must be + * allocated. + */ +#define MLXSW_REG_PERCR_ID 0x302A +#define MLXSW_REG_PERCR_LEN 0x80 + +MLXSW_REG_DEFINE(percr, MLXSW_REG_PERCR_ID, MLXSW_REG_PERCR_LEN); + +/* reg_percr_region_id + * Region identifier. + * Range 0..cap_max_regions-1 + * Access: Index + */ +MLXSW_ITEM32(reg, percr, region_id, 0x00, 0, 16); + +/* reg_percr_atcam_ignore_prune + * Ignore prune_vector by other A-TCAM rules. Used e.g., for a new rule. + * Access: RW + */ +MLXSW_ITEM32(reg, percr, atcam_ignore_prune, 0x04, 25, 1); + +/* reg_percr_ctcam_ignore_prune + * Ignore prune_ctcam by other A-TCAM rules. Used e.g., for a new rule. + * Access: RW + */ +MLXSW_ITEM32(reg, percr, ctcam_ignore_prune, 0x04, 24, 1); + +/* reg_percr_bf_bypass + * Bloom filter bypass. + * 0 - Bloom filter is used (default) + * 1 - Bloom filter is bypassed. The bypass is an OR condition of + * region_id or eRP. See PERPT.bf_bypass + * Access: RW + */ +MLXSW_ITEM32(reg, percr, bf_bypass, 0x04, 16, 1); + +/* reg_percr_master_mask + * Master mask. Logical OR mask of all masks of all rules of a region + * (both A-TCAM and C-TCAM). When there are no eRPs + * (erpt_pointer_valid = 0), then this provides the mask. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, percr, master_mask, 0x20, 96); + +static inline void mlxsw_reg_percr_pack(char *payload, u16 region_id) +{ + MLXSW_REG_ZERO(percr, payload); + mlxsw_reg_percr_region_id_set(payload, region_id); + mlxsw_reg_percr_atcam_ignore_prune_set(payload, false); + mlxsw_reg_percr_ctcam_ignore_prune_set(payload, false); + mlxsw_reg_percr_bf_bypass_set(payload, false); +} + +/* PERERP - Policy-Engine Region eRP Register + * ------------------------------------------ + * This register configures the region eRP. The region_id must be + * allocated. + */ +#define MLXSW_REG_PERERP_ID 0x302B +#define MLXSW_REG_PERERP_LEN 0x1C + +MLXSW_REG_DEFINE(pererp, MLXSW_REG_PERERP_ID, MLXSW_REG_PERERP_LEN); + +/* reg_pererp_region_id + * Region identifier. + * Range 0..cap_max_regions-1 + * Access: Index + */ +MLXSW_ITEM32(reg, pererp, region_id, 0x00, 0, 16); + +/* reg_pererp_ctcam_le + * C-TCAM lookup enable. Reserved when erpt_pointer_valid = 0. 
+ * Access: RW + */ +MLXSW_ITEM32(reg, pererp, ctcam_le, 0x04, 28, 1); + +/* reg_pererp_erpt_pointer_valid + * erpt_pointer is valid. + * Access: RW + */ +MLXSW_ITEM32(reg, pererp, erpt_pointer_valid, 0x10, 31, 1); + +/* reg_pererp_erpt_bank_pointer + * Pointer to eRP table bank. May be modified at any time. + * Range 0..cap_max_erp_table_banks-1 + * Reserved when erpt_pointer_valid = 0 + */ +MLXSW_ITEM32(reg, pererp, erpt_bank_pointer, 0x10, 16, 4); + +/* reg_pererp_erpt_pointer + * Pointer to eRP table within the eRP bank. Can be changed for an + * existing region. + * Range 0..cap_max_erp_table_size-1 + * Reserved when erpt_pointer_valid = 0 + * Access: RW + */ +MLXSW_ITEM32(reg, pererp, erpt_pointer, 0x10, 0, 8); + +/* reg_pererp_erpt_vector + * Vector of allowed eRP indexes starting from erpt_pointer within the + * erpt_bank_pointer. Next entries will be in next bank. + * Note that eRP index is used and not eRP ID. + * Reserved when erpt_pointer_valid = 0 + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, pererp, erpt_vector, 0x14, 4, 1); + +/* reg_pererp_master_rp_id + * Master RP ID. When there are no eRPs, then this provides the eRP ID + * for the lookup. Can be changed for an existing region. + * Reserved when erpt_pointer_valid = 1 + * Access: RW + */ +MLXSW_ITEM32(reg, pererp, master_rp_id, 0x18, 0, 4); + +static inline void mlxsw_reg_pererp_erp_vector_pack(char *payload, + unsigned long *erp_vector, + unsigned long size) +{ + unsigned long bit; + + for_each_set_bit(bit, erp_vector, size) + mlxsw_reg_pererp_erpt_vector_set(payload, bit, true); +} + +static inline void mlxsw_reg_pererp_pack(char *payload, u16 region_id, + bool ctcam_le, bool erpt_pointer_valid, + u8 erpt_bank_pointer, u8 erpt_pointer, + u8 master_rp_id) +{ + MLXSW_REG_ZERO(pererp, payload); + mlxsw_reg_pererp_region_id_set(payload, region_id); + mlxsw_reg_pererp_ctcam_le_set(payload, ctcam_le); + mlxsw_reg_pererp_erpt_pointer_valid_set(payload, erpt_pointer_valid); + mlxsw_reg_pererp_erpt_bank_pointer_set(payload, erpt_bank_pointer); + mlxsw_reg_pererp_erpt_pointer_set(payload, erpt_pointer); + mlxsw_reg_pererp_master_rp_id_set(payload, master_rp_id); +} + +/* PEABFE - Policy-Engine Algorithmic Bloom Filter Entries Register + * ---------------------------------------------------------------- + * This register configures the Bloom filter entries. + */ +#define MLXSW_REG_PEABFE_ID 0x3022 +#define MLXSW_REG_PEABFE_BASE_LEN 0x10 +#define MLXSW_REG_PEABFE_BF_REC_LEN 0x4 +#define MLXSW_REG_PEABFE_BF_REC_MAX_COUNT 256 +#define MLXSW_REG_PEABFE_LEN (MLXSW_REG_PEABFE_BASE_LEN + \ + MLXSW_REG_PEABFE_BF_REC_LEN * \ + MLXSW_REG_PEABFE_BF_REC_MAX_COUNT) + +MLXSW_REG_DEFINE(peabfe, MLXSW_REG_PEABFE_ID, MLXSW_REG_PEABFE_LEN); + +/* reg_peabfe_size + * Number of BF entries to be updated. 
+ * Range 1..256
+ * Access: Op
+ */
+MLXSW_ITEM32(reg, peabfe, size, 0x00, 0, 9);
+
+/* reg_peabfe_bf_entry_state
+ * Bloom filter state
+ * 0 - Clear
+ * 1 - Set
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, peabfe, bf_entry_state,
+ MLXSW_REG_PEABFE_BASE_LEN, 31, 1,
+ MLXSW_REG_PEABFE_BF_REC_LEN, 0x00, false);
+
+/* reg_peabfe_bf_entry_bank
+ * Bloom filter bank ID
+ * Range 0..cap_max_erp_table_banks-1
+ * Access: Index
+ */
+MLXSW_ITEM32_INDEXED(reg, peabfe, bf_entry_bank,
+ MLXSW_REG_PEABFE_BASE_LEN, 24, 4,
+ MLXSW_REG_PEABFE_BF_REC_LEN, 0x00, false);
+
+/* reg_peabfe_bf_entry_index
+ * Bloom filter entry index
+ * Range 0..2^cap_max_bf_log-1
+ * Access: Index
+ */
+MLXSW_ITEM32_INDEXED(reg, peabfe, bf_entry_index,
+ MLXSW_REG_PEABFE_BASE_LEN, 0, 24,
+ MLXSW_REG_PEABFE_BF_REC_LEN, 0x00, false);
+
+static inline void mlxsw_reg_peabfe_pack(char *payload)
+{
+ MLXSW_REG_ZERO(peabfe, payload);
+}
+
+static inline void mlxsw_reg_peabfe_rec_pack(char *payload, int rec_index,
+ u8 state, u8 bank, u32 bf_index)
+{
+ u8 num_rec = mlxsw_reg_peabfe_size_get(payload);
+
+ if (rec_index >= num_rec)
+ mlxsw_reg_peabfe_size_set(payload, rec_index + 1);
+ mlxsw_reg_peabfe_bf_entry_state_set(payload, rec_index, state);
+ mlxsw_reg_peabfe_bf_entry_bank_set(payload, rec_index, bank);
+ mlxsw_reg_peabfe_bf_entry_index_set(payload, rec_index, bf_index);
+}
+
+/* IEDR - Infrastructure Entry Delete Register
+ * ----------------------------------------------------
+ * This register is used for deleting entries from the entry tables.
+ * It is legitimate to attempt to delete a non-existing entry (the device will
+ * respond as a good flow).
+ */
+#define MLXSW_REG_IEDR_ID 0x3804
+#define MLXSW_REG_IEDR_BASE_LEN 0x10 /* base length, without records */
+#define MLXSW_REG_IEDR_REC_LEN 0x8 /* record length */
+#define MLXSW_REG_IEDR_REC_MAX_COUNT 64
+#define MLXSW_REG_IEDR_LEN (MLXSW_REG_IEDR_BASE_LEN + \
+ MLXSW_REG_IEDR_REC_LEN * \
+ MLXSW_REG_IEDR_REC_MAX_COUNT)
+
+MLXSW_REG_DEFINE(iedr, MLXSW_REG_IEDR_ID, MLXSW_REG_IEDR_LEN);
+
+/* reg_iedr_num_rec
+ * Number of records.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, iedr, num_rec, 0x00, 0, 8);
+
+/* reg_iedr_rec_type
+ * Resource type.
+ * Access: OP
+ */
+MLXSW_ITEM32_INDEXED(reg, iedr, rec_type, MLXSW_REG_IEDR_BASE_LEN, 24, 8,
+ MLXSW_REG_IEDR_REC_LEN, 0x00, false);
+
+/* reg_iedr_rec_size
+ * Size of entries to be deleted. The unit is 1 entry, regardless of entry type.
+ * Access: OP
+ */
+MLXSW_ITEM32_INDEXED(reg, iedr, rec_size, MLXSW_REG_IEDR_BASE_LEN, 0, 13,
+ MLXSW_REG_IEDR_REC_LEN, 0x00, false);
+
+/* reg_iedr_rec_index_start
+ * Resource index start.
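+ *
+ * Deletion requests are typically batched, e.g. (sketch; rec_type, rec_size
+ * and rec_index_start are caller-supplied):
+ *
+ *   mlxsw_reg_iedr_pack(iedr_pl);
+ *   mlxsw_reg_iedr_rec_pack(iedr_pl, 0, rec_type, rec_size, rec_index_start);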
+ * Access: OP
+ */
+MLXSW_ITEM32_INDEXED(reg, iedr, rec_index_start, MLXSW_REG_IEDR_BASE_LEN, 0, 24,
+ MLXSW_REG_IEDR_REC_LEN, 0x04, false);
+
+static inline void mlxsw_reg_iedr_pack(char *payload)
+{
+ MLXSW_REG_ZERO(iedr, payload);
+}
+
+static inline void mlxsw_reg_iedr_rec_pack(char *payload, int rec_index,
+ u8 rec_type, u16 rec_size,
+ u32 rec_index_start)
+{
+ u8 num_rec = mlxsw_reg_iedr_num_rec_get(payload);
+
+ if (rec_index >= num_rec)
+ mlxsw_reg_iedr_num_rec_set(payload, rec_index + 1);
+ mlxsw_reg_iedr_rec_type_set(payload, rec_index, rec_type);
+ mlxsw_reg_iedr_rec_size_set(payload, rec_index, rec_size);
+ mlxsw_reg_iedr_rec_index_start_set(payload, rec_index, rec_index_start);
+}
+
+/* QPTS - QoS Priority Trust State Register
+ * ----------------------------------------
+ * This register controls the port policy to calculate the switch priority and
+ * packet color based on incoming packet fields.
+ */
+#define MLXSW_REG_QPTS_ID 0x4002
+#define MLXSW_REG_QPTS_LEN 0x8
+
+MLXSW_REG_DEFINE(qpts, MLXSW_REG_QPTS_ID, MLXSW_REG_QPTS_LEN);
+
+/* reg_qpts_local_port
+ * Local port number.
+ * Access: Index
+ *
+ * Note: CPU port is supported.
+ */
+MLXSW_ITEM32_LP(reg, qpts, 0x00, 16, 0x00, 12);
+
+enum mlxsw_reg_qpts_trust_state {
+ MLXSW_REG_QPTS_TRUST_STATE_PCP = 1,
+ MLXSW_REG_QPTS_TRUST_STATE_DSCP = 2, /* For MPLS, trust EXP. */
+};
+
+/* reg_qpts_trust_state
+ * Trust state for a given port.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpts, trust_state, 0x04, 0, 3);
+
+static inline void mlxsw_reg_qpts_pack(char *payload, u16 local_port,
+ enum mlxsw_reg_qpts_trust_state ts)
+{
+ MLXSW_REG_ZERO(qpts, payload);
+
+ mlxsw_reg_qpts_local_port_set(payload, local_port);
+ mlxsw_reg_qpts_trust_state_set(payload, ts);
+}
+
+/* QPCR - QoS Policer Configuration Register
+ * -----------------------------------------
+ * The QPCR register is used to create policers that limit
+ * the rate of bytes or packets via some trap group.
+ */
+#define MLXSW_REG_QPCR_ID 0x4004
+#define MLXSW_REG_QPCR_LEN 0x28
+
+MLXSW_REG_DEFINE(qpcr, MLXSW_REG_QPCR_ID, MLXSW_REG_QPCR_LEN);
+
+enum mlxsw_reg_qpcr_g {
+ MLXSW_REG_QPCR_G_GLOBAL = 2,
+ MLXSW_REG_QPCR_G_STORM_CONTROL = 3,
+};
+
+/* reg_qpcr_g
+ * The policer type.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qpcr, g, 0x00, 14, 2);
+
+/* reg_qpcr_pid
+ * Policer ID.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qpcr, pid, 0x00, 0, 14);
+
+/* reg_qpcr_clear_counter
+ * Clear counters.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, qpcr, clear_counter, 0x04, 31, 1);
+
+/* reg_qpcr_color_aware
+ * Is the policer aware of colors.
+ * Must be 0 (unaware) for cpu port.
+ * Access: RW for unbounded policer. RO for bounded policer.
+ */
+MLXSW_ITEM32(reg, qpcr, color_aware, 0x04, 15, 1);
+
+/* reg_qpcr_bytes
+ * Whether the policer limit is in bytes per sec or packets per sec.
+ * 0 - packets
+ * 1 - bytes
+ * Access: RW for unbounded policer. RO for bounded policer.
+ */
+MLXSW_ITEM32(reg, qpcr, bytes, 0x04, 14, 1);
+
+enum mlxsw_reg_qpcr_ir_units {
+ MLXSW_REG_QPCR_IR_UNITS_M,
+ MLXSW_REG_QPCR_IR_UNITS_K,
+};
+
+/* reg_qpcr_ir_units
+ * Policer's units for cir and eir fields (for bytes limits only)
+ * 1 - 10^3
+ * 0 - 10^6
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, qpcr, ir_units, 0x04, 12, 1);
+
+enum mlxsw_reg_qpcr_rate_type {
+ MLXSW_REG_QPCR_RATE_TYPE_SINGLE = 1,
+ MLXSW_REG_QPCR_RATE_TYPE_DOUBLE = 2,
+};
+
+/* reg_qpcr_rate_type
+ * Policer can have one limit (single rate) or 2 limits with specific operation
+ * for packets that exceed the lower rate but not the upper one.
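+ * For a single rate policer only cir/cbs are used and violate_action applies
+ * above cir; for a double rate policer eir is the second, higher limit and
+ * exceed_action applies to packets between the two rates.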
+ * (For cpu port must be single rate)
+ * Access: RW for unbounded policer. RO for bounded policer.
+ */
+MLXSW_ITEM32(reg, qpcr, rate_type, 0x04, 8, 2);
+
+/* reg_qpcr_cbs
+ * Policer's committed burst size.
+ * The policer is working with time slices of 50 nano sec. By default every
+ * slice is granted the proportionate share of the committed rate. If we want to
+ * allow a slice to exceed that share (while still keeping the rate per sec) we
+ * can allow burst. The burst size can range from the default proportionate
+ * share (and no lower than 8) up to 32Gb. (Even though giving a number higher
+ * than the committed rate will result in exceeding the rate.) The burst size is
+ * a power of 2, given by 2^cbs.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpcr, cbs, 0x08, 24, 6);
+
+/* reg_qpcr_cir
+ * Policer's committed rate.
+ * The rate used for single rate, the lower rate for double rate.
+ * For bytes limits, the rate will be this value * the unit from ir_units.
+ * (Resolution error is up to 1%).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpcr, cir, 0x0C, 0, 32);
+
+/* reg_qpcr_eir
+ * Policer's exceed rate.
+ * The higher rate for double rate, reserved for single rate.
+ * For bytes limits, the rate will be this value * the unit from ir_units.
+ * (Resolution error is up to 1%).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpcr, eir, 0x10, 0, 32);
+
+#define MLXSW_REG_QPCR_DOUBLE_RATE_ACTION 2
+
+/* reg_qpcr_exceed_action
+ * What to do with packets between the 2 limits for double rate.
+ * Access: RW for unbounded policer. RO for bounded policer.
+ */
+MLXSW_ITEM32(reg, qpcr, exceed_action, 0x14, 0, 4);
+
+enum mlxsw_reg_qpcr_action {
+ /* Discard */
+ MLXSW_REG_QPCR_ACTION_DISCARD = 1,
+ /* Forward and set color to red.
+ * If the packet is intended to cpu port, it will be dropped.
+ */
+ MLXSW_REG_QPCR_ACTION_FORWARD = 2,
+};
+
+/* reg_qpcr_violate_action
+ * What to do with packets that cross the cir limit (for single rate) or the eir
+ * limit (for double rate).
+ * Access: RW for unbounded policer. RO for bounded policer.
+ */
+MLXSW_ITEM32(reg, qpcr, violate_action, 0x18, 0, 4);
+
+/* reg_qpcr_violate_count
+ * Counts the number of times violate_action happened on this PID.
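+ * The counter can be reset using the clear_counter field above.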
+ * Access: RW + */ +MLXSW_ITEM64(reg, qpcr, violate_count, 0x20, 0, 64); + +/* Packets */ +#define MLXSW_REG_QPCR_LOWEST_CIR 1 +#define MLXSW_REG_QPCR_HIGHEST_CIR (2 * 1000 * 1000 * 1000) /* 2Gpps */ +#define MLXSW_REG_QPCR_LOWEST_CBS 4 +#define MLXSW_REG_QPCR_HIGHEST_CBS 24 + +/* Bandwidth */ +#define MLXSW_REG_QPCR_LOWEST_CIR_BITS 1024 /* bps */ +#define MLXSW_REG_QPCR_HIGHEST_CIR_BITS 2000000000000ULL /* 2Tbps */ +#define MLXSW_REG_QPCR_LOWEST_CBS_BITS_SP1 4 +#define MLXSW_REG_QPCR_LOWEST_CBS_BITS_SP2 4 +#define MLXSW_REG_QPCR_HIGHEST_CBS_BITS_SP1 25 +#define MLXSW_REG_QPCR_HIGHEST_CBS_BITS_SP2 31 + +static inline void mlxsw_reg_qpcr_pack(char *payload, u16 pid, + enum mlxsw_reg_qpcr_ir_units ir_units, + bool bytes, u32 cir, u16 cbs) +{ + MLXSW_REG_ZERO(qpcr, payload); + mlxsw_reg_qpcr_pid_set(payload, pid); + mlxsw_reg_qpcr_g_set(payload, MLXSW_REG_QPCR_G_GLOBAL); + mlxsw_reg_qpcr_rate_type_set(payload, MLXSW_REG_QPCR_RATE_TYPE_SINGLE); + mlxsw_reg_qpcr_violate_action_set(payload, + MLXSW_REG_QPCR_ACTION_DISCARD); + mlxsw_reg_qpcr_cir_set(payload, cir); + mlxsw_reg_qpcr_ir_units_set(payload, ir_units); + mlxsw_reg_qpcr_bytes_set(payload, bytes); + mlxsw_reg_qpcr_cbs_set(payload, cbs); +} + +/* QTCT - QoS Switch Traffic Class Table + * ------------------------------------- + * Configures the mapping between the packet switch priority and the + * traffic class on the transmit port. + */ +#define MLXSW_REG_QTCT_ID 0x400A +#define MLXSW_REG_QTCT_LEN 0x08 + +MLXSW_REG_DEFINE(qtct, MLXSW_REG_QTCT_ID, MLXSW_REG_QTCT_LEN); + +/* reg_qtct_local_port + * Local port number. + * Access: Index + * + * Note: CPU port is not supported. + */ +MLXSW_ITEM32_LP(reg, qtct, 0x00, 16, 0x00, 12); + +/* reg_qtct_sub_port + * Virtual port within the physical port. + * Should be set to 0 when virtual ports are not enabled on the port. + * Access: Index + */ +MLXSW_ITEM32(reg, qtct, sub_port, 0x00, 8, 8); + +/* reg_qtct_switch_prio + * Switch priority. + * Access: Index + */ +MLXSW_ITEM32(reg, qtct, switch_prio, 0x00, 0, 4); + +/* reg_qtct_tclass + * Traffic class. + * Default values: + * switch_prio 0 : tclass 1 + * switch_prio 1 : tclass 0 + * switch_prio i : tclass i, for i > 1 + * Access: RW + */ +MLXSW_ITEM32(reg, qtct, tclass, 0x04, 0, 4); + +static inline void mlxsw_reg_qtct_pack(char *payload, u16 local_port, + u8 switch_prio, u8 tclass) +{ + MLXSW_REG_ZERO(qtct, payload); + mlxsw_reg_qtct_local_port_set(payload, local_port); + mlxsw_reg_qtct_switch_prio_set(payload, switch_prio); + mlxsw_reg_qtct_tclass_set(payload, tclass); +} + +/* QEEC - QoS ETS Element Configuration Register + * --------------------------------------------- + * Configures the ETS elements. + */ +#define MLXSW_REG_QEEC_ID 0x400D +#define MLXSW_REG_QEEC_LEN 0x20 + +MLXSW_REG_DEFINE(qeec, MLXSW_REG_QEEC_ID, MLXSW_REG_QEEC_LEN); + +/* reg_qeec_local_port + * Local port number. + * Access: Index + * + * Note: CPU port is supported. + */ +MLXSW_ITEM32_LP(reg, qeec, 0x00, 16, 0x00, 12); + +enum mlxsw_reg_qeec_hr { + MLXSW_REG_QEEC_HR_PORT, + MLXSW_REG_QEEC_HR_GROUP, + MLXSW_REG_QEEC_HR_SUBGROUP, + MLXSW_REG_QEEC_HR_TC, +}; + +/* reg_qeec_element_hierarchy + * 0 - Port + * 1 - Group + * 2 - Subgroup + * 3 - Traffic Class + * Access: Index + */ +MLXSW_ITEM32(reg, qeec, element_hierarchy, 0x04, 16, 4); + +/* reg_qeec_element_index + * The index of the element in the hierarchy. 
+ * Access: Index + */ +MLXSW_ITEM32(reg, qeec, element_index, 0x04, 0, 8); + +/* reg_qeec_next_element_index + * The index of the next (lower) element in the hierarchy. + * Access: RW + * + * Note: Reserved for element_hierarchy 0. + */ +MLXSW_ITEM32(reg, qeec, next_element_index, 0x08, 0, 8); + +/* reg_qeec_mise + * Min shaper configuration enable. Enables configuration of the min + * shaper on this ETS element + * 0 - Disable + * 1 - Enable + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, mise, 0x0C, 31, 1); + +/* reg_qeec_ptps + * PTP shaper + * 0: regular shaper mode + * 1: PTP oriented shaper + * Allowed only for hierarchy 0 + * Not supported for CPU port + * Note that ptps mode may affect the shaper rates of all hierarchies + * Supported only on Spectrum-1 + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, ptps, 0x0C, 29, 1); + +enum { + MLXSW_REG_QEEC_BYTES_MODE, + MLXSW_REG_QEEC_PACKETS_MODE, +}; + +/* reg_qeec_pb + * Packets or bytes mode. + * 0 - Bytes mode + * 1 - Packets mode + * Access: RW + * + * Note: Used for max shaper configuration. For Spectrum, packets mode + * is supported only for traffic classes of CPU port. + */ +MLXSW_ITEM32(reg, qeec, pb, 0x0C, 28, 1); + +/* The smallest permitted min shaper rate. */ +#define MLXSW_REG_QEEC_MIS_MIN 200000 /* Kbps */ + +/* reg_qeec_min_shaper_rate + * Min shaper information rate. + * For CPU port, can only be configured for port hierarchy. + * When in bytes mode, value is specified in units of 1000bps. + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, min_shaper_rate, 0x0C, 0, 28); + +/* reg_qeec_mase + * Max shaper configuration enable. Enables configuration of the max + * shaper on this ETS element. + * 0 - Disable + * 1 - Enable + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, mase, 0x10, 31, 1); + +/* The largest max shaper value possible to disable the shaper. */ +#define MLXSW_REG_QEEC_MAS_DIS ((1u << 31) - 1) /* Kbps */ + +/* reg_qeec_max_shaper_rate + * Max shaper information rate. + * For CPU port, can only be configured for port hierarchy. + * When in bytes mode, value is specified in units of 1000bps. + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, max_shaper_rate, 0x10, 0, 31); + +/* reg_qeec_de + * DWRR configuration enable. Enables configuration of the dwrr and + * dwrr_weight. + * 0 - Disable + * 1 - Enable + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, de, 0x18, 31, 1); + +/* reg_qeec_dwrr + * Transmission selection algorithm to use on the link going down from + * the ETS element. + * 0 - Strict priority + * 1 - DWRR + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, dwrr, 0x18, 15, 1); + +/* reg_qeec_dwrr_weight + * DWRR weight on the link going down from the ETS element. The + * percentage of bandwidth guaranteed to an ETS element within + * its hierarchy. The sum of all weights across all ETS elements + * within one hierarchy should be equal to 100. Reserved when + * transmission selection algorithm is strict priority. 
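+ * For example, three DWRR elements configured with weights 50, 30 and 20
+ * share the link bandwidth in a 5:3:2 ratio while all of them are backlogged.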
+ * Access: RW + */ +MLXSW_ITEM32(reg, qeec, dwrr_weight, 0x18, 0, 8); + +/* reg_qeec_max_shaper_bs + * Max shaper burst size + * Burst size is 2^max_shaper_bs * 512 bits + * For Spectrum-1: Range is: 5..25 + * For Spectrum-2: Range is: 11..25 + * Reserved when ptps = 1 + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, max_shaper_bs, 0x1C, 0, 6); + +#define MLXSW_REG_QEEC_HIGHEST_SHAPER_BS 25 +#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1 5 +#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2 11 +#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3 11 +#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP4 11 + +static inline void mlxsw_reg_qeec_pack(char *payload, u16 local_port, + enum mlxsw_reg_qeec_hr hr, u8 index, + u8 next_index) +{ + MLXSW_REG_ZERO(qeec, payload); + mlxsw_reg_qeec_local_port_set(payload, local_port); + mlxsw_reg_qeec_element_hierarchy_set(payload, hr); + mlxsw_reg_qeec_element_index_set(payload, index); + mlxsw_reg_qeec_next_element_index_set(payload, next_index); +} + +static inline void mlxsw_reg_qeec_ptps_pack(char *payload, u16 local_port, + bool ptps) +{ + MLXSW_REG_ZERO(qeec, payload); + mlxsw_reg_qeec_local_port_set(payload, local_port); + mlxsw_reg_qeec_element_hierarchy_set(payload, MLXSW_REG_QEEC_HR_PORT); + mlxsw_reg_qeec_ptps_set(payload, ptps); +} + +/* QRWE - QoS ReWrite Enable + * ------------------------- + * This register configures the rewrite enable per receive port. + */ +#define MLXSW_REG_QRWE_ID 0x400F +#define MLXSW_REG_QRWE_LEN 0x08 + +MLXSW_REG_DEFINE(qrwe, MLXSW_REG_QRWE_ID, MLXSW_REG_QRWE_LEN); + +/* reg_qrwe_local_port + * Local port number. + * Access: Index + * + * Note: CPU port is supported. No support for router port. + */ +MLXSW_ITEM32_LP(reg, qrwe, 0x00, 16, 0x00, 12); + +/* reg_qrwe_dscp + * Whether to enable DSCP rewrite (default is 0, don't rewrite). + * Access: RW + */ +MLXSW_ITEM32(reg, qrwe, dscp, 0x04, 1, 1); + +/* reg_qrwe_pcp + * Whether to enable PCP and DEI rewrite (default is 0, don't rewrite). + * Access: RW + */ +MLXSW_ITEM32(reg, qrwe, pcp, 0x04, 0, 1); + +static inline void mlxsw_reg_qrwe_pack(char *payload, u16 local_port, + bool rewrite_pcp, bool rewrite_dscp) +{ + MLXSW_REG_ZERO(qrwe, payload); + mlxsw_reg_qrwe_local_port_set(payload, local_port); + mlxsw_reg_qrwe_pcp_set(payload, rewrite_pcp); + mlxsw_reg_qrwe_dscp_set(payload, rewrite_dscp); +} + +/* QPDSM - QoS Priority to DSCP Mapping + * ------------------------------------ + * QoS Priority to DSCP Mapping Register + */ +#define MLXSW_REG_QPDSM_ID 0x4011 +#define MLXSW_REG_QPDSM_BASE_LEN 0x04 /* base length, without records */ +#define MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN 0x4 /* record length */ +#define MLXSW_REG_QPDSM_PRIO_ENTRY_REC_MAX_COUNT 16 +#define MLXSW_REG_QPDSM_LEN (MLXSW_REG_QPDSM_BASE_LEN + \ + MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN * \ + MLXSW_REG_QPDSM_PRIO_ENTRY_REC_MAX_COUNT) + +MLXSW_REG_DEFINE(qpdsm, MLXSW_REG_QPDSM_ID, MLXSW_REG_QPDSM_LEN); + +/* reg_qpdsm_local_port + * Local Port. Supported for data packets from CPU port. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, qpdsm, 0x00, 16, 0x00, 12); + +/* reg_qpdsm_prio_entry_color0_e + * Enable update of the entry for color 0 and a given port. + * Access: WO + */ +MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color0_e, + MLXSW_REG_QPDSM_BASE_LEN, 31, 1, + MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false); + +/* reg_qpdsm_prio_entry_color0_dscp + * DSCP field in the outer label of the packet for color 0 and a given port. + * Reserved when e=0. 
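+ * The color1 and color2 entries below follow the same pattern at different
+ * bit offsets.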
+ * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color0_dscp, + MLXSW_REG_QPDSM_BASE_LEN, 24, 6, + MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false); + +/* reg_qpdsm_prio_entry_color1_e + * Enable update of the entry for color 1 and a given port. + * Access: WO + */ +MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color1_e, + MLXSW_REG_QPDSM_BASE_LEN, 23, 1, + MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false); + +/* reg_qpdsm_prio_entry_color1_dscp + * DSCP field in the outer label of the packet for color 1 and a given port. + * Reserved when e=0. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color1_dscp, + MLXSW_REG_QPDSM_BASE_LEN, 16, 6, + MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false); + +/* reg_qpdsm_prio_entry_color2_e + * Enable update of the entry for color 2 and a given port. + * Access: WO + */ +MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color2_e, + MLXSW_REG_QPDSM_BASE_LEN, 15, 1, + MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false); + +/* reg_qpdsm_prio_entry_color2_dscp + * DSCP field in the outer label of the packet for color 2 and a given port. + * Reserved when e=0. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color2_dscp, + MLXSW_REG_QPDSM_BASE_LEN, 8, 6, + MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false); + +static inline void mlxsw_reg_qpdsm_pack(char *payload, u16 local_port) +{ + MLXSW_REG_ZERO(qpdsm, payload); + mlxsw_reg_qpdsm_local_port_set(payload, local_port); +} + +static inline void +mlxsw_reg_qpdsm_prio_pack(char *payload, unsigned short prio, u8 dscp) +{ + mlxsw_reg_qpdsm_prio_entry_color0_e_set(payload, prio, 1); + mlxsw_reg_qpdsm_prio_entry_color0_dscp_set(payload, prio, dscp); + mlxsw_reg_qpdsm_prio_entry_color1_e_set(payload, prio, 1); + mlxsw_reg_qpdsm_prio_entry_color1_dscp_set(payload, prio, dscp); + mlxsw_reg_qpdsm_prio_entry_color2_e_set(payload, prio, 1); + mlxsw_reg_qpdsm_prio_entry_color2_dscp_set(payload, prio, dscp); +} + +/* QPDP - QoS Port DSCP to Priority Mapping Register + * ------------------------------------------------- + * This register controls the port default Switch Priority and Color. The + * default Switch Priority and Color are used for frames where the trust state + * uses default values. All member ports of a LAG should be configured with the + * same default values. + */ +#define MLXSW_REG_QPDP_ID 0x4007 +#define MLXSW_REG_QPDP_LEN 0x8 + +MLXSW_REG_DEFINE(qpdp, MLXSW_REG_QPDP_ID, MLXSW_REG_QPDP_LEN); + +/* reg_qpdp_local_port + * Local Port. Supported for data packets from CPU port. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, qpdp, 0x00, 16, 0x00, 12); + +/* reg_qpdp_switch_prio + * Default port Switch Priority (default 0) + * Access: RW + */ +MLXSW_ITEM32(reg, qpdp, switch_prio, 0x04, 0, 4); + +static inline void mlxsw_reg_qpdp_pack(char *payload, u16 local_port, + u8 switch_prio) +{ + MLXSW_REG_ZERO(qpdp, payload); + mlxsw_reg_qpdp_local_port_set(payload, local_port); + mlxsw_reg_qpdp_switch_prio_set(payload, switch_prio); +} + +/* QPDPM - QoS Port DSCP to Priority Mapping Register + * -------------------------------------------------- + * This register controls the mapping from DSCP field to + * Switch Priority for IP packets. 
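+ *
+ * A minimal usage sketch (illustrative only; local_port, the DSCP to priority
+ * map and the mlxsw_core handle are caller-supplied):
+ *
+ *   char qpdpm_pl[MLXSW_REG_QPDPM_LEN];
+ *
+ *   mlxsw_reg_qpdpm_pack(qpdpm_pl, local_port);
+ *   mlxsw_reg_qpdpm_dscp_pack(qpdpm_pl, 0, 0);   (DSCP 0 -> priority 0)
+ *   mlxsw_reg_qpdpm_dscp_pack(qpdpm_pl, 46, 5);  (DSCP 46 -> priority 5)
+ *   mlxsw_reg_write(mlxsw_core, MLXSW_REG(qpdpm), qpdpm_pl);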
+ */ +#define MLXSW_REG_QPDPM_ID 0x4013 +#define MLXSW_REG_QPDPM_BASE_LEN 0x4 /* base length, without records */ +#define MLXSW_REG_QPDPM_DSCP_ENTRY_REC_LEN 0x2 /* record length */ +#define MLXSW_REG_QPDPM_DSCP_ENTRY_REC_MAX_COUNT 64 +#define MLXSW_REG_QPDPM_LEN (MLXSW_REG_QPDPM_BASE_LEN + \ + MLXSW_REG_QPDPM_DSCP_ENTRY_REC_LEN * \ + MLXSW_REG_QPDPM_DSCP_ENTRY_REC_MAX_COUNT) + +MLXSW_REG_DEFINE(qpdpm, MLXSW_REG_QPDPM_ID, MLXSW_REG_QPDPM_LEN); + +/* reg_qpdpm_local_port + * Local Port. Supported for data packets from CPU port. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, qpdpm, 0x00, 16, 0x00, 12); + +/* reg_qpdpm_dscp_e + * Enable update of the specific entry. When cleared, the switch_prio and color + * fields are ignored and the previous switch_prio and color values are + * preserved. + * Access: WO + */ +MLXSW_ITEM16_INDEXED(reg, qpdpm, dscp_entry_e, MLXSW_REG_QPDPM_BASE_LEN, 15, 1, + MLXSW_REG_QPDPM_DSCP_ENTRY_REC_LEN, 0x00, false); + +/* reg_qpdpm_dscp_prio + * The new Switch Priority value for the relevant DSCP value. + * Access: RW + */ +MLXSW_ITEM16_INDEXED(reg, qpdpm, dscp_entry_prio, + MLXSW_REG_QPDPM_BASE_LEN, 0, 4, + MLXSW_REG_QPDPM_DSCP_ENTRY_REC_LEN, 0x00, false); + +static inline void mlxsw_reg_qpdpm_pack(char *payload, u16 local_port) +{ + MLXSW_REG_ZERO(qpdpm, payload); + mlxsw_reg_qpdpm_local_port_set(payload, local_port); +} + +static inline void +mlxsw_reg_qpdpm_dscp_pack(char *payload, unsigned short dscp, u8 prio) +{ + mlxsw_reg_qpdpm_dscp_entry_e_set(payload, dscp, 1); + mlxsw_reg_qpdpm_dscp_entry_prio_set(payload, dscp, prio); +} + +/* QTCTM - QoS Switch Traffic Class Table is Multicast-Aware Register + * ------------------------------------------------------------------ + * This register configures if the Switch Priority to Traffic Class mapping is + * based on Multicast packet indication. If so, then multicast packets will get + * a Traffic Class that is plus (cap_max_tclass_data/2) the value configured by + * QTCT. + * By default, Switch Priority to Traffic Class mapping is not based on + * Multicast packet indication. + */ +#define MLXSW_REG_QTCTM_ID 0x401A +#define MLXSW_REG_QTCTM_LEN 0x08 + +MLXSW_REG_DEFINE(qtctm, MLXSW_REG_QTCTM_ID, MLXSW_REG_QTCTM_LEN); + +/* reg_qtctm_local_port + * Local port number. + * No support for CPU port. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, qtctm, 0x00, 16, 0x00, 12); + +/* reg_qtctm_mc + * Multicast Mode + * Whether Switch Priority to Traffic Class mapping is based on Multicast packet + * indication (default is 0, not based on Multicast packet indication). + */ +MLXSW_ITEM32(reg, qtctm, mc, 0x04, 0, 1); + +static inline void +mlxsw_reg_qtctm_pack(char *payload, u16 local_port, bool mc) +{ + MLXSW_REG_ZERO(qtctm, payload); + mlxsw_reg_qtctm_local_port_set(payload, local_port); + mlxsw_reg_qtctm_mc_set(payload, mc); +} + +/* QPSC - QoS PTP Shaper Configuration Register + * -------------------------------------------- + * The QPSC allows advanced configuration of the shapers when QEEC.ptps=1. + * Supported only on Spectrum-1. + */ +#define MLXSW_REG_QPSC_ID 0x401B +#define MLXSW_REG_QPSC_LEN 0x28 + +MLXSW_REG_DEFINE(qpsc, MLXSW_REG_QPSC_ID, MLXSW_REG_QPSC_LEN); + +enum mlxsw_reg_qpsc_port_speed { + MLXSW_REG_QPSC_PORT_SPEED_100M, + MLXSW_REG_QPSC_PORT_SPEED_1G, + MLXSW_REG_QPSC_PORT_SPEED_10G, + MLXSW_REG_QPSC_PORT_SPEED_25G, +}; + +/* reg_qpsc_port_speed + * Port speed. 
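+ *
+ * Usage sketch (editorial addition, not part of the original patch): when
+ * the PTP shaper is enabled (QEEC.ptps = 1), one QPSC record is typically
+ * written per supported speed. The params[] table and its struct are
+ * hypothetical here; "core" is the mlxsw core handle and mlxsw_reg_write()
+ * the usual core helper.
+ *
+ *    char qpsc_pl[MLXSW_REG_QPSC_LEN];
+ *    int i, err;
+ *
+ *    for (i = 0; i < ARRAY_SIZE(params); i++) {
+ *            const struct my_ptp_shaper_params *p = &params[i];
+ *
+ *            mlxsw_reg_qpsc_pack(qpsc_pl, p->port_speed, p->shaper_time_exp,
+ *                                p->shaper_time_mantissa, p->shaper_inc,
+ *                                p->shaper_bs, p->port_to_shaper_credits,
+ *                                p->ing_timestamp_inc, p->egr_timestamp_inc);
+ *            err = mlxsw_reg_write(core, MLXSW_REG(qpsc), qpsc_pl);
+ *            if (err)
+ *                    return err;
+ *    }
+ *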
+ * Access: Index + */ +MLXSW_ITEM32(reg, qpsc, port_speed, 0x00, 0, 4); + +/* reg_qpsc_shaper_time_exp + * The base-time-interval for updating the shapers tokens (for all hierarchies). + * shaper_update_rate = 2 ^ shaper_time_exp * (1 + shaper_time_mantissa) * 32nSec + * shaper_rate = 64bit * shaper_inc / shaper_update_rate + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, shaper_time_exp, 0x04, 16, 4); + +/* reg_qpsc_shaper_time_mantissa + * The base-time-interval for updating the shapers tokens (for all hierarchies). + * shaper_update_rate = 2 ^ shaper_time_exp * (1 + shaper_time_mantissa) * 32nSec + * shaper_rate = 64bit * shaper_inc / shaper_update_rate + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, shaper_time_mantissa, 0x04, 0, 5); + +/* reg_qpsc_shaper_inc + * Number of tokens added to shaper on each update. + * Units of 8B. + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, shaper_inc, 0x08, 0, 5); + +/* reg_qpsc_shaper_bs + * Max shaper Burst size. + * Burst size is 2 ^ max_shaper_bs * 512 [bits] + * Range is: 5..25 (from 2KB..2GB) + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, shaper_bs, 0x0C, 0, 6); + +/* reg_qpsc_ptsc_we + * Write enable to port_to_shaper_credits. + * Access: WO + */ +MLXSW_ITEM32(reg, qpsc, ptsc_we, 0x10, 31, 1); + +/* reg_qpsc_port_to_shaper_credits + * For split ports: range 1..57 + * For non-split ports: range 1..112 + * Written only when ptsc_we is set. + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, port_to_shaper_credits, 0x10, 0, 8); + +/* reg_qpsc_ing_timestamp_inc + * Ingress timestamp increment. + * 2's complement. + * The timestamp of MTPPTR at ingress will be incremented by this value. Global + * value for all ports. + * Same units as used by MTPPTR. + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, ing_timestamp_inc, 0x20, 0, 32); + +/* reg_qpsc_egr_timestamp_inc + * Egress timestamp increment. + * 2's complement. + * The timestamp of MTPPTR at egress will be incremented by this value. Global + * value for all ports. + * Same units as used by MTPPTR. + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, egr_timestamp_inc, 0x24, 0, 32); + +static inline void +mlxsw_reg_qpsc_pack(char *payload, enum mlxsw_reg_qpsc_port_speed port_speed, + u8 shaper_time_exp, u8 shaper_time_mantissa, u8 shaper_inc, + u8 shaper_bs, u8 port_to_shaper_credits, + int ing_timestamp_inc, int egr_timestamp_inc) +{ + MLXSW_REG_ZERO(qpsc, payload); + mlxsw_reg_qpsc_port_speed_set(payload, port_speed); + mlxsw_reg_qpsc_shaper_time_exp_set(payload, shaper_time_exp); + mlxsw_reg_qpsc_shaper_time_mantissa_set(payload, shaper_time_mantissa); + mlxsw_reg_qpsc_shaper_inc_set(payload, shaper_inc); + mlxsw_reg_qpsc_shaper_bs_set(payload, shaper_bs); + mlxsw_reg_qpsc_ptsc_we_set(payload, true); + mlxsw_reg_qpsc_port_to_shaper_credits_set(payload, port_to_shaper_credits); + mlxsw_reg_qpsc_ing_timestamp_inc_set(payload, ing_timestamp_inc); + mlxsw_reg_qpsc_egr_timestamp_inc_set(payload, egr_timestamp_inc); +} + +/* PMLP - Ports Module to Local Port Register + * ------------------------------------------ + * Configures the assignment of modules to local ports. + */ +#define MLXSW_REG_PMLP_ID 0x5002 +#define MLXSW_REG_PMLP_LEN 0x40 + +MLXSW_REG_DEFINE(pmlp, MLXSW_REG_PMLP_ID, MLXSW_REG_PMLP_LEN); + +/* reg_pmlp_rxtx + * 0 - Tx value is used for both Tx and Rx. + * 1 - Rx value is taken from a separte field. + * Access: RW + */ +MLXSW_ITEM32(reg, pmlp, rxtx, 0x00, 31, 1); + +/* reg_pmlp_local_port + * Local port number. 
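+ *
+ * Usage sketch (editorial addition, not part of the original patch):
+ * reading the current module mapping of a port follows the usual
+ * pack/query/get pattern; index 0 below selects the first lane. "core"
+ * and "local_port" are illustrative; mlxsw_reg_query() and MLXSW_REG()
+ * come from the mlxsw core code.
+ *
+ *    char pmlp_pl[MLXSW_REG_PMLP_LEN];
+ *    u8 width, module;
+ *    int err;
+ *
+ *    mlxsw_reg_pmlp_pack(pmlp_pl, local_port);
+ *    err = mlxsw_reg_query(core, MLXSW_REG(pmlp), pmlp_pl);
+ *    if (err)
+ *            return err;
+ *    width = mlxsw_reg_pmlp_width_get(pmlp_pl);
+ *    module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0);
+ *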
+ * Access: Index + */ +MLXSW_ITEM32_LP(reg, pmlp, 0x00, 16, 0x00, 12); + +/* reg_pmlp_width + * 0 - Unmap local port. + * 1 - Lane 0 is used. + * 2 - Lanes 0 and 1 are used. + * 4 - Lanes 0, 1, 2 and 3 are used. + * 8 - Lanes 0-7 are used. + * Access: RW + */ +MLXSW_ITEM32(reg, pmlp, width, 0x00, 0, 8); + +/* reg_pmlp_module + * Module number. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, pmlp, module, 0x04, 0, 8, 0x04, 0x00, false); + +/* reg_pmlp_slot_index + * Module number. + * Slot_index + * Slot_index = 0 represent the onboard (motherboard). + * In case of non-modular system only slot_index = 0 is available. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, pmlp, slot_index, 0x04, 8, 4, 0x04, 0x00, false); + +/* reg_pmlp_tx_lane + * Tx Lane. When rxtx field is cleared, this field is used for Rx as well. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, pmlp, tx_lane, 0x04, 16, 4, 0x04, 0x00, false); + +/* reg_pmlp_rx_lane + * Rx Lane. When rxtx field is cleared, this field is ignored and Rx lane is + * equal to Tx lane. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, pmlp, rx_lane, 0x04, 24, 4, 0x04, 0x00, false); + +static inline void mlxsw_reg_pmlp_pack(char *payload, u16 local_port) +{ + MLXSW_REG_ZERO(pmlp, payload); + mlxsw_reg_pmlp_local_port_set(payload, local_port); +} + +/* PMTU - Port MTU Register + * ------------------------ + * Configures and reports the port MTU. + */ +#define MLXSW_REG_PMTU_ID 0x5003 +#define MLXSW_REG_PMTU_LEN 0x10 + +MLXSW_REG_DEFINE(pmtu, MLXSW_REG_PMTU_ID, MLXSW_REG_PMTU_LEN); + +/* reg_pmtu_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, pmtu, 0x00, 16, 0x00, 12); + +/* reg_pmtu_max_mtu + * Maximum MTU. + * When port type (e.g. Ethernet) is configured, the relevant MTU is + * reported, otherwise the minimum between the max_mtu of the different + * types is reported. + * Access: RO + */ +MLXSW_ITEM32(reg, pmtu, max_mtu, 0x04, 16, 16); + +/* reg_pmtu_admin_mtu + * MTU value to set port to. Must be smaller or equal to max_mtu. + * Note: If port type is Infiniband, then port must be disabled, when its + * MTU is set. + * Access: RW + */ +MLXSW_ITEM32(reg, pmtu, admin_mtu, 0x08, 16, 16); + +/* reg_pmtu_oper_mtu + * The actual MTU configured on the port. Packets exceeding this size + * will be dropped. + * Note: In Ethernet and FC oper_mtu == admin_mtu, however, in Infiniband + * oper_mtu might be smaller than admin_mtu. + * Access: RO + */ +MLXSW_ITEM32(reg, pmtu, oper_mtu, 0x0C, 16, 16); + +static inline void mlxsw_reg_pmtu_pack(char *payload, u16 local_port, + u16 new_mtu) +{ + MLXSW_REG_ZERO(pmtu, payload); + mlxsw_reg_pmtu_local_port_set(payload, local_port); + mlxsw_reg_pmtu_max_mtu_set(payload, 0); + mlxsw_reg_pmtu_admin_mtu_set(payload, new_mtu); + mlxsw_reg_pmtu_oper_mtu_set(payload, 0); +} + +/* PTYS - Port Type and Speed Register + * ----------------------------------- + * Configures and reports the port speed type. + * + * Note: When set while the link is up, the changes will not take effect + * until the port transitions from down to up state. + */ +#define MLXSW_REG_PTYS_ID 0x5004 +#define MLXSW_REG_PTYS_LEN 0x40 + +MLXSW_REG_DEFINE(ptys, MLXSW_REG_PTYS_ID, MLXSW_REG_PTYS_LEN); + +/* an_disable_admin + * Auto negotiation disable administrative configuration + * 0 - Device doesn't support AN disable. + * 1 - Device supports AN disable. + * Access: RW + */ +MLXSW_ITEM32(reg, ptys, an_disable_admin, 0x00, 30, 1); + +/* reg_ptys_local_port + * Local port number. 
+ * Access: Index + */ +MLXSW_ITEM32_LP(reg, ptys, 0x00, 16, 0x00, 12); + +#define MLXSW_REG_PTYS_PROTO_MASK_IB BIT(0) +#define MLXSW_REG_PTYS_PROTO_MASK_ETH BIT(2) + +/* reg_ptys_proto_mask + * Protocol mask. Indicates which protocol is used. + * 0 - Infiniband. + * 1 - Fibre Channel. + * 2 - Ethernet. + * Access: Index + */ +MLXSW_ITEM32(reg, ptys, proto_mask, 0x00, 0, 3); + +enum { + MLXSW_REG_PTYS_AN_STATUS_NA, + MLXSW_REG_PTYS_AN_STATUS_OK, + MLXSW_REG_PTYS_AN_STATUS_FAIL, +}; + +/* reg_ptys_an_status + * Autonegotiation status. + * Access: RO + */ +MLXSW_ITEM32(reg, ptys, an_status, 0x04, 28, 4); + +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_SGMII_100M BIT(0) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_1000BASE_X_SGMII BIT(1) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_5GBASE_R BIT(3) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_XFI_XAUI_1_10G BIT(4) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_XLAUI_4_XLPPI_4_40G BIT(5) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_25GAUI_1_25GBASE_CR_KR BIT(6) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_2_LAUI_2_50GBASE_CR2_KR2 BIT(7) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_1_LAUI_1_50GBASE_CR_KR BIT(8) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_CAUI_4_100GBASE_CR4_KR4 BIT(9) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_100GAUI_2_100GBASE_CR2_KR2 BIT(10) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_200GAUI_4_200GBASE_CR4_KR4 BIT(12) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_400GAUI_8 BIT(15) + +/* reg_ptys_ext_eth_proto_cap + * Extended Ethernet port supported speeds and protocols. + * Access: RO + */ +MLXSW_ITEM32(reg, ptys, ext_eth_proto_cap, 0x08, 0, 32); + +#define MLXSW_REG_PTYS_ETH_SPEED_SGMII BIT(0) +#define MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX BIT(1) +#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 BIT(2) +#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 BIT(3) +#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR BIT(4) +#define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 BIT(6) +#define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 BIT(7) +#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR BIT(12) +#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR BIT(13) +#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_ER_LR BIT(14) +#define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 BIT(15) +#define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_LR4_ER4 BIT(16) +#define MLXSW_REG_PTYS_ETH_SPEED_50GBASE_SR2 BIT(18) +#define MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR4 BIT(19) +#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 BIT(20) +#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 BIT(21) +#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 BIT(22) +#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4 BIT(23) +#define MLXSW_REG_PTYS_ETH_SPEED_100BASE_T BIT(24) +#define MLXSW_REG_PTYS_ETH_SPEED_1000BASE_T BIT(25) +#define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR BIT(27) +#define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR BIT(28) +#define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR BIT(29) +#define MLXSW_REG_PTYS_ETH_SPEED_50GBASE_CR2 BIT(30) +#define MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR2 BIT(31) + +/* reg_ptys_eth_proto_cap + * Ethernet port supported speeds and protocols. + * Access: RO + */ +MLXSW_ITEM32(reg, ptys, eth_proto_cap, 0x0C, 0, 32); + +/* reg_ptys_ext_eth_proto_admin + * Extended speed and protocol to set port to. + * Access: RW + */ +MLXSW_ITEM32(reg, ptys, ext_eth_proto_admin, 0x14, 0, 32); + +/* reg_ptys_eth_proto_admin + * Speed and protocol to set port to. + * Access: RW + */ +MLXSW_ITEM32(reg, ptys, eth_proto_admin, 0x18, 0, 32); + +/* reg_ptys_ext_eth_proto_oper + * The extended current speed and protocol configured for the port. 
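+ *
+ * Usage sketch (editorial addition, not part of the original patch): an
+ * ethtool-style flow writes the admin mask and reads back the three masks
+ * with the pack/unpack helpers defined below. "core", "local_port" and
+ * "proto_admin" (a mask built from the MLXSW_REG_PTYS_EXT_ETH_SPEED_*
+ * bits) are illustrative; mlxsw_reg_write() and mlxsw_reg_query() are the
+ * mlxsw core helpers.
+ *
+ *    char ptys_pl[MLXSW_REG_PTYS_LEN];
+ *    u32 eth_proto_cap, eth_proto_admin, eth_proto_oper;
+ *    int err;
+ *
+ *    mlxsw_reg_ptys_ext_eth_pack(ptys_pl, local_port, proto_admin, true);
+ *    err = mlxsw_reg_write(core, MLXSW_REG(ptys), ptys_pl);
+ *    if (err)
+ *            return err;
+ *
+ *    mlxsw_reg_ptys_ext_eth_pack(ptys_pl, local_port, 0, false);
+ *    err = mlxsw_reg_query(core, MLXSW_REG(ptys), ptys_pl);
+ *    if (err)
+ *            return err;
+ *    mlxsw_reg_ptys_ext_eth_unpack(ptys_pl, &eth_proto_cap,
+ *                                  &eth_proto_admin, &eth_proto_oper);
+ *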
+ * Access: RO + */ +MLXSW_ITEM32(reg, ptys, ext_eth_proto_oper, 0x20, 0, 32); + +/* reg_ptys_eth_proto_oper + * The current speed and protocol configured for the port. + * Access: RO + */ +MLXSW_ITEM32(reg, ptys, eth_proto_oper, 0x24, 0, 32); + +enum mlxsw_reg_ptys_connector_type { + MLXSW_REG_PTYS_CONNECTOR_TYPE_UNKNOWN_OR_NO_CONNECTOR, + MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_NONE, + MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_TP, + MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_AUI, + MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_BNC, + MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_MII, + MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_FIBRE, + MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_DA, + MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_OTHER, +}; + +/* reg_ptys_connector_type + * Connector type indication. + * Access: RO + */ +MLXSW_ITEM32(reg, ptys, connector_type, 0x2C, 0, 4); + +static inline void mlxsw_reg_ptys_eth_pack(char *payload, u16 local_port, + u32 proto_admin, bool autoneg) +{ + MLXSW_REG_ZERO(ptys, payload); + mlxsw_reg_ptys_local_port_set(payload, local_port); + mlxsw_reg_ptys_proto_mask_set(payload, MLXSW_REG_PTYS_PROTO_MASK_ETH); + mlxsw_reg_ptys_eth_proto_admin_set(payload, proto_admin); + mlxsw_reg_ptys_an_disable_admin_set(payload, !autoneg); +} + +static inline void mlxsw_reg_ptys_ext_eth_pack(char *payload, u16 local_port, + u32 proto_admin, bool autoneg) +{ + MLXSW_REG_ZERO(ptys, payload); + mlxsw_reg_ptys_local_port_set(payload, local_port); + mlxsw_reg_ptys_proto_mask_set(payload, MLXSW_REG_PTYS_PROTO_MASK_ETH); + mlxsw_reg_ptys_ext_eth_proto_admin_set(payload, proto_admin); + mlxsw_reg_ptys_an_disable_admin_set(payload, !autoneg); +} + +static inline void mlxsw_reg_ptys_eth_unpack(char *payload, + u32 *p_eth_proto_cap, + u32 *p_eth_proto_admin, + u32 *p_eth_proto_oper) +{ + if (p_eth_proto_cap) + *p_eth_proto_cap = + mlxsw_reg_ptys_eth_proto_cap_get(payload); + if (p_eth_proto_admin) + *p_eth_proto_admin = + mlxsw_reg_ptys_eth_proto_admin_get(payload); + if (p_eth_proto_oper) + *p_eth_proto_oper = + mlxsw_reg_ptys_eth_proto_oper_get(payload); +} + +static inline void mlxsw_reg_ptys_ext_eth_unpack(char *payload, + u32 *p_eth_proto_cap, + u32 *p_eth_proto_admin, + u32 *p_eth_proto_oper) +{ + if (p_eth_proto_cap) + *p_eth_proto_cap = + mlxsw_reg_ptys_ext_eth_proto_cap_get(payload); + if (p_eth_proto_admin) + *p_eth_proto_admin = + mlxsw_reg_ptys_ext_eth_proto_admin_get(payload); + if (p_eth_proto_oper) + *p_eth_proto_oper = + mlxsw_reg_ptys_ext_eth_proto_oper_get(payload); +} + +/* PPAD - Port Physical Address Register + * ------------------------------------- + * The PPAD register configures the per port physical MAC address. + */ +#define MLXSW_REG_PPAD_ID 0x5005 +#define MLXSW_REG_PPAD_LEN 0x10 + +MLXSW_REG_DEFINE(ppad, MLXSW_REG_PPAD_ID, MLXSW_REG_PPAD_LEN); + +/* reg_ppad_single_base_mac + * 0: base_mac, local port should be 0 and mac[7:0] is + * reserved. HW will set incremental + * 1: single_mac - mac of the local_port + * Access: RW + */ +MLXSW_ITEM32(reg, ppad, single_base_mac, 0x00, 28, 1); + +/* reg_ppad_local_port + * port number, if single_base_mac = 0 then local_port is reserved + * Access: RW + */ +MLXSW_ITEM32_LP(reg, ppad, 0x00, 16, 0x00, 24); + +/* reg_ppad_mac + * If single_base_mac = 0 - base MAC address, mac[7:0] is reserved. 
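+ *
+ * Usage sketch (editorial addition, not part of the original patch):
+ * programming the switch base MAC address could look as follows, where
+ * "core" and base_mac[ETH_ALEN] are illustrative and the *_memcpy_to()
+ * helper is the one generated by the MLXSW_ITEM_BUF() definition below.
+ *
+ *    char ppad_pl[MLXSW_REG_PPAD_LEN];
+ *    int err;
+ *
+ *    mlxsw_reg_ppad_pack(ppad_pl, false, 0);
+ *    mlxsw_reg_ppad_mac_memcpy_to(ppad_pl, base_mac);
+ *    err = mlxsw_reg_write(core, MLXSW_REG(ppad), ppad_pl);
+ *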
+ * If single_base_mac = 1 - the per port MAC address + * Access: RW + */ +MLXSW_ITEM_BUF(reg, ppad, mac, 0x02, 6); + +static inline void mlxsw_reg_ppad_pack(char *payload, bool single_base_mac, + u16 local_port) +{ + MLXSW_REG_ZERO(ppad, payload); + mlxsw_reg_ppad_single_base_mac_set(payload, !!single_base_mac); + mlxsw_reg_ppad_local_port_set(payload, local_port); +} + +/* PAOS - Ports Administrative and Operational Status Register + * ----------------------------------------------------------- + * Configures and retrieves per port administrative and operational status. + */ +#define MLXSW_REG_PAOS_ID 0x5006 +#define MLXSW_REG_PAOS_LEN 0x10 + +MLXSW_REG_DEFINE(paos, MLXSW_REG_PAOS_ID, MLXSW_REG_PAOS_LEN); + +/* reg_paos_swid + * Switch partition ID with which to associate the port. + * Note: while external ports uses unique local port numbers (and thus swid is + * redundant), router ports use the same local port number where swid is the + * only indication for the relevant port. + * Access: Index + */ +MLXSW_ITEM32(reg, paos, swid, 0x00, 24, 8); + +/* reg_paos_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, paos, 0x00, 16, 0x00, 12); + +/* reg_paos_admin_status + * Port administrative state (the desired state of the port): + * 1 - Up. + * 2 - Down. + * 3 - Up once. This means that in case of link failure, the port won't go + * into polling mode, but will wait to be re-enabled by software. + * 4 - Disabled by system. Can only be set by hardware. + * Access: RW + */ +MLXSW_ITEM32(reg, paos, admin_status, 0x00, 8, 4); + +/* reg_paos_oper_status + * Port operational state (the current state): + * 1 - Up. + * 2 - Down. + * 3 - Down by port failure. This means that the device will not let the + * port up again until explicitly specified by software. + * Access: RO + */ +MLXSW_ITEM32(reg, paos, oper_status, 0x00, 0, 4); + +/* reg_paos_ase + * Admin state update enabled. + * Access: WO + */ +MLXSW_ITEM32(reg, paos, ase, 0x04, 31, 1); + +/* reg_paos_ee + * Event update enable. If this bit is set, event generation will be + * updated based on the e field. + * Access: WO + */ +MLXSW_ITEM32(reg, paos, ee, 0x04, 30, 1); + +/* reg_paos_e + * Event generation on operational state change: + * 0 - Do not generate event. + * 1 - Generate Event. + * 2 - Generate Single Event. + * Access: RW + */ +MLXSW_ITEM32(reg, paos, e, 0x04, 0, 2); + +static inline void mlxsw_reg_paos_pack(char *payload, u16 local_port, + enum mlxsw_port_admin_status status) +{ + MLXSW_REG_ZERO(paos, payload); + mlxsw_reg_paos_swid_set(payload, 0); + mlxsw_reg_paos_local_port_set(payload, local_port); + mlxsw_reg_paos_admin_status_set(payload, status); + mlxsw_reg_paos_oper_status_set(payload, 0); + mlxsw_reg_paos_ase_set(payload, 1); + mlxsw_reg_paos_ee_set(payload, 1); + mlxsw_reg_paos_e_set(payload, 1); +} + +/* PFCC - Ports Flow Control Configuration Register + * ------------------------------------------------ + * Configures and retrieves the per port flow control configuration. + */ +#define MLXSW_REG_PFCC_ID 0x5007 +#define MLXSW_REG_PFCC_LEN 0x20 + +MLXSW_REG_DEFINE(pfcc, MLXSW_REG_PFCC_ID, MLXSW_REG_PFCC_LEN); + +/* reg_pfcc_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, pfcc, 0x00, 16, 0x00, 12); + +/* reg_pfcc_pnat + * Port number access type. Determines the way local_port is interpreted: + * 0 - Local port number. + * 1 - IB / label port number. 
+ * Access: Index + */ +MLXSW_ITEM32(reg, pfcc, pnat, 0x00, 14, 2); + +/* reg_pfcc_shl_cap + * Send to higher layers capabilities: + * 0 - No capability of sending Pause and PFC frames to higher layers. + * 1 - Device has capability of sending Pause and PFC frames to higher + * layers. + * Access: RO + */ +MLXSW_ITEM32(reg, pfcc, shl_cap, 0x00, 1, 1); + +/* reg_pfcc_shl_opr + * Send to higher layers operation: + * 0 - Pause and PFC frames are handled by the port (default). + * 1 - Pause and PFC frames are handled by the port and also sent to + * higher layers. Only valid if shl_cap = 1. + * Access: RW + */ +MLXSW_ITEM32(reg, pfcc, shl_opr, 0x00, 0, 1); + +/* reg_pfcc_ppan + * Pause policy auto negotiation. + * 0 - Disabled. Generate / ignore Pause frames based on pptx / pprtx. + * 1 - Enabled. When auto-negotiation is performed, set the Pause policy + * based on the auto-negotiation resolution. + * Access: RW + * + * Note: The auto-negotiation advertisement is set according to pptx and + * pprtx. When PFC is set on Tx / Rx, ppan must be set to 0. + */ +MLXSW_ITEM32(reg, pfcc, ppan, 0x04, 28, 4); + +/* reg_pfcc_prio_mask_tx + * Bit per priority indicating if Tx flow control policy should be + * updated based on bit pfctx. + * Access: WO + */ +MLXSW_ITEM32(reg, pfcc, prio_mask_tx, 0x04, 16, 8); + +/* reg_pfcc_prio_mask_rx + * Bit per priority indicating if Rx flow control policy should be + * updated based on bit pfcrx. + * Access: WO + */ +MLXSW_ITEM32(reg, pfcc, prio_mask_rx, 0x04, 0, 8); + +/* reg_pfcc_pptx + * Admin Pause policy on Tx. + * 0 - Never generate Pause frames (default). + * 1 - Generate Pause frames according to Rx buffer threshold. + * Access: RW + */ +MLXSW_ITEM32(reg, pfcc, pptx, 0x08, 31, 1); + +/* reg_pfcc_aptx + * Active (operational) Pause policy on Tx. + * 0 - Never generate Pause frames. + * 1 - Generate Pause frames according to Rx buffer threshold. + * Access: RO + */ +MLXSW_ITEM32(reg, pfcc, aptx, 0x08, 30, 1); + +/* reg_pfcc_pfctx + * Priority based flow control policy on Tx[7:0]. Per-priority bit mask: + * 0 - Never generate priority Pause frames on the specified priority + * (default). + * 1 - Generate priority Pause frames according to Rx buffer threshold on + * the specified priority. + * Access: RW + * + * Note: pfctx and pptx must be mutually exclusive. + */ +MLXSW_ITEM32(reg, pfcc, pfctx, 0x08, 16, 8); + +/* reg_pfcc_pprx + * Admin Pause policy on Rx. + * 0 - Ignore received Pause frames (default). + * 1 - Respect received Pause frames. + * Access: RW + */ +MLXSW_ITEM32(reg, pfcc, pprx, 0x0C, 31, 1); + +/* reg_pfcc_aprx + * Active (operational) Pause policy on Rx. + * 0 - Ignore received Pause frames. + * 1 - Respect received Pause frames. + * Access: RO + */ +MLXSW_ITEM32(reg, pfcc, aprx, 0x0C, 30, 1); + +/* reg_pfcc_pfcrx + * Priority based flow control policy on Rx[7:0]. Per-priority bit mask: + * 0 - Ignore incoming priority Pause frames on the specified priority + * (default). + * 1 - Respect incoming priority Pause frames on the specified priority. 
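+ *
+ * Usage sketch (editorial addition, not part of the original patch): the
+ * helpers below program the per-priority Tx and Rx PFC policy for all
+ * eight priorities at once, so enabling PFC from a DCB ieee_pfc request
+ * would typically reduce to the following ("core", "local_port" and
+ * "pfc_en" are illustrative).
+ *
+ *    char pfcc_pl[MLXSW_REG_PFCC_LEN];
+ *    int err;
+ *
+ *    mlxsw_reg_pfcc_pack(pfcc_pl, local_port);
+ *    mlxsw_reg_pfcc_prio_pack(pfcc_pl, pfc_en);
+ *    err = mlxsw_reg_write(core, MLXSW_REG(pfcc), pfcc_pl);
+ *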
+ * Access: RW + */ +MLXSW_ITEM32(reg, pfcc, pfcrx, 0x0C, 16, 8); + +#define MLXSW_REG_PFCC_ALL_PRIO 0xFF + +static inline void mlxsw_reg_pfcc_prio_pack(char *payload, u8 pfc_en) +{ + mlxsw_reg_pfcc_prio_mask_tx_set(payload, MLXSW_REG_PFCC_ALL_PRIO); + mlxsw_reg_pfcc_prio_mask_rx_set(payload, MLXSW_REG_PFCC_ALL_PRIO); + mlxsw_reg_pfcc_pfctx_set(payload, pfc_en); + mlxsw_reg_pfcc_pfcrx_set(payload, pfc_en); +} + +static inline void mlxsw_reg_pfcc_pack(char *payload, u16 local_port) +{ + MLXSW_REG_ZERO(pfcc, payload); + mlxsw_reg_pfcc_local_port_set(payload, local_port); +} + +/* PPCNT - Ports Performance Counters Register + * ------------------------------------------- + * The PPCNT register retrieves per port performance counters. + */ +#define MLXSW_REG_PPCNT_ID 0x5008 +#define MLXSW_REG_PPCNT_LEN 0x100 +#define MLXSW_REG_PPCNT_COUNTERS_OFFSET 0x08 + +MLXSW_REG_DEFINE(ppcnt, MLXSW_REG_PPCNT_ID, MLXSW_REG_PPCNT_LEN); + +/* reg_ppcnt_swid + * For HCA: must be always 0. + * Switch partition ID to associate port with. + * Switch partitions are numbered from 0 to 7 inclusively. + * Switch partition 254 indicates stacking ports. + * Switch partition 255 indicates all switch partitions. + * Only valid on Set() operation with local_port=255. + * Access: Index + */ +MLXSW_ITEM32(reg, ppcnt, swid, 0x00, 24, 8); + +/* reg_ppcnt_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, ppcnt, 0x00, 16, 0x00, 12); + +/* reg_ppcnt_pnat + * Port number access type: + * 0 - Local port number + * 1 - IB port number + * Access: Index + */ +MLXSW_ITEM32(reg, ppcnt, pnat, 0x00, 14, 2); + +enum mlxsw_reg_ppcnt_grp { + MLXSW_REG_PPCNT_IEEE_8023_CNT = 0x0, + MLXSW_REG_PPCNT_RFC_2863_CNT = 0x1, + MLXSW_REG_PPCNT_RFC_2819_CNT = 0x2, + MLXSW_REG_PPCNT_RFC_3635_CNT = 0x3, + MLXSW_REG_PPCNT_EXT_CNT = 0x5, + MLXSW_REG_PPCNT_DISCARD_CNT = 0x6, + MLXSW_REG_PPCNT_PRIO_CNT = 0x10, + MLXSW_REG_PPCNT_TC_CNT = 0x11, + MLXSW_REG_PPCNT_TC_CONG_CNT = 0x13, +}; + +/* reg_ppcnt_grp + * Performance counter group. + * Group 63 indicates all groups. Only valid on Set() operation with + * clr bit set. + * 0x0: IEEE 802.3 Counters + * 0x1: RFC 2863 Counters + * 0x2: RFC 2819 Counters + * 0x3: RFC 3635 Counters + * 0x5: Ethernet Extended Counters + * 0x6: Ethernet Discard Counters + * 0x8: Link Level Retransmission Counters + * 0x10: Per Priority Counters + * 0x11: Per Traffic Class Counters + * 0x12: Physical Layer Counters + * 0x13: Per Traffic Class Congestion Counters + * Access: Index + */ +MLXSW_ITEM32(reg, ppcnt, grp, 0x00, 0, 6); + +/* reg_ppcnt_clr + * Clear counters. Setting the clr bit will reset the counter value + * for all counters in the counter group. This bit can be set + * for both Set() and Get() operation. + * Access: OP + */ +MLXSW_ITEM32(reg, ppcnt, clr, 0x04, 31, 1); + +/* reg_ppcnt_lp_gl + * Local port global variable. + * 0: local_port 255 = all ports of the device. + * 1: local_port indicates local port number for all ports. + * Access: OP + */ +MLXSW_ITEM32(reg, ppcnt, lp_gl, 0x04, 30, 1); + +/* reg_ppcnt_prio_tc + * Priority for counter set that support per priority, valid values: 0-7. + * Traffic class for counter set that support per traffic class, + * valid values: 0- cap_max_tclass-1 . + * For HCA: cap_max_tclass is always 8. + * Otherwise must be 0. 
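+ *
+ * Usage sketch (editorial addition, not part of the original patch):
+ * counters are fetched one group at a time; prio_tc stays 0 for the
+ * port-wide groups and selects the priority or traffic class for the
+ * per-priority and per-TC groups. Reading the IEEE 802.3 group, with
+ * "core" and "local_port" as illustrative names:
+ *
+ *    char ppcnt_pl[MLXSW_REG_PPCNT_LEN];
+ *    u64 tx_frames;
+ *    int err;
+ *
+ *    mlxsw_reg_ppcnt_pack(ppcnt_pl, local_port,
+ *                         MLXSW_REG_PPCNT_IEEE_8023_CNT, 0);
+ *    err = mlxsw_reg_query(core, MLXSW_REG(ppcnt), ppcnt_pl);
+ *    if (err)
+ *            return err;
+ *    tx_frames = mlxsw_reg_ppcnt_a_frames_transmitted_ok_get(ppcnt_pl);
+ *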
+ * Access: Index + */ +MLXSW_ITEM32(reg, ppcnt, prio_tc, 0x04, 0, 5); + +/* Ethernet IEEE 802.3 Counter Group */ + +/* reg_ppcnt_a_frames_transmitted_ok + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_frames_transmitted_ok, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64); + +/* reg_ppcnt_a_frames_received_ok + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_frames_received_ok, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64); + +/* reg_ppcnt_a_frame_check_sequence_errors + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_frame_check_sequence_errors, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x10, 0, 64); + +/* reg_ppcnt_a_alignment_errors + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_alignment_errors, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x18, 0, 64); + +/* reg_ppcnt_a_octets_transmitted_ok + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_octets_transmitted_ok, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x20, 0, 64); + +/* reg_ppcnt_a_octets_received_ok + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_octets_received_ok, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x28, 0, 64); + +/* reg_ppcnt_a_multicast_frames_xmitted_ok + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_multicast_frames_xmitted_ok, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x30, 0, 64); + +/* reg_ppcnt_a_broadcast_frames_xmitted_ok + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_broadcast_frames_xmitted_ok, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x38, 0, 64); + +/* reg_ppcnt_a_multicast_frames_received_ok + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_multicast_frames_received_ok, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x40, 0, 64); + +/* reg_ppcnt_a_broadcast_frames_received_ok + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_broadcast_frames_received_ok, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x48, 0, 64); + +/* reg_ppcnt_a_in_range_length_errors + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_in_range_length_errors, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x50, 0, 64); + +/* reg_ppcnt_a_out_of_range_length_field + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_out_of_range_length_field, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x58, 0, 64); + +/* reg_ppcnt_a_frame_too_long_errors + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_frame_too_long_errors, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x60, 0, 64); + +/* reg_ppcnt_a_symbol_error_during_carrier + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_symbol_error_during_carrier, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x68, 0, 64); + +/* reg_ppcnt_a_mac_control_frames_transmitted + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_mac_control_frames_transmitted, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x70, 0, 64); + +/* reg_ppcnt_a_mac_control_frames_received + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_mac_control_frames_received, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x78, 0, 64); + +/* reg_ppcnt_a_unsupported_opcodes_received + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_unsupported_opcodes_received, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x80, 0, 64); + +/* reg_ppcnt_a_pause_mac_ctrl_frames_received + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_received, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x88, 0, 64); + +/* reg_ppcnt_a_pause_mac_ctrl_frames_transmitted + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_transmitted, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x90, 0, 64); + +/* Ethernet RFC 2863 Counter Group */ + +/* reg_ppcnt_if_in_discards + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, if_in_discards, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x10, 0, 64); + +/* reg_ppcnt_if_out_discards + * Access: RO 
+ */ +MLXSW_ITEM64(reg, ppcnt, if_out_discards, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x38, 0, 64); + +/* reg_ppcnt_if_out_errors + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, if_out_errors, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x40, 0, 64); + +/* Ethernet RFC 2819 Counter Group */ + +/* reg_ppcnt_ether_stats_undersize_pkts + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ether_stats_undersize_pkts, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x30, 0, 64); + +/* reg_ppcnt_ether_stats_oversize_pkts + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ether_stats_oversize_pkts, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x38, 0, 64); + +/* reg_ppcnt_ether_stats_fragments + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ether_stats_fragments, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x40, 0, 64); + +/* reg_ppcnt_ether_stats_pkts64octets + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts64octets, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x58, 0, 64); + +/* reg_ppcnt_ether_stats_pkts65to127octets + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts65to127octets, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x60, 0, 64); + +/* reg_ppcnt_ether_stats_pkts128to255octets + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts128to255octets, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x68, 0, 64); + +/* reg_ppcnt_ether_stats_pkts256to511octets + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts256to511octets, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x70, 0, 64); + +/* reg_ppcnt_ether_stats_pkts512to1023octets + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts512to1023octets, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x78, 0, 64); + +/* reg_ppcnt_ether_stats_pkts1024to1518octets + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts1024to1518octets, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x80, 0, 64); + +/* reg_ppcnt_ether_stats_pkts1519to2047octets + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts1519to2047octets, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x88, 0, 64); + +/* reg_ppcnt_ether_stats_pkts2048to4095octets + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts2048to4095octets, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x90, 0, 64); + +/* reg_ppcnt_ether_stats_pkts4096to8191octets + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts4096to8191octets, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x98, 0, 64); + +/* reg_ppcnt_ether_stats_pkts8192to10239octets + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts8192to10239octets, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0xA0, 0, 64); + +/* Ethernet RFC 3635 Counter Group */ + +/* reg_ppcnt_dot3stats_fcs_errors + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, dot3stats_fcs_errors, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64); + +/* reg_ppcnt_dot3stats_symbol_errors + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, dot3stats_symbol_errors, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x60, 0, 64); + +/* reg_ppcnt_dot3control_in_unknown_opcodes + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, dot3control_in_unknown_opcodes, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x68, 0, 64); + +/* reg_ppcnt_dot3in_pause_frames + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, dot3in_pause_frames, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x70, 0, 64); + +/* Ethernet Extended Counter Group Counters */ + +/* reg_ppcnt_ecn_marked + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ecn_marked, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64); + +/* Ethernet Discard Counter Group Counters */ + +/* reg_ppcnt_ingress_general + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ingress_general, + 
MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64); + +/* reg_ppcnt_ingress_policy_engine + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ingress_policy_engine, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64); + +/* reg_ppcnt_ingress_vlan_membership + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ingress_vlan_membership, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x10, 0, 64); + +/* reg_ppcnt_ingress_tag_frame_type + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ingress_tag_frame_type, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x18, 0, 64); + +/* reg_ppcnt_egress_vlan_membership + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, egress_vlan_membership, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x20, 0, 64); + +/* reg_ppcnt_loopback_filter + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, loopback_filter, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x28, 0, 64); + +/* reg_ppcnt_egress_general + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, egress_general, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x30, 0, 64); + +/* reg_ppcnt_egress_hoq + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, egress_hoq, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x40, 0, 64); + +/* reg_ppcnt_egress_policy_engine + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, egress_policy_engine, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x50, 0, 64); + +/* reg_ppcnt_ingress_tx_link_down + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ingress_tx_link_down, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x58, 0, 64); + +/* reg_ppcnt_egress_stp_filter + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, egress_stp_filter, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x60, 0, 64); + +/* reg_ppcnt_egress_sll + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, egress_sll, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x70, 0, 64); + +/* Ethernet Per Priority Group Counters */ + +/* reg_ppcnt_rx_octets + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, rx_octets, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64); + +/* reg_ppcnt_rx_frames + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, rx_frames, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x20, 0, 64); + +/* reg_ppcnt_tx_octets + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tx_octets, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x28, 0, 64); + +/* reg_ppcnt_tx_frames + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tx_frames, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x48, 0, 64); + +/* reg_ppcnt_rx_pause + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, rx_pause, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x50, 0, 64); + +/* reg_ppcnt_rx_pause_duration + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, rx_pause_duration, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x58, 0, 64); + +/* reg_ppcnt_tx_pause + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tx_pause, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x60, 0, 64); + +/* reg_ppcnt_tx_pause_duration + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tx_pause_duration, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x68, 0, 64); + +/* reg_ppcnt_rx_pause_transition + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tx_pause_transition, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x70, 0, 64); + +/* Ethernet Per Traffic Class Counters */ + +/* reg_ppcnt_tc_transmit_queue + * Contains the transmit queue depth in cells of traffic class + * selected by prio_tc and the port selected by local_port. + * The field cannot be cleared. + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tc_transmit_queue, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64); + +/* reg_ppcnt_tc_no_buffer_discard_uc + * The number of unicast packets dropped due to lack of shared + * buffer resources. 
+ * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tc_no_buffer_discard_uc, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64); + +/* Ethernet Per Traffic Class Congestion Group Counters */ + +/* reg_ppcnt_wred_discard + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, wred_discard, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64); + +/* reg_ppcnt_ecn_marked_tc + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ecn_marked_tc, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64); + +static inline void mlxsw_reg_ppcnt_pack(char *payload, u16 local_port, + enum mlxsw_reg_ppcnt_grp grp, + u8 prio_tc) +{ + MLXSW_REG_ZERO(ppcnt, payload); + mlxsw_reg_ppcnt_swid_set(payload, 0); + mlxsw_reg_ppcnt_local_port_set(payload, local_port); + mlxsw_reg_ppcnt_pnat_set(payload, 0); + mlxsw_reg_ppcnt_grp_set(payload, grp); + mlxsw_reg_ppcnt_clr_set(payload, 0); + mlxsw_reg_ppcnt_lp_gl_set(payload, 1); + mlxsw_reg_ppcnt_prio_tc_set(payload, prio_tc); +} + +/* PPTB - Port Prio To Buffer Register + * ----------------------------------- + * Configures the switch priority to buffer table. + */ +#define MLXSW_REG_PPTB_ID 0x500B +#define MLXSW_REG_PPTB_LEN 0x10 + +MLXSW_REG_DEFINE(pptb, MLXSW_REG_PPTB_ID, MLXSW_REG_PPTB_LEN); + +enum { + MLXSW_REG_PPTB_MM_UM, + MLXSW_REG_PPTB_MM_UNICAST, + MLXSW_REG_PPTB_MM_MULTICAST, +}; + +/* reg_pptb_mm + * Mapping mode. + * 0 - Map both unicast and multicast packets to the same buffer. + * 1 - Map only unicast packets. + * 2 - Map only multicast packets. + * Access: Index + * + * Note: SwitchX-2 only supports the first option. + */ +MLXSW_ITEM32(reg, pptb, mm, 0x00, 28, 2); + +/* reg_pptb_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, pptb, 0x00, 16, 0x00, 12); + +/* reg_pptb_um + * Enables the update of the untagged_buf field. + * Access: RW + */ +MLXSW_ITEM32(reg, pptb, um, 0x00, 8, 1); + +/* reg_pptb_pm + * Enables the update of the prio_to_buff field. + * Bit <i> is a flag for updating the mapping for switch priority <i>. + * Access: RW + */ +MLXSW_ITEM32(reg, pptb, pm, 0x00, 0, 8); + +/* reg_pptb_prio_to_buff + * Mapping of switch priority <i> to one of the allocated receive port + * buffers. + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, pptb, prio_to_buff, 0x04, 0x04, 4); + +/* reg_pptb_pm_msb + * Enables the update of the prio_to_buff field. + * Bit <i> is a flag for updating the mapping for switch priority <i+8>. + * Access: RW + */ +MLXSW_ITEM32(reg, pptb, pm_msb, 0x08, 24, 8); + +/* reg_pptb_untagged_buff + * Mapping of untagged frames to one of the allocated receive port buffers. + * Access: RW + * + * Note: In SwitchX-2 this field must be mapped to buffer 8. Reserved for + * Spectrum, as it maps untagged packets based on the default switch priority. + */ +MLXSW_ITEM32(reg, pptb, untagged_buff, 0x08, 0, 4); + +/* reg_pptb_prio_to_buff_msb + * Mapping of switch priority <i+8> to one of the allocated receive port + * buffers. 
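+ *
+ * Usage sketch (editorial addition, not part of the original patch):
+ * mlxsw_reg_pptb_prio_to_buff_pack() below keeps this field and the lower
+ * prio_to_buff half in sync, so a typical priority-to-buffer mapping is
+ * written as follows ("core", "local_port" and prio_to_buff[] are
+ * illustrative).
+ *
+ *    char pptb_pl[MLXSW_REG_PPTB_LEN];
+ *    int prio, err;
+ *
+ *    mlxsw_reg_pptb_pack(pptb_pl, local_port);
+ *    for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++)
+ *            mlxsw_reg_pptb_prio_to_buff_pack(pptb_pl, prio,
+ *                                             prio_to_buff[prio]);
+ *    err = mlxsw_reg_write(core, MLXSW_REG(pptb), pptb_pl);
+ *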
+ * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, pptb, prio_to_buff_msb, 0x0C, 0x04, 4); + +#define MLXSW_REG_PPTB_ALL_PRIO 0xFF + +static inline void mlxsw_reg_pptb_pack(char *payload, u16 local_port) +{ + MLXSW_REG_ZERO(pptb, payload); + mlxsw_reg_pptb_mm_set(payload, MLXSW_REG_PPTB_MM_UM); + mlxsw_reg_pptb_local_port_set(payload, local_port); + mlxsw_reg_pptb_pm_set(payload, MLXSW_REG_PPTB_ALL_PRIO); + mlxsw_reg_pptb_pm_msb_set(payload, MLXSW_REG_PPTB_ALL_PRIO); +} + +static inline void mlxsw_reg_pptb_prio_to_buff_pack(char *payload, u8 prio, + u8 buff) +{ + mlxsw_reg_pptb_prio_to_buff_set(payload, prio, buff); + mlxsw_reg_pptb_prio_to_buff_msb_set(payload, prio, buff); +} + +/* PBMC - Port Buffer Management Control Register + * ---------------------------------------------- + * The PBMC register configures and retrieves the port packet buffer + * allocation for different Prios, and the Pause threshold management. + */ +#define MLXSW_REG_PBMC_ID 0x500C +#define MLXSW_REG_PBMC_LEN 0x6C + +MLXSW_REG_DEFINE(pbmc, MLXSW_REG_PBMC_ID, MLXSW_REG_PBMC_LEN); + +/* reg_pbmc_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, pbmc, 0x00, 16, 0x00, 12); + +/* reg_pbmc_xoff_timer_value + * When device generates a pause frame, it uses this value as the pause + * timer (time for the peer port to pause in quota-512 bit time). + * Access: RW + */ +MLXSW_ITEM32(reg, pbmc, xoff_timer_value, 0x04, 16, 16); + +/* reg_pbmc_xoff_refresh + * The time before a new pause frame should be sent to refresh the pause RW + * state. Using the same units as xoff_timer_value above (in quota-512 bit + * time). + * Access: RW + */ +MLXSW_ITEM32(reg, pbmc, xoff_refresh, 0x04, 0, 16); + +#define MLXSW_REG_PBMC_PORT_SHARED_BUF_IDX 11 + +/* reg_pbmc_buf_lossy + * The field indicates if the buffer is lossy. + * 0 - Lossless + * 1 - Lossy + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, pbmc, buf_lossy, 0x0C, 25, 1, 0x08, 0x00, false); + +/* reg_pbmc_buf_epsb + * Eligible for Port Shared buffer. + * If epsb is set, packets assigned to buffer are allowed to insert the port + * shared buffer. + * When buf_lossy is MLXSW_REG_PBMC_LOSSY_LOSSY this field is reserved. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, pbmc, buf_epsb, 0x0C, 24, 1, 0x08, 0x00, false); + +/* reg_pbmc_buf_size + * The part of the packet buffer array is allocated for the specific buffer. + * Units are represented in cells. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, pbmc, buf_size, 0x0C, 0, 16, 0x08, 0x00, false); + +/* reg_pbmc_buf_xoff_threshold + * Once the amount of data in the buffer goes above this value, device + * starts sending PFC frames for all priorities associated with the + * buffer. Units are represented in cells. Reserved in case of lossy + * buffer. + * Access: RW + * + * Note: In Spectrum, reserved for buffer[9]. + */ +MLXSW_ITEM32_INDEXED(reg, pbmc, buf_xoff_threshold, 0x0C, 16, 16, + 0x08, 0x04, false); + +/* reg_pbmc_buf_xon_threshold + * When the amount of data in the buffer goes below this value, device + * stops sending PFC frames for the priorities associated with the + * buffer. Units are represented in cells. Reserved in case of lossy + * buffer. + * Access: RW + * + * Note: In Spectrum, reserved for buffer[9]. 
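+ *
+ * Usage sketch (editorial addition, not part of the original patch):
+ * headroom configuration typically queries the current PBMC state and
+ * then rewrites the per-buffer records, e.g. turning buffer 0 into a
+ * lossless buffer. "core", "local_port", "size" and "xoff_thres" (both
+ * in cells) are illustrative.
+ *
+ *    char pbmc_pl[MLXSW_REG_PBMC_LEN];
+ *    int err;
+ *
+ *    mlxsw_reg_pbmc_pack(pbmc_pl, local_port, 0, 0);
+ *    err = mlxsw_reg_query(core, MLXSW_REG(pbmc), pbmc_pl);
+ *    if (err)
+ *            return err;
+ *    mlxsw_reg_pbmc_lossless_buffer_pack(pbmc_pl, 0, size, xoff_thres);
+ *    err = mlxsw_reg_write(core, MLXSW_REG(pbmc), pbmc_pl);
+ *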
+ */ +MLXSW_ITEM32_INDEXED(reg, pbmc, buf_xon_threshold, 0x0C, 0, 16, + 0x08, 0x04, false); + +static inline void mlxsw_reg_pbmc_pack(char *payload, u16 local_port, + u16 xoff_timer_value, u16 xoff_refresh) +{ + MLXSW_REG_ZERO(pbmc, payload); + mlxsw_reg_pbmc_local_port_set(payload, local_port); + mlxsw_reg_pbmc_xoff_timer_value_set(payload, xoff_timer_value); + mlxsw_reg_pbmc_xoff_refresh_set(payload, xoff_refresh); +} + +static inline void mlxsw_reg_pbmc_lossy_buffer_pack(char *payload, + int buf_index, + u16 size) +{ + mlxsw_reg_pbmc_buf_lossy_set(payload, buf_index, 1); + mlxsw_reg_pbmc_buf_epsb_set(payload, buf_index, 0); + mlxsw_reg_pbmc_buf_size_set(payload, buf_index, size); +} + +static inline void mlxsw_reg_pbmc_lossless_buffer_pack(char *payload, + int buf_index, u16 size, + u16 threshold) +{ + mlxsw_reg_pbmc_buf_lossy_set(payload, buf_index, 0); + mlxsw_reg_pbmc_buf_epsb_set(payload, buf_index, 0); + mlxsw_reg_pbmc_buf_size_set(payload, buf_index, size); + mlxsw_reg_pbmc_buf_xoff_threshold_set(payload, buf_index, threshold); + mlxsw_reg_pbmc_buf_xon_threshold_set(payload, buf_index, threshold); +} + +/* PSPA - Port Switch Partition Allocation + * --------------------------------------- + * Controls the association of a port with a switch partition and enables + * configuring ports as stacking ports. + */ +#define MLXSW_REG_PSPA_ID 0x500D +#define MLXSW_REG_PSPA_LEN 0x8 + +MLXSW_REG_DEFINE(pspa, MLXSW_REG_PSPA_ID, MLXSW_REG_PSPA_LEN); + +/* reg_pspa_swid + * Switch partition ID. + * Access: RW + */ +MLXSW_ITEM32(reg, pspa, swid, 0x00, 24, 8); + +/* reg_pspa_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, pspa, 0x00, 16, 0x00, 0); + +/* reg_pspa_sub_port + * Virtual port within the local port. Set to 0 when virtual ports are + * disabled on the local port. + * Access: Index + */ +MLXSW_ITEM32(reg, pspa, sub_port, 0x00, 8, 8); + +static inline void mlxsw_reg_pspa_pack(char *payload, u8 swid, u16 local_port) +{ + MLXSW_REG_ZERO(pspa, payload); + mlxsw_reg_pspa_swid_set(payload, swid); + mlxsw_reg_pspa_local_port_set(payload, local_port); + mlxsw_reg_pspa_sub_port_set(payload, 0); +} + +/* PMAOS - Ports Module Administrative and Operational Status + * ---------------------------------------------------------- + * This register configures and retrieves the per module status. + */ +#define MLXSW_REG_PMAOS_ID 0x5012 +#define MLXSW_REG_PMAOS_LEN 0x10 + +MLXSW_REG_DEFINE(pmaos, MLXSW_REG_PMAOS_ID, MLXSW_REG_PMAOS_LEN); + +/* reg_pmaos_rst + * Module reset toggle. + * Note: Setting reset while module is plugged-in will result in transition to + * "initializing" operational state. + * Access: OP + */ +MLXSW_ITEM32(reg, pmaos, rst, 0x00, 31, 1); + +/* reg_pmaos_slot_index + * Slot index. + * Access: Index + */ +MLXSW_ITEM32(reg, pmaos, slot_index, 0x00, 24, 4); + +/* reg_pmaos_module + * Module number. + * Access: Index + */ +MLXSW_ITEM32(reg, pmaos, module, 0x00, 16, 8); + +enum mlxsw_reg_pmaos_admin_status { + MLXSW_REG_PMAOS_ADMIN_STATUS_ENABLED = 1, + MLXSW_REG_PMAOS_ADMIN_STATUS_DISABLED = 2, + /* If the module is active and then unplugged, or experienced an error + * event, the operational status should go to "disabled" and can only + * be enabled upon explicit enable command. + */ + MLXSW_REG_PMAOS_ADMIN_STATUS_ENABLED_ONCE = 3, +}; + +/* reg_pmaos_admin_status + * Module administrative state (the desired state of the module). + * Note: To disable a module, all ports associated with the port must be + * administatively down first. 
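+ *
+ * Usage sketch (editorial addition, not part of the original patch):
+ * explicitly enabling a module combines the pack helper with the
+ * admin_status and ase fields defined below ("core", "slot_index" and
+ * "module" are illustrative).
+ *
+ *    char pmaos_pl[MLXSW_REG_PMAOS_LEN];
+ *    int err;
+ *
+ *    mlxsw_reg_pmaos_pack(pmaos_pl, slot_index, module);
+ *    mlxsw_reg_pmaos_admin_status_set(pmaos_pl,
+ *                                     MLXSW_REG_PMAOS_ADMIN_STATUS_ENABLED);
+ *    mlxsw_reg_pmaos_ase_set(pmaos_pl, true);
+ *    err = mlxsw_reg_write(core, MLXSW_REG(pmaos), pmaos_pl);
+ *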
+ * Access: RW + */ +MLXSW_ITEM32(reg, pmaos, admin_status, 0x00, 8, 4); + +/* reg_pmaos_ase + * Admin state update enable. + * If this bit is set, admin state will be updated based on admin_state field. + * Only relevant on Set() operations. + * Access: WO + */ +MLXSW_ITEM32(reg, pmaos, ase, 0x04, 31, 1); + +/* reg_pmaos_ee + * Event update enable. + * If this bit is set, event generation will be updated based on the e field. + * Only relevant on Set operations. + * Access: WO + */ +MLXSW_ITEM32(reg, pmaos, ee, 0x04, 30, 1); + +enum mlxsw_reg_pmaos_e { + MLXSW_REG_PMAOS_E_DO_NOT_GENERATE_EVENT, + MLXSW_REG_PMAOS_E_GENERATE_EVENT, + MLXSW_REG_PMAOS_E_GENERATE_SINGLE_EVENT, +}; + +/* reg_pmaos_e + * Event Generation on operational state change. + * Access: RW + */ +MLXSW_ITEM32(reg, pmaos, e, 0x04, 0, 2); + +static inline void mlxsw_reg_pmaos_pack(char *payload, u8 slot_index, u8 module) +{ + MLXSW_REG_ZERO(pmaos, payload); + mlxsw_reg_pmaos_slot_index_set(payload, slot_index); + mlxsw_reg_pmaos_module_set(payload, module); +} + +/* PPLR - Port Physical Loopback Register + * -------------------------------------- + * This register allows configuration of the port's loopback mode. + */ +#define MLXSW_REG_PPLR_ID 0x5018 +#define MLXSW_REG_PPLR_LEN 0x8 + +MLXSW_REG_DEFINE(pplr, MLXSW_REG_PPLR_ID, MLXSW_REG_PPLR_LEN); + +/* reg_pplr_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, pplr, 0x00, 16, 0x00, 12); + +/* Phy local loopback. When set the port's egress traffic is looped back + * to the receiver and the port transmitter is disabled. + */ +#define MLXSW_REG_PPLR_LB_TYPE_BIT_PHY_LOCAL BIT(1) + +/* reg_pplr_lb_en + * Loopback enable. + * Access: RW + */ +MLXSW_ITEM32(reg, pplr, lb_en, 0x04, 0, 8); + +static inline void mlxsw_reg_pplr_pack(char *payload, u16 local_port, + bool phy_local) +{ + MLXSW_REG_ZERO(pplr, payload); + mlxsw_reg_pplr_local_port_set(payload, local_port); + mlxsw_reg_pplr_lb_en_set(payload, + phy_local ? + MLXSW_REG_PPLR_LB_TYPE_BIT_PHY_LOCAL : 0); +} + +/* PMTDB - Port Module To local DataBase Register + * ---------------------------------------------- + * The PMTDB register allows to query the possible module<->local port + * mapping than can be used in PMLP. It does not represent the actual/current + * mapping of the local to module. Actual mapping is only defined by PMLP. + */ +#define MLXSW_REG_PMTDB_ID 0x501A +#define MLXSW_REG_PMTDB_LEN 0x40 + +MLXSW_REG_DEFINE(pmtdb, MLXSW_REG_PMTDB_ID, MLXSW_REG_PMTDB_LEN); + +/* reg_pmtdb_slot_index + * Slot index (0: Main board). + * Access: Index + */ +MLXSW_ITEM32(reg, pmtdb, slot_index, 0x00, 24, 4); + +/* reg_pmtdb_module + * Module number. + * Access: Index + */ +MLXSW_ITEM32(reg, pmtdb, module, 0x00, 16, 8); + +/* reg_pmtdb_ports_width + * Port's width + * Access: Index + */ +MLXSW_ITEM32(reg, pmtdb, ports_width, 0x00, 12, 4); + +/* reg_pmtdb_num_ports + * Number of ports in a single module (split/breakout) + * Access: Index + */ +MLXSW_ITEM32(reg, pmtdb, num_ports, 0x00, 8, 4); + +enum mlxsw_reg_pmtdb_status { + MLXSW_REG_PMTDB_STATUS_SUCCESS, +}; + +/* reg_pmtdb_status + * Status + * Access: RO + */ +MLXSW_ITEM32(reg, pmtdb, status, 0x00, 0, 4); + +/* reg_pmtdb_port_num + * The local_port value which can be assigned to the module. + * In case of more than one port, port<x> represent the /<x> port of + * the module. 
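+ *
+ * Usage sketch (editorial addition, not part of the original patch):
+ * before splitting a port, the candidate local ports for a given module,
+ * width and port count can be queried as follows ("core", "slot_index",
+ * "module", "width" and "count" are illustrative).
+ *
+ *    char pmtdb_pl[MLXSW_REG_PMTDB_LEN];
+ *    u16 first_port;
+ *    int err;
+ *
+ *    mlxsw_reg_pmtdb_pack(pmtdb_pl, slot_index, module, width, count);
+ *    err = mlxsw_reg_query(core, MLXSW_REG(pmtdb), pmtdb_pl);
+ *    if (err)
+ *            return err;
+ *    first_port = mlxsw_reg_pmtdb_port_num_get(pmtdb_pl, 0);
+ *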
+ * Access: RO + */ +MLXSW_ITEM16_INDEXED(reg, pmtdb, port_num, 0x04, 0, 10, 0x02, 0x00, false); + +static inline void mlxsw_reg_pmtdb_pack(char *payload, u8 slot_index, u8 module, + u8 ports_width, u8 num_ports) +{ + MLXSW_REG_ZERO(pmtdb, payload); + mlxsw_reg_pmtdb_slot_index_set(payload, slot_index); + mlxsw_reg_pmtdb_module_set(payload, module); + mlxsw_reg_pmtdb_ports_width_set(payload, ports_width); + mlxsw_reg_pmtdb_num_ports_set(payload, num_ports); +} + +/* PMECR - Ports Mapping Event Configuration Register + * -------------------------------------------------- + * The PMECR register is used to enable/disable event triggering + * in case of local port mapping change. + */ +#define MLXSW_REG_PMECR_ID 0x501B +#define MLXSW_REG_PMECR_LEN 0x20 + +MLXSW_REG_DEFINE(pmecr, MLXSW_REG_PMECR_ID, MLXSW_REG_PMECR_LEN); + +/* reg_pmecr_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, pmecr, 0x00, 16, 0x00, 12); + +/* reg_pmecr_ee + * Event update enable. If this bit is set, event generation will be updated + * based on the e field. Only relevant on Set operations. + * Access: WO + */ +MLXSW_ITEM32(reg, pmecr, ee, 0x04, 30, 1); + +/* reg_pmecr_eswi + * Software ignore enable bit. If this bit is set, the value of swi is used. + * If this bit is clear, the value of swi is ignored. + * Only relevant on Set operations. + * Access: WO + */ +MLXSW_ITEM32(reg, pmecr, eswi, 0x04, 24, 1); + +/* reg_pmecr_swi + * Software ignore. If this bit is set, the device shouldn't generate events + * in case of PMLP SET operation but only upon self local port mapping change + * (if applicable according to e configuration). This is supplementary + * configuration on top of e value. + * Access: RW + */ +MLXSW_ITEM32(reg, pmecr, swi, 0x04, 8, 1); + +enum mlxsw_reg_pmecr_e { + MLXSW_REG_PMECR_E_DO_NOT_GENERATE_EVENT, + MLXSW_REG_PMECR_E_GENERATE_EVENT, + MLXSW_REG_PMECR_E_GENERATE_SINGLE_EVENT, +}; + +/* reg_pmecr_e + * Event generation on local port mapping change. + * Access: RW + */ +MLXSW_ITEM32(reg, pmecr, e, 0x04, 0, 2); + +static inline void mlxsw_reg_pmecr_pack(char *payload, u16 local_port, + enum mlxsw_reg_pmecr_e e) +{ + MLXSW_REG_ZERO(pmecr, payload); + mlxsw_reg_pmecr_local_port_set(payload, local_port); + mlxsw_reg_pmecr_e_set(payload, e); + mlxsw_reg_pmecr_ee_set(payload, true); + mlxsw_reg_pmecr_swi_set(payload, true); + mlxsw_reg_pmecr_eswi_set(payload, true); +} + +/* PMPE - Port Module Plug/Unplug Event Register + * --------------------------------------------- + * This register reports any operational status change of a module. + * A change in the module’s state will generate an event only if the change + * happens after arming the event mechanism. Any changes to the module state + * while the event mechanism is not armed will not be reported. Software can + * query the PMPE register for module status. + */ +#define MLXSW_REG_PMPE_ID 0x5024 +#define MLXSW_REG_PMPE_LEN 0x10 + +MLXSW_REG_DEFINE(pmpe, MLXSW_REG_PMPE_ID, MLXSW_REG_PMPE_LEN); + +/* reg_pmpe_slot_index + * Slot index. + * Access: Index + */ +MLXSW_ITEM32(reg, pmpe, slot_index, 0x00, 24, 4); + +/* reg_pmpe_module + * Module number. + * Access: Index + */ +MLXSW_ITEM32(reg, pmpe, module, 0x00, 16, 8); + +enum mlxsw_reg_pmpe_module_status { + MLXSW_REG_PMPE_MODULE_STATUS_PLUGGED_ENABLED = 1, + MLXSW_REG_PMPE_MODULE_STATUS_UNPLUGGED, + MLXSW_REG_PMPE_MODULE_STATUS_PLUGGED_ERROR, + MLXSW_REG_PMPE_MODULE_STATUS_PLUGGED_DISABLED, +}; + +/* reg_pmpe_module_status + * Module status. 
+ * Access: RO + */ +MLXSW_ITEM32(reg, pmpe, module_status, 0x00, 0, 4); + +/* reg_pmpe_error_type + * Module error details. + * Access: RO + */ +MLXSW_ITEM32(reg, pmpe, error_type, 0x04, 8, 4); + +/* PDDR - Port Diagnostics Database Register + * ----------------------------------------- + * The PDDR enables to read the Phy debug database + */ +#define MLXSW_REG_PDDR_ID 0x5031 +#define MLXSW_REG_PDDR_LEN 0x100 + +MLXSW_REG_DEFINE(pddr, MLXSW_REG_PDDR_ID, MLXSW_REG_PDDR_LEN); + +/* reg_pddr_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, pddr, 0x00, 16, 0x00, 12); + +enum mlxsw_reg_pddr_page_select { + MLXSW_REG_PDDR_PAGE_SELECT_TROUBLESHOOTING_INFO = 1, +}; + +/* reg_pddr_page_select + * Page select index. + * Access: Index + */ +MLXSW_ITEM32(reg, pddr, page_select, 0x04, 0, 8); + +enum mlxsw_reg_pddr_trblsh_group_opcode { + /* Monitor opcodes */ + MLXSW_REG_PDDR_TRBLSH_GROUP_OPCODE_MONITOR, +}; + +/* reg_pddr_group_opcode + * Group selector. + * Access: Index + */ +MLXSW_ITEM32(reg, pddr, trblsh_group_opcode, 0x08, 0, 16); + +/* reg_pddr_status_opcode + * Group selector. + * Access: RO + */ +MLXSW_ITEM32(reg, pddr, trblsh_status_opcode, 0x0C, 0, 16); + +static inline void mlxsw_reg_pddr_pack(char *payload, u16 local_port, + u8 page_select) +{ + MLXSW_REG_ZERO(pddr, payload); + mlxsw_reg_pddr_local_port_set(payload, local_port); + mlxsw_reg_pddr_page_select_set(payload, page_select); +} + +/* PMMP - Port Module Memory Map Properties Register + * ------------------------------------------------- + * The PMMP register allows to override the module memory map advertisement. + * The register can only be set when the module is disabled by PMAOS register. + */ +#define MLXSW_REG_PMMP_ID 0x5044 +#define MLXSW_REG_PMMP_LEN 0x2C + +MLXSW_REG_DEFINE(pmmp, MLXSW_REG_PMMP_ID, MLXSW_REG_PMMP_LEN); + +/* reg_pmmp_module + * Module number. + * Access: Index + */ +MLXSW_ITEM32(reg, pmmp, module, 0x00, 16, 8); + +/* reg_pmmp_slot_index + * Slot index. + * Access: Index + */ +MLXSW_ITEM32(reg, pmmp, slot_index, 0x00, 24, 4); + +/* reg_pmmp_sticky + * When set, will keep eeprom_override values after plug-out event. + * Access: OP + */ +MLXSW_ITEM32(reg, pmmp, sticky, 0x00, 0, 1); + +/* reg_pmmp_eeprom_override_mask + * Write mask bit (negative polarity). + * 0 - Allow write + * 1 - Ignore write + * On write, indicates which of the bits from eeprom_override field are + * updated. + * Access: WO + */ +MLXSW_ITEM32(reg, pmmp, eeprom_override_mask, 0x04, 16, 16); + +enum { + /* Set module to low power mode */ + MLXSW_REG_PMMP_EEPROM_OVERRIDE_LOW_POWER_MASK = BIT(8), +}; + +/* reg_pmmp_eeprom_override + * Override / ignore EEPROM advertisement properties bitmask + * Access: RW + */ +MLXSW_ITEM32(reg, pmmp, eeprom_override, 0x04, 0, 16); + +static inline void mlxsw_reg_pmmp_pack(char *payload, u8 slot_index, u8 module) +{ + MLXSW_REG_ZERO(pmmp, payload); + mlxsw_reg_pmmp_slot_index_set(payload, slot_index); + mlxsw_reg_pmmp_module_set(payload, module); +} + +/* PLLP - Port Local port to Label Port mapping Register + * ----------------------------------------------------- + * The PLLP register returns the mapping from Local Port into Label Port. + */ +#define MLXSW_REG_PLLP_ID 0x504A +#define MLXSW_REG_PLLP_LEN 0x10 + +MLXSW_REG_DEFINE(pllp, MLXSW_REG_PLLP_ID, MLXSW_REG_PLLP_LEN); + +/* reg_pllp_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, pllp, 0x00, 16, 0x00, 12); + +/* reg_pllp_label_port + * Front panel label of the port. 
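+ *
+ * Usage sketch (editorial addition, not part of the original patch): the
+ * local-to-label mapping is read back with the pack/unpack helpers
+ * defined below ("core" and "local_port" are illustrative).
+ *
+ *    char pllp_pl[MLXSW_REG_PLLP_LEN];
+ *    u8 label_port, split_num, slot_index;
+ *    int err;
+ *
+ *    mlxsw_reg_pllp_pack(pllp_pl, local_port);
+ *    err = mlxsw_reg_query(core, MLXSW_REG(pllp), pllp_pl);
+ *    if (err)
+ *            return err;
+ *    mlxsw_reg_pllp_unpack(pllp_pl, &label_port, &split_num, &slot_index);
+ *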
+ * Access: RO + */ +MLXSW_ITEM32(reg, pllp, label_port, 0x00, 0, 8); + +/* reg_pllp_split_num + * Label split mapping for local_port. + * Access: RO + */ +MLXSW_ITEM32(reg, pllp, split_num, 0x04, 0, 4); + +/* reg_pllp_slot_index + * Slot index (0: Main board). + * Access: RO + */ +MLXSW_ITEM32(reg, pllp, slot_index, 0x08, 0, 4); + +static inline void mlxsw_reg_pllp_pack(char *payload, u16 local_port) +{ + MLXSW_REG_ZERO(pllp, payload); + mlxsw_reg_pllp_local_port_set(payload, local_port); +} + +static inline void mlxsw_reg_pllp_unpack(char *payload, u8 *label_port, + u8 *split_num, u8 *slot_index) +{ + *label_port = mlxsw_reg_pllp_label_port_get(payload); + *split_num = mlxsw_reg_pllp_split_num_get(payload); + *slot_index = mlxsw_reg_pllp_slot_index_get(payload); +} + +/* PMTM - Port Module Type Mapping Register + * ---------------------------------------- + * The PMTM register allows query or configuration of module types. + * The register can only be set when the module is disabled by PMAOS register + */ +#define MLXSW_REG_PMTM_ID 0x5067 +#define MLXSW_REG_PMTM_LEN 0x10 + +MLXSW_REG_DEFINE(pmtm, MLXSW_REG_PMTM_ID, MLXSW_REG_PMTM_LEN); + +/* reg_pmtm_slot_index + * Slot index. + * Access: Index + */ +MLXSW_ITEM32(reg, pmtm, slot_index, 0x00, 24, 4); + +/* reg_pmtm_module + * Module number. + * Access: Index + */ +MLXSW_ITEM32(reg, pmtm, module, 0x00, 16, 8); + +enum mlxsw_reg_pmtm_module_type { + MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_4_LANES = 0, + MLXSW_REG_PMTM_MODULE_TYPE_QSFP = 1, + MLXSW_REG_PMTM_MODULE_TYPE_SFP = 2, + MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_SINGLE_LANE = 4, + MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_2_LANES = 8, + MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP4X = 10, + MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP2X = 11, + MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP1X = 12, + MLXSW_REG_PMTM_MODULE_TYPE_QSFP_DD = 14, + MLXSW_REG_PMTM_MODULE_TYPE_OSFP = 15, + MLXSW_REG_PMTM_MODULE_TYPE_SFP_DD = 16, + MLXSW_REG_PMTM_MODULE_TYPE_DSFP = 17, + MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP8X = 18, + MLXSW_REG_PMTM_MODULE_TYPE_TWISTED_PAIR = 19, +}; + +/* reg_pmtm_module_type + * Module type. + * Access: RW + */ +MLXSW_ITEM32(reg, pmtm, module_type, 0x04, 0, 5); + +static inline void mlxsw_reg_pmtm_pack(char *payload, u8 slot_index, u8 module) +{ + MLXSW_REG_ZERO(pmtm, payload); + mlxsw_reg_pmtm_slot_index_set(payload, slot_index); + mlxsw_reg_pmtm_module_set(payload, module); +} + +/* HTGT - Host Trap Group Table + * ---------------------------- + * Configures the properties for forwarding to CPU. + */ +#define MLXSW_REG_HTGT_ID 0x7002 +#define MLXSW_REG_HTGT_LEN 0x20 + +MLXSW_REG_DEFINE(htgt, MLXSW_REG_HTGT_ID, MLXSW_REG_HTGT_LEN); + +/* reg_htgt_swid + * Switch partition ID. + * Access: Index + */ +MLXSW_ITEM32(reg, htgt, swid, 0x00, 24, 8); + +#define MLXSW_REG_HTGT_PATH_TYPE_LOCAL 0x0 /* For locally attached CPU */ + +/* reg_htgt_type + * CPU path type. 
+ * Access: RW + */ +MLXSW_ITEM32(reg, htgt, type, 0x00, 8, 4); + +enum mlxsw_reg_htgt_trap_group { + MLXSW_REG_HTGT_TRAP_GROUP_EMAD, + MLXSW_REG_HTGT_TRAP_GROUP_CORE_EVENT, + MLXSW_REG_HTGT_TRAP_GROUP_SP_STP, + MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP, + MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP, + MLXSW_REG_HTGT_TRAP_GROUP_SP_MC_SNOOPING, + MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP, + MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF, + MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM, + MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST, + MLXSW_REG_HTGT_TRAP_GROUP_SP_NEIGH_DISCOVERY, + MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP, + MLXSW_REG_HTGT_TRAP_GROUP_SP_EXTERNAL_ROUTE, + MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME, + MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP, + MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT, + MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6, + MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR, + MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0, + MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1, + MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP, + MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE, + MLXSW_REG_HTGT_TRAP_GROUP_SP_FLOW_LOGGING, + MLXSW_REG_HTGT_TRAP_GROUP_SP_FID_MISS, + MLXSW_REG_HTGT_TRAP_GROUP_SP_BFD, + MLXSW_REG_HTGT_TRAP_GROUP_SP_DUMMY, + MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS, + MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_DISCARDS, + MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_EXCEPTIONS, + MLXSW_REG_HTGT_TRAP_GROUP_SP_TUNNEL_DISCARDS, + MLXSW_REG_HTGT_TRAP_GROUP_SP_ACL_DISCARDS, + MLXSW_REG_HTGT_TRAP_GROUP_SP_BUFFER_DISCARDS, + + __MLXSW_REG_HTGT_TRAP_GROUP_MAX, + MLXSW_REG_HTGT_TRAP_GROUP_MAX = __MLXSW_REG_HTGT_TRAP_GROUP_MAX - 1 +}; + +/* reg_htgt_trap_group + * Trap group number. User defined number specifying which trap groups + * should be forwarded to the CPU. The mapping between trap IDs and trap + * groups is configured using HPKT register. + * Access: Index + */ +MLXSW_ITEM32(reg, htgt, trap_group, 0x00, 0, 8); + +enum { + MLXSW_REG_HTGT_POLICER_DISABLE, + MLXSW_REG_HTGT_POLICER_ENABLE, +}; + +/* reg_htgt_pide + * Enable policer ID specified using 'pid' field. + * Access: RW + */ +MLXSW_ITEM32(reg, htgt, pide, 0x04, 15, 1); + +#define MLXSW_REG_HTGT_INVALID_POLICER 0xff + +/* reg_htgt_pid + * Policer ID for the trap group. + * Access: RW + */ +MLXSW_ITEM32(reg, htgt, pid, 0x04, 0, 8); + +#define MLXSW_REG_HTGT_TRAP_TO_CPU 0x0 + +/* reg_htgt_mirror_action + * Mirror action to use. + * 0 - Trap to CPU. + * 1 - Trap to CPU and mirror to a mirroring agent. + * 2 - Mirror to a mirroring agent and do not trap to CPU. + * Access: RW + * + * Note: Mirroring to a mirroring agent is only supported in Spectrum. + */ +MLXSW_ITEM32(reg, htgt, mirror_action, 0x08, 8, 2); + +/* reg_htgt_mirroring_agent + * Mirroring agent. + * Access: RW + */ +MLXSW_ITEM32(reg, htgt, mirroring_agent, 0x08, 0, 3); + +#define MLXSW_REG_HTGT_DEFAULT_PRIORITY 0 + +/* reg_htgt_priority + * Trap group priority. + * In case a packet matches multiple classification rules, the packet will + * only be trapped once, based on the trap ID associated with the group (via + * register HPKT) with the highest priority. + * Supported values are 0-7, with 7 represnting the highest priority. + * Access: RW + * + * Note: In SwitchX-2 this field is ignored and the priority value is replaced + * by the 'trap_group' field. + */ +MLXSW_ITEM32(reg, htgt, priority, 0x0C, 0, 4); + +#define MLXSW_REG_HTGT_DEFAULT_TC 7 + +/* reg_htgt_local_path_cpu_tclass + * CPU ingress traffic class for the trap group. 
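+ * Illustrative sketch (assuming the mlxsw_reg_write()/MLXSW_REG() helpers
+ * from core.h): a trap group without a policer could be configured with
+ * the mlxsw_reg_htgt_pack() helper below, e.g.:
+ *
+ *	char htgt_pl[MLXSW_REG_HTGT_LEN];
+ *
+ *	mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
+ *			    MLXSW_REG_HTGT_INVALID_POLICER,
+ *			    MLXSW_REG_HTGT_DEFAULT_PRIORITY,
+ *			    MLXSW_REG_HTGT_DEFAULT_TC);
+ *	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
+ *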
+ * Access: RW + */ +MLXSW_ITEM32(reg, htgt, local_path_cpu_tclass, 0x10, 16, 6); + +enum mlxsw_reg_htgt_local_path_rdq { + MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_CTRL = 0x13, + MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_RX = 0x14, + MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_EMAD = 0x15, + MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SIB_EMAD = 0x15, +}; +/* reg_htgt_local_path_rdq + * Receive descriptor queue (RDQ) to use for the trap group. + * Access: RW + */ +MLXSW_ITEM32(reg, htgt, local_path_rdq, 0x10, 0, 6); + +static inline void mlxsw_reg_htgt_pack(char *payload, u8 group, u8 policer_id, + u8 priority, u8 tc) +{ + MLXSW_REG_ZERO(htgt, payload); + + if (policer_id == MLXSW_REG_HTGT_INVALID_POLICER) { + mlxsw_reg_htgt_pide_set(payload, + MLXSW_REG_HTGT_POLICER_DISABLE); + } else { + mlxsw_reg_htgt_pide_set(payload, + MLXSW_REG_HTGT_POLICER_ENABLE); + mlxsw_reg_htgt_pid_set(payload, policer_id); + } + + mlxsw_reg_htgt_type_set(payload, MLXSW_REG_HTGT_PATH_TYPE_LOCAL); + mlxsw_reg_htgt_trap_group_set(payload, group); + mlxsw_reg_htgt_mirror_action_set(payload, MLXSW_REG_HTGT_TRAP_TO_CPU); + mlxsw_reg_htgt_mirroring_agent_set(payload, 0); + mlxsw_reg_htgt_priority_set(payload, priority); + mlxsw_reg_htgt_local_path_cpu_tclass_set(payload, tc); + mlxsw_reg_htgt_local_path_rdq_set(payload, group); +} + +/* HPKT - Host Packet Trap + * ----------------------- + * Configures trap IDs inside trap groups. + */ +#define MLXSW_REG_HPKT_ID 0x7003 +#define MLXSW_REG_HPKT_LEN 0x10 + +MLXSW_REG_DEFINE(hpkt, MLXSW_REG_HPKT_ID, MLXSW_REG_HPKT_LEN); + +enum { + MLXSW_REG_HPKT_ACK_NOT_REQUIRED, + MLXSW_REG_HPKT_ACK_REQUIRED, +}; + +/* reg_hpkt_ack + * Require acknowledgements from the host for events. + * If set, then the device will wait for the event it sent to be acknowledged + * by the host. This option is only relevant for event trap IDs. + * Access: RW + * + * Note: Currently not supported by firmware. + */ +MLXSW_ITEM32(reg, hpkt, ack, 0x00, 24, 1); + +enum mlxsw_reg_hpkt_action { + MLXSW_REG_HPKT_ACTION_FORWARD, + MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU, + MLXSW_REG_HPKT_ACTION_MIRROR_TO_CPU, + MLXSW_REG_HPKT_ACTION_DISCARD, + MLXSW_REG_HPKT_ACTION_SOFT_DISCARD, + MLXSW_REG_HPKT_ACTION_TRAP_AND_SOFT_DISCARD, + MLXSW_REG_HPKT_ACTION_TRAP_EXCEPTION_TO_CPU, + MLXSW_REG_HPKT_ACTION_SET_FW_DEFAULT = 15, +}; + +/* reg_hpkt_action + * Action to perform on packet when trapped. + * 0 - No action. Forward to CPU based on switching rules. + * 1 - Trap to CPU (CPU receives sole copy). + * 2 - Mirror to CPU (CPU receives a replica of the packet). + * 3 - Discard. + * 4 - Soft discard (allow other traps to act on the packet). + * 5 - Trap and soft discard (allow other traps to overwrite this trap). + * 6 - Trap to CPU (CPU receives sole copy) and count it as error. + * 15 - Restore the firmware's default action. + * Access: RW + * + * Note: Must be set to 0 (forward) for event trap IDs, as they are already + * addressed to the CPU. + */ +MLXSW_ITEM32(reg, hpkt, action, 0x00, 20, 3); + +/* reg_hpkt_trap_group + * Trap group to associate the trap with. + * Access: RW + */ +MLXSW_ITEM32(reg, hpkt, trap_group, 0x00, 12, 6); + +/* reg_hpkt_trap_id + * Trap ID. + * Access: Index + * + * Note: A trap ID can only be associated with a single trap group. The device + * will associate the trap ID with the last trap group configured. 
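+ *
+ * Illustrative sketch (trap_id stands for a caller-chosen MLXSW_TRAP_ID_*
+ * value; mlxsw_reg_write()/MLXSW_REG() are assumed from core.h):
+ *
+ *	char hpkt_pl[MLXSW_REG_HPKT_LEN];
+ *
+ *	mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU,
+ *			    trap_id, MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP,
+ *			    false);
+ *	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl);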
+ */ +MLXSW_ITEM32(reg, hpkt, trap_id, 0x00, 0, 10); + +enum { + MLXSW_REG_HPKT_CTRL_PACKET_DEFAULT, + MLXSW_REG_HPKT_CTRL_PACKET_NO_BUFFER, + MLXSW_REG_HPKT_CTRL_PACKET_USE_BUFFER, +}; + +/* reg_hpkt_ctrl + * Configure dedicated buffer resources for control packets. + * Ignored by SwitchX-2. + * 0 - Keep factory defaults. + * 1 - Do not use control buffer for this trap ID. + * 2 - Use control buffer for this trap ID. + * Access: RW + */ +MLXSW_ITEM32(reg, hpkt, ctrl, 0x04, 16, 2); + +static inline void mlxsw_reg_hpkt_pack(char *payload, u8 action, u16 trap_id, + enum mlxsw_reg_htgt_trap_group trap_group, + bool is_ctrl) +{ + MLXSW_REG_ZERO(hpkt, payload); + mlxsw_reg_hpkt_ack_set(payload, MLXSW_REG_HPKT_ACK_NOT_REQUIRED); + mlxsw_reg_hpkt_action_set(payload, action); + mlxsw_reg_hpkt_trap_group_set(payload, trap_group); + mlxsw_reg_hpkt_trap_id_set(payload, trap_id); + mlxsw_reg_hpkt_ctrl_set(payload, is_ctrl ? + MLXSW_REG_HPKT_CTRL_PACKET_USE_BUFFER : + MLXSW_REG_HPKT_CTRL_PACKET_NO_BUFFER); +} + +/* RGCR - Router General Configuration Register + * -------------------------------------------- + * The register is used for setting up the router configuration. + */ +#define MLXSW_REG_RGCR_ID 0x8001 +#define MLXSW_REG_RGCR_LEN 0x28 + +MLXSW_REG_DEFINE(rgcr, MLXSW_REG_RGCR_ID, MLXSW_REG_RGCR_LEN); + +/* reg_rgcr_ipv4_en + * IPv4 router enable. + * Access: RW + */ +MLXSW_ITEM32(reg, rgcr, ipv4_en, 0x00, 31, 1); + +/* reg_rgcr_ipv6_en + * IPv6 router enable. + * Access: RW + */ +MLXSW_ITEM32(reg, rgcr, ipv6_en, 0x00, 30, 1); + +/* reg_rgcr_max_router_interfaces + * Defines the maximum number of active router interfaces for all virtual + * routers. + * Access: RW + */ +MLXSW_ITEM32(reg, rgcr, max_router_interfaces, 0x10, 0, 16); + +/* reg_rgcr_usp + * Update switch priority and packet color. + * 0 - Preserve the value of Switch Priority and packet color. + * 1 - Recalculate the value of Switch Priority and packet color. + * Access: RW + * + * Note: Not supported by SwitchX and SwitchX-2. + */ +MLXSW_ITEM32(reg, rgcr, usp, 0x18, 20, 1); + +/* reg_rgcr_pcp_rw + * Indicates how to handle the pcp_rewrite_en value: + * 0 - Preserve the value of pcp_rewrite_en. + * 2 - Disable PCP rewrite. + * 3 - Enable PCP rewrite. + * Access: RW + * + * Note: Not supported by SwitchX and SwitchX-2. + */ +MLXSW_ITEM32(reg, rgcr, pcp_rw, 0x18, 16, 2); + +/* reg_rgcr_activity_dis + * Activity disable: + * 0 - Activity will be set when an entry is hit (default). + * 1 - Activity will not be set when an entry is hit. + * + * Bit 0 - Disable activity bit in Router Algorithmic LPM Unicast Entry + * (RALUE). + * Bit 1 - Disable activity bit in Router Algorithmic LPM Unicast Host + * Entry (RAUHT). + * Bits 2:7 are reserved. + * Access: RW + * + * Note: Not supported by SwitchX, SwitchX-2 and Switch-IB. + */ +MLXSW_ITEM32(reg, rgcr, activity_dis, 0x20, 0, 8); + +static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en, + bool ipv6_en) +{ + MLXSW_REG_ZERO(rgcr, payload); + mlxsw_reg_rgcr_ipv4_en_set(payload, ipv4_en); + mlxsw_reg_rgcr_ipv6_en_set(payload, ipv6_en); +} + +/* RITR - Router Interface Table Register + * -------------------------------------- + * The register is used to configure the router interface table. + */ +#define MLXSW_REG_RITR_ID 0x8002 +#define MLXSW_REG_RITR_LEN 0x40 + +MLXSW_REG_DEFINE(ritr, MLXSW_REG_RITR_ID, MLXSW_REG_RITR_LEN); + +/* reg_ritr_enable + * Enables routing on the router interface. 
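+ * Illustrative sketch of creating a sub-port RIF with the pack helpers
+ * defined further below (mlxsw_reg_write()/MLXSW_REG() are assumed from
+ * core.h; rif, vr_id, mtu, system_port, vid and mac are caller-supplied):
+ *
+ *	char ritr_pl[MLXSW_REG_RITR_LEN];
+ *
+ *	mlxsw_reg_ritr_pack(ritr_pl, true, MLXSW_REG_RITR_SP_IF, rif,
+ *			    vr_id, mtu);
+ *	mlxsw_reg_ritr_sp_if_pack(ritr_pl, false, system_port, 0, vid);
+ *	mlxsw_reg_ritr_mac_pack(ritr_pl, mac);
+ *	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(ritr), ritr_pl);
+ *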
+ * Access: RW + */ +MLXSW_ITEM32(reg, ritr, enable, 0x00, 31, 1); + +/* reg_ritr_ipv4 + * IPv4 routing enable. Enables routing of IPv4 traffic on the router + * interface. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv4, 0x00, 29, 1); + +/* reg_ritr_ipv6 + * IPv6 routing enable. Enables routing of IPv6 traffic on the router + * interface. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv6, 0x00, 28, 1); + +/* reg_ritr_ipv4_mc + * IPv4 multicast routing enable. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv4_mc, 0x00, 27, 1); + +/* reg_ritr_ipv6_mc + * IPv6 multicast routing enable. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv6_mc, 0x00, 26, 1); + +enum mlxsw_reg_ritr_if_type { + /* VLAN interface. */ + MLXSW_REG_RITR_VLAN_IF, + /* FID interface. */ + MLXSW_REG_RITR_FID_IF, + /* Sub-port interface. */ + MLXSW_REG_RITR_SP_IF, + /* Loopback Interface. */ + MLXSW_REG_RITR_LOOPBACK_IF, +}; + +/* reg_ritr_type + * Router interface type as per enum mlxsw_reg_ritr_if_type. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, type, 0x00, 23, 3); + +enum { + MLXSW_REG_RITR_RIF_CREATE, + MLXSW_REG_RITR_RIF_DEL, +}; + +/* reg_ritr_op + * Opcode: + * 0 - Create or edit RIF. + * 1 - Delete RIF. + * Reserved for SwitchX-2. For Spectrum, editing of interface properties + * is not supported. An interface must be deleted and re-created in order + * to update properties. + * Access: WO + */ +MLXSW_ITEM32(reg, ritr, op, 0x00, 20, 2); + +/* reg_ritr_rif + * Router interface index. A pointer to the Router Interface Table. + * Access: Index + */ +MLXSW_ITEM32(reg, ritr, rif, 0x00, 0, 16); + +/* reg_ritr_ipv4_fe + * IPv4 Forwarding Enable. + * Enables routing of IPv4 traffic on the router interface. When disabled, + * forwarding is blocked but local traffic (traps and IP2ME) will be enabled. + * Not supported in SwitchX-2. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv4_fe, 0x04, 29, 1); + +/* reg_ritr_ipv6_fe + * IPv6 Forwarding Enable. + * Enables routing of IPv6 traffic on the router interface. When disabled, + * forwarding is blocked but local traffic (traps and IP2ME) will be enabled. + * Not supported in SwitchX-2. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv6_fe, 0x04, 28, 1); + +/* reg_ritr_ipv4_mc_fe + * IPv4 Multicast Forwarding Enable. + * When disabled, forwarding is blocked but local traffic (traps and IP to me) + * will be enabled. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv4_mc_fe, 0x04, 27, 1); + +/* reg_ritr_ipv6_mc_fe + * IPv6 Multicast Forwarding Enable. + * When disabled, forwarding is blocked but local traffic (traps and IP to me) + * will be enabled. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv6_mc_fe, 0x04, 26, 1); + +/* reg_ritr_lb_en + * Loop-back filter enable for unicast packets. + * If the flag is set then loop-back filter for unicast packets is + * implemented on the RIF. Multicast packets are always subject to + * loop-back filtering. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, lb_en, 0x04, 24, 1); + +/* reg_ritr_virtual_router + * Virtual router ID associated with the router interface. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, virtual_router, 0x04, 0, 16); + +/* reg_ritr_mtu + * Router interface MTU. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, mtu, 0x34, 0, 16); + +/* reg_ritr_if_swid + * Switch partition ID. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, if_swid, 0x08, 24, 8); + +/* reg_ritr_if_mac_profile_id + * MAC msb profile ID. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, if_mac_profile_id, 0x10, 16, 4); + +/* reg_ritr_if_mac + * Router interface MAC address. 
+ * In Spectrum, all MAC addresses must have the same 38 MSBits. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, ritr, if_mac, 0x12, 6); + +/* reg_ritr_if_vrrp_id_ipv6 + * VRRP ID for IPv6 + * Note: Reserved for RIF types other than VLAN, FID and Sub-port. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, if_vrrp_id_ipv6, 0x1C, 8, 8); + +/* reg_ritr_if_vrrp_id_ipv4 + * VRRP ID for IPv4 + * Note: Reserved for RIF types other than VLAN, FID and Sub-port. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, if_vrrp_id_ipv4, 0x1C, 0, 8); + +/* VLAN Interface */ + +/* reg_ritr_vlan_if_vlan_id + * VLAN ID. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, vlan_if_vlan_id, 0x08, 0, 12); + +/* reg_ritr_vlan_if_efid + * Egress FID. + * Used to connect the RIF to a bridge. + * Access: RW + * + * Note: Reserved when legacy bridge model is used and on Spectrum-1. + */ +MLXSW_ITEM32(reg, ritr, vlan_if_efid, 0x0C, 0, 16); + +/* FID Interface */ + +/* reg_ritr_fid_if_fid + * Filtering ID. Used to connect a bridge to the router. + * When legacy bridge model is used, only FIDs from the vFID range are + * supported. When unified bridge model is used, this is the egress FID for + * router to bridge. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, fid_if_fid, 0x08, 0, 16); + +/* Sub-port Interface */ + +/* reg_ritr_sp_if_lag + * LAG indication. When this bit is set the system_port field holds the + * LAG identifier. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, sp_if_lag, 0x08, 24, 1); + +/* reg_ritr_sp_system_port + * Port unique indentifier. When lag bit is set, this field holds the + * lag_id in bits 0:9. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, sp_if_system_port, 0x08, 0, 16); + +/* reg_ritr_sp_if_efid + * Egress filtering ID. + * Used to connect the eRIF to a bridge if eRIF-ACL has modified the DMAC or + * the VID. + * Access: RW + * + * Note: Reserved when legacy bridge model is used. + */ +MLXSW_ITEM32(reg, ritr, sp_if_efid, 0x0C, 0, 16); + +/* reg_ritr_sp_if_vid + * VLAN ID. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, sp_if_vid, 0x18, 0, 12); + +/* Loopback Interface */ + +enum mlxsw_reg_ritr_loopback_protocol { + /* IPinIP IPv4 underlay Unicast */ + MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4, + /* IPinIP IPv6 underlay Unicast */ + MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV6, + /* IPinIP generic - used for Spectrum-2 underlay RIF */ + MLXSW_REG_RITR_LOOPBACK_GENERIC, +}; + +/* reg_ritr_loopback_protocol + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_protocol, 0x08, 28, 4); + +enum mlxsw_reg_ritr_loopback_ipip_type { + /* Tunnel is IPinIP. */ + MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_IP, + /* Tunnel is GRE, no key. */ + MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP, + /* Tunnel is GRE, with a key. */ + MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP, +}; + +/* reg_ritr_loopback_ipip_type + * Encapsulation type. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_ipip_type, 0x10, 24, 4); + +enum mlxsw_reg_ritr_loopback_ipip_options { + /* The key is defined by gre_key. */ + MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET, +}; + +/* reg_ritr_loopback_ipip_options + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_ipip_options, 0x10, 20, 4); + +/* reg_ritr_loopback_ipip_uvr + * Underlay Virtual Router ID. + * Range is 0..cap_max_virtual_routers-1. + * Reserved for Spectrum-2. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_ipip_uvr, 0x10, 0, 16); + +/* reg_ritr_loopback_ipip_underlay_rif + * Underlay ingress router interface. + * Reserved for Spectrum. 
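+ * Illustrative sketch: an IPv4-underlay GRE loopback RIF could be packed
+ * with the helpers further below (rif, vr_id, mtu, uvr_id, underlay_rif
+ * and usip are caller-supplied values; the option/type choice is just an
+ * example):
+ *
+ *	char ritr_pl[MLXSW_REG_RITR_LEN];
+ *
+ *	mlxsw_reg_ritr_pack(ritr_pl, true, MLXSW_REG_RITR_LOOPBACK_IF, rif,
+ *			    vr_id, mtu);
+ *	mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl,
+ *			MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP,
+ *			MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
+ *			uvr_id, underlay_rif, usip, 0);
+ *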
+ * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_ipip_underlay_rif, 0x14, 0, 16); + +/* reg_ritr_loopback_ipip_usip* + * Encapsulation Underlay source IP. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, ritr, loopback_ipip_usip6, 0x18, 16); +MLXSW_ITEM32(reg, ritr, loopback_ipip_usip4, 0x24, 0, 32); + +/* reg_ritr_loopback_ipip_gre_key + * GRE Key. + * Reserved when ipip_type is not IP_IN_GRE_KEY_IN_IP. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_ipip_gre_key, 0x28, 0, 32); + +/* Shared between ingress/egress */ +enum mlxsw_reg_ritr_counter_set_type { + /* No Count. */ + MLXSW_REG_RITR_COUNTER_SET_TYPE_NO_COUNT = 0x0, + /* Basic. Used for router interfaces, counting the following: + * - Error and Discard counters. + * - Unicast, Multicast and Broadcast counters. Sharing the + * same set of counters for the different type of traffic + * (IPv4, IPv6 and mpls). + */ + MLXSW_REG_RITR_COUNTER_SET_TYPE_BASIC = 0x9, +}; + +/* reg_ritr_ingress_counter_index + * Counter Index for flow counter. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ingress_counter_index, 0x38, 0, 24); + +/* reg_ritr_ingress_counter_set_type + * Igress Counter Set Type for router interface counter. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ingress_counter_set_type, 0x38, 24, 8); + +/* reg_ritr_egress_counter_index + * Counter Index for flow counter. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, egress_counter_index, 0x3C, 0, 24); + +/* reg_ritr_egress_counter_set_type + * Egress Counter Set Type for router interface counter. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, egress_counter_set_type, 0x3C, 24, 8); + +static inline void mlxsw_reg_ritr_counter_pack(char *payload, u32 index, + bool enable, bool egress) +{ + enum mlxsw_reg_ritr_counter_set_type set_type; + + if (enable) + set_type = MLXSW_REG_RITR_COUNTER_SET_TYPE_BASIC; + else + set_type = MLXSW_REG_RITR_COUNTER_SET_TYPE_NO_COUNT; + + if (egress) { + mlxsw_reg_ritr_egress_counter_set_type_set(payload, set_type); + mlxsw_reg_ritr_egress_counter_index_set(payload, index); + } else { + mlxsw_reg_ritr_ingress_counter_set_type_set(payload, set_type); + mlxsw_reg_ritr_ingress_counter_index_set(payload, index); + } +} + +static inline void mlxsw_reg_ritr_rif_pack(char *payload, u16 rif) +{ + MLXSW_REG_ZERO(ritr, payload); + mlxsw_reg_ritr_rif_set(payload, rif); +} + +static inline void mlxsw_reg_ritr_sp_if_pack(char *payload, bool lag, + u16 system_port, u16 efid, u16 vid) +{ + mlxsw_reg_ritr_sp_if_lag_set(payload, lag); + mlxsw_reg_ritr_sp_if_system_port_set(payload, system_port); + mlxsw_reg_ritr_sp_if_efid_set(payload, efid); + mlxsw_reg_ritr_sp_if_vid_set(payload, vid); +} + +static inline void mlxsw_reg_ritr_pack(char *payload, bool enable, + enum mlxsw_reg_ritr_if_type type, + u16 rif, u16 vr_id, u16 mtu) +{ + bool op = enable ? 
MLXSW_REG_RITR_RIF_CREATE : MLXSW_REG_RITR_RIF_DEL; + + MLXSW_REG_ZERO(ritr, payload); + mlxsw_reg_ritr_enable_set(payload, enable); + mlxsw_reg_ritr_ipv4_set(payload, 1); + mlxsw_reg_ritr_ipv6_set(payload, 1); + mlxsw_reg_ritr_ipv4_mc_set(payload, 1); + mlxsw_reg_ritr_ipv6_mc_set(payload, 1); + mlxsw_reg_ritr_type_set(payload, type); + mlxsw_reg_ritr_op_set(payload, op); + mlxsw_reg_ritr_rif_set(payload, rif); + mlxsw_reg_ritr_ipv4_fe_set(payload, 1); + mlxsw_reg_ritr_ipv6_fe_set(payload, 1); + mlxsw_reg_ritr_ipv4_mc_fe_set(payload, 1); + mlxsw_reg_ritr_ipv6_mc_fe_set(payload, 1); + mlxsw_reg_ritr_lb_en_set(payload, 1); + mlxsw_reg_ritr_virtual_router_set(payload, vr_id); + mlxsw_reg_ritr_mtu_set(payload, mtu); +} + +static inline void mlxsw_reg_ritr_mac_pack(char *payload, const char *mac) +{ + mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac); +} + +static inline void +mlxsw_reg_ritr_vlan_if_pack(char *payload, bool enable, u16 rif, u16 vr_id, + u16 mtu, const char *mac, u8 mac_profile_id, + u16 vlan_id, u16 efid) +{ + enum mlxsw_reg_ritr_if_type type = MLXSW_REG_RITR_VLAN_IF; + + mlxsw_reg_ritr_pack(payload, enable, type, rif, vr_id, mtu); + mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac); + mlxsw_reg_ritr_if_mac_profile_id_set(payload, mac_profile_id); + mlxsw_reg_ritr_vlan_if_vlan_id_set(payload, vlan_id); + mlxsw_reg_ritr_vlan_if_efid_set(payload, efid); +} + +static inline void +mlxsw_reg_ritr_loopback_ipip_common_pack(char *payload, + enum mlxsw_reg_ritr_loopback_ipip_type ipip_type, + enum mlxsw_reg_ritr_loopback_ipip_options options, + u16 uvr_id, u16 underlay_rif, u32 gre_key) +{ + mlxsw_reg_ritr_loopback_ipip_type_set(payload, ipip_type); + mlxsw_reg_ritr_loopback_ipip_options_set(payload, options); + mlxsw_reg_ritr_loopback_ipip_uvr_set(payload, uvr_id); + mlxsw_reg_ritr_loopback_ipip_underlay_rif_set(payload, underlay_rif); + mlxsw_reg_ritr_loopback_ipip_gre_key_set(payload, gre_key); +} + +static inline void +mlxsw_reg_ritr_loopback_ipip4_pack(char *payload, + enum mlxsw_reg_ritr_loopback_ipip_type ipip_type, + enum mlxsw_reg_ritr_loopback_ipip_options options, + u16 uvr_id, u16 underlay_rif, u32 usip, u32 gre_key) +{ + mlxsw_reg_ritr_loopback_protocol_set(payload, + MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4); + mlxsw_reg_ritr_loopback_ipip_common_pack(payload, ipip_type, options, + uvr_id, underlay_rif, gre_key); + mlxsw_reg_ritr_loopback_ipip_usip4_set(payload, usip); +} + +static inline void +mlxsw_reg_ritr_loopback_ipip6_pack(char *payload, + enum mlxsw_reg_ritr_loopback_ipip_type ipip_type, + enum mlxsw_reg_ritr_loopback_ipip_options options, + u16 uvr_id, u16 underlay_rif, + const struct in6_addr *usip, u32 gre_key) +{ + enum mlxsw_reg_ritr_loopback_protocol protocol = + MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV6; + + mlxsw_reg_ritr_loopback_protocol_set(payload, protocol); + mlxsw_reg_ritr_loopback_ipip_common_pack(payload, ipip_type, options, + uvr_id, underlay_rif, gre_key); + mlxsw_reg_ritr_loopback_ipip_usip6_memcpy_to(payload, + (const char *)usip); +} + +/* RTAR - Router TCAM Allocation Register + * -------------------------------------- + * This register is used for allocation of regions in the TCAM table. 
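+ *
+ * Illustrative sketch (mlxsw_reg_write()/MLXSW_REG() are assumed from
+ * core.h; region_size is caller-chosen): allocating an IPv4 multicast
+ * region could look like:
+ *
+ *	char rtar_pl[MLXSW_REG_RTAR_LEN];
+ *
+ *	mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_ALLOCATE,
+ *			    MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST,
+ *			    region_size);
+ *	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(rtar), rtar_pl);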
+ */ +#define MLXSW_REG_RTAR_ID 0x8004 +#define MLXSW_REG_RTAR_LEN 0x20 + +MLXSW_REG_DEFINE(rtar, MLXSW_REG_RTAR_ID, MLXSW_REG_RTAR_LEN); + +enum mlxsw_reg_rtar_op { + MLXSW_REG_RTAR_OP_ALLOCATE, + MLXSW_REG_RTAR_OP_RESIZE, + MLXSW_REG_RTAR_OP_DEALLOCATE, +}; + +/* reg_rtar_op + * Access: WO + */ +MLXSW_ITEM32(reg, rtar, op, 0x00, 28, 4); + +enum mlxsw_reg_rtar_key_type { + MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST = 1, + MLXSW_REG_RTAR_KEY_TYPE_IPV6_MULTICAST = 3 +}; + +/* reg_rtar_key_type + * TCAM key type for the region. + * Access: WO + */ +MLXSW_ITEM32(reg, rtar, key_type, 0x00, 0, 8); + +/* reg_rtar_region_size + * TCAM region size. When allocating/resizing this is the requested + * size, the response is the actual size. + * Note: Actual size may be larger than requested. + * Reserved for op = Deallocate + * Access: WO + */ +MLXSW_ITEM32(reg, rtar, region_size, 0x04, 0, 16); + +static inline void mlxsw_reg_rtar_pack(char *payload, + enum mlxsw_reg_rtar_op op, + enum mlxsw_reg_rtar_key_type key_type, + u16 region_size) +{ + MLXSW_REG_ZERO(rtar, payload); + mlxsw_reg_rtar_op_set(payload, op); + mlxsw_reg_rtar_key_type_set(payload, key_type); + mlxsw_reg_rtar_region_size_set(payload, region_size); +} + +/* RATR - Router Adjacency Table Register + * -------------------------------------- + * The RATR register is used to configure the Router Adjacency (next-hop) + * Table. + */ +#define MLXSW_REG_RATR_ID 0x8008 +#define MLXSW_REG_RATR_LEN 0x2C + +MLXSW_REG_DEFINE(ratr, MLXSW_REG_RATR_ID, MLXSW_REG_RATR_LEN); + +enum mlxsw_reg_ratr_op { + /* Read */ + MLXSW_REG_RATR_OP_QUERY_READ = 0, + /* Read and clear activity */ + MLXSW_REG_RATR_OP_QUERY_READ_CLEAR = 2, + /* Write Adjacency entry */ + MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY = 1, + /* Write Adjacency entry only if the activity is cleared. + * The write may not succeed if the activity is set. There is not + * direct feedback if the write has succeeded or not, however + * the get will reveal the actual entry (SW can compare the get + * response to the set command). + */ + MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY = 3, +}; + +/* reg_ratr_op + * Note that Write operation may also be used for updating + * counter_set_type and counter_index. In this case all other + * fields must not be updated. + * Access: OP + */ +MLXSW_ITEM32(reg, ratr, op, 0x00, 28, 4); + +/* reg_ratr_v + * Valid bit. Indicates if the adjacency entry is valid. + * Note: the device may need some time before reusing an invalidated + * entry. During this time the entry can not be reused. It is + * recommended to use another entry before reusing an invalidated + * entry (e.g. software can put it at the end of the list for + * reusing). Trying to access an invalidated entry not yet cleared + * by the device results with failure indicating "Try Again" status. + * When valid is '0' then egress_router_interface,trap_action, + * adjacency_parameters and counters are reserved + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, v, 0x00, 24, 1); + +/* reg_ratr_a + * Activity. Set for new entries. Set if a packet lookup has hit on + * the specific entry. To clear the a bit, use "clear activity". + * Access: RO + */ +MLXSW_ITEM32(reg, ratr, a, 0x00, 16, 1); + +enum mlxsw_reg_ratr_type { + /* Ethernet */ + MLXSW_REG_RATR_TYPE_ETHERNET, + /* IPoIB Unicast without GRH. + * Reserved for Spectrum. + */ + MLXSW_REG_RATR_TYPE_IPOIB_UC, + /* IPoIB Unicast with GRH. Supported only in table 0 (Ethernet unicast + * adjacency). + * Reserved for Spectrum. 
+ */ + MLXSW_REG_RATR_TYPE_IPOIB_UC_W_GRH, + /* IPoIB Multicast. + * Reserved for Spectrum. + */ + MLXSW_REG_RATR_TYPE_IPOIB_MC, + /* MPLS. + * Reserved for SwitchX/-2. + */ + MLXSW_REG_RATR_TYPE_MPLS, + /* IPinIP Encap. + * Reserved for SwitchX/-2. + */ + MLXSW_REG_RATR_TYPE_IPIP, +}; + +/* reg_ratr_type + * Adjacency entry type. + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, type, 0x04, 28, 4); + +/* reg_ratr_adjacency_index_low + * Bits 15:0 of index into the adjacency table. + * For SwitchX and SwitchX-2, the adjacency table is linear and + * used for adjacency entries only. + * For Spectrum, the index is to the KVD linear. + * Access: Index + */ +MLXSW_ITEM32(reg, ratr, adjacency_index_low, 0x04, 0, 16); + +/* reg_ratr_egress_router_interface + * Range is 0 .. cap_max_router_interfaces - 1 + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, egress_router_interface, 0x08, 0, 16); + +enum mlxsw_reg_ratr_trap_action { + MLXSW_REG_RATR_TRAP_ACTION_NOP, + MLXSW_REG_RATR_TRAP_ACTION_TRAP, + MLXSW_REG_RATR_TRAP_ACTION_MIRROR_TO_CPU, + MLXSW_REG_RATR_TRAP_ACTION_MIRROR, + MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS, +}; + +/* reg_ratr_trap_action + * see mlxsw_reg_ratr_trap_action + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, trap_action, 0x0C, 28, 4); + +/* reg_ratr_adjacency_index_high + * Bits 23:16 of the adjacency_index. + * Access: Index + */ +MLXSW_ITEM32(reg, ratr, adjacency_index_high, 0x0C, 16, 8); + +enum mlxsw_reg_ratr_trap_id { + MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0, + MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1, +}; + +/* reg_ratr_trap_id + * Trap ID to be reported to CPU. + * Trap-ID is RTR_EGRESS0 or RTR_EGRESS1. + * For trap_action of NOP, MIRROR and DISCARD_ERROR + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, trap_id, 0x0C, 0, 8); + +/* reg_ratr_eth_destination_mac + * MAC address of the destination next-hop. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, ratr, eth_destination_mac, 0x12, 6); + +enum mlxsw_reg_ratr_ipip_type { + /* IPv4, address set by mlxsw_reg_ratr_ipip_ipv4_udip. */ + MLXSW_REG_RATR_IPIP_TYPE_IPV4, + /* IPv6, address set by mlxsw_reg_ratr_ipip_ipv6_ptr. */ + MLXSW_REG_RATR_IPIP_TYPE_IPV6, +}; + +/* reg_ratr_ipip_type + * Underlay destination ip type. + * Note: the type field must match the protocol of the router interface. + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, ipip_type, 0x10, 16, 4); + +/* reg_ratr_ipip_ipv4_udip + * Underlay ipv4 dip. + * Reserved when ipip_type is IPv6. + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, ipip_ipv4_udip, 0x18, 0, 32); + +/* reg_ratr_ipip_ipv6_ptr + * Pointer to IPv6 underlay destination ip address. + * For Spectrum: Pointer to KVD linear space. 
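+ * Illustrative sketch: an IPv6-underlay IPinIP adjacency entry could be
+ * written with the helpers below (adj_index, erif and kvdl_index are
+ * caller-supplied; the KVD linear entry holding the IPv6 address is
+ * assumed to have been set up separately):
+ *
+ *	char ratr_pl[MLXSW_REG_RATR_LEN];
+ *
+ *	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
+ *			    true, MLXSW_REG_RATR_TYPE_IPIP, adj_index, erif);
+ *	mlxsw_reg_ratr_ipip6_entry_pack(ratr_pl, kvdl_index);
+ *	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(ratr), ratr_pl);
+ *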
+ * Access: RW + */ +MLXSW_ITEM32(reg, ratr, ipip_ipv6_ptr, 0x1C, 0, 24); + +enum mlxsw_reg_flow_counter_set_type { + /* No count */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT = 0x00, + /* Count packets and bytes */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03, + /* Count only packets */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS = 0x05, +}; + +/* reg_ratr_counter_set_type + * Counter set type for flow counters + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, counter_set_type, 0x28, 24, 8); + +/* reg_ratr_counter_index + * Counter index for flow counters + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, counter_index, 0x28, 0, 24); + +static inline void +mlxsw_reg_ratr_pack(char *payload, + enum mlxsw_reg_ratr_op op, bool valid, + enum mlxsw_reg_ratr_type type, + u32 adjacency_index, u16 egress_rif) +{ + MLXSW_REG_ZERO(ratr, payload); + mlxsw_reg_ratr_op_set(payload, op); + mlxsw_reg_ratr_v_set(payload, valid); + mlxsw_reg_ratr_type_set(payload, type); + mlxsw_reg_ratr_adjacency_index_low_set(payload, adjacency_index); + mlxsw_reg_ratr_adjacency_index_high_set(payload, adjacency_index >> 16); + mlxsw_reg_ratr_egress_router_interface_set(payload, egress_rif); +} + +static inline void mlxsw_reg_ratr_eth_entry_pack(char *payload, + const char *dest_mac) +{ + mlxsw_reg_ratr_eth_destination_mac_memcpy_to(payload, dest_mac); +} + +static inline void mlxsw_reg_ratr_ipip4_entry_pack(char *payload, u32 ipv4_udip) +{ + mlxsw_reg_ratr_ipip_type_set(payload, MLXSW_REG_RATR_IPIP_TYPE_IPV4); + mlxsw_reg_ratr_ipip_ipv4_udip_set(payload, ipv4_udip); +} + +static inline void mlxsw_reg_ratr_ipip6_entry_pack(char *payload, u32 ipv6_ptr) +{ + mlxsw_reg_ratr_ipip_type_set(payload, MLXSW_REG_RATR_IPIP_TYPE_IPV6); + mlxsw_reg_ratr_ipip_ipv6_ptr_set(payload, ipv6_ptr); +} + +static inline void mlxsw_reg_ratr_counter_pack(char *payload, u64 counter_index, + bool counter_enable) +{ + enum mlxsw_reg_flow_counter_set_type set_type; + + if (counter_enable) + set_type = MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES; + else + set_type = MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT; + + mlxsw_reg_ratr_counter_index_set(payload, counter_index); + mlxsw_reg_ratr_counter_set_type_set(payload, set_type); +} + +/* RDPM - Router DSCP to Priority Mapping + * -------------------------------------- + * Controls the mapping from DSCP field to switch priority on routed packets + */ +#define MLXSW_REG_RDPM_ID 0x8009 +#define MLXSW_REG_RDPM_BASE_LEN 0x00 +#define MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN 0x01 +#define MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT 64 +#define MLXSW_REG_RDPM_LEN 0x40 +#define MLXSW_REG_RDPM_LAST_ENTRY (MLXSW_REG_RDPM_BASE_LEN + \ + MLXSW_REG_RDPM_LEN - \ + MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN) + +MLXSW_REG_DEFINE(rdpm, MLXSW_REG_RDPM_ID, MLXSW_REG_RDPM_LEN); + +/* reg_dscp_entry_e + * Enable update of the specific entry + * Access: Index + */ +MLXSW_ITEM8_INDEXED(reg, rdpm, dscp_entry_e, MLXSW_REG_RDPM_LAST_ENTRY, 7, 1, + -MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN, 0x00, false); + +/* reg_dscp_entry_prio + * Switch Priority + * Access: RW + */ +MLXSW_ITEM8_INDEXED(reg, rdpm, dscp_entry_prio, MLXSW_REG_RDPM_LAST_ENTRY, 0, 4, + -MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN, 0x00, false); + +static inline void mlxsw_reg_rdpm_pack(char *payload, unsigned short index, + u8 prio) +{ + mlxsw_reg_rdpm_dscp_entry_e_set(payload, index, 1); + mlxsw_reg_rdpm_dscp_entry_prio_set(payload, index, prio); +} + +/* RICNT - Router Interface Counter Register + * ----------------------------------------- + * The RICNT register retrieves per port performance 
counters + */ +#define MLXSW_REG_RICNT_ID 0x800B +#define MLXSW_REG_RICNT_LEN 0x100 + +MLXSW_REG_DEFINE(ricnt, MLXSW_REG_RICNT_ID, MLXSW_REG_RICNT_LEN); + +/* reg_ricnt_counter_index + * Counter index + * Access: RW + */ +MLXSW_ITEM32(reg, ricnt, counter_index, 0x04, 0, 24); + +enum mlxsw_reg_ricnt_counter_set_type { + /* No Count. */ + MLXSW_REG_RICNT_COUNTER_SET_TYPE_NO_COUNT = 0x00, + /* Basic. Used for router interfaces, counting the following: + * - Error and Discard counters. + * - Unicast, Multicast and Broadcast counters. Sharing the + * same set of counters for the different type of traffic + * (IPv4, IPv6 and mpls). + */ + MLXSW_REG_RICNT_COUNTER_SET_TYPE_BASIC = 0x09, +}; + +/* reg_ricnt_counter_set_type + * Counter Set Type for router interface counter + * Access: RW + */ +MLXSW_ITEM32(reg, ricnt, counter_set_type, 0x04, 24, 8); + +enum mlxsw_reg_ricnt_opcode { + /* Nop. Supported only for read access*/ + MLXSW_REG_RICNT_OPCODE_NOP = 0x00, + /* Clear. Setting the clr bit will reset the counter value for + * all counters of the specified Router Interface. + */ + MLXSW_REG_RICNT_OPCODE_CLEAR = 0x08, +}; + +/* reg_ricnt_opcode + * Opcode + * Access: RW + */ +MLXSW_ITEM32(reg, ricnt, op, 0x00, 28, 4); + +/* reg_ricnt_good_unicast_packets + * good unicast packets. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, good_unicast_packets, 0x08, 0, 64); + +/* reg_ricnt_good_multicast_packets + * good multicast packets. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, good_multicast_packets, 0x10, 0, 64); + +/* reg_ricnt_good_broadcast_packets + * good broadcast packets + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, good_broadcast_packets, 0x18, 0, 64); + +/* reg_ricnt_good_unicast_bytes + * A count of L3 data and padding octets not including L2 headers + * for good unicast frames. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, good_unicast_bytes, 0x20, 0, 64); + +/* reg_ricnt_good_multicast_bytes + * A count of L3 data and padding octets not including L2 headers + * for good multicast frames. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, good_multicast_bytes, 0x28, 0, 64); + +/* reg_ritr_good_broadcast_bytes + * A count of L3 data and padding octets not including L2 headers + * for good broadcast frames. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, good_broadcast_bytes, 0x30, 0, 64); + +/* reg_ricnt_error_packets + * A count of errored frames that do not pass the router checks. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, error_packets, 0x38, 0, 64); + +/* reg_ricnt_discrad_packets + * A count of non-errored frames that do not pass the router checks. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, discard_packets, 0x40, 0, 64); + +/* reg_ricnt_error_bytes + * A count of L3 data and padding octets not including L2 headers + * for errored frames. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, error_bytes, 0x48, 0, 64); + +/* reg_ricnt_discard_bytes + * A count of L3 data and padding octets not including L2 headers + * for non-errored frames that do not pass the router checks. 
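+ * Illustrative sketch (mlxsw_reg_query()/MLXSW_REG() are assumed from
+ * core.h; counter_index is the caller-allocated RIF counter index):
+ *
+ *	char ricnt_pl[MLXSW_REG_RICNT_LEN];
+ *	u64 discard_bytes;
+ *
+ *	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
+ *			     MLXSW_REG_RICNT_OPCODE_NOP);
+ *	err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(ricnt), ricnt_pl);
+ *	if (!err)
+ *		discard_bytes = mlxsw_reg_ricnt_discard_bytes_get(ricnt_pl);
+ *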
+ * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, discard_bytes, 0x50, 0, 64); + +static inline void mlxsw_reg_ricnt_pack(char *payload, u32 index, + enum mlxsw_reg_ricnt_opcode op) +{ + MLXSW_REG_ZERO(ricnt, payload); + mlxsw_reg_ricnt_op_set(payload, op); + mlxsw_reg_ricnt_counter_index_set(payload, index); + mlxsw_reg_ricnt_counter_set_type_set(payload, + MLXSW_REG_RICNT_COUNTER_SET_TYPE_BASIC); +} + +/* RRCR - Router Rules Copy Register Layout + * ---------------------------------------- + * This register is used for moving and copying route entry rules. + */ +#define MLXSW_REG_RRCR_ID 0x800F +#define MLXSW_REG_RRCR_LEN 0x24 + +MLXSW_REG_DEFINE(rrcr, MLXSW_REG_RRCR_ID, MLXSW_REG_RRCR_LEN); + +enum mlxsw_reg_rrcr_op { + /* Move rules */ + MLXSW_REG_RRCR_OP_MOVE, + /* Copy rules */ + MLXSW_REG_RRCR_OP_COPY, +}; + +/* reg_rrcr_op + * Access: WO + */ +MLXSW_ITEM32(reg, rrcr, op, 0x00, 28, 4); + +/* reg_rrcr_offset + * Offset within the region from which to copy/move. + * Access: Index + */ +MLXSW_ITEM32(reg, rrcr, offset, 0x00, 0, 16); + +/* reg_rrcr_size + * The number of rules to copy/move. + * Access: WO + */ +MLXSW_ITEM32(reg, rrcr, size, 0x04, 0, 16); + +/* reg_rrcr_table_id + * Identifier of the table on which to perform the operation. Encoding is the + * same as in RTAR.key_type + * Access: Index + */ +MLXSW_ITEM32(reg, rrcr, table_id, 0x10, 0, 4); + +/* reg_rrcr_dest_offset + * Offset within the region to which to copy/move + * Access: Index + */ +MLXSW_ITEM32(reg, rrcr, dest_offset, 0x20, 0, 16); + +static inline void mlxsw_reg_rrcr_pack(char *payload, enum mlxsw_reg_rrcr_op op, + u16 offset, u16 size, + enum mlxsw_reg_rtar_key_type table_id, + u16 dest_offset) +{ + MLXSW_REG_ZERO(rrcr, payload); + mlxsw_reg_rrcr_op_set(payload, op); + mlxsw_reg_rrcr_offset_set(payload, offset); + mlxsw_reg_rrcr_size_set(payload, size); + mlxsw_reg_rrcr_table_id_set(payload, table_id); + mlxsw_reg_rrcr_dest_offset_set(payload, dest_offset); +} + +/* RALTA - Router Algorithmic LPM Tree Allocation Register + * ------------------------------------------------------- + * RALTA is used to allocate the LPM trees of the SHSPM method. + */ +#define MLXSW_REG_RALTA_ID 0x8010 +#define MLXSW_REG_RALTA_LEN 0x04 + +MLXSW_REG_DEFINE(ralta, MLXSW_REG_RALTA_ID, MLXSW_REG_RALTA_LEN); + +/* reg_ralta_op + * opcode (valid for Write, must be 0 on Read) + * 0 - allocate a tree + * 1 - deallocate a tree + * Access: OP + */ +MLXSW_ITEM32(reg, ralta, op, 0x00, 28, 2); + +enum mlxsw_reg_ralxx_protocol { + MLXSW_REG_RALXX_PROTOCOL_IPV4, + MLXSW_REG_RALXX_PROTOCOL_IPV6, +}; + +/* reg_ralta_protocol + * Protocol. + * Deallocation opcode: Reserved. + * Access: RW + */ +MLXSW_ITEM32(reg, ralta, protocol, 0x00, 24, 4); + +/* reg_ralta_tree_id + * An identifier (numbered from 1..cap_shspm_max_trees-1) representing + * the tree identifier (managed by software). + * Note that tree_id 0 is allocated for a default-route tree. + * Access: Index + */ +MLXSW_ITEM32(reg, ralta, tree_id, 0x00, 0, 8); + +static inline void mlxsw_reg_ralta_pack(char *payload, bool alloc, + enum mlxsw_reg_ralxx_protocol protocol, + u8 tree_id) +{ + MLXSW_REG_ZERO(ralta, payload); + mlxsw_reg_ralta_op_set(payload, !alloc); + mlxsw_reg_ralta_protocol_set(payload, protocol); + mlxsw_reg_ralta_tree_id_set(payload, tree_id); +} + +/* RALST - Router Algorithmic LPM Structure Tree Register + * ------------------------------------------------------ + * RALST is used to set and query the structure of an LPM tree. 
+ * The structure of the tree must be sorted as a sorted binary tree, while + * each node is a bin that is tagged as the length of the prefixes the lookup + * will refer to. Therefore, bin X refers to a set of entries with prefixes + * of X bits to match with the destination address. The bin 0 indicates + * the default action, when there is no match of any prefix. + */ +#define MLXSW_REG_RALST_ID 0x8011 +#define MLXSW_REG_RALST_LEN 0x104 + +MLXSW_REG_DEFINE(ralst, MLXSW_REG_RALST_ID, MLXSW_REG_RALST_LEN); + +/* reg_ralst_root_bin + * The bin number of the root bin. + * 0<root_bin=<(length of IP address) + * For a default-route tree configure 0xff + * Access: RW + */ +MLXSW_ITEM32(reg, ralst, root_bin, 0x00, 16, 8); + +/* reg_ralst_tree_id + * Tree identifier numbered from 1..(cap_shspm_max_trees-1). + * Access: Index + */ +MLXSW_ITEM32(reg, ralst, tree_id, 0x00, 0, 8); + +#define MLXSW_REG_RALST_BIN_NO_CHILD 0xff +#define MLXSW_REG_RALST_BIN_OFFSET 0x04 +#define MLXSW_REG_RALST_BIN_COUNT 128 + +/* reg_ralst_left_child_bin + * Holding the children of the bin according to the stored tree's structure. + * For trees composed of less than 4 blocks, the bins in excess are reserved. + * Note that tree_id 0 is allocated for a default-route tree, bins are 0xff + * Access: RW + */ +MLXSW_ITEM16_INDEXED(reg, ralst, left_child_bin, 0x04, 8, 8, 0x02, 0x00, false); + +/* reg_ralst_right_child_bin + * Holding the children of the bin according to the stored tree's structure. + * For trees composed of less than 4 blocks, the bins in excess are reserved. + * Note that tree_id 0 is allocated for a default-route tree, bins are 0xff + * Access: RW + */ +MLXSW_ITEM16_INDEXED(reg, ralst, right_child_bin, 0x04, 0, 8, 0x02, 0x00, + false); + +static inline void mlxsw_reg_ralst_pack(char *payload, u8 root_bin, u8 tree_id) +{ + MLXSW_REG_ZERO(ralst, payload); + + /* Initialize all bins to have no left or right child */ + memset(payload + MLXSW_REG_RALST_BIN_OFFSET, + MLXSW_REG_RALST_BIN_NO_CHILD, MLXSW_REG_RALST_BIN_COUNT * 2); + + mlxsw_reg_ralst_root_bin_set(payload, root_bin); + mlxsw_reg_ralst_tree_id_set(payload, tree_id); +} + +static inline void mlxsw_reg_ralst_bin_pack(char *payload, u8 bin_number, + u8 left_child_bin, + u8 right_child_bin) +{ + int bin_index = bin_number - 1; + + mlxsw_reg_ralst_left_child_bin_set(payload, bin_index, left_child_bin); + mlxsw_reg_ralst_right_child_bin_set(payload, bin_index, + right_child_bin); +} + +/* RALTB - Router Algorithmic LPM Tree Binding Register + * ---------------------------------------------------- + * RALTB is used to bind virtual router and protocol to an allocated LPM tree. + */ +#define MLXSW_REG_RALTB_ID 0x8012 +#define MLXSW_REG_RALTB_LEN 0x04 + +MLXSW_REG_DEFINE(raltb, MLXSW_REG_RALTB_ID, MLXSW_REG_RALTB_LEN); + +/* reg_raltb_virtual_router + * Virtual Router ID + * Range is 0..cap_max_virtual_routers-1 + * Access: Index + */ +MLXSW_ITEM32(reg, raltb, virtual_router, 0x00, 16, 16); + +/* reg_raltb_protocol + * Protocol. + * Access: Index + */ +MLXSW_ITEM32(reg, raltb, protocol, 0x00, 12, 4); + +/* reg_raltb_tree_id + * Tree to be used for the {virtual_router, protocol} + * Tree identifier numbered from 1..(cap_shspm_max_trees-1). + * By default, all Unicast IPv4 and IPv6 are bound to tree_id 0. 
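+ *
+ * Illustrative sketch: after a tree has been allocated with RALTA and its
+ * structure written with RALST, a virtual router can be bound to it
+ * (mlxsw_reg_write()/MLXSW_REG() are assumed from core.h; vr_id and
+ * tree_id are caller-supplied):
+ *
+ *	char raltb_pl[MLXSW_REG_RALTB_LEN];
+ *
+ *	mlxsw_reg_raltb_pack(raltb_pl, vr_id, MLXSW_REG_RALXX_PROTOCOL_IPV4,
+ *			     tree_id);
+ *	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(raltb), raltb_pl);
+ *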
+ * Access: RW + */ +MLXSW_ITEM32(reg, raltb, tree_id, 0x00, 0, 8); + +static inline void mlxsw_reg_raltb_pack(char *payload, u16 virtual_router, + enum mlxsw_reg_ralxx_protocol protocol, + u8 tree_id) +{ + MLXSW_REG_ZERO(raltb, payload); + mlxsw_reg_raltb_virtual_router_set(payload, virtual_router); + mlxsw_reg_raltb_protocol_set(payload, protocol); + mlxsw_reg_raltb_tree_id_set(payload, tree_id); +} + +/* RALUE - Router Algorithmic LPM Unicast Entry Register + * ----------------------------------------------------- + * RALUE is used to configure and query LPM entries that serve + * the Unicast protocols. + */ +#define MLXSW_REG_RALUE_ID 0x8013 +#define MLXSW_REG_RALUE_LEN 0x38 + +MLXSW_REG_DEFINE(ralue, MLXSW_REG_RALUE_ID, MLXSW_REG_RALUE_LEN); + +/* reg_ralue_protocol + * Protocol. + * Access: Index + */ +MLXSW_ITEM32(reg, ralue, protocol, 0x00, 24, 4); + +enum mlxsw_reg_ralue_op { + /* Read operation. If entry doesn't exist, the operation fails. */ + MLXSW_REG_RALUE_OP_QUERY_READ = 0, + /* Clear on read operation. Used to read entry and + * clear Activity bit. + */ + MLXSW_REG_RALUE_OP_QUERY_CLEAR = 1, + /* Write operation. Used to write a new entry to the table. All RW + * fields are written for new entry. Activity bit is set + * for new entries. + */ + MLXSW_REG_RALUE_OP_WRITE_WRITE = 0, + /* Update operation. Used to update an existing route entry and + * only update the RW fields that are detailed in the field + * op_u_mask. If entry doesn't exist, the operation fails. + */ + MLXSW_REG_RALUE_OP_WRITE_UPDATE = 1, + /* Clear activity. The Activity bit (the field a) is cleared + * for the entry. + */ + MLXSW_REG_RALUE_OP_WRITE_CLEAR = 2, + /* Delete operation. Used to delete an existing entry. If entry + * doesn't exist, the operation fails. + */ + MLXSW_REG_RALUE_OP_WRITE_DELETE = 3, +}; + +/* reg_ralue_op + * Operation. + * Access: OP + */ +MLXSW_ITEM32(reg, ralue, op, 0x00, 20, 3); + +/* reg_ralue_a + * Activity. Set for new entries. Set if a packet lookup has hit on the + * specific entry, only if the entry is a route. To clear the a bit, use + * "clear activity" op. + * Enabled by activity_dis in RGCR + * Access: RO + */ +MLXSW_ITEM32(reg, ralue, a, 0x00, 16, 1); + +/* reg_ralue_virtual_router + * Virtual Router ID + * Range is 0..cap_max_virtual_routers-1 + * Access: Index + */ +MLXSW_ITEM32(reg, ralue, virtual_router, 0x04, 16, 16); + +#define MLXSW_REG_RALUE_OP_U_MASK_ENTRY_TYPE BIT(0) +#define MLXSW_REG_RALUE_OP_U_MASK_BMP_LEN BIT(1) +#define MLXSW_REG_RALUE_OP_U_MASK_ACTION BIT(2) + +/* reg_ralue_op_u_mask + * opcode update mask. + * On read operation, this field is reserved. + * This field is valid for update opcode, otherwise - reserved. + * This field is a bitmask of the fields that should be updated. + * Access: WO + */ +MLXSW_ITEM32(reg, ralue, op_u_mask, 0x04, 8, 3); + +/* reg_ralue_prefix_len + * Number of bits in the prefix of the LPM route. + * Note that for IPv6 prefixes, if prefix_len>64 the entry consumes + * two entries in the physical HW table. + * Access: Index + */ +MLXSW_ITEM32(reg, ralue, prefix_len, 0x08, 0, 8); + +/* reg_ralue_dip* + * The prefix of the route or of the marker that the object of the LPM + * is compared with. The most significant bits of the dip are the prefix. + * The least significant bits must be '0' if the prefix_len is smaller + * than 128 for IPv6 or smaller than 32 for IPv4. + * IPv4 address uses bits dip[31:0] and bits dip[127:32] are reserved. 
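+ *
+ * Illustrative sketch: an IPv4 route pointing at a local eRIF could be
+ * written with the helpers further below (vr_id, prefix_len, dip and erif
+ * are caller-supplied; mlxsw_reg_write()/MLXSW_REG() are assumed from
+ * core.h):
+ *
+ *	char ralue_pl[MLXSW_REG_RALUE_LEN];
+ *
+ *	mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_REG_RALXX_PROTOCOL_IPV4,
+ *			      MLXSW_REG_RALUE_OP_WRITE_WRITE, vr_id,
+ *			      prefix_len, dip);
+ *	mlxsw_reg_ralue_act_local_pack(ralue_pl,
+ *				       MLXSW_REG_RALUE_TRAP_ACTION_NOP, 0,
+ *				       erif);
+ *	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(ralue), ralue_pl);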
+ * Access: Index + */ +MLXSW_ITEM32(reg, ralue, dip4, 0x18, 0, 32); +MLXSW_ITEM_BUF(reg, ralue, dip6, 0x0C, 16); + +enum mlxsw_reg_ralue_entry_type { + MLXSW_REG_RALUE_ENTRY_TYPE_MARKER_ENTRY = 1, + MLXSW_REG_RALUE_ENTRY_TYPE_ROUTE_ENTRY = 2, + MLXSW_REG_RALUE_ENTRY_TYPE_MARKER_AND_ROUTE_ENTRY = 3, +}; + +/* reg_ralue_entry_type + * Entry type. + * Note - for Marker entries, the action_type and action fields are reserved. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, entry_type, 0x1C, 30, 2); + +/* reg_ralue_bmp_len + * The best match prefix length in the case that there is no match for + * longer prefixes. + * If (entry_type != MARKER_ENTRY), bmp_len must be equal to prefix_len + * Note for any update operation with entry_type modification this + * field must be set. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, bmp_len, 0x1C, 16, 8); + +enum mlxsw_reg_ralue_action_type { + MLXSW_REG_RALUE_ACTION_TYPE_REMOTE, + MLXSW_REG_RALUE_ACTION_TYPE_LOCAL, + MLXSW_REG_RALUE_ACTION_TYPE_IP2ME, +}; + +/* reg_ralue_action_type + * Action Type + * Indicates how the IP address is connected. + * It can be connected to a local subnet through local_erif or can be + * on a remote subnet connected through a next-hop router, + * or transmitted to the CPU. + * Reserved when entry_type = MARKER_ENTRY + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, action_type, 0x1C, 0, 2); + +enum mlxsw_reg_ralue_trap_action { + MLXSW_REG_RALUE_TRAP_ACTION_NOP, + MLXSW_REG_RALUE_TRAP_ACTION_TRAP, + MLXSW_REG_RALUE_TRAP_ACTION_MIRROR_TO_CPU, + MLXSW_REG_RALUE_TRAP_ACTION_MIRROR, + MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR, +}; + +/* reg_ralue_trap_action + * Trap action. + * For IP2ME action, only NOP and MIRROR are possible. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, trap_action, 0x20, 28, 4); + +/* reg_ralue_trap_id + * Trap ID to be reported to CPU. + * Trap ID is RTR_INGRESS0 or RTR_INGRESS1. + * For trap_action of NOP, MIRROR and DISCARD_ERROR, trap_id is reserved. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, trap_id, 0x20, 0, 9); + +/* reg_ralue_adjacency_index + * Points to the first entry of the group-based ECMP. + * Only relevant in case of REMOTE action. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, adjacency_index, 0x24, 0, 24); + +/* reg_ralue_ecmp_size + * Amount of sequential entries starting + * from the adjacency_index (the number of ECMPs). + * The valid range is 1-64, 512, 1024, 2048 and 4096. + * Reserved when trap_action is TRAP or DISCARD_ERROR. + * Only relevant in case of REMOTE action. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, ecmp_size, 0x28, 0, 13); + +/* reg_ralue_local_erif + * Egress Router Interface. + * Only relevant in case of LOCAL action. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, local_erif, 0x24, 0, 16); + +/* reg_ralue_ip2me_v + * Valid bit for the tunnel_ptr field. + * If valid = 0 then trap to CPU as IP2ME trap ID. + * If valid = 1 and the packet format allows NVE or IPinIP tunnel + * decapsulation then tunnel decapsulation is done. + * If valid = 1 and packet format does not allow NVE or IPinIP tunnel + * decapsulation then trap as IP2ME trap ID. + * Only relevant in case of IP2ME action. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, ip2me_v, 0x24, 31, 1); + +/* reg_ralue_ip2me_tunnel_ptr + * Tunnel Pointer for NVE or IPinIP tunnel decapsulation. + * For Spectrum, pointer to KVD Linear. + * Only relevant in case of IP2ME action. 
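+ * Illustrative sketch: a decap route would typically pair a RALUE write
+ * with the IP2ME tunnel action helper below (tunnel_ptr is the
+ * caller-allocated KVD linear index of the decap entry):
+ *
+ *	mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, tunnel_ptr);
+ *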
+ * Access: RW + */ +MLXSW_ITEM32(reg, ralue, ip2me_tunnel_ptr, 0x24, 0, 24); + +static inline void mlxsw_reg_ralue_pack(char *payload, + enum mlxsw_reg_ralxx_protocol protocol, + enum mlxsw_reg_ralue_op op, + u16 virtual_router, u8 prefix_len) +{ + MLXSW_REG_ZERO(ralue, payload); + mlxsw_reg_ralue_protocol_set(payload, protocol); + mlxsw_reg_ralue_op_set(payload, op); + mlxsw_reg_ralue_virtual_router_set(payload, virtual_router); + mlxsw_reg_ralue_prefix_len_set(payload, prefix_len); + mlxsw_reg_ralue_entry_type_set(payload, + MLXSW_REG_RALUE_ENTRY_TYPE_ROUTE_ENTRY); + mlxsw_reg_ralue_bmp_len_set(payload, prefix_len); +} + +static inline void mlxsw_reg_ralue_pack4(char *payload, + enum mlxsw_reg_ralxx_protocol protocol, + enum mlxsw_reg_ralue_op op, + u16 virtual_router, u8 prefix_len, + u32 dip) +{ + mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len); + mlxsw_reg_ralue_dip4_set(payload, dip); +} + +static inline void mlxsw_reg_ralue_pack6(char *payload, + enum mlxsw_reg_ralxx_protocol protocol, + enum mlxsw_reg_ralue_op op, + u16 virtual_router, u8 prefix_len, + const void *dip) +{ + mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len); + mlxsw_reg_ralue_dip6_memcpy_to(payload, dip); +} + +static inline void +mlxsw_reg_ralue_act_remote_pack(char *payload, + enum mlxsw_reg_ralue_trap_action trap_action, + u16 trap_id, u32 adjacency_index, u16 ecmp_size) +{ + mlxsw_reg_ralue_action_type_set(payload, + MLXSW_REG_RALUE_ACTION_TYPE_REMOTE); + mlxsw_reg_ralue_trap_action_set(payload, trap_action); + mlxsw_reg_ralue_trap_id_set(payload, trap_id); + mlxsw_reg_ralue_adjacency_index_set(payload, adjacency_index); + mlxsw_reg_ralue_ecmp_size_set(payload, ecmp_size); +} + +static inline void +mlxsw_reg_ralue_act_local_pack(char *payload, + enum mlxsw_reg_ralue_trap_action trap_action, + u16 trap_id, u16 local_erif) +{ + mlxsw_reg_ralue_action_type_set(payload, + MLXSW_REG_RALUE_ACTION_TYPE_LOCAL); + mlxsw_reg_ralue_trap_action_set(payload, trap_action); + mlxsw_reg_ralue_trap_id_set(payload, trap_id); + mlxsw_reg_ralue_local_erif_set(payload, local_erif); +} + +static inline void +mlxsw_reg_ralue_act_ip2me_pack(char *payload) +{ + mlxsw_reg_ralue_action_type_set(payload, + MLXSW_REG_RALUE_ACTION_TYPE_IP2ME); +} + +static inline void +mlxsw_reg_ralue_act_ip2me_tun_pack(char *payload, u32 tunnel_ptr) +{ + mlxsw_reg_ralue_action_type_set(payload, + MLXSW_REG_RALUE_ACTION_TYPE_IP2ME); + mlxsw_reg_ralue_ip2me_v_set(payload, 1); + mlxsw_reg_ralue_ip2me_tunnel_ptr_set(payload, tunnel_ptr); +} + +/* RAUHT - Router Algorithmic LPM Unicast Host Table Register + * ---------------------------------------------------------- + * The RAUHT register is used to configure and query the Unicast Host table in + * devices that implement the Algorithmic LPM. + */ +#define MLXSW_REG_RAUHT_ID 0x8014 +#define MLXSW_REG_RAUHT_LEN 0x74 + +MLXSW_REG_DEFINE(rauht, MLXSW_REG_RAUHT_ID, MLXSW_REG_RAUHT_LEN); + +enum mlxsw_reg_rauht_type { + MLXSW_REG_RAUHT_TYPE_IPV4, + MLXSW_REG_RAUHT_TYPE_IPV6, +}; + +/* reg_rauht_type + * Access: Index + */ +MLXSW_ITEM32(reg, rauht, type, 0x00, 24, 2); + +enum mlxsw_reg_rauht_op { + MLXSW_REG_RAUHT_OP_QUERY_READ = 0, + /* Read operation */ + MLXSW_REG_RAUHT_OP_QUERY_CLEAR_ON_READ = 1, + /* Clear on read operation. Used to read entry and clear + * activity bit. + */ + MLXSW_REG_RAUHT_OP_WRITE_ADD = 0, + /* Add. Used to write a new entry to the table. All R/W fields are + * relevant for new entry. Activity bit is set for new entries. 
+ */ + MLXSW_REG_RAUHT_OP_WRITE_UPDATE = 1, + /* Update action. Used to update an existing route entry and + * only update the following fields: + * trap_action, trap_id, mac, counter_set_type, counter_index + */ + MLXSW_REG_RAUHT_OP_WRITE_CLEAR_ACTIVITY = 2, + /* Clear activity. A bit is cleared for the entry. */ + MLXSW_REG_RAUHT_OP_WRITE_DELETE = 3, + /* Delete entry */ + MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL = 4, + /* Delete all host entries on a RIF. In this command, dip + * field is reserved. + */ +}; + +/* reg_rauht_op + * Access: OP + */ +MLXSW_ITEM32(reg, rauht, op, 0x00, 20, 3); + +/* reg_rauht_a + * Activity. Set for new entries. Set if a packet lookup has hit on + * the specific entry. + * To clear the a bit, use "clear activity" op. + * Enabled by activity_dis in RGCR + * Access: RO + */ +MLXSW_ITEM32(reg, rauht, a, 0x00, 16, 1); + +/* reg_rauht_rif + * Router Interface + * Access: Index + */ +MLXSW_ITEM32(reg, rauht, rif, 0x00, 0, 16); + +/* reg_rauht_dip* + * Destination address. + * Access: Index + */ +MLXSW_ITEM32(reg, rauht, dip4, 0x1C, 0x0, 32); +MLXSW_ITEM_BUF(reg, rauht, dip6, 0x10, 16); + +enum mlxsw_reg_rauht_trap_action { + MLXSW_REG_RAUHT_TRAP_ACTION_NOP, + MLXSW_REG_RAUHT_TRAP_ACTION_TRAP, + MLXSW_REG_RAUHT_TRAP_ACTION_MIRROR_TO_CPU, + MLXSW_REG_RAUHT_TRAP_ACTION_MIRROR, + MLXSW_REG_RAUHT_TRAP_ACTION_DISCARD_ERRORS, +}; + +/* reg_rauht_trap_action + * Access: RW + */ +MLXSW_ITEM32(reg, rauht, trap_action, 0x60, 28, 4); + +enum mlxsw_reg_rauht_trap_id { + MLXSW_REG_RAUHT_TRAP_ID_RTR_EGRESS0, + MLXSW_REG_RAUHT_TRAP_ID_RTR_EGRESS1, +}; + +/* reg_rauht_trap_id + * Trap ID to be reported to CPU. + * Trap-ID is RTR_EGRESS0 or RTR_EGRESS1. + * For trap_action of NOP, MIRROR and DISCARD_ERROR, + * trap_id is reserved. + * Access: RW + */ +MLXSW_ITEM32(reg, rauht, trap_id, 0x60, 0, 9); + +/* reg_rauht_counter_set_type + * Counter set type for flow counters + * Access: RW + */ +MLXSW_ITEM32(reg, rauht, counter_set_type, 0x68, 24, 8); + +/* reg_rauht_counter_index + * Counter index for flow counters + * Access: RW + */ +MLXSW_ITEM32(reg, rauht, counter_index, 0x68, 0, 24); + +/* reg_rauht_mac + * MAC address. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, rauht, mac, 0x6E, 6); + +static inline void mlxsw_reg_rauht_pack(char *payload, + enum mlxsw_reg_rauht_op op, u16 rif, + const char *mac) +{ + MLXSW_REG_ZERO(rauht, payload); + mlxsw_reg_rauht_op_set(payload, op); + mlxsw_reg_rauht_rif_set(payload, rif); + mlxsw_reg_rauht_mac_memcpy_to(payload, mac); +} + +static inline void mlxsw_reg_rauht_pack4(char *payload, + enum mlxsw_reg_rauht_op op, u16 rif, + const char *mac, u32 dip) +{ + mlxsw_reg_rauht_pack(payload, op, rif, mac); + mlxsw_reg_rauht_dip4_set(payload, dip); +} + +static inline void mlxsw_reg_rauht_pack6(char *payload, + enum mlxsw_reg_rauht_op op, u16 rif, + const char *mac, const char *dip) +{ + mlxsw_reg_rauht_pack(payload, op, rif, mac); + mlxsw_reg_rauht_type_set(payload, MLXSW_REG_RAUHT_TYPE_IPV6); + mlxsw_reg_rauht_dip6_memcpy_to(payload, dip); +} + +static inline void mlxsw_reg_rauht_pack_counter(char *payload, + u64 counter_index) +{ + mlxsw_reg_rauht_counter_index_set(payload, counter_index); + mlxsw_reg_rauht_counter_set_type_set(payload, + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES); +} + +/* RALEU - Router Algorithmic LPM ECMP Update Register + * --------------------------------------------------- + * The register enables updating the ECMP section in the action for multiple + * LPM Unicast entries in a single operation. 
The update is executed to + * all entries of a {virtual router, protocol} tuple using the same ECMP group. + */ +#define MLXSW_REG_RALEU_ID 0x8015 +#define MLXSW_REG_RALEU_LEN 0x28 + +MLXSW_REG_DEFINE(raleu, MLXSW_REG_RALEU_ID, MLXSW_REG_RALEU_LEN); + +/* reg_raleu_protocol + * Protocol. + * Access: Index + */ +MLXSW_ITEM32(reg, raleu, protocol, 0x00, 24, 4); + +/* reg_raleu_virtual_router + * Virtual Router ID + * Range is 0..cap_max_virtual_routers-1 + * Access: Index + */ +MLXSW_ITEM32(reg, raleu, virtual_router, 0x00, 0, 16); + +/* reg_raleu_adjacency_index + * Adjacency Index used for matching on the existing entries. + * Access: Index + */ +MLXSW_ITEM32(reg, raleu, adjacency_index, 0x10, 0, 24); + +/* reg_raleu_ecmp_size + * ECMP Size used for matching on the existing entries. + * Access: Index + */ +MLXSW_ITEM32(reg, raleu, ecmp_size, 0x14, 0, 13); + +/* reg_raleu_new_adjacency_index + * New Adjacency Index. + * Access: WO + */ +MLXSW_ITEM32(reg, raleu, new_adjacency_index, 0x20, 0, 24); + +/* reg_raleu_new_ecmp_size + * New ECMP Size. + * Access: WO + */ +MLXSW_ITEM32(reg, raleu, new_ecmp_size, 0x24, 0, 13); + +static inline void mlxsw_reg_raleu_pack(char *payload, + enum mlxsw_reg_ralxx_protocol protocol, + u16 virtual_router, + u32 adjacency_index, u16 ecmp_size, + u32 new_adjacency_index, + u16 new_ecmp_size) +{ + MLXSW_REG_ZERO(raleu, payload); + mlxsw_reg_raleu_protocol_set(payload, protocol); + mlxsw_reg_raleu_virtual_router_set(payload, virtual_router); + mlxsw_reg_raleu_adjacency_index_set(payload, adjacency_index); + mlxsw_reg_raleu_ecmp_size_set(payload, ecmp_size); + mlxsw_reg_raleu_new_adjacency_index_set(payload, new_adjacency_index); + mlxsw_reg_raleu_new_ecmp_size_set(payload, new_ecmp_size); +} + +/* RAUHTD - Router Algorithmic LPM Unicast Host Table Dump Register + * ---------------------------------------------------------------- + * The RAUHTD register allows dumping entries from the Router Unicast Host + * Table. For a given session an entry is dumped no more than one time. The + * first RAUHTD access after reset is a new session. A session ends when the + * num_rec response is smaller than num_rec request or for IPv4 when the + * num_entries is smaller than 4. The clear activity affect the current session + * or the last session if a new session has not started. 
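+ * Note that mlxsw_reg_rauhtd_pack() below always requests a dump-and-clear of
+ * up to MLXSW_REG_RAUHTD_REC_MAX_NUM records, filtered by activity.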
+ */ +#define MLXSW_REG_RAUHTD_ID 0x8018 +#define MLXSW_REG_RAUHTD_BASE_LEN 0x20 +#define MLXSW_REG_RAUHTD_REC_LEN 0x20 +#define MLXSW_REG_RAUHTD_REC_MAX_NUM 32 +#define MLXSW_REG_RAUHTD_LEN (MLXSW_REG_RAUHTD_BASE_LEN + \ + MLXSW_REG_RAUHTD_REC_MAX_NUM * MLXSW_REG_RAUHTD_REC_LEN) +#define MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC 4 + +MLXSW_REG_DEFINE(rauhtd, MLXSW_REG_RAUHTD_ID, MLXSW_REG_RAUHTD_LEN); + +#define MLXSW_REG_RAUHTD_FILTER_A BIT(0) +#define MLXSW_REG_RAUHTD_FILTER_RIF BIT(3) + +/* reg_rauhtd_filter_fields + * if a bit is '0' then the relevant field is ignored and dump is done + * regardless of the field value + * Bit0 - filter by activity: entry_a + * Bit3 - filter by entry rip: entry_rif + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, filter_fields, 0x00, 0, 8); + +enum mlxsw_reg_rauhtd_op { + MLXSW_REG_RAUHTD_OP_DUMP, + MLXSW_REG_RAUHTD_OP_DUMP_AND_CLEAR, +}; + +/* reg_rauhtd_op + * Access: OP + */ +MLXSW_ITEM32(reg, rauhtd, op, 0x04, 24, 2); + +/* reg_rauhtd_num_rec + * At request: number of records requested + * At response: number of records dumped + * For IPv4, each record has 4 entries at request and up to 4 entries + * at response + * Range is 0..MLXSW_REG_RAUHTD_REC_MAX_NUM + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, num_rec, 0x04, 0, 8); + +/* reg_rauhtd_entry_a + * Dump only if activity has value of entry_a + * Reserved if filter_fields bit0 is '0' + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, entry_a, 0x08, 16, 1); + +enum mlxsw_reg_rauhtd_type { + MLXSW_REG_RAUHTD_TYPE_IPV4, + MLXSW_REG_RAUHTD_TYPE_IPV6, +}; + +/* reg_rauhtd_type + * Dump only if record type is: + * 0 - IPv4 + * 1 - IPv6 + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, type, 0x08, 0, 4); + +/* reg_rauhtd_entry_rif + * Dump only if RIF has value of entry_rif + * Reserved if filter_fields bit3 is '0' + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, entry_rif, 0x0C, 0, 16); + +static inline void mlxsw_reg_rauhtd_pack(char *payload, + enum mlxsw_reg_rauhtd_type type) +{ + MLXSW_REG_ZERO(rauhtd, payload); + mlxsw_reg_rauhtd_filter_fields_set(payload, MLXSW_REG_RAUHTD_FILTER_A); + mlxsw_reg_rauhtd_op_set(payload, MLXSW_REG_RAUHTD_OP_DUMP_AND_CLEAR); + mlxsw_reg_rauhtd_num_rec_set(payload, MLXSW_REG_RAUHTD_REC_MAX_NUM); + mlxsw_reg_rauhtd_entry_a_set(payload, 1); + mlxsw_reg_rauhtd_type_set(payload, type); +} + +/* reg_rauhtd_ipv4_rec_num_entries + * Number of valid entries in this record: + * 0 - 1 valid entry + * 1 - 2 valid entries + * 2 - 3 valid entries + * 3 - 4 valid entries + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_rec_num_entries, + MLXSW_REG_RAUHTD_BASE_LEN, 28, 2, + MLXSW_REG_RAUHTD_REC_LEN, 0x00, false); + +/* reg_rauhtd_rec_type + * Record type. + * 0 - IPv4 + * 1 - IPv6 + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, rec_type, MLXSW_REG_RAUHTD_BASE_LEN, 24, 2, + MLXSW_REG_RAUHTD_REC_LEN, 0x00, false); + +#define MLXSW_REG_RAUHTD_IPV4_ENT_LEN 0x8 + +/* reg_rauhtd_ipv4_ent_a + * Activity. Set for new entries. Set if a packet lookup has hit on the + * specific entry. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_a, MLXSW_REG_RAUHTD_BASE_LEN, 16, 1, + MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x00, false); + +/* reg_rauhtd_ipv4_ent_rif + * Router interface. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0, + 16, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x00, false); + +/* reg_rauhtd_ipv4_ent_dip + * Destination IPv4 address. 
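+ * Valid only for the first ipv4_rec_num_entries + 1 entries of the record.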
+ * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN, 0, + 32, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x04, false); + +#define MLXSW_REG_RAUHTD_IPV6_ENT_LEN 0x20 + +/* reg_rauhtd_ipv6_ent_a + * Activity. Set for new entries. Set if a packet lookup has hit on the + * specific entry. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv6_ent_a, MLXSW_REG_RAUHTD_BASE_LEN, 16, 1, + MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x00, false); + +/* reg_rauhtd_ipv6_ent_rif + * Router interface. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv6_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0, + 16, MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x00, false); + +/* reg_rauhtd_ipv6_ent_dip + * Destination IPv6 address. + * Access: RO + */ +MLXSW_ITEM_BUF_INDEXED(reg, rauhtd, ipv6_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN, + 16, MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x10); + +static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload, + int ent_index, u16 *p_rif, + u32 *p_dip) +{ + *p_rif = mlxsw_reg_rauhtd_ipv4_ent_rif_get(payload, ent_index); + *p_dip = mlxsw_reg_rauhtd_ipv4_ent_dip_get(payload, ent_index); +} + +static inline void mlxsw_reg_rauhtd_ent_ipv6_unpack(char *payload, + int rec_index, u16 *p_rif, + char *p_dip) +{ + *p_rif = mlxsw_reg_rauhtd_ipv6_ent_rif_get(payload, rec_index); + mlxsw_reg_rauhtd_ipv6_ent_dip_memcpy_from(payload, rec_index, p_dip); +} + +/* RTDP - Routing Tunnel Decap Properties Register + * ----------------------------------------------- + * The RTDP register is used for configuring the tunnel decap properties of NVE + * and IPinIP. + */ +#define MLXSW_REG_RTDP_ID 0x8020 +#define MLXSW_REG_RTDP_LEN 0x44 + +MLXSW_REG_DEFINE(rtdp, MLXSW_REG_RTDP_ID, MLXSW_REG_RTDP_LEN); + +enum mlxsw_reg_rtdp_type { + MLXSW_REG_RTDP_TYPE_NVE, + MLXSW_REG_RTDP_TYPE_IPIP, +}; + +/* reg_rtdp_type + * Type of the RTDP entry as per enum mlxsw_reg_rtdp_type. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, type, 0x00, 28, 4); + +/* reg_rtdp_tunnel_index + * Index to the Decap entry. + * For Spectrum, Index to KVD Linear. + * Access: Index + */ +MLXSW_ITEM32(reg, rtdp, tunnel_index, 0x00, 0, 24); + +/* reg_rtdp_egress_router_interface + * Underlay egress router interface. + * Valid range is from 0 to cap_max_router_interfaces - 1 + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, egress_router_interface, 0x40, 0, 16); + +/* IPinIP */ + +/* reg_rtdp_ipip_irif + * Ingress Router Interface for the overlay router + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_irif, 0x04, 16, 16); + +enum mlxsw_reg_rtdp_ipip_sip_check { + /* No sip checks. */ + MLXSW_REG_RTDP_IPIP_SIP_CHECK_NO, + /* Filter packet if underlay is not IPv4 or if underlay SIP does not + * equal ipv4_usip. + */ + MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV4, + /* Filter packet if underlay is not IPv6 or if underlay SIP does not + * equal ipv6_usip. + */ + MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV6 = 3, +}; + +/* reg_rtdp_ipip_sip_check + * SIP check to perform. If decapsulation failed due to these configurations + * then trap_id is IPIP_DECAP_ERROR. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_sip_check, 0x04, 0, 3); + +/* If set, allow decapsulation of IPinIP (without GRE). */ +#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_IPIP BIT(0) +/* If set, allow decapsulation of IPinGREinIP without a key. */ +#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE BIT(1) +/* If set, allow decapsulation of IPinGREinIP with a key. 
*/ +#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY BIT(2) + +/* reg_rtdp_ipip_type_check + * Flags as per MLXSW_REG_RTDP_IPIP_TYPE_CHECK_*. If decapsulation failed due to + * these configurations then trap_id is IPIP_DECAP_ERROR. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_type_check, 0x08, 24, 3); + +/* reg_rtdp_ipip_gre_key_check + * Whether GRE key should be checked. When check is enabled: + * - A packet received as IPinIP (without GRE) will always pass. + * - A packet received as IPinGREinIP without a key will not pass the check. + * - A packet received as IPinGREinIP with a key will pass the check only if the + * key in the packet is equal to expected_gre_key. + * If decapsulation failed due to GRE key then trap_id is IPIP_DECAP_ERROR. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_gre_key_check, 0x08, 23, 1); + +/* reg_rtdp_ipip_ipv4_usip + * Underlay IPv4 address for ipv4 source address check. + * Reserved when sip_check is not '1'. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_ipv4_usip, 0x0C, 0, 32); + +/* reg_rtdp_ipip_ipv6_usip_ptr + * This field is valid when sip_check is "sipv6 check explicitly". This is a + * pointer to the IPv6 DIP which is configured by RIPS. For Spectrum, the index + * is to the KVD linear. + * Reserved when sip_check is not MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV6. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_ipv6_usip_ptr, 0x10, 0, 24); + +/* reg_rtdp_ipip_expected_gre_key + * GRE key for checking. + * Reserved when gre_key_check is '0'. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_expected_gre_key, 0x14, 0, 32); + +static inline void mlxsw_reg_rtdp_pack(char *payload, + enum mlxsw_reg_rtdp_type type, + u32 tunnel_index) +{ + MLXSW_REG_ZERO(rtdp, payload); + mlxsw_reg_rtdp_type_set(payload, type); + mlxsw_reg_rtdp_tunnel_index_set(payload, tunnel_index); +} + +static inline void +mlxsw_reg_rtdp_ipip_pack(char *payload, u16 irif, + enum mlxsw_reg_rtdp_ipip_sip_check sip_check, + unsigned int type_check, bool gre_key_check, + u32 expected_gre_key) +{ + mlxsw_reg_rtdp_ipip_irif_set(payload, irif); + mlxsw_reg_rtdp_ipip_sip_check_set(payload, sip_check); + mlxsw_reg_rtdp_ipip_type_check_set(payload, type_check); + mlxsw_reg_rtdp_ipip_gre_key_check_set(payload, gre_key_check); + mlxsw_reg_rtdp_ipip_expected_gre_key_set(payload, expected_gre_key); +} + +static inline void +mlxsw_reg_rtdp_ipip4_pack(char *payload, u16 irif, + enum mlxsw_reg_rtdp_ipip_sip_check sip_check, + unsigned int type_check, bool gre_key_check, + u32 ipv4_usip, u32 expected_gre_key) +{ + mlxsw_reg_rtdp_ipip_pack(payload, irif, sip_check, type_check, + gre_key_check, expected_gre_key); + mlxsw_reg_rtdp_ipip_ipv4_usip_set(payload, ipv4_usip); +} + +static inline void +mlxsw_reg_rtdp_ipip6_pack(char *payload, u16 irif, + enum mlxsw_reg_rtdp_ipip_sip_check sip_check, + unsigned int type_check, bool gre_key_check, + u32 ipv6_usip_ptr, u32 expected_gre_key) +{ + mlxsw_reg_rtdp_ipip_pack(payload, irif, sip_check, type_check, + gre_key_check, expected_gre_key); + mlxsw_reg_rtdp_ipip_ipv6_usip_ptr_set(payload, ipv6_usip_ptr); +} + +/* RIPS - Router IP version Six Register + * ------------------------------------- + * The RIPS register is used to store IPv6 addresses for use by the NVE and + * IPinIP + */ +#define MLXSW_REG_RIPS_ID 0x8021 +#define MLXSW_REG_RIPS_LEN 0x14 + +MLXSW_REG_DEFINE(rips, MLXSW_REG_RIPS_ID, MLXSW_REG_RIPS_LEN); + +/* reg_rips_index + * Index to IPv6 address. + * For Spectrum, the index is to the KVD linear. 
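+ * RTDP's ipip_ipv6_usip_ptr field points to an address configured here.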
+ * Access: Index + */ +MLXSW_ITEM32(reg, rips, index, 0x00, 0, 24); + +/* reg_rips_ipv6 + * IPv6 address + * Access: RW + */ +MLXSW_ITEM_BUF(reg, rips, ipv6, 0x04, 16); + +static inline void mlxsw_reg_rips_pack(char *payload, u32 index, + const struct in6_addr *ipv6) +{ + MLXSW_REG_ZERO(rips, payload); + mlxsw_reg_rips_index_set(payload, index); + mlxsw_reg_rips_ipv6_memcpy_to(payload, (const char *)ipv6); +} + +/* RATRAD - Router Adjacency Table Activity Dump Register + * ------------------------------------------------------ + * The RATRAD register is used to dump and optionally clear activity bits of + * router adjacency table entries. + */ +#define MLXSW_REG_RATRAD_ID 0x8022 +#define MLXSW_REG_RATRAD_LEN 0x210 + +MLXSW_REG_DEFINE(ratrad, MLXSW_REG_RATRAD_ID, MLXSW_REG_RATRAD_LEN); + +enum { + /* Read activity */ + MLXSW_REG_RATRAD_OP_READ_ACTIVITY, + /* Read and clear activity */ + MLXSW_REG_RATRAD_OP_READ_CLEAR_ACTIVITY, +}; + +/* reg_ratrad_op + * Access: Operation + */ +MLXSW_ITEM32(reg, ratrad, op, 0x00, 30, 2); + +/* reg_ratrad_ecmp_size + * ecmp_size is the amount of sequential entries from adjacency_index. Valid + * ranges: + * Spectrum-1: 32-64, 512, 1024, 2048, 4096 + * Spectrum-2/3: 32-128, 256, 512, 1024, 2048, 4096 + * Access: Index + */ +MLXSW_ITEM32(reg, ratrad, ecmp_size, 0x00, 0, 13); + +/* reg_ratrad_adjacency_index + * Index into the adjacency table. + * Access: Index + */ +MLXSW_ITEM32(reg, ratrad, adjacency_index, 0x04, 0, 24); + +/* reg_ratrad_activity_vector + * Activity bit per adjacency index. + * Bits higher than ecmp_size are reserved. + * Access: RO + */ +MLXSW_ITEM_BIT_ARRAY(reg, ratrad, activity_vector, 0x10, 0x200, 1); + +static inline void mlxsw_reg_ratrad_pack(char *payload, u32 adjacency_index, + u16 ecmp_size) +{ + MLXSW_REG_ZERO(ratrad, payload); + mlxsw_reg_ratrad_op_set(payload, + MLXSW_REG_RATRAD_OP_READ_CLEAR_ACTIVITY); + mlxsw_reg_ratrad_ecmp_size_set(payload, ecmp_size); + mlxsw_reg_ratrad_adjacency_index_set(payload, adjacency_index); +} + +/* RIGR-V2 - Router Interface Group Register Version 2 + * --------------------------------------------------- + * The RIGR_V2 register is used to add, remove and query egress interface list + * of a multicast forwarding entry. + */ +#define MLXSW_REG_RIGR2_ID 0x8023 +#define MLXSW_REG_RIGR2_LEN 0xB0 + +#define MLXSW_REG_RIGR2_MAX_ERIFS 32 + +MLXSW_REG_DEFINE(rigr2, MLXSW_REG_RIGR2_ID, MLXSW_REG_RIGR2_LEN); + +/* reg_rigr2_rigr_index + * KVD Linear index. + * Access: Index + */ +MLXSW_ITEM32(reg, rigr2, rigr_index, 0x04, 0, 24); + +/* reg_rigr2_vnext + * Next RIGR Index is valid. + * Access: RW + */ +MLXSW_ITEM32(reg, rigr2, vnext, 0x08, 31, 1); + +/* reg_rigr2_next_rigr_index + * Next RIGR Index. The index is to the KVD linear. + * Reserved when vnxet = '0'. + * Access: RW + */ +MLXSW_ITEM32(reg, rigr2, next_rigr_index, 0x08, 0, 24); + +/* reg_rigr2_vrmid + * RMID Index is valid. + * Access: RW + */ +MLXSW_ITEM32(reg, rigr2, vrmid, 0x20, 31, 1); + +/* reg_rigr2_rmid_index + * RMID Index. + * Range 0 .. max_mid - 1 + * Reserved when vrmid = '0'. + * The index is to the Port Group Table (PGT) + * Access: RW + */ +MLXSW_ITEM32(reg, rigr2, rmid_index, 0x20, 0, 16); + +/* reg_rigr2_erif_entry_v + * Egress Router Interface is valid. + * Note that low-entries must be set if high-entries are set. For + * example: if erif_entry[2].v is set then erif_entry[1].v and + * erif_entry[0].v must be set. 
+ * Index can be from 0 to cap_mc_erif_list_entries-1 + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, rigr2, erif_entry_v, 0x24, 31, 1, 4, 0, false); + +/* reg_rigr2_erif_entry_erif + * Egress Router Interface. + * Valid range is from 0 to cap_max_router_interfaces - 1 + * Index can be from 0 to MLXSW_REG_RIGR2_MAX_ERIFS - 1 + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, rigr2, erif_entry_erif, 0x24, 0, 16, 4, 0, false); + +static inline void mlxsw_reg_rigr2_pack(char *payload, u32 rigr_index, + bool vnext, u32 next_rigr_index) +{ + MLXSW_REG_ZERO(rigr2, payload); + mlxsw_reg_rigr2_rigr_index_set(payload, rigr_index); + mlxsw_reg_rigr2_vnext_set(payload, vnext); + mlxsw_reg_rigr2_next_rigr_index_set(payload, next_rigr_index); + mlxsw_reg_rigr2_vrmid_set(payload, 0); + mlxsw_reg_rigr2_rmid_index_set(payload, 0); +} + +static inline void mlxsw_reg_rigr2_erif_entry_pack(char *payload, int index, + bool v, u16 erif) +{ + mlxsw_reg_rigr2_erif_entry_v_set(payload, index, v); + mlxsw_reg_rigr2_erif_entry_erif_set(payload, index, erif); +} + +/* RECR-V2 - Router ECMP Configuration Version 2 Register + * ------------------------------------------------------ + */ +#define MLXSW_REG_RECR2_ID 0x8025 +#define MLXSW_REG_RECR2_LEN 0x38 + +MLXSW_REG_DEFINE(recr2, MLXSW_REG_RECR2_ID, MLXSW_REG_RECR2_LEN); + +/* reg_recr2_pp + * Per-port configuration + * Access: Index + */ +MLXSW_ITEM32(reg, recr2, pp, 0x00, 24, 1); + +/* reg_recr2_sh + * Symmetric hash + * Access: RW + */ +MLXSW_ITEM32(reg, recr2, sh, 0x00, 8, 1); + +/* reg_recr2_seed + * Seed + * Access: RW + */ +MLXSW_ITEM32(reg, recr2, seed, 0x08, 0, 32); + +enum { + /* Enable IPv4 fields if packet is not TCP and not UDP */ + MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP = 3, + /* Enable IPv4 fields if packet is TCP or UDP */ + MLXSW_REG_RECR2_IPV4_EN_TCP_UDP = 4, + /* Enable IPv6 fields if packet is not TCP and not UDP */ + MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP = 5, + /* Enable IPv6 fields if packet is TCP or UDP */ + MLXSW_REG_RECR2_IPV6_EN_TCP_UDP = 6, + /* Enable TCP/UDP header fields if packet is IPv4 */ + MLXSW_REG_RECR2_TCP_UDP_EN_IPV4 = 7, + /* Enable TCP/UDP header fields if packet is IPv6 */ + MLXSW_REG_RECR2_TCP_UDP_EN_IPV6 = 8, + + __MLXSW_REG_RECR2_HEADER_CNT, +}; + +/* reg_recr2_outer_header_enables + * Bit mask where each bit enables a specific layer to be included in + * the hash calculation. + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, recr2, outer_header_enables, 0x10, 0x04, 1); + +enum { + /* IPv4 Source IP */ + MLXSW_REG_RECR2_IPV4_SIP0 = 9, + MLXSW_REG_RECR2_IPV4_SIP3 = 12, + /* IPv4 Destination IP */ + MLXSW_REG_RECR2_IPV4_DIP0 = 13, + MLXSW_REG_RECR2_IPV4_DIP3 = 16, + /* IP Protocol */ + MLXSW_REG_RECR2_IPV4_PROTOCOL = 17, + /* IPv6 Source IP */ + MLXSW_REG_RECR2_IPV6_SIP0_7 = 21, + MLXSW_REG_RECR2_IPV6_SIP8 = 29, + MLXSW_REG_RECR2_IPV6_SIP15 = 36, + /* IPv6 Destination IP */ + MLXSW_REG_RECR2_IPV6_DIP0_7 = 37, + MLXSW_REG_RECR2_IPV6_DIP8 = 45, + MLXSW_REG_RECR2_IPV6_DIP15 = 52, + /* IPv6 Next Header */ + MLXSW_REG_RECR2_IPV6_NEXT_HEADER = 53, + /* IPv6 Flow Label */ + MLXSW_REG_RECR2_IPV6_FLOW_LABEL = 57, + /* TCP/UDP Source Port */ + MLXSW_REG_RECR2_TCP_UDP_SPORT = 74, + /* TCP/UDP Destination Port */ + MLXSW_REG_RECR2_TCP_UDP_DPORT = 75, + + __MLXSW_REG_RECR2_FIELD_CNT, +}; + +/* reg_recr2_outer_header_fields_enable + * Packet fields to enable for ECMP hash subject to outer_header_enable. 
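+ * Bit positions correspond to the MLXSW_REG_RECR2_* field values in the enum
+ * above (e.g. IPV4_SIP0..IPV4_SIP3 select the IPv4 source address bytes).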
+ * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, recr2, outer_header_fields_enable, 0x14, 0x14, 1); + +/* reg_recr2_inner_header_enables + * Bit mask where each bit enables a specific inner layer to be included in the + * hash calculation. Same values as reg_recr2_outer_header_enables. + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, recr2, inner_header_enables, 0x2C, 0x04, 1); + +enum { + /* Inner IPv4 Source IP */ + MLXSW_REG_RECR2_INNER_IPV4_SIP0 = 3, + MLXSW_REG_RECR2_INNER_IPV4_SIP3 = 6, + /* Inner IPv4 Destination IP */ + MLXSW_REG_RECR2_INNER_IPV4_DIP0 = 7, + MLXSW_REG_RECR2_INNER_IPV4_DIP3 = 10, + /* Inner IP Protocol */ + MLXSW_REG_RECR2_INNER_IPV4_PROTOCOL = 11, + /* Inner IPv6 Source IP */ + MLXSW_REG_RECR2_INNER_IPV6_SIP0_7 = 12, + MLXSW_REG_RECR2_INNER_IPV6_SIP8 = 20, + MLXSW_REG_RECR2_INNER_IPV6_SIP15 = 27, + /* Inner IPv6 Destination IP */ + MLXSW_REG_RECR2_INNER_IPV6_DIP0_7 = 28, + MLXSW_REG_RECR2_INNER_IPV6_DIP8 = 36, + MLXSW_REG_RECR2_INNER_IPV6_DIP15 = 43, + /* Inner IPv6 Next Header */ + MLXSW_REG_RECR2_INNER_IPV6_NEXT_HEADER = 44, + /* Inner IPv6 Flow Label */ + MLXSW_REG_RECR2_INNER_IPV6_FLOW_LABEL = 45, + /* Inner TCP/UDP Source Port */ + MLXSW_REG_RECR2_INNER_TCP_UDP_SPORT = 46, + /* Inner TCP/UDP Destination Port */ + MLXSW_REG_RECR2_INNER_TCP_UDP_DPORT = 47, + + __MLXSW_REG_RECR2_INNER_FIELD_CNT, +}; + +/* reg_recr2_inner_header_fields_enable + * Inner packet fields to enable for ECMP hash subject to inner_header_enables. + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, recr2, inner_header_fields_enable, 0x30, 0x08, 1); + +static inline void mlxsw_reg_recr2_pack(char *payload, u32 seed) +{ + MLXSW_REG_ZERO(recr2, payload); + mlxsw_reg_recr2_pp_set(payload, false); + mlxsw_reg_recr2_sh_set(payload, true); + mlxsw_reg_recr2_seed_set(payload, seed); +} + +/* RMFT-V2 - Router Multicast Forwarding Table Version 2 Register + * -------------------------------------------------------------- + * The RMFT_V2 register is used to configure and query the multicast table. + */ +#define MLXSW_REG_RMFT2_ID 0x8027 +#define MLXSW_REG_RMFT2_LEN 0x174 + +MLXSW_REG_DEFINE(rmft2, MLXSW_REG_RMFT2_ID, MLXSW_REG_RMFT2_LEN); + +/* reg_rmft2_v + * Valid + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, v, 0x00, 31, 1); + +enum mlxsw_reg_rmft2_type { + MLXSW_REG_RMFT2_TYPE_IPV4, + MLXSW_REG_RMFT2_TYPE_IPV6 +}; + +/* reg_rmft2_type + * Access: Index + */ +MLXSW_ITEM32(reg, rmft2, type, 0x00, 28, 2); + +enum mlxsw_sp_reg_rmft2_op { + /* For Write: + * Write operation. Used to write a new entry to the table. All RW + * fields are relevant for new entry. Activity bit is set for new + * entries - Note write with v (Valid) 0 will delete the entry. + * For Query: + * Read operation + */ + MLXSW_REG_RMFT2_OP_READ_WRITE, +}; + +/* reg_rmft2_op + * Operation. + * Access: OP + */ +MLXSW_ITEM32(reg, rmft2, op, 0x00, 20, 2); + +/* reg_rmft2_a + * Activity. Set for new entries. Set if a packet lookup has hit on the specific + * entry. + * Access: RO + */ +MLXSW_ITEM32(reg, rmft2, a, 0x00, 16, 1); + +/* reg_rmft2_offset + * Offset within the multicast forwarding table to write to. + * Access: Index + */ +MLXSW_ITEM32(reg, rmft2, offset, 0x00, 0, 16); + +/* reg_rmft2_virtual_router + * Virtual Router ID. Range from 0..cap_max_virtual_routers-1 + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, virtual_router, 0x04, 0, 16); + +enum mlxsw_reg_rmft2_irif_mask { + MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, + MLXSW_REG_RMFT2_IRIF_MASK_COMPARE +}; + +/* reg_rmft2_irif_mask + * Ingress RIF mask. 
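+ * When set to COMPARE, the packet's ingress RIF must match the irif field
+ * below; when set to IGNORE, the ingress RIF is not checked.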
+ * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, irif_mask, 0x08, 24, 1); + +/* reg_rmft2_irif + * Ingress RIF index. + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, irif, 0x08, 0, 16); + +/* reg_rmft2_dip{4,6} + * Destination IPv4/6 address + * Access: RW + */ +MLXSW_ITEM_BUF(reg, rmft2, dip6, 0x10, 16); +MLXSW_ITEM32(reg, rmft2, dip4, 0x1C, 0, 32); + +/* reg_rmft2_dip{4,6}_mask + * A bit that is set directs the TCAM to compare the corresponding bit in key. A + * bit that is clear directs the TCAM to ignore the corresponding bit in key. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, rmft2, dip6_mask, 0x20, 16); +MLXSW_ITEM32(reg, rmft2, dip4_mask, 0x2C, 0, 32); + +/* reg_rmft2_sip{4,6} + * Source IPv4/6 address + * Access: RW + */ +MLXSW_ITEM_BUF(reg, rmft2, sip6, 0x30, 16); +MLXSW_ITEM32(reg, rmft2, sip4, 0x3C, 0, 32); + +/* reg_rmft2_sip{4,6}_mask + * A bit that is set directs the TCAM to compare the corresponding bit in key. A + * bit that is clear directs the TCAM to ignore the corresponding bit in key. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, rmft2, sip6_mask, 0x40, 16); +MLXSW_ITEM32(reg, rmft2, sip4_mask, 0x4C, 0, 32); + +/* reg_rmft2_flexible_action_set + * ACL action set. The only supported action types in this field and in any + * action-set pointed from here are as follows: + * 00h: ACTION_NULL + * 01h: ACTION_MAC_TTL, only TTL configuration is supported. + * 03h: ACTION_TRAP + * 06h: ACTION_QOS + * 08h: ACTION_POLICING_MONITORING + * 10h: ACTION_ROUTER_MC + * Access: RW + */ +MLXSW_ITEM_BUF(reg, rmft2, flexible_action_set, 0x80, + MLXSW_REG_FLEX_ACTION_SET_LEN); + +static inline void +mlxsw_reg_rmft2_common_pack(char *payload, bool v, u16 offset, + u16 virtual_router, + enum mlxsw_reg_rmft2_irif_mask irif_mask, u16 irif, + const char *flex_action_set) +{ + MLXSW_REG_ZERO(rmft2, payload); + mlxsw_reg_rmft2_v_set(payload, v); + mlxsw_reg_rmft2_op_set(payload, MLXSW_REG_RMFT2_OP_READ_WRITE); + mlxsw_reg_rmft2_offset_set(payload, offset); + mlxsw_reg_rmft2_virtual_router_set(payload, virtual_router); + mlxsw_reg_rmft2_irif_mask_set(payload, irif_mask); + mlxsw_reg_rmft2_irif_set(payload, irif); + if (flex_action_set) + mlxsw_reg_rmft2_flexible_action_set_memcpy_to(payload, + flex_action_set); +} + +static inline void +mlxsw_reg_rmft2_ipv4_pack(char *payload, bool v, u16 offset, u16 virtual_router, + enum mlxsw_reg_rmft2_irif_mask irif_mask, u16 irif, + u32 dip4, u32 dip4_mask, u32 sip4, u32 sip4_mask, + const char *flexible_action_set) +{ + mlxsw_reg_rmft2_common_pack(payload, v, offset, virtual_router, + irif_mask, irif, flexible_action_set); + mlxsw_reg_rmft2_type_set(payload, MLXSW_REG_RMFT2_TYPE_IPV4); + mlxsw_reg_rmft2_dip4_set(payload, dip4); + mlxsw_reg_rmft2_dip4_mask_set(payload, dip4_mask); + mlxsw_reg_rmft2_sip4_set(payload, sip4); + mlxsw_reg_rmft2_sip4_mask_set(payload, sip4_mask); +} + +static inline void +mlxsw_reg_rmft2_ipv6_pack(char *payload, bool v, u16 offset, u16 virtual_router, + enum mlxsw_reg_rmft2_irif_mask irif_mask, u16 irif, + struct in6_addr dip6, struct in6_addr dip6_mask, + struct in6_addr sip6, struct in6_addr sip6_mask, + const char *flexible_action_set) +{ + mlxsw_reg_rmft2_common_pack(payload, v, offset, virtual_router, + irif_mask, irif, flexible_action_set); + mlxsw_reg_rmft2_type_set(payload, MLXSW_REG_RMFT2_TYPE_IPV6); + mlxsw_reg_rmft2_dip6_memcpy_to(payload, (void *)&dip6); + mlxsw_reg_rmft2_dip6_mask_memcpy_to(payload, (void *)&dip6_mask); + mlxsw_reg_rmft2_sip6_memcpy_to(payload, (void *)&sip6); + mlxsw_reg_rmft2_sip6_mask_memcpy_to(payload, (void 
*)&sip6_mask); +} + +/* REIV - Router Egress Interface to VID Register + * ---------------------------------------------- + * The REIV register maps {eRIF, egress_port} -> VID. + * This mapping is done at the egress, after the ACLs. + * This mapping always takes effect after router, regardless of cast + * (for unicast/multicast/port-base multicast), regardless of eRIF type and + * regardless of bridge decisions (e.g. SFD for unicast or SMPE). + * Reserved when the RIF is a loopback RIF. + * + * Note: Reserved when legacy bridge model is used. + */ +#define MLXSW_REG_REIV_ID 0x8034 +#define MLXSW_REG_REIV_BASE_LEN 0x20 /* base length, without records */ +#define MLXSW_REG_REIV_REC_LEN 0x04 /* record length */ +#define MLXSW_REG_REIV_REC_MAX_COUNT 256 /* firmware limitation */ +#define MLXSW_REG_REIV_LEN (MLXSW_REG_REIV_BASE_LEN + \ + MLXSW_REG_REIV_REC_LEN * \ + MLXSW_REG_REIV_REC_MAX_COUNT) + +MLXSW_REG_DEFINE(reiv, MLXSW_REG_REIV_ID, MLXSW_REG_REIV_LEN); + +/* reg_reiv_port_page + * Port page - elport_record[0] is 256*port_page. + * Access: Index + */ +MLXSW_ITEM32(reg, reiv, port_page, 0x00, 0, 4); + +/* reg_reiv_erif + * Egress RIF. + * Range is 0..cap_max_router_interfaces-1. + * Access: Index + */ +MLXSW_ITEM32(reg, reiv, erif, 0x04, 0, 16); + +/* reg_reiv_rec_update + * Update enable (when write): + * 0 - Do not update the entry. + * 1 - Update the entry. + * Access: OP + */ +MLXSW_ITEM32_INDEXED(reg, reiv, rec_update, MLXSW_REG_REIV_BASE_LEN, 31, 1, + MLXSW_REG_REIV_REC_LEN, 0x00, false); + +/* reg_reiv_rec_evid + * Egress VID. + * Range is 0..4095. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, reiv, rec_evid, MLXSW_REG_REIV_BASE_LEN, 0, 12, + MLXSW_REG_REIV_REC_LEN, 0x00, false); + +static inline void mlxsw_reg_reiv_pack(char *payload, u8 port_page, u16 erif) +{ + MLXSW_REG_ZERO(reiv, payload); + mlxsw_reg_reiv_port_page_set(payload, port_page); + mlxsw_reg_reiv_erif_set(payload, erif); +} + +/* MFCR - Management Fan Control Register + * -------------------------------------- + * This register controls the settings of the Fan Speed PWM mechanism. + */ +#define MLXSW_REG_MFCR_ID 0x9001 +#define MLXSW_REG_MFCR_LEN 0x08 + +MLXSW_REG_DEFINE(mfcr, MLXSW_REG_MFCR_ID, MLXSW_REG_MFCR_LEN); + +enum mlxsw_reg_mfcr_pwm_frequency { + MLXSW_REG_MFCR_PWM_FEQ_11HZ = 0x00, + MLXSW_REG_MFCR_PWM_FEQ_14_7HZ = 0x01, + MLXSW_REG_MFCR_PWM_FEQ_22_1HZ = 0x02, + MLXSW_REG_MFCR_PWM_FEQ_1_4KHZ = 0x40, + MLXSW_REG_MFCR_PWM_FEQ_5KHZ = 0x41, + MLXSW_REG_MFCR_PWM_FEQ_20KHZ = 0x42, + MLXSW_REG_MFCR_PWM_FEQ_22_5KHZ = 0x43, + MLXSW_REG_MFCR_PWM_FEQ_25KHZ = 0x44, +}; + +/* reg_mfcr_pwm_frequency + * Controls the frequency of the PWM signal. + * Access: RW + */ +MLXSW_ITEM32(reg, mfcr, pwm_frequency, 0x00, 0, 7); + +#define MLXSW_MFCR_TACHOS_MAX 10 + +/* reg_mfcr_tacho_active + * Indicates which of the tachometer is active (bit per tachometer). + * Access: RO + */ +MLXSW_ITEM32(reg, mfcr, tacho_active, 0x04, 16, MLXSW_MFCR_TACHOS_MAX); + +#define MLXSW_MFCR_PWMS_MAX 5 + +/* reg_mfcr_pwm_active + * Indicates which of the PWM control is active (bit per PWM). 
+ * Access: RO + */ +MLXSW_ITEM32(reg, mfcr, pwm_active, 0x04, 0, MLXSW_MFCR_PWMS_MAX); + +static inline void +mlxsw_reg_mfcr_pack(char *payload, + enum mlxsw_reg_mfcr_pwm_frequency pwm_frequency) +{ + MLXSW_REG_ZERO(mfcr, payload); + mlxsw_reg_mfcr_pwm_frequency_set(payload, pwm_frequency); +} + +static inline void +mlxsw_reg_mfcr_unpack(char *payload, + enum mlxsw_reg_mfcr_pwm_frequency *p_pwm_frequency, + u16 *p_tacho_active, u8 *p_pwm_active) +{ + *p_pwm_frequency = mlxsw_reg_mfcr_pwm_frequency_get(payload); + *p_tacho_active = mlxsw_reg_mfcr_tacho_active_get(payload); + *p_pwm_active = mlxsw_reg_mfcr_pwm_active_get(payload); +} + +/* MFSC - Management Fan Speed Control Register + * -------------------------------------------- + * This register controls the settings of the Fan Speed PWM mechanism. + */ +#define MLXSW_REG_MFSC_ID 0x9002 +#define MLXSW_REG_MFSC_LEN 0x08 + +MLXSW_REG_DEFINE(mfsc, MLXSW_REG_MFSC_ID, MLXSW_REG_MFSC_LEN); + +/* reg_mfsc_pwm + * Fan pwm to control / monitor. + * Access: Index + */ +MLXSW_ITEM32(reg, mfsc, pwm, 0x00, 24, 3); + +/* reg_mfsc_pwm_duty_cycle + * Controls the duty cycle of the PWM. Value range from 0..255 to + * represent duty cycle of 0%...100%. + * Access: RW + */ +MLXSW_ITEM32(reg, mfsc, pwm_duty_cycle, 0x04, 0, 8); + +static inline void mlxsw_reg_mfsc_pack(char *payload, u8 pwm, + u8 pwm_duty_cycle) +{ + MLXSW_REG_ZERO(mfsc, payload); + mlxsw_reg_mfsc_pwm_set(payload, pwm); + mlxsw_reg_mfsc_pwm_duty_cycle_set(payload, pwm_duty_cycle); +} + +/* MFSM - Management Fan Speed Measurement + * --------------------------------------- + * This register controls the settings of the Tacho measurements and + * enables reading the Tachometer measurements. + */ +#define MLXSW_REG_MFSM_ID 0x9003 +#define MLXSW_REG_MFSM_LEN 0x08 + +MLXSW_REG_DEFINE(mfsm, MLXSW_REG_MFSM_ID, MLXSW_REG_MFSM_LEN); + +/* reg_mfsm_tacho + * Fan tachometer index. + * Access: Index + */ +MLXSW_ITEM32(reg, mfsm, tacho, 0x00, 24, 4); + +/* reg_mfsm_rpm + * Fan speed (round per minute). + * Access: RO + */ +MLXSW_ITEM32(reg, mfsm, rpm, 0x04, 0, 16); + +static inline void mlxsw_reg_mfsm_pack(char *payload, u8 tacho) +{ + MLXSW_REG_ZERO(mfsm, payload); + mlxsw_reg_mfsm_tacho_set(payload, tacho); +} + +/* MFSL - Management Fan Speed Limit Register + * ------------------------------------------ + * The Fan Speed Limit register is used to configure the fan speed + * event / interrupt notification mechanism. Fan speed threshold are + * defined for both under-speed and over-speed. + */ +#define MLXSW_REG_MFSL_ID 0x9004 +#define MLXSW_REG_MFSL_LEN 0x0C + +MLXSW_REG_DEFINE(mfsl, MLXSW_REG_MFSL_ID, MLXSW_REG_MFSL_LEN); + +/* reg_mfsl_tacho + * Fan tachometer index. + * Access: Index + */ +MLXSW_ITEM32(reg, mfsl, tacho, 0x00, 24, 4); + +/* reg_mfsl_tach_min + * Tachometer minimum value (minimum RPM). + * Access: RW + */ +MLXSW_ITEM32(reg, mfsl, tach_min, 0x04, 0, 16); + +/* reg_mfsl_tach_max + * Tachometer maximum value (maximum RPM). 
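+ * Tachometer readings outside the [tach_min, tach_max] range are reported
+ * through the FORE register.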
+ * Access: RW + */ +MLXSW_ITEM32(reg, mfsl, tach_max, 0x08, 0, 16); + +static inline void mlxsw_reg_mfsl_pack(char *payload, u8 tacho, + u16 tach_min, u16 tach_max) +{ + MLXSW_REG_ZERO(mfsl, payload); + mlxsw_reg_mfsl_tacho_set(payload, tacho); + mlxsw_reg_mfsl_tach_min_set(payload, tach_min); + mlxsw_reg_mfsl_tach_max_set(payload, tach_max); +} + +static inline void mlxsw_reg_mfsl_unpack(char *payload, u8 tacho, + u16 *p_tach_min, u16 *p_tach_max) +{ + if (p_tach_min) + *p_tach_min = mlxsw_reg_mfsl_tach_min_get(payload); + + if (p_tach_max) + *p_tach_max = mlxsw_reg_mfsl_tach_max_get(payload); +} + +/* FORE - Fan Out of Range Event Register + * -------------------------------------- + * This register reports the status of the controlled fans compared to the + * range defined by the MFSL register. + */ +#define MLXSW_REG_FORE_ID 0x9007 +#define MLXSW_REG_FORE_LEN 0x0C + +MLXSW_REG_DEFINE(fore, MLXSW_REG_FORE_ID, MLXSW_REG_FORE_LEN); + +/* fan_under_limit + * Fan speed is below the low limit defined in MFSL register. Each bit relates + * to a single tachometer and indicates the specific tachometer reading is + * below the threshold. + * Access: RO + */ +MLXSW_ITEM32(reg, fore, fan_under_limit, 0x00, 16, 10); + +static inline void mlxsw_reg_fore_unpack(char *payload, u8 tacho, + bool *fault) +{ + u16 limit; + + if (fault) { + limit = mlxsw_reg_fore_fan_under_limit_get(payload); + *fault = limit & BIT(tacho); + } +} + +/* MTCAP - Management Temperature Capabilities + * ------------------------------------------- + * This register exposes the capabilities of the device and + * system temperature sensing. + */ +#define MLXSW_REG_MTCAP_ID 0x9009 +#define MLXSW_REG_MTCAP_LEN 0x08 + +MLXSW_REG_DEFINE(mtcap, MLXSW_REG_MTCAP_ID, MLXSW_REG_MTCAP_LEN); + +/* reg_mtcap_sensor_count + * Number of sensors supported by the device. + * This includes the QSFP module sensors (if exists in the QSFP module). + * Access: RO + */ +MLXSW_ITEM32(reg, mtcap, sensor_count, 0x00, 0, 7); + +/* MTMP - Management Temperature + * ----------------------------- + * This register controls the settings of the temperature measurements + * and enables reading the temperature measurements. Note that temperature + * is in 0.125 degrees Celsius. + */ +#define MLXSW_REG_MTMP_ID 0x900A +#define MLXSW_REG_MTMP_LEN 0x20 + +MLXSW_REG_DEFINE(mtmp, MLXSW_REG_MTMP_ID, MLXSW_REG_MTMP_LEN); + +/* reg_mtmp_slot_index + * Slot index (0: Main board). + * Access: Index + */ +MLXSW_ITEM32(reg, mtmp, slot_index, 0x00, 16, 4); + +#define MLXSW_REG_MTMP_MODULE_INDEX_MIN 64 +#define MLXSW_REG_MTMP_GBOX_INDEX_MIN 256 +/* reg_mtmp_sensor_index + * Sensors index to access. + * 64-127 of sensor_index are mapped to the SFP+/QSFP modules sequentially + * (module 0 is mapped to sensor_index 64). + * Access: Index + */ +MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 12); + +/* Convert to milli degrees Celsius */ +#define MLXSW_REG_MTMP_TEMP_TO_MC(val) ({ typeof(val) v_ = (val); \ + ((v_) >= 0) ? ((v_) * 125) : \ + ((s16)((GENMASK(15, 0) + (v_) + 1) \ + * 125)); }) + +/* reg_mtmp_max_operational_temperature + * The highest temperature in the nominal operational range. Reading is in + * 0.125 Celsius degrees units. + * In case of module this is SFF critical temperature threshold. + * Access: RO + */ +MLXSW_ITEM32(reg, mtmp, max_operational_temperature, 0x04, 16, 16); + +/* reg_mtmp_temperature + * Temperature reading from the sensor. Reading is in 0.125 Celsius + * degrees units. 
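+ * Use MLXSW_REG_MTMP_TEMP_TO_MC() to convert the raw value to milli-degrees
+ * Celsius; for example, a reading of 400 corresponds to 400 * 125 = 50000,
+ * i.e. 50.0 degrees.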
+ * Access: RO + */ +MLXSW_ITEM32(reg, mtmp, temperature, 0x04, 0, 16); + +/* reg_mtmp_mte + * Max Temperature Enable - enables measuring the max temperature on a sensor. + * Access: RW + */ +MLXSW_ITEM32(reg, mtmp, mte, 0x08, 31, 1); + +/* reg_mtmp_mtr + * Max Temperature Reset - clears the value of the max temperature register. + * Access: WO + */ +MLXSW_ITEM32(reg, mtmp, mtr, 0x08, 30, 1); + +/* reg_mtmp_max_temperature + * The highest measured temperature from the sensor. + * When the bit mte is cleared, the field max_temperature is reserved. + * Access: RO + */ +MLXSW_ITEM32(reg, mtmp, max_temperature, 0x08, 0, 16); + +/* reg_mtmp_tee + * Temperature Event Enable. + * 0 - Do not generate event + * 1 - Generate event + * 2 - Generate single event + * Access: RW + */ + +enum mlxsw_reg_mtmp_tee { + MLXSW_REG_MTMP_TEE_NO_EVENT, + MLXSW_REG_MTMP_TEE_GENERATE_EVENT, + MLXSW_REG_MTMP_TEE_GENERATE_SINGLE_EVENT, +}; + +MLXSW_ITEM32(reg, mtmp, tee, 0x0C, 30, 2); + +#define MLXSW_REG_MTMP_THRESH_HI 0x348 /* 105 Celsius */ + +/* reg_mtmp_temperature_threshold_hi + * High threshold for Temperature Warning Event. In 0.125 Celsius. + * Access: RW + */ +MLXSW_ITEM32(reg, mtmp, temperature_threshold_hi, 0x0C, 0, 16); + +#define MLXSW_REG_MTMP_HYSTERESIS_TEMP 0x28 /* 5 Celsius */ +/* reg_mtmp_temperature_threshold_lo + * Low threshold for Temperature Warning Event. In 0.125 Celsius. + * Access: RW + */ +MLXSW_ITEM32(reg, mtmp, temperature_threshold_lo, 0x10, 0, 16); + +#define MLXSW_REG_MTMP_SENSOR_NAME_SIZE 8 + +/* reg_mtmp_sensor_name + * Sensor Name + * Access: RO + */ +MLXSW_ITEM_BUF(reg, mtmp, sensor_name, 0x18, MLXSW_REG_MTMP_SENSOR_NAME_SIZE); + +static inline void mlxsw_reg_mtmp_pack(char *payload, u8 slot_index, + u16 sensor_index, bool max_temp_enable, + bool max_temp_reset) +{ + MLXSW_REG_ZERO(mtmp, payload); + mlxsw_reg_mtmp_slot_index_set(payload, slot_index); + mlxsw_reg_mtmp_sensor_index_set(payload, sensor_index); + mlxsw_reg_mtmp_mte_set(payload, max_temp_enable); + mlxsw_reg_mtmp_mtr_set(payload, max_temp_reset); + mlxsw_reg_mtmp_temperature_threshold_hi_set(payload, + MLXSW_REG_MTMP_THRESH_HI); +} + +static inline void mlxsw_reg_mtmp_unpack(char *payload, int *p_temp, + int *p_max_temp, int *p_temp_hi, + int *p_max_oper_temp, + char *sensor_name) +{ + s16 temp; + + if (p_temp) { + temp = mlxsw_reg_mtmp_temperature_get(payload); + *p_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + } + if (p_max_temp) { + temp = mlxsw_reg_mtmp_max_temperature_get(payload); + *p_max_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + } + if (p_temp_hi) { + temp = mlxsw_reg_mtmp_temperature_threshold_hi_get(payload); + *p_temp_hi = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + } + if (p_max_oper_temp) { + temp = mlxsw_reg_mtmp_max_operational_temperature_get(payload); + *p_max_oper_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + } + if (sensor_name) + mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name); +} + +/* MTWE - Management Temperature Warning Event + * ------------------------------------------- + * This register is used for over temperature warning. + */ +#define MLXSW_REG_MTWE_ID 0x900B +#define MLXSW_REG_MTWE_LEN 0x10 + +MLXSW_REG_DEFINE(mtwe, MLXSW_REG_MTWE_ID, MLXSW_REG_MTWE_LEN); + +/* reg_mtwe_sensor_warning + * Bit vector indicating which of the sensor reading is above threshold. + * Address 00h bit31 is sensor_warning[127]. + * Address 0Ch bit0 is sensor_warning[0]. 
+ * Access: RO + */ +MLXSW_ITEM_BIT_ARRAY(reg, mtwe, sensor_warning, 0x0, 0x10, 1); + +/* MTBR - Management Temperature Bulk Register + * ------------------------------------------- + * This register is used for bulk temperature reading. + */ +#define MLXSW_REG_MTBR_ID 0x900F +#define MLXSW_REG_MTBR_BASE_LEN 0x10 /* base length, without records */ +#define MLXSW_REG_MTBR_REC_LEN 0x04 /* record length */ +#define MLXSW_REG_MTBR_REC_MAX_COUNT 47 /* firmware limitation */ +#define MLXSW_REG_MTBR_LEN (MLXSW_REG_MTBR_BASE_LEN + \ + MLXSW_REG_MTBR_REC_LEN * \ + MLXSW_REG_MTBR_REC_MAX_COUNT) + +MLXSW_REG_DEFINE(mtbr, MLXSW_REG_MTBR_ID, MLXSW_REG_MTBR_LEN); + +/* reg_mtbr_slot_index + * Slot index (0: Main board). + * Access: Index + */ +MLXSW_ITEM32(reg, mtbr, slot_index, 0x00, 16, 4); + +/* reg_mtbr_base_sensor_index + * Base sensors index to access (0 - ASIC sensor, 1-63 - ambient sensors, + * 64-127 are mapped to the SFP+/QSFP modules sequentially). + * Access: Index + */ +MLXSW_ITEM32(reg, mtbr, base_sensor_index, 0x00, 0, 12); + +/* reg_mtbr_num_rec + * Request: Number of records to read + * Response: Number of records read + * See above description for more details. + * Range 1..255 + * Access: RW + */ +MLXSW_ITEM32(reg, mtbr, num_rec, 0x04, 0, 8); + +/* reg_mtbr_rec_max_temp + * The highest measured temperature from the sensor. + * When the bit mte is cleared, the field max_temperature is reserved. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtbr, rec_max_temp, MLXSW_REG_MTBR_BASE_LEN, 16, + 16, MLXSW_REG_MTBR_REC_LEN, 0x00, false); + +/* reg_mtbr_rec_temp + * Temperature reading from the sensor. Reading is in 0..125 Celsius + * degrees units. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtbr, rec_temp, MLXSW_REG_MTBR_BASE_LEN, 0, 16, + MLXSW_REG_MTBR_REC_LEN, 0x00, false); + +static inline void mlxsw_reg_mtbr_pack(char *payload, u8 slot_index, + u16 base_sensor_index, u8 num_rec) +{ + MLXSW_REG_ZERO(mtbr, payload); + mlxsw_reg_mtbr_slot_index_set(payload, slot_index); + mlxsw_reg_mtbr_base_sensor_index_set(payload, base_sensor_index); + mlxsw_reg_mtbr_num_rec_set(payload, num_rec); +} + +/* Error codes from temperatute reading */ +enum mlxsw_reg_mtbr_temp_status { + MLXSW_REG_MTBR_NO_CONN = 0x8000, + MLXSW_REG_MTBR_NO_TEMP_SENS = 0x8001, + MLXSW_REG_MTBR_INDEX_NA = 0x8002, + MLXSW_REG_MTBR_BAD_SENS_INFO = 0x8003, +}; + +/* Base index for reading modules temperature */ +#define MLXSW_REG_MTBR_BASE_MODULE_INDEX 64 + +static inline void mlxsw_reg_mtbr_temp_unpack(char *payload, int rec_ind, + u16 *p_temp, u16 *p_max_temp) +{ + if (p_temp) + *p_temp = mlxsw_reg_mtbr_rec_temp_get(payload, rec_ind); + if (p_max_temp) + *p_max_temp = mlxsw_reg_mtbr_rec_max_temp_get(payload, rec_ind); +} + +/* MCIA - Management Cable Info Access + * ----------------------------------- + * MCIA register is used to access the SFP+ and QSFP connector's EPROM. + */ + +#define MLXSW_REG_MCIA_ID 0x9014 +#define MLXSW_REG_MCIA_LEN 0x40 + +MLXSW_REG_DEFINE(mcia, MLXSW_REG_MCIA_ID, MLXSW_REG_MCIA_LEN); + +/* reg_mcia_l + * Lock bit. Setting this bit will lock the access to the specific + * cable. Used for updating a full page in a cable EPROM. Any access + * other then subsequence writes will fail while the port is locked. + * Access: RW + */ +MLXSW_ITEM32(reg, mcia, l, 0x00, 31, 1); + +/* reg_mcia_module + * Module number. 
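+ * Together with the slot index, selects which transceiver module's EEPROM is
+ * accessed.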
+ * Access: Index + */ +MLXSW_ITEM32(reg, mcia, module, 0x00, 16, 8); + +/* reg_mcia_slot_index + * Slot index (0: Main board) + * Access: Index + */ +MLXSW_ITEM32(reg, mcia, slot, 0x00, 12, 4); + +enum { + MLXSW_REG_MCIA_STATUS_GOOD = 0, + /* No response from module's EEPROM. */ + MLXSW_REG_MCIA_STATUS_NO_EEPROM_MODULE = 1, + /* Module type not supported by the device. */ + MLXSW_REG_MCIA_STATUS_MODULE_NOT_SUPPORTED = 2, + /* No module present indication. */ + MLXSW_REG_MCIA_STATUS_MODULE_NOT_CONNECTED = 3, + /* Error occurred while trying to access module's EEPROM using I2C. */ + MLXSW_REG_MCIA_STATUS_I2C_ERROR = 9, + /* Module is disabled. */ + MLXSW_REG_MCIA_STATUS_MODULE_DISABLED = 16, +}; + +/* reg_mcia_status + * Module status. + * Access: RO + */ +MLXSW_ITEM32(reg, mcia, status, 0x00, 0, 8); + +/* reg_mcia_i2c_device_address + * I2C device address. + * Access: RW + */ +MLXSW_ITEM32(reg, mcia, i2c_device_address, 0x04, 24, 8); + +/* reg_mcia_page_number + * Page number. + * Access: RW + */ +MLXSW_ITEM32(reg, mcia, page_number, 0x04, 16, 8); + +/* reg_mcia_device_address + * Device address. + * Access: RW + */ +MLXSW_ITEM32(reg, mcia, device_address, 0x04, 0, 16); + +/* reg_mcia_bank_number + * Bank number. + * Access: Index + */ +MLXSW_ITEM32(reg, mcia, bank_number, 0x08, 16, 8); + +/* reg_mcia_size + * Number of bytes to read/write (up to 48 bytes). + * Access: RW + */ +MLXSW_ITEM32(reg, mcia, size, 0x08, 0, 16); + +#define MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH 256 +#define MLXSW_REG_MCIA_EEPROM_UP_PAGE_LENGTH 128 +#define MLXSW_REG_MCIA_EEPROM_SIZE 48 +#define MLXSW_REG_MCIA_I2C_ADDR_LOW 0x50 +#define MLXSW_REG_MCIA_I2C_ADDR_HIGH 0x51 +#define MLXSW_REG_MCIA_PAGE0_LO_OFF 0xa0 +#define MLXSW_REG_MCIA_TH_ITEM_SIZE 2 +#define MLXSW_REG_MCIA_TH_PAGE_NUM 3 +#define MLXSW_REG_MCIA_TH_PAGE_CMIS_NUM 2 +#define MLXSW_REG_MCIA_PAGE0_LO 0 +#define MLXSW_REG_MCIA_TH_PAGE_OFF 0x80 +#define MLXSW_REG_MCIA_EEPROM_CMIS_FLAT_MEMORY BIT(7) + +enum mlxsw_reg_mcia_eeprom_module_info_rev_id { + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_UNSPC = 0x00, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8436 = 0x01, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8636 = 0x03, +}; + +enum mlxsw_reg_mcia_eeprom_module_info_id { + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP = 0x03, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP = 0x0C, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS = 0x0D, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28 = 0x11, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_DD = 0x18, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_OSFP = 0x19, +}; + +enum mlxsw_reg_mcia_eeprom_module_info { + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_TYPE_ID, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE, +}; + +/* reg_mcia_eeprom + * Bytes to read/write. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_REG_MCIA_EEPROM_SIZE); + +/* This is used to access the optional upper pages (1-3) in the QSFP+ + * memory map. Page 1 is available on offset 256 through 383, page 2 - + * on offset 384 through 511, page 3 - on offset 512 through 639. 
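+ * The MLXSW_REG_MCIA_PAGE_GET() macro below maps such an offset to its page
+ * number, e.g. (384 - 256) / 128 + 1 = 2, so offset 384 falls on page 2.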
+ */ +#define MLXSW_REG_MCIA_PAGE_GET(off) (((off) - \ + MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) / \ + MLXSW_REG_MCIA_EEPROM_UP_PAGE_LENGTH + 1) + +static inline void mlxsw_reg_mcia_pack(char *payload, u8 slot_index, u8 module, + u8 lock, u8 page_number, + u16 device_addr, u8 size, + u8 i2c_device_addr) +{ + MLXSW_REG_ZERO(mcia, payload); + mlxsw_reg_mcia_slot_set(payload, slot_index); + mlxsw_reg_mcia_module_set(payload, module); + mlxsw_reg_mcia_l_set(payload, lock); + mlxsw_reg_mcia_page_number_set(payload, page_number); + mlxsw_reg_mcia_device_address_set(payload, device_addr); + mlxsw_reg_mcia_size_set(payload, size); + mlxsw_reg_mcia_i2c_device_address_set(payload, i2c_device_addr); +} + +/* MPAT - Monitoring Port Analyzer Table + * ------------------------------------- + * MPAT Register is used to query and configure the Switch PortAnalyzer Table. + * For an enabled analyzer, all fields except e (enable) cannot be modified. + */ +#define MLXSW_REG_MPAT_ID 0x901A +#define MLXSW_REG_MPAT_LEN 0x78 + +MLXSW_REG_DEFINE(mpat, MLXSW_REG_MPAT_ID, MLXSW_REG_MPAT_LEN); + +/* reg_mpat_pa_id + * Port Analyzer ID. + * Access: Index + */ +MLXSW_ITEM32(reg, mpat, pa_id, 0x00, 28, 4); + +/* reg_mpat_session_id + * Mirror Session ID. + * Used for MIRROR_SESSION<i> trap. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, session_id, 0x00, 24, 4); + +/* reg_mpat_system_port + * A unique port identifier for the final destination of the packet. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, system_port, 0x00, 0, 16); + +/* reg_mpat_e + * Enable. Indicating the Port Analyzer is enabled. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, e, 0x04, 31, 1); + +/* reg_mpat_qos + * Quality Of Service Mode. + * 0: CONFIGURED - QoS parameters (Switch Priority, and encapsulation + * PCP, DEI, DSCP or VL) are configured. + * 1: MAINTAIN - QoS parameters (Switch Priority, Color) are the + * same as in the original packet that has triggered the mirroring. For + * SPAN also the pcp,dei are maintained. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, qos, 0x04, 26, 1); + +/* reg_mpat_be + * Best effort mode. Indicates mirroring traffic should not cause packet + * drop or back pressure, but will discard the mirrored packets. Mirrored + * packets will be forwarded on a best effort manner. + * 0: Do not discard mirrored packets + * 1: Discard mirrored packets if causing congestion + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, be, 0x04, 25, 1); + +enum mlxsw_reg_mpat_span_type { + /* Local SPAN Ethernet. + * The original packet is not encapsulated. + */ + MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH = 0x0, + + /* Remote SPAN Ethernet VLAN. + * The packet is forwarded to the monitoring port on the monitoring + * VLAN. + */ + MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH = 0x1, + + /* Encapsulated Remote SPAN Ethernet L3 GRE. + * The packet is encapsulated with GRE header. + */ + MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3 = 0x3, +}; + +/* reg_mpat_span_type + * SPAN type. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, span_type, 0x04, 0, 4); + +/* reg_mpat_pide + * Policer enable. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, pide, 0x0C, 15, 1); + +/* reg_mpat_pid + * Policer ID. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, pid, 0x0C, 0, 14); + +/* Remote SPAN - Ethernet VLAN + * - - - - - - - - - - - - - - + */ + +/* reg_mpat_eth_rspan_vid + * Encapsulation header VLAN ID. 
+ * Access: RW + */ +MLXSW_ITEM32(reg, mpat, eth_rspan_vid, 0x18, 0, 12); + +/* Encapsulated Remote SPAN - Ethernet L2 + * - - - - - - - - - - - - - - - - - - - + */ + +enum mlxsw_reg_mpat_eth_rspan_version { + MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER = 15, +}; + +/* reg_mpat_eth_rspan_version + * RSPAN mirror header version. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, eth_rspan_version, 0x10, 18, 4); + +/* reg_mpat_eth_rspan_mac + * Destination MAC address. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, mpat, eth_rspan_mac, 0x12, 6); + +/* reg_mpat_eth_rspan_tp + * Tag Packet. Indicates whether the mirroring header should be VLAN tagged. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, eth_rspan_tp, 0x18, 16, 1); + +/* Encapsulated Remote SPAN - Ethernet L3 + * - - - - - - - - - - - - - - - - - - - + */ + +enum mlxsw_reg_mpat_eth_rspan_protocol { + MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4, + MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6, +}; + +/* reg_mpat_eth_rspan_protocol + * SPAN encapsulation protocol. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, eth_rspan_protocol, 0x18, 24, 4); + +/* reg_mpat_eth_rspan_ttl + * Encapsulation header Time-to-Live/HopLimit. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, eth_rspan_ttl, 0x1C, 4, 8); + +/* reg_mpat_eth_rspan_smac + * Source MAC address + * Access: RW + */ +MLXSW_ITEM_BUF(reg, mpat, eth_rspan_smac, 0x22, 6); + +/* reg_mpat_eth_rspan_dip* + * Destination IP address. The IP version is configured by protocol. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, eth_rspan_dip4, 0x4C, 0, 32); +MLXSW_ITEM_BUF(reg, mpat, eth_rspan_dip6, 0x40, 16); + +/* reg_mpat_eth_rspan_sip* + * Source IP address. The IP version is configured by protocol. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, eth_rspan_sip4, 0x5C, 0, 32); +MLXSW_ITEM_BUF(reg, mpat, eth_rspan_sip6, 0x50, 16); + +static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id, + u16 system_port, bool e, + enum mlxsw_reg_mpat_span_type span_type) +{ + MLXSW_REG_ZERO(mpat, payload); + mlxsw_reg_mpat_pa_id_set(payload, pa_id); + mlxsw_reg_mpat_system_port_set(payload, system_port); + mlxsw_reg_mpat_e_set(payload, e); + mlxsw_reg_mpat_qos_set(payload, 1); + mlxsw_reg_mpat_be_set(payload, 1); + mlxsw_reg_mpat_span_type_set(payload, span_type); +} + +static inline void mlxsw_reg_mpat_eth_rspan_pack(char *payload, u16 vid) +{ + mlxsw_reg_mpat_eth_rspan_vid_set(payload, vid); +} + +static inline void +mlxsw_reg_mpat_eth_rspan_l2_pack(char *payload, + enum mlxsw_reg_mpat_eth_rspan_version version, + const char *mac, + bool tp) +{ + mlxsw_reg_mpat_eth_rspan_version_set(payload, version); + mlxsw_reg_mpat_eth_rspan_mac_memcpy_to(payload, mac); + mlxsw_reg_mpat_eth_rspan_tp_set(payload, tp); +} + +static inline void +mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(char *payload, u8 ttl, + const char *smac, + u32 sip, u32 dip) +{ + mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl); + mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac); + mlxsw_reg_mpat_eth_rspan_protocol_set(payload, + MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4); + mlxsw_reg_mpat_eth_rspan_sip4_set(payload, sip); + mlxsw_reg_mpat_eth_rspan_dip4_set(payload, dip); +} + +static inline void +mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(char *payload, u8 ttl, + const char *smac, + struct in6_addr sip, struct in6_addr dip) +{ + mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl); + mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac); + mlxsw_reg_mpat_eth_rspan_protocol_set(payload, + MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6); + mlxsw_reg_mpat_eth_rspan_sip6_memcpy_to(payload, (void *)&sip); 
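+	/* The destination address is likewise copied as a raw 16-byte buffer. */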
+ mlxsw_reg_mpat_eth_rspan_dip6_memcpy_to(payload, (void *)&dip); +} + +/* MPAR - Monitoring Port Analyzer Register + * ---------------------------------------- + * MPAR register is used to query and configure the port analyzer port mirroring + * properties. + */ +#define MLXSW_REG_MPAR_ID 0x901B +#define MLXSW_REG_MPAR_LEN 0x0C + +MLXSW_REG_DEFINE(mpar, MLXSW_REG_MPAR_ID, MLXSW_REG_MPAR_LEN); + +/* reg_mpar_local_port + * The local port to mirror the packets from. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, mpar, 0x00, 16, 0x00, 4); + +enum mlxsw_reg_mpar_i_e { + MLXSW_REG_MPAR_TYPE_EGRESS, + MLXSW_REG_MPAR_TYPE_INGRESS, +}; + +/* reg_mpar_i_e + * Ingress/Egress + * Access: Index + */ +MLXSW_ITEM32(reg, mpar, i_e, 0x00, 0, 4); + +/* reg_mpar_enable + * Enable mirroring + * By default, port mirroring is disabled for all ports. + * Access: RW + */ +MLXSW_ITEM32(reg, mpar, enable, 0x04, 31, 1); + +/* reg_mpar_pa_id + * Port Analyzer ID. + * Access: RW + */ +MLXSW_ITEM32(reg, mpar, pa_id, 0x04, 0, 4); + +#define MLXSW_REG_MPAR_RATE_MAX 3500000000UL + +/* reg_mpar_probability_rate + * Sampling rate. + * Valid values are: 1 to 3.5*10^9 + * Value of 1 means "sample all". Default is 1. + * Reserved when Spectrum-1. + * Access: RW + */ +MLXSW_ITEM32(reg, mpar, probability_rate, 0x08, 0, 32); + +static inline void mlxsw_reg_mpar_pack(char *payload, u16 local_port, + enum mlxsw_reg_mpar_i_e i_e, + bool enable, u8 pa_id, + u32 probability_rate) +{ + MLXSW_REG_ZERO(mpar, payload); + mlxsw_reg_mpar_local_port_set(payload, local_port); + mlxsw_reg_mpar_enable_set(payload, enable); + mlxsw_reg_mpar_i_e_set(payload, i_e); + mlxsw_reg_mpar_pa_id_set(payload, pa_id); + mlxsw_reg_mpar_probability_rate_set(payload, probability_rate); +} + +/* MGIR - Management General Information Register + * ---------------------------------------------- + * MGIR register allows software to query the hardware and firmware general + * information. + */ +#define MLXSW_REG_MGIR_ID 0x9020 +#define MLXSW_REG_MGIR_LEN 0x9C + +MLXSW_REG_DEFINE(mgir, MLXSW_REG_MGIR_ID, MLXSW_REG_MGIR_LEN); + +/* reg_mgir_hw_info_device_hw_revision + * Access: RO + */ +MLXSW_ITEM32(reg, mgir, hw_info_device_hw_revision, 0x0, 16, 16); + +#define MLXSW_REG_MGIR_FW_INFO_PSID_SIZE 16 + +/* reg_mgir_fw_info_psid + * PSID (ASCII string). 
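+ * The PSID (parameter-set ID) identifies the board configuration and is used
+ * to match firmware images to the device.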
+ * Access: RO + */ +MLXSW_ITEM_BUF(reg, mgir, fw_info_psid, 0x30, MLXSW_REG_MGIR_FW_INFO_PSID_SIZE); + +/* reg_mgir_fw_info_extended_major + * Access: RO + */ +MLXSW_ITEM32(reg, mgir, fw_info_extended_major, 0x44, 0, 32); + +/* reg_mgir_fw_info_extended_minor + * Access: RO + */ +MLXSW_ITEM32(reg, mgir, fw_info_extended_minor, 0x48, 0, 32); + +/* reg_mgir_fw_info_extended_sub_minor + * Access: RO + */ +MLXSW_ITEM32(reg, mgir, fw_info_extended_sub_minor, 0x4C, 0, 32); + +static inline void mlxsw_reg_mgir_pack(char *payload) +{ + MLXSW_REG_ZERO(mgir, payload); +} + +static inline void +mlxsw_reg_mgir_unpack(char *payload, u32 *hw_rev, char *fw_info_psid, + u32 *fw_major, u32 *fw_minor, u32 *fw_sub_minor) +{ + *hw_rev = mlxsw_reg_mgir_hw_info_device_hw_revision_get(payload); + mlxsw_reg_mgir_fw_info_psid_memcpy_from(payload, fw_info_psid); + *fw_major = mlxsw_reg_mgir_fw_info_extended_major_get(payload); + *fw_minor = mlxsw_reg_mgir_fw_info_extended_minor_get(payload); + *fw_sub_minor = mlxsw_reg_mgir_fw_info_extended_sub_minor_get(payload); +} + +/* MRSR - Management Reset and Shutdown Register + * --------------------------------------------- + * MRSR register is used to reset or shutdown the switch or + * the entire system (when applicable). + */ +#define MLXSW_REG_MRSR_ID 0x9023 +#define MLXSW_REG_MRSR_LEN 0x08 + +MLXSW_REG_DEFINE(mrsr, MLXSW_REG_MRSR_ID, MLXSW_REG_MRSR_LEN); + +/* reg_mrsr_command + * Reset/shutdown command + * 0 - do nothing + * 1 - software reset + * Access: WO + */ +MLXSW_ITEM32(reg, mrsr, command, 0x00, 0, 4); + +static inline void mlxsw_reg_mrsr_pack(char *payload) +{ + MLXSW_REG_ZERO(mrsr, payload); + mlxsw_reg_mrsr_command_set(payload, 1); +} + +/* MLCR - Management LED Control Register + * -------------------------------------- + * Controls the system LEDs. + */ +#define MLXSW_REG_MLCR_ID 0x902B +#define MLXSW_REG_MLCR_LEN 0x0C + +MLXSW_REG_DEFINE(mlcr, MLXSW_REG_MLCR_ID, MLXSW_REG_MLCR_LEN); + +/* reg_mlcr_local_port + * Local port number. + * Access: RW + */ +MLXSW_ITEM32_LP(reg, mlcr, 0x00, 16, 0x00, 24); + +#define MLXSW_REG_MLCR_DURATION_MAX 0xFFFF + +/* reg_mlcr_beacon_duration + * Duration of the beacon to be active, in seconds. + * 0x0 - Will turn off the beacon. + * 0xFFFF - Will turn on the beacon until explicitly turned off. + * Access: RW + */ +MLXSW_ITEM32(reg, mlcr, beacon_duration, 0x04, 0, 16); + +/* reg_mlcr_beacon_remain + * Remaining duration of the beacon, in seconds. + * 0xFFFF indicates an infinite amount of time. + * Access: RO + */ +MLXSW_ITEM32(reg, mlcr, beacon_remain, 0x08, 0, 16); + +static inline void mlxsw_reg_mlcr_pack(char *payload, u16 local_port, + bool active) +{ + MLXSW_REG_ZERO(mlcr, payload); + mlxsw_reg_mlcr_local_port_set(payload, local_port); + mlxsw_reg_mlcr_beacon_duration_set(payload, active ? + MLXSW_REG_MLCR_DURATION_MAX : 0); +} + +/* MCION - Management Cable IO and Notifications Register + * ------------------------------------------------------ + * The MCION register is used to query transceiver modules' IO pins and other + * notifications. + */ +#define MLXSW_REG_MCION_ID 0x9052 +#define MLXSW_REG_MCION_LEN 0x18 + +MLXSW_REG_DEFINE(mcion, MLXSW_REG_MCION_ID, MLXSW_REG_MCION_LEN); + +/* reg_mcion_module + * Module number. + * Access: Index + */ +MLXSW_ITEM32(reg, mcion, module, 0x00, 16, 8); + +/* reg_mcion_slot_index + * Slot index. 
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mcion, slot_index, 0x00, 12, 4);
+
+enum {
+ MLXSW_REG_MCION_MODULE_STATUS_BITS_PRESENT_MASK = BIT(0),
+ MLXSW_REG_MCION_MODULE_STATUS_BITS_LOW_POWER_MASK = BIT(8),
+};
+
+/* reg_mcion_module_status_bits
+ * Module IO status as defined by SFF.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mcion, module_status_bits, 0x04, 0, 16);
+
+static inline void mlxsw_reg_mcion_pack(char *payload, u8 slot_index, u8 module)
+{
+ MLXSW_REG_ZERO(mcion, payload);
+ mlxsw_reg_mcion_slot_index_set(payload, slot_index);
+ mlxsw_reg_mcion_module_set(payload, module);
+}
+
+/* MTPPS - Management Pulse Per Second Register
+ * --------------------------------------------
+ * This register provides the device PPS capabilities, configures the PPS in and
+ * out modules and holds the PPS-in time stamp.
+ */
+#define MLXSW_REG_MTPPS_ID 0x9053
+#define MLXSW_REG_MTPPS_LEN 0x3C
+
+MLXSW_REG_DEFINE(mtpps, MLXSW_REG_MTPPS_ID, MLXSW_REG_MTPPS_LEN);
+
+/* reg_mtpps_enable
+ * Enables the PPS functionality for the specific pin.
+ * A boolean variable.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtpps, enable, 0x20, 31, 1);
+
+enum mlxsw_reg_mtpps_pin_mode {
+ MLXSW_REG_MTPPS_PIN_MODE_VIRTUAL_PIN = 0x2,
+};
+
+/* reg_mtpps_pin_mode
+ * Pin mode to be used. The mode must comply with the supported modes of the
+ * requested pin.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtpps, pin_mode, 0x20, 8, 4);
+
+#define MLXSW_REG_MTPPS_PIN_SP_VIRTUAL_PIN 7
+
+/* reg_mtpps_pin
+ * Pin to be configured or queried out of the supported pins.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtpps, pin, 0x20, 0, 8);
+
+/* reg_mtpps_time_stamp
+ * When pin_mode = pps_in, the latched device time when it was triggered from
+ * the external GPIO pin.
+ * When pin_mode = pps_out or virtual_pin or pps_out_and_virtual_pin, the target
+ * time to generate next output signal.
+ * Time is in units of device clock.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, mtpps, time_stamp, 0x28, 0, 64);
+
+static inline void
+mlxsw_reg_mtpps_vpin_pack(char *payload, u64 time_stamp)
+{
+ MLXSW_REG_ZERO(mtpps, payload);
+ mlxsw_reg_mtpps_pin_set(payload, MLXSW_REG_MTPPS_PIN_SP_VIRTUAL_PIN);
+ mlxsw_reg_mtpps_pin_mode_set(payload,
+ MLXSW_REG_MTPPS_PIN_MODE_VIRTUAL_PIN);
+ mlxsw_reg_mtpps_enable_set(payload, true);
+ mlxsw_reg_mtpps_time_stamp_set(payload, time_stamp);
+}
+
+/* MTUTC - Management UTC Register
+ * -------------------------------
+ * Configures the HW UTC counter.
+ */
+#define MLXSW_REG_MTUTC_ID 0x9055
+#define MLXSW_REG_MTUTC_LEN 0x1C
+
+MLXSW_REG_DEFINE(mtutc, MLXSW_REG_MTUTC_ID, MLXSW_REG_MTUTC_LEN);
+
+enum mlxsw_reg_mtutc_operation {
+ MLXSW_REG_MTUTC_OPERATION_SET_TIME_AT_NEXT_SEC = 0,
+ MLXSW_REG_MTUTC_OPERATION_SET_TIME_IMMEDIATE = 1,
+ MLXSW_REG_MTUTC_OPERATION_ADJUST_TIME = 2,
+ MLXSW_REG_MTUTC_OPERATION_ADJUST_FREQ = 3,
+};
+
+/* reg_mtutc_operation
+ * Operation.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, mtutc, operation, 0x00, 0, 4);
+
+/* reg_mtutc_freq_adjustment
+ * Frequency adjustment: Every PPS the HW frequency will be
+ * adjusted by this value. Units of HW clock, where HW counts
+ * 10^9 HW clocks for 1 HW second. Range is from -50,000,000 to +50,000,000.
+ * In Spectrum-2, the field is reversed, positive values mean to decrease the
+ * frequency.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtutc, freq_adjustment, 0x04, 0, 32);
+
+#define MLXSW_REG_MTUTC_MAX_FREQ_ADJ (50 * 1000 * 1000)
+
+/* reg_mtutc_utc_sec
+ * UTC seconds.
+ * Access: WO + */ +MLXSW_ITEM32(reg, mtutc, utc_sec, 0x10, 0, 32); + +/* reg_mtutc_utc_nsec + * UTC nSecs. + * Range 0..(10^9-1) + * Updated when operation is SET_TIME_IMMEDIATE. + * Reserved on Spectrum-1. + * Access: WO + */ +MLXSW_ITEM32(reg, mtutc, utc_nsec, 0x14, 0, 30); + +/* reg_mtutc_time_adjustment + * Time adjustment. + * Units of nSec. + * Range is from -32768 to +32767. + * Updated when operation is ADJUST_TIME. + * Reserved on Spectrum-1. + * Access: WO + */ +MLXSW_ITEM32(reg, mtutc, time_adjustment, 0x18, 0, 32); + +static inline void +mlxsw_reg_mtutc_pack(char *payload, enum mlxsw_reg_mtutc_operation oper, + u32 freq_adj, u32 utc_sec, u32 utc_nsec, u32 time_adj) +{ + MLXSW_REG_ZERO(mtutc, payload); + mlxsw_reg_mtutc_operation_set(payload, oper); + mlxsw_reg_mtutc_freq_adjustment_set(payload, freq_adj); + mlxsw_reg_mtutc_utc_sec_set(payload, utc_sec); + mlxsw_reg_mtutc_utc_nsec_set(payload, utc_nsec); + mlxsw_reg_mtutc_time_adjustment_set(payload, time_adj); +} + +/* MCQI - Management Component Query Information + * --------------------------------------------- + * This register allows querying information about firmware components. + */ +#define MLXSW_REG_MCQI_ID 0x9061 +#define MLXSW_REG_MCQI_BASE_LEN 0x18 +#define MLXSW_REG_MCQI_CAP_LEN 0x14 +#define MLXSW_REG_MCQI_LEN (MLXSW_REG_MCQI_BASE_LEN + MLXSW_REG_MCQI_CAP_LEN) + +MLXSW_REG_DEFINE(mcqi, MLXSW_REG_MCQI_ID, MLXSW_REG_MCQI_LEN); + +/* reg_mcqi_component_index + * Index of the accessed component. + * Access: Index + */ +MLXSW_ITEM32(reg, mcqi, component_index, 0x00, 0, 16); + +enum mlxfw_reg_mcqi_info_type { + MLXSW_REG_MCQI_INFO_TYPE_CAPABILITIES, +}; + +/* reg_mcqi_info_type + * Component properties set. + * Access: RW + */ +MLXSW_ITEM32(reg, mcqi, info_type, 0x08, 0, 5); + +/* reg_mcqi_offset + * The requested/returned data offset from the section start, given in bytes. + * Must be DWORD aligned. + * Access: RW + */ +MLXSW_ITEM32(reg, mcqi, offset, 0x10, 0, 32); + +/* reg_mcqi_data_size + * The requested/returned data size, given in bytes. If data_size is not DWORD + * aligned, the last bytes are zero padded. + * Access: RW + */ +MLXSW_ITEM32(reg, mcqi, data_size, 0x14, 0, 16); + +/* reg_mcqi_cap_max_component_size + * Maximum size for this component, given in bytes. + * Access: RO + */ +MLXSW_ITEM32(reg, mcqi, cap_max_component_size, 0x20, 0, 32); + +/* reg_mcqi_cap_log_mcda_word_size + * Log 2 of the access word size in bytes. Read and write access must be aligned + * to the word size. Write access must be done for an integer number of words. 
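A PTP frequency adjustment maps onto the ADJUST_FREQ operation, with the requested value bounded by MLXSW_REG_MTUTC_MAX_FREQ_ADJ; per the freq_adjustment description above, the units work out to parts per billion. A hedged sketch (mlxsw_reg_write()/MLXSW_REG() assumed from the driver core):

static int example_mtutc_adjfreq(struct mlxsw_core *core, s32 ppb)
{
        char mtutc_pl[MLXSW_REG_MTUTC_LEN];

        if (ppb < -MLXSW_REG_MTUTC_MAX_FREQ_ADJ ||
            ppb > MLXSW_REG_MTUTC_MAX_FREQ_ADJ)
                return -ERANGE;

        /* On Spectrum-2 the sign is interpreted in reverse, so a caller
         * may need to negate ppb depending on the ASIC generation.
         */
        mlxsw_reg_mtutc_pack(mtutc_pl, MLXSW_REG_MTUTC_OPERATION_ADJUST_FREQ,
                             ppb, 0, 0, 0);
        return mlxsw_reg_write(core, MLXSW_REG(mtutc), mtutc_pl);
}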
+ * Access: RO + */ +MLXSW_ITEM32(reg, mcqi, cap_log_mcda_word_size, 0x24, 28, 4); + +/* reg_mcqi_cap_mcda_max_write_size + * Maximal write size for MCDA register + * Access: RO + */ +MLXSW_ITEM32(reg, mcqi, cap_mcda_max_write_size, 0x24, 0, 16); + +static inline void mlxsw_reg_mcqi_pack(char *payload, u16 component_index) +{ + MLXSW_REG_ZERO(mcqi, payload); + mlxsw_reg_mcqi_component_index_set(payload, component_index); + mlxsw_reg_mcqi_info_type_set(payload, + MLXSW_REG_MCQI_INFO_TYPE_CAPABILITIES); + mlxsw_reg_mcqi_offset_set(payload, 0); + mlxsw_reg_mcqi_data_size_set(payload, MLXSW_REG_MCQI_CAP_LEN); +} + +static inline void mlxsw_reg_mcqi_unpack(char *payload, + u32 *p_cap_max_component_size, + u8 *p_cap_log_mcda_word_size, + u16 *p_cap_mcda_max_write_size) +{ + *p_cap_max_component_size = + mlxsw_reg_mcqi_cap_max_component_size_get(payload); + *p_cap_log_mcda_word_size = + mlxsw_reg_mcqi_cap_log_mcda_word_size_get(payload); + *p_cap_mcda_max_write_size = + mlxsw_reg_mcqi_cap_mcda_max_write_size_get(payload); +} + +/* MCC - Management Component Control + * ---------------------------------- + * Controls the firmware component and updates the FSM. + */ +#define MLXSW_REG_MCC_ID 0x9062 +#define MLXSW_REG_MCC_LEN 0x1C + +MLXSW_REG_DEFINE(mcc, MLXSW_REG_MCC_ID, MLXSW_REG_MCC_LEN); + +enum mlxsw_reg_mcc_instruction { + MLXSW_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE = 0x01, + MLXSW_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE = 0x02, + MLXSW_REG_MCC_INSTRUCTION_UPDATE_COMPONENT = 0x03, + MLXSW_REG_MCC_INSTRUCTION_VERIFY_COMPONENT = 0x04, + MLXSW_REG_MCC_INSTRUCTION_ACTIVATE = 0x06, + MLXSW_REG_MCC_INSTRUCTION_CANCEL = 0x08, +}; + +/* reg_mcc_instruction + * Command to be executed by the FSM. + * Applicable for write operation only. + * Access: RW + */ +MLXSW_ITEM32(reg, mcc, instruction, 0x00, 0, 8); + +/* reg_mcc_component_index + * Index of the accessed component. Applicable only for commands that + * refer to components. Otherwise, this field is reserved. + * Access: Index + */ +MLXSW_ITEM32(reg, mcc, component_index, 0x04, 0, 16); + +/* reg_mcc_update_handle + * Token representing the current flow executed by the FSM. + * Access: WO + */ +MLXSW_ITEM32(reg, mcc, update_handle, 0x08, 0, 24); + +/* reg_mcc_error_code + * Indicates the successful completion of the instruction, or the reason it + * failed + * Access: RO + */ +MLXSW_ITEM32(reg, mcc, error_code, 0x0C, 8, 8); + +/* reg_mcc_control_state + * Current FSM state + * Access: RO + */ +MLXSW_ITEM32(reg, mcc, control_state, 0x0C, 0, 4); + +/* reg_mcc_component_size + * Component size in bytes. Valid for UPDATE_COMPONENT instruction. Specifying + * the size may shorten the update time. Value 0x0 means that size is + * unspecified. 
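Before a firmware component is flashed, the capabilities returned by MCQI bound how MCDA may be driven: cap_mcda_max_write_size limits the chunk size and cap_log_mcda_word_size the alignment. A sketch of querying them, assuming the core's mlxsw_reg_query() helper:

static int example_mcqi_component_caps(struct mlxsw_core *core, u16 comp_idx)
{
        char mcqi_pl[MLXSW_REG_MCQI_LEN];
        u32 max_size;
        u8 log_word_size;
        u16 max_write_size;
        int err;

        mlxsw_reg_mcqi_pack(mcqi_pl, comp_idx);
        err = mlxsw_reg_query(core, MLXSW_REG(mcqi), mcqi_pl);
        if (err)
                return err;
        mlxsw_reg_mcqi_unpack(mcqi_pl, &max_size, &log_word_size,
                              &max_write_size);
        pr_info("component %u: up to %u bytes, %u-byte words, %u-byte writes\n",
                comp_idx, max_size, 1 << log_word_size, max_write_size);
        return 0;
}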
+ * Access: WO + */ +MLXSW_ITEM32(reg, mcc, component_size, 0x10, 0, 32); + +static inline void mlxsw_reg_mcc_pack(char *payload, + enum mlxsw_reg_mcc_instruction instr, + u16 component_index, u32 update_handle, + u32 component_size) +{ + MLXSW_REG_ZERO(mcc, payload); + mlxsw_reg_mcc_instruction_set(payload, instr); + mlxsw_reg_mcc_component_index_set(payload, component_index); + mlxsw_reg_mcc_update_handle_set(payload, update_handle); + mlxsw_reg_mcc_component_size_set(payload, component_size); +} + +static inline void mlxsw_reg_mcc_unpack(char *payload, u32 *p_update_handle, + u8 *p_error_code, u8 *p_control_state) +{ + if (p_update_handle) + *p_update_handle = mlxsw_reg_mcc_update_handle_get(payload); + if (p_error_code) + *p_error_code = mlxsw_reg_mcc_error_code_get(payload); + if (p_control_state) + *p_control_state = mlxsw_reg_mcc_control_state_get(payload); +} + +/* MCDA - Management Component Data Access + * --------------------------------------- + * This register allows reading and writing a firmware component. + */ +#define MLXSW_REG_MCDA_ID 0x9063 +#define MLXSW_REG_MCDA_BASE_LEN 0x10 +#define MLXSW_REG_MCDA_MAX_DATA_LEN 0x80 +#define MLXSW_REG_MCDA_LEN \ + (MLXSW_REG_MCDA_BASE_LEN + MLXSW_REG_MCDA_MAX_DATA_LEN) + +MLXSW_REG_DEFINE(mcda, MLXSW_REG_MCDA_ID, MLXSW_REG_MCDA_LEN); + +/* reg_mcda_update_handle + * Token representing the current flow executed by the FSM. + * Access: RW + */ +MLXSW_ITEM32(reg, mcda, update_handle, 0x00, 0, 24); + +/* reg_mcda_offset + * Offset of accessed address relative to component start. Accesses must be in + * accordance to log_mcda_word_size in MCQI reg. + * Access: RW + */ +MLXSW_ITEM32(reg, mcda, offset, 0x04, 0, 32); + +/* reg_mcda_size + * Size of the data accessed, given in bytes. + * Access: RW + */ +MLXSW_ITEM32(reg, mcda, size, 0x08, 0, 16); + +/* reg_mcda_data + * Data block accessed. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, mcda, data, 0x10, 0, 32, 4, 0, false); + +static inline void mlxsw_reg_mcda_pack(char *payload, u32 update_handle, + u32 offset, u16 size, u8 *data) +{ + int i; + + MLXSW_REG_ZERO(mcda, payload); + mlxsw_reg_mcda_update_handle_set(payload, update_handle); + mlxsw_reg_mcda_offset_set(payload, offset); + mlxsw_reg_mcda_size_set(payload, size); + + for (i = 0; i < size / 4; i++) + mlxsw_reg_mcda_data_set(payload, i, *(u32 *) &data[i * 4]); +} + +/* MPSC - Monitoring Packet Sampling Configuration Register + * -------------------------------------------------------- + * MPSC Register is used to configure the Packet Sampling mechanism. + */ +#define MLXSW_REG_MPSC_ID 0x9080 +#define MLXSW_REG_MPSC_LEN 0x1C + +MLXSW_REG_DEFINE(mpsc, MLXSW_REG_MPSC_ID, MLXSW_REG_MPSC_LEN); + +/* reg_mpsc_local_port + * Local port number + * Not supported for CPU port + * Access: Index + */ +MLXSW_ITEM32_LP(reg, mpsc, 0x00, 16, 0x00, 12); + +/* reg_mpsc_e + * Enable sampling on port local_port + * Access: RW + */ +MLXSW_ITEM32(reg, mpsc, e, 0x04, 30, 1); + +#define MLXSW_REG_MPSC_RATE_MAX 3500000000UL + +/* reg_mpsc_rate + * Sampling rate = 1 out of rate packets (with randomization around + * the point). 
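Together, MCC and MCDA implement the firmware-update state machine that the mlxfw layer drives: lock a handle, declare the component, stream its data, then verify and release. The sketch below compresses that flow; it omits the control_state/error_code polling a real update needs between steps, expects chunk to honour the MCQI limits, and assumes mlxsw_reg_query()/mlxsw_reg_write()/MLXSW_REG() from the driver core:

static int example_flash_component(struct mlxsw_core *core, u16 comp_idx,
                                   const u8 *image, u32 size, u16 chunk)
{
        char mcc_pl[MLXSW_REG_MCC_LEN];
        char mcda_pl[MLXSW_REG_MCDA_LEN];
        u32 handle, offset;
        u8 err_code;
        int err;

        /* Lock the FSM and learn the update handle. */
        mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE,
                           0, 0, 0);
        err = mlxsw_reg_query(core, MLXSW_REG(mcc), mcc_pl);
        if (err)
                return err;
        mlxsw_reg_mcc_unpack(mcc_pl, &handle, &err_code, NULL);
        if (err_code)
                return -EIO;

        /* Declare the component, then stream it in MCDA-sized chunks;
         * chunk must honour the MCQI word size and max write size.
         */
        mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_UPDATE_COMPONENT,
                           comp_idx, handle, size);
        err = mlxsw_reg_write(core, MLXSW_REG(mcc), mcc_pl);
        if (err)
                return err;
        for (offset = 0; offset < size; offset += chunk) {
                u16 len = min_t(u32, chunk, size - offset);

                mlxsw_reg_mcda_pack(mcda_pl, handle, offset, len,
                                    (u8 *)image + offset);
                err = mlxsw_reg_write(core, MLXSW_REG(mcda), mcda_pl);
                if (err)
                        return err;
        }

        /* Verify and release; activation and the state polling between
         * steps are omitted from this sketch.
         */
        mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_VERIFY_COMPONENT,
                           comp_idx, handle, 0);
        err = mlxsw_reg_write(core, MLXSW_REG(mcc), mcc_pl);
        if (err)
                return err;
        mlxsw_reg_mcc_pack(mcc_pl,
                           MLXSW_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE,
                           0, handle, 0);
        return mlxsw_reg_write(core, MLXSW_REG(mcc), mcc_pl);
}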
Valid values are: 1 to MLXSW_REG_MPSC_RATE_MAX + * Access: RW + */ +MLXSW_ITEM32(reg, mpsc, rate, 0x08, 0, 32); + +static inline void mlxsw_reg_mpsc_pack(char *payload, u16 local_port, bool e, + u32 rate) +{ + MLXSW_REG_ZERO(mpsc, payload); + mlxsw_reg_mpsc_local_port_set(payload, local_port); + mlxsw_reg_mpsc_e_set(payload, e); + mlxsw_reg_mpsc_rate_set(payload, rate); +} + +/* MGPC - Monitoring General Purpose Counter Set Register + * The MGPC register retrieves and sets the General Purpose Counter Set. + */ +#define MLXSW_REG_MGPC_ID 0x9081 +#define MLXSW_REG_MGPC_LEN 0x18 + +MLXSW_REG_DEFINE(mgpc, MLXSW_REG_MGPC_ID, MLXSW_REG_MGPC_LEN); + +/* reg_mgpc_counter_set_type + * Counter set type. + * Access: OP + */ +MLXSW_ITEM32(reg, mgpc, counter_set_type, 0x00, 24, 8); + +/* reg_mgpc_counter_index + * Counter index. + * Access: Index + */ +MLXSW_ITEM32(reg, mgpc, counter_index, 0x00, 0, 24); + +enum mlxsw_reg_mgpc_opcode { + /* Nop */ + MLXSW_REG_MGPC_OPCODE_NOP = 0x00, + /* Clear counters */ + MLXSW_REG_MGPC_OPCODE_CLEAR = 0x08, +}; + +/* reg_mgpc_opcode + * Opcode. + * Access: OP + */ +MLXSW_ITEM32(reg, mgpc, opcode, 0x04, 28, 4); + +/* reg_mgpc_byte_counter + * Byte counter value. + * Access: RW + */ +MLXSW_ITEM64(reg, mgpc, byte_counter, 0x08, 0, 64); + +/* reg_mgpc_packet_counter + * Packet counter value. + * Access: RW + */ +MLXSW_ITEM64(reg, mgpc, packet_counter, 0x10, 0, 64); + +static inline void mlxsw_reg_mgpc_pack(char *payload, u32 counter_index, + enum mlxsw_reg_mgpc_opcode opcode, + enum mlxsw_reg_flow_counter_set_type set_type) +{ + MLXSW_REG_ZERO(mgpc, payload); + mlxsw_reg_mgpc_counter_index_set(payload, counter_index); + mlxsw_reg_mgpc_counter_set_type_set(payload, set_type); + mlxsw_reg_mgpc_opcode_set(payload, opcode); +} + +/* MPRS - Monitoring Parsing State Register + * ---------------------------------------- + * The MPRS register is used for setting up the parsing for hash, + * policy-engine and routing. + */ +#define MLXSW_REG_MPRS_ID 0x9083 +#define MLXSW_REG_MPRS_LEN 0x14 + +MLXSW_REG_DEFINE(mprs, MLXSW_REG_MPRS_ID, MLXSW_REG_MPRS_LEN); + +/* reg_mprs_parsing_depth + * Minimum parsing depth. + * Need to enlarge parsing depth according to L3, MPLS, tunnels, ACL + * rules, traps, hash, etc. Default is 96 bytes. Reserved when SwitchX-2. + * Access: RW + */ +MLXSW_ITEM32(reg, mprs, parsing_depth, 0x00, 0, 16); + +/* reg_mprs_parsing_en + * Parsing enable. + * Bit 0 - Enable parsing of NVE of types VxLAN, VxLAN-GPE, GENEVE and + * NVGRE. Default is enabled. Reserved when SwitchX-2. + * Access: RW + */ +MLXSW_ITEM32(reg, mprs, parsing_en, 0x04, 0, 16); + +/* reg_mprs_vxlan_udp_dport + * VxLAN UDP destination port. + * Used for identifying VxLAN packets and for dport field in + * encapsulation. Default is 4789. + * Access: RW + */ +MLXSW_ITEM32(reg, mprs, vxlan_udp_dport, 0x10, 0, 16); + +static inline void mlxsw_reg_mprs_pack(char *payload, u16 parsing_depth, + u16 vxlan_udp_dport) +{ + MLXSW_REG_ZERO(mprs, payload); + mlxsw_reg_mprs_parsing_depth_set(payload, parsing_depth); + mlxsw_reg_mprs_parsing_en_set(payload, true); + mlxsw_reg_mprs_vxlan_udp_dport_set(payload, vxlan_udp_dport); +} + +/* MOGCR - Monitoring Global Configuration Register + * ------------------------------------------------ + */ +#define MLXSW_REG_MOGCR_ID 0x9086 +#define MLXSW_REG_MOGCR_LEN 0x20 + +MLXSW_REG_DEFINE(mogcr, MLXSW_REG_MOGCR_ID, MLXSW_REG_MOGCR_LEN); + +/* reg_mogcr_ptp_iftc + * PTP Ingress FIFO Trap Clear + * The PTP_ING_FIFO trap provides MTPPTR with clr according + * to this value. 
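Reading a flow counter through MGPC is a single query: pack the counter index with the NOP opcode and pull both 64-bit counters out of the response (the counter set type enum is defined earlier in this header). A sketch, assuming the core's mlxsw_reg_query() helper:

static int example_mgpc_counter_read(struct mlxsw_core *core, u32 counter_index,
                                     enum mlxsw_reg_flow_counter_set_type set_type,
                                     u64 *packets, u64 *bytes)
{
        char mgpc_pl[MLXSW_REG_MGPC_LEN];
        int err;

        mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_NOP,
                            set_type);
        err = mlxsw_reg_query(core, MLXSW_REG(mgpc), mgpc_pl);
        if (err)
                return err;
        *packets = mlxsw_reg_mgpc_packet_counter_get(mgpc_pl);
        *bytes = mlxsw_reg_mgpc_byte_counter_get(mgpc_pl);
        return 0;
}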
Default 0. + * Reserved when IB switches and when SwitchX/-2, Spectrum-2 + * Access: RW + */ +MLXSW_ITEM32(reg, mogcr, ptp_iftc, 0x00, 1, 1); + +/* reg_mogcr_ptp_eftc + * PTP Egress FIFO Trap Clear + * The PTP_EGR_FIFO trap provides MTPPTR with clr according + * to this value. Default 0. + * Reserved when IB switches and when SwitchX/-2, Spectrum-2 + * Access: RW + */ +MLXSW_ITEM32(reg, mogcr, ptp_eftc, 0x00, 0, 1); + +/* reg_mogcr_mirroring_pid_base + * Base policer id for mirroring policers. + * Must have an even value (e.g. 1000, not 1001). + * Reserved when SwitchX/-2, Switch-IB/2, Spectrum-1 and Quantum. + * Access: RW + */ +MLXSW_ITEM32(reg, mogcr, mirroring_pid_base, 0x0C, 0, 14); + +/* MPAGR - Monitoring Port Analyzer Global Register + * ------------------------------------------------ + * This register is used for global port analyzer configurations. + * Note: This register is not supported by current FW versions for Spectrum-1. + */ +#define MLXSW_REG_MPAGR_ID 0x9089 +#define MLXSW_REG_MPAGR_LEN 0x0C + +MLXSW_REG_DEFINE(mpagr, MLXSW_REG_MPAGR_ID, MLXSW_REG_MPAGR_LEN); + +enum mlxsw_reg_mpagr_trigger { + MLXSW_REG_MPAGR_TRIGGER_EGRESS, + MLXSW_REG_MPAGR_TRIGGER_INGRESS, + MLXSW_REG_MPAGR_TRIGGER_INGRESS_WRED, + MLXSW_REG_MPAGR_TRIGGER_INGRESS_SHARED_BUFFER, + MLXSW_REG_MPAGR_TRIGGER_INGRESS_ING_CONG, + MLXSW_REG_MPAGR_TRIGGER_INGRESS_EGR_CONG, + MLXSW_REG_MPAGR_TRIGGER_EGRESS_ECN, + MLXSW_REG_MPAGR_TRIGGER_EGRESS_HIGH_LATENCY, +}; + +/* reg_mpagr_trigger + * Mirror trigger. + * Access: Index + */ +MLXSW_ITEM32(reg, mpagr, trigger, 0x00, 0, 4); + +/* reg_mpagr_pa_id + * Port analyzer ID. + * Access: RW + */ +MLXSW_ITEM32(reg, mpagr, pa_id, 0x04, 0, 4); + +#define MLXSW_REG_MPAGR_RATE_MAX 3500000000UL + +/* reg_mpagr_probability_rate + * Sampling rate. + * Valid values are: 1 to 3.5*10^9 + * Value of 1 means "sample all". Default is 1. + * Access: RW + */ +MLXSW_ITEM32(reg, mpagr, probability_rate, 0x08, 0, 32); + +static inline void mlxsw_reg_mpagr_pack(char *payload, + enum mlxsw_reg_mpagr_trigger trigger, + u8 pa_id, u32 probability_rate) +{ + MLXSW_REG_ZERO(mpagr, payload); + mlxsw_reg_mpagr_trigger_set(payload, trigger); + mlxsw_reg_mpagr_pa_id_set(payload, pa_id); + mlxsw_reg_mpagr_probability_rate_set(payload, probability_rate); +} + +/* MOMTE - Monitoring Mirror Trigger Enable Register + * ------------------------------------------------- + * This register is used to configure the mirror enable for different mirror + * reasons. + */ +#define MLXSW_REG_MOMTE_ID 0x908D +#define MLXSW_REG_MOMTE_LEN 0x10 + +MLXSW_REG_DEFINE(momte, MLXSW_REG_MOMTE_ID, MLXSW_REG_MOMTE_LEN); + +/* reg_momte_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, momte, 0x00, 16, 0x00, 12); + +enum mlxsw_reg_momte_type { + MLXSW_REG_MOMTE_TYPE_WRED = 0x20, + MLXSW_REG_MOMTE_TYPE_SHARED_BUFFER_TCLASS = 0x31, + MLXSW_REG_MOMTE_TYPE_SHARED_BUFFER_TCLASS_DESCRIPTORS = 0x32, + MLXSW_REG_MOMTE_TYPE_SHARED_BUFFER_EGRESS_PORT = 0x33, + MLXSW_REG_MOMTE_TYPE_ING_CONG = 0x40, + MLXSW_REG_MOMTE_TYPE_EGR_CONG = 0x50, + MLXSW_REG_MOMTE_TYPE_ECN = 0x60, + MLXSW_REG_MOMTE_TYPE_HIGH_LATENCY = 0x70, +}; + +/* reg_momte_type + * Type of mirroring. + * Access: Index + */ +MLXSW_ITEM32(reg, momte, type, 0x04, 0, 8); + +/* reg_momte_tclass_en + * TClass/PG mirror enable. Each bit represents corresponding tclass. 
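MPAGR binds a mirror trigger to an analyzer globally; the per-port, per-tclass enable is then done with MOMTE, defined next. An illustrative sketch of arming the egress-ECN trigger (mlxsw_reg_write()/MLXSW_REG() assumed from the driver core):

static int example_mpagr_ecn_mirror(struct mlxsw_core *core, u8 pa_id)
{
        char mpagr_pl[MLXSW_REG_MPAGR_LEN];

        /* probability_rate of 1 mirrors every triggering packet. */
        mlxsw_reg_mpagr_pack(mpagr_pl, MLXSW_REG_MPAGR_TRIGGER_EGRESS_ECN,
                             pa_id, 1);
        return mlxsw_reg_write(core, MLXSW_REG(mpagr), mpagr_pl);
}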
+ * 0: disable (default) + * 1: enable + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, momte, tclass_en, 0x08, 0x08, 1); + +static inline void mlxsw_reg_momte_pack(char *payload, u16 local_port, + enum mlxsw_reg_momte_type type) +{ + MLXSW_REG_ZERO(momte, payload); + mlxsw_reg_momte_local_port_set(payload, local_port); + mlxsw_reg_momte_type_set(payload, type); +} + +/* MTPPPC - Time Precision Packet Port Configuration + * ------------------------------------------------- + * This register serves for configuration of which PTP messages should be + * timestamped. This is a global configuration, despite the register name. + * + * Reserved when Spectrum-2. + */ +#define MLXSW_REG_MTPPPC_ID 0x9090 +#define MLXSW_REG_MTPPPC_LEN 0x28 + +MLXSW_REG_DEFINE(mtpppc, MLXSW_REG_MTPPPC_ID, MLXSW_REG_MTPPPC_LEN); + +/* reg_mtpppc_ing_timestamp_message_type + * Bitwise vector of PTP message types to timestamp at ingress. + * MessageType field as defined by IEEE 1588 + * Each bit corresponds to a value (e.g. Bit0: Sync, Bit1: Delay_Req) + * Default all 0 + * Access: RW + */ +MLXSW_ITEM32(reg, mtpppc, ing_timestamp_message_type, 0x08, 0, 16); + +/* reg_mtpppc_egr_timestamp_message_type + * Bitwise vector of PTP message types to timestamp at egress. + * MessageType field as defined by IEEE 1588 + * Each bit corresponds to a value (e.g. Bit0: Sync, Bit1: Delay_Req) + * Default all 0 + * Access: RW + */ +MLXSW_ITEM32(reg, mtpppc, egr_timestamp_message_type, 0x0C, 0, 16); + +static inline void mlxsw_reg_mtpppc_pack(char *payload, u16 ing, u16 egr) +{ + MLXSW_REG_ZERO(mtpppc, payload); + mlxsw_reg_mtpppc_ing_timestamp_message_type_set(payload, ing); + mlxsw_reg_mtpppc_egr_timestamp_message_type_set(payload, egr); +} + +/* MTPPTR - Time Precision Packet Timestamping Reading + * --------------------------------------------------- + * The MTPPTR is used for reading the per port PTP timestamp FIFO. + * There is a trap for packets which are latched to the timestamp FIFO, thus the + * SW knows which FIFO to read. Note that packets enter the FIFO before been + * trapped. The sequence number is used to synchronize the timestamp FIFO + * entries and the trapped packets. + * Reserved when Spectrum-2. + */ + +#define MLXSW_REG_MTPPTR_ID 0x9091 +#define MLXSW_REG_MTPPTR_BASE_LEN 0x10 /* base length, without records */ +#define MLXSW_REG_MTPPTR_REC_LEN 0x10 /* record length */ +#define MLXSW_REG_MTPPTR_REC_MAX_COUNT 4 +#define MLXSW_REG_MTPPTR_LEN (MLXSW_REG_MTPPTR_BASE_LEN + \ + MLXSW_REG_MTPPTR_REC_LEN * MLXSW_REG_MTPPTR_REC_MAX_COUNT) + +MLXSW_REG_DEFINE(mtpptr, MLXSW_REG_MTPPTR_ID, MLXSW_REG_MTPPTR_LEN); + +/* reg_mtpptr_local_port + * Not supported for CPU port. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, mtpptr, 0x00, 16, 0x00, 12); + +enum mlxsw_reg_mtpptr_dir { + MLXSW_REG_MTPPTR_DIR_INGRESS, + MLXSW_REG_MTPPTR_DIR_EGRESS, +}; + +/* reg_mtpptr_dir + * Direction. + * Access: Index + */ +MLXSW_ITEM32(reg, mtpptr, dir, 0x00, 0, 1); + +/* reg_mtpptr_clr + * Clear the records. + * Access: OP + */ +MLXSW_ITEM32(reg, mtpptr, clr, 0x04, 31, 1); + +/* reg_mtpptr_num_rec + * Number of valid records in the response + * Range 0.. cap_ptp_timestamp_fifo + * Access: RO + */ +MLXSW_ITEM32(reg, mtpptr, num_rec, 0x08, 0, 4); + +/* reg_mtpptr_rec_message_type + * MessageType field as defined by IEEE 1588 Each bit corresponds to a value + * (e.g. 
Bit0: Sync, Bit1: Delay_Req) + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_message_type, + MLXSW_REG_MTPPTR_BASE_LEN, 8, 4, + MLXSW_REG_MTPPTR_REC_LEN, 0, false); + +/* reg_mtpptr_rec_domain_number + * DomainNumber field as defined by IEEE 1588 + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_domain_number, + MLXSW_REG_MTPPTR_BASE_LEN, 0, 8, + MLXSW_REG_MTPPTR_REC_LEN, 0, false); + +/* reg_mtpptr_rec_sequence_id + * SequenceId field as defined by IEEE 1588 + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_sequence_id, + MLXSW_REG_MTPPTR_BASE_LEN, 0, 16, + MLXSW_REG_MTPPTR_REC_LEN, 0x4, false); + +/* reg_mtpptr_rec_timestamp_high + * Timestamp of when the PTP packet has passed through the port Units of PLL + * clock time. + * For Spectrum-1 the PLL clock is 156.25Mhz and PLL clock time is 6.4nSec. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_timestamp_high, + MLXSW_REG_MTPPTR_BASE_LEN, 0, 32, + MLXSW_REG_MTPPTR_REC_LEN, 0x8, false); + +/* reg_mtpptr_rec_timestamp_low + * See rec_timestamp_high. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_timestamp_low, + MLXSW_REG_MTPPTR_BASE_LEN, 0, 32, + MLXSW_REG_MTPPTR_REC_LEN, 0xC, false); + +static inline void mlxsw_reg_mtpptr_unpack(const char *payload, + unsigned int rec, + u8 *p_message_type, + u8 *p_domain_number, + u16 *p_sequence_id, + u64 *p_timestamp) +{ + u32 timestamp_high, timestamp_low; + + *p_message_type = mlxsw_reg_mtpptr_rec_message_type_get(payload, rec); + *p_domain_number = mlxsw_reg_mtpptr_rec_domain_number_get(payload, rec); + *p_sequence_id = mlxsw_reg_mtpptr_rec_sequence_id_get(payload, rec); + timestamp_high = mlxsw_reg_mtpptr_rec_timestamp_high_get(payload, rec); + timestamp_low = mlxsw_reg_mtpptr_rec_timestamp_low_get(payload, rec); + *p_timestamp = (u64)timestamp_high << 32 | timestamp_low; +} + +/* MTPTPT - Monitoring Precision Time Protocol Trap Register + * --------------------------------------------------------- + * This register is used for configuring under which trap to deliver PTP + * packets depending on type of the packet. + */ +#define MLXSW_REG_MTPTPT_ID 0x9092 +#define MLXSW_REG_MTPTPT_LEN 0x08 + +MLXSW_REG_DEFINE(mtptpt, MLXSW_REG_MTPTPT_ID, MLXSW_REG_MTPTPT_LEN); + +enum mlxsw_reg_mtptpt_trap_id { + MLXSW_REG_MTPTPT_TRAP_ID_PTP0, + MLXSW_REG_MTPTPT_TRAP_ID_PTP1, +}; + +/* reg_mtptpt_trap_id + * Trap id. + * Access: Index + */ +MLXSW_ITEM32(reg, mtptpt, trap_id, 0x00, 0, 4); + +/* reg_mtptpt_message_type + * Bitwise vector of PTP message types to trap. This is a necessary but + * non-sufficient condition since need to enable also per port. See MTPPPC. + * Message types are defined by IEEE 1588 Each bit corresponds to a value (e.g. + * Bit0: Sync, Bit1: Delay_Req) + */ +MLXSW_ITEM32(reg, mtptpt, message_type, 0x04, 0, 16); + +static inline void mlxsw_reg_mtptpt_pack(char *payload, + enum mlxsw_reg_mtptpt_trap_id trap_id, + u16 message_type) +{ + MLXSW_REG_ZERO(mtptpt, payload); + mlxsw_reg_mtptpt_trap_id_set(payload, trap_id); + mlxsw_reg_mtptpt_message_type_set(payload, message_type); +} + +/* MTPCPC - Monitoring Time Precision Correction Port Configuration Register + * ------------------------------------------------------------------------- + */ +#define MLXSW_REG_MTPCPC_ID 0x9093 +#define MLXSW_REG_MTPCPC_LEN 0x2C + +MLXSW_REG_DEFINE(mtpcpc, MLXSW_REG_MTPCPC_ID, MLXSW_REG_MTPCPC_LEN); + +/* reg_mtpcpc_pport + * Per port: + * 0: config is global. When reading - the local_port is 1. + * 1: config is per port. 
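Draining the timestamp FIFO amounts to querying MTPPTR with clr set and walking num_rec records with the unpack helper above; the sequence ID, domain and message type are then used to match each record to its trapped PTP packet. A rough sketch, assuming the core's mlxsw_reg_query() helper and the accessors generated by the item macros above:

static int example_mtpptr_fifo_drain(struct mlxsw_core *core, u16 local_port)
{
        char mtpptr_pl[MLXSW_REG_MTPPTR_LEN];
        unsigned int i, num_rec;
        int err;

        MLXSW_REG_ZERO(mtpptr, mtpptr_pl);
        mlxsw_reg_mtpptr_local_port_set(mtpptr_pl, local_port);
        mlxsw_reg_mtpptr_dir_set(mtpptr_pl, MLXSW_REG_MTPPTR_DIR_INGRESS);
        mlxsw_reg_mtpptr_clr_set(mtpptr_pl, true);
        err = mlxsw_reg_query(core, MLXSW_REG(mtpptr), mtpptr_pl);
        if (err)
                return err;

        num_rec = mlxsw_reg_mtpptr_num_rec_get(mtpptr_pl);
        for (i = 0; i < num_rec; i++) {
                u8 message_type, domain_number;
                u16 sequence_id;
                u64 timestamp;

                mlxsw_reg_mtpptr_unpack(mtpptr_pl, i, &message_type,
                                        &domain_number, &sequence_id,
                                        &timestamp);
                pr_debug("rec %u: type %u dom %u seq %u ts %llu\n",
                         i, message_type, domain_number, sequence_id,
                         (unsigned long long)timestamp);
        }
        return 0;
}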
+ * Access: Index + */ +MLXSW_ITEM32(reg, mtpcpc, pport, 0x00, 31, 1); + +/* reg_mtpcpc_local_port + * Local port number. + * Supported to/from CPU port. + * Reserved when pport = 0. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, mtpcpc, 0x00, 16, 0x00, 12); + +/* reg_mtpcpc_ptp_trap_en + * Enable PTP traps. + * The trap_id is configured by MTPTPT. + * Access: RW + */ +MLXSW_ITEM32(reg, mtpcpc, ptp_trap_en, 0x04, 0, 1); + +/* reg_mtpcpc_ing_correction_message_type + * Bitwise vector of PTP message types to update correction-field at ingress. + * MessageType field as defined by IEEE 1588 Each bit corresponds to a value + * (e.g. Bit0: Sync, Bit1: Delay_Req). Supported also from CPU port. + * Default all 0 + * Access: RW + */ +MLXSW_ITEM32(reg, mtpcpc, ing_correction_message_type, 0x10, 0, 16); + +/* reg_mtpcpc_egr_correction_message_type + * Bitwise vector of PTP message types to update correction-field at egress. + * MessageType field as defined by IEEE 1588 Each bit corresponds to a value + * (e.g. Bit0: Sync, Bit1: Delay_Req). Supported also from CPU port. + * Default all 0 + * Access: RW + */ +MLXSW_ITEM32(reg, mtpcpc, egr_correction_message_type, 0x14, 0, 16); + +static inline void mlxsw_reg_mtpcpc_pack(char *payload, bool pport, + u16 local_port, bool ptp_trap_en, + u16 ing, u16 egr) +{ + MLXSW_REG_ZERO(mtpcpc, payload); + mlxsw_reg_mtpcpc_pport_set(payload, pport); + mlxsw_reg_mtpcpc_local_port_set(payload, pport ? local_port : 0); + mlxsw_reg_mtpcpc_ptp_trap_en_set(payload, ptp_trap_en); + mlxsw_reg_mtpcpc_ing_correction_message_type_set(payload, ing); + mlxsw_reg_mtpcpc_egr_correction_message_type_set(payload, egr); +} + +/* MFGD - Monitoring FW General Debug Register + * ------------------------------------------- + */ +#define MLXSW_REG_MFGD_ID 0x90F0 +#define MLXSW_REG_MFGD_LEN 0x0C + +MLXSW_REG_DEFINE(mfgd, MLXSW_REG_MFGD_ID, MLXSW_REG_MFGD_LEN); + +/* reg_mfgd_fw_fatal_event_mode + * 0 - don't check FW fatal (default) + * 1 - check FW fatal - enable MFDE trap + * Access: RW + */ +MLXSW_ITEM32(reg, mfgd, fatal_event_mode, 0x00, 9, 2); + +/* reg_mfgd_trigger_test + * Access: WO + */ +MLXSW_ITEM32(reg, mfgd, trigger_test, 0x00, 11, 1); + +/* MGPIR - Management General Peripheral Information Register + * ---------------------------------------------------------- + * MGPIR register allows software to query the hardware and + * firmware general information of peripheral entities. + */ +#define MLXSW_REG_MGPIR_ID 0x9100 +#define MLXSW_REG_MGPIR_LEN 0xA0 + +MLXSW_REG_DEFINE(mgpir, MLXSW_REG_MGPIR_ID, MLXSW_REG_MGPIR_LEN); + +enum mlxsw_reg_mgpir_device_type { + MLXSW_REG_MGPIR_DEVICE_TYPE_NONE, + MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE, +}; + +/* mgpir_slot_index + * Slot index (0: Main board). + * Access: Index + */ +MLXSW_ITEM32(reg, mgpir, slot_index, 0x00, 28, 4); + +/* mgpir_device_type + * Access: RO + */ +MLXSW_ITEM32(reg, mgpir, device_type, 0x00, 24, 4); + +/* mgpir_devices_per_flash + * Number of devices of device_type per flash (can be shared by few devices). + * Access: RO + */ +MLXSW_ITEM32(reg, mgpir, devices_per_flash, 0x00, 16, 8); + +/* mgpir_num_of_devices + * Number of devices of device_type. + * Access: RO + */ +MLXSW_ITEM32(reg, mgpir, num_of_devices, 0x00, 0, 8); + +/* max_modules_per_slot + * Maximum number of modules that can be connected per slot. + * Access: RO + */ +MLXSW_ITEM32(reg, mgpir, max_modules_per_slot, 0x04, 16, 8); + +/* mgpir_num_of_slots + * Number of slots in the system. 
+ * Access: RO + */ +MLXSW_ITEM32(reg, mgpir, num_of_slots, 0x04, 8, 8); + +/* mgpir_num_of_modules + * Number of modules. + * Access: RO + */ +MLXSW_ITEM32(reg, mgpir, num_of_modules, 0x04, 0, 8); + +static inline void mlxsw_reg_mgpir_pack(char *payload, u8 slot_index) +{ + MLXSW_REG_ZERO(mgpir, payload); + mlxsw_reg_mgpir_slot_index_set(payload, slot_index); +} + +static inline void +mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices, + enum mlxsw_reg_mgpir_device_type *device_type, + u8 *devices_per_flash, u8 *num_of_modules, + u8 *num_of_slots) +{ + if (num_of_devices) + *num_of_devices = mlxsw_reg_mgpir_num_of_devices_get(payload); + if (device_type) + *device_type = mlxsw_reg_mgpir_device_type_get(payload); + if (devices_per_flash) + *devices_per_flash = + mlxsw_reg_mgpir_devices_per_flash_get(payload); + if (num_of_modules) + *num_of_modules = mlxsw_reg_mgpir_num_of_modules_get(payload); + if (num_of_slots) + *num_of_slots = mlxsw_reg_mgpir_num_of_slots_get(payload); +} + +/* MBCT - Management Binary Code Transfer Register + * ----------------------------------------------- + * This register allows to transfer binary codes from the host to + * the management FW by transferring it by chunks of maximum 1KB. + */ +#define MLXSW_REG_MBCT_ID 0x9120 +#define MLXSW_REG_MBCT_LEN 0x420 + +MLXSW_REG_DEFINE(mbct, MLXSW_REG_MBCT_ID, MLXSW_REG_MBCT_LEN); + +/* reg_mbct_slot_index + * Slot index. 0 is reserved. + * Access: Index + */ +MLXSW_ITEM32(reg, mbct, slot_index, 0x00, 0, 4); + +/* reg_mbct_data_size + * Actual data field size in bytes for the current data transfer. + * Access: WO + */ +MLXSW_ITEM32(reg, mbct, data_size, 0x04, 0, 11); + +enum mlxsw_reg_mbct_op { + MLXSW_REG_MBCT_OP_ERASE_INI_IMAGE = 1, + MLXSW_REG_MBCT_OP_DATA_TRANSFER, /* Download */ + MLXSW_REG_MBCT_OP_ACTIVATE, + MLXSW_REG_MBCT_OP_CLEAR_ERRORS = 6, + MLXSW_REG_MBCT_OP_QUERY_STATUS, +}; + +/* reg_mbct_op + * Access: WO + */ +MLXSW_ITEM32(reg, mbct, op, 0x08, 28, 4); + +/* reg_mbct_last + * Indicates that the current data field is the last chunk of the INI. + * Access: WO + */ +MLXSW_ITEM32(reg, mbct, last, 0x08, 26, 1); + +/* reg_mbct_oee + * Opcode Event Enable. When set a BCTOE event will be sent once the opcode + * was executed and the fsm_state has changed. + * Access: WO + */ +MLXSW_ITEM32(reg, mbct, oee, 0x08, 25, 1); + +enum mlxsw_reg_mbct_status { + /* Partial data transfer completed successfully and ready for next + * data transfer. + */ + MLXSW_REG_MBCT_STATUS_PART_DATA = 2, + MLXSW_REG_MBCT_STATUS_LAST_DATA, + MLXSW_REG_MBCT_STATUS_ERASE_COMPLETE, + /* Error - trying to erase INI while it being used. */ + MLXSW_REG_MBCT_STATUS_ERROR_INI_IN_USE, + /* Last data transfer completed, applying magic pattern. */ + MLXSW_REG_MBCT_STATUS_ERASE_FAILED = 7, + MLXSW_REG_MBCT_STATUS_INI_ERROR, + MLXSW_REG_MBCT_STATUS_ACTIVATION_FAILED, + MLXSW_REG_MBCT_STATUS_ILLEGAL_OPERATION = 11, +}; + +/* reg_mbct_status + * Status. + * Access: RO + */ +MLXSW_ITEM32(reg, mbct, status, 0x0C, 24, 5); + +enum mlxsw_reg_mbct_fsm_state { + MLXSW_REG_MBCT_FSM_STATE_INI_IN_USE = 5, + MLXSW_REG_MBCT_FSM_STATE_ERROR, +}; + +/* reg_mbct_fsm_state + * FSM state. + * Access: RO + */ +MLXSW_ITEM32(reg, mbct, fsm_state, 0x0C, 16, 4); + +#define MLXSW_REG_MBCT_DATA_LEN 1024 + +/* reg_mbct_data + * Up to 1KB of data. 
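MGPIR is query-only. A sketch of discovering the peripheral devices and modules behind the main board (slot 0), assuming the core's mlxsw_reg_query() helper:

static int example_mgpir_discover(struct mlxsw_core *core)
{
        char mgpir_pl[MLXSW_REG_MGPIR_LEN];
        enum mlxsw_reg_mgpir_device_type dev_type;
        u8 num_devices, num_modules, num_slots;
        int err;

        mlxsw_reg_mgpir_pack(mgpir_pl, 0);      /* 0: main board */
        err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
        if (err)
                return err;
        mlxsw_reg_mgpir_unpack(mgpir_pl, &num_devices, &dev_type, NULL,
                               &num_modules, &num_slots);
        pr_info("%u device(s) of type %d, %u module(s), %u slot(s)\n",
                num_devices, dev_type, num_modules, num_slots);
        return 0;
}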
+ * Access: WO + */ +MLXSW_ITEM_BUF(reg, mbct, data, 0x20, MLXSW_REG_MBCT_DATA_LEN); + +static inline void mlxsw_reg_mbct_pack(char *payload, u8 slot_index, + enum mlxsw_reg_mbct_op op, bool oee) +{ + MLXSW_REG_ZERO(mbct, payload); + mlxsw_reg_mbct_slot_index_set(payload, slot_index); + mlxsw_reg_mbct_op_set(payload, op); + mlxsw_reg_mbct_oee_set(payload, oee); +} + +static inline void mlxsw_reg_mbct_dt_pack(char *payload, + u16 data_size, bool last, + const char *data) +{ + if (WARN_ON(data_size > MLXSW_REG_MBCT_DATA_LEN)) + return; + mlxsw_reg_mbct_data_size_set(payload, data_size); + mlxsw_reg_mbct_last_set(payload, last); + mlxsw_reg_mbct_data_memcpy_to(payload, data); +} + +static inline void +mlxsw_reg_mbct_unpack(const char *payload, u8 *p_slot_index, + enum mlxsw_reg_mbct_status *p_status, + enum mlxsw_reg_mbct_fsm_state *p_fsm_state) +{ + if (p_slot_index) + *p_slot_index = mlxsw_reg_mbct_slot_index_get(payload); + *p_status = mlxsw_reg_mbct_status_get(payload); + if (p_fsm_state) + *p_fsm_state = mlxsw_reg_mbct_fsm_state_get(payload); +} + +/* MDDT - Management DownStream Device Tunneling Register + * ------------------------------------------------------ + * This register allows to deliver query and request messages (PRM registers, + * commands) to a DownStream device. + */ +#define MLXSW_REG_MDDT_ID 0x9160 +#define MLXSW_REG_MDDT_LEN 0x110 + +MLXSW_REG_DEFINE(mddt, MLXSW_REG_MDDT_ID, MLXSW_REG_MDDT_LEN); + +/* reg_mddt_slot_index + * Slot index. + * Access: Index + */ +MLXSW_ITEM32(reg, mddt, slot_index, 0x00, 8, 4); + +/* reg_mddt_device_index + * Device index. + * Access: Index + */ +MLXSW_ITEM32(reg, mddt, device_index, 0x00, 0, 8); + +/* reg_mddt_read_size + * Read size in D-Words. + * Access: OP + */ +MLXSW_ITEM32(reg, mddt, read_size, 0x04, 24, 8); + +/* reg_mddt_write_size + * Write size in D-Words. + * Access: OP + */ +MLXSW_ITEM32(reg, mddt, write_size, 0x04, 16, 8); + +enum mlxsw_reg_mddt_status { + MLXSW_REG_MDDT_STATUS_OK, +}; + +/* reg_mddt_status + * Return code of the Downstream Device to the register that was sent. 
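An INI image larger than 1KB is pushed through MBCT as a series of DATA_TRANSFER operations with last set on the final chunk. The sketch below shows only the data phase; a real flow erases the old INI first, checks status/fsm_state after each write (via the QUERY_STATUS op or the BCTOE event) and finishes with ACTIVATE. mlxsw_reg_write()/MLXSW_REG() are assumed from the driver core:

static int example_mbct_ini_transfer(struct mlxsw_core *core, u8 slot_index,
                                     const char *ini, size_t size)
{
        char mbct_pl[MLXSW_REG_MBCT_LEN];
        size_t offset = 0;
        int err;

        while (offset < size) {
                u16 chunk = min_t(size_t, size - offset,
                                  MLXSW_REG_MBCT_DATA_LEN);
                bool last = offset + chunk == size;

                mlxsw_reg_mbct_pack(mbct_pl, slot_index,
                                    MLXSW_REG_MBCT_OP_DATA_TRANSFER, false);
                mlxsw_reg_mbct_dt_pack(mbct_pl, chunk, last, ini + offset);
                err = mlxsw_reg_write(core, MLXSW_REG(mbct), mbct_pl);
                if (err)
                        return err;
                offset += chunk;
        }
        return 0;
}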
+ * Access: RO + */ +MLXSW_ITEM32(reg, mddt, status, 0x0C, 24, 8); + +enum mlxsw_reg_mddt_method { + MLXSW_REG_MDDT_METHOD_QUERY, + MLXSW_REG_MDDT_METHOD_WRITE, +}; + +/* reg_mddt_method + * Access: OP + */ +MLXSW_ITEM32(reg, mddt, method, 0x0C, 22, 2); + +/* reg_mddt_register_id + * Access: Index + */ +MLXSW_ITEM32(reg, mddt, register_id, 0x0C, 0, 16); + +#define MLXSW_REG_MDDT_PAYLOAD_OFFSET 0x0C +#define MLXSW_REG_MDDT_PRM_REGISTER_HEADER_LEN 4 + +static inline char *mlxsw_reg_mddt_inner_payload(char *payload) +{ + return payload + MLXSW_REG_MDDT_PAYLOAD_OFFSET + + MLXSW_REG_MDDT_PRM_REGISTER_HEADER_LEN; +} + +static inline void mlxsw_reg_mddt_pack(char *payload, u8 slot_index, + u8 device_index, + enum mlxsw_reg_mddt_method method, + const struct mlxsw_reg_info *reg, + char **inner_payload) +{ + int len = reg->len + MLXSW_REG_MDDT_PRM_REGISTER_HEADER_LEN; + + if (WARN_ON(len + MLXSW_REG_MDDT_PAYLOAD_OFFSET > MLXSW_REG_MDDT_LEN)) + len = MLXSW_REG_MDDT_LEN - MLXSW_REG_MDDT_PAYLOAD_OFFSET; + + MLXSW_REG_ZERO(mddt, payload); + mlxsw_reg_mddt_slot_index_set(payload, slot_index); + mlxsw_reg_mddt_device_index_set(payload, device_index); + mlxsw_reg_mddt_method_set(payload, method); + mlxsw_reg_mddt_register_id_set(payload, reg->id); + mlxsw_reg_mddt_read_size_set(payload, len / 4); + mlxsw_reg_mddt_write_size_set(payload, len / 4); + *inner_payload = mlxsw_reg_mddt_inner_payload(payload); +} + +/* MDDQ - Management DownStream Device Query Register + * -------------------------------------------------- + * This register allows to query the DownStream device properties. The desired + * information is chosen upon the query_type field and is delivered by 32B + * of data blocks. + */ +#define MLXSW_REG_MDDQ_ID 0x9161 +#define MLXSW_REG_MDDQ_LEN 0x30 + +MLXSW_REG_DEFINE(mddq, MLXSW_REG_MDDQ_ID, MLXSW_REG_MDDQ_LEN); + +/* reg_mddq_sie + * Slot info event enable. + * When set to '1', each change in the slot_info.provisioned / sr_valid / + * active / ready will generate a DSDSC event. + * Access: RW + */ +MLXSW_ITEM32(reg, mddq, sie, 0x00, 31, 1); + +enum mlxsw_reg_mddq_query_type { + MLXSW_REG_MDDQ_QUERY_TYPE_SLOT_INFO = 1, + MLXSW_REG_MDDQ_QUERY_TYPE_DEVICE_INFO, /* If there are no devices + * on the slot, data_valid + * will be '0'. + */ + MLXSW_REG_MDDQ_QUERY_TYPE_SLOT_NAME, +}; + +/* reg_mddq_query_type + * Access: Index + */ +MLXSW_ITEM32(reg, mddq, query_type, 0x00, 16, 8); + +/* reg_mddq_slot_index + * Slot index. 0 is reserved. + * Access: Index + */ +MLXSW_ITEM32(reg, mddq, slot_index, 0x00, 0, 4); + +/* reg_mddq_response_msg_seq + * Response message sequential number. For a specific request, the response + * message sequential number is the following one. In addition, the last + * message should be 0. + * Access: RO + */ +MLXSW_ITEM32(reg, mddq, response_msg_seq, 0x04, 16, 8); + +/* reg_mddq_request_msg_seq + * Request message sequential number. + * The first message number should be 0. + * Access: Index + */ +MLXSW_ITEM32(reg, mddq, request_msg_seq, 0x04, 0, 8); + +/* reg_mddq_data_valid + * If set, the data in the data field is valid and contain the information + * for the queried index. + * Access: RO + */ +MLXSW_ITEM32(reg, mddq, data_valid, 0x08, 31, 1); + +/* reg_mddq_slot_info_provisioned + * If set, the INI file is applied and the card is provisioned. 
+ * Access: RO + */ +MLXSW_ITEM32(reg, mddq, slot_info_provisioned, 0x10, 31, 1); + +/* reg_mddq_slot_info_sr_valid + * If set, Shift Register is valid (after being provisioned) and data + * can be sent from the switch ASIC to the line-card CPLD over Shift-Register. + * Access: RO + */ +MLXSW_ITEM32(reg, mddq, slot_info_sr_valid, 0x10, 30, 1); + +enum mlxsw_reg_mddq_slot_info_ready { + MLXSW_REG_MDDQ_SLOT_INFO_READY_NOT_READY, + MLXSW_REG_MDDQ_SLOT_INFO_READY_READY, + MLXSW_REG_MDDQ_SLOT_INFO_READY_ERROR, +}; + +/* reg_mddq_slot_info_lc_ready + * If set, the LC is powered on, matching the INI version and a new FW + * version can be burnt (if necessary). + * Access: RO + */ +MLXSW_ITEM32(reg, mddq, slot_info_lc_ready, 0x10, 28, 2); + +/* reg_mddq_slot_info_active + * If set, the FW has completed the MDDC.device_enable command. + * Access: RO + */ +MLXSW_ITEM32(reg, mddq, slot_info_active, 0x10, 27, 1); + +/* reg_mddq_slot_info_hw_revision + * Major user-configured version number of the current INI file. + * Valid only when active or ready are '1'. + * Access: RO + */ +MLXSW_ITEM32(reg, mddq, slot_info_hw_revision, 0x14, 16, 16); + +/* reg_mddq_slot_info_ini_file_version + * User-configured version number of the current INI file. + * Valid only when active or lc_ready are '1'. + * Access: RO + */ +MLXSW_ITEM32(reg, mddq, slot_info_ini_file_version, 0x14, 0, 16); + +/* reg_mddq_slot_info_card_type + * Access: RO + */ +MLXSW_ITEM32(reg, mddq, slot_info_card_type, 0x18, 0, 8); + +static inline void +__mlxsw_reg_mddq_pack(char *payload, u8 slot_index, + enum mlxsw_reg_mddq_query_type query_type) +{ + MLXSW_REG_ZERO(mddq, payload); + mlxsw_reg_mddq_slot_index_set(payload, slot_index); + mlxsw_reg_mddq_query_type_set(payload, query_type); +} + +static inline void +mlxsw_reg_mddq_slot_info_pack(char *payload, u8 slot_index, bool sie) +{ + __mlxsw_reg_mddq_pack(payload, slot_index, + MLXSW_REG_MDDQ_QUERY_TYPE_SLOT_INFO); + mlxsw_reg_mddq_sie_set(payload, sie); +} + +static inline void +mlxsw_reg_mddq_slot_info_unpack(const char *payload, u8 *p_slot_index, + bool *p_provisioned, bool *p_sr_valid, + enum mlxsw_reg_mddq_slot_info_ready *p_lc_ready, + bool *p_active, u16 *p_hw_revision, + u16 *p_ini_file_version, + u8 *p_card_type) +{ + *p_slot_index = mlxsw_reg_mddq_slot_index_get(payload); + *p_provisioned = mlxsw_reg_mddq_slot_info_provisioned_get(payload); + *p_sr_valid = mlxsw_reg_mddq_slot_info_sr_valid_get(payload); + *p_lc_ready = mlxsw_reg_mddq_slot_info_lc_ready_get(payload); + *p_active = mlxsw_reg_mddq_slot_info_active_get(payload); + *p_hw_revision = mlxsw_reg_mddq_slot_info_hw_revision_get(payload); + *p_ini_file_version = mlxsw_reg_mddq_slot_info_ini_file_version_get(payload); + *p_card_type = mlxsw_reg_mddq_slot_info_card_type_get(payload); +} + +/* reg_mddq_device_info_flash_owner + * If set, the device is the flash owner. Otherwise, a shared flash + * is used by this device (another device is the flash owner). + * Access: RO + */ +MLXSW_ITEM32(reg, mddq, device_info_flash_owner, 0x10, 30, 1); + +/* reg_mddq_device_info_device_index + * Device index. The first device should number 0. + * Access: RO + */ +MLXSW_ITEM32(reg, mddq, device_info_device_index, 0x10, 0, 8); + +/* reg_mddq_device_info_fw_major + * Major FW version number. + * Access: RO + */ +MLXSW_ITEM32(reg, mddq, device_info_fw_major, 0x14, 16, 16); + +/* reg_mddq_device_info_fw_minor + * Minor FW version number. 
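The slot_info view of MDDQ is what line-card provisioning code polls. A sketch of reading the provisioned bit for a single slot, assuming the core's mlxsw_reg_query() helper (only provisioned is propagated here; the other outputs are read and dropped):

static int example_mddq_slot_provisioned(struct mlxsw_core *core,
                                         u8 slot_index, bool *provisioned)
{
        char mddq_pl[MLXSW_REG_MDDQ_LEN];
        enum mlxsw_reg_mddq_slot_info_ready lc_ready;
        bool sr_valid, active;
        u16 hw_revision, ini_version;
        u8 card_type;
        int err;

        mlxsw_reg_mddq_slot_info_pack(mddq_pl, slot_index, false);
        err = mlxsw_reg_query(core, MLXSW_REG(mddq), mddq_pl);
        if (err)
                return err;
        mlxsw_reg_mddq_slot_info_unpack(mddq_pl, &slot_index, provisioned,
                                        &sr_valid, &lc_ready, &active,
                                        &hw_revision, &ini_version,
                                        &card_type);
        return 0;
}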
+ * Access: RO + */ +MLXSW_ITEM32(reg, mddq, device_info_fw_minor, 0x18, 16, 16); + +/* reg_mddq_device_info_fw_sub_minor + * Sub-minor FW version number. + * Access: RO + */ +MLXSW_ITEM32(reg, mddq, device_info_fw_sub_minor, 0x18, 0, 16); + +static inline void +mlxsw_reg_mddq_device_info_pack(char *payload, u8 slot_index, + u8 request_msg_seq) +{ + __mlxsw_reg_mddq_pack(payload, slot_index, + MLXSW_REG_MDDQ_QUERY_TYPE_DEVICE_INFO); + mlxsw_reg_mddq_request_msg_seq_set(payload, request_msg_seq); +} + +static inline void +mlxsw_reg_mddq_device_info_unpack(const char *payload, u8 *p_response_msg_seq, + bool *p_data_valid, bool *p_flash_owner, + u8 *p_device_index, u16 *p_fw_major, + u16 *p_fw_minor, u16 *p_fw_sub_minor) +{ + *p_response_msg_seq = mlxsw_reg_mddq_response_msg_seq_get(payload); + *p_data_valid = mlxsw_reg_mddq_data_valid_get(payload); + *p_flash_owner = mlxsw_reg_mddq_device_info_flash_owner_get(payload); + *p_device_index = mlxsw_reg_mddq_device_info_device_index_get(payload); + *p_fw_major = mlxsw_reg_mddq_device_info_fw_major_get(payload); + *p_fw_minor = mlxsw_reg_mddq_device_info_fw_minor_get(payload); + *p_fw_sub_minor = mlxsw_reg_mddq_device_info_fw_sub_minor_get(payload); +} + +#define MLXSW_REG_MDDQ_SLOT_ASCII_NAME_LEN 20 + +/* reg_mddq_slot_ascii_name + * Slot's ASCII name. + * Access: RO + */ +MLXSW_ITEM_BUF(reg, mddq, slot_ascii_name, 0x10, + MLXSW_REG_MDDQ_SLOT_ASCII_NAME_LEN); + +static inline void +mlxsw_reg_mddq_slot_name_pack(char *payload, u8 slot_index) +{ + __mlxsw_reg_mddq_pack(payload, slot_index, + MLXSW_REG_MDDQ_QUERY_TYPE_SLOT_NAME); +} + +static inline void +mlxsw_reg_mddq_slot_name_unpack(const char *payload, char *slot_ascii_name) +{ + mlxsw_reg_mddq_slot_ascii_name_memcpy_from(payload, slot_ascii_name); +} + +/* MDDC - Management DownStream Device Control Register + * ---------------------------------------------------- + * This register allows to control downstream devices and line cards. + */ +#define MLXSW_REG_MDDC_ID 0x9163 +#define MLXSW_REG_MDDC_LEN 0x30 + +MLXSW_REG_DEFINE(mddc, MLXSW_REG_MDDC_ID, MLXSW_REG_MDDC_LEN); + +/* reg_mddc_slot_index + * Slot index. 0 is reserved. + * Access: Index + */ +MLXSW_ITEM32(reg, mddc, slot_index, 0x00, 0, 4); + +/* reg_mddc_rst + * Reset request. + * Access: OP + */ +MLXSW_ITEM32(reg, mddc, rst, 0x04, 29, 1); + +/* reg_mddc_device_enable + * When set, FW is the manager and allowed to program the downstream device. + * Access: RW + */ +MLXSW_ITEM32(reg, mddc, device_enable, 0x04, 28, 1); + +static inline void mlxsw_reg_mddc_pack(char *payload, u8 slot_index, bool rst, + bool device_enable) +{ + MLXSW_REG_ZERO(mddc, payload); + mlxsw_reg_mddc_slot_index_set(payload, slot_index); + mlxsw_reg_mddc_rst_set(payload, rst); + mlxsw_reg_mddc_device_enable_set(payload, device_enable); +} + +/* MFDE - Monitoring FW Debug Register + * ----------------------------------- + */ +#define MLXSW_REG_MFDE_ID 0x9200 +#define MLXSW_REG_MFDE_LEN 0x30 + +MLXSW_REG_DEFINE(mfde, MLXSW_REG_MFDE_ID, MLXSW_REG_MFDE_LEN); + +/* reg_mfde_irisc_id + * Which irisc triggered the event + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, irisc_id, 0x00, 24, 8); + +enum mlxsw_reg_mfde_severity { + /* Unrecoverable switch behavior */ + MLXSW_REG_MFDE_SEVERITY_FATL = 2, + /* Unexpected state with possible systemic failure */ + MLXSW_REG_MFDE_SEVERITY_NRML = 3, + /* Unexpected state without systemic failure */ + MLXSW_REG_MFDE_SEVERITY_INTR = 5, +}; + +/* reg_mfde_severity + * The severity of the event. 
+ * Access: RO + */ +MLXSW_ITEM32(reg, mfde, severity, 0x00, 16, 8); + +enum mlxsw_reg_mfde_event_id { + /* CRspace timeout */ + MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO = 1, + /* KVD insertion machine stopped */ + MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP, + /* Triggered by MFGD.trigger_test */ + MLXSW_REG_MFDE_EVENT_ID_TEST, + /* Triggered when firmware hits an assert */ + MLXSW_REG_MFDE_EVENT_ID_FW_ASSERT, + /* Fatal error interrupt from hardware */ + MLXSW_REG_MFDE_EVENT_ID_FATAL_CAUSE, +}; + +/* reg_mfde_event_id + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, event_id, 0x00, 0, 16); + +enum mlxsw_reg_mfde_method { + MLXSW_REG_MFDE_METHOD_QUERY, + MLXSW_REG_MFDE_METHOD_WRITE, +}; + +/* reg_mfde_method + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, method, 0x04, 29, 1); + +/* reg_mfde_long_process + * Indicates if the command is in long_process mode. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, long_process, 0x04, 28, 1); + +enum mlxsw_reg_mfde_command_type { + MLXSW_REG_MFDE_COMMAND_TYPE_MAD, + MLXSW_REG_MFDE_COMMAND_TYPE_EMAD, + MLXSW_REG_MFDE_COMMAND_TYPE_CMDIF, +}; + +/* reg_mfde_command_type + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, command_type, 0x04, 24, 2); + +/* reg_mfde_reg_attr_id + * EMAD - register id, MAD - attibute id + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, reg_attr_id, 0x04, 0, 16); + +/* reg_mfde_crspace_to_log_address + * crspace address accessed, which resulted in timeout. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, crspace_to_log_address, 0x10, 0, 32); + +/* reg_mfde_crspace_to_oe + * 0 - New event + * 1 - Old event, occurred before MFGD activation. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, crspace_to_oe, 0x14, 24, 1); + +/* reg_mfde_crspace_to_log_id + * Which irisc triggered the timeout. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, crspace_to_log_id, 0x14, 0, 4); + +/* reg_mfde_crspace_to_log_ip + * IP (instruction pointer) that triggered the timeout. + * Access: RO + */ +MLXSW_ITEM64(reg, mfde, crspace_to_log_ip, 0x18, 0, 64); + +/* reg_mfde_kvd_im_stop_oe + * 0 - New event + * 1 - Old event, occurred before MFGD activation. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, kvd_im_stop_oe, 0x10, 24, 1); + +/* reg_mfde_kvd_im_stop_pipes_mask + * Bit per kvh pipe. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, kvd_im_stop_pipes_mask, 0x10, 0, 16); + +/* reg_mfde_fw_assert_var0-4 + * Variables passed to assert. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, fw_assert_var0, 0x10, 0, 32); +MLXSW_ITEM32(reg, mfde, fw_assert_var1, 0x14, 0, 32); +MLXSW_ITEM32(reg, mfde, fw_assert_var2, 0x18, 0, 32); +MLXSW_ITEM32(reg, mfde, fw_assert_var3, 0x1C, 0, 32); +MLXSW_ITEM32(reg, mfde, fw_assert_var4, 0x20, 0, 32); + +/* reg_mfde_fw_assert_existptr + * The instruction pointer when assert was triggered. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, fw_assert_existptr, 0x24, 0, 32); + +/* reg_mfde_fw_assert_callra + * The return address after triggering assert. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, fw_assert_callra, 0x28, 0, 32); + +/* reg_mfde_fw_assert_oe + * 0 - New event + * 1 - Old event, occurred before MFGD activation. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, fw_assert_oe, 0x2C, 24, 1); + +/* reg_mfde_fw_assert_tile_v + * 0: The assert was from main + * 1: The assert was from a tile + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, fw_assert_tile_v, 0x2C, 23, 1); + +/* reg_mfde_fw_assert_tile_index + * When tile_v=1, the tile_index that caused the assert. 
+ * Access: RO + */ +MLXSW_ITEM32(reg, mfde, fw_assert_tile_index, 0x2C, 16, 6); + +/* reg_mfde_fw_assert_ext_synd + * A generated one-to-one identifier which is specific per-assert. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, fw_assert_ext_synd, 0x2C, 0, 16); + +/* reg_mfde_fatal_cause_id + * HW interrupt cause id. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, fatal_cause_id, 0x10, 0, 18); + +/* reg_mfde_fatal_cause_tile_v + * 0: The assert was from main + * 1: The assert was from a tile + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, fatal_cause_tile_v, 0x14, 23, 1); + +/* reg_mfde_fatal_cause_tile_index + * When tile_v=1, the tile_index that caused the assert. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, fatal_cause_tile_index, 0x14, 16, 6); + +/* TNGCR - Tunneling NVE General Configuration Register + * ---------------------------------------------------- + * The TNGCR register is used for setting up the NVE Tunneling configuration. + */ +#define MLXSW_REG_TNGCR_ID 0xA001 +#define MLXSW_REG_TNGCR_LEN 0x44 + +MLXSW_REG_DEFINE(tngcr, MLXSW_REG_TNGCR_ID, MLXSW_REG_TNGCR_LEN); + +enum mlxsw_reg_tngcr_type { + MLXSW_REG_TNGCR_TYPE_VXLAN, + MLXSW_REG_TNGCR_TYPE_VXLAN_GPE, + MLXSW_REG_TNGCR_TYPE_GENEVE, + MLXSW_REG_TNGCR_TYPE_NVGRE, +}; + +/* reg_tngcr_type + * Tunnel type for encapsulation and decapsulation. The types are mutually + * exclusive. + * Note: For Spectrum the NVE parsing must be enabled in MPRS. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, type, 0x00, 0, 4); + +/* reg_tngcr_nve_valid + * The VTEP is valid. Allows adding FDB entries for tunnel encapsulation. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, nve_valid, 0x04, 31, 1); + +/* reg_tngcr_nve_ttl_uc + * The TTL for NVE tunnel encapsulation underlay unicast packets. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, nve_ttl_uc, 0x04, 0, 8); + +/* reg_tngcr_nve_ttl_mc + * The TTL for NVE tunnel encapsulation underlay multicast packets. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, nve_ttl_mc, 0x08, 0, 8); + +enum { + /* Do not copy flow label. Calculate flow label using nve_flh. */ + MLXSW_REG_TNGCR_FL_NO_COPY, + /* Copy flow label from inner packet if packet is IPv6 and + * encapsulation is by IPv6. Otherwise, calculate flow label using + * nve_flh. + */ + MLXSW_REG_TNGCR_FL_COPY, +}; + +/* reg_tngcr_nve_flc + * For NVE tunnel encapsulation: Flow label copy from inner packet. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, nve_flc, 0x0C, 25, 1); + +enum { + /* Flow label is static. In Spectrum this means '0'. Spectrum-2 + * uses {nve_fl_prefix, nve_fl_suffix}. + */ + MLXSW_REG_TNGCR_FL_NO_HASH, + /* 8 LSBs of the flow label are calculated from ECMP hash of the + * inner packet. 12 MSBs are configured by nve_fl_prefix. + */ + MLXSW_REG_TNGCR_FL_HASH, +}; + +/* reg_tngcr_nve_flh + * NVE flow label hash. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, nve_flh, 0x0C, 24, 1); + +/* reg_tngcr_nve_fl_prefix + * NVE flow label prefix. Constant 12 MSBs of the flow label. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, nve_fl_prefix, 0x0C, 8, 12); + +/* reg_tngcr_nve_fl_suffix + * NVE flow label suffix. Constant 8 LSBs of the flow label. + * Reserved when nve_flh=1 and for Spectrum. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, nve_fl_suffix, 0x0C, 0, 8); + +enum { + /* Source UDP port is fixed (default '0') */ + MLXSW_REG_TNGCR_UDP_SPORT_NO_HASH, + /* Source UDP port is calculated based on hash */ + MLXSW_REG_TNGCR_UDP_SPORT_HASH, +}; + +/* reg_tngcr_nve_udp_sport_type + * NVE UDP source port type. + * Spectrum uses LAG hash (SLCRv2). 
Spectrum-2 uses ECMP hash (RECRv2). + * When the source UDP port is calculated based on hash, then the 8 LSBs + * are calculated from hash the 8 MSBs are configured by + * nve_udp_sport_prefix. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, nve_udp_sport_type, 0x10, 24, 1); + +/* reg_tngcr_nve_udp_sport_prefix + * NVE UDP source port prefix. Constant 8 MSBs of the UDP source port. + * Reserved when NVE type is NVGRE. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, nve_udp_sport_prefix, 0x10, 8, 8); + +/* reg_tngcr_nve_group_size_mc + * The amount of sequential linked lists of MC entries. The first linked + * list is configured by SFD.underlay_mc_ptr. + * Valid values: 1, 2, 4, 8, 16, 32, 64 + * The linked list are configured by TNUMT. + * The hash is set by LAG hash. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, nve_group_size_mc, 0x18, 0, 8); + +/* reg_tngcr_nve_group_size_flood + * The amount of sequential linked lists of flooding entries. The first + * linked list is configured by SFMR.nve_tunnel_flood_ptr + * Valid values: 1, 2, 4, 8, 16, 32, 64 + * The linked list are configured by TNUMT. + * The hash is set by LAG hash. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, nve_group_size_flood, 0x1C, 0, 8); + +/* reg_tngcr_learn_enable + * During decapsulation, whether to learn from NVE port. + * Reserved when Spectrum-2. See TNPC. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, learn_enable, 0x20, 31, 1); + +/* reg_tngcr_underlay_virtual_router + * Underlay virtual router. + * Reserved when Spectrum-2. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, underlay_virtual_router, 0x20, 0, 16); + +/* reg_tngcr_underlay_rif + * Underlay ingress router interface. RIF type should be loopback generic. + * Reserved when Spectrum. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, underlay_rif, 0x24, 0, 16); + +/* reg_tngcr_usipv4 + * Underlay source IPv4 address of the NVE. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, usipv4, 0x28, 0, 32); + +/* reg_tngcr_usipv6 + * Underlay source IPv6 address of the NVE. For Spectrum, must not be + * modified under traffic of NVE tunneling encapsulation. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, tngcr, usipv6, 0x30, 16); + +static inline void mlxsw_reg_tngcr_pack(char *payload, + enum mlxsw_reg_tngcr_type type, + bool valid, u8 ttl) +{ + MLXSW_REG_ZERO(tngcr, payload); + mlxsw_reg_tngcr_type_set(payload, type); + mlxsw_reg_tngcr_nve_valid_set(payload, valid); + mlxsw_reg_tngcr_nve_ttl_uc_set(payload, ttl); + mlxsw_reg_tngcr_nve_ttl_mc_set(payload, ttl); + mlxsw_reg_tngcr_nve_flc_set(payload, MLXSW_REG_TNGCR_FL_NO_COPY); + mlxsw_reg_tngcr_nve_flh_set(payload, 0); + mlxsw_reg_tngcr_nve_udp_sport_type_set(payload, + MLXSW_REG_TNGCR_UDP_SPORT_HASH); + mlxsw_reg_tngcr_nve_udp_sport_prefix_set(payload, 0); + mlxsw_reg_tngcr_nve_group_size_mc_set(payload, 1); + mlxsw_reg_tngcr_nve_group_size_flood_set(payload, 1); +} + +/* TNUMT - Tunneling NVE Underlay Multicast Table Register + * ------------------------------------------------------- + * The TNUMT register is for building the underlay MC table. It is used + * for MC, flooding and BC traffic into the NVE tunnel. + */ +#define MLXSW_REG_TNUMT_ID 0xA003 +#define MLXSW_REG_TNUMT_LEN 0x20 + +MLXSW_REG_DEFINE(tnumt, MLXSW_REG_TNUMT_ID, MLXSW_REG_TNUMT_LEN); + +enum mlxsw_reg_tnumt_record_type { + MLXSW_REG_TNUMT_RECORD_TYPE_IPV4, + MLXSW_REG_TNUMT_RECORD_TYPE_IPV6, + MLXSW_REG_TNUMT_RECORD_TYPE_LABEL, +}; + +/* reg_tnumt_record_type + * Record type. 
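A basic VXLAN VTEP setup packs TNGCR with the VXLAN type and then fills in the underlay source address and virtual router (the Spectrum-1 layout; Spectrum-2 sets underlay_rif instead), with NVE parsing already enabled through MPRS. An illustrative sketch (mlxsw_reg_write()/MLXSW_REG() assumed from the driver core):

static int example_tngcr_vxlan_enable(struct mlxsw_core *core, u32 ul_sip4,
                                      u16 ul_vr_id)
{
        char tngcr_pl[MLXSW_REG_TNGCR_LEN];

        /* Valid VTEP, fixed underlay TTL of 64. */
        mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, true, 64);
        mlxsw_reg_tngcr_underlay_virtual_router_set(tngcr_pl, ul_vr_id);
        mlxsw_reg_tngcr_usipv4_set(tngcr_pl, ul_sip4);
        return mlxsw_reg_write(core, MLXSW_REG(tngcr), tngcr_pl);
}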
+ * Access: RW + */ +MLXSW_ITEM32(reg, tnumt, record_type, 0x00, 28, 4); + +/* reg_tnumt_tunnel_port + * Tunnel port. + * Access: RW + */ +MLXSW_ITEM32(reg, tnumt, tunnel_port, 0x00, 24, 4); + +/* reg_tnumt_underlay_mc_ptr + * Index to the underlay multicast table. + * For Spectrum the index is to the KVD linear. + * Access: Index + */ +MLXSW_ITEM32(reg, tnumt, underlay_mc_ptr, 0x00, 0, 24); + +/* reg_tnumt_vnext + * The next_underlay_mc_ptr is valid. + * Access: RW + */ +MLXSW_ITEM32(reg, tnumt, vnext, 0x04, 31, 1); + +/* reg_tnumt_next_underlay_mc_ptr + * The next index to the underlay multicast table. + * Access: RW + */ +MLXSW_ITEM32(reg, tnumt, next_underlay_mc_ptr, 0x04, 0, 24); + +/* reg_tnumt_record_size + * Number of IP addresses in the record. + * Range is 1..cap_max_nve_mc_entries_ipv{4,6} + * Access: RW + */ +MLXSW_ITEM32(reg, tnumt, record_size, 0x08, 0, 3); + +/* reg_tnumt_udip + * The underlay IPv4 addresses. udip[i] is reserved if i >= size + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, tnumt, udip, 0x0C, 0, 32, 0x04, 0x00, false); + +/* reg_tnumt_udip_ptr + * The pointer to the underlay IPv6 addresses. udip_ptr[i] is reserved if + * i >= size. The IPv6 addresses are configured by RIPS. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, tnumt, udip_ptr, 0x0C, 0, 24, 0x04, 0x00, false); + +static inline void mlxsw_reg_tnumt_pack(char *payload, + enum mlxsw_reg_tnumt_record_type type, + enum mlxsw_reg_tunnel_port tport, + u32 underlay_mc_ptr, bool vnext, + u32 next_underlay_mc_ptr, + u8 record_size) +{ + MLXSW_REG_ZERO(tnumt, payload); + mlxsw_reg_tnumt_record_type_set(payload, type); + mlxsw_reg_tnumt_tunnel_port_set(payload, tport); + mlxsw_reg_tnumt_underlay_mc_ptr_set(payload, underlay_mc_ptr); + mlxsw_reg_tnumt_vnext_set(payload, vnext); + mlxsw_reg_tnumt_next_underlay_mc_ptr_set(payload, next_underlay_mc_ptr); + mlxsw_reg_tnumt_record_size_set(payload, record_size); +} + +/* TNQCR - Tunneling NVE QoS Configuration Register + * ------------------------------------------------ + * The TNQCR register configures how QoS is set in encapsulation into the + * underlay network. + */ +#define MLXSW_REG_TNQCR_ID 0xA010 +#define MLXSW_REG_TNQCR_LEN 0x0C + +MLXSW_REG_DEFINE(tnqcr, MLXSW_REG_TNQCR_ID, MLXSW_REG_TNQCR_LEN); + +/* reg_tnqcr_enc_set_dscp + * For encapsulation: How to set DSCP field: + * 0 - Copy the DSCP from the overlay (inner) IP header to the underlay + * (outer) IP header. If there is no IP header, use TNQDR.dscp + * 1 - Set the DSCP field as TNQDR.dscp + * Access: RW + */ +MLXSW_ITEM32(reg, tnqcr, enc_set_dscp, 0x04, 28, 1); + +static inline void mlxsw_reg_tnqcr_pack(char *payload) +{ + MLXSW_REG_ZERO(tnqcr, payload); + mlxsw_reg_tnqcr_enc_set_dscp_set(payload, 0); +} + +/* TNQDR - Tunneling NVE QoS Default Register + * ------------------------------------------ + * The TNQDR register configures the default QoS settings for NVE + * encapsulation. + */ +#define MLXSW_REG_TNQDR_ID 0xA011 +#define MLXSW_REG_TNQDR_LEN 0x08 + +MLXSW_REG_DEFINE(tnqdr, MLXSW_REG_TNQDR_ID, MLXSW_REG_TNQDR_LEN); + +/* reg_tnqdr_local_port + * Local port number (receive port). CPU port is supported. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, tnqdr, 0x00, 16, 0x00, 12); + +/* reg_tnqdr_dscp + * For encapsulation, the default DSCP. 
+ * Access: RW + */ +MLXSW_ITEM32(reg, tnqdr, dscp, 0x04, 0, 6); + +static inline void mlxsw_reg_tnqdr_pack(char *payload, u16 local_port) +{ + MLXSW_REG_ZERO(tnqdr, payload); + mlxsw_reg_tnqdr_local_port_set(payload, local_port); + mlxsw_reg_tnqdr_dscp_set(payload, 0); +} + +/* TNEEM - Tunneling NVE Encapsulation ECN Mapping Register + * -------------------------------------------------------- + * The TNEEM register maps ECN of the IP header at the ingress to the + * encapsulation to the ECN of the underlay network. + */ +#define MLXSW_REG_TNEEM_ID 0xA012 +#define MLXSW_REG_TNEEM_LEN 0x0C + +MLXSW_REG_DEFINE(tneem, MLXSW_REG_TNEEM_ID, MLXSW_REG_TNEEM_LEN); + +/* reg_tneem_overlay_ecn + * ECN of the IP header in the overlay network. + * Access: Index + */ +MLXSW_ITEM32(reg, tneem, overlay_ecn, 0x04, 24, 2); + +/* reg_tneem_underlay_ecn + * ECN of the IP header in the underlay network. + * Access: RW + */ +MLXSW_ITEM32(reg, tneem, underlay_ecn, 0x04, 16, 2); + +static inline void mlxsw_reg_tneem_pack(char *payload, u8 overlay_ecn, + u8 underlay_ecn) +{ + MLXSW_REG_ZERO(tneem, payload); + mlxsw_reg_tneem_overlay_ecn_set(payload, overlay_ecn); + mlxsw_reg_tneem_underlay_ecn_set(payload, underlay_ecn); +} + +/* TNDEM - Tunneling NVE Decapsulation ECN Mapping Register + * -------------------------------------------------------- + * The TNDEM register configures the actions that are done in the + * decapsulation. + */ +#define MLXSW_REG_TNDEM_ID 0xA013 +#define MLXSW_REG_TNDEM_LEN 0x0C + +MLXSW_REG_DEFINE(tndem, MLXSW_REG_TNDEM_ID, MLXSW_REG_TNDEM_LEN); + +/* reg_tndem_underlay_ecn + * ECN field of the IP header in the underlay network. + * Access: Index + */ +MLXSW_ITEM32(reg, tndem, underlay_ecn, 0x04, 24, 2); + +/* reg_tndem_overlay_ecn + * ECN field of the IP header in the overlay network. + * Access: Index + */ +MLXSW_ITEM32(reg, tndem, overlay_ecn, 0x04, 16, 2); + +/* reg_tndem_eip_ecn + * Egress IP ECN. ECN field of the IP header of the packet which goes out + * from the decapsulation. + * Access: RW + */ +MLXSW_ITEM32(reg, tndem, eip_ecn, 0x04, 8, 2); + +/* reg_tndem_trap_en + * Trap enable: + * 0 - No trap due to decap ECN + * 1 - Trap enable with trap_id + * Access: RW + */ +MLXSW_ITEM32(reg, tndem, trap_en, 0x08, 28, 4); + +/* reg_tndem_trap_id + * Trap ID. Either DECAP_ECN0 or DECAP_ECN1. + * Reserved when trap_en is '0'. + * Access: RW + */ +MLXSW_ITEM32(reg, tndem, trap_id, 0x08, 0, 9); + +static inline void mlxsw_reg_tndem_pack(char *payload, u8 underlay_ecn, + u8 overlay_ecn, u8 ecn, bool trap_en, + u16 trap_id) +{ + MLXSW_REG_ZERO(tndem, payload); + mlxsw_reg_tndem_underlay_ecn_set(payload, underlay_ecn); + mlxsw_reg_tndem_overlay_ecn_set(payload, overlay_ecn); + mlxsw_reg_tndem_eip_ecn_set(payload, ecn); + mlxsw_reg_tndem_trap_en_set(payload, trap_en); + mlxsw_reg_tndem_trap_id_set(payload, trap_id); +} + +/* TNPC - Tunnel Port Configuration Register + * ----------------------------------------- + * The TNPC register is used for tunnel port configuration. + * Reserved when Spectrum. + */ +#define MLXSW_REG_TNPC_ID 0xA020 +#define MLXSW_REG_TNPC_LEN 0x18 + +MLXSW_REG_DEFINE(tnpc, MLXSW_REG_TNPC_ID, MLXSW_REG_TNPC_LEN); + +/* reg_tnpc_tunnel_port + * Tunnel port. + * Access: Index + */ +MLXSW_ITEM32(reg, tnpc, tunnel_port, 0x00, 0, 4); + +/* reg_tnpc_learn_enable_v6 + * During IPv6 underlay decapsulation, whether to learn from tunnel port. 
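+ * On Spectrum-2, TNGCR.learn_enable is reserved and this field is used
+ * instead (see reg_tngcr_learn_enable above).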
+ * Access: RW + */ +MLXSW_ITEM32(reg, tnpc, learn_enable_v6, 0x04, 1, 1); + +/* reg_tnpc_learn_enable_v4 + * During IPv4 underlay decapsulation, whether to learn from tunnel port. + * Access: RW + */ +MLXSW_ITEM32(reg, tnpc, learn_enable_v4, 0x04, 0, 1); + +static inline void mlxsw_reg_tnpc_pack(char *payload, + enum mlxsw_reg_tunnel_port tport, + bool learn_enable) +{ + MLXSW_REG_ZERO(tnpc, payload); + mlxsw_reg_tnpc_tunnel_port_set(payload, tport); + mlxsw_reg_tnpc_learn_enable_v4_set(payload, learn_enable); + mlxsw_reg_tnpc_learn_enable_v6_set(payload, learn_enable); +} + +/* TIGCR - Tunneling IPinIP General Configuration Register + * ------------------------------------------------------- + * The TIGCR register is used for setting up the IPinIP Tunnel configuration. + */ +#define MLXSW_REG_TIGCR_ID 0xA801 +#define MLXSW_REG_TIGCR_LEN 0x10 + +MLXSW_REG_DEFINE(tigcr, MLXSW_REG_TIGCR_ID, MLXSW_REG_TIGCR_LEN); + +/* reg_tigcr_ipip_ttlc + * For IPinIP Tunnel encapsulation: whether to copy the ttl from the packet + * header. + * Access: RW + */ +MLXSW_ITEM32(reg, tigcr, ttlc, 0x04, 8, 1); + +/* reg_tigcr_ipip_ttl_uc + * The TTL for IPinIP Tunnel encapsulation of unicast packets if + * reg_tigcr_ipip_ttlc is unset. + * Access: RW + */ +MLXSW_ITEM32(reg, tigcr, ttl_uc, 0x04, 0, 8); + +static inline void mlxsw_reg_tigcr_pack(char *payload, bool ttlc, u8 ttl_uc) +{ + MLXSW_REG_ZERO(tigcr, payload); + mlxsw_reg_tigcr_ttlc_set(payload, ttlc); + mlxsw_reg_tigcr_ttl_uc_set(payload, ttl_uc); +} + +/* TIEEM - Tunneling IPinIP Encapsulation ECN Mapping Register + * ----------------------------------------------------------- + * The TIEEM register maps ECN of the IP header at the ingress to the + * encapsulation to the ECN of the underlay network. + */ +#define MLXSW_REG_TIEEM_ID 0xA812 +#define MLXSW_REG_TIEEM_LEN 0x0C + +MLXSW_REG_DEFINE(tieem, MLXSW_REG_TIEEM_ID, MLXSW_REG_TIEEM_LEN); + +/* reg_tieem_overlay_ecn + * ECN of the IP header in the overlay network. + * Access: Index + */ +MLXSW_ITEM32(reg, tieem, overlay_ecn, 0x04, 24, 2); + +/* reg_tineem_underlay_ecn + * ECN of the IP header in the underlay network. + * Access: RW + */ +MLXSW_ITEM32(reg, tieem, underlay_ecn, 0x04, 16, 2); + +static inline void mlxsw_reg_tieem_pack(char *payload, u8 overlay_ecn, + u8 underlay_ecn) +{ + MLXSW_REG_ZERO(tieem, payload); + mlxsw_reg_tieem_overlay_ecn_set(payload, overlay_ecn); + mlxsw_reg_tieem_underlay_ecn_set(payload, underlay_ecn); +} + +/* TIDEM - Tunneling IPinIP Decapsulation ECN Mapping Register + * ----------------------------------------------------------- + * The TIDEM register configures the actions that are done in the + * decapsulation. + */ +#define MLXSW_REG_TIDEM_ID 0xA813 +#define MLXSW_REG_TIDEM_LEN 0x0C + +MLXSW_REG_DEFINE(tidem, MLXSW_REG_TIDEM_ID, MLXSW_REG_TIDEM_LEN); + +/* reg_tidem_underlay_ecn + * ECN field of the IP header in the underlay network. + * Access: Index + */ +MLXSW_ITEM32(reg, tidem, underlay_ecn, 0x04, 24, 2); + +/* reg_tidem_overlay_ecn + * ECN field of the IP header in the overlay network. + * Access: Index + */ +MLXSW_ITEM32(reg, tidem, overlay_ecn, 0x04, 16, 2); + +/* reg_tidem_eip_ecn + * Egress IP ECN. ECN field of the IP header of the packet which goes out + * from the decapsulation. + * Access: RW + */ +MLXSW_ITEM32(reg, tidem, eip_ecn, 0x04, 8, 2); + +/* reg_tidem_trap_en + * Trap enable: + * 0 - No trap due to decap ECN + * 1 - Trap enable with trap_id + * Access: RW + */ +MLXSW_ITEM32(reg, tidem, trap_en, 0x08, 28, 4); + +/* reg_tidem_trap_id + * Trap ID. 
Either DECAP_ECN0 or DECAP_ECN1. + * Reserved when trap_en is '0'. + * Access: RW + */ +MLXSW_ITEM32(reg, tidem, trap_id, 0x08, 0, 9); + +static inline void mlxsw_reg_tidem_pack(char *payload, u8 underlay_ecn, + u8 overlay_ecn, u8 eip_ecn, + bool trap_en, u16 trap_id) +{ + MLXSW_REG_ZERO(tidem, payload); + mlxsw_reg_tidem_underlay_ecn_set(payload, underlay_ecn); + mlxsw_reg_tidem_overlay_ecn_set(payload, overlay_ecn); + mlxsw_reg_tidem_eip_ecn_set(payload, eip_ecn); + mlxsw_reg_tidem_trap_en_set(payload, trap_en); + mlxsw_reg_tidem_trap_id_set(payload, trap_id); +} + +/* SBPR - Shared Buffer Pools Register + * ----------------------------------- + * The SBPR configures and retrieves the shared buffer pools and configuration. + */ +#define MLXSW_REG_SBPR_ID 0xB001 +#define MLXSW_REG_SBPR_LEN 0x14 + +MLXSW_REG_DEFINE(sbpr, MLXSW_REG_SBPR_ID, MLXSW_REG_SBPR_LEN); + +/* reg_sbpr_desc + * When set, configures descriptor buffer. + * Access: Index + */ +MLXSW_ITEM32(reg, sbpr, desc, 0x00, 31, 1); + +/* shared direstion enum for SBPR, SBCM, SBPM */ +enum mlxsw_reg_sbxx_dir { + MLXSW_REG_SBXX_DIR_INGRESS, + MLXSW_REG_SBXX_DIR_EGRESS, +}; + +/* reg_sbpr_dir + * Direction. + * Access: Index + */ +MLXSW_ITEM32(reg, sbpr, dir, 0x00, 24, 2); + +/* reg_sbpr_pool + * Pool index. + * Access: Index + */ +MLXSW_ITEM32(reg, sbpr, pool, 0x00, 0, 4); + +/* reg_sbpr_infi_size + * Size is infinite. + * Access: RW + */ +MLXSW_ITEM32(reg, sbpr, infi_size, 0x04, 31, 1); + +/* reg_sbpr_size + * Pool size in buffer cells. + * Reserved when infi_size = 1. + * Access: RW + */ +MLXSW_ITEM32(reg, sbpr, size, 0x04, 0, 24); + +enum mlxsw_reg_sbpr_mode { + MLXSW_REG_SBPR_MODE_STATIC, + MLXSW_REG_SBPR_MODE_DYNAMIC, +}; + +/* reg_sbpr_mode + * Pool quota calculation mode. + * Access: RW + */ +MLXSW_ITEM32(reg, sbpr, mode, 0x08, 0, 4); + +static inline void mlxsw_reg_sbpr_pack(char *payload, u8 pool, + enum mlxsw_reg_sbxx_dir dir, + enum mlxsw_reg_sbpr_mode mode, u32 size, + bool infi_size) +{ + MLXSW_REG_ZERO(sbpr, payload); + mlxsw_reg_sbpr_pool_set(payload, pool); + mlxsw_reg_sbpr_dir_set(payload, dir); + mlxsw_reg_sbpr_mode_set(payload, mode); + mlxsw_reg_sbpr_size_set(payload, size); + mlxsw_reg_sbpr_infi_size_set(payload, infi_size); +} + +/* SBCM - Shared Buffer Class Management Register + * ---------------------------------------------- + * The SBCM register configures and retrieves the shared buffer allocation + * and configuration according to Port-PG, including the binding to pool + * and definition of the associated quota. + */ +#define MLXSW_REG_SBCM_ID 0xB002 +#define MLXSW_REG_SBCM_LEN 0x28 + +MLXSW_REG_DEFINE(sbcm, MLXSW_REG_SBCM_ID, MLXSW_REG_SBCM_LEN); + +/* reg_sbcm_local_port + * Local port number. + * For Ingress: excludes CPU port and Router port + * For Egress: excludes IP Router + * Access: Index + */ +MLXSW_ITEM32_LP(reg, sbcm, 0x00, 16, 0x00, 4); + +/* reg_sbcm_pg_buff + * PG buffer - Port PG (dir=ingress) / traffic class (dir=egress) + * For PG buffer: range is 0..cap_max_pg_buffers - 1 + * For traffic class: range is 0..cap_max_tclass - 1 + * Note that when traffic class is in MC aware mode then the traffic + * classes which are MC aware cannot be configured. + * Access: Index + */ +MLXSW_ITEM32(reg, sbcm, pg_buff, 0x00, 8, 6); + +/* reg_sbcm_dir + * Direction. + * Access: Index + */ +MLXSW_ITEM32(reg, sbcm, dir, 0x00, 0, 2); + +/* reg_sbcm_min_buff + * Minimum buffer size for the limiter, in cells. 
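+ * Unlike max_buff below, this value is always given in cells,
+ * regardless of the pool mode.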
+ * Access: RW + */ +MLXSW_ITEM32(reg, sbcm, min_buff, 0x18, 0, 24); + +/* shared max_buff limits for dynamic threshold for SBCM, SBPM */ +#define MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN 1 +#define MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX 14 + +/* reg_sbcm_infi_max + * Max buffer is infinite. + * Access: RW + */ +MLXSW_ITEM32(reg, sbcm, infi_max, 0x1C, 31, 1); + +/* reg_sbcm_max_buff + * When the pool associated to the port-pg/tclass is configured to + * static, Maximum buffer size for the limiter configured in cells. + * When the pool associated to the port-pg/tclass is configured to + * dynamic, the max_buff holds the "alpha" parameter, supporting + * the following values: + * 0: 0 + * i: (1/128)*2^(i-1), for i=1..14 + * 0xFF: Infinity + * Reserved when infi_max = 1. + * Access: RW + */ +MLXSW_ITEM32(reg, sbcm, max_buff, 0x1C, 0, 24); + +/* reg_sbcm_pool + * Association of the port-priority to a pool. + * Access: RW + */ +MLXSW_ITEM32(reg, sbcm, pool, 0x24, 0, 4); + +static inline void mlxsw_reg_sbcm_pack(char *payload, u16 local_port, u8 pg_buff, + enum mlxsw_reg_sbxx_dir dir, + u32 min_buff, u32 max_buff, + bool infi_max, u8 pool) +{ + MLXSW_REG_ZERO(sbcm, payload); + mlxsw_reg_sbcm_local_port_set(payload, local_port); + mlxsw_reg_sbcm_pg_buff_set(payload, pg_buff); + mlxsw_reg_sbcm_dir_set(payload, dir); + mlxsw_reg_sbcm_min_buff_set(payload, min_buff); + mlxsw_reg_sbcm_max_buff_set(payload, max_buff); + mlxsw_reg_sbcm_infi_max_set(payload, infi_max); + mlxsw_reg_sbcm_pool_set(payload, pool); +} + +/* SBPM - Shared Buffer Port Management Register + * --------------------------------------------- + * The SBPM register configures and retrieves the shared buffer allocation + * and configuration according to Port-Pool, including the definition + * of the associated quota. + */ +#define MLXSW_REG_SBPM_ID 0xB003 +#define MLXSW_REG_SBPM_LEN 0x28 + +MLXSW_REG_DEFINE(sbpm, MLXSW_REG_SBPM_ID, MLXSW_REG_SBPM_LEN); + +/* reg_sbpm_local_port + * Local port number. + * For Ingress: excludes CPU port and Router port + * For Egress: excludes IP Router + * Access: Index + */ +MLXSW_ITEM32_LP(reg, sbpm, 0x00, 16, 0x00, 12); + +/* reg_sbpm_pool + * The pool associated to quota counting on the local_port. + * Access: Index + */ +MLXSW_ITEM32(reg, sbpm, pool, 0x00, 8, 4); + +/* reg_sbpm_dir + * Direction. + * Access: Index + */ +MLXSW_ITEM32(reg, sbpm, dir, 0x00, 0, 2); + +/* reg_sbpm_buff_occupancy + * Current buffer occupancy in cells. + * Access: RO + */ +MLXSW_ITEM32(reg, sbpm, buff_occupancy, 0x10, 0, 24); + +/* reg_sbpm_clr + * Clear Max Buffer Occupancy + * When this bit is set, max_buff_occupancy field is cleared (and a + * new max value is tracked from the time the clear was performed). + * Access: OP + */ +MLXSW_ITEM32(reg, sbpm, clr, 0x14, 31, 1); + +/* reg_sbpm_max_buff_occupancy + * Maximum value of buffer occupancy in cells monitored. Cleared by + * writing to the clr field. + * Access: RO + */ +MLXSW_ITEM32(reg, sbpm, max_buff_occupancy, 0x14, 0, 24); + +/* reg_sbpm_min_buff + * Minimum buffer size for the limiter, in cells. + * Access: RW + */ +MLXSW_ITEM32(reg, sbpm, min_buff, 0x18, 0, 24); + +/* reg_sbpm_max_buff + * When the pool associated to the port-pg/tclass is configured to + * static, Maximum buffer size for the limiter configured in cells. 
+ * When the pool associated to the port-pg/tclass is configured to + * dynamic, the max_buff holds the "alpha" parameter, supporting + * the following values: + * 0: 0 + * i: (1/128)*2^(i-1), for i=1..14 + * 0xFF: Infinity + * Access: RW + */ +MLXSW_ITEM32(reg, sbpm, max_buff, 0x1C, 0, 24); + +static inline void mlxsw_reg_sbpm_pack(char *payload, u16 local_port, u8 pool, + enum mlxsw_reg_sbxx_dir dir, bool clr, + u32 min_buff, u32 max_buff) +{ + MLXSW_REG_ZERO(sbpm, payload); + mlxsw_reg_sbpm_local_port_set(payload, local_port); + mlxsw_reg_sbpm_pool_set(payload, pool); + mlxsw_reg_sbpm_dir_set(payload, dir); + mlxsw_reg_sbpm_clr_set(payload, clr); + mlxsw_reg_sbpm_min_buff_set(payload, min_buff); + mlxsw_reg_sbpm_max_buff_set(payload, max_buff); +} + +static inline void mlxsw_reg_sbpm_unpack(char *payload, u32 *p_buff_occupancy, + u32 *p_max_buff_occupancy) +{ + *p_buff_occupancy = mlxsw_reg_sbpm_buff_occupancy_get(payload); + *p_max_buff_occupancy = mlxsw_reg_sbpm_max_buff_occupancy_get(payload); +} + +/* SBMM - Shared Buffer Multicast Management Register + * -------------------------------------------------- + * The SBMM register configures and retrieves the shared buffer allocation + * and configuration for MC packets according to Switch-Priority, including + * the binding to pool and definition of the associated quota. + */ +#define MLXSW_REG_SBMM_ID 0xB004 +#define MLXSW_REG_SBMM_LEN 0x28 + +MLXSW_REG_DEFINE(sbmm, MLXSW_REG_SBMM_ID, MLXSW_REG_SBMM_LEN); + +/* reg_sbmm_prio + * Switch Priority. + * Access: Index + */ +MLXSW_ITEM32(reg, sbmm, prio, 0x00, 8, 4); + +/* reg_sbmm_min_buff + * Minimum buffer size for the limiter, in cells. + * Access: RW + */ +MLXSW_ITEM32(reg, sbmm, min_buff, 0x18, 0, 24); + +/* reg_sbmm_max_buff + * When the pool associated to the port-pg/tclass is configured to + * static, Maximum buffer size for the limiter configured in cells. + * When the pool associated to the port-pg/tclass is configured to + * dynamic, the max_buff holds the "alpha" parameter, supporting + * the following values: + * 0: 0 + * i: (1/128)*2^(i-1), for i=1..14 + * 0xFF: Infinity + * Access: RW + */ +MLXSW_ITEM32(reg, sbmm, max_buff, 0x1C, 0, 24); + +/* reg_sbmm_pool + * Association of the port-priority to a pool. + * Access: RW + */ +MLXSW_ITEM32(reg, sbmm, pool, 0x24, 0, 4); + +static inline void mlxsw_reg_sbmm_pack(char *payload, u8 prio, u32 min_buff, + u32 max_buff, u8 pool) +{ + MLXSW_REG_ZERO(sbmm, payload); + mlxsw_reg_sbmm_prio_set(payload, prio); + mlxsw_reg_sbmm_min_buff_set(payload, min_buff); + mlxsw_reg_sbmm_max_buff_set(payload, max_buff); + mlxsw_reg_sbmm_pool_set(payload, pool); +} + +/* SBSR - Shared Buffer Status Register + * ------------------------------------ + * The SBSR register retrieves the shared buffer occupancy according to + * Port-Pool. Note that this register enables reading a large amount of data. + * It is the user's responsibility to limit the amount of data to ensure the + * response can match the maximum transfer unit. In case the response exceeds + * the maximum transport unit, it will be truncated with no special notice. 
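+ * With the lengths defined below, a full response is
+ * MLXSW_REG_SBSR_BASE_LEN + 120 * MLXSW_REG_SBSR_REC_LEN =
+ * 0x5C + 120 * 0x8 = 0x41C bytes.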
+ */ +#define MLXSW_REG_SBSR_ID 0xB005 +#define MLXSW_REG_SBSR_BASE_LEN 0x5C /* base length, without records */ +#define MLXSW_REG_SBSR_REC_LEN 0x8 /* record length */ +#define MLXSW_REG_SBSR_REC_MAX_COUNT 120 +#define MLXSW_REG_SBSR_LEN (MLXSW_REG_SBSR_BASE_LEN + \ + MLXSW_REG_SBSR_REC_LEN * \ + MLXSW_REG_SBSR_REC_MAX_COUNT) + +MLXSW_REG_DEFINE(sbsr, MLXSW_REG_SBSR_ID, MLXSW_REG_SBSR_LEN); + +/* reg_sbsr_clr + * Clear Max Buffer Occupancy. When this bit is set, the max_buff_occupancy + * field is cleared (and a new max value is tracked from the time the clear + * was performed). + * Access: OP + */ +MLXSW_ITEM32(reg, sbsr, clr, 0x00, 31, 1); + +#define MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE 256 + +/* reg_sbsr_port_page + * Determines the range of the ports specified in the 'ingress_port_mask' + * and 'egress_port_mask' bit masks. + * {ingress,egress}_port_mask[x] is (256 * port_page) + x + * Access: Index + */ +MLXSW_ITEM32(reg, sbsr, port_page, 0x04, 0, 4); + +/* reg_sbsr_ingress_port_mask + * Bit vector for all ingress network ports. + * Indicates which of the ports (for which the relevant bit is set) + * are affected by the set operation. Configuration of any other port + * does not change. + * Access: Index + */ +MLXSW_ITEM_BIT_ARRAY(reg, sbsr, ingress_port_mask, 0x10, 0x20, 1); + +/* reg_sbsr_pg_buff_mask + * Bit vector for all switch priority groups. + * Indicates which of the priorities (for which the relevant bit is set) + * are affected by the set operation. Configuration of any other priority + * does not change. + * Range is 0..cap_max_pg_buffers - 1 + * Access: Index + */ +MLXSW_ITEM_BIT_ARRAY(reg, sbsr, pg_buff_mask, 0x30, 0x4, 1); + +/* reg_sbsr_egress_port_mask + * Bit vector for all egress network ports. + * Indicates which of the ports (for which the relevant bit is set) + * are affected by the set operation. Configuration of any other port + * does not change. + * Access: Index + */ +MLXSW_ITEM_BIT_ARRAY(reg, sbsr, egress_port_mask, 0x34, 0x20, 1); + +/* reg_sbsr_tclass_mask + * Bit vector for all traffic classes. + * Indicates which of the traffic classes (for which the relevant bit is + * set) are affected by the set operation. Configuration of any other + * traffic class does not change. + * Range is 0..cap_max_tclass - 1 + * Access: Index + */ +MLXSW_ITEM_BIT_ARRAY(reg, sbsr, tclass_mask, 0x54, 0x8, 1); + +static inline void mlxsw_reg_sbsr_pack(char *payload, bool clr) +{ + MLXSW_REG_ZERO(sbsr, payload); + mlxsw_reg_sbsr_clr_set(payload, clr); +} + +/* reg_sbsr_rec_buff_occupancy + * Current buffer occupancy in cells. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sbsr, rec_buff_occupancy, MLXSW_REG_SBSR_BASE_LEN, + 0, 24, MLXSW_REG_SBSR_REC_LEN, 0x00, false); + +/* reg_sbsr_rec_max_buff_occupancy + * Maximum value of buffer occupancy in cells monitored. Cleared by + * writing to the clr field. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sbsr, rec_max_buff_occupancy, MLXSW_REG_SBSR_BASE_LEN, + 0, 24, MLXSW_REG_SBSR_REC_LEN, 0x04, false); + +static inline void mlxsw_reg_sbsr_rec_unpack(char *payload, int rec_index, + u32 *p_buff_occupancy, + u32 *p_max_buff_occupancy) +{ + *p_buff_occupancy = + mlxsw_reg_sbsr_rec_buff_occupancy_get(payload, rec_index); + *p_max_buff_occupancy = + mlxsw_reg_sbsr_rec_max_buff_occupancy_get(payload, rec_index); +} + +/* SBIB - Shared Buffer Internal Buffer Register + * --------------------------------------------- + * The SBIB register configures per port buffers for internal use. 
The internal + * buffers consume memory on the port buffers (note that the port buffers are + * used also by PBMC). + * + * For Spectrum this is used for egress mirroring. + */ +#define MLXSW_REG_SBIB_ID 0xB006 +#define MLXSW_REG_SBIB_LEN 0x10 + +MLXSW_REG_DEFINE(sbib, MLXSW_REG_SBIB_ID, MLXSW_REG_SBIB_LEN); + +/* reg_sbib_local_port + * Local port number + * Not supported for CPU port and router port + * Access: Index + */ +MLXSW_ITEM32_LP(reg, sbib, 0x00, 16, 0x00, 12); + +/* reg_sbib_buff_size + * Units represented in cells + * Allowed range is 0 to (cap_max_headroom_size - 1) + * Default is 0 + * Access: RW + */ +MLXSW_ITEM32(reg, sbib, buff_size, 0x08, 0, 24); + +static inline void mlxsw_reg_sbib_pack(char *payload, u16 local_port, + u32 buff_size) +{ + MLXSW_REG_ZERO(sbib, payload); + mlxsw_reg_sbib_local_port_set(payload, local_port); + mlxsw_reg_sbib_buff_size_set(payload, buff_size); +} + +static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { + MLXSW_REG(sgcr), + MLXSW_REG(spad), + MLXSW_REG(sspr), + MLXSW_REG(sfdat), + MLXSW_REG(sfd), + MLXSW_REG(sfn), + MLXSW_REG(spms), + MLXSW_REG(spvid), + MLXSW_REG(spvm), + MLXSW_REG(spaft), + MLXSW_REG(sfgc), + MLXSW_REG(sfdf), + MLXSW_REG(sldr), + MLXSW_REG(slcr), + MLXSW_REG(slcor), + MLXSW_REG(spmlr), + MLXSW_REG(svfa), + MLXSW_REG(spvtr), + MLXSW_REG(svpe), + MLXSW_REG(sfmr), + MLXSW_REG(spvmlr), + MLXSW_REG(spvc), + MLXSW_REG(spevet), + MLXSW_REG(smpe), + MLXSW_REG(smid2), + MLXSW_REG(cwtp), + MLXSW_REG(cwtpm), + MLXSW_REG(pgcr), + MLXSW_REG(ppbt), + MLXSW_REG(pacl), + MLXSW_REG(pagt), + MLXSW_REG(ptar), + MLXSW_REG(ppbs), + MLXSW_REG(prcr), + MLXSW_REG(pefa), + MLXSW_REG(pemrbt), + MLXSW_REG(ptce2), + MLXSW_REG(perpt), + MLXSW_REG(peabfe), + MLXSW_REG(perar), + MLXSW_REG(ptce3), + MLXSW_REG(percr), + MLXSW_REG(pererp), + MLXSW_REG(iedr), + MLXSW_REG(qpts), + MLXSW_REG(qpcr), + MLXSW_REG(qtct), + MLXSW_REG(qeec), + MLXSW_REG(qrwe), + MLXSW_REG(qpdsm), + MLXSW_REG(qpdp), + MLXSW_REG(qpdpm), + MLXSW_REG(qtctm), + MLXSW_REG(qpsc), + MLXSW_REG(pmlp), + MLXSW_REG(pmtu), + MLXSW_REG(ptys), + MLXSW_REG(ppad), + MLXSW_REG(paos), + MLXSW_REG(pfcc), + MLXSW_REG(ppcnt), + MLXSW_REG(pptb), + MLXSW_REG(pbmc), + MLXSW_REG(pspa), + MLXSW_REG(pmaos), + MLXSW_REG(pplr), + MLXSW_REG(pmtdb), + MLXSW_REG(pmecr), + MLXSW_REG(pmpe), + MLXSW_REG(pddr), + MLXSW_REG(pmmp), + MLXSW_REG(pllp), + MLXSW_REG(pmtm), + MLXSW_REG(htgt), + MLXSW_REG(hpkt), + MLXSW_REG(rgcr), + MLXSW_REG(ritr), + MLXSW_REG(rtar), + MLXSW_REG(ratr), + MLXSW_REG(rtdp), + MLXSW_REG(rips), + MLXSW_REG(ratrad), + MLXSW_REG(rdpm), + MLXSW_REG(ricnt), + MLXSW_REG(rrcr), + MLXSW_REG(ralta), + MLXSW_REG(ralst), + MLXSW_REG(raltb), + MLXSW_REG(ralue), + MLXSW_REG(rauht), + MLXSW_REG(raleu), + MLXSW_REG(rauhtd), + MLXSW_REG(rigr2), + MLXSW_REG(recr2), + MLXSW_REG(rmft2), + MLXSW_REG(reiv), + MLXSW_REG(mfcr), + MLXSW_REG(mfsc), + MLXSW_REG(mfsm), + MLXSW_REG(mfsl), + MLXSW_REG(fore), + MLXSW_REG(mtcap), + MLXSW_REG(mtmp), + MLXSW_REG(mtwe), + MLXSW_REG(mtbr), + MLXSW_REG(mcia), + MLXSW_REG(mpat), + MLXSW_REG(mpar), + MLXSW_REG(mgir), + MLXSW_REG(mrsr), + MLXSW_REG(mlcr), + MLXSW_REG(mcion), + MLXSW_REG(mtpps), + MLXSW_REG(mtutc), + MLXSW_REG(mpsc), + MLXSW_REG(mcqi), + MLXSW_REG(mcc), + MLXSW_REG(mcda), + MLXSW_REG(mgpc), + MLXSW_REG(mprs), + MLXSW_REG(mogcr), + MLXSW_REG(mpagr), + MLXSW_REG(momte), + MLXSW_REG(mtpppc), + MLXSW_REG(mtpptr), + MLXSW_REG(mtptpt), + MLXSW_REG(mtpcpc), + MLXSW_REG(mfgd), + MLXSW_REG(mgpir), + MLXSW_REG(mbct), + MLXSW_REG(mddt), + MLXSW_REG(mddq), + MLXSW_REG(mddc), 
+ MLXSW_REG(mfde), + MLXSW_REG(tngcr), + MLXSW_REG(tnumt), + MLXSW_REG(tnqcr), + MLXSW_REG(tnqdr), + MLXSW_REG(tneem), + MLXSW_REG(tndem), + MLXSW_REG(tnpc), + MLXSW_REG(tigcr), + MLXSW_REG(tieem), + MLXSW_REG(tidem), + MLXSW_REG(sbpr), + MLXSW_REG(sbcm), + MLXSW_REG(sbpm), + MLXSW_REG(sbmm), + MLXSW_REG(sbsr), + MLXSW_REG(sbib), +}; + +static inline const char *mlxsw_reg_id_str(u16 reg_id) +{ + const struct mlxsw_reg_info *reg_info; + int i; + + for (i = 0; i < ARRAY_SIZE(mlxsw_reg_infos); i++) { + reg_info = mlxsw_reg_infos[i]; + if (reg_info->id == reg_id) + return reg_info->name; + } + return "*UNKNOWN*"; +} + +/* PUDE - Port Up / Down Event + * --------------------------- + * Reports the operational state change of a port. + */ +#define MLXSW_REG_PUDE_LEN 0x10 + +/* reg_pude_swid + * Switch partition ID with which to associate the port. + * Access: Index + */ +MLXSW_ITEM32(reg, pude, swid, 0x00, 24, 8); + +/* reg_pude_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, pude, 0x00, 16, 0x00, 12); + +/* reg_pude_admin_status + * Port administrative state (the desired state). + * 1 - Up. + * 2 - Down. + * 3 - Up once. This means that in case of link failure, the port won't go + * into polling mode, but will wait to be re-enabled by software. + * 4 - Disabled by system. Can only be set by hardware. + * Access: RO + */ +MLXSW_ITEM32(reg, pude, admin_status, 0x00, 8, 4); + +/* reg_pude_oper_status + * Port operatioanl state. + * 1 - Up. + * 2 - Down. + * 3 - Down by port failure. This means that the device will not let the + * port up again until explicitly specified by software. + * Access: RO + */ +MLXSW_ITEM32(reg, pude, oper_status, 0x00, 0, 4); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h new file mode 100644 index 000000000..19ae0d1c7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -0,0 +1,168 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2016-2018 Mellanox Technologies. 
All rights reserved */ + +#ifndef _MLXSW_RESOURCES_H +#define _MLXSW_RESOURCES_H + +#include <linux/kernel.h> +#include <linux/types.h> + +enum mlxsw_res_id { + MLXSW_RES_ID_KVD_SIZE, + MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE, + MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE, + MLXSW_RES_ID_PGT_SIZE, + MLXSW_RES_ID_MAX_KVD_LINEAR_RANGE, + MLXSW_RES_ID_MAX_KVD_ACTION_SETS, + MLXSW_RES_ID_MAX_TRAP_GROUPS, + MLXSW_RES_ID_CQE_V0, + MLXSW_RES_ID_CQE_V1, + MLXSW_RES_ID_CQE_V2, + MLXSW_RES_ID_COUNTER_POOL_SIZE, + MLXSW_RES_ID_COUNTER_BANK_SIZE, + MLXSW_RES_ID_MAX_SPAN, + MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES, + MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC, + MLXSW_RES_ID_MAX_SYSTEM_PORT, + MLXSW_RES_ID_FID, + MLXSW_RES_ID_MAX_LAG, + MLXSW_RES_ID_MAX_LAG_MEMBERS, + MLXSW_RES_ID_GUARANTEED_SHARED_BUFFER, + MLXSW_RES_ID_CELL_SIZE, + MLXSW_RES_ID_MAX_HEADROOM_SIZE, + MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS, + MLXSW_RES_ID_ACL_MAX_TCAM_RULES, + MLXSW_RES_ID_ACL_MAX_REGIONS, + MLXSW_RES_ID_ACL_MAX_GROUPS, + MLXSW_RES_ID_ACL_MAX_GROUP_SIZE, + MLXSW_RES_ID_ACL_MAX_DEFAULT_ACTIONS, + MLXSW_RES_ID_ACL_FLEX_KEYS, + MLXSW_RES_ID_ACL_MAX_ACTION_PER_RULE, + MLXSW_RES_ID_ACL_ACTIONS_PER_SET, + MLXSW_RES_ID_ACL_MAX_ERPT_BANKS, + MLXSW_RES_ID_ACL_MAX_ERPT_BANK_SIZE, + MLXSW_RES_ID_ACL_MAX_LARGE_KEY_ID, + MLXSW_RES_ID_ACL_ERPT_ENTRIES_2KB, + MLXSW_RES_ID_ACL_ERPT_ENTRIES_4KB, + MLXSW_RES_ID_ACL_ERPT_ENTRIES_8KB, + MLXSW_RES_ID_ACL_ERPT_ENTRIES_12KB, + MLXSW_RES_ID_ACL_MAX_BF_LOG, + MLXSW_RES_ID_MAX_GLOBAL_POLICERS, + MLXSW_RES_ID_MAX_CPU_POLICERS, + MLXSW_RES_ID_MAX_VRS, + MLXSW_RES_ID_MAX_RIFS, + MLXSW_RES_ID_MC_ERIF_LIST_ENTRIES, + MLXSW_RES_ID_MAX_RIF_MAC_PROFILES, + MLXSW_RES_ID_MAX_LPM_TREES, + MLXSW_RES_ID_MAX_NVE_MC_ENTRIES_IPV4, + MLXSW_RES_ID_MAX_NVE_MC_ENTRIES_IPV6, + + /* Internal resources. + * Determined by the SW, not queried from the HW. 
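+ * They have no device resource ID and therefore do not appear in
+ * mlxsw_res_ids[] below; they are expected to be set directly via
+ * MLXSW_RES_SET().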
+ */ + MLXSW_RES_ID_KVD_SINGLE_SIZE, + MLXSW_RES_ID_KVD_DOUBLE_SIZE, + MLXSW_RES_ID_KVD_LINEAR_SIZE, + + __MLXSW_RES_ID_MAX, +}; + +static u16 mlxsw_res_ids[] = { + [MLXSW_RES_ID_KVD_SIZE] = 0x1001, + [MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE] = 0x1002, + [MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE] = 0x1003, + [MLXSW_RES_ID_PGT_SIZE] = 0x1004, + [MLXSW_RES_ID_MAX_KVD_LINEAR_RANGE] = 0x1005, + [MLXSW_RES_ID_MAX_KVD_ACTION_SETS] = 0x1007, + [MLXSW_RES_ID_MAX_TRAP_GROUPS] = 0x2201, + [MLXSW_RES_ID_CQE_V0] = 0x2210, + [MLXSW_RES_ID_CQE_V1] = 0x2211, + [MLXSW_RES_ID_CQE_V2] = 0x2212, + [MLXSW_RES_ID_COUNTER_POOL_SIZE] = 0x2410, + [MLXSW_RES_ID_COUNTER_BANK_SIZE] = 0x2411, + [MLXSW_RES_ID_MAX_SPAN] = 0x2420, + [MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES] = 0x2443, + [MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC] = 0x2449, + [MLXSW_RES_ID_MAX_SYSTEM_PORT] = 0x2502, + [MLXSW_RES_ID_FID] = 0x2512, + [MLXSW_RES_ID_MAX_LAG] = 0x2520, + [MLXSW_RES_ID_MAX_LAG_MEMBERS] = 0x2521, + [MLXSW_RES_ID_GUARANTEED_SHARED_BUFFER] = 0x2805, /* Bytes */ + [MLXSW_RES_ID_CELL_SIZE] = 0x2803, /* Bytes */ + [MLXSW_RES_ID_MAX_HEADROOM_SIZE] = 0x2811, /* Bytes */ + [MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS] = 0x2901, + [MLXSW_RES_ID_ACL_MAX_TCAM_RULES] = 0x2902, + [MLXSW_RES_ID_ACL_MAX_REGIONS] = 0x2903, + [MLXSW_RES_ID_ACL_MAX_GROUPS] = 0x2904, + [MLXSW_RES_ID_ACL_MAX_GROUP_SIZE] = 0x2905, + [MLXSW_RES_ID_ACL_MAX_DEFAULT_ACTIONS] = 0x2908, + [MLXSW_RES_ID_ACL_FLEX_KEYS] = 0x2910, + [MLXSW_RES_ID_ACL_MAX_ACTION_PER_RULE] = 0x2911, + [MLXSW_RES_ID_ACL_ACTIONS_PER_SET] = 0x2912, + [MLXSW_RES_ID_ACL_MAX_ERPT_BANKS] = 0x2940, + [MLXSW_RES_ID_ACL_MAX_ERPT_BANK_SIZE] = 0x2941, + [MLXSW_RES_ID_ACL_MAX_LARGE_KEY_ID] = 0x2942, + [MLXSW_RES_ID_ACL_ERPT_ENTRIES_2KB] = 0x2950, + [MLXSW_RES_ID_ACL_ERPT_ENTRIES_4KB] = 0x2951, + [MLXSW_RES_ID_ACL_ERPT_ENTRIES_8KB] = 0x2952, + [MLXSW_RES_ID_ACL_ERPT_ENTRIES_12KB] = 0x2953, + [MLXSW_RES_ID_ACL_MAX_BF_LOG] = 0x2960, + [MLXSW_RES_ID_MAX_GLOBAL_POLICERS] = 0x2A10, + [MLXSW_RES_ID_MAX_CPU_POLICERS] = 0x2A13, + [MLXSW_RES_ID_MAX_VRS] = 0x2C01, + [MLXSW_RES_ID_MAX_RIFS] = 0x2C02, + [MLXSW_RES_ID_MC_ERIF_LIST_ENTRIES] = 0x2C10, + [MLXSW_RES_ID_MAX_RIF_MAC_PROFILES] = 0x2C14, + [MLXSW_RES_ID_MAX_LPM_TREES] = 0x2C30, + [MLXSW_RES_ID_MAX_NVE_MC_ENTRIES_IPV4] = 0x2E02, + [MLXSW_RES_ID_MAX_NVE_MC_ENTRIES_IPV6] = 0x2E03, +}; + +struct mlxsw_res { + bool valid[__MLXSW_RES_ID_MAX]; + u64 values[__MLXSW_RES_ID_MAX]; +}; + +static inline bool mlxsw_res_valid(struct mlxsw_res *res, + enum mlxsw_res_id res_id) +{ + return res->valid[res_id]; +} + +#define MLXSW_RES_VALID(res, short_res_id) \ + mlxsw_res_valid(res, MLXSW_RES_ID_##short_res_id) + +static inline u64 mlxsw_res_get(struct mlxsw_res *res, + enum mlxsw_res_id res_id) +{ + if (WARN_ON(!res->valid[res_id])) + return 0; + return res->values[res_id]; +} + +#define MLXSW_RES_GET(res, short_res_id) \ + mlxsw_res_get(res, MLXSW_RES_ID_##short_res_id) + +static inline void mlxsw_res_set(struct mlxsw_res *res, + enum mlxsw_res_id res_id, u64 value) +{ + res->valid[res_id] = true; + res->values[res_id] = value; +} + +#define MLXSW_RES_SET(res, short_res_id, value) \ + mlxsw_res_set(res, MLXSW_RES_ID_##short_res_id, value) + +static inline void mlxsw_res_parse(struct mlxsw_res *res, u16 id, u64 value) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mlxsw_res_ids); i++) { + if (mlxsw_res_ids[i] == id) { + mlxsw_res_set(res, i, value); + return; + } + } +} + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c new file mode 100644 
index 000000000..67ecdb9e7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -0,0 +1,5331 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/slab.h> +#include <linux/device.h> +#include <linux/skbuff.h> +#include <linux/if_vlan.h> +#include <linux/if_bridge.h> +#include <linux/workqueue.h> +#include <linux/jiffies.h> +#include <linux/bitops.h> +#include <linux/list.h> +#include <linux/notifier.h> +#include <linux/dcbnl.h> +#include <linux/inetdevice.h> +#include <linux/netlink.h> +#include <linux/jhash.h> +#include <linux/log2.h> +#include <linux/refcount.h> +#include <linux/rhashtable.h> +#include <net/switchdev.h> +#include <net/pkt_cls.h> +#include <net/netevent.h> +#include <net/addrconf.h> +#include <linux/ptp_classify.h> + +#include "spectrum.h" +#include "pci.h" +#include "core.h" +#include "core_env.h" +#include "reg.h" +#include "port.h" +#include "trap.h" +#include "txheader.h" +#include "spectrum_cnt.h" +#include "spectrum_dpipe.h" +#include "spectrum_acl_flex_actions.h" +#include "spectrum_span.h" +#include "spectrum_ptp.h" +#include "spectrum_trap.h" + +#define MLXSW_SP_FWREV_MINOR 2010 +#define MLXSW_SP_FWREV_SUBMINOR 1006 + +#define MLXSW_SP1_FWREV_MAJOR 13 +#define MLXSW_SP1_FWREV_CAN_RESET_MINOR 1702 + +static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = { + .major = MLXSW_SP1_FWREV_MAJOR, + .minor = MLXSW_SP_FWREV_MINOR, + .subminor = MLXSW_SP_FWREV_SUBMINOR, + .can_reset_minor = MLXSW_SP1_FWREV_CAN_RESET_MINOR, +}; + +#define MLXSW_SP1_FW_FILENAME \ + "mellanox/mlxsw_spectrum-" __stringify(MLXSW_SP1_FWREV_MAJOR) \ + "." __stringify(MLXSW_SP_FWREV_MINOR) \ + "." __stringify(MLXSW_SP_FWREV_SUBMINOR) ".mfa2" + +#define MLXSW_SP2_FWREV_MAJOR 29 + +static const struct mlxsw_fw_rev mlxsw_sp2_fw_rev = { + .major = MLXSW_SP2_FWREV_MAJOR, + .minor = MLXSW_SP_FWREV_MINOR, + .subminor = MLXSW_SP_FWREV_SUBMINOR, +}; + +#define MLXSW_SP2_FW_FILENAME \ + "mellanox/mlxsw_spectrum2-" __stringify(MLXSW_SP2_FWREV_MAJOR) \ + "." __stringify(MLXSW_SP_FWREV_MINOR) \ + "." __stringify(MLXSW_SP_FWREV_SUBMINOR) ".mfa2" + +#define MLXSW_SP3_FWREV_MAJOR 30 + +static const struct mlxsw_fw_rev mlxsw_sp3_fw_rev = { + .major = MLXSW_SP3_FWREV_MAJOR, + .minor = MLXSW_SP_FWREV_MINOR, + .subminor = MLXSW_SP_FWREV_SUBMINOR, +}; + +#define MLXSW_SP3_FW_FILENAME \ + "mellanox/mlxsw_spectrum3-" __stringify(MLXSW_SP3_FWREV_MAJOR) \ + "." __stringify(MLXSW_SP_FWREV_MINOR) \ + "." __stringify(MLXSW_SP_FWREV_SUBMINOR) ".mfa2" + +#define MLXSW_SP_LINECARDS_INI_BUNDLE_FILENAME \ + "mellanox/lc_ini_bundle_" \ + __stringify(MLXSW_SP_FWREV_MINOR) "_" \ + __stringify(MLXSW_SP_FWREV_SUBMINOR) ".bin" + +static const char mlxsw_sp1_driver_name[] = "mlxsw_spectrum"; +static const char mlxsw_sp2_driver_name[] = "mlxsw_spectrum2"; +static const char mlxsw_sp3_driver_name[] = "mlxsw_spectrum3"; +static const char mlxsw_sp4_driver_name[] = "mlxsw_spectrum4"; + +static const unsigned char mlxsw_sp1_mac_mask[ETH_ALEN] = { + 0xff, 0xff, 0xff, 0xff, 0xfc, 0x00 +}; +static const unsigned char mlxsw_sp2_mac_mask[ETH_ALEN] = { + 0xff, 0xff, 0xff, 0xff, 0xf0, 0x00 +}; + +/* tx_hdr_version + * Tx header version. + * Must be set to 1. + */ +MLXSW_ITEM32(tx, hdr, version, 0x00, 28, 4); + +/* tx_hdr_ctl + * Packet control type. 
+ * 0 - Ethernet control (e.g. EMADs, LACP) + * 1 - Ethernet data + */ +MLXSW_ITEM32(tx, hdr, ctl, 0x00, 26, 2); + +/* tx_hdr_proto + * Packet protocol type. Must be set to 1 (Ethernet). + */ +MLXSW_ITEM32(tx, hdr, proto, 0x00, 21, 3); + +/* tx_hdr_rx_is_router + * Packet is sent from the router. Valid for data packets only. + */ +MLXSW_ITEM32(tx, hdr, rx_is_router, 0x00, 19, 1); + +/* tx_hdr_fid_valid + * Indicates if the 'fid' field is valid and should be used for + * forwarding lookup. Valid for data packets only. + */ +MLXSW_ITEM32(tx, hdr, fid_valid, 0x00, 16, 1); + +/* tx_hdr_swid + * Switch partition ID. Must be set to 0. + */ +MLXSW_ITEM32(tx, hdr, swid, 0x00, 12, 3); + +/* tx_hdr_control_tclass + * Indicates if the packet should use the control TClass and not one + * of the data TClasses. + */ +MLXSW_ITEM32(tx, hdr, control_tclass, 0x00, 6, 1); + +/* tx_hdr_etclass + * Egress TClass to be used on the egress device on the egress port. + */ +MLXSW_ITEM32(tx, hdr, etclass, 0x00, 0, 4); + +/* tx_hdr_port_mid + * Destination local port for unicast packets. + * Destination multicast ID for multicast packets. + * + * Control packets are directed to a specific egress port, while data + * packets are transmitted through the CPU port (0) into the switch partition, + * where forwarding rules are applied. + */ +MLXSW_ITEM32(tx, hdr, port_mid, 0x04, 16, 16); + +/* tx_hdr_fid + * Forwarding ID used for L2 forwarding lookup. Valid only if 'fid_valid' is + * set, otherwise calculated based on the packet's VID using VID to FID mapping. + * Valid for data packets only. + */ +MLXSW_ITEM32(tx, hdr, fid, 0x08, 16, 16); + +/* tx_hdr_type + * 0 - Data packets + * 6 - Control packets + */ +MLXSW_ITEM32(tx, hdr, type, 0x0C, 0, 4); + +int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp, + unsigned int counter_index, u64 *packets, + u64 *bytes) +{ + char mgpc_pl[MLXSW_REG_MGPC_LEN]; + int err; + + mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_NOP, + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl); + if (err) + return err; + if (packets) + *packets = mlxsw_reg_mgpc_packet_counter_get(mgpc_pl); + if (bytes) + *bytes = mlxsw_reg_mgpc_byte_counter_get(mgpc_pl); + return 0; +} + +static int mlxsw_sp_flow_counter_clear(struct mlxsw_sp *mlxsw_sp, + unsigned int counter_index) +{ + char mgpc_pl[MLXSW_REG_MGPC_LEN]; + + mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_CLEAR, + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl); +} + +int mlxsw_sp_flow_counter_alloc(struct mlxsw_sp *mlxsw_sp, + unsigned int *p_counter_index) +{ + int err; + + err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_FLOW, + p_counter_index); + if (err) + return err; + err = mlxsw_sp_flow_counter_clear(mlxsw_sp, *p_counter_index); + if (err) + goto err_counter_clear; + return 0; + +err_counter_clear: + mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_FLOW, + *p_counter_index); + return err; +} + +void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp, + unsigned int counter_index) +{ + mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_FLOW, + counter_index); +} + +void mlxsw_sp_txhdr_construct(struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + char *txhdr = skb_push(skb, MLXSW_TXHDR_LEN); + + memset(txhdr, 0, MLXSW_TXHDR_LEN); + + mlxsw_tx_hdr_version_set(txhdr, MLXSW_TXHDR_VERSION_1); + mlxsw_tx_hdr_ctl_set(txhdr, 
MLXSW_TXHDR_ETH_CTL); + mlxsw_tx_hdr_proto_set(txhdr, MLXSW_TXHDR_PROTO_ETH); + mlxsw_tx_hdr_swid_set(txhdr, 0); + mlxsw_tx_hdr_control_tclass_set(txhdr, 1); + mlxsw_tx_hdr_port_mid_set(txhdr, tx_info->local_port); + mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_CONTROL); +} + +int +mlxsw_sp_txhdr_ptp_data_construct(struct mlxsw_core *mlxsw_core, + struct mlxsw_sp_port *mlxsw_sp_port, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + char *txhdr; + u16 max_fid; + int err; + + if (skb_cow_head(skb, MLXSW_TXHDR_LEN)) { + err = -ENOMEM; + goto err_skb_cow_head; + } + + if (!MLXSW_CORE_RES_VALID(mlxsw_core, FID)) { + err = -EIO; + goto err_res_valid; + } + max_fid = MLXSW_CORE_RES_GET(mlxsw_core, FID); + + txhdr = skb_push(skb, MLXSW_TXHDR_LEN); + memset(txhdr, 0, MLXSW_TXHDR_LEN); + + mlxsw_tx_hdr_version_set(txhdr, MLXSW_TXHDR_VERSION_1); + mlxsw_tx_hdr_proto_set(txhdr, MLXSW_TXHDR_PROTO_ETH); + mlxsw_tx_hdr_rx_is_router_set(txhdr, true); + mlxsw_tx_hdr_fid_valid_set(txhdr, true); + mlxsw_tx_hdr_fid_set(txhdr, max_fid + tx_info->local_port - 1); + mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_DATA); + return 0; + +err_res_valid: +err_skb_cow_head: + this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); + dev_kfree_skb_any(skb); + return err; +} + +static bool mlxsw_sp_skb_requires_ts(struct sk_buff *skb) +{ + unsigned int type; + + if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + return false; + + type = ptp_classify_raw(skb); + return !!ptp_parse_header(skb, type); +} + +static int mlxsw_sp_txhdr_handle(struct mlxsw_core *mlxsw_core, + struct mlxsw_sp_port *mlxsw_sp_port, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + + /* In Spectrum-2 and Spectrum-3, PTP events that require a time stamp + * need special handling and cannot be transmitted as regular control + * packets. 
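+ * The ptp_ops->txhdr_construct() callback builds the required header
+ * instead; see mlxsw_sp_txhdr_ptp_data_construct() above for the data
+ * packet variant.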
+ */ + if (unlikely(mlxsw_sp_skb_requires_ts(skb))) + return mlxsw_sp->ptp_ops->txhdr_construct(mlxsw_core, + mlxsw_sp_port, skb, + tx_info); + + if (skb_cow_head(skb, MLXSW_TXHDR_LEN)) { + this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); + dev_kfree_skb_any(skb); + return -ENOMEM; + } + + mlxsw_sp_txhdr_construct(skb, tx_info); + return 0; +} + +enum mlxsw_reg_spms_state mlxsw_sp_stp_spms_state(u8 state) +{ + switch (state) { + case BR_STATE_FORWARDING: + return MLXSW_REG_SPMS_STATE_FORWARDING; + case BR_STATE_LEARNING: + return MLXSW_REG_SPMS_STATE_LEARNING; + case BR_STATE_LISTENING: + case BR_STATE_DISABLED: + case BR_STATE_BLOCKING: + return MLXSW_REG_SPMS_STATE_DISCARDING; + default: + BUG(); + } +} + +int mlxsw_sp_port_vid_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, + u8 state) +{ + enum mlxsw_reg_spms_state spms_state = mlxsw_sp_stp_spms_state(state); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char *spms_pl; + int err; + + spms_pl = kmalloc(MLXSW_REG_SPMS_LEN, GFP_KERNEL); + if (!spms_pl) + return -ENOMEM; + mlxsw_reg_spms_pack(spms_pl, mlxsw_sp_port->local_port); + mlxsw_reg_spms_vid_pack(spms_pl, vid, spms_state); + + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spms), spms_pl); + kfree(spms_pl); + return err; +} + +static int mlxsw_sp_base_mac_get(struct mlxsw_sp *mlxsw_sp) +{ + char spad_pl[MLXSW_REG_SPAD_LEN] = {0}; + int err; + + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(spad), spad_pl); + if (err) + return err; + mlxsw_reg_spad_base_mac_memcpy_from(spad_pl, mlxsw_sp->base_mac); + return 0; +} + +int mlxsw_sp_port_admin_status_set(struct mlxsw_sp_port *mlxsw_sp_port, + bool is_up) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char paos_pl[MLXSW_REG_PAOS_LEN]; + + mlxsw_reg_paos_pack(paos_pl, mlxsw_sp_port->local_port, + is_up ? 
MLXSW_PORT_ADMIN_STATUS_UP : + MLXSW_PORT_ADMIN_STATUS_DOWN); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(paos), paos_pl); +} + +static int mlxsw_sp_port_dev_addr_set(struct mlxsw_sp_port *mlxsw_sp_port, + const unsigned char *addr) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char ppad_pl[MLXSW_REG_PPAD_LEN]; + + mlxsw_reg_ppad_pack(ppad_pl, true, mlxsw_sp_port->local_port); + mlxsw_reg_ppad_mac_memcpy_to(ppad_pl, addr); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppad), ppad_pl); +} + +static int mlxsw_sp_port_dev_addr_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + eth_hw_addr_gen(mlxsw_sp_port->dev, mlxsw_sp->base_mac, + mlxsw_sp_port->local_port); + return mlxsw_sp_port_dev_addr_set(mlxsw_sp_port, + mlxsw_sp_port->dev->dev_addr); +} + +static int mlxsw_sp_port_max_mtu_get(struct mlxsw_sp_port *mlxsw_sp_port, int *p_max_mtu) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char pmtu_pl[MLXSW_REG_PMTU_LEN]; + int err; + + mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sp_port->local_port, 0); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pmtu), pmtu_pl); + if (err) + return err; + + *p_max_mtu = mlxsw_reg_pmtu_max_mtu_get(pmtu_pl); + return 0; +} + +static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char pmtu_pl[MLXSW_REG_PMTU_LEN]; + + mtu += MLXSW_TXHDR_LEN + ETH_HLEN; + if (mtu > mlxsw_sp_port->max_mtu) + return -EINVAL; + + mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sp_port->local_port, mtu); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmtu), pmtu_pl); +} + +static int mlxsw_sp_port_swid_set(struct mlxsw_sp *mlxsw_sp, + u16 local_port, u8 swid) +{ + char pspa_pl[MLXSW_REG_PSPA_LEN]; + + mlxsw_reg_pspa_pack(pspa_pl, swid, local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pspa), pspa_pl); +} + +int mlxsw_sp_port_vp_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char svpe_pl[MLXSW_REG_SVPE_LEN]; + + mlxsw_reg_svpe_pack(svpe_pl, mlxsw_sp_port->local_port, enable); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svpe), svpe_pl); +} + +int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, + bool learn_enable) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char *spvmlr_pl; + int err; + + spvmlr_pl = kmalloc(MLXSW_REG_SPVMLR_LEN, GFP_KERNEL); + if (!spvmlr_pl) + return -ENOMEM; + mlxsw_reg_spvmlr_pack(spvmlr_pl, mlxsw_sp_port->local_port, vid, vid, + learn_enable); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvmlr), spvmlr_pl); + kfree(spvmlr_pl); + return err; +} + +int mlxsw_sp_ethtype_to_sver_type(u16 ethtype, u8 *p_sver_type) +{ + switch (ethtype) { + case ETH_P_8021Q: + *p_sver_type = 0; + break; + case ETH_P_8021AD: + *p_sver_type = 1; + break; + default: + return -EINVAL; + } + + return 0; +} + +int mlxsw_sp_port_egress_ethtype_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 ethtype) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char spevet_pl[MLXSW_REG_SPEVET_LEN]; + u8 sver_type; + int err; + + err = mlxsw_sp_ethtype_to_sver_type(ethtype, &sver_type); + if (err) + return err; + + mlxsw_reg_spevet_pack(spevet_pl, mlxsw_sp_port->local_port, sver_type); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spevet), spevet_pl); +} + +static int __mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid, u16 ethtype) +{ + struct mlxsw_sp *mlxsw_sp = 
mlxsw_sp_port->mlxsw_sp; + char spvid_pl[MLXSW_REG_SPVID_LEN]; + u8 sver_type; + int err; + + err = mlxsw_sp_ethtype_to_sver_type(ethtype, &sver_type); + if (err) + return err; + + mlxsw_reg_spvid_pack(spvid_pl, mlxsw_sp_port->local_port, vid, + sver_type); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvid), spvid_pl); +} + +static int mlxsw_sp_port_allow_untagged_set(struct mlxsw_sp_port *mlxsw_sp_port, + bool allow) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char spaft_pl[MLXSW_REG_SPAFT_LEN]; + + mlxsw_reg_spaft_pack(spaft_pl, mlxsw_sp_port->local_port, allow); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spaft), spaft_pl); +} + +int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, + u16 ethtype) +{ + int err; + + if (!vid) { + err = mlxsw_sp_port_allow_untagged_set(mlxsw_sp_port, false); + if (err) + return err; + } else { + err = __mlxsw_sp_port_pvid_set(mlxsw_sp_port, vid, ethtype); + if (err) + return err; + err = mlxsw_sp_port_allow_untagged_set(mlxsw_sp_port, true); + if (err) + goto err_port_allow_untagged_set; + } + + mlxsw_sp_port->pvid = vid; + return 0; + +err_port_allow_untagged_set: + __mlxsw_sp_port_pvid_set(mlxsw_sp_port, mlxsw_sp_port->pvid, ethtype); + return err; +} + +static int +mlxsw_sp_port_system_port_mapping_set(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char sspr_pl[MLXSW_REG_SSPR_LEN]; + + mlxsw_reg_sspr_pack(sspr_pl, mlxsw_sp_port->local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sspr), sspr_pl); +} + +static int +mlxsw_sp_port_module_info_parse(struct mlxsw_sp *mlxsw_sp, + u16 local_port, char *pmlp_pl, + struct mlxsw_sp_port_mapping *port_mapping) +{ + bool separate_rxtx; + u8 first_lane; + u8 slot_index; + u8 module; + u8 width; + int i; + + module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0); + slot_index = mlxsw_reg_pmlp_slot_index_get(pmlp_pl, 0); + width = mlxsw_reg_pmlp_width_get(pmlp_pl); + separate_rxtx = mlxsw_reg_pmlp_rxtx_get(pmlp_pl); + first_lane = mlxsw_reg_pmlp_tx_lane_get(pmlp_pl, 0); + + if (width && !is_power_of_2(width)) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unsupported module config: width value is not power of 2\n", + local_port); + return -EINVAL; + } + + for (i = 0; i < width; i++) { + if (mlxsw_reg_pmlp_module_get(pmlp_pl, i) != module) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unsupported module config: contains multiple modules\n", + local_port); + return -EINVAL; + } + if (mlxsw_reg_pmlp_slot_index_get(pmlp_pl, i) != slot_index) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unsupported module config: contains multiple slot indexes\n", + local_port); + return -EINVAL; + } + if (separate_rxtx && + mlxsw_reg_pmlp_tx_lane_get(pmlp_pl, i) != + mlxsw_reg_pmlp_rx_lane_get(pmlp_pl, i)) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unsupported module config: TX and RX lane numbers are different\n", + local_port); + return -EINVAL; + } + if (mlxsw_reg_pmlp_tx_lane_get(pmlp_pl, i) != i + first_lane) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unsupported module config: TX and RX lane numbers are not sequential\n", + local_port); + return -EINVAL; + } + } + + port_mapping->module = module; + port_mapping->slot_index = slot_index; + port_mapping->width = width; + port_mapping->module_width = width; + port_mapping->lane = mlxsw_reg_pmlp_tx_lane_get(pmlp_pl, 0); + return 0; +} + +static int +mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, u16 local_port, + struct mlxsw_sp_port_mapping *port_mapping) +{ + 
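/* Query PMLP for local_port and parse the module/lane mapping into
+ * port_mapping (see mlxsw_sp_port_module_info_parse() above).
+ */
+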
char pmlp_pl[MLXSW_REG_PMLP_LEN]; + int err; + + mlxsw_reg_pmlp_pack(pmlp_pl, local_port); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pmlp), pmlp_pl); + if (err) + return err; + return mlxsw_sp_port_module_info_parse(mlxsw_sp, local_port, + pmlp_pl, port_mapping); +} + +static int +mlxsw_sp_port_module_map(struct mlxsw_sp *mlxsw_sp, u16 local_port, + const struct mlxsw_sp_port_mapping *port_mapping) +{ + char pmlp_pl[MLXSW_REG_PMLP_LEN]; + int i, err; + + mlxsw_env_module_port_map(mlxsw_sp->core, port_mapping->slot_index, + port_mapping->module); + + mlxsw_reg_pmlp_pack(pmlp_pl, local_port); + mlxsw_reg_pmlp_width_set(pmlp_pl, port_mapping->width); + for (i = 0; i < port_mapping->width; i++) { + mlxsw_reg_pmlp_slot_index_set(pmlp_pl, i, + port_mapping->slot_index); + mlxsw_reg_pmlp_module_set(pmlp_pl, i, port_mapping->module); + mlxsw_reg_pmlp_tx_lane_set(pmlp_pl, i, port_mapping->lane + i); /* Rx & Tx */ + } + + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmlp), pmlp_pl); + if (err) + goto err_pmlp_write; + return 0; + +err_pmlp_write: + mlxsw_env_module_port_unmap(mlxsw_sp->core, port_mapping->slot_index, + port_mapping->module); + return err; +} + +static void mlxsw_sp_port_module_unmap(struct mlxsw_sp *mlxsw_sp, u16 local_port, + u8 slot_index, u8 module) +{ + char pmlp_pl[MLXSW_REG_PMLP_LEN]; + + mlxsw_reg_pmlp_pack(pmlp_pl, local_port); + mlxsw_reg_pmlp_width_set(pmlp_pl, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmlp), pmlp_pl); + mlxsw_env_module_port_unmap(mlxsw_sp->core, slot_index, module); +} + +static int mlxsw_sp_port_open(struct net_device *dev) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + int err; + + err = mlxsw_env_module_port_up(mlxsw_sp->core, + mlxsw_sp_port->mapping.slot_index, + mlxsw_sp_port->mapping.module); + if (err) + return err; + err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true); + if (err) + goto err_port_admin_status_set; + netif_start_queue(dev); + return 0; + +err_port_admin_status_set: + mlxsw_env_module_port_down(mlxsw_sp->core, + mlxsw_sp_port->mapping.slot_index, + mlxsw_sp_port->mapping.module); + return err; +} + +static int mlxsw_sp_port_stop(struct net_device *dev) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + netif_stop_queue(dev); + mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false); + mlxsw_env_module_port_down(mlxsw_sp->core, + mlxsw_sp_port->mapping.slot_index, + mlxsw_sp_port->mapping.module); + return 0; +} + +static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_port_pcpu_stats *pcpu_stats; + const struct mlxsw_tx_info tx_info = { + .local_port = mlxsw_sp_port->local_port, + .is_emad = false, + }; + u64 len; + int err; + + memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb)); + + if (mlxsw_core_skb_transmit_busy(mlxsw_sp->core, &tx_info)) + return NETDEV_TX_BUSY; + + if (eth_skb_pad(skb)) { + this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); + return NETDEV_TX_OK; + } + + err = mlxsw_sp_txhdr_handle(mlxsw_sp->core, mlxsw_sp_port, skb, + &tx_info); + if (err) + return NETDEV_TX_OK; + + /* TX header is consumed by HW on the way so we shouldn't count its + * bytes as being sent. + */ + len = skb->len - MLXSW_TXHDR_LEN; + + /* Due to a race we might fail here because of a full queue. 
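The earlier mlxsw_core_skb_transmit_busy() check cannot rule this out.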
In that + * unlikely case we simply drop the packet. + */ + err = mlxsw_core_skb_transmit(mlxsw_sp->core, skb, &tx_info); + + if (!err) { + pcpu_stats = this_cpu_ptr(mlxsw_sp_port->pcpu_stats); + u64_stats_update_begin(&pcpu_stats->syncp); + pcpu_stats->tx_packets++; + pcpu_stats->tx_bytes += len; + u64_stats_update_end(&pcpu_stats->syncp); + } else { + this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); + dev_kfree_skb_any(skb); + } + return NETDEV_TX_OK; +} + +static void mlxsw_sp_set_rx_mode(struct net_device *dev) +{ +} + +static int mlxsw_sp_port_set_mac_address(struct net_device *dev, void *p) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct sockaddr *addr = p; + int err; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + err = mlxsw_sp_port_dev_addr_set(mlxsw_sp_port, addr->sa_data); + if (err) + return err; + eth_hw_addr_set(dev, addr->sa_data); + return 0; +} + +static int mlxsw_sp_port_change_mtu(struct net_device *dev, int mtu) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp_hdroom orig_hdroom; + struct mlxsw_sp_hdroom hdroom; + int err; + + orig_hdroom = *mlxsw_sp_port->hdroom; + + hdroom = orig_hdroom; + hdroom.mtu = mtu; + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + + err = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); + if (err) { + netdev_err(dev, "Failed to configure port's headroom\n"); + return err; + } + + err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, mtu); + if (err) + goto err_port_mtu_set; + dev->mtu = mtu; + return 0; + +err_port_mtu_set: + mlxsw_sp_hdroom_configure(mlxsw_sp_port, &orig_hdroom); + return err; +} + +static int +mlxsw_sp_port_get_sw_stats64(const struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp_port_pcpu_stats *p; + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; + u32 tx_dropped = 0; + unsigned int start; + int i; + + for_each_possible_cpu(i) { + p = per_cpu_ptr(mlxsw_sp_port->pcpu_stats, i); + do { + start = u64_stats_fetch_begin_irq(&p->syncp); + rx_packets = p->rx_packets; + rx_bytes = p->rx_bytes; + tx_packets = p->tx_packets; + tx_bytes = p->tx_bytes; + } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + + stats->rx_packets += rx_packets; + stats->rx_bytes += rx_bytes; + stats->tx_packets += tx_packets; + stats->tx_bytes += tx_bytes; + /* tx_dropped is u32, updated without syncp protection. 
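It is read once per CPU, outside the u64_stats fetch/retry loop.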
*/ + tx_dropped += p->tx_dropped; + } + stats->tx_dropped = tx_dropped; + return 0; +} + +static bool mlxsw_sp_port_has_offload_stats(const struct net_device *dev, int attr_id) +{ + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + return true; + } + + return false; +} + +static int mlxsw_sp_port_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) +{ + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + return mlxsw_sp_port_get_sw_stats64(dev, sp); + } + + return -EINVAL; +} + +int mlxsw_sp_port_get_stats_raw(struct net_device *dev, int grp, + int prio, char *ppcnt_pl) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port, grp, prio); + return mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl); +} + +static int mlxsw_sp_port_get_hw_stats(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + int err; + + err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT, + 0, ppcnt_pl); + if (err) + goto out; + + stats->tx_packets = + mlxsw_reg_ppcnt_a_frames_transmitted_ok_get(ppcnt_pl); + stats->rx_packets = + mlxsw_reg_ppcnt_a_frames_received_ok_get(ppcnt_pl); + stats->tx_bytes = + mlxsw_reg_ppcnt_a_octets_transmitted_ok_get(ppcnt_pl); + stats->rx_bytes = + mlxsw_reg_ppcnt_a_octets_received_ok_get(ppcnt_pl); + stats->multicast = + mlxsw_reg_ppcnt_a_multicast_frames_received_ok_get(ppcnt_pl); + + stats->rx_crc_errors = + mlxsw_reg_ppcnt_a_frame_check_sequence_errors_get(ppcnt_pl); + stats->rx_frame_errors = + mlxsw_reg_ppcnt_a_alignment_errors_get(ppcnt_pl); + + stats->rx_length_errors = ( + mlxsw_reg_ppcnt_a_in_range_length_errors_get(ppcnt_pl) + + mlxsw_reg_ppcnt_a_out_of_range_length_field_get(ppcnt_pl) + + mlxsw_reg_ppcnt_a_frame_too_long_errors_get(ppcnt_pl)); + + stats->rx_errors = (stats->rx_crc_errors + + stats->rx_frame_errors + stats->rx_length_errors); + +out: + return err; +} + +static void +mlxsw_sp_port_get_hw_xstats(struct net_device *dev, + struct mlxsw_sp_port_xstats *xstats) +{ + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + int err, i; + + err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_EXT_CNT, 0, + ppcnt_pl); + if (!err) + xstats->ecn = mlxsw_reg_ppcnt_ecn_marked_get(ppcnt_pl); + + for (i = 0; i < TC_MAX_QUEUE; i++) { + err = mlxsw_sp_port_get_stats_raw(dev, + MLXSW_REG_PPCNT_TC_CONG_CNT, + i, ppcnt_pl); + if (err) + goto tc_cnt; + + xstats->wred_drop[i] = + mlxsw_reg_ppcnt_wred_discard_get(ppcnt_pl); + xstats->tc_ecn[i] = mlxsw_reg_ppcnt_ecn_marked_tc_get(ppcnt_pl); + +tc_cnt: + err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_TC_CNT, + i, ppcnt_pl); + if (err) + continue; + + xstats->backlog[i] = + mlxsw_reg_ppcnt_tc_transmit_queue_get(ppcnt_pl); + xstats->tail_drop[i] = + mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get(ppcnt_pl); + } + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_PRIO_CNT, + i, ppcnt_pl); + if (err) + continue; + + xstats->tx_packets[i] = mlxsw_reg_ppcnt_tx_frames_get(ppcnt_pl); + xstats->tx_bytes[i] = mlxsw_reg_ppcnt_tx_octets_get(ppcnt_pl); + } +} + +static void update_stats_cache(struct work_struct *work) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + container_of(work, struct mlxsw_sp_port, + periodic_hw_stats.update_dw.work); + + if (!netif_carrier_ok(mlxsw_sp_port->dev)) + /* Note: mlxsw_sp_port_down_wipe_counters() clears the cache as + * necessary when port goes down. 
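+ * Hence no refresh is done here while the carrier is down; the work
+ * is simply rescheduled below.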
+ */ + goto out; + + mlxsw_sp_port_get_hw_stats(mlxsw_sp_port->dev, + &mlxsw_sp_port->periodic_hw_stats.stats); + mlxsw_sp_port_get_hw_xstats(mlxsw_sp_port->dev, + &mlxsw_sp_port->periodic_hw_stats.xstats); + +out: + mlxsw_core_schedule_dw(&mlxsw_sp_port->periodic_hw_stats.update_dw, + MLXSW_HW_STATS_UPDATE_TIME); +} + +/* Return the stats from a cache that is updated periodically, + * as this function might get called in an atomic context. + */ +static void +mlxsw_sp_port_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + memcpy(stats, &mlxsw_sp_port->periodic_hw_stats.stats, sizeof(*stats)); +} + +static int __mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid_begin, u16 vid_end, + bool is_member, bool untagged) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char *spvm_pl; + int err; + + spvm_pl = kmalloc(MLXSW_REG_SPVM_LEN, GFP_KERNEL); + if (!spvm_pl) + return -ENOMEM; + + mlxsw_reg_spvm_pack(spvm_pl, mlxsw_sp_port->local_port, vid_begin, + vid_end, is_member, untagged); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvm), spvm_pl); + kfree(spvm_pl); + return err; +} + +int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, + u16 vid_end, bool is_member, bool untagged) +{ + u16 vid, vid_e; + int err; + + for (vid = vid_begin; vid <= vid_end; + vid += MLXSW_REG_SPVM_REC_MAX_COUNT) { + vid_e = min((u16) (vid + MLXSW_REG_SPVM_REC_MAX_COUNT - 1), + vid_end); + + err = __mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid_e, + is_member, untagged); + if (err) + return err; + } + + return 0; +} + +static void mlxsw_sp_port_vlan_flush(struct mlxsw_sp_port *mlxsw_sp_port, + bool flush_default) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, *tmp; + + list_for_each_entry_safe(mlxsw_sp_port_vlan, tmp, + &mlxsw_sp_port->vlans_list, list) { + if (!flush_default && + mlxsw_sp_port_vlan->vid == MLXSW_SP_DEFAULT_VID) + continue; + mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan); + } +} + +static void +mlxsw_sp_port_vlan_cleanup(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) +{ + if (mlxsw_sp_port_vlan->bridge_port) + mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan); + else if (mlxsw_sp_port_vlan->fid) + mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); +} + +struct mlxsw_sp_port_vlan * +mlxsw_sp_port_vlan_create(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + bool untagged = vid == MLXSW_SP_DEFAULT_VID; + int err; + + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); + if (mlxsw_sp_port_vlan) + return ERR_PTR(-EEXIST); + + err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, true, untagged); + if (err) + return ERR_PTR(err); + + mlxsw_sp_port_vlan = kzalloc(sizeof(*mlxsw_sp_port_vlan), GFP_KERNEL); + if (!mlxsw_sp_port_vlan) { + err = -ENOMEM; + goto err_port_vlan_alloc; + } + + mlxsw_sp_port_vlan->mlxsw_sp_port = mlxsw_sp_port; + mlxsw_sp_port_vlan->vid = vid; + list_add(&mlxsw_sp_port_vlan->list, &mlxsw_sp_port->vlans_list); + + return mlxsw_sp_port_vlan; + +err_port_vlan_alloc: + mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false); + return ERR_PTR(err); +} + +void mlxsw_sp_port_vlan_destroy(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) +{ + struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; + u16 vid = mlxsw_sp_port_vlan->vid; + + mlxsw_sp_port_vlan_cleanup(mlxsw_sp_port_vlan); + list_del(&mlxsw_sp_port_vlan->list); + 
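/* The VLAN's software state is released first; the VID itself is then removed from the device below by clearing its SPVM membership (mlxsw_sp_port_vlan_set() with is_member=false). */ +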
kfree(mlxsw_sp_port_vlan); + mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false); +} + +static int mlxsw_sp_port_add_vid(struct net_device *dev, + __be16 __always_unused proto, u16 vid) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + /* VLAN 0 is added to HW filter when device goes up, but it is + * reserved in our case, so simply return. + */ + if (!vid) + return 0; + + return PTR_ERR_OR_ZERO(mlxsw_sp_port_vlan_create(mlxsw_sp_port, vid)); +} + +static int mlxsw_sp_port_kill_vid(struct net_device *dev, + __be16 __always_unused proto, u16 vid) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + + /* VLAN 0 is removed from HW filter when device goes down, but + * it is reserved in our case, so simply return. + */ + if (!vid) + return 0; + + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); + if (!mlxsw_sp_port_vlan) + return 0; + mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan); + + return 0; +} + +static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port, + struct flow_block_offload *f) +{ + switch (f->binder_type) { + case FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS: + return mlxsw_sp_setup_tc_block_clsact(mlxsw_sp_port, f, true); + case FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS: + return mlxsw_sp_setup_tc_block_clsact(mlxsw_sp_port, f, false); + case FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP: + return mlxsw_sp_setup_tc_block_qevent_early_drop(mlxsw_sp_port, f); + case FLOW_BLOCK_BINDER_TYPE_RED_MARK: + return mlxsw_sp_setup_tc_block_qevent_mark(mlxsw_sp_port, f); + default: + return -EOPNOTSUPP; + } +} + +static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + switch (type) { + case TC_SETUP_BLOCK: + return mlxsw_sp_setup_tc_block(mlxsw_sp_port, type_data); + case TC_SETUP_QDISC_RED: + return mlxsw_sp_setup_tc_red(mlxsw_sp_port, type_data); + case TC_SETUP_QDISC_PRIO: + return mlxsw_sp_setup_tc_prio(mlxsw_sp_port, type_data); + case TC_SETUP_QDISC_ETS: + return mlxsw_sp_setup_tc_ets(mlxsw_sp_port, type_data); + case TC_SETUP_QDISC_TBF: + return mlxsw_sp_setup_tc_tbf(mlxsw_sp_port, type_data); + case TC_SETUP_QDISC_FIFO: + return mlxsw_sp_setup_tc_fifo(mlxsw_sp_port, type_data); + default: + return -EOPNOTSUPP; + } +} + +static int mlxsw_sp_feature_hw_tc(struct net_device *dev, bool enable) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + if (!enable) { + if (mlxsw_sp_flow_block_rule_count(mlxsw_sp_port->ing_flow_block) || + mlxsw_sp_flow_block_rule_count(mlxsw_sp_port->eg_flow_block)) { + netdev_err(dev, "Active offloaded tc filters, can't turn hw_tc_offload off\n"); + return -EINVAL; + } + mlxsw_sp_flow_block_disable_inc(mlxsw_sp_port->ing_flow_block); + mlxsw_sp_flow_block_disable_inc(mlxsw_sp_port->eg_flow_block); + } else { + mlxsw_sp_flow_block_disable_dec(mlxsw_sp_port->ing_flow_block); + mlxsw_sp_flow_block_disable_dec(mlxsw_sp_port->eg_flow_block); + } + return 0; +} + +static int mlxsw_sp_feature_loopback(struct net_device *dev, bool enable) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + char pplr_pl[MLXSW_REG_PPLR_LEN]; + int err; + + if (netif_running(dev)) + mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false); + + mlxsw_reg_pplr_pack(pplr_pl, mlxsw_sp_port->local_port, enable); + err = mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pplr), + pplr_pl); + + if (netif_running(dev)) + mlxsw_sp_port_admin_status_set(mlxsw_sp_port, 
true); + + return err; +} + +typedef int (*mlxsw_sp_feature_handler)(struct net_device *dev, bool enable); + +static int mlxsw_sp_handle_feature(struct net_device *dev, + netdev_features_t wanted_features, + netdev_features_t feature, + mlxsw_sp_feature_handler feature_handler) +{ + netdev_features_t changes = wanted_features ^ dev->features; + bool enable = !!(wanted_features & feature); + int err; + + if (!(changes & feature)) + return 0; + + err = feature_handler(dev, enable); + if (err) { + netdev_err(dev, "%s feature %pNF failed, err %d\n", + enable ? "Enable" : "Disable", &feature, err); + return err; + } + + if (enable) + dev->features |= feature; + else + dev->features &= ~feature; + + return 0; +} +static int mlxsw_sp_set_features(struct net_device *dev, + netdev_features_t features) +{ + netdev_features_t oper_features = dev->features; + int err = 0; + + err |= mlxsw_sp_handle_feature(dev, features, NETIF_F_HW_TC, + mlxsw_sp_feature_hw_tc); + err |= mlxsw_sp_handle_feature(dev, features, NETIF_F_LOOPBACK, + mlxsw_sp_feature_loopback); + + if (err) { + dev->features = oper_features; + return -EINVAL; + } + + return 0; +} + +static struct devlink_port * +mlxsw_sp_port_get_devlink_port(struct net_device *dev) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + return mlxsw_core_port_devlink_port_get(mlxsw_sp->core, + mlxsw_sp_port->local_port); +} + +static int mlxsw_sp_port_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct ifreq *ifr) +{ + struct hwtstamp_config config; + int err; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, + &config); + if (err) + return err; + + if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) + return -EFAULT; + + return 0; +} + +static int mlxsw_sp_port_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct ifreq *ifr) +{ + struct hwtstamp_config config; + int err; + + err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_get(mlxsw_sp_port, + &config); + if (err) + return err; + + if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) + return -EFAULT; + + return 0; +} + +static inline void mlxsw_sp_port_ptp_clear(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct hwtstamp_config config = {0}; + + mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, &config); +} + +static int +mlxsw_sp_port_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + switch (cmd) { + case SIOCSHWTSTAMP: + return mlxsw_sp_port_hwtstamp_set(mlxsw_sp_port, ifr); + case SIOCGHWTSTAMP: + return mlxsw_sp_port_hwtstamp_get(mlxsw_sp_port, ifr); + default: + return -EOPNOTSUPP; + } +} + +static const struct net_device_ops mlxsw_sp_port_netdev_ops = { + .ndo_open = mlxsw_sp_port_open, + .ndo_stop = mlxsw_sp_port_stop, + .ndo_start_xmit = mlxsw_sp_port_xmit, + .ndo_setup_tc = mlxsw_sp_setup_tc, + .ndo_set_rx_mode = mlxsw_sp_set_rx_mode, + .ndo_set_mac_address = mlxsw_sp_port_set_mac_address, + .ndo_change_mtu = mlxsw_sp_port_change_mtu, + .ndo_get_stats64 = mlxsw_sp_port_get_stats64, + .ndo_has_offload_stats = mlxsw_sp_port_has_offload_stats, + .ndo_get_offload_stats = mlxsw_sp_port_get_offload_stats, + .ndo_vlan_rx_add_vid = mlxsw_sp_port_add_vid, + .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, + .ndo_set_features = mlxsw_sp_set_features, + .ndo_get_devlink_port = mlxsw_sp_port_get_devlink_port, + .ndo_eth_ioctl = 
mlxsw_sp_port_ioctl, +}; + +static int +mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u32 eth_proto_cap, eth_proto_admin, eth_proto_oper; + const struct mlxsw_sp_port_type_speed_ops *ops; + char ptys_pl[MLXSW_REG_PTYS_LEN]; + u32 eth_proto_cap_masked; + int err; + + ops = mlxsw_sp->port_type_speed_ops; + + /* Set advertised speeds to speeds supported by both the driver + * and the device. + */ + ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port, + 0, false); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + if (err) + return err; + + ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, &eth_proto_cap, + &eth_proto_admin, &eth_proto_oper); + eth_proto_cap_masked = ops->ptys_proto_cap_masked_get(eth_proto_cap); + ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port, + eth_proto_cap_masked, + mlxsw_sp_port->link.autoneg); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); +} + +int mlxsw_sp_port_speed_get(struct mlxsw_sp_port *mlxsw_sp_port, u32 *speed) +{ + const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char ptys_pl[MLXSW_REG_PTYS_LEN]; + u32 eth_proto_oper; + int err; + + port_type_speed_ops = mlxsw_sp->port_type_speed_ops; + port_type_speed_ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, + mlxsw_sp_port->local_port, 0, + false); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + if (err) + return err; + port_type_speed_ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, NULL, NULL, + &eth_proto_oper); + *speed = port_type_speed_ops->from_ptys_speed(mlxsw_sp, eth_proto_oper); + return 0; +} + +int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, + bool dwrr, u8 dwrr_weight) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qeec_pl[MLXSW_REG_QEEC_LEN]; + + mlxsw_reg_qeec_pack(qeec_pl, mlxsw_sp_port->local_port, hr, index, + next_index); + mlxsw_reg_qeec_de_set(qeec_pl, true); + mlxsw_reg_qeec_dwrr_set(qeec_pl, dwrr); + mlxsw_reg_qeec_dwrr_weight_set(qeec_pl, dwrr_weight); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl); +} + +int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qeec_hr hr, u8 index, + u8 next_index, u32 maxrate, u8 burst_size) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qeec_pl[MLXSW_REG_QEEC_LEN]; + + mlxsw_reg_qeec_pack(qeec_pl, mlxsw_sp_port->local_port, hr, index, + next_index); + mlxsw_reg_qeec_mase_set(qeec_pl, true); + mlxsw_reg_qeec_max_shaper_rate_set(qeec_pl, maxrate); + mlxsw_reg_qeec_max_shaper_bs_set(qeec_pl, burst_size); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl); +} + +static int mlxsw_sp_port_min_bw_set(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qeec_hr hr, u8 index, + u8 next_index, u32 minrate) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qeec_pl[MLXSW_REG_QEEC_LEN]; + + mlxsw_reg_qeec_pack(qeec_pl, mlxsw_sp_port->local_port, hr, index, + next_index); + mlxsw_reg_qeec_mise_set(qeec_pl, true); + mlxsw_reg_qeec_min_shaper_rate_set(qeec_pl, minrate); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl); +} + +int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port, + u8 switch_prio, u8 tclass) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qtct_pl[MLXSW_REG_QTCT_LEN]; + + mlxsw_reg_qtct_pack(qtct_pl, 
mlxsw_sp_port->local_port, switch_prio, + tclass); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qtct), qtct_pl); +} + +static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err, i; + + /* Setup the elements hierarcy, so that each TC is linked to + * one subgroup, which are all member in the same group. + */ + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_GROUP, 0, 0, false, 0); + if (err) + return err; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, i, + 0, false, 0); + if (err) + return err; + } + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_TC, i, i, + false, 0); + if (err) + return err; + + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_TC, + i + 8, i, + true, 100); + if (err) + return err; + } + + /* Make sure the max shaper is disabled in all hierarchies that support + * it. Note that this disables ptps (PTP shaper), but that is intended + * for the initial configuration. + */ + err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_PORT, 0, 0, + MLXSW_REG_QEEC_MAS_DIS, 0); + if (err) + return err; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + i, 0, + MLXSW_REG_QEEC_MAS_DIS, 0); + if (err) + return err; + } + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_TC, + i, i, + MLXSW_REG_QEEC_MAS_DIS, 0); + if (err) + return err; + + err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_TC, + i + 8, i, + MLXSW_REG_QEEC_MAS_DIS, 0); + if (err) + return err; + } + + /* Configure the min shaper for multicast TCs. */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_min_bw_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_TC, + i + 8, i, + MLXSW_REG_QEEC_MIS_MIN); + if (err) + return err; + } + + /* Map all priorities to traffic class 0. 
*/ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, 0); + if (err) + return err; + } + + return 0; +} + +static int mlxsw_sp_port_tc_mc_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, + bool enable) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qtctm_pl[MLXSW_REG_QTCTM_LEN]; + + mlxsw_reg_qtctm_pack(qtctm_pl, mlxsw_sp_port->local_port, enable); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qtctm), qtctm_pl); +} + +static int mlxsw_sp_port_overheat_init_val_set(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 slot_index = mlxsw_sp_port->mapping.slot_index; + u8 module = mlxsw_sp_port->mapping.module; + u64 overheat_counter; + int err; + + err = mlxsw_env_module_overheat_counter_get(mlxsw_sp->core, slot_index, + module, &overheat_counter); + if (err) + return err; + + mlxsw_sp_port->module_overheat_initial_val = overheat_counter; + return 0; +} + +int +mlxsw_sp_port_vlan_classification_set(struct mlxsw_sp_port *mlxsw_sp_port, + bool is_8021ad_tagged, + bool is_8021q_tagged) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char spvc_pl[MLXSW_REG_SPVC_LEN]; + + mlxsw_reg_spvc_pack(spvc_pl, mlxsw_sp_port->local_port, + is_8021ad_tagged, is_8021q_tagged); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvc), spvc_pl); +} + +static int mlxsw_sp_port_label_info_get(struct mlxsw_sp *mlxsw_sp, + u16 local_port, u8 *port_number, + u8 *split_port_subnumber, + u8 *slot_index) +{ + char pllp_pl[MLXSW_REG_PLLP_LEN]; + int err; + + mlxsw_reg_pllp_pack(pllp_pl, local_port); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pllp), pllp_pl); + if (err) + return err; + mlxsw_reg_pllp_unpack(pllp_pl, port_number, + split_port_subnumber, slot_index); + return 0; +} + +static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u16 local_port, + bool split, + struct mlxsw_sp_port_mapping *port_mapping) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + struct mlxsw_sp_port *mlxsw_sp_port; + u32 lanes = port_mapping->width; + u8 split_port_subnumber; + struct net_device *dev; + u8 port_number; + u8 slot_index; + bool splittable; + int err; + + err = mlxsw_sp_port_module_map(mlxsw_sp, local_port, port_mapping); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to map module\n", + local_port); + return err; + } + + err = mlxsw_sp_port_swid_set(mlxsw_sp, local_port, 0); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set SWID\n", + local_port); + goto err_port_swid_set; + } + + err = mlxsw_sp_port_label_info_get(mlxsw_sp, local_port, &port_number, + &split_port_subnumber, &slot_index); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to get port label information\n", + local_port); + goto err_port_label_info_get; + } + + splittable = lanes > 1 && !split; + err = mlxsw_core_port_init(mlxsw_sp->core, local_port, slot_index, + port_number, split, split_port_subnumber, + splittable, lanes, mlxsw_sp->base_mac, + sizeof(mlxsw_sp->base_mac)); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to init core port\n", + local_port); + goto err_core_port_init; + } + + dev = alloc_etherdev(sizeof(struct mlxsw_sp_port)); + if (!dev) { + err = -ENOMEM; + goto err_alloc_etherdev; + } + SET_NETDEV_DEV(dev, mlxsw_sp->bus_info->dev); + dev_net_set(dev, mlxsw_sp_net(mlxsw_sp)); + mlxsw_sp_port = netdev_priv(dev); + mlxsw_sp_port->dev = dev; + mlxsw_sp_port->mlxsw_sp = mlxsw_sp; + mlxsw_sp_port->local_port = local_port; + 
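/* Record the software defaults (PVID, split state, module mapping, autoneg) in the port structure before any further device configuration for this port. */ +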
mlxsw_sp_port->pvid = MLXSW_SP_DEFAULT_VID; + mlxsw_sp_port->split = split; + mlxsw_sp_port->mapping = *port_mapping; + mlxsw_sp_port->link.autoneg = 1; + INIT_LIST_HEAD(&mlxsw_sp_port->vlans_list); + + mlxsw_sp_port->pcpu_stats = + netdev_alloc_pcpu_stats(struct mlxsw_sp_port_pcpu_stats); + if (!mlxsw_sp_port->pcpu_stats) { + err = -ENOMEM; + goto err_alloc_stats; + } + + INIT_DELAYED_WORK(&mlxsw_sp_port->periodic_hw_stats.update_dw, + &update_stats_cache); + + dev->netdev_ops = &mlxsw_sp_port_netdev_ops; + dev->ethtool_ops = &mlxsw_sp_port_ethtool_ops; + + err = mlxsw_sp_port_dev_addr_init(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unable to init port mac address\n", + mlxsw_sp_port->local_port); + goto err_dev_addr_init; + } + + netif_carrier_off(dev); + + dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_LLTX | NETIF_F_SG | + NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_TC; + dev->hw_features |= NETIF_F_HW_TC | NETIF_F_LOOPBACK; + + dev->min_mtu = 0; + dev->max_mtu = ETH_MAX_MTU; + + /* Each packet needs to have a Tx header (metadata) on top all other + * headers. + */ + dev->needed_headroom = MLXSW_TXHDR_LEN; + + err = mlxsw_sp_port_system_port_mapping_set(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set system port mapping\n", + mlxsw_sp_port->local_port); + goto err_port_system_port_mapping_set; + } + + err = mlxsw_sp_port_speed_by_width_set(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to enable speeds\n", + mlxsw_sp_port->local_port); + goto err_port_speed_by_width_set; + } + + err = mlxsw_sp->port_type_speed_ops->ptys_max_speed(mlxsw_sp_port, + &mlxsw_sp_port->max_speed); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to get maximum speed\n", + mlxsw_sp_port->local_port); + goto err_max_speed_get; + } + + err = mlxsw_sp_port_max_mtu_get(mlxsw_sp_port, &mlxsw_sp_port->max_mtu); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to get maximum MTU\n", + mlxsw_sp_port->local_port); + goto err_port_max_mtu_get; + } + + err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, ETH_DATA_LEN); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set MTU\n", + mlxsw_sp_port->local_port); + goto err_port_mtu_set; + } + + err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false); + if (err) + goto err_port_admin_status_set; + + err = mlxsw_sp_port_buffers_init(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize buffers\n", + mlxsw_sp_port->local_port); + goto err_port_buffers_init; + } + + err = mlxsw_sp_port_ets_init(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize ETS\n", + mlxsw_sp_port->local_port); + goto err_port_ets_init; + } + + err = mlxsw_sp_port_tc_mc_mode_set(mlxsw_sp_port, true); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize TC MC mode\n", + mlxsw_sp_port->local_port); + goto err_port_tc_mc_mode; + } + + /* ETS and buffers must be initialized before DCB. 
*/ + err = mlxsw_sp_port_dcb_init(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize DCB\n", + mlxsw_sp_port->local_port); + goto err_port_dcb_init; + } + + err = mlxsw_sp_port_fids_init(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize FIDs\n", + mlxsw_sp_port->local_port); + goto err_port_fids_init; + } + + err = mlxsw_sp_tc_qdisc_init(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize TC qdiscs\n", + mlxsw_sp_port->local_port); + goto err_port_qdiscs_init; + } + + err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, 0, VLAN_N_VID - 1, false, + false); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to clear VLAN filter\n", + mlxsw_sp_port->local_port); + goto err_port_vlan_clear; + } + + err = mlxsw_sp_port_nve_init(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize NVE\n", + mlxsw_sp_port->local_port); + goto err_port_nve_init; + } + + err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, MLXSW_SP_DEFAULT_VID, + ETH_P_8021Q); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set PVID\n", + mlxsw_sp_port->local_port); + goto err_port_pvid_set; + } + + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_create(mlxsw_sp_port, + MLXSW_SP_DEFAULT_VID); + if (IS_ERR(mlxsw_sp_port_vlan)) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create VID 1\n", + mlxsw_sp_port->local_port); + err = PTR_ERR(mlxsw_sp_port_vlan); + goto err_port_vlan_create; + } + mlxsw_sp_port->default_vlan = mlxsw_sp_port_vlan; + + /* Set SPVC.et0=true and SPVC.et1=false to make the local port to treat + * only packets with 802.1q header as tagged packets. + */ + err = mlxsw_sp_port_vlan_classification_set(mlxsw_sp_port, false, true); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set default VLAN classification\n", + local_port); + goto err_port_vlan_classification_set; + } + + INIT_DELAYED_WORK(&mlxsw_sp_port->ptp.shaper_dw, + mlxsw_sp->ptp_ops->shaper_work); + + mlxsw_sp->ports[local_port] = mlxsw_sp_port; + + err = mlxsw_sp_port_overheat_init_val_set(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set overheat initial value\n", + mlxsw_sp_port->local_port); + goto err_port_overheat_init_val_set; + } + + err = register_netdev(dev); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to register netdev\n", + mlxsw_sp_port->local_port); + goto err_register_netdev; + } + + mlxsw_core_port_eth_set(mlxsw_sp->core, mlxsw_sp_port->local_port, + mlxsw_sp_port, dev); + mlxsw_core_schedule_dw(&mlxsw_sp_port->periodic_hw_stats.update_dw, 0); + return 0; + +err_register_netdev: +err_port_overheat_init_val_set: + mlxsw_sp_port_vlan_classification_set(mlxsw_sp_port, true, true); +err_port_vlan_classification_set: + mlxsw_sp->ports[local_port] = NULL; + mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan); +err_port_vlan_create: +err_port_pvid_set: + mlxsw_sp_port_nve_fini(mlxsw_sp_port); +err_port_nve_init: +err_port_vlan_clear: + mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port); +err_port_qdiscs_init: + mlxsw_sp_port_fids_fini(mlxsw_sp_port); +err_port_fids_init: + mlxsw_sp_port_dcb_fini(mlxsw_sp_port); +err_port_dcb_init: + mlxsw_sp_port_tc_mc_mode_set(mlxsw_sp_port, false); +err_port_tc_mc_mode: +err_port_ets_init: + mlxsw_sp_port_buffers_fini(mlxsw_sp_port); +err_port_buffers_init: +err_port_admin_status_set: +err_port_mtu_set: +err_port_max_mtu_get: +err_max_speed_get: 
+err_port_speed_by_width_set: +err_port_system_port_mapping_set: +err_dev_addr_init: + free_percpu(mlxsw_sp_port->pcpu_stats); +err_alloc_stats: + free_netdev(dev); +err_alloc_etherdev: + mlxsw_core_port_fini(mlxsw_sp->core, local_port); +err_core_port_init: +err_port_label_info_get: + mlxsw_sp_port_swid_set(mlxsw_sp, local_port, + MLXSW_PORT_SWID_DISABLED_PORT); +err_port_swid_set: + mlxsw_sp_port_module_unmap(mlxsw_sp, local_port, + port_mapping->slot_index, + port_mapping->module); + return err; +} + +static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u16 local_port) +{ + struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; + u8 slot_index = mlxsw_sp_port->mapping.slot_index; + u8 module = mlxsw_sp_port->mapping.module; + + cancel_delayed_work_sync(&mlxsw_sp_port->periodic_hw_stats.update_dw); + cancel_delayed_work_sync(&mlxsw_sp_port->ptp.shaper_dw); + mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp); + unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ + mlxsw_sp_port_ptp_clear(mlxsw_sp_port); + mlxsw_sp_port_vlan_classification_set(mlxsw_sp_port, true, true); + mlxsw_sp->ports[local_port] = NULL; + mlxsw_sp_port_vlan_flush(mlxsw_sp_port, true); + mlxsw_sp_port_nve_fini(mlxsw_sp_port); + mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port); + mlxsw_sp_port_fids_fini(mlxsw_sp_port); + mlxsw_sp_port_dcb_fini(mlxsw_sp_port); + mlxsw_sp_port_tc_mc_mode_set(mlxsw_sp_port, false); + mlxsw_sp_port_buffers_fini(mlxsw_sp_port); + free_percpu(mlxsw_sp_port->pcpu_stats); + WARN_ON_ONCE(!list_empty(&mlxsw_sp_port->vlans_list)); + free_netdev(mlxsw_sp_port->dev); + mlxsw_core_port_fini(mlxsw_sp->core, local_port); + mlxsw_sp_port_swid_set(mlxsw_sp, local_port, + MLXSW_PORT_SWID_DISABLED_PORT); + mlxsw_sp_port_module_unmap(mlxsw_sp, local_port, slot_index, module); +} + +static int mlxsw_sp_cpu_port_create(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + int err; + + mlxsw_sp_port = kzalloc(sizeof(*mlxsw_sp_port), GFP_KERNEL); + if (!mlxsw_sp_port) + return -ENOMEM; + + mlxsw_sp_port->mlxsw_sp = mlxsw_sp; + mlxsw_sp_port->local_port = MLXSW_PORT_CPU_PORT; + + err = mlxsw_core_cpu_port_init(mlxsw_sp->core, + mlxsw_sp_port, + mlxsw_sp->base_mac, + sizeof(mlxsw_sp->base_mac)); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize core CPU port\n"); + goto err_core_cpu_port_init; + } + + mlxsw_sp->ports[MLXSW_PORT_CPU_PORT] = mlxsw_sp_port; + return 0; + +err_core_cpu_port_init: + kfree(mlxsw_sp_port); + return err; +} + +static void mlxsw_sp_cpu_port_remove(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_sp->ports[MLXSW_PORT_CPU_PORT]; + + mlxsw_core_cpu_port_fini(mlxsw_sp->core); + mlxsw_sp->ports[MLXSW_PORT_CPU_PORT] = NULL; + kfree(mlxsw_sp_port); +} + +static bool mlxsw_sp_local_port_valid(u16 local_port) +{ + return local_port != MLXSW_PORT_CPU_PORT; +} + +static bool mlxsw_sp_port_created(struct mlxsw_sp *mlxsw_sp, u16 local_port) +{ + if (!mlxsw_sp_local_port_valid(local_port)) + return false; + return mlxsw_sp->ports[local_port] != NULL; +} + +static int mlxsw_sp_port_mapping_event_set(struct mlxsw_sp *mlxsw_sp, + u16 local_port, bool enable) +{ + char pmecr_pl[MLXSW_REG_PMECR_LEN]; + + mlxsw_reg_pmecr_pack(pmecr_pl, local_port, + enable ? 
MLXSW_REG_PMECR_E_GENERATE_EVENT : + MLXSW_REG_PMECR_E_DO_NOT_GENERATE_EVENT); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmecr), pmecr_pl); +} + +struct mlxsw_sp_port_mapping_event { + struct list_head list; + char pmlp_pl[MLXSW_REG_PMLP_LEN]; +}; + +static void mlxsw_sp_port_mapping_events_work(struct work_struct *work) +{ + struct mlxsw_sp_port_mapping_event *event, *next_event; + struct mlxsw_sp_port_mapping_events *events; + struct mlxsw_sp_port_mapping port_mapping; + struct mlxsw_sp *mlxsw_sp; + struct devlink *devlink; + LIST_HEAD(event_queue); + u16 local_port; + int err; + + events = container_of(work, struct mlxsw_sp_port_mapping_events, work); + mlxsw_sp = container_of(events, struct mlxsw_sp, port_mapping_events); + devlink = priv_to_devlink(mlxsw_sp->core); + + spin_lock_bh(&events->queue_lock); + list_splice_init(&events->queue, &event_queue); + spin_unlock_bh(&events->queue_lock); + + list_for_each_entry_safe(event, next_event, &event_queue, list) { + local_port = mlxsw_reg_pmlp_local_port_get(event->pmlp_pl); + err = mlxsw_sp_port_module_info_parse(mlxsw_sp, local_port, + event->pmlp_pl, &port_mapping); + if (err) + goto out; + + if (WARN_ON_ONCE(!port_mapping.width)) + goto out; + + devl_lock(devlink); + + if (!mlxsw_sp_port_created(mlxsw_sp, local_port)) + mlxsw_sp_port_create(mlxsw_sp, local_port, + false, &port_mapping); + else + WARN_ON_ONCE(1); + + devl_unlock(devlink); + + mlxsw_sp->port_mapping[local_port] = port_mapping; + +out: + kfree(event); + } +} + +static void +mlxsw_sp_port_mapping_listener_func(const struct mlxsw_reg_info *reg, + char *pmlp_pl, void *priv) +{ + struct mlxsw_sp_port_mapping_events *events; + struct mlxsw_sp_port_mapping_event *event; + struct mlxsw_sp *mlxsw_sp = priv; + u16 local_port; + + local_port = mlxsw_reg_pmlp_local_port_get(pmlp_pl); + if (WARN_ON_ONCE(!mlxsw_sp_local_port_is_valid(mlxsw_sp, local_port))) + return; + + events = &mlxsw_sp->port_mapping_events; + event = kmalloc(sizeof(*event), GFP_ATOMIC); + if (!event) + return; + memcpy(event->pmlp_pl, pmlp_pl, sizeof(event->pmlp_pl)); + spin_lock(&events->queue_lock); + list_add_tail(&event->list, &events->queue); + spin_unlock(&events->queue_lock); + mlxsw_core_schedule_work(&events->work); +} + +static void +__mlxsw_sp_port_mapping_events_cancel(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_port_mapping_event *event, *next_event; + struct mlxsw_sp_port_mapping_events *events; + + events = &mlxsw_sp->port_mapping_events; + + /* Caller needs to make sure that no new event is going to appear. 
*/ + cancel_work_sync(&events->work); + list_for_each_entry_safe(event, next_event, &events->queue, list) { + list_del(&event->list); + kfree(event); + } +} + +static void mlxsw_sp_ports_remove(struct mlxsw_sp *mlxsw_sp) +{ + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); + int i; + + for (i = 1; i < max_ports; i++) + mlxsw_sp_port_mapping_event_set(mlxsw_sp, i, false); + /* Make sure all scheduled events are processed */ + __mlxsw_sp_port_mapping_events_cancel(mlxsw_sp); + + for (i = 1; i < max_ports; i++) + if (mlxsw_sp_port_created(mlxsw_sp, i)) + mlxsw_sp_port_remove(mlxsw_sp, i); + mlxsw_sp_cpu_port_remove(mlxsw_sp); + kfree(mlxsw_sp->ports); + mlxsw_sp->ports = NULL; +} + +static void +mlxsw_sp_ports_remove_selected(struct mlxsw_core *mlxsw_core, + bool (*selector)(void *priv, u16 local_port), + void *priv) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_core); + int i; + + for (i = 1; i < max_ports; i++) + if (mlxsw_sp_port_created(mlxsw_sp, i) && selector(priv, i)) + mlxsw_sp_port_remove(mlxsw_sp, i); +} + +static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) +{ + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); + struct mlxsw_sp_port_mapping_events *events; + struct mlxsw_sp_port_mapping *port_mapping; + size_t alloc_size; + int i; + int err; + + alloc_size = sizeof(struct mlxsw_sp_port *) * max_ports; + mlxsw_sp->ports = kzalloc(alloc_size, GFP_KERNEL); + if (!mlxsw_sp->ports) + return -ENOMEM; + + events = &mlxsw_sp->port_mapping_events; + INIT_LIST_HEAD(&events->queue); + spin_lock_init(&events->queue_lock); + INIT_WORK(&events->work, mlxsw_sp_port_mapping_events_work); + + for (i = 1; i < max_ports; i++) { + err = mlxsw_sp_port_mapping_event_set(mlxsw_sp, i, true); + if (err) + goto err_event_enable; + } + + err = mlxsw_sp_cpu_port_create(mlxsw_sp); + if (err) + goto err_cpu_port_create; + + for (i = 1; i < max_ports; i++) { + port_mapping = &mlxsw_sp->port_mapping[i]; + if (!port_mapping->width) + continue; + err = mlxsw_sp_port_create(mlxsw_sp, i, false, port_mapping); + if (err) + goto err_port_create; + } + return 0; + +err_port_create: + for (i--; i >= 1; i--) + if (mlxsw_sp_port_created(mlxsw_sp, i)) + mlxsw_sp_port_remove(mlxsw_sp, i); + i = max_ports; + mlxsw_sp_cpu_port_remove(mlxsw_sp); +err_cpu_port_create: +err_event_enable: + for (i--; i >= 1; i--) + mlxsw_sp_port_mapping_event_set(mlxsw_sp, i, false); + /* Make sure all scheduled events are processed */ + __mlxsw_sp_port_mapping_events_cancel(mlxsw_sp); + kfree(mlxsw_sp->ports); + mlxsw_sp->ports = NULL; + return err; +} + +static int mlxsw_sp_port_module_info_init(struct mlxsw_sp *mlxsw_sp) +{ + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); + struct mlxsw_sp_port_mapping *port_mapping; + int i; + int err; + + mlxsw_sp->port_mapping = kcalloc(max_ports, + sizeof(struct mlxsw_sp_port_mapping), + GFP_KERNEL); + if (!mlxsw_sp->port_mapping) + return -ENOMEM; + + for (i = 1; i < max_ports; i++) { + port_mapping = &mlxsw_sp->port_mapping[i]; + err = mlxsw_sp_port_module_info_get(mlxsw_sp, i, port_mapping); + if (err) + goto err_port_module_info_get; + } + return 0; + +err_port_module_info_get: + kfree(mlxsw_sp->port_mapping); + return err; +} + +static void mlxsw_sp_port_module_info_fini(struct mlxsw_sp *mlxsw_sp) +{ + kfree(mlxsw_sp->port_mapping); +} + +static int +mlxsw_sp_port_split_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_port_mapping *port_mapping, + unsigned int count, 
const char *pmtdb_pl) +{ + struct mlxsw_sp_port_mapping split_port_mapping; + int err, i; + + split_port_mapping = *port_mapping; + split_port_mapping.width /= count; + for (i = 0; i < count; i++) { + u16 s_local_port = mlxsw_reg_pmtdb_port_num_get(pmtdb_pl, i); + + if (!mlxsw_sp_local_port_valid(s_local_port)) + continue; + + err = mlxsw_sp_port_create(mlxsw_sp, s_local_port, + true, &split_port_mapping); + if (err) + goto err_port_create; + split_port_mapping.lane += split_port_mapping.width; + } + + return 0; + +err_port_create: + for (i--; i >= 0; i--) { + u16 s_local_port = mlxsw_reg_pmtdb_port_num_get(pmtdb_pl, i); + + if (mlxsw_sp_port_created(mlxsw_sp, s_local_port)) + mlxsw_sp_port_remove(mlxsw_sp, s_local_port); + } + return err; +} + +static void mlxsw_sp_port_unsplit_create(struct mlxsw_sp *mlxsw_sp, + unsigned int count, + const char *pmtdb_pl) +{ + struct mlxsw_sp_port_mapping *port_mapping; + int i; + + /* Go over original unsplit ports in the gap and recreate them. */ + for (i = 0; i < count; i++) { + u16 local_port = mlxsw_reg_pmtdb_port_num_get(pmtdb_pl, i); + + port_mapping = &mlxsw_sp->port_mapping[local_port]; + if (!port_mapping->width || !mlxsw_sp_local_port_valid(local_port)) + continue; + mlxsw_sp_port_create(mlxsw_sp, local_port, + false, port_mapping); + } +} + +static struct mlxsw_sp_port * +mlxsw_sp_port_get_by_local_port(struct mlxsw_sp *mlxsw_sp, u16 local_port) +{ + if (mlxsw_sp->ports && mlxsw_sp->ports[local_port]) + return mlxsw_sp->ports[local_port]; + return NULL; +} + +static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u16 local_port, + unsigned int count, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct mlxsw_sp_port_mapping port_mapping; + struct mlxsw_sp_port *mlxsw_sp_port; + enum mlxsw_reg_pmtdb_status status; + char pmtdb_pl[MLXSW_REG_PMTDB_LEN]; + int i; + int err; + + mlxsw_sp_port = mlxsw_sp_port_get_by_local_port(mlxsw_sp, local_port); + if (!mlxsw_sp_port) { + dev_err(mlxsw_sp->bus_info->dev, "Port number \"%d\" does not exist\n", + local_port); + NL_SET_ERR_MSG_MOD(extack, "Port number does not exist"); + return -EINVAL; + } + + if (mlxsw_sp_port->split) { + NL_SET_ERR_MSG_MOD(extack, "Port is already split"); + return -EINVAL; + } + + mlxsw_reg_pmtdb_pack(pmtdb_pl, mlxsw_sp_port->mapping.slot_index, + mlxsw_sp_port->mapping.module, + mlxsw_sp_port->mapping.module_width / count, + count); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(pmtdb), pmtdb_pl); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to query split info"); + return err; + } + + status = mlxsw_reg_pmtdb_status_get(pmtdb_pl); + if (status != MLXSW_REG_PMTDB_STATUS_SUCCESS) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported split configuration"); + return -EINVAL; + } + + port_mapping = mlxsw_sp_port->mapping; + + for (i = 0; i < count; i++) { + u16 s_local_port = mlxsw_reg_pmtdb_port_num_get(pmtdb_pl, i); + + if (mlxsw_sp_port_created(mlxsw_sp, s_local_port)) + mlxsw_sp_port_remove(mlxsw_sp, s_local_port); + } + + err = mlxsw_sp_port_split_create(mlxsw_sp, &port_mapping, + count, pmtdb_pl); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to create split ports\n"); + goto err_port_split_create; + } + + return 0; + +err_port_split_create: + mlxsw_sp_port_unsplit_create(mlxsw_sp, count, pmtdb_pl); + + return err; +} + +static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u16 local_port, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + 
struct mlxsw_sp_port *mlxsw_sp_port; + char pmtdb_pl[MLXSW_REG_PMTDB_LEN]; + unsigned int count; + int i; + int err; + + mlxsw_sp_port = mlxsw_sp_port_get_by_local_port(mlxsw_sp, local_port); + if (!mlxsw_sp_port) { + dev_err(mlxsw_sp->bus_info->dev, "Port number \"%d\" does not exist\n", + local_port); + NL_SET_ERR_MSG_MOD(extack, "Port number does not exist"); + return -EINVAL; + } + + if (!mlxsw_sp_port->split) { + NL_SET_ERR_MSG_MOD(extack, "Port was not split"); + return -EINVAL; + } + + count = mlxsw_sp_port->mapping.module_width / + mlxsw_sp_port->mapping.width; + + mlxsw_reg_pmtdb_pack(pmtdb_pl, mlxsw_sp_port->mapping.slot_index, + mlxsw_sp_port->mapping.module, + mlxsw_sp_port->mapping.module_width / count, + count); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(pmtdb), pmtdb_pl); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to query split info"); + return err; + } + + for (i = 0; i < count; i++) { + u16 s_local_port = mlxsw_reg_pmtdb_port_num_get(pmtdb_pl, i); + + if (mlxsw_sp_port_created(mlxsw_sp, s_local_port)) + mlxsw_sp_port_remove(mlxsw_sp, s_local_port); + } + + mlxsw_sp_port_unsplit_create(mlxsw_sp, count, pmtdb_pl); + + return 0; +} + +static void +mlxsw_sp_port_down_wipe_counters(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int i; + + for (i = 0; i < TC_MAX_QUEUE; i++) + mlxsw_sp_port->periodic_hw_stats.xstats.backlog[i] = 0; +} + +static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg, + char *pude_pl, void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + struct mlxsw_sp_port *mlxsw_sp_port; + enum mlxsw_reg_pude_oper_status status; + u16 local_port; + + local_port = mlxsw_reg_pude_local_port_get(pude_pl); + + if (WARN_ON_ONCE(!mlxsw_sp_local_port_is_valid(mlxsw_sp, local_port))) + return; + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) + return; + + status = mlxsw_reg_pude_oper_status_get(pude_pl); + if (status == MLXSW_PORT_OPER_STATUS_UP) { + netdev_info(mlxsw_sp_port->dev, "link up\n"); + netif_carrier_on(mlxsw_sp_port->dev); + mlxsw_core_schedule_dw(&mlxsw_sp_port->ptp.shaper_dw, 0); + } else { + netdev_info(mlxsw_sp_port->dev, "link down\n"); + netif_carrier_off(mlxsw_sp_port->dev); + mlxsw_sp_port_down_wipe_counters(mlxsw_sp_port); + } +} + +static void mlxsw_sp1_ptp_fifo_event_func(struct mlxsw_sp *mlxsw_sp, + char *mtpptr_pl, bool ingress) +{ + u16 local_port; + u8 num_rec; + int i; + + local_port = mlxsw_reg_mtpptr_local_port_get(mtpptr_pl); + num_rec = mlxsw_reg_mtpptr_num_rec_get(mtpptr_pl); + for (i = 0; i < num_rec; i++) { + u8 domain_number; + u8 message_type; + u16 sequence_id; + u64 timestamp; + + mlxsw_reg_mtpptr_unpack(mtpptr_pl, i, &message_type, + &domain_number, &sequence_id, + &timestamp); + mlxsw_sp1_ptp_got_timestamp(mlxsw_sp, ingress, local_port, + message_type, domain_number, + sequence_id, timestamp); + } +} + +static void mlxsw_sp1_ptp_ing_fifo_event_func(const struct mlxsw_reg_info *reg, + char *mtpptr_pl, void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp1_ptp_fifo_event_func(mlxsw_sp, mtpptr_pl, true); +} + +static void mlxsw_sp1_ptp_egr_fifo_event_func(const struct mlxsw_reg_info *reg, + char *mtpptr_pl, void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp1_ptp_fifo_event_func(mlxsw_sp, mtpptr_pl, false); +} + +void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb, + u16 local_port, void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; + struct mlxsw_sp_port_pcpu_stats *pcpu_stats; + + if
(unlikely(!mlxsw_sp_port)) { + dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: skb received for non-existent port\n", + local_port); + return; + } + + skb->dev = mlxsw_sp_port->dev; + + pcpu_stats = this_cpu_ptr(mlxsw_sp_port->pcpu_stats); + u64_stats_update_begin(&pcpu_stats->syncp); + pcpu_stats->rx_packets++; + pcpu_stats->rx_bytes += skb->len; + u64_stats_update_end(&pcpu_stats->syncp); + + skb->protocol = eth_type_trans(skb, skb->dev); + netif_receive_skb(skb); +} + +static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u16 local_port, + void *priv) +{ + skb->offload_fwd_mark = 1; + return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv); +} + +static void mlxsw_sp_rx_listener_l3_mark_func(struct sk_buff *skb, + u16 local_port, void *priv) +{ + skb->offload_l3_fwd_mark = 1; + skb->offload_fwd_mark = 1; + return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv); +} + +void mlxsw_sp_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u16 local_port) +{ + mlxsw_sp->ptp_ops->receive(mlxsw_sp, skb, local_port); +} + +#define MLXSW_SP_RXL_NO_MARK(_trap_id, _action, _trap_group, _is_ctrl) \ + MLXSW_RXL(mlxsw_sp_rx_listener_no_mark_func, _trap_id, _action, \ + _is_ctrl, SP_##_trap_group, DISCARD) + +#define MLXSW_SP_RXL_MARK(_trap_id, _action, _trap_group, _is_ctrl) \ + MLXSW_RXL(mlxsw_sp_rx_listener_mark_func, _trap_id, _action, \ + _is_ctrl, SP_##_trap_group, DISCARD) + +#define MLXSW_SP_RXL_L3_MARK(_trap_id, _action, _trap_group, _is_ctrl) \ + MLXSW_RXL(mlxsw_sp_rx_listener_l3_mark_func, _trap_id, _action, \ + _is_ctrl, SP_##_trap_group, DISCARD) + +#define MLXSW_SP_EVENTL(_func, _trap_id) \ + MLXSW_EVENTL(_func, _trap_id, SP_EVENT) + +static const struct mlxsw_listener mlxsw_sp_listener[] = { + /* Events */ + MLXSW_SP_EVENTL(mlxsw_sp_pude_event_func, PUDE), + /* L2 traps */ + MLXSW_SP_RXL_NO_MARK(FID_MISS, TRAP_TO_CPU, FID_MISS, false), + /* L3 traps */ + MLXSW_SP_RXL_MARK(IPV6_UNSPECIFIED_ADDRESS, TRAP_TO_CPU, ROUTER_EXP, + false), + MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_SRC, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IPV6_MC_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP, + false), + MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_SIP_CLASS_E, FORWARD, + ROUTER_EXP, false), + MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_MC_DMAC, FORWARD, + ROUTER_EXP, false), + MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_SIP_DIP, FORWARD, + ROUTER_EXP, false), + MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_DIP_LINK_LOCAL, FORWARD, + ROUTER_EXP, false), + /* Multicast Router Traps */ + MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false), + MLXSW_SP_RXL_L3_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false), + /* NVE traps */ + MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, NEIGH_DISCOVERY, false), +}; + +static const struct mlxsw_listener mlxsw_sp1_listener[] = { + /* Events */ + MLXSW_EVENTL(mlxsw_sp1_ptp_egr_fifo_event_func, PTP_EGR_FIFO, SP_PTP0), + MLXSW_EVENTL(mlxsw_sp1_ptp_ing_fifo_event_func, PTP_ING_FIFO, SP_PTP0), +}; + +static const struct mlxsw_listener mlxsw_sp2_listener[] = { + /* Events */ + MLXSW_SP_EVENTL(mlxsw_sp_port_mapping_listener_func, PMLPE), +}; + +static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + char qpcr_pl[MLXSW_REG_QPCR_LEN]; + enum mlxsw_reg_qpcr_ir_units ir_units; + int max_cpu_policers; + bool is_bytes; + u8 burst_size; + u32 rate; + int i, err; + + if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_CPU_POLICERS)) + return -EIO; + + max_cpu_policers = MLXSW_CORE_RES_GET(mlxsw_core, 
MAX_CPU_POLICERS); + + ir_units = MLXSW_REG_QPCR_IR_UNITS_M; + for (i = 0; i < max_cpu_policers; i++) { + is_bytes = false; + switch (i) { + case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_FID_MISS: + rate = 1024; + burst_size = 7; + break; + default: + continue; + } + + __set_bit(i, mlxsw_sp->trap->policers_usage); + mlxsw_reg_qpcr_pack(qpcr_pl, i, ir_units, is_bytes, rate, + burst_size); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(qpcr), qpcr_pl); + if (err) + return err; + } + + return 0; +} + +static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) +{ + char htgt_pl[MLXSW_REG_HTGT_LEN]; + enum mlxsw_reg_htgt_trap_group i; + int max_cpu_policers; + int max_trap_groups; + u8 priority, tc; + u16 policer_id; + int err; + + if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_TRAP_GROUPS)) + return -EIO; + + max_trap_groups = MLXSW_CORE_RES_GET(mlxsw_core, MAX_TRAP_GROUPS); + max_cpu_policers = MLXSW_CORE_RES_GET(mlxsw_core, MAX_CPU_POLICERS); + + for (i = 0; i < max_trap_groups; i++) { + policer_id = i; + switch (i) { + case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_FID_MISS: + priority = 1; + tc = 1; + break; + case MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT: + priority = MLXSW_REG_HTGT_DEFAULT_PRIORITY; + tc = MLXSW_REG_HTGT_DEFAULT_TC; + policer_id = MLXSW_REG_HTGT_INVALID_POLICER; + break; + default: + continue; + } + + if (max_cpu_policers <= policer_id && + policer_id != MLXSW_REG_HTGT_INVALID_POLICER) + return -EIO; + + mlxsw_reg_htgt_pack(htgt_pl, i, policer_id, priority, tc); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); + if (err) + return err; + } + + return 0; +} + +static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_trap *trap; + u64 max_policers; + int err; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_CPU_POLICERS)) + return -EIO; + max_policers = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_CPU_POLICERS); + trap = kzalloc(struct_size(trap, policers_usage, + BITS_TO_LONGS(max_policers)), GFP_KERNEL); + if (!trap) + return -ENOMEM; + trap->max_policers = max_policers; + mlxsw_sp->trap = trap; + + err = mlxsw_sp_cpu_policers_set(mlxsw_sp->core); + if (err) + goto err_cpu_policers_set; + + err = mlxsw_sp_trap_groups_set(mlxsw_sp->core); + if (err) + goto err_trap_groups_set; + + err = mlxsw_core_traps_register(mlxsw_sp->core, mlxsw_sp_listener, + ARRAY_SIZE(mlxsw_sp_listener), + mlxsw_sp); + if (err) + goto err_traps_register; + + err = mlxsw_core_traps_register(mlxsw_sp->core, mlxsw_sp->listeners, + mlxsw_sp->listeners_count, mlxsw_sp); + if (err) + goto err_extra_traps_init; + + return 0; + +err_extra_traps_init: + mlxsw_core_traps_unregister(mlxsw_sp->core, mlxsw_sp_listener, + ARRAY_SIZE(mlxsw_sp_listener), + mlxsw_sp); +err_traps_register: +err_trap_groups_set: +err_cpu_policers_set: + kfree(trap); + return err; +} + +static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_core_traps_unregister(mlxsw_sp->core, mlxsw_sp->listeners, + mlxsw_sp->listeners_count, + mlxsw_sp); + mlxsw_core_traps_unregister(mlxsw_sp->core, mlxsw_sp_listener, + ARRAY_SIZE(mlxsw_sp_listener), mlxsw_sp); + kfree(mlxsw_sp->trap); +} + +#define MLXSW_SP_LAG_SEED_INIT 0xcafecafe + +static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) +{ + char slcr_pl[MLXSW_REG_SLCR_LEN]; + u16 max_lag; + u32 seed; + int err; + + seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), + 
MLXSW_SP_LAG_SEED_INIT); + mlxsw_reg_slcr_pack(slcr_pl, MLXSW_REG_SLCR_LAG_HASH_SMAC | + MLXSW_REG_SLCR_LAG_HASH_DMAC | + MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE | + MLXSW_REG_SLCR_LAG_HASH_VLANID | + MLXSW_REG_SLCR_LAG_HASH_SIP | + MLXSW_REG_SLCR_LAG_HASH_DIP | + MLXSW_REG_SLCR_LAG_HASH_SPORT | + MLXSW_REG_SLCR_LAG_HASH_DPORT | + MLXSW_REG_SLCR_LAG_HASH_IPPROTO, seed); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl); + if (err) + return err; + + err = mlxsw_core_max_lag(mlxsw_sp->core, &max_lag); + if (err) + return err; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LAG_MEMBERS)) + return -EIO; + + mlxsw_sp->lags = kcalloc(max_lag, sizeof(struct mlxsw_sp_upper), + GFP_KERNEL); + if (!mlxsw_sp->lags) + return -ENOMEM; + + return 0; +} + +static void mlxsw_sp_lag_fini(struct mlxsw_sp *mlxsw_sp) +{ + kfree(mlxsw_sp->lags); +} + +static const struct mlxsw_sp_ptp_ops mlxsw_sp1_ptp_ops = { + .clock_init = mlxsw_sp1_ptp_clock_init, + .clock_fini = mlxsw_sp1_ptp_clock_fini, + .init = mlxsw_sp1_ptp_init, + .fini = mlxsw_sp1_ptp_fini, + .receive = mlxsw_sp1_ptp_receive, + .transmitted = mlxsw_sp1_ptp_transmitted, + .hwtstamp_get = mlxsw_sp1_ptp_hwtstamp_get, + .hwtstamp_set = mlxsw_sp1_ptp_hwtstamp_set, + .shaper_work = mlxsw_sp1_ptp_shaper_work, + .get_ts_info = mlxsw_sp1_ptp_get_ts_info, + .get_stats_count = mlxsw_sp1_get_stats_count, + .get_stats_strings = mlxsw_sp1_get_stats_strings, + .get_stats = mlxsw_sp1_get_stats, + .txhdr_construct = mlxsw_sp_ptp_txhdr_construct, +}; + +static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = { + .clock_init = mlxsw_sp2_ptp_clock_init, + .clock_fini = mlxsw_sp2_ptp_clock_fini, + .init = mlxsw_sp2_ptp_init, + .fini = mlxsw_sp2_ptp_fini, + .receive = mlxsw_sp2_ptp_receive, + .transmitted = mlxsw_sp2_ptp_transmitted, + .hwtstamp_get = mlxsw_sp2_ptp_hwtstamp_get, + .hwtstamp_set = mlxsw_sp2_ptp_hwtstamp_set, + .shaper_work = mlxsw_sp2_ptp_shaper_work, + .get_ts_info = mlxsw_sp2_ptp_get_ts_info, + .get_stats_count = mlxsw_sp2_get_stats_count, + .get_stats_strings = mlxsw_sp2_get_stats_strings, + .get_stats = mlxsw_sp2_get_stats, + .txhdr_construct = mlxsw_sp2_ptp_txhdr_construct, +}; + +static const struct mlxsw_sp_ptp_ops mlxsw_sp4_ptp_ops = { + .clock_init = mlxsw_sp2_ptp_clock_init, + .clock_fini = mlxsw_sp2_ptp_clock_fini, + .init = mlxsw_sp2_ptp_init, + .fini = mlxsw_sp2_ptp_fini, + .receive = mlxsw_sp2_ptp_receive, + .transmitted = mlxsw_sp2_ptp_transmitted, + .hwtstamp_get = mlxsw_sp2_ptp_hwtstamp_get, + .hwtstamp_set = mlxsw_sp2_ptp_hwtstamp_set, + .shaper_work = mlxsw_sp2_ptp_shaper_work, + .get_ts_info = mlxsw_sp2_ptp_get_ts_info, + .get_stats_count = mlxsw_sp2_get_stats_count, + .get_stats_strings = mlxsw_sp2_get_stats_strings, + .get_stats = mlxsw_sp2_get_stats, + .txhdr_construct = mlxsw_sp_ptp_txhdr_construct, +}; + +struct mlxsw_sp_sample_trigger_node { + struct mlxsw_sp_sample_trigger trigger; + struct mlxsw_sp_sample_params params; + struct rhash_head ht_node; + struct rcu_head rcu; + refcount_t refcount; +}; + +static const struct rhashtable_params mlxsw_sp_sample_trigger_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_sample_trigger_node, trigger), + .head_offset = offsetof(struct mlxsw_sp_sample_trigger_node, ht_node), + .key_len = sizeof(struct mlxsw_sp_sample_trigger), + .automatic_shrinking = true, +}; + +static void +mlxsw_sp_sample_trigger_key_init(struct mlxsw_sp_sample_trigger *key, + const struct mlxsw_sp_sample_trigger *trigger) +{ + memset(key, 0, sizeof(*key)); + key->type = trigger->type; + key->local_port 
= trigger->local_port; +} + +/* RCU read lock must be held */ +struct mlxsw_sp_sample_params * +mlxsw_sp_sample_trigger_params_lookup(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sample_trigger *trigger) +{ + struct mlxsw_sp_sample_trigger_node *trigger_node; + struct mlxsw_sp_sample_trigger key; + + mlxsw_sp_sample_trigger_key_init(&key, trigger); + trigger_node = rhashtable_lookup(&mlxsw_sp->sample_trigger_ht, &key, + mlxsw_sp_sample_trigger_ht_params); + if (!trigger_node) + return NULL; + + return &trigger_node->params; +} + +static int +mlxsw_sp_sample_trigger_node_init(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sample_trigger *trigger, + const struct mlxsw_sp_sample_params *params) +{ + struct mlxsw_sp_sample_trigger_node *trigger_node; + int err; + + trigger_node = kzalloc(sizeof(*trigger_node), GFP_KERNEL); + if (!trigger_node) + return -ENOMEM; + + trigger_node->trigger = *trigger; + trigger_node->params = *params; + refcount_set(&trigger_node->refcount, 1); + + err = rhashtable_insert_fast(&mlxsw_sp->sample_trigger_ht, + &trigger_node->ht_node, + mlxsw_sp_sample_trigger_ht_params); + if (err) + goto err_rhashtable_insert; + + return 0; + +err_rhashtable_insert: + kfree(trigger_node); + return err; +} + +static void +mlxsw_sp_sample_trigger_node_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_sample_trigger_node *trigger_node) +{ + rhashtable_remove_fast(&mlxsw_sp->sample_trigger_ht, + &trigger_node->ht_node, + mlxsw_sp_sample_trigger_ht_params); + kfree_rcu(trigger_node, rcu); +} + +int +mlxsw_sp_sample_trigger_params_set(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sample_trigger *trigger, + const struct mlxsw_sp_sample_params *params, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_sample_trigger_node *trigger_node; + struct mlxsw_sp_sample_trigger key; + + ASSERT_RTNL(); + + mlxsw_sp_sample_trigger_key_init(&key, trigger); + + trigger_node = rhashtable_lookup_fast(&mlxsw_sp->sample_trigger_ht, + &key, + mlxsw_sp_sample_trigger_ht_params); + if (!trigger_node) + return mlxsw_sp_sample_trigger_node_init(mlxsw_sp, &key, + params); + + if (trigger_node->trigger.local_port) { + NL_SET_ERR_MSG_MOD(extack, "Sampling already enabled on port"); + return -EINVAL; + } + + if (trigger_node->params.psample_group != params->psample_group || + trigger_node->params.truncate != params->truncate || + trigger_node->params.rate != params->rate || + trigger_node->params.trunc_size != params->trunc_size) { + NL_SET_ERR_MSG_MOD(extack, "Sampling parameters do not match for an existing sampling trigger"); + return -EINVAL; + } + + refcount_inc(&trigger_node->refcount); + + return 0; +} + +void +mlxsw_sp_sample_trigger_params_unset(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sample_trigger *trigger) +{ + struct mlxsw_sp_sample_trigger_node *trigger_node; + struct mlxsw_sp_sample_trigger key; + + ASSERT_RTNL(); + + mlxsw_sp_sample_trigger_key_init(&key, trigger); + + trigger_node = rhashtable_lookup_fast(&mlxsw_sp->sample_trigger_ht, + &key, + mlxsw_sp_sample_trigger_ht_params); + if (!trigger_node) + return; + + if (!refcount_dec_and_test(&trigger_node->refcount)) + return; + + mlxsw_sp_sample_trigger_node_fini(mlxsw_sp, trigger_node); +} + +static int mlxsw_sp_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr); + +#define MLXSW_SP_DEFAULT_PARSING_DEPTH 96 +#define MLXSW_SP_INCREASED_PARSING_DEPTH 128 +#define MLXSW_SP_DEFAULT_VXLAN_UDP_DPORT 4789 + +static void mlxsw_sp_parsing_init(struct mlxsw_sp *mlxsw_sp) +{ + 
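/* No parsing depth users yet: start the reference count at zero and apply the defaults (96 byte parsing depth, VXLAN UDP dport 4789). The depth is presumably raised towards MLXSW_SP_INCREASED_PARSING_DEPTH elsewhere, once encapsulations require deeper parsing. */ +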
refcount_set(&mlxsw_sp->parsing.parsing_depth_ref, 0); + mlxsw_sp->parsing.parsing_depth = MLXSW_SP_DEFAULT_PARSING_DEPTH; + mlxsw_sp->parsing.vxlan_udp_dport = MLXSW_SP_DEFAULT_VXLAN_UDP_DPORT; + mutex_init(&mlxsw_sp->parsing.lock); +} + +static void mlxsw_sp_parsing_fini(struct mlxsw_sp *mlxsw_sp) +{ + mutex_destroy(&mlxsw_sp->parsing.lock); + WARN_ON_ONCE(refcount_read(&mlxsw_sp->parsing.parsing_depth_ref)); +} + +struct mlxsw_sp_ipv6_addr_node { + struct in6_addr key; + struct rhash_head ht_node; + u32 kvdl_index; + refcount_t refcount; +}; + +static const struct rhashtable_params mlxsw_sp_ipv6_addr_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_ipv6_addr_node, key), + .head_offset = offsetof(struct mlxsw_sp_ipv6_addr_node, ht_node), + .key_len = sizeof(struct in6_addr), + .automatic_shrinking = true, +}; + +static int +mlxsw_sp_ipv6_addr_init(struct mlxsw_sp *mlxsw_sp, const struct in6_addr *addr6, + u32 *p_kvdl_index) +{ + struct mlxsw_sp_ipv6_addr_node *node; + char rips_pl[MLXSW_REG_RIPS_LEN]; + int err; + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, + MLXSW_SP_KVDL_ENTRY_TYPE_IPV6_ADDRESS, 1, + p_kvdl_index); + if (err) + return err; + + mlxsw_reg_rips_pack(rips_pl, *p_kvdl_index, addr6); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rips), rips_pl); + if (err) + goto err_rips_write; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) { + err = -ENOMEM; + goto err_node_alloc; + } + + node->key = *addr6; + node->kvdl_index = *p_kvdl_index; + refcount_set(&node->refcount, 1); + + err = rhashtable_insert_fast(&mlxsw_sp->ipv6_addr_ht, + &node->ht_node, + mlxsw_sp_ipv6_addr_ht_params); + if (err) + goto err_rhashtable_insert; + + return 0; + +err_rhashtable_insert: + kfree(node); +err_node_alloc: +err_rips_write: + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_IPV6_ADDRESS, 1, + *p_kvdl_index); + return err; +} + +static void mlxsw_sp_ipv6_addr_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipv6_addr_node *node) +{ + u32 kvdl_index = node->kvdl_index; + + rhashtable_remove_fast(&mlxsw_sp->ipv6_addr_ht, &node->ht_node, + mlxsw_sp_ipv6_addr_ht_params); + kfree(node); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_IPV6_ADDRESS, 1, + kvdl_index); +} + +int mlxsw_sp_ipv6_addr_kvdl_index_get(struct mlxsw_sp *mlxsw_sp, + const struct in6_addr *addr6, + u32 *p_kvdl_index) +{ + struct mlxsw_sp_ipv6_addr_node *node; + int err = 0; + + mutex_lock(&mlxsw_sp->ipv6_addr_ht_lock); + node = rhashtable_lookup_fast(&mlxsw_sp->ipv6_addr_ht, addr6, + mlxsw_sp_ipv6_addr_ht_params); + if (node) { + refcount_inc(&node->refcount); + *p_kvdl_index = node->kvdl_index; + goto out_unlock; + } + + err = mlxsw_sp_ipv6_addr_init(mlxsw_sp, addr6, p_kvdl_index); + +out_unlock: + mutex_unlock(&mlxsw_sp->ipv6_addr_ht_lock); + return err; +} + +void +mlxsw_sp_ipv6_addr_put(struct mlxsw_sp *mlxsw_sp, const struct in6_addr *addr6) +{ + struct mlxsw_sp_ipv6_addr_node *node; + + mutex_lock(&mlxsw_sp->ipv6_addr_ht_lock); + node = rhashtable_lookup_fast(&mlxsw_sp->ipv6_addr_ht, addr6, + mlxsw_sp_ipv6_addr_ht_params); + if (WARN_ON(!node)) + goto out_unlock; + + if (!refcount_dec_and_test(&node->refcount)) + goto out_unlock; + + mlxsw_sp_ipv6_addr_fini(mlxsw_sp, node); + +out_unlock: + mutex_unlock(&mlxsw_sp->ipv6_addr_ht_lock); +} + +static int mlxsw_sp_ipv6_addr_ht_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + err = rhashtable_init(&mlxsw_sp->ipv6_addr_ht, + &mlxsw_sp_ipv6_addr_ht_params); + if (err) + return err; + + mutex_init(&mlxsw_sp->ipv6_addr_ht_lock); + return 0; +} + +static void 
mlxsw_sp_ipv6_addr_ht_fini(struct mlxsw_sp *mlxsw_sp) +{ + mutex_destroy(&mlxsw_sp->ipv6_addr_ht_lock); + rhashtable_destroy(&mlxsw_sp->ipv6_addr_ht); +} + +static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + int err; + + mlxsw_sp->core = mlxsw_core; + mlxsw_sp->bus_info = mlxsw_bus_info; + + mlxsw_sp_parsing_init(mlxsw_sp); + mlxsw_core_emad_string_tlv_enable(mlxsw_core); + + err = mlxsw_sp_base_mac_get(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to get base mac\n"); + return err; + } + + err = mlxsw_sp_kvdl_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize KVDL\n"); + return err; + } + + err = mlxsw_sp_pgt_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize PGT\n"); + goto err_pgt_init; + } + + err = mlxsw_sp_fids_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize FIDs\n"); + goto err_fids_init; + } + + err = mlxsw_sp_policers_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize policers\n"); + goto err_policers_init; + } + + err = mlxsw_sp_traps_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to set traps\n"); + goto err_traps_init; + } + + err = mlxsw_sp_devlink_traps_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize devlink traps\n"); + goto err_devlink_traps_init; + } + + err = mlxsw_sp_buffers_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize buffers\n"); + goto err_buffers_init; + } + + err = mlxsw_sp_lag_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize LAG\n"); + goto err_lag_init; + } + + /* Initialize SPAN before router and switchdev, so that those components + * can call mlxsw_sp_span_respin(). 
+ */ + err = mlxsw_sp_span_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n"); + goto err_span_init; + } + + err = mlxsw_sp_switchdev_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize switchdev\n"); + goto err_switchdev_init; + } + + err = mlxsw_sp_counter_pool_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to init counter pool\n"); + goto err_counter_pool_init; + } + + err = mlxsw_sp_afa_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL actions\n"); + goto err_afa_init; + } + + err = mlxsw_sp_ipv6_addr_ht_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize hash table for IPv6 addresses\n"); + goto err_ipv6_addr_ht_init; + } + + err = mlxsw_sp_nve_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize NVE\n"); + goto err_nve_init; + } + + err = mlxsw_sp_acl_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL\n"); + goto err_acl_init; + } + + err = mlxsw_sp_router_init(mlxsw_sp, extack); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n"); + goto err_router_init; + } + + if (mlxsw_sp->bus_info->read_clock_capable) { + /* NULL is a valid return value from clock_init */ + mlxsw_sp->clock = + mlxsw_sp->ptp_ops->clock_init(mlxsw_sp, + mlxsw_sp->bus_info->dev); + if (IS_ERR(mlxsw_sp->clock)) { + err = PTR_ERR(mlxsw_sp->clock); + dev_err(mlxsw_sp->bus_info->dev, "Failed to init ptp clock\n"); + goto err_ptp_clock_init; + } + } + + if (mlxsw_sp->clock) { + /* NULL is a valid return value from ptp_ops->init */ + mlxsw_sp->ptp_state = mlxsw_sp->ptp_ops->init(mlxsw_sp); + if (IS_ERR(mlxsw_sp->ptp_state)) { + err = PTR_ERR(mlxsw_sp->ptp_state); + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize PTP\n"); + goto err_ptp_init; + } + } + + /* Initialize netdevice notifier after SPAN is initialized, so that the + * event handler can call SPAN respin. 
+ */ + mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event; + err = register_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->netdevice_nb); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to register netdev notifier\n"); + goto err_netdev_notifier; + } + + err = mlxsw_sp_dpipe_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to init pipeline debug\n"); + goto err_dpipe_init; + } + + err = mlxsw_sp_port_module_info_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to init port module info\n"); + goto err_port_module_info_init; + } + + err = rhashtable_init(&mlxsw_sp->sample_trigger_ht, + &mlxsw_sp_sample_trigger_ht_params); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to init sampling trigger hashtable\n"); + goto err_sample_trigger_init; + } + + err = mlxsw_sp_ports_create(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n"); + goto err_ports_create; + } + + return 0; + +err_ports_create: + rhashtable_destroy(&mlxsw_sp->sample_trigger_ht); +err_sample_trigger_init: + mlxsw_sp_port_module_info_fini(mlxsw_sp); +err_port_module_info_init: + mlxsw_sp_dpipe_fini(mlxsw_sp); +err_dpipe_init: + unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->netdevice_nb); +err_netdev_notifier: + if (mlxsw_sp->clock) + mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state); +err_ptp_init: + if (mlxsw_sp->clock) + mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock); +err_ptp_clock_init: + mlxsw_sp_router_fini(mlxsw_sp); +err_router_init: + mlxsw_sp_acl_fini(mlxsw_sp); +err_acl_init: + mlxsw_sp_nve_fini(mlxsw_sp); +err_nve_init: + mlxsw_sp_ipv6_addr_ht_fini(mlxsw_sp); +err_ipv6_addr_ht_init: + mlxsw_sp_afa_fini(mlxsw_sp); +err_afa_init: + mlxsw_sp_counter_pool_fini(mlxsw_sp); +err_counter_pool_init: + mlxsw_sp_switchdev_fini(mlxsw_sp); +err_switchdev_init: + mlxsw_sp_span_fini(mlxsw_sp); +err_span_init: + mlxsw_sp_lag_fini(mlxsw_sp); +err_lag_init: + mlxsw_sp_buffers_fini(mlxsw_sp); +err_buffers_init: + mlxsw_sp_devlink_traps_fini(mlxsw_sp); +err_devlink_traps_init: + mlxsw_sp_traps_fini(mlxsw_sp); +err_traps_init: + mlxsw_sp_policers_fini(mlxsw_sp); +err_policers_init: + mlxsw_sp_fids_fini(mlxsw_sp); +err_fids_init: + mlxsw_sp_pgt_fini(mlxsw_sp); +err_pgt_init: + mlxsw_sp_kvdl_fini(mlxsw_sp); + mlxsw_sp_parsing_fini(mlxsw_sp); + return err; +} + +static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + + mlxsw_sp->switchdev_ops = &mlxsw_sp1_switchdev_ops; + mlxsw_sp->kvdl_ops = &mlxsw_sp1_kvdl_ops; + mlxsw_sp->afa_ops = &mlxsw_sp1_act_afa_ops; + mlxsw_sp->afk_ops = &mlxsw_sp1_afk_ops; + mlxsw_sp->mr_tcam_ops = &mlxsw_sp1_mr_tcam_ops; + mlxsw_sp->acl_rulei_ops = &mlxsw_sp1_acl_rulei_ops; + mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops; + mlxsw_sp->nve_ops_arr = mlxsw_sp1_nve_ops_arr; + mlxsw_sp->mac_mask = mlxsw_sp1_mac_mask; + mlxsw_sp->sb_vals = &mlxsw_sp1_sb_vals; + mlxsw_sp->sb_ops = &mlxsw_sp1_sb_ops; + mlxsw_sp->port_type_speed_ops = &mlxsw_sp1_port_type_speed_ops; + mlxsw_sp->ptp_ops = &mlxsw_sp1_ptp_ops; + mlxsw_sp->span_ops = &mlxsw_sp1_span_ops; + mlxsw_sp->policer_core_ops = &mlxsw_sp1_policer_core_ops; + mlxsw_sp->trap_ops = &mlxsw_sp1_trap_ops; + mlxsw_sp->mall_ops = &mlxsw_sp1_mall_ops; + mlxsw_sp->router_ops = &mlxsw_sp1_router_ops; + mlxsw_sp->listeners = mlxsw_sp1_listener; + mlxsw_sp->listeners_count = 
ARRAY_SIZE(mlxsw_sp1_listener); + mlxsw_sp->fid_family_arr = mlxsw_sp1_fid_family_arr; + mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1; + mlxsw_sp->pgt_smpe_index_valid = true; + + return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); +} + +static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + + mlxsw_sp->switchdev_ops = &mlxsw_sp2_switchdev_ops; + mlxsw_sp->kvdl_ops = &mlxsw_sp2_kvdl_ops; + mlxsw_sp->afa_ops = &mlxsw_sp2_act_afa_ops; + mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops; + mlxsw_sp->mr_tcam_ops = &mlxsw_sp2_mr_tcam_ops; + mlxsw_sp->acl_rulei_ops = &mlxsw_sp2_acl_rulei_ops; + mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops; + mlxsw_sp->acl_bf_ops = &mlxsw_sp2_acl_bf_ops; + mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr; + mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask; + mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals; + mlxsw_sp->sb_ops = &mlxsw_sp2_sb_ops; + mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; + mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops; + mlxsw_sp->span_ops = &mlxsw_sp2_span_ops; + mlxsw_sp->policer_core_ops = &mlxsw_sp2_policer_core_ops; + mlxsw_sp->trap_ops = &mlxsw_sp2_trap_ops; + mlxsw_sp->mall_ops = &mlxsw_sp2_mall_ops; + mlxsw_sp->router_ops = &mlxsw_sp2_router_ops; + mlxsw_sp->listeners = mlxsw_sp2_listener; + mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp2_listener); + mlxsw_sp->fid_family_arr = mlxsw_sp2_fid_family_arr; + mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2; + mlxsw_sp->pgt_smpe_index_valid = false; + + return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); +} + +static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + + mlxsw_sp->switchdev_ops = &mlxsw_sp2_switchdev_ops; + mlxsw_sp->kvdl_ops = &mlxsw_sp2_kvdl_ops; + mlxsw_sp->afa_ops = &mlxsw_sp2_act_afa_ops; + mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops; + mlxsw_sp->mr_tcam_ops = &mlxsw_sp2_mr_tcam_ops; + mlxsw_sp->acl_rulei_ops = &mlxsw_sp2_acl_rulei_ops; + mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops; + mlxsw_sp->acl_bf_ops = &mlxsw_sp2_acl_bf_ops; + mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr; + mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask; + mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals; + mlxsw_sp->sb_ops = &mlxsw_sp3_sb_ops; + mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; + mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops; + mlxsw_sp->span_ops = &mlxsw_sp3_span_ops; + mlxsw_sp->policer_core_ops = &mlxsw_sp2_policer_core_ops; + mlxsw_sp->trap_ops = &mlxsw_sp2_trap_ops; + mlxsw_sp->mall_ops = &mlxsw_sp2_mall_ops; + mlxsw_sp->router_ops = &mlxsw_sp2_router_ops; + mlxsw_sp->listeners = mlxsw_sp2_listener; + mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp2_listener); + mlxsw_sp->fid_family_arr = mlxsw_sp2_fid_family_arr; + mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3; + mlxsw_sp->pgt_smpe_index_valid = false; + + return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); +} + +static int mlxsw_sp4_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + + mlxsw_sp->switchdev_ops = &mlxsw_sp2_switchdev_ops; + mlxsw_sp->kvdl_ops = &mlxsw_sp2_kvdl_ops; + mlxsw_sp->afa_ops = &mlxsw_sp2_act_afa_ops; + 
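
The mlxsw_spN_init() helpers around this point differ mostly in which operation tables they plug into the shared struct mlxsw_sp before calling the common mlxsw_sp_init(); Spectrum-4, for example, reuses most Spectrum-2 ops but overrides the flexible-key, Bloom-filter and PTP ops. The short user-space sketch below, with invented names rather than the driver's types, illustrates that ops-table dispatch pattern.

    #include <stdio.h>

    /* Per-generation operations, analogous to the driver's *_ops tables. */
    struct asic_ops {
        const char *name;
        int (*ptp_init)(void);
    };

    static int gen2_ptp_init(void) { puts("gen2 PTP init"); return 0; }
    static int gen4_ptp_init(void) { puts("gen4 PTP init"); return 0; }

    static const struct asic_ops gen2_ops = { .name = "gen2", .ptp_init = gen2_ptp_init };
    static const struct asic_ops gen4_ops = { .name = "gen4", .ptp_init = gen4_ptp_init };

    struct asic {
        const struct asic_ops *ops;   /* chosen once, at probe time */
    };

    /* Common initialization path; it never tests the generation directly,
     * it only calls through the ops table, as mlxsw_sp_init() does. */
    static int asic_common_init(struct asic *asic)
    {
        printf("initializing %s\n", asic->ops->name);
        return asic->ops->ptp_init();
    }

    int main(void)
    {
        struct asic a2 = { .ops = &gen2_ops };
        struct asic a4 = { .ops = &gen4_ops };

        asic_common_init(&a2);
        asic_common_init(&a4);
        return 0;
    }
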
mlxsw_sp->afk_ops = &mlxsw_sp4_afk_ops; + mlxsw_sp->mr_tcam_ops = &mlxsw_sp2_mr_tcam_ops; + mlxsw_sp->acl_rulei_ops = &mlxsw_sp2_acl_rulei_ops; + mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops; + mlxsw_sp->acl_bf_ops = &mlxsw_sp4_acl_bf_ops; + mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr; + mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask; + mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals; + mlxsw_sp->sb_ops = &mlxsw_sp3_sb_ops; + mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; + mlxsw_sp->ptp_ops = &mlxsw_sp4_ptp_ops; + mlxsw_sp->span_ops = &mlxsw_sp3_span_ops; + mlxsw_sp->policer_core_ops = &mlxsw_sp2_policer_core_ops; + mlxsw_sp->trap_ops = &mlxsw_sp2_trap_ops; + mlxsw_sp->mall_ops = &mlxsw_sp2_mall_ops; + mlxsw_sp->router_ops = &mlxsw_sp2_router_ops; + mlxsw_sp->listeners = mlxsw_sp2_listener; + mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp2_listener); + mlxsw_sp->fid_family_arr = mlxsw_sp2_fid_family_arr; + mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP4; + mlxsw_sp->pgt_smpe_index_valid = false; + + return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); +} + +static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + + mlxsw_sp_ports_remove(mlxsw_sp); + rhashtable_destroy(&mlxsw_sp->sample_trigger_ht); + mlxsw_sp_port_module_info_fini(mlxsw_sp); + mlxsw_sp_dpipe_fini(mlxsw_sp); + unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->netdevice_nb); + if (mlxsw_sp->clock) { + mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state); + mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock); + } + mlxsw_sp_router_fini(mlxsw_sp); + mlxsw_sp_acl_fini(mlxsw_sp); + mlxsw_sp_nve_fini(mlxsw_sp); + mlxsw_sp_ipv6_addr_ht_fini(mlxsw_sp); + mlxsw_sp_afa_fini(mlxsw_sp); + mlxsw_sp_counter_pool_fini(mlxsw_sp); + mlxsw_sp_switchdev_fini(mlxsw_sp); + mlxsw_sp_span_fini(mlxsw_sp); + mlxsw_sp_lag_fini(mlxsw_sp); + mlxsw_sp_buffers_fini(mlxsw_sp); + mlxsw_sp_devlink_traps_fini(mlxsw_sp); + mlxsw_sp_traps_fini(mlxsw_sp); + mlxsw_sp_policers_fini(mlxsw_sp); + mlxsw_sp_fids_fini(mlxsw_sp); + mlxsw_sp_pgt_fini(mlxsw_sp); + mlxsw_sp_kvdl_fini(mlxsw_sp); + mlxsw_sp_parsing_fini(mlxsw_sp); +} + +static const struct mlxsw_config_profile mlxsw_sp1_config_profile = { + .used_flood_mode = 1, + .flood_mode = MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_CONTROLLED, + .used_max_ib_mc = 1, + .max_ib_mc = 0, + .used_max_pkey = 1, + .max_pkey = 0, + .used_ubridge = 1, + .ubridge = 1, + .used_kvd_sizes = 1, + .kvd_hash_single_parts = 59, + .kvd_hash_double_parts = 41, + .kvd_linear_size = MLXSW_SP_KVD_LINEAR_SIZE, + .swid_config = { + { + .used_type = 1, + .type = MLXSW_PORT_SWID_TYPE_ETH, + } + }, +}; + +static const struct mlxsw_config_profile mlxsw_sp2_config_profile = { + .used_flood_mode = 1, + .flood_mode = MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_CONTROLLED, + .used_max_ib_mc = 1, + .max_ib_mc = 0, + .used_max_pkey = 1, + .max_pkey = 0, + .used_ubridge = 1, + .ubridge = 1, + .swid_config = { + { + .used_type = 1, + .type = MLXSW_PORT_SWID_TYPE_ETH, + } + }, + .used_cqe_time_stamp_type = 1, + .cqe_time_stamp_type = MLXSW_CMD_MBOX_CONFIG_PROFILE_CQE_TIME_STAMP_TYPE_UTC, +}; + +/* Reduce number of LAGs from full capacity (256) to the maximum supported LAGs + * in Spectrum-2/3, to avoid regression in number of free entries in the PGT + * table. 
+ */ +#define MLXSW_SP4_CONFIG_PROFILE_MAX_LAG 128 + +static const struct mlxsw_config_profile mlxsw_sp4_config_profile = { + .used_max_lag = 1, + .max_lag = MLXSW_SP4_CONFIG_PROFILE_MAX_LAG, + .used_flood_mode = 1, + .flood_mode = MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_CONTROLLED, + .used_max_ib_mc = 1, + .max_ib_mc = 0, + .used_max_pkey = 1, + .max_pkey = 0, + .used_ubridge = 1, + .ubridge = 1, + .swid_config = { + { + .used_type = 1, + .type = MLXSW_PORT_SWID_TYPE_ETH, + } + }, + .used_cqe_time_stamp_type = 1, + .cqe_time_stamp_type = MLXSW_CMD_MBOX_CONFIG_PROFILE_CQE_TIME_STAMP_TYPE_UTC, +}; + +static void +mlxsw_sp_resource_size_params_prepare(struct mlxsw_core *mlxsw_core, + struct devlink_resource_size_params *kvd_size_params, + struct devlink_resource_size_params *linear_size_params, + struct devlink_resource_size_params *hash_double_size_params, + struct devlink_resource_size_params *hash_single_size_params) +{ + u32 single_size_min = MLXSW_CORE_RES_GET(mlxsw_core, + KVD_SINGLE_MIN_SIZE); + u32 double_size_min = MLXSW_CORE_RES_GET(mlxsw_core, + KVD_DOUBLE_MIN_SIZE); + u32 kvd_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE); + u32 linear_size_min = 0; + + devlink_resource_size_params_init(kvd_size_params, kvd_size, kvd_size, + MLXSW_SP_KVD_GRANULARITY, + DEVLINK_RESOURCE_UNIT_ENTRY); + devlink_resource_size_params_init(linear_size_params, linear_size_min, + kvd_size - single_size_min - + double_size_min, + MLXSW_SP_KVD_GRANULARITY, + DEVLINK_RESOURCE_UNIT_ENTRY); + devlink_resource_size_params_init(hash_double_size_params, + double_size_min, + kvd_size - single_size_min - + linear_size_min, + MLXSW_SP_KVD_GRANULARITY, + DEVLINK_RESOURCE_UNIT_ENTRY); + devlink_resource_size_params_init(hash_single_size_params, + single_size_min, + kvd_size - double_size_min - + linear_size_min, + MLXSW_SP_KVD_GRANULARITY, + DEVLINK_RESOURCE_UNIT_ENTRY); +} + +static int mlxsw_sp1_resources_kvd_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct devlink_resource_size_params hash_single_size_params; + struct devlink_resource_size_params hash_double_size_params; + struct devlink_resource_size_params linear_size_params; + struct devlink_resource_size_params kvd_size_params; + u32 kvd_size, single_size, double_size, linear_size; + const struct mlxsw_config_profile *profile; + int err; + + profile = &mlxsw_sp1_config_profile; + if (!MLXSW_CORE_RES_VALID(mlxsw_core, KVD_SIZE)) + return -EIO; + + mlxsw_sp_resource_size_params_prepare(mlxsw_core, &kvd_size_params, + &linear_size_params, + &hash_double_size_params, + &hash_single_size_params); + + kvd_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE); + err = devl_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD, + kvd_size, MLXSW_SP_RESOURCE_KVD, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &kvd_size_params); + if (err) + return err; + + linear_size = profile->kvd_linear_size; + err = devl_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR, + linear_size, + MLXSW_SP_RESOURCE_KVD_LINEAR, + MLXSW_SP_RESOURCE_KVD, + &linear_size_params); + if (err) + return err; + + err = mlxsw_sp1_kvdl_resources_register(mlxsw_core); + if (err) + return err; + + double_size = kvd_size - linear_size; + double_size *= profile->kvd_hash_double_parts; + double_size /= profile->kvd_hash_double_parts + + profile->kvd_hash_single_parts; + double_size = rounddown(double_size, MLXSW_SP_KVD_GRANULARITY); + err = devl_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_HASH_DOUBLE, + double_size, + 
MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE, + MLXSW_SP_RESOURCE_KVD, + &hash_double_size_params); + if (err) + return err; + + single_size = kvd_size - double_size - linear_size; + err = devl_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_HASH_SINGLE, + single_size, + MLXSW_SP_RESOURCE_KVD_HASH_SINGLE, + MLXSW_SP_RESOURCE_KVD, + &hash_single_size_params); + if (err) + return err; + + return 0; +} + +static int mlxsw_sp2_resources_kvd_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct devlink_resource_size_params kvd_size_params; + u32 kvd_size; + + if (!MLXSW_CORE_RES_VALID(mlxsw_core, KVD_SIZE)) + return -EIO; + + kvd_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE); + devlink_resource_size_params_init(&kvd_size_params, kvd_size, kvd_size, + MLXSW_SP_KVD_GRANULARITY, + DEVLINK_RESOURCE_UNIT_ENTRY); + + return devl_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD, + kvd_size, MLXSW_SP_RESOURCE_KVD, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &kvd_size_params); +} + +static int mlxsw_sp_resources_span_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct devlink_resource_size_params span_size_params; + u32 max_span; + + if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_SPAN)) + return -EIO; + + max_span = MLXSW_CORE_RES_GET(mlxsw_core, MAX_SPAN); + devlink_resource_size_params_init(&span_size_params, max_span, max_span, + 1, DEVLINK_RESOURCE_UNIT_ENTRY); + + return devl_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_SPAN, + max_span, MLXSW_SP_RESOURCE_SPAN, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &span_size_params); +} + +static int +mlxsw_sp_resources_rif_mac_profile_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct devlink_resource_size_params size_params; + u8 max_rif_mac_profiles; + + if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_RIF_MAC_PROFILES)) + max_rif_mac_profiles = 1; + else + max_rif_mac_profiles = MLXSW_CORE_RES_GET(mlxsw_core, + MAX_RIF_MAC_PROFILES); + devlink_resource_size_params_init(&size_params, max_rif_mac_profiles, + max_rif_mac_profiles, 1, + DEVLINK_RESOURCE_UNIT_ENTRY); + + return devl_resource_register(devlink, + "rif_mac_profiles", + max_rif_mac_profiles, + MLXSW_SP_RESOURCE_RIF_MAC_PROFILES, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &size_params); +} + +static int mlxsw_sp_resources_rifs_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct devlink_resource_size_params size_params; + u64 max_rifs; + + if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_RIFS)) + return -EIO; + + max_rifs = MLXSW_CORE_RES_GET(mlxsw_core, MAX_RIFS); + devlink_resource_size_params_init(&size_params, max_rifs, max_rifs, + 1, DEVLINK_RESOURCE_UNIT_ENTRY); + + return devl_resource_register(devlink, "rifs", max_rifs, + MLXSW_SP_RESOURCE_RIFS, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &size_params); +} + +static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core) +{ + int err; + + err = mlxsw_sp1_resources_kvd_register(mlxsw_core); + if (err) + return err; + + err = mlxsw_sp_resources_span_register(mlxsw_core); + if (err) + goto err_resources_span_register; + + err = mlxsw_sp_counter_resources_register(mlxsw_core); + if (err) + goto err_resources_counter_register; + + err = mlxsw_sp_policer_resources_register(mlxsw_core); + if (err) + goto err_policer_resources_register; + + err = mlxsw_sp_resources_rif_mac_profile_register(mlxsw_core); + if (err) + goto err_resources_rif_mac_profile_register; 
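
mlxsw_sp1_resources_kvd_register() above and mlxsw_sp_kvd_sizes_get() further down split whatever is left of the KVD after the linear part between the double-hash and single-hash regions according to the profile's 41/59 parts ratio, rounding the double region down to the KVD granularity and giving the remainder to the single region. The stand-alone sketch below reproduces that arithmetic with example inputs; the numbers are illustrative stand-ins for the device's KVD_SIZE resource and the MLXSW_SP_KVD_* constants, not values read from hardware.

    #include <stdio.h>

    /* Hypothetical example inputs; in the driver, the total size comes from
     * the KVD_SIZE device resource and the rest from the Spectrum-1 profile. */
    #define KVD_SIZE          524288u
    #define KVD_LINEAR_SIZE    98304u
    #define KVD_GRANULARITY      128u
    #define HASH_DOUBLE_PARTS     41u
    #define HASH_SINGLE_PARTS     59u

    static unsigned int rounddown(unsigned int x, unsigned int gran)
    {
        return x - (x % gran);
    }

    int main(void)
    {
        unsigned int double_size, single_size;

        /* Same steps as the driver: take the hash part, apply the parts
         * ratio, round down to the granularity, remainder goes to single. */
        double_size = KVD_SIZE - KVD_LINEAR_SIZE;
        double_size *= HASH_DOUBLE_PARTS;
        double_size /= HASH_DOUBLE_PARTS + HASH_SINGLE_PARTS;
        double_size = rounddown(double_size, KVD_GRANULARITY);

        single_size = KVD_SIZE - double_size - KVD_LINEAR_SIZE;

        printf("linear=%u double=%u single=%u total=%u\n",
               KVD_LINEAR_SIZE, double_size, single_size,
               KVD_LINEAR_SIZE + double_size + single_size);
        return 0;
    }
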
+ + err = mlxsw_sp_resources_rifs_register(mlxsw_core); + if (err) + goto err_resources_rifs_register; + + return 0; + +err_resources_rifs_register: +err_resources_rif_mac_profile_register: +err_policer_resources_register: +err_resources_counter_register: +err_resources_span_register: + devl_resources_unregister(priv_to_devlink(mlxsw_core)); + return err; +} + +static int mlxsw_sp2_resources_register(struct mlxsw_core *mlxsw_core) +{ + int err; + + err = mlxsw_sp2_resources_kvd_register(mlxsw_core); + if (err) + return err; + + err = mlxsw_sp_resources_span_register(mlxsw_core); + if (err) + goto err_resources_span_register; + + err = mlxsw_sp_counter_resources_register(mlxsw_core); + if (err) + goto err_resources_counter_register; + + err = mlxsw_sp_policer_resources_register(mlxsw_core); + if (err) + goto err_policer_resources_register; + + err = mlxsw_sp_resources_rif_mac_profile_register(mlxsw_core); + if (err) + goto err_resources_rif_mac_profile_register; + + err = mlxsw_sp_resources_rifs_register(mlxsw_core); + if (err) + goto err_resources_rifs_register; + + return 0; + +err_resources_rifs_register: +err_resources_rif_mac_profile_register: +err_policer_resources_register: +err_resources_counter_register: +err_resources_span_register: + devl_resources_unregister(priv_to_devlink(mlxsw_core)); + return err; +} + +static int mlxsw_sp_kvd_sizes_get(struct mlxsw_core *mlxsw_core, + const struct mlxsw_config_profile *profile, + u64 *p_single_size, u64 *p_double_size, + u64 *p_linear_size) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + u32 double_size; + int err; + + if (!MLXSW_CORE_RES_VALID(mlxsw_core, KVD_SINGLE_MIN_SIZE) || + !MLXSW_CORE_RES_VALID(mlxsw_core, KVD_DOUBLE_MIN_SIZE)) + return -EIO; + + /* The hash part is what left of the kvd without the + * linear part. It is split to the single size and + * double size by the parts ratio from the profile. + * Both sizes must be a multiplications of the + * granularity from the profile. In case the user + * provided the sizes they are obtained via devlink. + */ + err = devl_resource_size_get(devlink, + MLXSW_SP_RESOURCE_KVD_LINEAR, + p_linear_size); + if (err) + *p_linear_size = profile->kvd_linear_size; + + err = devl_resource_size_get(devlink, + MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE, + p_double_size); + if (err) { + double_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) - + *p_linear_size; + double_size *= profile->kvd_hash_double_parts; + double_size /= profile->kvd_hash_double_parts + + profile->kvd_hash_single_parts; + *p_double_size = rounddown(double_size, + MLXSW_SP_KVD_GRANULARITY); + } + + err = devl_resource_size_get(devlink, + MLXSW_SP_RESOURCE_KVD_HASH_SINGLE, + p_single_size); + if (err) + *p_single_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) - + *p_double_size - *p_linear_size; + + /* Check results are legal. 
*/ + if (*p_single_size < MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE) || + *p_double_size < MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE) || + MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) < *p_linear_size) + return -EIO; + + return 0; +} + +static int +mlxsw_sp_params_acl_region_rehash_intrvl_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + + ctx->val.vu32 = mlxsw_sp_acl_region_rehash_intrvl_get(mlxsw_sp); + return 0; +} + +static int +mlxsw_sp_params_acl_region_rehash_intrvl_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + + return mlxsw_sp_acl_region_rehash_intrvl_set(mlxsw_sp, ctx->val.vu32); +} + +static const struct devlink_param mlxsw_sp2_devlink_params[] = { + DEVLINK_PARAM_DRIVER(MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL, + "acl_region_rehash_interval", + DEVLINK_PARAM_TYPE_U32, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlxsw_sp_params_acl_region_rehash_intrvl_get, + mlxsw_sp_params_acl_region_rehash_intrvl_set, + NULL), +}; + +static int mlxsw_sp2_params_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + union devlink_param_value value; + int err; + + err = devlink_params_register(devlink, mlxsw_sp2_devlink_params, + ARRAY_SIZE(mlxsw_sp2_devlink_params)); + if (err) + return err; + + value.vu32 = 0; + devlink_param_driverinit_value_set(devlink, + MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL, + value); + return 0; +} + +static void mlxsw_sp2_params_unregister(struct mlxsw_core *mlxsw_core) +{ + devlink_params_unregister(priv_to_devlink(mlxsw_core), + mlxsw_sp2_devlink_params, + ARRAY_SIZE(mlxsw_sp2_devlink_params)); +} + +static void mlxsw_sp_ptp_transmitted(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, u16 local_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + + skb_pull(skb, MLXSW_TXHDR_LEN); + mlxsw_sp->ptp_ops->transmitted(mlxsw_sp, skb, local_port); +} + +static struct mlxsw_driver mlxsw_sp1_driver = { + .kind = mlxsw_sp1_driver_name, + .priv_size = sizeof(struct mlxsw_sp), + .fw_req_rev = &mlxsw_sp1_fw_rev, + .fw_filename = MLXSW_SP1_FW_FILENAME, + .init = mlxsw_sp1_init, + .fini = mlxsw_sp_fini, + .port_split = mlxsw_sp_port_split, + .port_unsplit = mlxsw_sp_port_unsplit, + .sb_pool_get = mlxsw_sp_sb_pool_get, + .sb_pool_set = mlxsw_sp_sb_pool_set, + .sb_port_pool_get = mlxsw_sp_sb_port_pool_get, + .sb_port_pool_set = mlxsw_sp_sb_port_pool_set, + .sb_tc_pool_bind_get = mlxsw_sp_sb_tc_pool_bind_get, + .sb_tc_pool_bind_set = mlxsw_sp_sb_tc_pool_bind_set, + .sb_occ_snapshot = mlxsw_sp_sb_occ_snapshot, + .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear, + .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, + .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, + .trap_init = mlxsw_sp_trap_init, + .trap_fini = mlxsw_sp_trap_fini, + .trap_action_set = mlxsw_sp_trap_action_set, + .trap_group_init = mlxsw_sp_trap_group_init, + .trap_group_set = mlxsw_sp_trap_group_set, + .trap_policer_init = mlxsw_sp_trap_policer_init, + .trap_policer_fini = mlxsw_sp_trap_policer_fini, + .trap_policer_set = mlxsw_sp_trap_policer_set, + .trap_policer_counter_get = mlxsw_sp_trap_policer_counter_get, + .txhdr_construct = mlxsw_sp_txhdr_construct, + .resources_register = 
mlxsw_sp1_resources_register, + .kvd_sizes_get = mlxsw_sp_kvd_sizes_get, + .ptp_transmitted = mlxsw_sp_ptp_transmitted, + .txhdr_len = MLXSW_TXHDR_LEN, + .profile = &mlxsw_sp1_config_profile, + .sdq_supports_cqe_v2 = false, +}; + +static struct mlxsw_driver mlxsw_sp2_driver = { + .kind = mlxsw_sp2_driver_name, + .priv_size = sizeof(struct mlxsw_sp), + .fw_req_rev = &mlxsw_sp2_fw_rev, + .fw_filename = MLXSW_SP2_FW_FILENAME, + .init = mlxsw_sp2_init, + .fini = mlxsw_sp_fini, + .port_split = mlxsw_sp_port_split, + .port_unsplit = mlxsw_sp_port_unsplit, + .ports_remove_selected = mlxsw_sp_ports_remove_selected, + .sb_pool_get = mlxsw_sp_sb_pool_get, + .sb_pool_set = mlxsw_sp_sb_pool_set, + .sb_port_pool_get = mlxsw_sp_sb_port_pool_get, + .sb_port_pool_set = mlxsw_sp_sb_port_pool_set, + .sb_tc_pool_bind_get = mlxsw_sp_sb_tc_pool_bind_get, + .sb_tc_pool_bind_set = mlxsw_sp_sb_tc_pool_bind_set, + .sb_occ_snapshot = mlxsw_sp_sb_occ_snapshot, + .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear, + .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, + .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, + .trap_init = mlxsw_sp_trap_init, + .trap_fini = mlxsw_sp_trap_fini, + .trap_action_set = mlxsw_sp_trap_action_set, + .trap_group_init = mlxsw_sp_trap_group_init, + .trap_group_set = mlxsw_sp_trap_group_set, + .trap_policer_init = mlxsw_sp_trap_policer_init, + .trap_policer_fini = mlxsw_sp_trap_policer_fini, + .trap_policer_set = mlxsw_sp_trap_policer_set, + .trap_policer_counter_get = mlxsw_sp_trap_policer_counter_get, + .txhdr_construct = mlxsw_sp_txhdr_construct, + .resources_register = mlxsw_sp2_resources_register, + .params_register = mlxsw_sp2_params_register, + .params_unregister = mlxsw_sp2_params_unregister, + .ptp_transmitted = mlxsw_sp_ptp_transmitted, + .txhdr_len = MLXSW_TXHDR_LEN, + .profile = &mlxsw_sp2_config_profile, + .sdq_supports_cqe_v2 = true, +}; + +static struct mlxsw_driver mlxsw_sp3_driver = { + .kind = mlxsw_sp3_driver_name, + .priv_size = sizeof(struct mlxsw_sp), + .fw_req_rev = &mlxsw_sp3_fw_rev, + .fw_filename = MLXSW_SP3_FW_FILENAME, + .init = mlxsw_sp3_init, + .fini = mlxsw_sp_fini, + .port_split = mlxsw_sp_port_split, + .port_unsplit = mlxsw_sp_port_unsplit, + .ports_remove_selected = mlxsw_sp_ports_remove_selected, + .sb_pool_get = mlxsw_sp_sb_pool_get, + .sb_pool_set = mlxsw_sp_sb_pool_set, + .sb_port_pool_get = mlxsw_sp_sb_port_pool_get, + .sb_port_pool_set = mlxsw_sp_sb_port_pool_set, + .sb_tc_pool_bind_get = mlxsw_sp_sb_tc_pool_bind_get, + .sb_tc_pool_bind_set = mlxsw_sp_sb_tc_pool_bind_set, + .sb_occ_snapshot = mlxsw_sp_sb_occ_snapshot, + .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear, + .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, + .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, + .trap_init = mlxsw_sp_trap_init, + .trap_fini = mlxsw_sp_trap_fini, + .trap_action_set = mlxsw_sp_trap_action_set, + .trap_group_init = mlxsw_sp_trap_group_init, + .trap_group_set = mlxsw_sp_trap_group_set, + .trap_policer_init = mlxsw_sp_trap_policer_init, + .trap_policer_fini = mlxsw_sp_trap_policer_fini, + .trap_policer_set = mlxsw_sp_trap_policer_set, + .trap_policer_counter_get = mlxsw_sp_trap_policer_counter_get, + .txhdr_construct = mlxsw_sp_txhdr_construct, + .resources_register = mlxsw_sp2_resources_register, + .params_register = mlxsw_sp2_params_register, + .params_unregister = mlxsw_sp2_params_unregister, + .ptp_transmitted = mlxsw_sp_ptp_transmitted, + .txhdr_len = MLXSW_TXHDR_LEN, + .profile = &mlxsw_sp2_config_profile, + .sdq_supports_cqe_v2 
= true, +}; + +static struct mlxsw_driver mlxsw_sp4_driver = { + .kind = mlxsw_sp4_driver_name, + .priv_size = sizeof(struct mlxsw_sp), + .init = mlxsw_sp4_init, + .fini = mlxsw_sp_fini, + .port_split = mlxsw_sp_port_split, + .port_unsplit = mlxsw_sp_port_unsplit, + .ports_remove_selected = mlxsw_sp_ports_remove_selected, + .sb_pool_get = mlxsw_sp_sb_pool_get, + .sb_pool_set = mlxsw_sp_sb_pool_set, + .sb_port_pool_get = mlxsw_sp_sb_port_pool_get, + .sb_port_pool_set = mlxsw_sp_sb_port_pool_set, + .sb_tc_pool_bind_get = mlxsw_sp_sb_tc_pool_bind_get, + .sb_tc_pool_bind_set = mlxsw_sp_sb_tc_pool_bind_set, + .sb_occ_snapshot = mlxsw_sp_sb_occ_snapshot, + .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear, + .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, + .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, + .trap_init = mlxsw_sp_trap_init, + .trap_fini = mlxsw_sp_trap_fini, + .trap_action_set = mlxsw_sp_trap_action_set, + .trap_group_init = mlxsw_sp_trap_group_init, + .trap_group_set = mlxsw_sp_trap_group_set, + .trap_policer_init = mlxsw_sp_trap_policer_init, + .trap_policer_fini = mlxsw_sp_trap_policer_fini, + .trap_policer_set = mlxsw_sp_trap_policer_set, + .trap_policer_counter_get = mlxsw_sp_trap_policer_counter_get, + .txhdr_construct = mlxsw_sp_txhdr_construct, + .resources_register = mlxsw_sp2_resources_register, + .params_register = mlxsw_sp2_params_register, + .params_unregister = mlxsw_sp2_params_unregister, + .ptp_transmitted = mlxsw_sp_ptp_transmitted, + .txhdr_len = MLXSW_TXHDR_LEN, + .profile = &mlxsw_sp4_config_profile, + .sdq_supports_cqe_v2 = true, +}; + +bool mlxsw_sp_port_dev_check(const struct net_device *dev) +{ + return dev->netdev_ops == &mlxsw_sp_port_netdev_ops; +} + +static int mlxsw_sp_lower_dev_walk(struct net_device *lower_dev, + struct netdev_nested_priv *priv) +{ + int ret = 0; + + if (mlxsw_sp_port_dev_check(lower_dev)) { + priv->data = (void *)netdev_priv(lower_dev); + ret = 1; + } + + return ret; +} + +struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev) +{ + struct netdev_nested_priv priv = { + .data = NULL, + }; + + if (mlxsw_sp_port_dev_check(dev)) + return netdev_priv(dev); + + netdev_walk_all_lower_dev(dev, mlxsw_sp_lower_dev_walk, &priv); + + return (struct mlxsw_sp_port *)priv.data; +} + +struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(dev); + return mlxsw_sp_port ? 
mlxsw_sp_port->mlxsw_sp : NULL; +} + +struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev) +{ + struct netdev_nested_priv priv = { + .data = NULL, + }; + + if (mlxsw_sp_port_dev_check(dev)) + return netdev_priv(dev); + + netdev_walk_all_lower_dev_rcu(dev, mlxsw_sp_lower_dev_walk, + &priv); + + return (struct mlxsw_sp_port *)priv.data; +} + +struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + + rcu_read_lock(); + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find_rcu(dev); + if (mlxsw_sp_port) + dev_hold(mlxsw_sp_port->dev); + rcu_read_unlock(); + return mlxsw_sp_port; +} + +void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port) +{ + dev_put(mlxsw_sp_port->dev); +} + +int mlxsw_sp_parsing_depth_inc(struct mlxsw_sp *mlxsw_sp) +{ + char mprs_pl[MLXSW_REG_MPRS_LEN]; + int err = 0; + + mutex_lock(&mlxsw_sp->parsing.lock); + + if (refcount_inc_not_zero(&mlxsw_sp->parsing.parsing_depth_ref)) + goto out_unlock; + + mlxsw_reg_mprs_pack(mprs_pl, MLXSW_SP_INCREASED_PARSING_DEPTH, + mlxsw_sp->parsing.vxlan_udp_dport); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl); + if (err) + goto out_unlock; + + mlxsw_sp->parsing.parsing_depth = MLXSW_SP_INCREASED_PARSING_DEPTH; + refcount_set(&mlxsw_sp->parsing.parsing_depth_ref, 1); + +out_unlock: + mutex_unlock(&mlxsw_sp->parsing.lock); + return err; +} + +void mlxsw_sp_parsing_depth_dec(struct mlxsw_sp *mlxsw_sp) +{ + char mprs_pl[MLXSW_REG_MPRS_LEN]; + + mutex_lock(&mlxsw_sp->parsing.lock); + + if (!refcount_dec_and_test(&mlxsw_sp->parsing.parsing_depth_ref)) + goto out_unlock; + + mlxsw_reg_mprs_pack(mprs_pl, MLXSW_SP_DEFAULT_PARSING_DEPTH, + mlxsw_sp->parsing.vxlan_udp_dport); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl); + mlxsw_sp->parsing.parsing_depth = MLXSW_SP_DEFAULT_PARSING_DEPTH; + +out_unlock: + mutex_unlock(&mlxsw_sp->parsing.lock); +} + +int mlxsw_sp_parsing_vxlan_udp_dport_set(struct mlxsw_sp *mlxsw_sp, + __be16 udp_dport) +{ + char mprs_pl[MLXSW_REG_MPRS_LEN]; + int err; + + mutex_lock(&mlxsw_sp->parsing.lock); + + mlxsw_reg_mprs_pack(mprs_pl, mlxsw_sp->parsing.parsing_depth, + be16_to_cpu(udp_dport)); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl); + if (err) + goto out_unlock; + + mlxsw_sp->parsing.vxlan_udp_dport = be16_to_cpu(udp_dport); + +out_unlock: + mutex_unlock(&mlxsw_sp->parsing.lock); + return err; +} + +static void +mlxsw_sp_port_lag_uppers_cleanup(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev) +{ + struct net_device *br_dev = netdev_master_upper_dev_get(lag_dev); + struct net_device *upper_dev; + struct list_head *iter; + + if (netif_is_bridge_port(lag_dev)) + mlxsw_sp_port_bridge_leave(mlxsw_sp_port, lag_dev, br_dev); + + netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) { + if (!netif_is_bridge_port(upper_dev)) + continue; + br_dev = netdev_master_upper_dev_get(upper_dev); + mlxsw_sp_port_bridge_leave(mlxsw_sp_port, upper_dev, br_dev); + } +} + +static int mlxsw_sp_lag_create(struct mlxsw_sp *mlxsw_sp, u16 lag_id) +{ + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_create_pack(sldr_pl, lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_lag_destroy(struct mlxsw_sp *mlxsw_sp, u16 lag_id) +{ + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_destroy_pack(sldr_pl, lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_lag_col_port_add(struct 
mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id, u8 port_index) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_port_add_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id, port_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_col_port_remove(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_port_remove_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_col_port_enable(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_col_enable_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_col_port_disable(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_col_disable_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp, + struct net_device *lag_dev, + u16 *p_lag_id) +{ + struct mlxsw_sp_upper *lag; + int free_lag_id = -1; + u16 max_lag; + int err, i; + + err = mlxsw_core_max_lag(mlxsw_sp->core, &max_lag); + if (err) + return err; + + for (i = 0; i < max_lag; i++) { + lag = mlxsw_sp_lag_get(mlxsw_sp, i); + if (lag->ref_count) { + if (lag->dev == lag_dev) { + *p_lag_id = i; + return 0; + } + } else if (free_lag_id < 0) { + free_lag_id = i; + } + } + if (free_lag_id < 0) + return -EBUSY; + *p_lag_id = free_lag_id; + return 0; +} + +static bool +mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp, + struct net_device *lag_dev, + struct netdev_lag_upper_info *lag_upper_info, + struct netlink_ext_ack *extack) +{ + u16 lag_id; + + if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) { + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported LAG devices"); + return false; + } + if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) { + NL_SET_ERR_MSG_MOD(extack, "LAG device using unsupported Tx type"); + return false; + } + return true; +} + +static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp, + u16 lag_id, u8 *p_port_index) +{ + u64 max_lag_members; + int i; + + max_lag_members = MLXSW_CORE_RES_GET(mlxsw_sp->core, + MAX_LAG_MEMBERS); + for (i = 0; i < max_lag_members; i++) { + if (!mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i)) { + *p_port_index = i; + return 0; + } + } + return -EBUSY; +} + +static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_upper *lag; + u16 lag_id; + u8 port_index; + int err; + + err = mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id); + if (err) + return err; + lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id); + if (!lag->ref_count) { + err = mlxsw_sp_lag_create(mlxsw_sp, lag_id); + if (err) + return err; + lag->dev = lag_dev; + } + + err = mlxsw_sp_port_lag_index_get(mlxsw_sp, lag_id, &port_index); + if (err) + return err; + err = mlxsw_sp_lag_col_port_add(mlxsw_sp_port, lag_id, port_index); + if (err) + goto 
err_col_port_add; + + mlxsw_core_lag_mapping_set(mlxsw_sp->core, lag_id, port_index, + mlxsw_sp_port->local_port); + mlxsw_sp_port->lag_id = lag_id; + mlxsw_sp_port->lagged = 1; + lag->ref_count++; + + /* Port is no longer usable as a router interface */ + if (mlxsw_sp_port->default_vlan->fid) + mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port->default_vlan); + + /* Join a router interface configured on the LAG, if exists */ + err = mlxsw_sp_port_vlan_router_join(mlxsw_sp_port->default_vlan, + lag_dev, extack); + if (err) + goto err_router_join; + + return 0; + +err_router_join: + lag->ref_count--; + mlxsw_sp_port->lagged = 0; + mlxsw_core_lag_mapping_clear(mlxsw_sp->core, lag_id, + mlxsw_sp_port->local_port); + mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); +err_col_port_add: + if (!lag->ref_count) + mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); + return err; +} + +static void mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u16 lag_id = mlxsw_sp_port->lag_id; + struct mlxsw_sp_upper *lag; + + if (!mlxsw_sp_port->lagged) + return; + lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id); + WARN_ON(lag->ref_count == 0); + + mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); + + /* Any VLANs configured on the port are no longer valid */ + mlxsw_sp_port_vlan_flush(mlxsw_sp_port, false); + mlxsw_sp_port_vlan_cleanup(mlxsw_sp_port->default_vlan); + /* Make the LAG and its directly linked uppers leave bridges they + * are memeber in + */ + mlxsw_sp_port_lag_uppers_cleanup(mlxsw_sp_port, lag_dev); + + if (lag->ref_count == 1) + mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); + + mlxsw_core_lag_mapping_clear(mlxsw_sp->core, lag_id, + mlxsw_sp_port->local_port); + mlxsw_sp_port->lagged = 0; + lag->ref_count--; + + /* Make sure untagged frames are allowed to ingress */ + mlxsw_sp_port_pvid_set(mlxsw_sp_port, MLXSW_SP_DEFAULT_VID, + ETH_P_8021Q); +} + +static int mlxsw_sp_lag_dist_port_add(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_add_port_pack(sldr_pl, lag_id, + mlxsw_sp_port->local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_lag_dist_port_remove(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_remove_port_pack(sldr_pl, lag_id, + mlxsw_sp_port->local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int +mlxsw_sp_port_lag_col_dist_enable(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err; + + err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + if (err) + return err; + + err = mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, mlxsw_sp_port->lag_id); + if (err) + goto err_dist_port_add; + + return 0; + +err_dist_port_add: + mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, mlxsw_sp_port->lag_id); + return err; +} + +static int +mlxsw_sp_port_lag_col_dist_disable(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err; + + err = mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + if (err) + return err; + + err = mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + if (err) + goto err_col_port_disable; + + return 0; + +err_col_port_disable: + mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, mlxsw_sp_port->lag_id); + return err; +} + +static int 
mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port, + struct netdev_lag_lower_state_info *info) +{ + if (info->tx_enabled) + return mlxsw_sp_port_lag_col_dist_enable(mlxsw_sp_port); + else + return mlxsw_sp_port_lag_col_dist_disable(mlxsw_sp_port); +} + +static int mlxsw_sp_port_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, + bool enable) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + enum mlxsw_reg_spms_state spms_state; + char *spms_pl; + u16 vid; + int err; + + spms_state = enable ? MLXSW_REG_SPMS_STATE_FORWARDING : + MLXSW_REG_SPMS_STATE_DISCARDING; + + spms_pl = kmalloc(MLXSW_REG_SPMS_LEN, GFP_KERNEL); + if (!spms_pl) + return -ENOMEM; + mlxsw_reg_spms_pack(spms_pl, mlxsw_sp_port->local_port); + + for (vid = 0; vid < VLAN_N_VID; vid++) + mlxsw_reg_spms_vid_pack(spms_pl, vid, spms_state); + + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spms), spms_pl); + kfree(spms_pl); + return err; +} + +static int mlxsw_sp_port_ovs_join(struct mlxsw_sp_port *mlxsw_sp_port) +{ + u16 vid = 1; + int err; + + err = mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, true); + if (err) + return err; + err = mlxsw_sp_port_stp_set(mlxsw_sp_port, true); + if (err) + goto err_port_stp_set; + err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, 1, VLAN_N_VID - 2, + true, false); + if (err) + goto err_port_vlan_set; + + for (; vid <= VLAN_N_VID - 1; vid++) { + err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, + vid, false); + if (err) + goto err_vid_learning_set; + } + + return 0; + +err_vid_learning_set: + for (vid--; vid >= 1; vid--) + mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true); +err_port_vlan_set: + mlxsw_sp_port_stp_set(mlxsw_sp_port, false); +err_port_stp_set: + mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false); + return err; +} + +static void mlxsw_sp_port_ovs_leave(struct mlxsw_sp_port *mlxsw_sp_port) +{ + u16 vid; + + for (vid = VLAN_N_VID - 1; vid >= 1; vid--) + mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, + vid, true); + + mlxsw_sp_port_vlan_set(mlxsw_sp_port, 1, VLAN_N_VID - 2, + false, false); + mlxsw_sp_port_stp_set(mlxsw_sp_port, false); + mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false); +} + +static bool mlxsw_sp_bridge_has_multiple_vxlans(struct net_device *br_dev) +{ + unsigned int num_vxlans = 0; + struct net_device *dev; + struct list_head *iter; + + netdev_for_each_lower_dev(br_dev, dev, iter) { + if (netif_is_vxlan(dev)) + num_vxlans++; + } + + return num_vxlans > 1; +} + +static bool mlxsw_sp_bridge_vxlan_vlan_is_valid(struct net_device *br_dev) +{ + DECLARE_BITMAP(vlans, VLAN_N_VID) = {0}; + struct net_device *dev; + struct list_head *iter; + + netdev_for_each_lower_dev(br_dev, dev, iter) { + u16 pvid; + int err; + + if (!netif_is_vxlan(dev)) + continue; + + err = mlxsw_sp_vxlan_mapped_vid(dev, &pvid); + if (err || !pvid) + continue; + + if (test_and_set_bit(pvid, vlans)) + return false; + } + + return true; +} + +static bool mlxsw_sp_bridge_vxlan_is_valid(struct net_device *br_dev, + struct netlink_ext_ack *extack) +{ + if (br_multicast_enabled(br_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Multicast can not be enabled on a bridge with a VxLAN device"); + return false; + } + + if (!br_vlan_enabled(br_dev) && + mlxsw_sp_bridge_has_multiple_vxlans(br_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Multiple VxLAN devices are not supported in a VLAN-unaware bridge"); + return false; + } + + if (br_vlan_enabled(br_dev) && + !mlxsw_sp_bridge_vxlan_vlan_is_valid(br_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Multiple VxLAN devices cannot have the same VLAN as PVID and egress untagged"); + 
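
mlxsw_sp_bridge_vxlan_vlan_is_valid() above walks the bridge's lower devices and uses a VLAN-sized bitmap with test_and_set_bit() to reject configurations where two VxLAN devices map to the same PVID. The user-space sketch below is a plain-C analogue of that duplicate check; a byte array stands in for the kernel bitmap helpers.

    #include <stdbool.h>
    #include <stdio.h>

    #define VLAN_N_VID 4096

    /* Return false as soon as a VLAN ID repeats, true otherwise; this is the
     * user-space counterpart of the test_and_set_bit() loop in the driver. */
    static bool vids_are_unique(const unsigned short *vids, int count)
    {
        bool seen[VLAN_N_VID] = { false };
        int i;

        for (i = 0; i < count; i++) {
            if (!vids[i])           /* skip "no PVID", as the driver does */
                continue;
            if (seen[vids[i]])
                return false;
            seen[vids[i]] = true;
        }
        return true;
    }

    int main(void)
    {
        unsigned short ok[] = { 10, 20, 0, 30 };
        unsigned short dup[] = { 10, 20, 10 };

        printf("%d %d\n", vids_are_unique(ok, 4), vids_are_unique(dup, 3));
        return 0;
    }
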
return false; + } + + return true; +} + +static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, + struct net_device *dev, + unsigned long event, void *ptr) +{ + struct netdev_notifier_changeupper_info *info; + struct mlxsw_sp_port *mlxsw_sp_port; + struct netlink_ext_ack *extack; + struct net_device *upper_dev; + struct mlxsw_sp *mlxsw_sp; + int err = 0; + u16 proto; + + mlxsw_sp_port = netdev_priv(dev); + mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + info = ptr; + extack = netdev_notifier_info_to_extack(&info->info); + + switch (event) { + case NETDEV_PRECHANGEUPPER: + upper_dev = info->upper_dev; + if (!is_vlan_dev(upper_dev) && + !netif_is_lag_master(upper_dev) && + !netif_is_bridge_master(upper_dev) && + !netif_is_ovs_master(upper_dev) && + !netif_is_macvlan(upper_dev) && + !netif_is_l3_master(upper_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type"); + return -EINVAL; + } + if (!info->linking) + break; + if (netif_is_bridge_master(upper_dev) && + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev) && + mlxsw_sp_bridge_has_vxlan(upper_dev) && + !mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack)) + return -EOPNOTSUPP; + if (netdev_has_any_upper_dev(upper_dev) && + (!netif_is_bridge_master(upper_dev) || + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, + upper_dev))) { + NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported"); + return -EINVAL; + } + if (netif_is_lag_master(upper_dev) && + !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev, + info->upper_info, extack)) + return -EINVAL; + if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Master device is a LAG master and this device has a VLAN"); + return -EINVAL; + } + if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) && + !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) { + NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on a LAG port"); + return -EINVAL; + } + if (netif_is_macvlan(upper_dev) && + !mlxsw_sp_rif_exists(mlxsw_sp, lower_dev)) { + NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces"); + return -EOPNOTSUPP; + } + if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Master device is an OVS master and this device has a VLAN"); + return -EINVAL; + } + if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on an OVS port"); + return -EINVAL; + } + if (netif_is_bridge_master(upper_dev)) { + br_vlan_get_proto(upper_dev, &proto); + if (br_vlan_enabled(upper_dev) && + proto != ETH_P_8021Q && proto != ETH_P_8021AD) { + NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a bridge with unknown VLAN protocol is not supported"); + return -EOPNOTSUPP; + } + if (vlan_uses_dev(lower_dev) && + br_vlan_enabled(upper_dev) && + proto == ETH_P_8021AD) { + NL_SET_ERR_MSG_MOD(extack, "Enslaving a port that already has a VLAN upper to an 802.1ad bridge is not supported"); + return -EOPNOTSUPP; + } + } + if (netif_is_bridge_port(lower_dev) && is_vlan_dev(upper_dev)) { + struct net_device *br_dev = netdev_master_upper_dev_get(lower_dev); + + if (br_vlan_enabled(br_dev)) { + br_vlan_get_proto(br_dev, &proto); + if (proto == ETH_P_8021AD) { + NL_SET_ERR_MSG_MOD(extack, "VLAN uppers are not supported on a port enslaved to an 802.1ad bridge"); + return -EOPNOTSUPP; + } + } + } + if (is_vlan_dev(upper_dev) && + ntohs(vlan_dev_vlan_proto(upper_dev)) != ETH_P_8021Q) { + NL_SET_ERR_MSG_MOD(extack, "VLAN uppers are only 
supported with 802.1q VLAN protocol"); + return -EOPNOTSUPP; + } + break; + case NETDEV_CHANGEUPPER: + upper_dev = info->upper_dev; + if (netif_is_bridge_master(upper_dev)) { + if (info->linking) + err = mlxsw_sp_port_bridge_join(mlxsw_sp_port, + lower_dev, + upper_dev, + extack); + else + mlxsw_sp_port_bridge_leave(mlxsw_sp_port, + lower_dev, + upper_dev); + } else if (netif_is_lag_master(upper_dev)) { + if (info->linking) { + err = mlxsw_sp_port_lag_join(mlxsw_sp_port, + upper_dev, extack); + } else { + mlxsw_sp_port_lag_col_dist_disable(mlxsw_sp_port); + mlxsw_sp_port_lag_leave(mlxsw_sp_port, + upper_dev); + } + } else if (netif_is_ovs_master(upper_dev)) { + if (info->linking) + err = mlxsw_sp_port_ovs_join(mlxsw_sp_port); + else + mlxsw_sp_port_ovs_leave(mlxsw_sp_port); + } else if (netif_is_macvlan(upper_dev)) { + if (!info->linking) + mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev); + } else if (is_vlan_dev(upper_dev)) { + struct net_device *br_dev; + + if (!netif_is_bridge_port(upper_dev)) + break; + if (info->linking) + break; + br_dev = netdev_master_upper_dev_get(upper_dev); + mlxsw_sp_port_bridge_leave(mlxsw_sp_port, upper_dev, + br_dev); + } + break; + } + + return err; +} + +static int mlxsw_sp_netdevice_port_lower_event(struct net_device *dev, + unsigned long event, void *ptr) +{ + struct netdev_notifier_changelowerstate_info *info; + struct mlxsw_sp_port *mlxsw_sp_port; + int err; + + mlxsw_sp_port = netdev_priv(dev); + info = ptr; + + switch (event) { + case NETDEV_CHANGELOWERSTATE: + if (netif_is_lag_port(dev) && mlxsw_sp_port->lagged) { + err = mlxsw_sp_port_lag_changed(mlxsw_sp_port, + info->lower_state_info); + if (err) + netdev_err(dev, "Failed to reflect link aggregation lower state change\n"); + } + break; + } + + return 0; +} + +static int mlxsw_sp_netdevice_port_event(struct net_device *lower_dev, + struct net_device *port_dev, + unsigned long event, void *ptr) +{ + switch (event) { + case NETDEV_PRECHANGEUPPER: + case NETDEV_CHANGEUPPER: + return mlxsw_sp_netdevice_port_upper_event(lower_dev, port_dev, + event, ptr); + case NETDEV_CHANGELOWERSTATE: + return mlxsw_sp_netdevice_port_lower_event(port_dev, event, + ptr); + } + + return 0; +} + +static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev, + unsigned long event, void *ptr) +{ + struct net_device *dev; + struct list_head *iter; + int ret; + + netdev_for_each_lower_dev(lag_dev, dev, iter) { + if (mlxsw_sp_port_dev_check(dev)) { + ret = mlxsw_sp_netdevice_port_event(lag_dev, dev, event, + ptr); + if (ret) + return ret; + } + } + + return 0; +} + +static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, + struct net_device *dev, + unsigned long event, void *ptr, + u16 vid) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct netdev_notifier_changeupper_info *info = ptr; + struct netlink_ext_ack *extack; + struct net_device *upper_dev; + int err = 0; + + extack = netdev_notifier_info_to_extack(&info->info); + + switch (event) { + case NETDEV_PRECHANGEUPPER: + upper_dev = info->upper_dev; + if (!netif_is_bridge_master(upper_dev) && + !netif_is_macvlan(upper_dev) && + !netif_is_l3_master(upper_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type"); + return -EINVAL; + } + if (!info->linking) + break; + if (netif_is_bridge_master(upper_dev) && + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev) && + mlxsw_sp_bridge_has_vxlan(upper_dev) && + !mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack)) + 
return -EOPNOTSUPP; + if (netdev_has_any_upper_dev(upper_dev) && + (!netif_is_bridge_master(upper_dev) || + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, + upper_dev))) { + NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported"); + return -EINVAL; + } + if (netif_is_macvlan(upper_dev) && + !mlxsw_sp_rif_exists(mlxsw_sp, vlan_dev)) { + NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces"); + return -EOPNOTSUPP; + } + break; + case NETDEV_CHANGEUPPER: + upper_dev = info->upper_dev; + if (netif_is_bridge_master(upper_dev)) { + if (info->linking) + err = mlxsw_sp_port_bridge_join(mlxsw_sp_port, + vlan_dev, + upper_dev, + extack); + else + mlxsw_sp_port_bridge_leave(mlxsw_sp_port, + vlan_dev, + upper_dev); + } else if (netif_is_macvlan(upper_dev)) { + if (!info->linking) + mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev); + } + break; + } + + return err; +} + +static int mlxsw_sp_netdevice_lag_port_vlan_event(struct net_device *vlan_dev, + struct net_device *lag_dev, + unsigned long event, + void *ptr, u16 vid) +{ + struct net_device *dev; + struct list_head *iter; + int ret; + + netdev_for_each_lower_dev(lag_dev, dev, iter) { + if (mlxsw_sp_port_dev_check(dev)) { + ret = mlxsw_sp_netdevice_port_vlan_event(vlan_dev, dev, + event, ptr, + vid); + if (ret) + return ret; + } + } + + return 0; +} + +static int mlxsw_sp_netdevice_bridge_vlan_event(struct net_device *vlan_dev, + struct net_device *br_dev, + unsigned long event, void *ptr, + u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev); + struct netdev_notifier_changeupper_info *info = ptr; + struct netlink_ext_ack *extack; + struct net_device *upper_dev; + + if (!mlxsw_sp) + return 0; + + extack = netdev_notifier_info_to_extack(&info->info); + + switch (event) { + case NETDEV_PRECHANGEUPPER: + upper_dev = info->upper_dev; + if (!netif_is_macvlan(upper_dev) && + !netif_is_l3_master(upper_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type"); + return -EOPNOTSUPP; + } + if (!info->linking) + break; + if (netif_is_macvlan(upper_dev) && + !mlxsw_sp_rif_exists(mlxsw_sp, vlan_dev)) { + NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces"); + return -EOPNOTSUPP; + } + break; + case NETDEV_CHANGEUPPER: + upper_dev = info->upper_dev; + if (info->linking) + break; + if (netif_is_macvlan(upper_dev)) + mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev); + break; + } + + return 0; +} + +static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev, + unsigned long event, void *ptr) +{ + struct net_device *real_dev = vlan_dev_real_dev(vlan_dev); + u16 vid = vlan_dev_vlan_id(vlan_dev); + + if (mlxsw_sp_port_dev_check(real_dev)) + return mlxsw_sp_netdevice_port_vlan_event(vlan_dev, real_dev, + event, ptr, vid); + else if (netif_is_lag_master(real_dev)) + return mlxsw_sp_netdevice_lag_port_vlan_event(vlan_dev, + real_dev, event, + ptr, vid); + else if (netif_is_bridge_master(real_dev)) + return mlxsw_sp_netdevice_bridge_vlan_event(vlan_dev, real_dev, + event, ptr, vid); + + return 0; +} + +static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev, + unsigned long event, void *ptr) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(br_dev); + struct netdev_notifier_changeupper_info *info = ptr; + struct netlink_ext_ack *extack; + struct net_device *upper_dev; + u16 proto; + + if (!mlxsw_sp) + return 0; + + extack = netdev_notifier_info_to_extack(&info->info); + + switch (event) { + case NETDEV_PRECHANGEUPPER: 
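+		/* Validate prospective uppers of the bridge device: only VLAN, + * macvlan and VRF (L3 master) uppers are supported, 802.1ad + * bridges do not support uppers at all, VLAN uppers must use + * the 802.1q protocol and macvlan requires a router interface + * (RIF) on the bridge. + */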
+ upper_dev = info->upper_dev; + if (!is_vlan_dev(upper_dev) && + !netif_is_macvlan(upper_dev) && + !netif_is_l3_master(upper_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type"); + return -EOPNOTSUPP; + } + if (!info->linking) + break; + if (br_vlan_enabled(br_dev)) { + br_vlan_get_proto(br_dev, &proto); + if (proto == ETH_P_8021AD) { + NL_SET_ERR_MSG_MOD(extack, "Upper devices are not supported on top of an 802.1ad bridge"); + return -EOPNOTSUPP; + } + } + if (is_vlan_dev(upper_dev) && + ntohs(vlan_dev_vlan_proto(upper_dev)) != ETH_P_8021Q) { + NL_SET_ERR_MSG_MOD(extack, "VLAN uppers are only supported with 802.1q VLAN protocol"); + return -EOPNOTSUPP; + } + if (netif_is_macvlan(upper_dev) && + !mlxsw_sp_rif_exists(mlxsw_sp, br_dev)) { + NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces"); + return -EOPNOTSUPP; + } + break; + case NETDEV_CHANGEUPPER: + upper_dev = info->upper_dev; + if (info->linking) + break; + if (is_vlan_dev(upper_dev)) + mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, upper_dev); + if (netif_is_macvlan(upper_dev)) + mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev); + break; + } + + return 0; +} + +static int mlxsw_sp_netdevice_macvlan_event(struct net_device *macvlan_dev, + unsigned long event, void *ptr) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(macvlan_dev); + struct netdev_notifier_changeupper_info *info = ptr; + struct netlink_ext_ack *extack; + struct net_device *upper_dev; + + if (!mlxsw_sp || event != NETDEV_PRECHANGEUPPER) + return 0; + + extack = netdev_notifier_info_to_extack(&info->info); + upper_dev = info->upper_dev; + + if (!netif_is_l3_master(upper_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev, + unsigned long event, void *ptr) +{ + struct netdev_notifier_changeupper_info *cu_info; + struct netdev_notifier_info *info = ptr; + struct netlink_ext_ack *extack; + struct net_device *upper_dev; + + extack = netdev_notifier_info_to_extack(info); + + switch (event) { + case NETDEV_CHANGEUPPER: + cu_info = container_of(info, + struct netdev_notifier_changeupper_info, + info); + upper_dev = cu_info->upper_dev; + if (!netif_is_bridge_master(upper_dev)) + return 0; + if (!mlxsw_sp_lower_get(upper_dev)) + return 0; + if (!mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack)) + return -EOPNOTSUPP; + if (cu_info->linking) { + if (!netif_running(dev)) + return 0; + /* When the bridge is VLAN-aware, the VNI of the VxLAN + * device needs to be mapped to a VLAN, but at this + * point no VLANs are configured on the VxLAN device + */ + if (br_vlan_enabled(upper_dev)) + return 0; + return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev, + dev, 0, extack); + } else { + /* VLANs were already flushed, which triggered the + * necessary cleanup + */ + if (br_vlan_enabled(upper_dev)) + return 0; + mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, dev); + } + break; + case NETDEV_PRE_UP: + upper_dev = netdev_master_upper_dev_get(dev); + if (!upper_dev) + return 0; + if (!netif_is_bridge_master(upper_dev)) + return 0; + if (!mlxsw_sp_lower_get(upper_dev)) + return 0; + return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev, dev, 0, + extack); + case NETDEV_DOWN: + upper_dev = netdev_master_upper_dev_get(dev); + if (!upper_dev) + return 0; + if (!netif_is_bridge_master(upper_dev)) + return 0; + if (!mlxsw_sp_lower_get(upper_dev)) + return 0; + mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, dev); + break; + } 
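+	/* All other events on the VxLAN device are ignored. */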
+ + return 0; +} + +static int mlxsw_sp_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct mlxsw_sp_span_entry *span_entry; + struct mlxsw_sp *mlxsw_sp; + int err = 0; + + mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb); + if (event == NETDEV_UNREGISTER) { + span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, dev); + if (span_entry) + mlxsw_sp_span_entry_invalidate(mlxsw_sp, span_entry); + } + mlxsw_sp_span_respin(mlxsw_sp); + + if (netif_is_vxlan(dev)) + err = mlxsw_sp_netdevice_vxlan_event(mlxsw_sp, dev, event, ptr); + else if (mlxsw_sp_port_dev_check(dev)) + err = mlxsw_sp_netdevice_port_event(dev, dev, event, ptr); + else if (netif_is_lag_master(dev)) + err = mlxsw_sp_netdevice_lag_event(dev, event, ptr); + else if (is_vlan_dev(dev)) + err = mlxsw_sp_netdevice_vlan_event(dev, event, ptr); + else if (netif_is_bridge_master(dev)) + err = mlxsw_sp_netdevice_bridge_event(dev, event, ptr); + else if (netif_is_macvlan(dev)) + err = mlxsw_sp_netdevice_macvlan_event(dev, event, ptr); + + return notifier_from_errno(err); +} + +static struct notifier_block mlxsw_sp_inetaddr_valid_nb __read_mostly = { + .notifier_call = mlxsw_sp_inetaddr_valid_event, +}; + +static struct notifier_block mlxsw_sp_inet6addr_valid_nb __read_mostly = { + .notifier_call = mlxsw_sp_inet6addr_valid_event, +}; + +static const struct pci_device_id mlxsw_sp1_pci_id_table[] = { + {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM), 0}, + {0, }, +}; + +static struct pci_driver mlxsw_sp1_pci_driver = { + .name = mlxsw_sp1_driver_name, + .id_table = mlxsw_sp1_pci_id_table, +}; + +static const struct pci_device_id mlxsw_sp2_pci_id_table[] = { + {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM2), 0}, + {0, }, +}; + +static struct pci_driver mlxsw_sp2_pci_driver = { + .name = mlxsw_sp2_driver_name, + .id_table = mlxsw_sp2_pci_id_table, +}; + +static const struct pci_device_id mlxsw_sp3_pci_id_table[] = { + {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM3), 0}, + {0, }, +}; + +static struct pci_driver mlxsw_sp3_pci_driver = { + .name = mlxsw_sp3_driver_name, + .id_table = mlxsw_sp3_pci_id_table, +}; + +static const struct pci_device_id mlxsw_sp4_pci_id_table[] = { + {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM4), 0}, + {0, }, +}; + +static struct pci_driver mlxsw_sp4_pci_driver = { + .name = mlxsw_sp4_driver_name, + .id_table = mlxsw_sp4_pci_id_table, +}; + +static int __init mlxsw_sp_module_init(void) +{ + int err; + + register_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb); + register_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); + + err = mlxsw_core_driver_register(&mlxsw_sp1_driver); + if (err) + goto err_sp1_core_driver_register; + + err = mlxsw_core_driver_register(&mlxsw_sp2_driver); + if (err) + goto err_sp2_core_driver_register; + + err = mlxsw_core_driver_register(&mlxsw_sp3_driver); + if (err) + goto err_sp3_core_driver_register; + + err = mlxsw_core_driver_register(&mlxsw_sp4_driver); + if (err) + goto err_sp4_core_driver_register; + + err = mlxsw_pci_driver_register(&mlxsw_sp1_pci_driver); + if (err) + goto err_sp1_pci_driver_register; + + err = mlxsw_pci_driver_register(&mlxsw_sp2_pci_driver); + if (err) + goto err_sp2_pci_driver_register; + + err = mlxsw_pci_driver_register(&mlxsw_sp3_pci_driver); + if (err) + goto err_sp3_pci_driver_register; + + err = mlxsw_pci_driver_register(&mlxsw_sp4_pci_driver); + if (err) + goto err_sp4_pci_driver_register; + + 
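+	/* All validator notifiers, core drivers and PCI drivers are + * registered. A failure in any of the steps above unwinds the + * previous registrations in reverse order via the error labels + * below. + */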
return 0; + +err_sp4_pci_driver_register: + mlxsw_pci_driver_unregister(&mlxsw_sp3_pci_driver); +err_sp3_pci_driver_register: + mlxsw_pci_driver_unregister(&mlxsw_sp2_pci_driver); +err_sp2_pci_driver_register: + mlxsw_pci_driver_unregister(&mlxsw_sp1_pci_driver); +err_sp1_pci_driver_register: + mlxsw_core_driver_unregister(&mlxsw_sp4_driver); +err_sp4_core_driver_register: + mlxsw_core_driver_unregister(&mlxsw_sp3_driver); +err_sp3_core_driver_register: + mlxsw_core_driver_unregister(&mlxsw_sp2_driver); +err_sp2_core_driver_register: + mlxsw_core_driver_unregister(&mlxsw_sp1_driver); +err_sp1_core_driver_register: + unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); + unregister_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb); + return err; +} + +static void __exit mlxsw_sp_module_exit(void) +{ + mlxsw_pci_driver_unregister(&mlxsw_sp4_pci_driver); + mlxsw_pci_driver_unregister(&mlxsw_sp3_pci_driver); + mlxsw_pci_driver_unregister(&mlxsw_sp2_pci_driver); + mlxsw_pci_driver_unregister(&mlxsw_sp1_pci_driver); + mlxsw_core_driver_unregister(&mlxsw_sp4_driver); + mlxsw_core_driver_unregister(&mlxsw_sp3_driver); + mlxsw_core_driver_unregister(&mlxsw_sp2_driver); + mlxsw_core_driver_unregister(&mlxsw_sp1_driver); + unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); + unregister_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb); +} + +module_init(mlxsw_sp_module_init); +module_exit(mlxsw_sp_module_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>"); +MODULE_DESCRIPTION("Mellanox Spectrum driver"); +MODULE_DEVICE_TABLE(pci, mlxsw_sp1_pci_id_table); +MODULE_DEVICE_TABLE(pci, mlxsw_sp2_pci_id_table); +MODULE_DEVICE_TABLE(pci, mlxsw_sp3_pci_id_table); +MODULE_DEVICE_TABLE(pci, mlxsw_sp4_pci_id_table); +MODULE_FIRMWARE(MLXSW_SP1_FW_FILENAME); +MODULE_FIRMWARE(MLXSW_SP2_FW_FILENAME); +MODULE_FIRMWARE(MLXSW_SP3_FW_FILENAME); +MODULE_FIRMWARE(MLXSW_SP_LINECARDS_INI_BUNDLE_FILENAME); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h new file mode 100644 index 000000000..c8ff2a6d7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -0,0 +1,1493 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2015-2018 Mellanox Technologies. 
All rights reserved */ + +#ifndef _MLXSW_SPECTRUM_H +#define _MLXSW_SPECTRUM_H + +#include <linux/ethtool.h> +#include <linux/types.h> +#include <linux/netdevice.h> +#include <linux/rhashtable.h> +#include <linux/bitops.h> +#include <linux/if_bridge.h> +#include <linux/if_vlan.h> +#include <linux/list.h> +#include <linux/dcbnl.h> +#include <linux/in6.h> +#include <linux/notifier.h> +#include <linux/net_namespace.h> +#include <linux/spinlock.h> +#include <net/psample.h> +#include <net/pkt_cls.h> +#include <net/red.h> +#include <net/vxlan.h> +#include <net/flow_offload.h> +#include <net/inet_ecn.h> + +#include "port.h" +#include "core.h" +#include "core_acl_flex_keys.h" +#include "core_acl_flex_actions.h" +#include "reg.h" + +#define MLXSW_SP_DEFAULT_VID (VLAN_N_VID - 1) + +#define MLXSW_SP_FID_8021D_MAX 1024 + +#define MLXSW_SP_MID_MAX 7000 + +#define MLXSW_SP_KVD_LINEAR_SIZE 98304 /* entries */ +#define MLXSW_SP_KVD_GRANULARITY 128 + +#define MLXSW_SP_RESOURCE_NAME_KVD "kvd" +#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR "linear" +#define MLXSW_SP_RESOURCE_NAME_KVD_HASH_SINGLE "hash_single" +#define MLXSW_SP_RESOURCE_NAME_KVD_HASH_DOUBLE "hash_double" +#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES "singles" +#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS "chunks" +#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS "large_chunks" + +#define MLXSW_SP_RESOURCE_NAME_SPAN "span_agents" + +#define MLXSW_SP_RESOURCE_NAME_COUNTERS "counters" +#define MLXSW_SP_RESOURCE_NAME_COUNTERS_FLOW "flow" +#define MLXSW_SP_RESOURCE_NAME_COUNTERS_RIF "rif" + +enum mlxsw_sp_resource_id { + MLXSW_SP_RESOURCE_KVD = MLXSW_CORE_RESOURCE_MAX, + MLXSW_SP_RESOURCE_KVD_LINEAR, + MLXSW_SP_RESOURCE_KVD_HASH_SINGLE, + MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE, + MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE, + MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS, + MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS, + MLXSW_SP_RESOURCE_SPAN, + MLXSW_SP_RESOURCE_COUNTERS, + MLXSW_SP_RESOURCE_COUNTERS_FLOW, + MLXSW_SP_RESOURCE_COUNTERS_RIF, + MLXSW_SP_RESOURCE_GLOBAL_POLICERS, + MLXSW_SP_RESOURCE_SINGLE_RATE_POLICERS, + MLXSW_SP_RESOURCE_RIF_MAC_PROFILES, + MLXSW_SP_RESOURCE_RIFS, +}; + +struct mlxsw_sp_port; +struct mlxsw_sp_rif; +struct mlxsw_sp_span_entry; +enum mlxsw_sp_l3proto; +union mlxsw_sp_l3addr; + +struct mlxsw_sp_upper { + struct net_device *dev; + unsigned int ref_count; +}; + +enum mlxsw_sp_rif_type { + MLXSW_SP_RIF_TYPE_SUBPORT, + MLXSW_SP_RIF_TYPE_VLAN, + MLXSW_SP_RIF_TYPE_FID, + MLXSW_SP_RIF_TYPE_IPIP_LB, /* IP-in-IP loopback. 
*/ + MLXSW_SP_RIF_TYPE_MAX, +}; + +struct mlxsw_sp_router_ops; + +extern const struct mlxsw_sp_router_ops mlxsw_sp1_router_ops; +extern const struct mlxsw_sp_router_ops mlxsw_sp2_router_ops; + +struct mlxsw_sp_switchdev_ops; + +extern const struct mlxsw_sp_switchdev_ops mlxsw_sp1_switchdev_ops; +extern const struct mlxsw_sp_switchdev_ops mlxsw_sp2_switchdev_ops; + +enum mlxsw_sp_fid_type { + MLXSW_SP_FID_TYPE_8021Q, + MLXSW_SP_FID_TYPE_8021D, + MLXSW_SP_FID_TYPE_RFID, + MLXSW_SP_FID_TYPE_DUMMY, + MLXSW_SP_FID_TYPE_MAX, +}; + +enum mlxsw_sp_nve_type { + MLXSW_SP_NVE_TYPE_VXLAN, +}; + +struct mlxsw_sp_sb; +struct mlxsw_sp_bridge; +struct mlxsw_sp_router; +struct mlxsw_sp_mr; +struct mlxsw_sp_acl; +struct mlxsw_sp_counter_pool; +struct mlxsw_sp_fid_core; +struct mlxsw_sp_kvdl; +struct mlxsw_sp_nve; +struct mlxsw_sp_kvdl_ops; +struct mlxsw_sp_mr_tcam_ops; +struct mlxsw_sp_acl_rulei_ops; +struct mlxsw_sp_acl_tcam_ops; +struct mlxsw_sp_nve_ops; +struct mlxsw_sp_sb_ops; +struct mlxsw_sp_sb_vals; +struct mlxsw_sp_port_type_speed_ops; +struct mlxsw_sp_ptp_state; +struct mlxsw_sp_ptp_ops; +struct mlxsw_sp_span_ops; +struct mlxsw_sp_qdisc_state; +struct mlxsw_sp_mall_entry; +struct mlxsw_sp_pgt; + +struct mlxsw_sp_port_mapping { + u8 module; + u8 slot_index; + u8 width; /* Number of lanes used by the port */ + u8 module_width; /* Number of lanes in the module (static) */ + u8 lane; +}; + +struct mlxsw_sp_port_mapping_events { + struct list_head queue; + spinlock_t queue_lock; /* protects queue */ + struct work_struct work; +}; + +struct mlxsw_sp_parsing { + refcount_t parsing_depth_ref; + u16 parsing_depth; + u16 vxlan_udp_dport; + struct mutex lock; /* Protects parsing configuration */ +}; + +struct mlxsw_sp { + struct mlxsw_sp_port **ports; + struct mlxsw_core *core; + const struct mlxsw_bus_info *bus_info; + unsigned char base_mac[ETH_ALEN]; + const unsigned char *mac_mask; + struct mlxsw_sp_upper *lags; + struct mlxsw_sp_port_mapping *port_mapping; + struct mlxsw_sp_port_mapping_events port_mapping_events; + struct rhashtable sample_trigger_ht; + struct mlxsw_sp_sb *sb; + struct mlxsw_sp_bridge *bridge; + struct mlxsw_sp_router *router; + struct mlxsw_sp_mr *mr; + struct mlxsw_afa *afa; + struct mlxsw_sp_acl *acl; + struct mlxsw_sp_fid_core *fid_core; + struct mlxsw_sp_policer_core *policer_core; + struct mlxsw_sp_kvdl *kvdl; + struct mlxsw_sp_nve *nve; + struct notifier_block netdevice_nb; + struct mlxsw_sp_ptp_clock *clock; + struct mlxsw_sp_ptp_state *ptp_state; + struct mlxsw_sp_counter_pool *counter_pool; + struct mlxsw_sp_span *span; + struct mlxsw_sp_trap *trap; + struct mlxsw_sp_parsing parsing; + const struct mlxsw_sp_switchdev_ops *switchdev_ops; + const struct mlxsw_sp_kvdl_ops *kvdl_ops; + const struct mlxsw_afa_ops *afa_ops; + const struct mlxsw_afk_ops *afk_ops; + const struct mlxsw_sp_mr_tcam_ops *mr_tcam_ops; + const struct mlxsw_sp_acl_rulei_ops *acl_rulei_ops; + const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops; + const struct mlxsw_sp_acl_bf_ops *acl_bf_ops; + const struct mlxsw_sp_nve_ops **nve_ops_arr; + const struct mlxsw_sp_sb_vals *sb_vals; + const struct mlxsw_sp_sb_ops *sb_ops; + const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops; + const struct mlxsw_sp_ptp_ops *ptp_ops; + const struct mlxsw_sp_span_ops *span_ops; + const struct mlxsw_sp_policer_core_ops *policer_core_ops; + const struct mlxsw_sp_trap_ops *trap_ops; + const struct mlxsw_sp_mall_ops *mall_ops; + const struct mlxsw_sp_router_ops *router_ops; + const struct mlxsw_listener *listeners; + const 
struct mlxsw_sp_fid_family **fid_family_arr; + size_t listeners_count; + u32 lowest_shaper_bs; + struct rhashtable ipv6_addr_ht; + struct mutex ipv6_addr_ht_lock; /* Protects ipv6_addr_ht */ + struct mlxsw_sp_pgt *pgt; + bool pgt_smpe_index_valid; +}; + +struct mlxsw_sp_ptp_ops { + struct mlxsw_sp_ptp_clock * + (*clock_init)(struct mlxsw_sp *mlxsw_sp, struct device *dev); + void (*clock_fini)(struct mlxsw_sp_ptp_clock *clock); + + struct mlxsw_sp_ptp_state *(*init)(struct mlxsw_sp *mlxsw_sp); + void (*fini)(struct mlxsw_sp_ptp_state *ptp_state); + + /* Notify a driver that a packet that might be PTP was received. Driver + * is responsible for freeing the passed-in SKB. + */ + void (*receive)(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u16 local_port); + + /* Notify a driver that a timestamped packet was transmitted. Driver + * is responsible for freeing the passed-in SKB. + */ + void (*transmitted)(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u16 local_port); + + int (*hwtstamp_get)(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + int (*hwtstamp_set)(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + void (*shaper_work)(struct work_struct *work); + int (*get_ts_info)(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info); + int (*get_stats_count)(void); + void (*get_stats_strings)(u8 **p); + void (*get_stats)(struct mlxsw_sp_port *mlxsw_sp_port, + u64 *data, int data_index); + int (*txhdr_construct)(struct mlxsw_core *mlxsw_core, + struct mlxsw_sp_port *mlxsw_sp_port, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info); +}; + +static inline struct mlxsw_sp_upper * +mlxsw_sp_lag_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id) +{ + return &mlxsw_sp->lags[lag_id]; +} + +struct mlxsw_sp_port_pcpu_stats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + struct u64_stats_sync syncp; + u32 tx_dropped; +}; + +enum mlxsw_sp_sample_trigger_type { + MLXSW_SP_SAMPLE_TRIGGER_TYPE_INGRESS, + MLXSW_SP_SAMPLE_TRIGGER_TYPE_EGRESS, + MLXSW_SP_SAMPLE_TRIGGER_TYPE_POLICY_ENGINE, +}; + +struct mlxsw_sp_sample_trigger { + enum mlxsw_sp_sample_trigger_type type; + u16 local_port; /* Reserved when trigger type is not ingress / egress. 
*/ +}; + +struct mlxsw_sp_sample_params { + struct psample_group *psample_group; + u32 trunc_size; + u32 rate; + bool truncate; +}; + +struct mlxsw_sp_bridge_port; +struct mlxsw_sp_fid; + +struct mlxsw_sp_port_vlan { + struct list_head list; + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp_fid *fid; + u16 vid; + struct mlxsw_sp_bridge_port *bridge_port; + struct list_head bridge_vlan_node; +}; + +/* No need an internal lock; At worse - miss a single periodic iteration */ +struct mlxsw_sp_port_xstats { + u64 ecn; + u64 tc_ecn[TC_MAX_QUEUE]; + u64 wred_drop[TC_MAX_QUEUE]; + u64 tail_drop[TC_MAX_QUEUE]; + u64 backlog[TC_MAX_QUEUE]; + u64 tx_bytes[IEEE_8021QAZ_MAX_TCS]; + u64 tx_packets[IEEE_8021QAZ_MAX_TCS]; +}; + +struct mlxsw_sp_ptp_port_dir_stats { + u64 packets; + u64 timestamps; +}; + +struct mlxsw_sp_ptp_port_stats { + struct mlxsw_sp_ptp_port_dir_stats rx_gcd; + struct mlxsw_sp_ptp_port_dir_stats tx_gcd; +}; + +struct mlxsw_sp_port { + struct net_device *dev; + struct mlxsw_sp_port_pcpu_stats __percpu *pcpu_stats; + struct mlxsw_sp *mlxsw_sp; + u16 local_port; + u8 lagged:1, + split:1; + u16 pvid; + u16 lag_id; + struct { + u8 tx_pause:1, + rx_pause:1, + autoneg:1; + } link; + struct { + struct ieee_ets *ets; + struct ieee_maxrate *maxrate; + struct ieee_pfc *pfc; + enum mlxsw_reg_qpts_trust_state trust_state; + } dcb; + struct mlxsw_sp_port_mapping mapping; /* mapping is constant during the + * mlxsw_sp_port lifetime, however + * the same localport can have + * different mapping. + */ + struct { + #define MLXSW_HW_STATS_UPDATE_TIME HZ + struct rtnl_link_stats64 stats; + struct mlxsw_sp_port_xstats xstats; + struct delayed_work update_dw; + } periodic_hw_stats; + struct list_head vlans_list; + struct mlxsw_sp_port_vlan *default_vlan; + struct mlxsw_sp_qdisc_state *qdisc; + unsigned acl_rule_count; + struct mlxsw_sp_flow_block *ing_flow_block; + struct mlxsw_sp_flow_block *eg_flow_block; + struct { + struct delayed_work shaper_dw; + struct hwtstamp_config hwtstamp_config; + u16 ing_types; + u16 egr_types; + struct mlxsw_sp_ptp_port_stats stats; + } ptp; + int max_mtu; + u32 max_speed; + struct mlxsw_sp_hdroom *hdroom; + u64 module_overheat_initial_val; +}; + +struct mlxsw_sp_port_type_speed_ops { + void (*from_ptys_supported_port)(struct mlxsw_sp *mlxsw_sp, + u32 ptys_eth_proto, + struct ethtool_link_ksettings *cmd); + void (*from_ptys_link)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, + unsigned long *mode); + u32 (*from_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto); + void (*from_ptys_link_mode)(struct mlxsw_sp *mlxsw_sp, + bool carrier_ok, u32 ptys_eth_proto, + struct ethtool_link_ksettings *cmd); + int (*ptys_max_speed)(struct mlxsw_sp_port *mlxsw_sp_port, u32 *p_max_speed); + u32 (*to_ptys_advert_link)(struct mlxsw_sp *mlxsw_sp, + const struct ethtool_link_ksettings *cmd); + u32 (*to_ptys_speed_lanes)(struct mlxsw_sp *mlxsw_sp, u8 width, + const struct ethtool_link_ksettings *cmd); + void (*reg_ptys_eth_pack)(struct mlxsw_sp *mlxsw_sp, char *payload, + u16 local_port, u32 proto_admin, bool autoneg); + void (*reg_ptys_eth_unpack)(struct mlxsw_sp *mlxsw_sp, char *payload, + u32 *p_eth_proto_cap, + u32 *p_eth_proto_admin, + u32 *p_eth_proto_oper); + u32 (*ptys_proto_cap_masked_get)(u32 eth_proto_cap); +}; + +struct mlxsw_sp_ports_bitmap { + unsigned long *bitmap; + unsigned int nbits; +}; + +static inline int +mlxsw_sp_port_bitmap_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ports_bitmap *ports_bm) +{ + unsigned int nbits = 
mlxsw_core_max_ports(mlxsw_sp->core); + + ports_bm->nbits = nbits; + ports_bm->bitmap = bitmap_zalloc(nbits, GFP_KERNEL); + if (!ports_bm->bitmap) + return -ENOMEM; + + return 0; +} + +static inline void +mlxsw_sp_port_bitmap_fini(struct mlxsw_sp_ports_bitmap *ports_bm) +{ + bitmap_free(ports_bm->bitmap); +} + +static inline u8 mlxsw_sp_tunnel_ecn_decap(u8 outer_ecn, u8 inner_ecn, + bool *trap_en) +{ + bool set_ce = false; + + *trap_en = !!__INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce); + if (set_ce) + return INET_ECN_CE; + else if (outer_ecn == INET_ECN_ECT_1 && inner_ecn == INET_ECN_ECT_0) + return INET_ECN_ECT_1; + else + return inner_ecn; +} + +static inline struct net_device * +mlxsw_sp_bridge_vxlan_dev_find(struct net_device *br_dev) +{ + struct net_device *dev; + struct list_head *iter; + + netdev_for_each_lower_dev(br_dev, dev, iter) { + if (netif_is_vxlan(dev)) + return dev; + } + + return NULL; +} + +static inline bool mlxsw_sp_bridge_has_vxlan(struct net_device *br_dev) +{ + return !!mlxsw_sp_bridge_vxlan_dev_find(br_dev); +} + +static inline int +mlxsw_sp_vxlan_mapped_vid(const struct net_device *vxlan_dev, u16 *p_vid) +{ + struct bridge_vlan_info vinfo; + u16 vid = 0; + int err; + + err = br_vlan_get_pvid(vxlan_dev, &vid); + if (err || !vid) + goto out; + + err = br_vlan_get_info(vxlan_dev, vid, &vinfo); + if (err || !(vinfo.flags & BRIDGE_VLAN_INFO_UNTAGGED)) + vid = 0; + +out: + *p_vid = vid; + return err; +} + +static inline bool +mlxsw_sp_port_is_pause_en(const struct mlxsw_sp_port *mlxsw_sp_port) +{ + return mlxsw_sp_port->link.tx_pause || mlxsw_sp_port->link.rx_pause; +} + +static inline struct mlxsw_sp_port * +mlxsw_sp_port_lagged_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 port_index) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + u16 local_port; + + local_port = mlxsw_core_lag_mapping_get(mlxsw_sp->core, + lag_id, port_index); + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + return mlxsw_sp_port && mlxsw_sp_port->lagged ? mlxsw_sp_port : NULL; +} + +static inline struct mlxsw_sp_port_vlan * +mlxsw_sp_port_vlan_find_by_vid(const struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + + list_for_each_entry(mlxsw_sp_port_vlan, &mlxsw_sp_port->vlans_list, + list) { + if (mlxsw_sp_port_vlan->vid == vid) + return mlxsw_sp_port_vlan; + } + + return NULL; +} + +enum mlxsw_sp_flood_type { + MLXSW_SP_FLOOD_TYPE_UC, + MLXSW_SP_FLOOD_TYPE_BC, + MLXSW_SP_FLOOD_TYPE_MC, +}; + +int mlxsw_sp_port_get_stats_raw(struct net_device *dev, int grp, + int prio, char *ppcnt_pl); +int mlxsw_sp_port_admin_status_set(struct mlxsw_sp_port *mlxsw_sp_port, + bool is_up); +int +mlxsw_sp_port_vlan_classification_set(struct mlxsw_sp_port *mlxsw_sp_port, + bool is_8021ad_tagged, + bool is_8021q_tagged); +static inline bool +mlxsw_sp_local_port_is_valid(struct mlxsw_sp *mlxsw_sp, u16 local_port) +{ + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); + + return local_port < max_ports && local_port; +} + +/* spectrum_buffers.c */ +struct mlxsw_sp_hdroom_prio { + /* Number of port buffer associated with this priority. This is the + * actually configured value. + */ + u8 buf_idx; + /* Value of buf_idx deduced from the DCB ETS configuration. */ + u8 ets_buf_idx; + /* Value of buf_idx taken from the dcbnl_setbuffer configuration. */ + u8 set_buf_idx; + bool lossy; +}; + +struct mlxsw_sp_hdroom_buf { + u32 thres_cells; + u32 size_cells; + /* Size requirement form dcbnl_setbuffer. 
*/ + u32 set_size_cells; + bool lossy; +}; + +enum mlxsw_sp_hdroom_mode { + MLXSW_SP_HDROOM_MODE_DCB, + MLXSW_SP_HDROOM_MODE_TC, +}; + +#define MLXSW_SP_PB_COUNT 10 + +struct mlxsw_sp_hdroom { + enum mlxsw_sp_hdroom_mode mode; + + struct { + struct mlxsw_sp_hdroom_prio prio[IEEE_8021Q_MAX_PRIORITIES]; + } prios; + struct { + struct mlxsw_sp_hdroom_buf buf[MLXSW_SP_PB_COUNT]; + } bufs; + struct { + /* Size actually configured for the internal buffer. Equal to + * reserve when internal buffer is enabled. + */ + u32 size_cells; + /* Space reserved in the headroom for the internal buffer. Port + * buffers are not allowed to grow into this space. + */ + u32 reserve_cells; + bool enable; + } int_buf; + int delay_bytes; + int mtu; +}; + +int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_buffers_fini(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port); +void mlxsw_sp_port_buffers_fini(struct mlxsw_sp_port *mlxsw_sp_port); +int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, + struct devlink_sb_pool_info *pool_info); +int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type, + struct netlink_ext_ack *extack); +int mlxsw_sp_sb_port_pool_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold); +int mlxsw_sp_sb_port_pool_set(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 threshold, struct netlink_ext_ack *extack); +int mlxsw_sp_sb_tc_pool_bind_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold); +int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold, + struct netlink_ext_ack *extack); +int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core, + unsigned int sb_index); +int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core, + unsigned int sb_index); +int mlxsw_sp_sb_occ_port_pool_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max); +int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max); +u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells); +u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, u32 bytes); +void mlxsw_sp_hdroom_prios_reset_buf_idx(struct mlxsw_sp_hdroom *hdroom); +void mlxsw_sp_hdroom_bufs_reset_lossiness(struct mlxsw_sp_hdroom *hdroom); +void mlxsw_sp_hdroom_bufs_reset_sizes(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_hdroom *hdroom); +int mlxsw_sp_hdroom_configure(struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_hdroom *hdroom); +struct mlxsw_sp_sample_params * +mlxsw_sp_sample_trigger_params_lookup(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sample_trigger *trigger); +int +mlxsw_sp_sample_trigger_params_set(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sample_trigger *trigger, + const struct mlxsw_sp_sample_params *params, + struct netlink_ext_ack *extack); +void +mlxsw_sp_sample_trigger_params_unset(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sample_trigger *trigger); +int 
mlxsw_sp_ipv6_addr_kvdl_index_get(struct mlxsw_sp *mlxsw_sp, + const struct in6_addr *addr6, + u32 *p_kvdl_index); +void +mlxsw_sp_ipv6_addr_put(struct mlxsw_sp *mlxsw_sp, const struct in6_addr *addr6); + +extern const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals; +extern const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals; + +extern const struct mlxsw_sp_sb_ops mlxsw_sp1_sb_ops; +extern const struct mlxsw_sp_sb_ops mlxsw_sp2_sb_ops; +extern const struct mlxsw_sp_sb_ops mlxsw_sp3_sb_ops; + +/* spectrum_switchdev.c */ +int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid, + bool adding); +void +mlxsw_sp_port_vlan_bridge_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); +int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *brport_dev, + struct net_device *br_dev, + struct netlink_ext_ack *extack); +void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *brport_dev, + struct net_device *br_dev); +bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev); +int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + const struct net_device *vxlan_dev, u16 vid, + struct netlink_ext_ack *extack); +void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, + const struct net_device *vxlan_dev); +extern struct notifier_block mlxsw_sp_switchdev_notifier; + +/* spectrum.c */ +void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb, + u16 local_port, void *priv); +void mlxsw_sp_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u16 local_port); +int mlxsw_sp_port_speed_get(struct mlxsw_sp_port *mlxsw_sp_port, u32 *speed); +int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, + bool dwrr, u8 dwrr_weight); +int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port, + u8 switch_prio, u8 tclass); +int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qeec_hr hr, u8 index, + u8 next_index, u32 maxrate, u8 burst_size); +enum mlxsw_reg_spms_state mlxsw_sp_stp_spms_state(u8 stp_state); +int mlxsw_sp_port_vid_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, + u8 state); +int mlxsw_sp_port_vp_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable); +int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, + bool learn_enable); +int mlxsw_sp_ethtype_to_sver_type(u16 ethtype, u8 *p_sver_type); +int mlxsw_sp_port_egress_ethtype_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 ethtype); +int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, + u16 ethtype); +struct mlxsw_sp_port_vlan * +mlxsw_sp_port_vlan_create(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); +void mlxsw_sp_port_vlan_destroy(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); +int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, + u16 vid_end, bool is_member, bool untagged); +int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp, + unsigned int counter_index, u64 *packets, + u64 *bytes); +int mlxsw_sp_flow_counter_alloc(struct mlxsw_sp *mlxsw_sp, + unsigned int *p_counter_index); +void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp, + unsigned int counter_index); +void mlxsw_sp_txhdr_construct(struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info); +int 
mlxsw_sp_txhdr_ptp_data_construct(struct mlxsw_core *mlxsw_core, + struct mlxsw_sp_port *mlxsw_sp_port, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info); +bool mlxsw_sp_port_dev_check(const struct net_device *dev); +struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev); +struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev); +struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev); +void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port); +struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev); +int mlxsw_sp_parsing_depth_inc(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_parsing_depth_dec(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_parsing_vxlan_udp_dport_set(struct mlxsw_sp *mlxsw_sp, + __be16 udp_dport); + +/* spectrum_dcb.c */ +#ifdef CONFIG_MLXSW_SPECTRUM_DCB +int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port); +void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port); +#else +static inline int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + return 0; +} +static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{} +#endif + +/* spectrum_router.c */ +enum mlxsw_sp_l3proto { + MLXSW_SP_L3_PROTO_IPV4, + MLXSW_SP_L3_PROTO_IPV6, +#define MLXSW_SP_L3_PROTO_MAX (MLXSW_SP_L3_PROTO_IPV6 + 1) +}; + +union mlxsw_sp_l3addr { + __be32 addr4; + struct in6_addr addr6; +}; + +u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif); +int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, + struct netlink_ext_ack *extack); +void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp, + const struct net_device *macvlan_dev); +int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, + unsigned long event, void *ptr); +int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, + unsigned long event, void *ptr); +int +mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, + struct net_device *l3_dev, + struct netlink_ext_ack *extack); +void +mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); +void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev); +bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev); +u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); +u16 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip, + u32 tunnel_index); +void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip); +int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + u16 *vr_id); +int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + u16 *ul_rif_index); +void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index); + +/* spectrum_kvdl.c */ +enum mlxsw_sp_kvdl_entry_type { + MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, + MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET, + MLXSW_SP_KVDL_ENTRY_TYPE_PBS, + MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR, + MLXSW_SP_KVDL_ENTRY_TYPE_IPV6_ADDRESS, + MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT, +}; + +static inline unsigned int +mlxsw_sp_kvdl_entry_size(enum mlxsw_sp_kvdl_entry_type type) +{ + switch (type) { + case MLXSW_SP_KVDL_ENTRY_TYPE_ADJ: + case MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET: + case 
MLXSW_SP_KVDL_ENTRY_TYPE_PBS: + case MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR: + case MLXSW_SP_KVDL_ENTRY_TYPE_IPV6_ADDRESS: + case MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT: + default: + return 1; + } +} + +struct mlxsw_sp_kvdl_ops { + size_t priv_size; + int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv); + void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv); + int (*alloc)(struct mlxsw_sp *mlxsw_sp, void *priv, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, u32 *p_entry_index); + void (*free)(struct mlxsw_sp *mlxsw_sp, void *priv, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, int entry_index); + int (*alloc_size_query)(struct mlxsw_sp *mlxsw_sp, void *priv, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, + unsigned int *p_alloc_count); + int (*resources_register)(struct mlxsw_sp *mlxsw_sp, void *priv); +}; + +int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, u32 *p_entry_index); +void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, int entry_index); +int mlxsw_sp_kvdl_alloc_count_query(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, + unsigned int *p_alloc_count); + +/* spectrum1_kvdl.c */ +extern const struct mlxsw_sp_kvdl_ops mlxsw_sp1_kvdl_ops; +int mlxsw_sp1_kvdl_resources_register(struct mlxsw_core *mlxsw_core); + +/* spectrum2_kvdl.c */ +extern const struct mlxsw_sp_kvdl_ops mlxsw_sp2_kvdl_ops; + +enum mlxsw_sp_acl_mangle_field { + MLXSW_SP_ACL_MANGLE_FIELD_IP_DSFIELD, + MLXSW_SP_ACL_MANGLE_FIELD_IP_DSCP, + MLXSW_SP_ACL_MANGLE_FIELD_IP_ECN, + MLXSW_SP_ACL_MANGLE_FIELD_IP_SPORT, + MLXSW_SP_ACL_MANGLE_FIELD_IP_DPORT, + MLXSW_SP_ACL_MANGLE_FIELD_IP4_SIP, + MLXSW_SP_ACL_MANGLE_FIELD_IP4_DIP, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_1, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_2, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_3, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_4, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_1, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_2, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_3, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_4, +}; + +struct mlxsw_sp_acl_rule_info { + unsigned int priority; + struct mlxsw_afk_element_values values; + struct mlxsw_afa_block *act_block; + u8 action_created:1, + ingress_bind_blocker:1, + egress_bind_blocker:1, + counter_valid:1, + policer_index_valid:1, + ipv6_valid:1; + unsigned int counter_index; + u16 policer_index; + struct { + u32 prev_val; + enum mlxsw_sp_acl_mangle_field prev_field; + } ipv6; +}; + +/* spectrum_flow.c */ +struct mlxsw_sp_flow_block { + struct list_head binding_list; + struct { + struct list_head list; + unsigned int min_prio; + unsigned int max_prio; + } mall; + struct mlxsw_sp_acl_ruleset *ruleset_zero; + struct mlxsw_sp *mlxsw_sp; + unsigned int rule_count; + unsigned int disable_count; + unsigned int ingress_blocker_rule_count; + unsigned int egress_blocker_rule_count; + unsigned int ingress_binding_count; + unsigned int egress_binding_count; + struct net *net; +}; + +struct mlxsw_sp_flow_block_binding { + struct list_head list; + struct mlxsw_sp_port *mlxsw_sp_port; + bool ingress; +}; + +static inline struct mlxsw_sp * +mlxsw_sp_flow_block_mlxsw_sp(struct mlxsw_sp_flow_block *block) +{ + return block->mlxsw_sp; +} + +static inline unsigned int +mlxsw_sp_flow_block_rule_count(const struct mlxsw_sp_flow_block *block) +{ + return block ? 
block->rule_count : 0; +} + +static inline void +mlxsw_sp_flow_block_disable_inc(struct mlxsw_sp_flow_block *block) +{ + if (block) + block->disable_count++; +} + +static inline void +mlxsw_sp_flow_block_disable_dec(struct mlxsw_sp_flow_block *block) +{ + if (block) + block->disable_count--; +} + +static inline bool +mlxsw_sp_flow_block_disabled(const struct mlxsw_sp_flow_block *block) +{ + return block->disable_count; +} + +static inline bool +mlxsw_sp_flow_block_is_egress_bound(const struct mlxsw_sp_flow_block *block) +{ + return block->egress_binding_count; +} + +static inline bool +mlxsw_sp_flow_block_is_ingress_bound(const struct mlxsw_sp_flow_block *block) +{ + return block->ingress_binding_count; +} + +static inline bool +mlxsw_sp_flow_block_is_mixed_bound(const struct mlxsw_sp_flow_block *block) +{ + return block->ingress_binding_count && block->egress_binding_count; +} + +struct mlxsw_sp_flow_block *mlxsw_sp_flow_block_create(struct mlxsw_sp *mlxsw_sp, + struct net *net); +void mlxsw_sp_flow_block_destroy(struct mlxsw_sp_flow_block *block); +int mlxsw_sp_setup_tc_block_clsact(struct mlxsw_sp_port *mlxsw_sp_port, + struct flow_block_offload *f, + bool ingress); + +/* spectrum_acl.c */ +struct mlxsw_sp_acl_ruleset; + +enum mlxsw_sp_acl_profile { + MLXSW_SP_ACL_PROFILE_FLOWER, + MLXSW_SP_ACL_PROFILE_MR, +}; + +struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl); + +int mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, + struct mlxsw_sp_flow_block_binding *binding); +void mlxsw_sp_acl_ruleset_unbind(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, + struct mlxsw_sp_flow_block_binding *binding); +struct mlxsw_sp_acl_ruleset * +mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, u32 chain_index, + enum mlxsw_sp_acl_profile profile); +struct mlxsw_sp_acl_ruleset * +mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, u32 chain_index, + enum mlxsw_sp_acl_profile profile, + struct mlxsw_afk_element_usage *tmplt_elusage); +void mlxsw_sp_acl_ruleset_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ruleset *ruleset); +u16 mlxsw_sp_acl_ruleset_group_id(struct mlxsw_sp_acl_ruleset *ruleset); +void mlxsw_sp_acl_ruleset_prio_get(struct mlxsw_sp_acl_ruleset *ruleset, + unsigned int *p_min_prio, + unsigned int *p_max_prio); + +struct mlxsw_sp_acl_rule_info * +mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl, + struct mlxsw_afa_block *afa_block); +void mlxsw_sp_acl_rulei_destroy(struct mlxsw_sp_acl_rule_info *rulei); +int mlxsw_sp_acl_rulei_commit(struct mlxsw_sp_acl_rule_info *rulei); +void mlxsw_sp_acl_rulei_priority(struct mlxsw_sp_acl_rule_info *rulei, + unsigned int priority); +void mlxsw_sp_acl_rulei_keymask_u32(struct mlxsw_sp_acl_rule_info *rulei, + enum mlxsw_afk_element element, + u32 key_value, u32 mask_value); +void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei, + enum mlxsw_afk_element element, + const char *key_value, + const char *mask_value, unsigned int len); +int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei); +int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, + u16 group_id); +int mlxsw_sp_acl_rulei_act_terminate(struct mlxsw_sp_acl_rule_info *rulei); +int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei, + bool ingress, + const struct flow_action_cookie *fa_cookie, + struct netlink_ext_ack *extack); +int mlxsw_sp_acl_rulei_act_trap(struct 
mlxsw_sp_acl_rule_info *rulei); +int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct mlxsw_sp_flow_block *block, + struct net_device *out_dev, + struct netlink_ext_ack *extack); +int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct net_device *out_dev, + struct netlink_ext_ack *extack); +int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u32 action, u16 vid, u16 proto, u8 prio, + struct netlink_ext_ack *extack); +int mlxsw_sp_acl_rulei_act_priority(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u32 prio, struct netlink_ext_ack *extack); +int mlxsw_sp_acl_rulei_act_mangle(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + enum flow_action_mangle_base htype, + u32 offset, u32 mask, u32 val, + struct netlink_ext_ack *extack); +int mlxsw_sp_acl_rulei_act_police(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u32 index, u64 rate_bytes_ps, + u32 burst, struct netlink_ext_ack *extack); +int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct netlink_ext_ack *extack); +int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u16 fid, struct netlink_ext_ack *extack); +int mlxsw_sp_acl_rulei_act_sample(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct mlxsw_sp_flow_block *block, + struct psample_group *psample_group, u32 rate, + u32 trunc_size, bool truncate, + struct netlink_ext_ack *extack); + +struct mlxsw_sp_acl_rule; + +struct mlxsw_sp_acl_rule * +mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ruleset *ruleset, + unsigned long cookie, + struct mlxsw_afa_block *afa_block, + struct netlink_ext_ack *extack); +void mlxsw_sp_acl_rule_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule); +int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule); +void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule); +int mlxsw_sp_acl_rule_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule, + struct mlxsw_afa_block *afa_block); +struct mlxsw_sp_acl_rule * +mlxsw_sp_acl_rule_lookup(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ruleset *ruleset, + unsigned long cookie); +struct mlxsw_sp_acl_rule_info * +mlxsw_sp_acl_rule_rulei(struct mlxsw_sp_acl_rule *rule); +int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule, + u64 *packets, u64 *bytes, u64 *drops, + u64 *last_use, + enum flow_action_hw_stats *used_hw_stats); + +struct mlxsw_sp_fid *mlxsw_sp_acl_dummy_fid(struct mlxsw_sp *mlxsw_sp); + +static inline const struct flow_action_cookie * +mlxsw_sp_acl_act_cookie_lookup(struct mlxsw_sp *mlxsw_sp, u32 cookie_index) +{ + return mlxsw_afa_cookie_lookup(mlxsw_sp->afa, cookie_index); +} + +int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp); +u32 mlxsw_sp_acl_region_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_acl_region_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, u32 val); + +struct mlxsw_sp_acl_mangle_action; + +struct mlxsw_sp_acl_rulei_ops { + int (*act_mangle_field)(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, + struct mlxsw_sp_acl_mangle_action *mact, u32 val, + struct netlink_ext_ack *extack); +}; + +extern struct 
mlxsw_sp_acl_rulei_ops mlxsw_sp1_acl_rulei_ops; +extern struct mlxsw_sp_acl_rulei_ops mlxsw_sp2_acl_rulei_ops; + +/* spectrum_acl_tcam.c */ +struct mlxsw_sp_acl_tcam; +struct mlxsw_sp_acl_tcam_region; + +struct mlxsw_sp_acl_tcam_ops { + enum mlxsw_reg_ptar_key_type key_type; + size_t priv_size; + int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv, + struct mlxsw_sp_acl_tcam *tcam); + void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv); + size_t region_priv_size; + int (*region_init)(struct mlxsw_sp *mlxsw_sp, void *region_priv, + void *tcam_priv, + struct mlxsw_sp_acl_tcam_region *region, + void *hints_priv); + void (*region_fini)(struct mlxsw_sp *mlxsw_sp, void *region_priv); + int (*region_associate)(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region); + void * (*region_rehash_hints_get)(void *region_priv); + void (*region_rehash_hints_put)(void *hints_priv); + size_t chunk_priv_size; + void (*chunk_init)(void *region_priv, void *chunk_priv, + unsigned int priority); + void (*chunk_fini)(void *chunk_priv); + size_t entry_priv_size; + int (*entry_add)(struct mlxsw_sp *mlxsw_sp, + void *region_priv, void *chunk_priv, + void *entry_priv, + struct mlxsw_sp_acl_rule_info *rulei); + void (*entry_del)(struct mlxsw_sp *mlxsw_sp, + void *region_priv, void *chunk_priv, + void *entry_priv); + int (*entry_action_replace)(struct mlxsw_sp *mlxsw_sp, + void *region_priv, void *entry_priv, + struct mlxsw_sp_acl_rule_info *rulei); + int (*entry_activity_get)(struct mlxsw_sp *mlxsw_sp, + void *region_priv, void *entry_priv, + bool *activity); +}; + +/* spectrum1_acl_tcam.c */ +extern const struct mlxsw_sp_acl_tcam_ops mlxsw_sp1_acl_tcam_ops; + +/* spectrum2_acl_tcam.c */ +extern const struct mlxsw_sp_acl_tcam_ops mlxsw_sp2_acl_tcam_ops; + +/* spectrum_acl_flex_actions.c */ +extern const struct mlxsw_afa_ops mlxsw_sp1_act_afa_ops; +extern const struct mlxsw_afa_ops mlxsw_sp2_act_afa_ops; + +/* spectrum_acl_flex_keys.c */ +extern const struct mlxsw_afk_ops mlxsw_sp1_afk_ops; +extern const struct mlxsw_afk_ops mlxsw_sp2_afk_ops; +extern const struct mlxsw_afk_ops mlxsw_sp4_afk_ops; + +/* spectrum_acl_bloom_filter.c */ +extern const struct mlxsw_sp_acl_bf_ops mlxsw_sp2_acl_bf_ops; +extern const struct mlxsw_sp_acl_bf_ops mlxsw_sp4_acl_bf_ops; + +/* spectrum_matchall.c */ +struct mlxsw_sp_mall_ops { + int (*sample_add)(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry, + struct netlink_ext_ack *extack); + void (*sample_del)(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry); +}; + +extern const struct mlxsw_sp_mall_ops mlxsw_sp1_mall_ops; +extern const struct mlxsw_sp_mall_ops mlxsw_sp2_mall_ops; + +enum mlxsw_sp_mall_action_type { + MLXSW_SP_MALL_ACTION_TYPE_MIRROR, + MLXSW_SP_MALL_ACTION_TYPE_SAMPLE, + MLXSW_SP_MALL_ACTION_TYPE_TRAP, +}; + +struct mlxsw_sp_mall_mirror_entry { + const struct net_device *to_dev; + int span_id; +}; + +struct mlxsw_sp_mall_trap_entry { + int span_id; +}; + +struct mlxsw_sp_mall_sample_entry { + struct mlxsw_sp_sample_params params; + int span_id; /* Relevant for Spectrum-2 onwards. 
*/ +}; + +struct mlxsw_sp_mall_entry { + struct list_head list; + unsigned long cookie; + unsigned int priority; + enum mlxsw_sp_mall_action_type type; + bool ingress; + union { + struct mlxsw_sp_mall_mirror_entry mirror; + struct mlxsw_sp_mall_trap_entry trap; + struct mlxsw_sp_mall_sample_entry sample; + }; + struct rcu_head rcu; +}; + +int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, + struct tc_cls_matchall_offload *f); +void mlxsw_sp_mall_destroy(struct mlxsw_sp_flow_block *block, + struct tc_cls_matchall_offload *f); +int mlxsw_sp_mall_port_bind(struct mlxsw_sp_flow_block *block, + struct mlxsw_sp_port *mlxsw_sp_port, + struct netlink_ext_ack *extack); +void mlxsw_sp_mall_port_unbind(struct mlxsw_sp_flow_block *block, + struct mlxsw_sp_port *mlxsw_sp_port); +int mlxsw_sp_mall_prio_get(struct mlxsw_sp_flow_block *block, u32 chain_index, + unsigned int *p_min_prio, unsigned int *p_max_prio); + +/* spectrum_flower.c */ +int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, + struct flow_cls_offload *f); +void mlxsw_sp_flower_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, + struct flow_cls_offload *f); +int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, + struct flow_cls_offload *f); +int mlxsw_sp_flower_tmplt_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, + struct flow_cls_offload *f); +void mlxsw_sp_flower_tmplt_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, + struct flow_cls_offload *f); +int mlxsw_sp_flower_prio_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, + u32 chain_index, unsigned int *p_min_prio, + unsigned int *p_max_prio); + +/* spectrum_qdisc.c */ +int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port); +void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port); +int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_red_qopt_offload *p); +int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_prio_qopt_offload *p); +int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_ets_qopt_offload *p); +int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_tbf_qopt_offload *p); +int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_fifo_qopt_offload *p); +int mlxsw_sp_setup_tc_block_qevent_early_drop(struct mlxsw_sp_port *mlxsw_sp_port, + struct flow_block_offload *f); +int mlxsw_sp_setup_tc_block_qevent_mark(struct mlxsw_sp_port *mlxsw_sp_port, + struct flow_block_offload *f); + +/* spectrum_fid.c */ +bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index); +struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_index(struct mlxsw_sp *mlxsw_sp, + u16 fid_index); +int mlxsw_sp_fid_nve_ifindex(const struct mlxsw_sp_fid *fid, int *nve_ifindex); +int mlxsw_sp_fid_nve_type(const struct mlxsw_sp_fid *fid, + enum mlxsw_sp_nve_type *p_type); +struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp, + __be32 vni); +int mlxsw_sp_fid_vni(const struct mlxsw_sp_fid *fid, __be32 *vni); +int mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid, + u32 nve_flood_index); +void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid); +bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid); +int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, enum mlxsw_sp_nve_type type, + __be32 vni, int nve_ifindex); +void 
mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid); +bool mlxsw_sp_fid_vni_is_set(const struct mlxsw_sp_fid *fid); +void mlxsw_sp_fid_fdb_clear_offload(const struct mlxsw_sp_fid *fid, + const struct net_device *nve_dev); +int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid, + enum mlxsw_sp_flood_type packet_type, u16 local_port, + bool member); +int mlxsw_sp_fid_port_vid_map(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); +void mlxsw_sp_fid_port_vid_unmap(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); +u16 mlxsw_sp_fid_index(const struct mlxsw_sp_fid *fid); +enum mlxsw_sp_fid_type mlxsw_sp_fid_type(const struct mlxsw_sp_fid *fid); +int mlxsw_sp_fid_rif_set(struct mlxsw_sp_fid *fid, struct mlxsw_sp_rif *rif); +void mlxsw_sp_fid_rif_unset(struct mlxsw_sp_fid *fid); +struct mlxsw_sp_rif *mlxsw_sp_fid_rif(const struct mlxsw_sp_fid *fid); +enum mlxsw_sp_rif_type +mlxsw_sp_fid_type_rif_type(const struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_fid_type type); +u16 mlxsw_sp_fid_8021q_vid(const struct mlxsw_sp_fid *fid); +struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_get(struct mlxsw_sp *mlxsw_sp, u16 vid); +struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp, + int br_ifindex); +struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_lookup(struct mlxsw_sp *mlxsw_sp, + u16 vid); +struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_lookup(struct mlxsw_sp *mlxsw_sp, + int br_ifindex); +struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp, + u16 rif_index); +struct mlxsw_sp_fid *mlxsw_sp_fid_dummy_get(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_fid_put(struct mlxsw_sp_fid *fid); +int mlxsw_sp_port_fids_init(struct mlxsw_sp_port *mlxsw_sp_port); +void mlxsw_sp_port_fids_fini(struct mlxsw_sp_port *mlxsw_sp_port); +int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp); + +extern const struct mlxsw_sp_fid_family *mlxsw_sp1_fid_family_arr[]; +extern const struct mlxsw_sp_fid_family *mlxsw_sp2_fid_family_arr[]; + +/* spectrum_mr.c */ +enum mlxsw_sp_mr_route_prio { + MLXSW_SP_MR_ROUTE_PRIO_SG, + MLXSW_SP_MR_ROUTE_PRIO_STARG, + MLXSW_SP_MR_ROUTE_PRIO_CATCHALL, + __MLXSW_SP_MR_ROUTE_PRIO_MAX +}; + +#define MLXSW_SP_MR_ROUTE_PRIO_MAX (__MLXSW_SP_MR_ROUTE_PRIO_MAX - 1) + +struct mlxsw_sp_mr_route_key; + +struct mlxsw_sp_mr_tcam_ops { + size_t priv_size; + int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv); + void (*fini)(void *priv); + size_t route_priv_size; + int (*route_create)(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv, + struct mlxsw_sp_mr_route_key *key, + struct mlxsw_afa_block *afa_block, + enum mlxsw_sp_mr_route_prio prio); + void (*route_destroy)(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv, + struct mlxsw_sp_mr_route_key *key); + int (*route_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + struct mlxsw_sp_mr_route_key *key, + struct mlxsw_afa_block *afa_block); +}; + +/* spectrum1_mr_tcam.c */ +extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp1_mr_tcam_ops; + +/* spectrum2_mr_tcam.c */ +extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp2_mr_tcam_ops; + +/* spectrum_nve.c */ +struct mlxsw_sp_nve_params { + enum mlxsw_sp_nve_type type; + __be32 vni; + const struct net_device *dev; + u16 ethertype; +}; + +extern const struct mlxsw_sp_nve_ops *mlxsw_sp1_nve_ops_arr[]; +extern const struct mlxsw_sp_nve_ops *mlxsw_sp2_nve_ops_arr[]; + +int mlxsw_sp_nve_learned_ip_resolve(struct mlxsw_sp *mlxsw_sp, u32 uip, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr); +int 
mlxsw_sp_nve_flood_ip_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr); +void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr); +int mlxsw_sp_nve_ipv6_addr_kvdl_set(struct mlxsw_sp *mlxsw_sp, + const struct in6_addr *addr6, + u32 *p_kvdl_index); +void mlxsw_sp_nve_ipv6_addr_kvdl_unset(struct mlxsw_sp *mlxsw_sp, + const struct in6_addr *addr6); +int +mlxsw_sp_nve_ipv6_addr_map_replace(struct mlxsw_sp *mlxsw_sp, const char *mac, + u16 fid_index, + const struct in6_addr *new_addr6); +void mlxsw_sp_nve_ipv6_addr_map_del(struct mlxsw_sp *mlxsw_sp, const char *mac, + u16 fid_index); +int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, + struct mlxsw_sp_nve_params *params, + struct netlink_ext_ack *extack); +void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid); +int mlxsw_sp_port_nve_init(struct mlxsw_sp_port *mlxsw_sp_port); +void mlxsw_sp_port_nve_fini(struct mlxsw_sp_port *mlxsw_sp_port); +int mlxsw_sp_nve_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp); + +/* spectrum_nve_vxlan.c */ +int mlxsw_sp_nve_inc_parsing_depth_get(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_nve_inc_parsing_depth_put(struct mlxsw_sp *mlxsw_sp); + +/* spectrum_trap.c */ +int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_devlink_traps_fini(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_trap_init(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, void *trap_ctx); +void mlxsw_sp_trap_fini(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, void *trap_ctx); +int mlxsw_sp_trap_action_set(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, + enum devlink_trap_action action, + struct netlink_ext_ack *extack); +int mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_group *group); +int mlxsw_sp_trap_group_set(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_group *group, + const struct devlink_trap_policer *policer, + struct netlink_ext_ack *extack); +int +mlxsw_sp_trap_policer_init(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer); +void mlxsw_sp_trap_policer_fini(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer); +int +mlxsw_sp_trap_policer_set(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer, + u64 rate, u64 burst, struct netlink_ext_ack *extack); +int +mlxsw_sp_trap_policer_counter_get(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer, + u64 *p_drops); +int mlxsw_sp_trap_group_policer_hw_id_get(struct mlxsw_sp *mlxsw_sp, u16 id, + bool *p_enabled, u16 *p_hw_id); + +static inline struct net *mlxsw_sp_net(struct mlxsw_sp *mlxsw_sp) +{ + return mlxsw_core_net(mlxsw_sp->core); +} + +/* spectrum_ethtool.c */ +extern const struct ethtool_ops mlxsw_sp_port_ethtool_ops; +extern const struct mlxsw_sp_port_type_speed_ops mlxsw_sp1_port_type_speed_ops; +extern const struct mlxsw_sp_port_type_speed_ops mlxsw_sp2_port_type_speed_ops; + +/* spectrum_policer.c */ +extern const struct mlxsw_sp_policer_core_ops mlxsw_sp1_policer_core_ops; +extern const struct mlxsw_sp_policer_core_ops mlxsw_sp2_policer_core_ops; + +enum mlxsw_sp_policer_type { + MLXSW_SP_POLICER_TYPE_SINGLE_RATE, + + __MLXSW_SP_POLICER_TYPE_MAX, + MLXSW_SP_POLICER_TYPE_MAX = __MLXSW_SP_POLICER_TYPE_MAX - 1, 
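+ /* __MLXSW_SP_POLICER_TYPE_MAX is a count-only sentinel; the value of
+ * MLXSW_SP_POLICER_TYPE_MAX right above is the last valid policer type
+ * and can be used to bound per-type arrays and loops.
+ */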
+}; + +struct mlxsw_sp_policer_params { + u64 rate; + u64 burst; + bool bytes; +}; +
+int mlxsw_sp_policer_add(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_policer_type type, + const struct mlxsw_sp_policer_params *params, + struct netlink_ext_ack *extack, u16 *p_policer_index);
+void mlxsw_sp_policer_del(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_policer_type type, + u16 policer_index);
+int mlxsw_sp_policer_drops_counter_get(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_policer_type type, + u16 policer_index, u64 *p_drops);
+int mlxsw_sp_policers_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_policers_fini(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_policer_resources_register(struct mlxsw_core *mlxsw_core); +
+/* spectrum_pgt.c */ +int mlxsw_sp_pgt_mid_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_mid); +void mlxsw_sp_pgt_mid_free(struct mlxsw_sp *mlxsw_sp, u16 mid_base);
+int mlxsw_sp_pgt_mid_alloc_range(struct mlxsw_sp *mlxsw_sp, u16 mid_base, + u16 count); +void mlxsw_sp_pgt_mid_free_range(struct mlxsw_sp *mlxsw_sp, u16 mid_base, + u16 count);
+int mlxsw_sp_pgt_entry_port_set(struct mlxsw_sp *mlxsw_sp, u16 mid, + u16 smpe, u16 local_port, bool member);
+int mlxsw_sp_pgt_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_pgt_fini(struct mlxsw_sp *mlxsw_sp); + +#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c new file mode 100644 index 000000000..3a636f753 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c @@ -0,0 +1,254 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */ +
+#include <linux/kernel.h> +#include <linux/slab.h> + +#include "reg.h" +#include "core.h" +#include "spectrum.h" +#include "spectrum_acl_tcam.h" +
+struct mlxsw_sp1_acl_tcam_region { + struct mlxsw_sp_acl_ctcam_region cregion; + struct mlxsw_sp_acl_tcam_region *region; + struct { + struct mlxsw_sp_acl_ctcam_chunk cchunk; + struct mlxsw_sp_acl_ctcam_entry centry; + struct mlxsw_sp_acl_rule_info *rulei; + } catchall; +}; +
+struct mlxsw_sp1_acl_tcam_chunk { + struct mlxsw_sp_acl_ctcam_chunk cchunk; +}; +
+struct mlxsw_sp1_acl_tcam_entry { + struct mlxsw_sp_acl_ctcam_entry centry; +}; +
+static int +mlxsw_sp1_acl_ctcam_region_entry_insert(struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_entry *centry, + const char *mask) +{ + return 0; +} +
+static void +mlxsw_sp1_acl_ctcam_region_entry_remove(struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_entry *centry) +{ +} +
+static const struct mlxsw_sp_acl_ctcam_region_ops +mlxsw_sp1_acl_ctcam_region_ops = { + .entry_insert = mlxsw_sp1_acl_ctcam_region_entry_insert, + .entry_remove = mlxsw_sp1_acl_ctcam_region_entry_remove, +}; +
+static int mlxsw_sp1_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv, + struct mlxsw_sp_acl_tcam *tcam) +{ + return 0; +} +
+static void mlxsw_sp1_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv) +{ +} +
+static int +mlxsw_sp1_acl_ctcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_acl_tcam_region *region) +{ + struct mlxsw_sp_acl_rule_info *rulei; + int err; +
+ mlxsw_sp_acl_ctcam_chunk_init(&region->cregion, + &region->catchall.cchunk, + MLXSW_SP_ACL_TCAM_CATCHALL_PRIO);
+ rulei = mlxsw_sp_acl_rulei_create(mlxsw_sp->acl, NULL); + if (IS_ERR(rulei)) { + err = PTR_ERR(rulei); + goto err_rulei_create; + }
+ err = mlxsw_sp_acl_rulei_act_continue(rulei); + if (WARN_ON(err)) + goto err_rulei_act_continue;
+ err = mlxsw_sp_acl_rulei_commit(rulei); + if (err) + goto err_rulei_commit;
+ err = mlxsw_sp_acl_ctcam_entry_add(mlxsw_sp, &region->cregion, + &region->catchall.cchunk, + &region->catchall.centry, + rulei, false);
+ if (err) + goto err_entry_add; + region->catchall.rulei = rulei; + return 0; +
+err_entry_add: +err_rulei_commit: +err_rulei_act_continue: + mlxsw_sp_acl_rulei_destroy(rulei);
+err_rulei_create: + mlxsw_sp_acl_ctcam_chunk_fini(&region->catchall.cchunk); + return err; +} +
+static void +mlxsw_sp1_acl_ctcam_region_catchall_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_acl_tcam_region *region) +{ + struct mlxsw_sp_acl_rule_info *rulei = region->catchall.rulei; +
+ mlxsw_sp_acl_ctcam_entry_del(mlxsw_sp, &region->cregion, + &region->catchall.cchunk, + &region->catchall.centry);
+ mlxsw_sp_acl_rulei_destroy(rulei); + mlxsw_sp_acl_ctcam_chunk_fini(&region->catchall.cchunk); +} +
+static int +mlxsw_sp1_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv, + void *tcam_priv, + struct mlxsw_sp_acl_tcam_region *_region, + void *hints_priv) +{ + struct mlxsw_sp1_acl_tcam_region *region = region_priv; + int err; +
+ err = mlxsw_sp_acl_ctcam_region_init(mlxsw_sp, &region->cregion, + _region, + &mlxsw_sp1_acl_ctcam_region_ops); + if (err) + return err;
+ err = mlxsw_sp1_acl_ctcam_region_catchall_add(mlxsw_sp, region); + if (err) + goto err_catchall_add; + region->region = _region; + return 0; +
+err_catchall_add: + mlxsw_sp_acl_ctcam_region_fini(&region->cregion); + return err; +} +
+static void +mlxsw_sp1_acl_tcam_region_fini(struct mlxsw_sp *mlxsw_sp, void *region_priv) +{ + struct mlxsw_sp1_acl_tcam_region *region = region_priv; +
+ mlxsw_sp1_acl_ctcam_region_catchall_del(mlxsw_sp, region); + mlxsw_sp_acl_ctcam_region_fini(&region->cregion); +} +
+static int +mlxsw_sp1_acl_tcam_region_associate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region) +{ + return 0; +} +
+static void mlxsw_sp1_acl_tcam_chunk_init(void *region_priv, void *chunk_priv, + unsigned int priority) +{ + struct mlxsw_sp1_acl_tcam_region *region = region_priv; + struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv; +
+ mlxsw_sp_acl_ctcam_chunk_init(&region->cregion, &chunk->cchunk, + priority); +} +
+static void mlxsw_sp1_acl_tcam_chunk_fini(void *chunk_priv) +{ + struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv; +
+ mlxsw_sp_acl_ctcam_chunk_fini(&chunk->cchunk); +} +
+static int mlxsw_sp1_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp, + void *region_priv, void *chunk_priv, + void *entry_priv, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp1_acl_tcam_region *region = region_priv; + struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv; + struct mlxsw_sp1_acl_tcam_entry *entry = entry_priv; +
+ return mlxsw_sp_acl_ctcam_entry_add(mlxsw_sp, &region->cregion, + &chunk->cchunk, &entry->centry, + rulei, false); +} +
+static void mlxsw_sp1_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp, + void *region_priv, void *chunk_priv, + void *entry_priv) +{ + struct mlxsw_sp1_acl_tcam_region *region = region_priv; + struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv; + struct mlxsw_sp1_acl_tcam_entry *entry = entry_priv; +
+ mlxsw_sp_acl_ctcam_entry_del(mlxsw_sp, &region->cregion, + &chunk->cchunk, &entry->centry); +} +
+static int +mlxsw_sp1_acl_tcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, + void *region_priv, void *entry_priv, + struct mlxsw_sp_acl_rule_info *rulei) +{ + return -EOPNOTSUPP; +} +
+static int +mlxsw_sp1_acl_tcam_region_entry_activity_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *_region, +
unsigned int offset, + bool *activity) +{ + char ptce2_pl[MLXSW_REG_PTCE2_LEN]; + int err; + + mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_QUERY_CLEAR_ON_READ, + _region->tcam_region_info, offset, 0); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl); + if (err) + return err; + *activity = mlxsw_reg_ptce2_a_get(ptce2_pl); + return 0; +} + +static int +mlxsw_sp1_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp, + void *region_priv, void *entry_priv, + bool *activity) +{ + struct mlxsw_sp1_acl_tcam_region *region = region_priv; + struct mlxsw_sp1_acl_tcam_entry *entry = entry_priv; + unsigned int offset; + + offset = mlxsw_sp_acl_ctcam_entry_offset(&entry->centry); + return mlxsw_sp1_acl_tcam_region_entry_activity_get(mlxsw_sp, + region->region, + offset, activity); +} + +const struct mlxsw_sp_acl_tcam_ops mlxsw_sp1_acl_tcam_ops = { + .key_type = MLXSW_REG_PTAR_KEY_TYPE_FLEX, + .priv_size = 0, + .init = mlxsw_sp1_acl_tcam_init, + .fini = mlxsw_sp1_acl_tcam_fini, + .region_priv_size = sizeof(struct mlxsw_sp1_acl_tcam_region), + .region_init = mlxsw_sp1_acl_tcam_region_init, + .region_fini = mlxsw_sp1_acl_tcam_region_fini, + .region_associate = mlxsw_sp1_acl_tcam_region_associate, + .chunk_priv_size = sizeof(struct mlxsw_sp1_acl_tcam_chunk), + .chunk_init = mlxsw_sp1_acl_tcam_chunk_init, + .chunk_fini = mlxsw_sp1_acl_tcam_chunk_fini, + .entry_priv_size = sizeof(struct mlxsw_sp1_acl_tcam_entry), + .entry_add = mlxsw_sp1_acl_tcam_entry_add, + .entry_del = mlxsw_sp1_acl_tcam_entry_del, + .entry_action_replace = mlxsw_sp1_acl_tcam_entry_action_replace, + .entry_activity_get = mlxsw_sp1_acl_tcam_entry_activity_get, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c new file mode 100644 index 000000000..1e3fc9893 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c @@ -0,0 +1,427 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. 
All rights reserved */ + +#include <linux/kernel.h> +#include <linux/bitops.h> + +#include "spectrum.h" + +#define MLXSW_SP1_KVDL_SINGLE_BASE 0 +#define MLXSW_SP1_KVDL_SINGLE_SIZE 16384 +#define MLXSW_SP1_KVDL_SINGLE_END \ + (MLXSW_SP1_KVDL_SINGLE_SIZE + MLXSW_SP1_KVDL_SINGLE_BASE - 1) + +#define MLXSW_SP1_KVDL_CHUNKS_BASE \ + (MLXSW_SP1_KVDL_SINGLE_BASE + MLXSW_SP1_KVDL_SINGLE_SIZE) +#define MLXSW_SP1_KVDL_CHUNKS_SIZE 49152 +#define MLXSW_SP1_KVDL_CHUNKS_END \ + (MLXSW_SP1_KVDL_CHUNKS_SIZE + MLXSW_SP1_KVDL_CHUNKS_BASE - 1) + +#define MLXSW_SP1_KVDL_LARGE_CHUNKS_BASE \ + (MLXSW_SP1_KVDL_CHUNKS_BASE + MLXSW_SP1_KVDL_CHUNKS_SIZE) +#define MLXSW_SP1_KVDL_LARGE_CHUNKS_SIZE \ + (MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP1_KVDL_LARGE_CHUNKS_BASE) +#define MLXSW_SP1_KVDL_LARGE_CHUNKS_END \ + (MLXSW_SP1_KVDL_LARGE_CHUNKS_SIZE + MLXSW_SP1_KVDL_LARGE_CHUNKS_BASE - 1) + +#define MLXSW_SP1_KVDL_SINGLE_ALLOC_SIZE 1 +#define MLXSW_SP1_KVDL_CHUNKS_ALLOC_SIZE 32 +#define MLXSW_SP1_KVDL_LARGE_CHUNKS_ALLOC_SIZE 512 + +struct mlxsw_sp1_kvdl_part_info { + unsigned int part_index; + unsigned int start_index; + unsigned int end_index; + unsigned int alloc_size; + enum mlxsw_sp_resource_id resource_id; +}; + +enum mlxsw_sp1_kvdl_part_id { + MLXSW_SP1_KVDL_PART_ID_SINGLE, + MLXSW_SP1_KVDL_PART_ID_CHUNKS, + MLXSW_SP1_KVDL_PART_ID_LARGE_CHUNKS, +}; + +#define MLXSW_SP1_KVDL_PART_INFO(id) \ +[MLXSW_SP1_KVDL_PART_ID_##id] = { \ + .start_index = MLXSW_SP1_KVDL_##id##_BASE, \ + .end_index = MLXSW_SP1_KVDL_##id##_END, \ + .alloc_size = MLXSW_SP1_KVDL_##id##_ALLOC_SIZE, \ + .resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_##id, \ +} + +static const struct mlxsw_sp1_kvdl_part_info mlxsw_sp1_kvdl_parts_info[] = { + MLXSW_SP1_KVDL_PART_INFO(SINGLE), + MLXSW_SP1_KVDL_PART_INFO(CHUNKS), + MLXSW_SP1_KVDL_PART_INFO(LARGE_CHUNKS), +}; + +#define MLXSW_SP1_KVDL_PARTS_INFO_LEN ARRAY_SIZE(mlxsw_sp1_kvdl_parts_info) + +struct mlxsw_sp1_kvdl_part { + struct mlxsw_sp1_kvdl_part_info info; + unsigned long usage[]; /* Entries */ +}; + +struct mlxsw_sp1_kvdl { + struct mlxsw_sp1_kvdl_part *parts[MLXSW_SP1_KVDL_PARTS_INFO_LEN]; +}; + +static struct mlxsw_sp1_kvdl_part * +mlxsw_sp1_kvdl_alloc_size_part(struct mlxsw_sp1_kvdl *kvdl, + unsigned int alloc_size) +{ + struct mlxsw_sp1_kvdl_part *part, *min_part = NULL; + int i; + + for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++) { + part = kvdl->parts[i]; + if (alloc_size <= part->info.alloc_size && + (!min_part || + part->info.alloc_size <= min_part->info.alloc_size)) + min_part = part; + } + + return min_part ?: ERR_PTR(-ENOBUFS); +} + +static struct mlxsw_sp1_kvdl_part * +mlxsw_sp1_kvdl_index_part(struct mlxsw_sp1_kvdl *kvdl, u32 kvdl_index) +{ + struct mlxsw_sp1_kvdl_part *part; + int i; + + for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++) { + part = kvdl->parts[i]; + if (kvdl_index >= part->info.start_index && + kvdl_index <= part->info.end_index) + return part; + } + + return ERR_PTR(-EINVAL); +} + +static u32 +mlxsw_sp1_kvdl_to_kvdl_index(const struct mlxsw_sp1_kvdl_part_info *info, + unsigned int entry_index) +{ + return info->start_index + entry_index * info->alloc_size; +} + +static unsigned int +mlxsw_sp1_kvdl_to_entry_index(const struct mlxsw_sp1_kvdl_part_info *info, + u32 kvdl_index) +{ + return (kvdl_index - info->start_index) / info->alloc_size; +} + +static int mlxsw_sp1_kvdl_part_alloc(struct mlxsw_sp1_kvdl_part *part, + u32 *p_kvdl_index) +{ + const struct mlxsw_sp1_kvdl_part_info *info = &part->info; + unsigned int entry_index, nr_entries; + + nr_entries = (info->end_index - 
info->start_index + 1) / + info->alloc_size; + entry_index = find_first_zero_bit(part->usage, nr_entries); + if (entry_index == nr_entries) + return -ENOBUFS; + __set_bit(entry_index, part->usage); + + *p_kvdl_index = mlxsw_sp1_kvdl_to_kvdl_index(info, entry_index); + + return 0; +} + +static void mlxsw_sp1_kvdl_part_free(struct mlxsw_sp1_kvdl_part *part, + u32 kvdl_index) +{ + const struct mlxsw_sp1_kvdl_part_info *info = &part->info; + unsigned int entry_index; + + entry_index = mlxsw_sp1_kvdl_to_entry_index(info, kvdl_index); + __clear_bit(entry_index, part->usage); +} + +static int mlxsw_sp1_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, void *priv, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, + u32 *p_entry_index) +{ + struct mlxsw_sp1_kvdl *kvdl = priv; + struct mlxsw_sp1_kvdl_part *part; + + /* Find partition with smallest allocation size satisfying the + * requested size. + */ + part = mlxsw_sp1_kvdl_alloc_size_part(kvdl, entry_count); + if (IS_ERR(part)) + return PTR_ERR(part); + + return mlxsw_sp1_kvdl_part_alloc(part, p_entry_index); +} + +static void mlxsw_sp1_kvdl_free(struct mlxsw_sp *mlxsw_sp, void *priv, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, int entry_index) +{ + struct mlxsw_sp1_kvdl *kvdl = priv; + struct mlxsw_sp1_kvdl_part *part; + + part = mlxsw_sp1_kvdl_index_part(kvdl, entry_index); + if (IS_ERR(part)) + return; + mlxsw_sp1_kvdl_part_free(part, entry_index); +} + +static int mlxsw_sp1_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp, + void *priv, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, + unsigned int *p_alloc_size) +{ + struct mlxsw_sp1_kvdl *kvdl = priv; + struct mlxsw_sp1_kvdl_part *part; + + part = mlxsw_sp1_kvdl_alloc_size_part(kvdl, entry_count); + if (IS_ERR(part)) + return PTR_ERR(part); + + *p_alloc_size = part->info.alloc_size; + + return 0; +} + +static void mlxsw_sp1_kvdl_part_update(struct mlxsw_sp1_kvdl_part *part, + struct mlxsw_sp1_kvdl_part *part_prev, + unsigned int size) +{ + if (!part_prev) { + part->info.end_index = size - 1; + } else { + part->info.start_index = part_prev->info.end_index + 1; + part->info.end_index = part->info.start_index + size - 1; + } +} + +static struct mlxsw_sp1_kvdl_part * +mlxsw_sp1_kvdl_part_init(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp1_kvdl_part_info *info, + struct mlxsw_sp1_kvdl_part *part_prev) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp1_kvdl_part *part; + bool need_update = true; + unsigned int nr_entries; + u64 resource_size; + int err; + + err = devl_resource_size_get(devlink, info->resource_id, + &resource_size); + if (err) { + need_update = false; + resource_size = info->end_index - info->start_index + 1; + } + + nr_entries = div_u64(resource_size, info->alloc_size); + part = kzalloc(struct_size(part, usage, BITS_TO_LONGS(nr_entries)), + GFP_KERNEL); + if (!part) + return ERR_PTR(-ENOMEM); + + memcpy(&part->info, info, sizeof(part->info)); + + if (need_update) + mlxsw_sp1_kvdl_part_update(part, part_prev, resource_size); + return part; +} + +static void mlxsw_sp1_kvdl_part_fini(struct mlxsw_sp1_kvdl_part *part) +{ + kfree(part); +} + +static int mlxsw_sp1_kvdl_parts_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_kvdl *kvdl) +{ + const struct mlxsw_sp1_kvdl_part_info *info; + struct mlxsw_sp1_kvdl_part *part_prev = NULL; + int err, i; + + for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++) { + info = &mlxsw_sp1_kvdl_parts_info[i]; + kvdl->parts[i] = mlxsw_sp1_kvdl_part_init(mlxsw_sp, info, + 
part_prev); + if (IS_ERR(kvdl->parts[i])) { + err = PTR_ERR(kvdl->parts[i]); + goto err_kvdl_part_init; + } + part_prev = kvdl->parts[i]; + } + return 0; + +err_kvdl_part_init: + for (i--; i >= 0; i--) + mlxsw_sp1_kvdl_part_fini(kvdl->parts[i]); + return err; +} + +static void mlxsw_sp1_kvdl_parts_fini(struct mlxsw_sp1_kvdl *kvdl) +{ + int i; + + for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++) + mlxsw_sp1_kvdl_part_fini(kvdl->parts[i]); +} + +static u64 mlxsw_sp1_kvdl_part_occ(struct mlxsw_sp1_kvdl_part *part) +{ + const struct mlxsw_sp1_kvdl_part_info *info = &part->info; + unsigned int nr_entries; + int bit = -1; + u64 occ = 0; + + nr_entries = (info->end_index - + info->start_index + 1) / + info->alloc_size; + while ((bit = find_next_bit(part->usage, nr_entries, bit + 1)) + < nr_entries) + occ += info->alloc_size; + return occ; +} + +static u64 mlxsw_sp1_kvdl_occ_get(void *priv) +{ + const struct mlxsw_sp1_kvdl *kvdl = priv; + u64 occ = 0; + int i; + + for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++) + occ += mlxsw_sp1_kvdl_part_occ(kvdl->parts[i]); + + return occ; +} + +static u64 mlxsw_sp1_kvdl_single_occ_get(void *priv) +{ + const struct mlxsw_sp1_kvdl *kvdl = priv; + struct mlxsw_sp1_kvdl_part *part; + + part = kvdl->parts[MLXSW_SP1_KVDL_PART_ID_SINGLE]; + return mlxsw_sp1_kvdl_part_occ(part); +} + +static u64 mlxsw_sp1_kvdl_chunks_occ_get(void *priv) +{ + const struct mlxsw_sp1_kvdl *kvdl = priv; + struct mlxsw_sp1_kvdl_part *part; + + part = kvdl->parts[MLXSW_SP1_KVDL_PART_ID_CHUNKS]; + return mlxsw_sp1_kvdl_part_occ(part); +} + +static u64 mlxsw_sp1_kvdl_large_chunks_occ_get(void *priv) +{ + const struct mlxsw_sp1_kvdl *kvdl = priv; + struct mlxsw_sp1_kvdl_part *part; + + part = kvdl->parts[MLXSW_SP1_KVDL_PART_ID_LARGE_CHUNKS]; + return mlxsw_sp1_kvdl_part_occ(part); +} + +static int mlxsw_sp1_kvdl_init(struct mlxsw_sp *mlxsw_sp, void *priv) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp1_kvdl *kvdl = priv; + int err; + + err = mlxsw_sp1_kvdl_parts_init(mlxsw_sp, kvdl); + if (err) + return err; + devl_resource_occ_get_register(devlink, + MLXSW_SP_RESOURCE_KVD_LINEAR, + mlxsw_sp1_kvdl_occ_get, + kvdl); + devl_resource_occ_get_register(devlink, + MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE, + mlxsw_sp1_kvdl_single_occ_get, + kvdl); + devl_resource_occ_get_register(devlink, + MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS, + mlxsw_sp1_kvdl_chunks_occ_get, + kvdl); + devl_resource_occ_get_register(devlink, + MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS, + mlxsw_sp1_kvdl_large_chunks_occ_get, + kvdl); + return 0; +} + +static void mlxsw_sp1_kvdl_fini(struct mlxsw_sp *mlxsw_sp, void *priv) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp1_kvdl *kvdl = priv; + + devl_resource_occ_get_unregister(devlink, + MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS); + devl_resource_occ_get_unregister(devlink, + MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS); + devl_resource_occ_get_unregister(devlink, + MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE); + devl_resource_occ_get_unregister(devlink, + MLXSW_SP_RESOURCE_KVD_LINEAR); + mlxsw_sp1_kvdl_parts_fini(kvdl); +} + +const struct mlxsw_sp_kvdl_ops mlxsw_sp1_kvdl_ops = { + .priv_size = sizeof(struct mlxsw_sp1_kvdl), + .init = mlxsw_sp1_kvdl_init, + .fini = mlxsw_sp1_kvdl_fini, + .alloc = mlxsw_sp1_kvdl_alloc, + .free = mlxsw_sp1_kvdl_free, + .alloc_size_query = mlxsw_sp1_kvdl_alloc_size_query, +}; + +int mlxsw_sp1_kvdl_resources_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = 
priv_to_devlink(mlxsw_core); + static struct devlink_resource_size_params size_params; + u32 kvdl_max_size; + int err; + + kvdl_max_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) - + MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE) - + MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE); + + devlink_resource_size_params_init(&size_params, 0, kvdl_max_size, + MLXSW_SP1_KVDL_SINGLE_ALLOC_SIZE, + DEVLINK_RESOURCE_UNIT_ENTRY); + err = devl_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES, + MLXSW_SP1_KVDL_SINGLE_SIZE, + MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE, + MLXSW_SP_RESOURCE_KVD_LINEAR, + &size_params); + if (err) + return err; + + devlink_resource_size_params_init(&size_params, 0, kvdl_max_size, + MLXSW_SP1_KVDL_CHUNKS_ALLOC_SIZE, + DEVLINK_RESOURCE_UNIT_ENTRY); + err = devl_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS, + MLXSW_SP1_KVDL_CHUNKS_SIZE, + MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS, + MLXSW_SP_RESOURCE_KVD_LINEAR, + &size_params); + if (err) + return err; + + devlink_resource_size_params_init(&size_params, 0, kvdl_max_size, + MLXSW_SP1_KVDL_LARGE_CHUNKS_ALLOC_SIZE, + DEVLINK_RESOURCE_UNIT_ENTRY); + err = devl_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS, + MLXSW_SP1_KVDL_LARGE_CHUNKS_SIZE, + MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS, + MLXSW_SP_RESOURCE_KVD_LINEAR, + &size_params); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c new file mode 100644 index 000000000..c8c675369 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c @@ -0,0 +1,342 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */ + +#include <linux/kernel.h> +#include <linux/parman.h> + +#include "reg.h" +#include "spectrum.h" +#include "core_acl_flex_actions.h" +#include "spectrum_mr.h" + +struct mlxsw_sp1_mr_tcam_region { + struct mlxsw_sp *mlxsw_sp; + enum mlxsw_reg_rtar_key_type rtar_key_type; + struct parman *parman; + struct parman_prio *parman_prios; +}; + +struct mlxsw_sp1_mr_tcam { + struct mlxsw_sp1_mr_tcam_region tcam_regions[MLXSW_SP_L3_PROTO_MAX]; +}; + +struct mlxsw_sp1_mr_tcam_route { + struct parman_item parman_item; + struct parman_prio *parman_prio; +}; + +static int mlxsw_sp1_mr_tcam_route_replace(struct mlxsw_sp *mlxsw_sp, + struct parman_item *parman_item, + struct mlxsw_sp_mr_route_key *key, + struct mlxsw_afa_block *afa_block) +{ + char rmft2_pl[MLXSW_REG_RMFT2_LEN]; + + switch (key->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, true, parman_item->index, + key->vrid, + MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0, + ntohl(key->group.addr4), + ntohl(key->group_mask.addr4), + ntohl(key->source.addr4), + ntohl(key->source_mask.addr4), + mlxsw_afa_block_first_set(afa_block)); + break; + case MLXSW_SP_L3_PROTO_IPV6: + mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, true, parman_item->index, + key->vrid, + MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0, + key->group.addr6, + key->group_mask.addr6, + key->source.addr6, + key->source_mask.addr6, + mlxsw_afa_block_first_set(afa_block)); + } + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl); +} + +static int mlxsw_sp1_mr_tcam_route_remove(struct mlxsw_sp *mlxsw_sp, + struct parman_item *parman_item, + struct mlxsw_sp_mr_route_key *key) +{ + struct in6_addr zero_addr = IN6ADDR_ANY_INIT; + char rmft2_pl[MLXSW_REG_RMFT2_LEN]; + + switch (key->proto) { + case 
MLXSW_SP_L3_PROTO_IPV4: + mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, false, parman_item->index, + key->vrid, 0, 0, 0, 0, 0, 0, NULL); + break; + case MLXSW_SP_L3_PROTO_IPV6: + mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, false, parman_item->index, + key->vrid, 0, 0, zero_addr, zero_addr, + zero_addr, zero_addr, NULL); + break; + } + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl); +} + +static struct mlxsw_sp1_mr_tcam_region * +mlxsw_sp1_mr_tcam_protocol_region(struct mlxsw_sp1_mr_tcam *mr_tcam, + enum mlxsw_sp_l3proto proto) +{ + return &mr_tcam->tcam_regions[proto]; +} + +static int +mlxsw_sp1_mr_tcam_route_parman_item_add(struct mlxsw_sp1_mr_tcam *mr_tcam, + struct mlxsw_sp1_mr_tcam_route *route, + struct mlxsw_sp_mr_route_key *key, + enum mlxsw_sp_mr_route_prio prio) +{ + struct mlxsw_sp1_mr_tcam_region *tcam_region; + int err; + + tcam_region = mlxsw_sp1_mr_tcam_protocol_region(mr_tcam, key->proto); + err = parman_item_add(tcam_region->parman, + &tcam_region->parman_prios[prio], + &route->parman_item); + if (err) + return err; + + route->parman_prio = &tcam_region->parman_prios[prio]; + return 0; +} + +static void +mlxsw_sp1_mr_tcam_route_parman_item_remove(struct mlxsw_sp1_mr_tcam *mr_tcam, + struct mlxsw_sp1_mr_tcam_route *route, + struct mlxsw_sp_mr_route_key *key) +{ + struct mlxsw_sp1_mr_tcam_region *tcam_region; + + tcam_region = mlxsw_sp1_mr_tcam_protocol_region(mr_tcam, key->proto); + parman_item_remove(tcam_region->parman, + route->parman_prio, &route->parman_item); +} + +static int +mlxsw_sp1_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv, + struct mlxsw_sp_mr_route_key *key, + struct mlxsw_afa_block *afa_block, + enum mlxsw_sp_mr_route_prio prio) +{ + struct mlxsw_sp1_mr_tcam_route *route = route_priv; + struct mlxsw_sp1_mr_tcam *mr_tcam = priv; + int err; + + err = mlxsw_sp1_mr_tcam_route_parman_item_add(mr_tcam, route, + key, prio); + if (err) + return err; + + err = mlxsw_sp1_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + key, afa_block); + if (err) + goto err_route_replace; + return 0; + +err_route_replace: + mlxsw_sp1_mr_tcam_route_parman_item_remove(mr_tcam, route, key); + return err; +} + +static void +mlxsw_sp1_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv, + struct mlxsw_sp_mr_route_key *key) +{ + struct mlxsw_sp1_mr_tcam_route *route = route_priv; + struct mlxsw_sp1_mr_tcam *mr_tcam = priv; + + mlxsw_sp1_mr_tcam_route_remove(mlxsw_sp, &route->parman_item, key); + mlxsw_sp1_mr_tcam_route_parman_item_remove(mr_tcam, route, key); +} + +static int +mlxsw_sp1_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp, + void *route_priv, + struct mlxsw_sp_mr_route_key *key, + struct mlxsw_afa_block *afa_block) +{ + struct mlxsw_sp1_mr_tcam_route *route = route_priv; + + return mlxsw_sp1_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + key, afa_block); +} + +#define MLXSW_SP1_MR_TCAM_REGION_BASE_COUNT 16 +#define MLXSW_SP1_MR_TCAM_REGION_RESIZE_STEP 16 + +static int +mlxsw_sp1_mr_tcam_region_alloc(struct mlxsw_sp1_mr_tcam_region *mr_tcam_region) +{ + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rtar_pl[MLXSW_REG_RTAR_LEN]; + + mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_ALLOCATE, + mr_tcam_region->rtar_key_type, + MLXSW_SP1_MR_TCAM_REGION_BASE_COUNT); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl); +} + +static void +mlxsw_sp1_mr_tcam_region_free(struct mlxsw_sp1_mr_tcam_region *mr_tcam_region) +{ + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char 
rtar_pl[MLXSW_REG_RTAR_LEN]; +
+ mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_DEALLOCATE, + mr_tcam_region->rtar_key_type, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl); +} +
+static int mlxsw_sp1_mr_tcam_region_parman_resize(void *priv, + unsigned long new_count) +{ + struct mlxsw_sp1_mr_tcam_region *mr_tcam_region = priv; + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rtar_pl[MLXSW_REG_RTAR_LEN]; + u64 max_tcam_rules; +
+ max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES); + if (new_count > max_tcam_rules) + return -EINVAL;
+ mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_RESIZE, + mr_tcam_region->rtar_key_type, new_count); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl); +} +
+static void mlxsw_sp1_mr_tcam_region_parman_move(void *priv, + unsigned long from_index, + unsigned long to_index, + unsigned long count) +{ + struct mlxsw_sp1_mr_tcam_region *mr_tcam_region = priv; + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rrcr_pl[MLXSW_REG_RRCR_LEN]; +
+ mlxsw_reg_rrcr_pack(rrcr_pl, MLXSW_REG_RRCR_OP_MOVE, + from_index, count, + mr_tcam_region->rtar_key_type, to_index); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rrcr), rrcr_pl); +} +
+static const struct parman_ops mlxsw_sp1_mr_tcam_region_parman_ops = { + .base_count = MLXSW_SP1_MR_TCAM_REGION_BASE_COUNT, + .resize_step = MLXSW_SP1_MR_TCAM_REGION_RESIZE_STEP, + .resize = mlxsw_sp1_mr_tcam_region_parman_resize, + .move = mlxsw_sp1_mr_tcam_region_parman_move, + .algo = PARMAN_ALGO_TYPE_LSORT, +}; +
+static int +mlxsw_sp1_mr_tcam_region_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_mr_tcam_region *mr_tcam_region, + enum mlxsw_reg_rtar_key_type rtar_key_type) +{ + struct parman_prio *parman_prios; + struct parman *parman; + int err; + int i; +
+ mr_tcam_region->rtar_key_type = rtar_key_type; + mr_tcam_region->mlxsw_sp = mlxsw_sp; +
+ err = mlxsw_sp1_mr_tcam_region_alloc(mr_tcam_region); + if (err) + return err; +
+ parman = parman_create(&mlxsw_sp1_mr_tcam_region_parman_ops, + mr_tcam_region); + if (!parman) { + err = -ENOMEM; + goto err_parman_create; + } + mr_tcam_region->parman = parman; +
+ parman_prios = kmalloc_array(MLXSW_SP_MR_ROUTE_PRIO_MAX + 1, + sizeof(*parman_prios), GFP_KERNEL); + if (!parman_prios) { + err = -ENOMEM; + goto err_parman_prios_alloc; + } + mr_tcam_region->parman_prios = parman_prios; +
+ for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++) + parman_prio_init(mr_tcam_region->parman, + &mr_tcam_region->parman_prios[i], i); + return 0; +
+err_parman_prios_alloc: + parman_destroy(parman); +err_parman_create: + mlxsw_sp1_mr_tcam_region_free(mr_tcam_region); + return err; +} +
+static void +mlxsw_sp1_mr_tcam_region_fini(struct mlxsw_sp1_mr_tcam_region *mr_tcam_region) +{ + int i; +
+ for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++) + parman_prio_fini(&mr_tcam_region->parman_prios[i]); + kfree(mr_tcam_region->parman_prios); + parman_destroy(mr_tcam_region->parman); + mlxsw_sp1_mr_tcam_region_free(mr_tcam_region); +} +
+static int mlxsw_sp1_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv) +{ + struct mlxsw_sp1_mr_tcam *mr_tcam = priv; + struct mlxsw_sp1_mr_tcam_region *region = &mr_tcam->tcam_regions[0]; + u32 rtar_key; + int err; +
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_TCAM_RULES)) + return -EIO; +
+ rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST; + err = mlxsw_sp1_mr_tcam_region_init(mlxsw_sp, + &region[MLXSW_SP_L3_PROTO_IPV4], + rtar_key); + if (err) + return err; +
+ rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV6_MULTICAST; + err = mlxsw_sp1_mr_tcam_region_init(mlxsw_sp, + &region[MLXSW_SP_L3_PROTO_IPV6], + rtar_key); + if (err) + goto err_ipv6_region_init; +
+ return 0; +
+err_ipv6_region_init: + mlxsw_sp1_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV4]); + return err; +} +
+static void mlxsw_sp1_mr_tcam_fini(void *priv) +{ + struct mlxsw_sp1_mr_tcam *mr_tcam = priv; + struct mlxsw_sp1_mr_tcam_region *region = &mr_tcam->tcam_regions[0]; +
+ mlxsw_sp1_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV6]); + mlxsw_sp1_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV4]); +} +
+const struct mlxsw_sp_mr_tcam_ops mlxsw_sp1_mr_tcam_ops = { + .priv_size = sizeof(struct mlxsw_sp1_mr_tcam), + .init = mlxsw_sp1_mr_tcam_init, + .fini = mlxsw_sp1_mr_tcam_fini, + .route_priv_size = sizeof(struct mlxsw_sp1_mr_tcam_route), + .route_create = mlxsw_sp1_mr_tcam_route_create, + .route_destroy = mlxsw_sp1_mr_tcam_route_destroy, + .route_update = mlxsw_sp1_mr_tcam_route_update, +};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c new file mode 100644 index 000000000..5b0210862 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c @@ -0,0 +1,280 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ +
+#include <linux/kernel.h> +
+#include "spectrum.h" +#include "spectrum_acl_tcam.h" +#include "core_acl_flex_actions.h" +
+struct mlxsw_sp2_acl_tcam { + struct mlxsw_sp_acl_atcam atcam; + u32 kvdl_index; + unsigned int kvdl_count; +}; +
+struct mlxsw_sp2_acl_tcam_region { + struct mlxsw_sp_acl_atcam_region aregion; + struct mlxsw_sp_acl_tcam_region *region; +}; +
+struct mlxsw_sp2_acl_tcam_chunk { + struct mlxsw_sp_acl_atcam_chunk achunk; +}; +
+struct mlxsw_sp2_acl_tcam_entry { + struct mlxsw_sp_acl_atcam_entry aentry; + struct mlxsw_afa_block *act_block; +}; +
+static int +mlxsw_sp2_acl_ctcam_region_entry_insert(struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_entry *centry, + const char *mask) +{ + struct mlxsw_sp_acl_atcam_region *aregion; + struct mlxsw_sp_acl_atcam_entry *aentry; + struct mlxsw_sp_acl_erp_mask *erp_mask; +
+ aregion = mlxsw_sp_acl_tcam_cregion_aregion(cregion); + aentry = mlxsw_sp_acl_tcam_centry_aentry(centry); +
+ erp_mask = mlxsw_sp_acl_erp_mask_get(aregion, mask, true); + if (IS_ERR(erp_mask)) + return PTR_ERR(erp_mask); + aentry->erp_mask = erp_mask; +
+ return 0; +} +
+static void +mlxsw_sp2_acl_ctcam_region_entry_remove(struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_entry *centry) +{ + struct mlxsw_sp_acl_atcam_region *aregion; + struct mlxsw_sp_acl_atcam_entry *aentry; +
+ aregion = mlxsw_sp_acl_tcam_cregion_aregion(cregion); + aentry = mlxsw_sp_acl_tcam_centry_aentry(centry); +
+ mlxsw_sp_acl_erp_mask_put(aregion, aentry->erp_mask); +} +
+static const struct mlxsw_sp_acl_ctcam_region_ops +mlxsw_sp2_acl_ctcam_region_ops = { + .entry_insert = mlxsw_sp2_acl_ctcam_region_entry_insert, + .entry_remove = mlxsw_sp2_acl_ctcam_region_entry_remove, +}; +
+static int mlxsw_sp2_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv, + struct mlxsw_sp_acl_tcam *_tcam) +{ + struct mlxsw_sp2_acl_tcam *tcam = priv; + struct mlxsw_afa_block *afa_block; + char pefa_pl[MLXSW_REG_PEFA_LEN]; + char pgcr_pl[MLXSW_REG_PGCR_LEN]; + char *enc_actions; + int i; + int err; +
+ /* Some TCAM regions are not exposed to the host and used internally + * by the device.
Allocate KVDL entries for the default actions of + * these regions to prevent the host from overwriting them. + */
+ tcam->kvdl_count = _tcam->max_regions; + if (MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_DEFAULT_ACTIONS)) + tcam->kvdl_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, + ACL_MAX_DEFAULT_ACTIONS);
+ err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET, + tcam->kvdl_count, &tcam->kvdl_index); + if (err) + return err; +
+ /* Create flex action block, set default action (continue) + * but don't commit. We need just the current set encoding + * to be written using PEFA register to all indexes for all regions. + */
+ afa_block = mlxsw_afa_block_create(mlxsw_sp->afa); + if (IS_ERR(afa_block)) { + err = PTR_ERR(afa_block); + goto err_afa_block; + }
+ err = mlxsw_afa_block_continue(afa_block); + if (WARN_ON(err)) + goto err_afa_block_continue; + enc_actions = mlxsw_afa_block_cur_set(afa_block); +
+ /* Only write to KVDL entries used by TCAM regions exposed to the + * host. + */
+ for (i = 0; i < _tcam->max_regions; i++) { + mlxsw_reg_pefa_pack(pefa_pl, tcam->kvdl_index + i, + true, enc_actions); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl); + if (err) + goto err_pefa_write; + }
+ mlxsw_reg_pgcr_pack(pgcr_pl, tcam->kvdl_index); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pgcr), pgcr_pl); + if (err) + goto err_pgcr_write; +
+ err = mlxsw_sp_acl_atcam_init(mlxsw_sp, &tcam->atcam); + if (err) + goto err_atcam_init; +
+ mlxsw_afa_block_destroy(afa_block); + return 0; +
+err_atcam_init: +err_pgcr_write: +err_pefa_write: +err_afa_block_continue: + mlxsw_afa_block_destroy(afa_block);
+err_afa_block: + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET, + tcam->kvdl_count, tcam->kvdl_index); + return err; +} +
+static void mlxsw_sp2_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv) +{ + struct mlxsw_sp2_acl_tcam *tcam = priv; +
+ mlxsw_sp_acl_atcam_fini(mlxsw_sp, &tcam->atcam); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET, + tcam->kvdl_count, tcam->kvdl_index); +} +
+static int +mlxsw_sp2_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv, + void *tcam_priv, + struct mlxsw_sp_acl_tcam_region *_region, + void *hints_priv) +{ + struct mlxsw_sp2_acl_tcam_region *region = region_priv; + struct mlxsw_sp2_acl_tcam *tcam = tcam_priv; +
+ region->region = _region; +
+ return mlxsw_sp_acl_atcam_region_init(mlxsw_sp, &tcam->atcam, + &region->aregion, + _region, hints_priv, + &mlxsw_sp2_acl_ctcam_region_ops); +} +
+static void +mlxsw_sp2_acl_tcam_region_fini(struct mlxsw_sp *mlxsw_sp, void *region_priv) +{ + struct mlxsw_sp2_acl_tcam_region *region = region_priv; +
+ mlxsw_sp_acl_atcam_region_fini(&region->aregion); +} +
+static int +mlxsw_sp2_acl_tcam_region_associate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region) +{ + return mlxsw_sp_acl_atcam_region_associate(mlxsw_sp, region->id); +} +
+static void *mlxsw_sp2_acl_tcam_region_rehash_hints_get(void *region_priv) +{ + struct mlxsw_sp2_acl_tcam_region *region = region_priv; +
+ return mlxsw_sp_acl_atcam_rehash_hints_get(&region->aregion); +} +
+static void mlxsw_sp2_acl_tcam_region_rehash_hints_put(void *hints_priv) +{ + mlxsw_sp_acl_atcam_rehash_hints_put(hints_priv); +} +
+static void mlxsw_sp2_acl_tcam_chunk_init(void *region_priv, void *chunk_priv, + unsigned int priority) +{ + struct mlxsw_sp2_acl_tcam_region *region = region_priv; + struct mlxsw_sp2_acl_tcam_chunk *chunk = chunk_priv; +
+ mlxsw_sp_acl_atcam_chunk_init(&region->aregion, &chunk->achunk, + priority); +} +
+static void mlxsw_sp2_acl_tcam_chunk_fini(void *chunk_priv) +{ + struct mlxsw_sp2_acl_tcam_chunk *chunk = chunk_priv; +
+ mlxsw_sp_acl_atcam_chunk_fini(&chunk->achunk); +} +
+static int mlxsw_sp2_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp, + void *region_priv, void *chunk_priv, + void *entry_priv, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp2_acl_tcam_region *region = region_priv; + struct mlxsw_sp2_acl_tcam_chunk *chunk = chunk_priv; + struct mlxsw_sp2_acl_tcam_entry *entry = entry_priv; +
+ entry->act_block = rulei->act_block; + return mlxsw_sp_acl_atcam_entry_add(mlxsw_sp, &region->aregion, + &chunk->achunk, &entry->aentry, + rulei); +} +
+static void mlxsw_sp2_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp, + void *region_priv, void *chunk_priv, + void *entry_priv) +{ + struct mlxsw_sp2_acl_tcam_region *region = region_priv; + struct mlxsw_sp2_acl_tcam_chunk *chunk = chunk_priv; + struct mlxsw_sp2_acl_tcam_entry *entry = entry_priv; +
+ mlxsw_sp_acl_atcam_entry_del(mlxsw_sp, &region->aregion, &chunk->achunk, + &entry->aentry); +} +
+static int +mlxsw_sp2_acl_tcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, + void *region_priv, void *entry_priv, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp2_acl_tcam_region *region = region_priv; + struct mlxsw_sp2_acl_tcam_entry *entry = entry_priv; +
+ entry->act_block = rulei->act_block; + return mlxsw_sp_acl_atcam_entry_action_replace(mlxsw_sp, + &region->aregion, + &entry->aentry, rulei); +} +
+static int +mlxsw_sp2_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp, + void *region_priv, void *entry_priv, + bool *activity) +{ + struct mlxsw_sp2_acl_tcam_entry *entry = entry_priv; +
+ return mlxsw_afa_block_activity_get(entry->act_block, activity); +} +
+const struct mlxsw_sp_acl_tcam_ops mlxsw_sp2_acl_tcam_ops = { + .key_type = MLXSW_REG_PTAR_KEY_TYPE_FLEX2, + .priv_size = sizeof(struct mlxsw_sp2_acl_tcam), + .init = mlxsw_sp2_acl_tcam_init, + .fini = mlxsw_sp2_acl_tcam_fini, + .region_priv_size = sizeof(struct mlxsw_sp2_acl_tcam_region), + .region_init = mlxsw_sp2_acl_tcam_region_init, + .region_fini = mlxsw_sp2_acl_tcam_region_fini, + .region_associate = mlxsw_sp2_acl_tcam_region_associate, + .region_rehash_hints_get = mlxsw_sp2_acl_tcam_region_rehash_hints_get, + .region_rehash_hints_put = mlxsw_sp2_acl_tcam_region_rehash_hints_put, + .chunk_priv_size = sizeof(struct mlxsw_sp2_acl_tcam_chunk), + .chunk_init = mlxsw_sp2_acl_tcam_chunk_init, + .chunk_fini = mlxsw_sp2_acl_tcam_chunk_fini, + .entry_priv_size = sizeof(struct mlxsw_sp2_acl_tcam_entry), + .entry_add = mlxsw_sp2_acl_tcam_entry_add, + .entry_del = mlxsw_sp2_acl_tcam_entry_del, + .entry_action_replace = mlxsw_sp2_acl_tcam_entry_action_replace, + .entry_activity_get = mlxsw_sp2_acl_tcam_entry_activity_get, +};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c new file mode 100644 index 000000000..24ff305a2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c @@ -0,0 +1,273 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ +
+#include <linux/kernel.h> +#include <linux/bitops.h> +
+#include "spectrum.h" +#include "core.h" +#include "reg.h" +#include "resources.h" +
+struct mlxsw_sp2_kvdl_part_info { + u8 res_type; + /* For each defined partition we need to know how many + * usage bits we need and how many indexes there are + * represented by a single bit.
This could be got from FW + * querying appropriate resources. So have the resource + * ids for this purpose in partition definition. + */ + enum mlxsw_res_id usage_bit_count_res_id; + enum mlxsw_res_id index_range_res_id; +}; + +#define MLXSW_SP2_KVDL_PART_INFO(_entry_type, _res_type, \ + _usage_bit_count_res_id, _index_range_res_id) \ +[MLXSW_SP_KVDL_ENTRY_TYPE_##_entry_type] = { \ + .res_type = _res_type, \ + .usage_bit_count_res_id = MLXSW_RES_ID_##_usage_bit_count_res_id, \ + .index_range_res_id = MLXSW_RES_ID_##_index_range_res_id, \ +} + +static const struct mlxsw_sp2_kvdl_part_info mlxsw_sp2_kvdl_parts_info[] = { + MLXSW_SP2_KVDL_PART_INFO(ADJ, 0x21, KVD_SIZE, MAX_KVD_LINEAR_RANGE), + MLXSW_SP2_KVDL_PART_INFO(ACTSET, 0x23, MAX_KVD_ACTION_SETS, + MAX_KVD_ACTION_SETS), + MLXSW_SP2_KVDL_PART_INFO(PBS, 0x24, KVD_SIZE, KVD_SIZE), + MLXSW_SP2_KVDL_PART_INFO(MCRIGR, 0x26, KVD_SIZE, KVD_SIZE), + MLXSW_SP2_KVDL_PART_INFO(IPV6_ADDRESS, 0x28, KVD_SIZE, KVD_SIZE), + MLXSW_SP2_KVDL_PART_INFO(TNUMT, 0x29, KVD_SIZE, KVD_SIZE), +}; + +#define MLXSW_SP2_KVDL_PARTS_INFO_LEN ARRAY_SIZE(mlxsw_sp2_kvdl_parts_info) + +struct mlxsw_sp2_kvdl_part { + const struct mlxsw_sp2_kvdl_part_info *info; + unsigned int usage_bit_count; + unsigned int indexes_per_usage_bit; + unsigned int last_allocated_bit; + unsigned long usage[]; /* Usage bits */ +}; + +struct mlxsw_sp2_kvdl { + struct mlxsw_sp2_kvdl_part *parts[MLXSW_SP2_KVDL_PARTS_INFO_LEN]; +}; + +static int mlxsw_sp2_kvdl_part_find_zero_bits(struct mlxsw_sp2_kvdl_part *part, + unsigned int bit_count, + unsigned int *p_bit) +{ + unsigned int start_bit; + unsigned int bit; + unsigned int i; + bool wrap = false; + + start_bit = part->last_allocated_bit + 1; + if (start_bit == part->usage_bit_count) + start_bit = 0; + bit = start_bit; +again: + bit = find_next_zero_bit(part->usage, part->usage_bit_count, bit); + if (!wrap && bit + bit_count >= part->usage_bit_count) { + wrap = true; + bit = 0; + goto again; + } + if (wrap && bit + bit_count >= start_bit) + return -ENOBUFS; + for (i = 0; i < bit_count; i++) { + if (test_bit(bit + i, part->usage)) { + bit += bit_count; + goto again; + } + } + *p_bit = bit; + return 0; +} + +static int mlxsw_sp2_kvdl_part_alloc(struct mlxsw_sp2_kvdl_part *part, + unsigned int size, + u32 *p_kvdl_index) +{ + unsigned int bit_count; + unsigned int bit; + unsigned int i; + int err; + + bit_count = DIV_ROUND_UP(size, part->indexes_per_usage_bit); + err = mlxsw_sp2_kvdl_part_find_zero_bits(part, bit_count, &bit); + if (err) + return err; + for (i = 0; i < bit_count; i++) + __set_bit(bit + i, part->usage); + *p_kvdl_index = bit * part->indexes_per_usage_bit; + return 0; +} + +static int mlxsw_sp2_kvdl_rec_del(struct mlxsw_sp *mlxsw_sp, u8 res_type, + u16 size, u32 kvdl_index) +{ + char *iedr_pl; + int err; + + iedr_pl = kmalloc(MLXSW_REG_IEDR_LEN, GFP_KERNEL); + if (!iedr_pl) + return -ENOMEM; + + mlxsw_reg_iedr_pack(iedr_pl); + mlxsw_reg_iedr_rec_pack(iedr_pl, 0, res_type, size, kvdl_index); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(iedr), iedr_pl); + kfree(iedr_pl); + return err; +} + +static void mlxsw_sp2_kvdl_part_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp2_kvdl_part *part, + unsigned int size, u32 kvdl_index) +{ + unsigned int bit_count; + unsigned int bit; + unsigned int i; + int err; + + /* We need to ask FW to delete previously used KVD linear index */ + err = mlxsw_sp2_kvdl_rec_del(mlxsw_sp, part->info->res_type, + size, kvdl_index); + if (err) + return; + + bit_count = DIV_ROUND_UP(size, 
part->indexes_per_usage_bit); + bit = kvdl_index / part->indexes_per_usage_bit; + for (i = 0; i < bit_count; i++) + __clear_bit(bit + i, part->usage); +} + +static int mlxsw_sp2_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, void *priv, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, + u32 *p_entry_index) +{ + unsigned int size = entry_count * mlxsw_sp_kvdl_entry_size(type); + struct mlxsw_sp2_kvdl *kvdl = priv; + struct mlxsw_sp2_kvdl_part *part = kvdl->parts[type]; + + return mlxsw_sp2_kvdl_part_alloc(part, size, p_entry_index); +} + +static void mlxsw_sp2_kvdl_free(struct mlxsw_sp *mlxsw_sp, void *priv, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, + int entry_index) +{ + unsigned int size = entry_count * mlxsw_sp_kvdl_entry_size(type); + struct mlxsw_sp2_kvdl *kvdl = priv; + struct mlxsw_sp2_kvdl_part *part = kvdl->parts[type]; + + return mlxsw_sp2_kvdl_part_free(mlxsw_sp, part, size, entry_index); +} + +static int mlxsw_sp2_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp, + void *priv, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, + unsigned int *p_alloc_count) +{ + *p_alloc_count = entry_count; + return 0; +} + +static struct mlxsw_sp2_kvdl_part * +mlxsw_sp2_kvdl_part_init(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp2_kvdl_part_info *info) +{ + unsigned int indexes_per_usage_bit; + struct mlxsw_sp2_kvdl_part *part; + unsigned int index_range; + unsigned int usage_bit_count; + size_t usage_size; + + if (!mlxsw_core_res_valid(mlxsw_sp->core, + info->usage_bit_count_res_id) || + !mlxsw_core_res_valid(mlxsw_sp->core, + info->index_range_res_id)) + return ERR_PTR(-EIO); + usage_bit_count = mlxsw_core_res_get(mlxsw_sp->core, + info->usage_bit_count_res_id); + index_range = mlxsw_core_res_get(mlxsw_sp->core, + info->index_range_res_id); + + /* For some partitions, one usage bit represents a group of indexes. + * That's why we compute the number of indexes per usage bit here, + * according to queried resources. 
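+ * For instance (illustrative numbers only, not values queried from the
+ * device): an index range of 0x100000 entries covered by 0x8000 usage
+ * bits gives 32 indexes per usage bit, and allocation requests are then
+ * rounded up to that granularity by the DIV_ROUND_UP() in
+ * mlxsw_sp2_kvdl_part_alloc().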
+ */ + indexes_per_usage_bit = index_range / usage_bit_count; + + usage_size = BITS_TO_LONGS(usage_bit_count) * sizeof(unsigned long); + part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL); + if (!part) + return ERR_PTR(-ENOMEM); + part->info = info; + part->usage_bit_count = usage_bit_count; + part->indexes_per_usage_bit = indexes_per_usage_bit; + part->last_allocated_bit = usage_bit_count - 1; + return part; +} + +static void mlxsw_sp2_kvdl_part_fini(struct mlxsw_sp2_kvdl_part *part) +{ + kfree(part); +} + +static int mlxsw_sp2_kvdl_parts_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp2_kvdl *kvdl) +{ + const struct mlxsw_sp2_kvdl_part_info *info; + int i; + int err; + + for (i = 0; i < MLXSW_SP2_KVDL_PARTS_INFO_LEN; i++) { + info = &mlxsw_sp2_kvdl_parts_info[i]; + kvdl->parts[i] = mlxsw_sp2_kvdl_part_init(mlxsw_sp, info); + if (IS_ERR(kvdl->parts[i])) { + err = PTR_ERR(kvdl->parts[i]); + goto err_kvdl_part_init; + } + } + return 0; + +err_kvdl_part_init: + for (i--; i >= 0; i--) + mlxsw_sp2_kvdl_part_fini(kvdl->parts[i]); + return err; +} + +static void mlxsw_sp2_kvdl_parts_fini(struct mlxsw_sp2_kvdl *kvdl) +{ + int i; + + for (i = 0; i < MLXSW_SP2_KVDL_PARTS_INFO_LEN; i++) + mlxsw_sp2_kvdl_part_fini(kvdl->parts[i]); +} + +static int mlxsw_sp2_kvdl_init(struct mlxsw_sp *mlxsw_sp, void *priv) +{ + struct mlxsw_sp2_kvdl *kvdl = priv; + + return mlxsw_sp2_kvdl_parts_init(mlxsw_sp, kvdl); +} + +static void mlxsw_sp2_kvdl_fini(struct mlxsw_sp *mlxsw_sp, void *priv) +{ + struct mlxsw_sp2_kvdl *kvdl = priv; + + mlxsw_sp2_kvdl_parts_fini(kvdl); +} + +const struct mlxsw_sp_kvdl_ops mlxsw_sp2_kvdl_ops = { + .priv_size = sizeof(struct mlxsw_sp2_kvdl), + .init = mlxsw_sp2_kvdl_init, + .fini = mlxsw_sp2_kvdl_fini, + .alloc = mlxsw_sp2_kvdl_alloc, + .free = mlxsw_sp2_kvdl_free, + .alloc_size_query = mlxsw_sp2_kvdl_alloc_size_query, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c new file mode 100644 index 000000000..b1178b7a7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. 
All rights reserved */ + +#include <linux/kernel.h> + +#include "core_acl_flex_actions.h" +#include "spectrum.h" +#include "spectrum_mr.h" + +struct mlxsw_sp2_mr_tcam { + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_flow_block *flow_block; + struct mlxsw_sp_acl_ruleset *ruleset4; + struct mlxsw_sp_acl_ruleset *ruleset6; +}; + +struct mlxsw_sp2_mr_route { + struct mlxsw_sp2_mr_tcam *mr_tcam; +}; + +static struct mlxsw_sp_acl_ruleset * +mlxsw_sp2_mr_tcam_proto_ruleset(struct mlxsw_sp2_mr_tcam *mr_tcam, + enum mlxsw_sp_l3proto proto) +{ + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return mr_tcam->ruleset4; + case MLXSW_SP_L3_PROTO_IPV6: + return mr_tcam->ruleset6; + } + return NULL; +} + +static int mlxsw_sp2_mr_tcam_bind_group(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_reg_pemrbt_protocol protocol, + struct mlxsw_sp_acl_ruleset *ruleset) +{ + char pemrbt_pl[MLXSW_REG_PEMRBT_LEN]; + u16 group_id; + + group_id = mlxsw_sp_acl_ruleset_group_id(ruleset); + + mlxsw_reg_pemrbt_pack(pemrbt_pl, protocol, group_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pemrbt), pemrbt_pl); +} + +static const enum mlxsw_afk_element mlxsw_sp2_mr_tcam_usage_ipv4[] = { + MLXSW_AFK_ELEMENT_VIRT_ROUTER_MSB, + MLXSW_AFK_ELEMENT_VIRT_ROUTER_LSB, + MLXSW_AFK_ELEMENT_SRC_IP_0_31, + MLXSW_AFK_ELEMENT_DST_IP_0_31, +}; + +static int mlxsw_sp2_mr_tcam_ipv4_init(struct mlxsw_sp2_mr_tcam *mr_tcam) +{ + struct mlxsw_afk_element_usage elusage; + int err; + + /* Initialize IPv4 ACL group. */ + mlxsw_afk_element_usage_fill(&elusage, + mlxsw_sp2_mr_tcam_usage_ipv4, + ARRAY_SIZE(mlxsw_sp2_mr_tcam_usage_ipv4)); + mr_tcam->ruleset4 = mlxsw_sp_acl_ruleset_get(mr_tcam->mlxsw_sp, + mr_tcam->flow_block, + MLXSW_SP_L3_PROTO_IPV4, + MLXSW_SP_ACL_PROFILE_MR, + &elusage); + + if (IS_ERR(mr_tcam->ruleset4)) + return PTR_ERR(mr_tcam->ruleset4); + + /* MC Router groups should be bound before routes are inserted. */ + err = mlxsw_sp2_mr_tcam_bind_group(mr_tcam->mlxsw_sp, + MLXSW_REG_PEMRBT_PROTO_IPV4, + mr_tcam->ruleset4); + if (err) + goto err_bind_group; + + return 0; + +err_bind_group: + mlxsw_sp_acl_ruleset_put(mr_tcam->mlxsw_sp, mr_tcam->ruleset4); + return err; +} + +static void mlxsw_sp2_mr_tcam_ipv4_fini(struct mlxsw_sp2_mr_tcam *mr_tcam) +{ + mlxsw_sp_acl_ruleset_put(mr_tcam->mlxsw_sp, mr_tcam->ruleset4); +} + +static const enum mlxsw_afk_element mlxsw_sp2_mr_tcam_usage_ipv6[] = { + MLXSW_AFK_ELEMENT_VIRT_ROUTER_MSB, + MLXSW_AFK_ELEMENT_VIRT_ROUTER_LSB, + MLXSW_AFK_ELEMENT_SRC_IP_96_127, + MLXSW_AFK_ELEMENT_SRC_IP_64_95, + MLXSW_AFK_ELEMENT_SRC_IP_32_63, + MLXSW_AFK_ELEMENT_SRC_IP_0_31, + MLXSW_AFK_ELEMENT_DST_IP_96_127, + MLXSW_AFK_ELEMENT_DST_IP_64_95, + MLXSW_AFK_ELEMENT_DST_IP_32_63, + MLXSW_AFK_ELEMENT_DST_IP_0_31, +}; + +static int mlxsw_sp2_mr_tcam_ipv6_init(struct mlxsw_sp2_mr_tcam *mr_tcam) +{ + struct mlxsw_afk_element_usage elusage; + int err; + + /* Initialize IPv6 ACL group */ + mlxsw_afk_element_usage_fill(&elusage, + mlxsw_sp2_mr_tcam_usage_ipv6, + ARRAY_SIZE(mlxsw_sp2_mr_tcam_usage_ipv6)); + mr_tcam->ruleset6 = mlxsw_sp_acl_ruleset_get(mr_tcam->mlxsw_sp, + mr_tcam->flow_block, + MLXSW_SP_L3_PROTO_IPV6, + MLXSW_SP_ACL_PROFILE_MR, + &elusage); + + if (IS_ERR(mr_tcam->ruleset6)) + return PTR_ERR(mr_tcam->ruleset6); + + /* MC Router groups should be bound before routes are inserted. 
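+ * The binding is done through the PEMRBT register (packed in
+ * mlxsw_sp2_mr_tcam_bind_group() above), which attaches the ACL group
+ * backing the ruleset created here to the multicast router for the
+ * given protocol.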
*/ + err = mlxsw_sp2_mr_tcam_bind_group(mr_tcam->mlxsw_sp, + MLXSW_REG_PEMRBT_PROTO_IPV6, + mr_tcam->ruleset6); + if (err) + goto err_bind_group; + + return 0; + +err_bind_group: + mlxsw_sp_acl_ruleset_put(mr_tcam->mlxsw_sp, mr_tcam->ruleset6); + return err; +} + +static void mlxsw_sp2_mr_tcam_ipv6_fini(struct mlxsw_sp2_mr_tcam *mr_tcam) +{ + mlxsw_sp_acl_ruleset_put(mr_tcam->mlxsw_sp, mr_tcam->ruleset6); +} + +static void +mlxsw_sp2_mr_tcam_rule_parse4(struct mlxsw_sp_acl_rule_info *rulei, + struct mlxsw_sp_mr_route_key *key) +{ + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_0_31, + (char *) &key->source.addr4, + (char *) &key->source_mask.addr4, 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_0_31, + (char *) &key->group.addr4, + (char *) &key->group_mask.addr4, 4); +} + +static void +mlxsw_sp2_mr_tcam_rule_parse6(struct mlxsw_sp_acl_rule_info *rulei, + struct mlxsw_sp_mr_route_key *key) +{ + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_96_127, + &key->source.addr6.s6_addr[0x0], + &key->source_mask.addr6.s6_addr[0x0], 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_64_95, + &key->source.addr6.s6_addr[0x4], + &key->source_mask.addr6.s6_addr[0x4], 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_32_63, + &key->source.addr6.s6_addr[0x8], + &key->source_mask.addr6.s6_addr[0x8], 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_0_31, + &key->source.addr6.s6_addr[0xc], + &key->source_mask.addr6.s6_addr[0xc], 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_96_127, + &key->group.addr6.s6_addr[0x0], + &key->group_mask.addr6.s6_addr[0x0], 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_64_95, + &key->group.addr6.s6_addr[0x4], + &key->group_mask.addr6.s6_addr[0x4], 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_32_63, + &key->group.addr6.s6_addr[0x8], + &key->group_mask.addr6.s6_addr[0x8], 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_0_31, + &key->group.addr6.s6_addr[0xc], + &key->group_mask.addr6.s6_addr[0xc], 4); +} + +static void +mlxsw_sp2_mr_tcam_rule_parse(struct mlxsw_sp_acl_rule *rule, + struct mlxsw_sp_mr_route_key *key, + unsigned int priority) +{ + struct mlxsw_sp_acl_rule_info *rulei; + + rulei = mlxsw_sp_acl_rule_rulei(rule); + rulei->priority = priority; + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_VIRT_ROUTER_LSB, + key->vrid, GENMASK(7, 0)); + mlxsw_sp_acl_rulei_keymask_u32(rulei, + MLXSW_AFK_ELEMENT_VIRT_ROUTER_MSB, + key->vrid >> 8, GENMASK(3, 0)); + switch (key->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return mlxsw_sp2_mr_tcam_rule_parse4(rulei, key); + case MLXSW_SP_L3_PROTO_IPV6: + return mlxsw_sp2_mr_tcam_rule_parse6(rulei, key); + } +} + +static int +mlxsw_sp2_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv, + struct mlxsw_sp_mr_route_key *key, + struct mlxsw_afa_block *afa_block, + enum mlxsw_sp_mr_route_prio prio) +{ + struct mlxsw_sp2_mr_route *mr_route = route_priv; + struct mlxsw_sp2_mr_tcam *mr_tcam = priv; + struct mlxsw_sp_acl_ruleset *ruleset; + struct mlxsw_sp_acl_rule *rule; + int err; + + mr_route->mr_tcam = mr_tcam; + ruleset = mlxsw_sp2_mr_tcam_proto_ruleset(mr_tcam, key->proto); + if (WARN_ON(!ruleset)) + return -EINVAL; + + rule = mlxsw_sp_acl_rule_create(mlxsw_sp, ruleset, + (unsigned long) route_priv, afa_block, + NULL); + if (IS_ERR(rule)) + return PTR_ERR(rule); + + mlxsw_sp2_mr_tcam_rule_parse(rule, key, 
prio); + err = mlxsw_sp_acl_rule_add(mlxsw_sp, rule); + if (err) + goto err_rule_add; + + return 0; + +err_rule_add: + mlxsw_sp_acl_rule_destroy(mlxsw_sp, rule); + return err; +} + +static void +mlxsw_sp2_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv, + struct mlxsw_sp_mr_route_key *key) +{ + struct mlxsw_sp2_mr_tcam *mr_tcam = priv; + struct mlxsw_sp_acl_ruleset *ruleset; + struct mlxsw_sp_acl_rule *rule; + + ruleset = mlxsw_sp2_mr_tcam_proto_ruleset(mr_tcam, key->proto); + if (WARN_ON(!ruleset)) + return; + + rule = mlxsw_sp_acl_rule_lookup(mlxsw_sp, ruleset, + (unsigned long) route_priv); + if (WARN_ON(!rule)) + return; + + mlxsw_sp_acl_rule_del(mlxsw_sp, rule); + mlxsw_sp_acl_rule_destroy(mlxsw_sp, rule); +} + +static int +mlxsw_sp2_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp, + void *route_priv, + struct mlxsw_sp_mr_route_key *key, + struct mlxsw_afa_block *afa_block) +{ + struct mlxsw_sp2_mr_route *mr_route = route_priv; + struct mlxsw_sp2_mr_tcam *mr_tcam = mr_route->mr_tcam; + struct mlxsw_sp_acl_ruleset *ruleset; + struct mlxsw_sp_acl_rule *rule; + + ruleset = mlxsw_sp2_mr_tcam_proto_ruleset(mr_tcam, key->proto); + if (WARN_ON(!ruleset)) + return -EINVAL; + + rule = mlxsw_sp_acl_rule_lookup(mlxsw_sp, ruleset, + (unsigned long) route_priv); + if (WARN_ON(!rule)) + return -EINVAL; + + return mlxsw_sp_acl_rule_action_replace(mlxsw_sp, rule, afa_block); +} + +static int mlxsw_sp2_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv) +{ + struct mlxsw_sp2_mr_tcam *mr_tcam = priv; + int err; + + mr_tcam->mlxsw_sp = mlxsw_sp; + mr_tcam->flow_block = mlxsw_sp_flow_block_create(mlxsw_sp, NULL); + if (!mr_tcam->flow_block) + return -ENOMEM; + + err = mlxsw_sp2_mr_tcam_ipv4_init(mr_tcam); + if (err) + goto err_ipv4_init; + + err = mlxsw_sp2_mr_tcam_ipv6_init(mr_tcam); + if (err) + goto err_ipv6_init; + + return 0; + +err_ipv6_init: + mlxsw_sp2_mr_tcam_ipv4_fini(mr_tcam); +err_ipv4_init: + mlxsw_sp_flow_block_destroy(mr_tcam->flow_block); + return err; +} + +static void mlxsw_sp2_mr_tcam_fini(void *priv) +{ + struct mlxsw_sp2_mr_tcam *mr_tcam = priv; + + mlxsw_sp2_mr_tcam_ipv6_fini(mr_tcam); + mlxsw_sp2_mr_tcam_ipv4_fini(mr_tcam); + mlxsw_sp_flow_block_destroy(mr_tcam->flow_block); +} + +const struct mlxsw_sp_mr_tcam_ops mlxsw_sp2_mr_tcam_ops = { + .priv_size = sizeof(struct mlxsw_sp2_mr_tcam), + .init = mlxsw_sp2_mr_tcam_init, + .fini = mlxsw_sp2_mr_tcam_fini, + .route_priv_size = sizeof(struct mlxsw_sp2_mr_route), + .route_create = mlxsw_sp2_mr_tcam_route_create, + .route_destroy = mlxsw_sp2_mr_tcam_route_destroy, + .route_update = mlxsw_sp2_mr_tcam_route_update, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c new file mode 100644 index 000000000..6c5af0185 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -0,0 +1,1124 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2018 Mellanox Technologies. 
All rights reserved */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/list.h> +#include <linux/string.h> +#include <linux/rhashtable.h> +#include <linux/netdevice.h> +#include <linux/mutex.h> +#include <net/net_namespace.h> +#include <net/tc_act/tc_vlan.h> + +#include "reg.h" +#include "core.h" +#include "resources.h" +#include "spectrum.h" +#include "core_acl_flex_keys.h" +#include "core_acl_flex_actions.h" +#include "spectrum_acl_tcam.h" + +struct mlxsw_sp_acl { + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_afk *afk; + struct mlxsw_sp_fid *dummy_fid; + struct rhashtable ruleset_ht; + struct list_head rules; + struct mutex rules_lock; /* Protects rules list */ + struct { + struct delayed_work dw; + unsigned long interval; /* ms */ +#define MLXSW_SP_ACL_RULE_ACTIVITY_UPDATE_PERIOD_MS 1000 + } rule_activity_update; + struct mlxsw_sp_acl_tcam tcam; +}; + +struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl) +{ + return acl->afk; +} + +struct mlxsw_sp_acl_ruleset_ht_key { + struct mlxsw_sp_flow_block *block; + u32 chain_index; + const struct mlxsw_sp_acl_profile_ops *ops; +}; + +struct mlxsw_sp_acl_ruleset { + struct rhash_head ht_node; /* Member of acl HT */ + struct mlxsw_sp_acl_ruleset_ht_key ht_key; + struct rhashtable rule_ht; + unsigned int ref_count; + unsigned int min_prio; + unsigned int max_prio; + unsigned long priv[]; + /* priv has to be always the last item */ +}; + +struct mlxsw_sp_acl_rule { + struct rhash_head ht_node; /* Member of rule HT */ + struct list_head list; + unsigned long cookie; /* HT key */ + struct mlxsw_sp_acl_ruleset *ruleset; + struct mlxsw_sp_acl_rule_info *rulei; + u64 last_used; + u64 last_packets; + u64 last_bytes; + u64 last_drops; + unsigned long priv[]; + /* priv has to be always the last item */ +}; + +static const struct rhashtable_params mlxsw_sp_acl_ruleset_ht_params = { + .key_len = sizeof(struct mlxsw_sp_acl_ruleset_ht_key), + .key_offset = offsetof(struct mlxsw_sp_acl_ruleset, ht_key), + .head_offset = offsetof(struct mlxsw_sp_acl_ruleset, ht_node), + .automatic_shrinking = true, +}; + +static const struct rhashtable_params mlxsw_sp_acl_rule_ht_params = { + .key_len = sizeof(unsigned long), + .key_offset = offsetof(struct mlxsw_sp_acl_rule, cookie), + .head_offset = offsetof(struct mlxsw_sp_acl_rule, ht_node), + .automatic_shrinking = true, +}; + +struct mlxsw_sp_fid *mlxsw_sp_acl_dummy_fid(struct mlxsw_sp *mlxsw_sp) +{ + return mlxsw_sp->acl->dummy_fid; +} + +static bool +mlxsw_sp_acl_ruleset_is_singular(const struct mlxsw_sp_acl_ruleset *ruleset) +{ + /* We hold a reference on ruleset ourselves */ + return ruleset->ref_count == 2; +} + +int mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, + struct mlxsw_sp_flow_block_binding *binding) +{ + struct mlxsw_sp_acl_ruleset *ruleset = block->ruleset_zero; + const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + + return ops->ruleset_bind(mlxsw_sp, ruleset->priv, + binding->mlxsw_sp_port, binding->ingress); +} + +void mlxsw_sp_acl_ruleset_unbind(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, + struct mlxsw_sp_flow_block_binding *binding) +{ + struct mlxsw_sp_acl_ruleset *ruleset = block->ruleset_zero; + const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + + ops->ruleset_unbind(mlxsw_sp, ruleset->priv, + binding->mlxsw_sp_port, binding->ingress); +} + +static int +mlxsw_sp_acl_ruleset_block_bind(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ruleset *ruleset, + 
struct mlxsw_sp_flow_block *block) +{ + struct mlxsw_sp_flow_block_binding *binding; + int err; + + block->ruleset_zero = ruleset; + list_for_each_entry(binding, &block->binding_list, list) { + err = mlxsw_sp_acl_ruleset_bind(mlxsw_sp, block, binding); + if (err) + goto rollback; + } + return 0; + +rollback: + list_for_each_entry_continue_reverse(binding, &block->binding_list, + list) + mlxsw_sp_acl_ruleset_unbind(mlxsw_sp, block, binding); + block->ruleset_zero = NULL; + + return err; +} + +static void +mlxsw_sp_acl_ruleset_block_unbind(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ruleset *ruleset, + struct mlxsw_sp_flow_block *block) +{ + struct mlxsw_sp_flow_block_binding *binding; + + list_for_each_entry(binding, &block->binding_list, list) + mlxsw_sp_acl_ruleset_unbind(mlxsw_sp, block, binding); + block->ruleset_zero = NULL; +} + +static struct mlxsw_sp_acl_ruleset * +mlxsw_sp_acl_ruleset_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, u32 chain_index, + const struct mlxsw_sp_acl_profile_ops *ops, + struct mlxsw_afk_element_usage *tmplt_elusage) +{ + struct mlxsw_sp_acl *acl = mlxsw_sp->acl; + struct mlxsw_sp_acl_ruleset *ruleset; + size_t alloc_size; + int err; + + alloc_size = sizeof(*ruleset) + ops->ruleset_priv_size; + ruleset = kzalloc(alloc_size, GFP_KERNEL); + if (!ruleset) + return ERR_PTR(-ENOMEM); + ruleset->ref_count = 1; + ruleset->ht_key.block = block; + ruleset->ht_key.chain_index = chain_index; + ruleset->ht_key.ops = ops; + + err = rhashtable_init(&ruleset->rule_ht, &mlxsw_sp_acl_rule_ht_params); + if (err) + goto err_rhashtable_init; + + err = ops->ruleset_add(mlxsw_sp, &acl->tcam, ruleset->priv, + tmplt_elusage, &ruleset->min_prio, + &ruleset->max_prio); + if (err) + goto err_ops_ruleset_add; + + err = rhashtable_insert_fast(&acl->ruleset_ht, &ruleset->ht_node, + mlxsw_sp_acl_ruleset_ht_params); + if (err) + goto err_ht_insert; + + return ruleset; + +err_ht_insert: + ops->ruleset_del(mlxsw_sp, ruleset->priv); +err_ops_ruleset_add: + rhashtable_destroy(&ruleset->rule_ht); +err_rhashtable_init: + kfree(ruleset); + return ERR_PTR(err); +} + +static void mlxsw_sp_acl_ruleset_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ruleset *ruleset) +{ + const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + struct mlxsw_sp_acl *acl = mlxsw_sp->acl; + + rhashtable_remove_fast(&acl->ruleset_ht, &ruleset->ht_node, + mlxsw_sp_acl_ruleset_ht_params); + ops->ruleset_del(mlxsw_sp, ruleset->priv); + rhashtable_destroy(&ruleset->rule_ht); + kfree(ruleset); +} + +static void mlxsw_sp_acl_ruleset_ref_inc(struct mlxsw_sp_acl_ruleset *ruleset) +{ + ruleset->ref_count++; +} + +static void mlxsw_sp_acl_ruleset_ref_dec(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ruleset *ruleset) +{ + if (--ruleset->ref_count) + return; + mlxsw_sp_acl_ruleset_destroy(mlxsw_sp, ruleset); +} + +static struct mlxsw_sp_acl_ruleset * +__mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp_acl *acl, + struct mlxsw_sp_flow_block *block, u32 chain_index, + const struct mlxsw_sp_acl_profile_ops *ops) +{ + struct mlxsw_sp_acl_ruleset_ht_key ht_key; + + memset(&ht_key, 0, sizeof(ht_key)); + ht_key.block = block; + ht_key.chain_index = chain_index; + ht_key.ops = ops; + return rhashtable_lookup_fast(&acl->ruleset_ht, &ht_key, + mlxsw_sp_acl_ruleset_ht_params); +} + +struct mlxsw_sp_acl_ruleset * +mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, u32 chain_index, + enum mlxsw_sp_acl_profile profile) +{ + const struct 
mlxsw_sp_acl_profile_ops *ops; + struct mlxsw_sp_acl *acl = mlxsw_sp->acl; + struct mlxsw_sp_acl_ruleset *ruleset; + + ops = mlxsw_sp_acl_tcam_profile_ops(mlxsw_sp, profile); + if (!ops) + return ERR_PTR(-EINVAL); + ruleset = __mlxsw_sp_acl_ruleset_lookup(acl, block, chain_index, ops); + if (!ruleset) + return ERR_PTR(-ENOENT); + return ruleset; +} + +struct mlxsw_sp_acl_ruleset * +mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, u32 chain_index, + enum mlxsw_sp_acl_profile profile, + struct mlxsw_afk_element_usage *tmplt_elusage) +{ + const struct mlxsw_sp_acl_profile_ops *ops; + struct mlxsw_sp_acl *acl = mlxsw_sp->acl; + struct mlxsw_sp_acl_ruleset *ruleset; + + ops = mlxsw_sp_acl_tcam_profile_ops(mlxsw_sp, profile); + if (!ops) + return ERR_PTR(-EINVAL); + + ruleset = __mlxsw_sp_acl_ruleset_lookup(acl, block, chain_index, ops); + if (ruleset) { + mlxsw_sp_acl_ruleset_ref_inc(ruleset); + return ruleset; + } + return mlxsw_sp_acl_ruleset_create(mlxsw_sp, block, chain_index, ops, + tmplt_elusage); +} + +void mlxsw_sp_acl_ruleset_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ruleset *ruleset) +{ + mlxsw_sp_acl_ruleset_ref_dec(mlxsw_sp, ruleset); +} + +u16 mlxsw_sp_acl_ruleset_group_id(struct mlxsw_sp_acl_ruleset *ruleset) +{ + const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + + return ops->ruleset_group_id(ruleset->priv); +} + +void mlxsw_sp_acl_ruleset_prio_get(struct mlxsw_sp_acl_ruleset *ruleset, + unsigned int *p_min_prio, + unsigned int *p_max_prio) +{ + *p_min_prio = ruleset->min_prio; + *p_max_prio = ruleset->max_prio; +} + +struct mlxsw_sp_acl_rule_info * +mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl, + struct mlxsw_afa_block *afa_block) +{ + struct mlxsw_sp_acl_rule_info *rulei; + int err; + + rulei = kzalloc(sizeof(*rulei), GFP_KERNEL); + if (!rulei) + return ERR_PTR(-ENOMEM); + + if (afa_block) { + rulei->act_block = afa_block; + return rulei; + } + + rulei->act_block = mlxsw_afa_block_create(acl->mlxsw_sp->afa); + if (IS_ERR(rulei->act_block)) { + err = PTR_ERR(rulei->act_block); + goto err_afa_block_create; + } + rulei->action_created = 1; + return rulei; + +err_afa_block_create: + kfree(rulei); + return ERR_PTR(err); +} + +void mlxsw_sp_acl_rulei_destroy(struct mlxsw_sp_acl_rule_info *rulei) +{ + if (rulei->action_created) + mlxsw_afa_block_destroy(rulei->act_block); + kfree(rulei); +} + +int mlxsw_sp_acl_rulei_commit(struct mlxsw_sp_acl_rule_info *rulei) +{ + return mlxsw_afa_block_commit(rulei->act_block); +} + +void mlxsw_sp_acl_rulei_priority(struct mlxsw_sp_acl_rule_info *rulei, + unsigned int priority) +{ + rulei->priority = priority; +} + +void mlxsw_sp_acl_rulei_keymask_u32(struct mlxsw_sp_acl_rule_info *rulei, + enum mlxsw_afk_element element, + u32 key_value, u32 mask_value) +{ + mlxsw_afk_values_add_u32(&rulei->values, element, + key_value, mask_value); +} + +void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei, + enum mlxsw_afk_element element, + const char *key_value, + const char *mask_value, unsigned int len) +{ + mlxsw_afk_values_add_buf(&rulei->values, element, + key_value, mask_value, len); +} + +int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei) +{ + return mlxsw_afa_block_continue(rulei->act_block); +} + +int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, + u16 group_id) +{ + return mlxsw_afa_block_jump(rulei->act_block, group_id); +} + +int mlxsw_sp_acl_rulei_act_terminate(struct mlxsw_sp_acl_rule_info *rulei) +{ + return 
mlxsw_afa_block_terminate(rulei->act_block); +} + +int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei, + bool ingress, + const struct flow_action_cookie *fa_cookie, + struct netlink_ext_ack *extack) +{ + return mlxsw_afa_block_append_drop(rulei->act_block, ingress, + fa_cookie, extack); +} + +int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei) +{ + return mlxsw_afa_block_append_trap(rulei->act_block, + MLXSW_TRAP_ID_ACL0); +} + +int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct net_device *out_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + u16 local_port; + bool in_port; + + if (out_dev) { + if (!mlxsw_sp_port_dev_check(out_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid output device"); + return -EINVAL; + } + mlxsw_sp_port = netdev_priv(out_dev); + if (mlxsw_sp_port->mlxsw_sp != mlxsw_sp) { + NL_SET_ERR_MSG_MOD(extack, "Invalid output device"); + return -EINVAL; + } + local_port = mlxsw_sp_port->local_port; + in_port = false; + } else { + /* If out_dev is NULL, the caller wants to + * set forward to ingress port. + */ + local_port = 0; + in_port = true; + } + return mlxsw_afa_block_append_fwd(rulei->act_block, + local_port, in_port, extack); +} + +int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct mlxsw_sp_flow_block *block, + struct net_device *out_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_flow_block_binding *binding; + struct mlxsw_sp_port *in_port; + + if (!list_is_singular(&block->binding_list)) { + NL_SET_ERR_MSG_MOD(extack, "Only a single mirror source is allowed"); + return -EOPNOTSUPP; + } + binding = list_first_entry(&block->binding_list, + struct mlxsw_sp_flow_block_binding, list); + in_port = binding->mlxsw_sp_port; + + return mlxsw_afa_block_append_mirror(rulei->act_block, + in_port->local_port, + out_dev, + binding->ingress, + extack); +} + +int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u32 action, u16 vid, u16 proto, u8 prio, + struct netlink_ext_ack *extack) +{ + u8 ethertype; + + if (action == FLOW_ACTION_VLAN_MANGLE) { + switch (proto) { + case ETH_P_8021Q: + ethertype = 0; + break; + case ETH_P_8021AD: + ethertype = 1; + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported VLAN protocol"); + dev_err(mlxsw_sp->bus_info->dev, "Unsupported VLAN protocol %#04x\n", + proto); + return -EINVAL; + } + + return mlxsw_afa_block_append_vlan_modify(rulei->act_block, + vid, prio, ethertype, + extack); + } else { + NL_SET_ERR_MSG_MOD(extack, "Unsupported VLAN action"); + dev_err(mlxsw_sp->bus_info->dev, "Unsupported VLAN action\n"); + return -EINVAL; + } +} + +int mlxsw_sp_acl_rulei_act_priority(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u32 prio, struct netlink_ext_ack *extack) +{ + /* Even though both Linux and Spectrum switches support 16 priorities, + * spectrum_qdisc only processes the first eight priomap elements, and + * the DCB and PFC features are tied to 8 priorities as well. Therefore + * bounce attempts to prioritize packets to higher priorities. + */ + if (prio >= IEEE_8021QAZ_MAX_TCS) { + NL_SET_ERR_MSG_MOD(extack, "Only priorities 0..7 are supported"); + return -EINVAL; + } + return mlxsw_afa_block_append_qos_switch_prio(rulei->act_block, prio, + extack); +} + +struct mlxsw_sp_acl_mangle_action { + enum flow_action_mangle_base htype; + /* Offset is u32-aligned. 
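 * For example, a pedit of the IPv4 DSCP field arrives with
 * htype FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset 0 (the first 32-bit word of
 * the header) and mask 0xff03ffff, whose clear bits 23:18 mark the DSCP;
 * the configured value is then recovered with val >>= 18, the shift below.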
*/ + u32 offset; + /* Mask bits are unset for the modified field. */ + u32 mask; + /* Shift required to extract the set value. */ + u32 shift; + enum mlxsw_sp_acl_mangle_field field; +}; + +#define MLXSW_SP_ACL_MANGLE_ACTION(_htype, _offset, _mask, _shift, _field) \ + { \ + .htype = _htype, \ + .offset = _offset, \ + .mask = _mask, \ + .shift = _shift, \ + .field = MLXSW_SP_ACL_MANGLE_FIELD_##_field, \ + } + +#define MLXSW_SP_ACL_MANGLE_ACTION_IP4(_offset, _mask, _shift, _field) \ + MLXSW_SP_ACL_MANGLE_ACTION(FLOW_ACT_MANGLE_HDR_TYPE_IP4, \ + _offset, _mask, _shift, _field) + +#define MLXSW_SP_ACL_MANGLE_ACTION_IP6(_offset, _mask, _shift, _field) \ + MLXSW_SP_ACL_MANGLE_ACTION(FLOW_ACT_MANGLE_HDR_TYPE_IP6, \ + _offset, _mask, _shift, _field) + +#define MLXSW_SP_ACL_MANGLE_ACTION_TCP(_offset, _mask, _shift, _field) \ + MLXSW_SP_ACL_MANGLE_ACTION(FLOW_ACT_MANGLE_HDR_TYPE_TCP, _offset, _mask, _shift, _field) + +#define MLXSW_SP_ACL_MANGLE_ACTION_UDP(_offset, _mask, _shift, _field) \ + MLXSW_SP_ACL_MANGLE_ACTION(FLOW_ACT_MANGLE_HDR_TYPE_UDP, _offset, _mask, _shift, _field) + +static struct mlxsw_sp_acl_mangle_action mlxsw_sp_acl_mangle_actions[] = { + MLXSW_SP_ACL_MANGLE_ACTION_IP4(0, 0xff00ffff, 16, IP_DSFIELD), + MLXSW_SP_ACL_MANGLE_ACTION_IP4(0, 0xff03ffff, 18, IP_DSCP), + MLXSW_SP_ACL_MANGLE_ACTION_IP4(0, 0xfffcffff, 16, IP_ECN), + + MLXSW_SP_ACL_MANGLE_ACTION_IP6(0, 0xf00fffff, 20, IP_DSFIELD), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(0, 0xf03fffff, 22, IP_DSCP), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(0, 0xffcfffff, 20, IP_ECN), + + MLXSW_SP_ACL_MANGLE_ACTION_TCP(0, 0x0000ffff, 16, IP_SPORT), + MLXSW_SP_ACL_MANGLE_ACTION_TCP(0, 0xffff0000, 0, IP_DPORT), + + MLXSW_SP_ACL_MANGLE_ACTION_UDP(0, 0x0000ffff, 16, IP_SPORT), + MLXSW_SP_ACL_MANGLE_ACTION_UDP(0, 0xffff0000, 0, IP_DPORT), + + MLXSW_SP_ACL_MANGLE_ACTION_IP4(12, 0x00000000, 0, IP4_SIP), + MLXSW_SP_ACL_MANGLE_ACTION_IP4(16, 0x00000000, 0, IP4_DIP), + + MLXSW_SP_ACL_MANGLE_ACTION_IP6(8, 0x00000000, 0, IP6_SIP_1), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(12, 0x00000000, 0, IP6_SIP_2), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(16, 0x00000000, 0, IP6_SIP_3), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(20, 0x00000000, 0, IP6_SIP_4), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(24, 0x00000000, 0, IP6_DIP_1), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(28, 0x00000000, 0, IP6_DIP_2), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(32, 0x00000000, 0, IP6_DIP_3), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(36, 0x00000000, 0, IP6_DIP_4), +}; + +static int +mlxsw_sp_acl_rulei_act_mangle_field(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct mlxsw_sp_acl_mangle_action *mact, + u32 val, struct netlink_ext_ack *extack) +{ + switch (mact->field) { + case MLXSW_SP_ACL_MANGLE_FIELD_IP_DSFIELD: + return mlxsw_afa_block_append_qos_dsfield(rulei->act_block, + val, extack); + case MLXSW_SP_ACL_MANGLE_FIELD_IP_DSCP: + return mlxsw_afa_block_append_qos_dscp(rulei->act_block, + val, extack); + case MLXSW_SP_ACL_MANGLE_FIELD_IP_ECN: + return mlxsw_afa_block_append_qos_ecn(rulei->act_block, + val, extack); + default: + return -EOPNOTSUPP; + } +} + +static int mlxsw_sp1_acl_rulei_act_mangle_field(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct mlxsw_sp_acl_mangle_action *mact, + u32 val, struct netlink_ext_ack *extack) +{ + int err; + + err = mlxsw_sp_acl_rulei_act_mangle_field(mlxsw_sp, rulei, mact, val, extack); + if (err != -EOPNOTSUPP) + return err; + + NL_SET_ERR_MSG_MOD(extack, "Unsupported mangle field"); + return err; +} + +static int +mlxsw_sp2_acl_rulei_act_mangle_field_ip_odd(struct 
mlxsw_sp_acl_rule_info *rulei, + enum mlxsw_sp_acl_mangle_field field, + u32 val, struct netlink_ext_ack *extack) +{ + if (!rulei->ipv6_valid) { + rulei->ipv6.prev_val = val; + rulei->ipv6_valid = true; + rulei->ipv6.prev_field = field; + return 0; + } + + NL_SET_ERR_MSG_MOD(extack, "Unsupported mangle field order"); + return -EOPNOTSUPP; +} + +static int mlxsw_sp2_acl_rulei_act_mangle_field(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct mlxsw_sp_acl_mangle_action *mact, + u32 val, struct netlink_ext_ack *extack) +{ + int err; + + err = mlxsw_sp_acl_rulei_act_mangle_field(mlxsw_sp, rulei, mact, val, extack); + if (err != -EOPNOTSUPP) + return err; + + switch (mact->field) { + case MLXSW_SP_ACL_MANGLE_FIELD_IP_SPORT: + return mlxsw_afa_block_append_l4port(rulei->act_block, false, val, extack); + case MLXSW_SP_ACL_MANGLE_FIELD_IP_DPORT: + return mlxsw_afa_block_append_l4port(rulei->act_block, true, val, extack); + /* IPv4 fields */ + case MLXSW_SP_ACL_MANGLE_FIELD_IP4_SIP: + return mlxsw_afa_block_append_ip(rulei->act_block, false, + true, val, 0, extack); + case MLXSW_SP_ACL_MANGLE_FIELD_IP4_DIP: + return mlxsw_afa_block_append_ip(rulei->act_block, true, + true, val, 0, extack); + /* IPv6 fields */ + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_1: + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_3: + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_1: + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_3: + return mlxsw_sp2_acl_rulei_act_mangle_field_ip_odd(rulei, + mact->field, + val, extack); + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_2: + if (rulei->ipv6_valid && + rulei->ipv6.prev_field == MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_1) { + rulei->ipv6_valid = false; + return mlxsw_afa_block_append_ip(rulei->act_block, + false, false, val, + rulei->ipv6.prev_val, + extack); + } + break; + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_4: + if (rulei->ipv6_valid && + rulei->ipv6.prev_field == MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_3) { + rulei->ipv6_valid = false; + return mlxsw_afa_block_append_ip(rulei->act_block, + false, true, val, + rulei->ipv6.prev_val, + extack); + } + break; + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_2: + if (rulei->ipv6_valid && + rulei->ipv6.prev_field == MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_1) { + rulei->ipv6_valid = false; + return mlxsw_afa_block_append_ip(rulei->act_block, + true, false, val, + rulei->ipv6.prev_val, + extack); + } + break; + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_4: + if (rulei->ipv6_valid && + rulei->ipv6.prev_field == MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_3) { + rulei->ipv6_valid = false; + return mlxsw_afa_block_append_ip(rulei->act_block, + true, true, val, + rulei->ipv6.prev_val, + extack); + } + break; + default: + break; + } + + NL_SET_ERR_MSG_MOD(extack, "Unsupported mangle field"); + return err; +} + +int mlxsw_sp_acl_rulei_act_mangle(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + enum flow_action_mangle_base htype, + u32 offset, u32 mask, u32 val, + struct netlink_ext_ack *extack) +{ + const struct mlxsw_sp_acl_rulei_ops *acl_rulei_ops = mlxsw_sp->acl_rulei_ops; + struct mlxsw_sp_acl_mangle_action *mact; + size_t i; + + for (i = 0; i < ARRAY_SIZE(mlxsw_sp_acl_mangle_actions); ++i) { + mact = &mlxsw_sp_acl_mangle_actions[i]; + if (mact->htype == htype && + mact->offset == offset && + mact->mask == mask) { + val >>= mact->shift; + return acl_rulei_ops->act_mangle_field(mlxsw_sp, + rulei, mact, + val, extack); + } + } + + NL_SET_ERR_MSG_MOD(extack, "Unknown mangle field"); + return -EINVAL; +} + +int mlxsw_sp_acl_rulei_act_police(struct mlxsw_sp 
*mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u32 index, u64 rate_bytes_ps, + u32 burst, struct netlink_ext_ack *extack) +{ + int err; + + err = mlxsw_afa_block_append_police(rulei->act_block, index, + rate_bytes_ps, burst, + &rulei->policer_index, extack); + if (err) + return err; + + rulei->policer_index_valid = true; + + return 0; +} + +int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct netlink_ext_ack *extack) +{ + int err; + + err = mlxsw_afa_block_append_counter(rulei->act_block, + &rulei->counter_index, extack); + if (err) + return err; + rulei->counter_valid = true; + return 0; +} + +int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u16 fid, struct netlink_ext_ack *extack) +{ + return mlxsw_afa_block_append_fid_set(rulei->act_block, fid, extack); +} + +int mlxsw_sp_acl_rulei_act_sample(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct mlxsw_sp_flow_block *block, + struct psample_group *psample_group, u32 rate, + u32 trunc_size, bool truncate, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_flow_block_binding *binding; + struct mlxsw_sp_port *mlxsw_sp_port; + + if (!list_is_singular(&block->binding_list)) { + NL_SET_ERR_MSG_MOD(extack, "Only a single sampling source is allowed"); + return -EOPNOTSUPP; + } + binding = list_first_entry(&block->binding_list, + struct mlxsw_sp_flow_block_binding, list); + mlxsw_sp_port = binding->mlxsw_sp_port; + + return mlxsw_afa_block_append_sampler(rulei->act_block, + mlxsw_sp_port->local_port, + psample_group, rate, trunc_size, + truncate, binding->ingress, + extack); +} + +struct mlxsw_sp_acl_rule * +mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ruleset *ruleset, + unsigned long cookie, + struct mlxsw_afa_block *afa_block, + struct netlink_ext_ack *extack) +{ + const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + struct mlxsw_sp_acl_rule *rule; + int err; + + mlxsw_sp_acl_ruleset_ref_inc(ruleset); + rule = kzalloc(sizeof(*rule) + ops->rule_priv_size, + GFP_KERNEL); + if (!rule) { + err = -ENOMEM; + goto err_alloc; + } + rule->cookie = cookie; + rule->ruleset = ruleset; + + rule->rulei = mlxsw_sp_acl_rulei_create(mlxsw_sp->acl, afa_block); + if (IS_ERR(rule->rulei)) { + err = PTR_ERR(rule->rulei); + goto err_rulei_create; + } + + return rule; + +err_rulei_create: + kfree(rule); +err_alloc: + mlxsw_sp_acl_ruleset_ref_dec(mlxsw_sp, ruleset); + return ERR_PTR(err); +} + +void mlxsw_sp_acl_rule_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule) +{ + struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; + + mlxsw_sp_acl_rulei_destroy(rule->rulei); + kfree(rule); + mlxsw_sp_acl_ruleset_ref_dec(mlxsw_sp, ruleset); +} + +int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule) +{ + struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; + const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + struct mlxsw_sp_flow_block *block = ruleset->ht_key.block; + int err; + + err = ops->rule_add(mlxsw_sp, ruleset->priv, rule->priv, rule->rulei); + if (err) + return err; + + err = rhashtable_insert_fast(&ruleset->rule_ht, &rule->ht_node, + mlxsw_sp_acl_rule_ht_params); + if (err) + goto err_rhashtable_insert; + + if (!ruleset->ht_key.chain_index && + mlxsw_sp_acl_ruleset_is_singular(ruleset)) { + /* We only need ruleset with chain index 0, the implicit + * one, to be directly bound to device. 
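 * (i.e. attached to each port binding of the block through
 * mlxsw_sp_acl_ruleset_block_bind()).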
The rest of the + * rulesets are bound by "Goto action set". + */ + err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset, block); + if (err) + goto err_ruleset_block_bind; + } + + mutex_lock(&mlxsw_sp->acl->rules_lock); + list_add_tail(&rule->list, &mlxsw_sp->acl->rules); + mutex_unlock(&mlxsw_sp->acl->rules_lock); + block->rule_count++; + block->ingress_blocker_rule_count += rule->rulei->ingress_bind_blocker; + block->egress_blocker_rule_count += rule->rulei->egress_bind_blocker; + return 0; + +err_ruleset_block_bind: + rhashtable_remove_fast(&ruleset->rule_ht, &rule->ht_node, + mlxsw_sp_acl_rule_ht_params); +err_rhashtable_insert: + ops->rule_del(mlxsw_sp, rule->priv); + return err; +} + +void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule) +{ + struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; + const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + struct mlxsw_sp_flow_block *block = ruleset->ht_key.block; + + block->egress_blocker_rule_count -= rule->rulei->egress_bind_blocker; + block->ingress_blocker_rule_count -= rule->rulei->ingress_bind_blocker; + block->rule_count--; + mutex_lock(&mlxsw_sp->acl->rules_lock); + list_del(&rule->list); + mutex_unlock(&mlxsw_sp->acl->rules_lock); + if (!ruleset->ht_key.chain_index && + mlxsw_sp_acl_ruleset_is_singular(ruleset)) + mlxsw_sp_acl_ruleset_block_unbind(mlxsw_sp, ruleset, block); + rhashtable_remove_fast(&ruleset->rule_ht, &rule->ht_node, + mlxsw_sp_acl_rule_ht_params); + ops->rule_del(mlxsw_sp, rule->priv); +} + +int mlxsw_sp_acl_rule_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule, + struct mlxsw_afa_block *afa_block) +{ + struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; + const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + struct mlxsw_sp_acl_rule_info *rulei; + + rulei = mlxsw_sp_acl_rule_rulei(rule); + rulei->act_block = afa_block; + + return ops->rule_action_replace(mlxsw_sp, rule->priv, rule->rulei); +} + +struct mlxsw_sp_acl_rule * +mlxsw_sp_acl_rule_lookup(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ruleset *ruleset, + unsigned long cookie) +{ + return rhashtable_lookup_fast(&ruleset->rule_ht, &cookie, + mlxsw_sp_acl_rule_ht_params); +} + +struct mlxsw_sp_acl_rule_info * +mlxsw_sp_acl_rule_rulei(struct mlxsw_sp_acl_rule *rule) +{ + return rule->rulei; +} + +static int mlxsw_sp_acl_rule_activity_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule) +{ + struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; + const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + bool active; + int err; + + err = ops->rule_activity_get(mlxsw_sp, rule->priv, &active); + if (err) + return err; + if (active) + rule->last_used = jiffies; + return 0; +} + +static int mlxsw_sp_acl_rules_activity_update(struct mlxsw_sp_acl *acl) +{ + struct mlxsw_sp_acl_rule *rule; + int err; + + mutex_lock(&acl->rules_lock); + list_for_each_entry(rule, &acl->rules, list) { + err = mlxsw_sp_acl_rule_activity_update(acl->mlxsw_sp, + rule); + if (err) + goto err_rule_update; + } + mutex_unlock(&acl->rules_lock); + return 0; + +err_rule_update: + mutex_unlock(&acl->rules_lock); + return err; +} + +static void mlxsw_sp_acl_rule_activity_work_schedule(struct mlxsw_sp_acl *acl) +{ + unsigned long interval = acl->rule_activity_update.interval; + + mlxsw_core_schedule_dw(&acl->rule_activity_update.dw, + msecs_to_jiffies(interval)); +} + +static void mlxsw_sp_acl_rule_activity_update_work(struct work_struct *work) +{ + struct mlxsw_sp_acl *acl = 
container_of(work, struct mlxsw_sp_acl, + rule_activity_update.dw.work); + int err; + + err = mlxsw_sp_acl_rules_activity_update(acl); + if (err) + dev_err(acl->mlxsw_sp->bus_info->dev, "Could not update acl activity"); + + mlxsw_sp_acl_rule_activity_work_schedule(acl); +} + +int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule, + u64 *packets, u64 *bytes, u64 *drops, + u64 *last_use, + enum flow_action_hw_stats *used_hw_stats) + +{ + enum mlxsw_sp_policer_type type = MLXSW_SP_POLICER_TYPE_SINGLE_RATE; + struct mlxsw_sp_acl_rule_info *rulei; + u64 current_packets = 0; + u64 current_bytes = 0; + u64 current_drops = 0; + int err; + + rulei = mlxsw_sp_acl_rule_rulei(rule); + if (rulei->counter_valid) { + err = mlxsw_sp_flow_counter_get(mlxsw_sp, rulei->counter_index, + ¤t_packets, + ¤t_bytes); + if (err) + return err; + *used_hw_stats = FLOW_ACTION_HW_STATS_IMMEDIATE; + } + if (rulei->policer_index_valid) { + err = mlxsw_sp_policer_drops_counter_get(mlxsw_sp, type, + rulei->policer_index, + ¤t_drops); + if (err) + return err; + } + *packets = current_packets - rule->last_packets; + *bytes = current_bytes - rule->last_bytes; + *drops = current_drops - rule->last_drops; + *last_use = rule->last_used; + + rule->last_bytes = current_bytes; + rule->last_packets = current_packets; + rule->last_drops = current_drops; + + return 0; +} + +int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_fid *fid; + struct mlxsw_sp_acl *acl; + size_t alloc_size; + int err; + + alloc_size = sizeof(*acl) + mlxsw_sp_acl_tcam_priv_size(mlxsw_sp); + acl = kzalloc(alloc_size, GFP_KERNEL); + if (!acl) + return -ENOMEM; + mlxsw_sp->acl = acl; + acl->mlxsw_sp = mlxsw_sp; + acl->afk = mlxsw_afk_create(MLXSW_CORE_RES_GET(mlxsw_sp->core, + ACL_FLEX_KEYS), + mlxsw_sp->afk_ops); + if (!acl->afk) { + err = -ENOMEM; + goto err_afk_create; + } + + err = rhashtable_init(&acl->ruleset_ht, + &mlxsw_sp_acl_ruleset_ht_params); + if (err) + goto err_rhashtable_init; + + fid = mlxsw_sp_fid_dummy_get(mlxsw_sp); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + goto err_fid_get; + } + acl->dummy_fid = fid; + + INIT_LIST_HEAD(&acl->rules); + mutex_init(&acl->rules_lock); + err = mlxsw_sp_acl_tcam_init(mlxsw_sp, &acl->tcam); + if (err) + goto err_acl_ops_init; + + /* Create the delayed work for the rule activity_update */ + INIT_DELAYED_WORK(&acl->rule_activity_update.dw, + mlxsw_sp_acl_rule_activity_update_work); + acl->rule_activity_update.interval = MLXSW_SP_ACL_RULE_ACTIVITY_UPDATE_PERIOD_MS; + mlxsw_core_schedule_dw(&acl->rule_activity_update.dw, 0); + return 0; + +err_acl_ops_init: + mutex_destroy(&acl->rules_lock); + mlxsw_sp_fid_put(fid); +err_fid_get: + rhashtable_destroy(&acl->ruleset_ht); +err_rhashtable_init: + mlxsw_afk_destroy(acl->afk); +err_afk_create: + kfree(acl); + return err; +} + +void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_acl *acl = mlxsw_sp->acl; + + cancel_delayed_work_sync(&mlxsw_sp->acl->rule_activity_update.dw); + mlxsw_sp_acl_tcam_fini(mlxsw_sp, &acl->tcam); + mutex_destroy(&acl->rules_lock); + WARN_ON(!list_empty(&acl->rules)); + mlxsw_sp_fid_put(acl->dummy_fid); + rhashtable_destroy(&acl->ruleset_ht); + mlxsw_afk_destroy(acl->afk); + kfree(acl); +} + +u32 mlxsw_sp_acl_region_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_acl *acl = mlxsw_sp->acl; + + return mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get(mlxsw_sp, + &acl->tcam); +} + +int mlxsw_sp_acl_region_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, u32 val) +{ + 
struct mlxsw_sp_acl *acl = mlxsw_sp->acl; + + return mlxsw_sp_acl_tcam_vregion_rehash_intrvl_set(mlxsw_sp, + &acl->tcam, val); +} + +struct mlxsw_sp_acl_rulei_ops mlxsw_sp1_acl_rulei_ops = { + .act_mangle_field = mlxsw_sp1_acl_rulei_act_mangle_field, +}; + +struct mlxsw_sp_acl_rulei_ops mlxsw_sp2_acl_rulei_ops = { + .act_mangle_field = mlxsw_sp2_acl_rulei_act_mangle_field, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c new file mode 100644 index 000000000..4b713832f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c @@ -0,0 +1,646 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#include <linux/kernel.h> +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/gfp.h> +#include <linux/refcount.h> +#include <linux/rhashtable.h> +#define CREATE_TRACE_POINTS +#include <trace/events/mlxsw.h> + +#include "reg.h" +#include "core.h" +#include "spectrum.h" +#include "spectrum_acl_tcam.h" +#include "core_acl_flex_keys.h" + +#define MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_CLEAR_START 0 +#define MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_CLEAR_END 5 + +struct mlxsw_sp_acl_atcam_lkey_id_ht_key { + char enc_key[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; /* MSB blocks */ + u8 erp_id; +}; + +struct mlxsw_sp_acl_atcam_lkey_id { + struct rhash_head ht_node; + struct mlxsw_sp_acl_atcam_lkey_id_ht_key ht_key; + refcount_t refcnt; + u32 id; +}; + +struct mlxsw_sp_acl_atcam_region_ops { + int (*init)(struct mlxsw_sp_acl_atcam_region *aregion); + void (*fini)(struct mlxsw_sp_acl_atcam_region *aregion); + struct mlxsw_sp_acl_atcam_lkey_id * + (*lkey_id_get)(struct mlxsw_sp_acl_atcam_region *aregion, + char *enc_key, u8 erp_id); + void (*lkey_id_put)(struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_lkey_id *lkey_id); +}; + +struct mlxsw_sp_acl_atcam_region_generic { + struct mlxsw_sp_acl_atcam_lkey_id dummy_lkey_id; +}; + +struct mlxsw_sp_acl_atcam_region_12kb { + struct rhashtable lkey_ht; + unsigned int max_lkey_id; + unsigned long *used_lkey_id; +}; + +static const struct rhashtable_params mlxsw_sp_acl_atcam_lkey_id_ht_params = { + .key_len = sizeof(struct mlxsw_sp_acl_atcam_lkey_id_ht_key), + .key_offset = offsetof(struct mlxsw_sp_acl_atcam_lkey_id, ht_key), + .head_offset = offsetof(struct mlxsw_sp_acl_atcam_lkey_id, ht_node), +}; + +static const struct rhashtable_params mlxsw_sp_acl_atcam_entries_ht_params = { + .key_len = sizeof(struct mlxsw_sp_acl_atcam_entry_ht_key), + .key_offset = offsetof(struct mlxsw_sp_acl_atcam_entry, ht_key), + .head_offset = offsetof(struct mlxsw_sp_acl_atcam_entry, ht_node), +}; + +static bool +mlxsw_sp_acl_atcam_is_centry(const struct mlxsw_sp_acl_atcam_entry *aentry) +{ + return mlxsw_sp_acl_erp_mask_is_ctcam(aentry->erp_mask); +} + +static int +mlxsw_sp_acl_atcam_region_generic_init(struct mlxsw_sp_acl_atcam_region *aregion) +{ + struct mlxsw_sp_acl_atcam_region_generic *region_generic; + + region_generic = kzalloc(sizeof(*region_generic), GFP_KERNEL); + if (!region_generic) + return -ENOMEM; + + refcount_set(®ion_generic->dummy_lkey_id.refcnt, 1); + aregion->priv = region_generic; + + return 0; +} + +static void +mlxsw_sp_acl_atcam_region_generic_fini(struct mlxsw_sp_acl_atcam_region *aregion) +{ + kfree(aregion->priv); +} + +static struct mlxsw_sp_acl_atcam_lkey_id * +mlxsw_sp_acl_atcam_generic_lkey_id_get(struct mlxsw_sp_acl_atcam_region *aregion, + char *enc_key, u8 erp_id) 
+{ + struct mlxsw_sp_acl_atcam_region_generic *region_generic; + + region_generic = aregion->priv; + return ®ion_generic->dummy_lkey_id; +} + +static void +mlxsw_sp_acl_atcam_generic_lkey_id_put(struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_lkey_id *lkey_id) +{ +} + +static const struct mlxsw_sp_acl_atcam_region_ops +mlxsw_sp_acl_atcam_region_generic_ops = { + .init = mlxsw_sp_acl_atcam_region_generic_init, + .fini = mlxsw_sp_acl_atcam_region_generic_fini, + .lkey_id_get = mlxsw_sp_acl_atcam_generic_lkey_id_get, + .lkey_id_put = mlxsw_sp_acl_atcam_generic_lkey_id_put, +}; + +static int +mlxsw_sp_acl_atcam_region_12kb_init(struct mlxsw_sp_acl_atcam_region *aregion) +{ + struct mlxsw_sp *mlxsw_sp = aregion->region->mlxsw_sp; + struct mlxsw_sp_acl_atcam_region_12kb *region_12kb; + u64 max_lkey_id; + int err; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_LARGE_KEY_ID)) + return -EIO; + + max_lkey_id = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_LARGE_KEY_ID); + region_12kb = kzalloc(sizeof(*region_12kb), GFP_KERNEL); + if (!region_12kb) + return -ENOMEM; + + region_12kb->used_lkey_id = bitmap_zalloc(max_lkey_id, GFP_KERNEL); + if (!region_12kb->used_lkey_id) { + err = -ENOMEM; + goto err_used_lkey_id_alloc; + } + + err = rhashtable_init(®ion_12kb->lkey_ht, + &mlxsw_sp_acl_atcam_lkey_id_ht_params); + if (err) + goto err_rhashtable_init; + + region_12kb->max_lkey_id = max_lkey_id; + aregion->priv = region_12kb; + + return 0; + +err_rhashtable_init: + bitmap_free(region_12kb->used_lkey_id); +err_used_lkey_id_alloc: + kfree(region_12kb); + return err; +} + +static void +mlxsw_sp_acl_atcam_region_12kb_fini(struct mlxsw_sp_acl_atcam_region *aregion) +{ + struct mlxsw_sp_acl_atcam_region_12kb *region_12kb = aregion->priv; + + rhashtable_destroy(®ion_12kb->lkey_ht); + bitmap_free(region_12kb->used_lkey_id); + kfree(region_12kb); +} + +static struct mlxsw_sp_acl_atcam_lkey_id * +mlxsw_sp_acl_atcam_lkey_id_create(struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_lkey_id_ht_key *ht_key) +{ + struct mlxsw_sp_acl_atcam_region_12kb *region_12kb = aregion->priv; + struct mlxsw_sp_acl_atcam_lkey_id *lkey_id; + u32 id; + int err; + + id = find_first_zero_bit(region_12kb->used_lkey_id, + region_12kb->max_lkey_id); + if (id < region_12kb->max_lkey_id) + __set_bit(id, region_12kb->used_lkey_id); + else + return ERR_PTR(-ENOBUFS); + + lkey_id = kzalloc(sizeof(*lkey_id), GFP_KERNEL); + if (!lkey_id) { + err = -ENOMEM; + goto err_lkey_id_alloc; + } + + lkey_id->id = id; + memcpy(&lkey_id->ht_key, ht_key, sizeof(*ht_key)); + refcount_set(&lkey_id->refcnt, 1); + + err = rhashtable_insert_fast(®ion_12kb->lkey_ht, + &lkey_id->ht_node, + mlxsw_sp_acl_atcam_lkey_id_ht_params); + if (err) + goto err_rhashtable_insert; + + return lkey_id; + +err_rhashtable_insert: + kfree(lkey_id); +err_lkey_id_alloc: + __clear_bit(id, region_12kb->used_lkey_id); + return ERR_PTR(err); +} + +static void +mlxsw_sp_acl_atcam_lkey_id_destroy(struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_lkey_id *lkey_id) +{ + struct mlxsw_sp_acl_atcam_region_12kb *region_12kb = aregion->priv; + u32 id = lkey_id->id; + + rhashtable_remove_fast(®ion_12kb->lkey_ht, &lkey_id->ht_node, + mlxsw_sp_acl_atcam_lkey_id_ht_params); + kfree(lkey_id); + __clear_bit(id, region_12kb->used_lkey_id); +} + +static struct mlxsw_sp_acl_atcam_lkey_id * +mlxsw_sp_acl_atcam_12kb_lkey_id_get(struct mlxsw_sp_acl_atcam_region *aregion, + char *enc_key, u8 erp_id) +{ + struct mlxsw_sp_acl_atcam_region_12kb 
*region_12kb = aregion->priv; + struct mlxsw_sp_acl_tcam_region *region = aregion->region; + struct mlxsw_sp_acl_atcam_lkey_id_ht_key ht_key = {{ 0 } }; + struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp; + struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl); + struct mlxsw_sp_acl_atcam_lkey_id *lkey_id; + + memcpy(ht_key.enc_key, enc_key, sizeof(ht_key.enc_key)); + mlxsw_afk_clear(afk, ht_key.enc_key, + MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_CLEAR_START, + MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_CLEAR_END); + ht_key.erp_id = erp_id; + lkey_id = rhashtable_lookup_fast(®ion_12kb->lkey_ht, &ht_key, + mlxsw_sp_acl_atcam_lkey_id_ht_params); + if (lkey_id) { + refcount_inc(&lkey_id->refcnt); + return lkey_id; + } + + return mlxsw_sp_acl_atcam_lkey_id_create(aregion, &ht_key); +} + +static void +mlxsw_sp_acl_atcam_12kb_lkey_id_put(struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_lkey_id *lkey_id) +{ + if (refcount_dec_and_test(&lkey_id->refcnt)) + mlxsw_sp_acl_atcam_lkey_id_destroy(aregion, lkey_id); +} + +static const struct mlxsw_sp_acl_atcam_region_ops +mlxsw_sp_acl_atcam_region_12kb_ops = { + .init = mlxsw_sp_acl_atcam_region_12kb_init, + .fini = mlxsw_sp_acl_atcam_region_12kb_fini, + .lkey_id_get = mlxsw_sp_acl_atcam_12kb_lkey_id_get, + .lkey_id_put = mlxsw_sp_acl_atcam_12kb_lkey_id_put, +}; + +static const struct mlxsw_sp_acl_atcam_region_ops * +mlxsw_sp_acl_atcam_region_ops_arr[] = { + [MLXSW_SP_ACL_ATCAM_REGION_TYPE_2KB] = + &mlxsw_sp_acl_atcam_region_generic_ops, + [MLXSW_SP_ACL_ATCAM_REGION_TYPE_4KB] = + &mlxsw_sp_acl_atcam_region_generic_ops, + [MLXSW_SP_ACL_ATCAM_REGION_TYPE_8KB] = + &mlxsw_sp_acl_atcam_region_generic_ops, + [MLXSW_SP_ACL_ATCAM_REGION_TYPE_12KB] = + &mlxsw_sp_acl_atcam_region_12kb_ops, +}; + +int mlxsw_sp_acl_atcam_region_associate(struct mlxsw_sp *mlxsw_sp, + u16 region_id) +{ + char perar_pl[MLXSW_REG_PERAR_LEN]; + /* For now, just assume that every region has 12 key blocks */ + u16 hw_region = region_id * 3; + u64 max_regions; + + max_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_REGIONS); + if (hw_region >= max_regions) + return -ENOBUFS; + + mlxsw_reg_perar_pack(perar_pl, region_id, hw_region); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(perar), perar_pl); +} + +static void +mlxsw_sp_acl_atcam_region_type_init(struct mlxsw_sp_acl_atcam_region *aregion) +{ + struct mlxsw_sp_acl_tcam_region *region = aregion->region; + enum mlxsw_sp_acl_atcam_region_type region_type; + unsigned int blocks_count; + + /* We already know the blocks count can not exceed the maximum + * blocks count. 
+ */ + blocks_count = mlxsw_afk_key_info_blocks_count_get(region->key_info); + if (blocks_count <= 2) + region_type = MLXSW_SP_ACL_ATCAM_REGION_TYPE_2KB; + else if (blocks_count <= 4) + region_type = MLXSW_SP_ACL_ATCAM_REGION_TYPE_4KB; + else if (blocks_count <= 8) + region_type = MLXSW_SP_ACL_ATCAM_REGION_TYPE_8KB; + else + region_type = MLXSW_SP_ACL_ATCAM_REGION_TYPE_12KB; + + aregion->type = region_type; + aregion->ops = mlxsw_sp_acl_atcam_region_ops_arr[region_type]; +} + +int +mlxsw_sp_acl_atcam_region_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam *atcam, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_tcam_region *region, + void *hints_priv, + const struct mlxsw_sp_acl_ctcam_region_ops *ops) +{ + int err; + + aregion->region = region; + aregion->atcam = atcam; + mlxsw_sp_acl_atcam_region_type_init(aregion); + INIT_LIST_HEAD(&aregion->entries_list); + + err = rhashtable_init(&aregion->entries_ht, + &mlxsw_sp_acl_atcam_entries_ht_params); + if (err) + return err; + err = aregion->ops->init(aregion); + if (err) + goto err_ops_init; + err = mlxsw_sp_acl_erp_region_init(aregion, hints_priv); + if (err) + goto err_erp_region_init; + err = mlxsw_sp_acl_ctcam_region_init(mlxsw_sp, &aregion->cregion, + region, ops); + if (err) + goto err_ctcam_region_init; + + return 0; + +err_ctcam_region_init: + mlxsw_sp_acl_erp_region_fini(aregion); +err_erp_region_init: + aregion->ops->fini(aregion); +err_ops_init: + rhashtable_destroy(&aregion->entries_ht); + return err; +} + +void mlxsw_sp_acl_atcam_region_fini(struct mlxsw_sp_acl_atcam_region *aregion) +{ + mlxsw_sp_acl_ctcam_region_fini(&aregion->cregion); + mlxsw_sp_acl_erp_region_fini(aregion); + aregion->ops->fini(aregion); + rhashtable_destroy(&aregion->entries_ht); + WARN_ON(!list_empty(&aregion->entries_list)); +} + +void mlxsw_sp_acl_atcam_chunk_init(struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_chunk *achunk, + unsigned int priority) +{ + mlxsw_sp_acl_ctcam_chunk_init(&aregion->cregion, &achunk->cchunk, + priority); +} + +void mlxsw_sp_acl_atcam_chunk_fini(struct mlxsw_sp_acl_atcam_chunk *achunk) +{ + mlxsw_sp_acl_ctcam_chunk_fini(&achunk->cchunk); +} + +static int +mlxsw_sp_acl_atcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_entry *aentry, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp_acl_tcam_region *region = aregion->region; + u8 erp_id = mlxsw_sp_acl_erp_mask_erp_id(aentry->erp_mask); + struct mlxsw_sp_acl_atcam_lkey_id *lkey_id; + char ptce3_pl[MLXSW_REG_PTCE3_LEN]; + u32 kvdl_index, priority; + int err; + + err = mlxsw_sp_acl_tcam_priority_get(mlxsw_sp, rulei, &priority, true); + if (err) + return err; + + lkey_id = aregion->ops->lkey_id_get(aregion, aentry->enc_key, erp_id); + if (IS_ERR(lkey_id)) + return PTR_ERR(lkey_id); + aentry->lkey_id = lkey_id; + + kvdl_index = mlxsw_afa_block_first_kvdl_index(rulei->act_block); + mlxsw_reg_ptce3_pack(ptce3_pl, true, MLXSW_REG_PTCE3_OP_WRITE_WRITE, + priority, region->tcam_region_info, + aentry->enc_key, erp_id, + aentry->delta_info.start, + aentry->delta_info.mask, + aentry->delta_info.value, + refcount_read(&lkey_id->refcnt) != 1, lkey_id->id, + kvdl_index); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce3), ptce3_pl); + if (err) + goto err_ptce3_write; + + return 0; + +err_ptce3_write: + aregion->ops->lkey_id_put(aregion, lkey_id); + return err; +} + +static void +mlxsw_sp_acl_atcam_region_entry_remove(struct mlxsw_sp *mlxsw_sp, + struct 
mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_entry *aentry) +{ + struct mlxsw_sp_acl_atcam_lkey_id *lkey_id = aentry->lkey_id; + struct mlxsw_sp_acl_tcam_region *region = aregion->region; + u8 erp_id = mlxsw_sp_acl_erp_mask_erp_id(aentry->erp_mask); + char ptce3_pl[MLXSW_REG_PTCE3_LEN]; + + mlxsw_reg_ptce3_pack(ptce3_pl, false, MLXSW_REG_PTCE3_OP_WRITE_WRITE, 0, + region->tcam_region_info, + aentry->enc_key, erp_id, + aentry->delta_info.start, + aentry->delta_info.mask, + aentry->delta_info.value, + refcount_read(&lkey_id->refcnt) != 1, + lkey_id->id, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce3), ptce3_pl); + aregion->ops->lkey_id_put(aregion, lkey_id); +} + +static int +mlxsw_sp_acl_atcam_region_entry_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_entry *aentry, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp_acl_atcam_lkey_id *lkey_id = aentry->lkey_id; + u8 erp_id = mlxsw_sp_acl_erp_mask_erp_id(aentry->erp_mask); + struct mlxsw_sp_acl_tcam_region *region = aregion->region; + char ptce3_pl[MLXSW_REG_PTCE3_LEN]; + u32 kvdl_index, priority; + int err; + + err = mlxsw_sp_acl_tcam_priority_get(mlxsw_sp, rulei, &priority, true); + if (err) + return err; + kvdl_index = mlxsw_afa_block_first_kvdl_index(rulei->act_block); + mlxsw_reg_ptce3_pack(ptce3_pl, true, MLXSW_REG_PTCE3_OP_WRITE_UPDATE, + priority, region->tcam_region_info, + aentry->enc_key, erp_id, + aentry->delta_info.start, + aentry->delta_info.mask, + aentry->delta_info.value, + refcount_read(&lkey_id->refcnt) != 1, lkey_id->id, + kvdl_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce3), ptce3_pl); +} + +static int +__mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_entry *aentry, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp_acl_tcam_region *region = aregion->region; + char mask[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN] = { 0 }; + struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl); + const struct mlxsw_sp_acl_erp_delta *delta; + struct mlxsw_sp_acl_erp_mask *erp_mask; + int err; + + mlxsw_afk_encode(afk, region->key_info, &rulei->values, + aentry->ht_key.full_enc_key, mask); + + erp_mask = mlxsw_sp_acl_erp_mask_get(aregion, mask, false); + if (IS_ERR(erp_mask)) + return PTR_ERR(erp_mask); + aentry->erp_mask = erp_mask; + aentry->ht_key.erp_id = mlxsw_sp_acl_erp_mask_erp_id(erp_mask); + memcpy(aentry->enc_key, aentry->ht_key.full_enc_key, + sizeof(aentry->enc_key)); + + /* Compute all needed delta information and clear the delta bits + * from the encrypted key. + */ + delta = mlxsw_sp_acl_erp_delta(aentry->erp_mask); + aentry->delta_info.start = mlxsw_sp_acl_erp_delta_start(delta); + aentry->delta_info.mask = mlxsw_sp_acl_erp_delta_mask(delta); + aentry->delta_info.value = + mlxsw_sp_acl_erp_delta_value(delta, + aentry->ht_key.full_enc_key); + mlxsw_sp_acl_erp_delta_clear(delta, aentry->enc_key); + + /* Add rule to the list of A-TCAM rules, assuming this + * rule is intended to A-TCAM. In case this rule does + * not fit into A-TCAM it will be removed from the list. 
+ */ + list_add(&aentry->list, &aregion->entries_list); + + /* We can't insert identical rules into the A-TCAM, so fail and + * let the rule spill into C-TCAM + */ + err = rhashtable_lookup_insert_fast(&aregion->entries_ht, + &aentry->ht_node, + mlxsw_sp_acl_atcam_entries_ht_params); + if (err) + goto err_rhashtable_insert; + + /* Bloom filter must be updated here, before inserting the rule into + * the A-TCAM. + */ + err = mlxsw_sp_acl_erp_bf_insert(mlxsw_sp, aregion, erp_mask, aentry); + if (err) + goto err_bf_insert; + + err = mlxsw_sp_acl_atcam_region_entry_insert(mlxsw_sp, aregion, aentry, + rulei); + if (err) + goto err_rule_insert; + + return 0; + +err_rule_insert: + mlxsw_sp_acl_erp_bf_remove(mlxsw_sp, aregion, erp_mask, aentry); +err_bf_insert: + rhashtable_remove_fast(&aregion->entries_ht, &aentry->ht_node, + mlxsw_sp_acl_atcam_entries_ht_params); +err_rhashtable_insert: + list_del(&aentry->list); + mlxsw_sp_acl_erp_mask_put(aregion, erp_mask); + return err; +} + +static void +__mlxsw_sp_acl_atcam_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_entry *aentry) +{ + mlxsw_sp_acl_atcam_region_entry_remove(mlxsw_sp, aregion, aentry); + mlxsw_sp_acl_erp_bf_remove(mlxsw_sp, aregion, aentry->erp_mask, aentry); + rhashtable_remove_fast(&aregion->entries_ht, &aentry->ht_node, + mlxsw_sp_acl_atcam_entries_ht_params); + list_del(&aentry->list); + mlxsw_sp_acl_erp_mask_put(aregion, aentry->erp_mask); +} + +static int +__mlxsw_sp_acl_atcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_entry *aentry, + struct mlxsw_sp_acl_rule_info *rulei) +{ + return mlxsw_sp_acl_atcam_region_entry_action_replace(mlxsw_sp, aregion, + aentry, rulei); +} + +int mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_chunk *achunk, + struct mlxsw_sp_acl_atcam_entry *aentry, + struct mlxsw_sp_acl_rule_info *rulei) +{ + int err; + + err = __mlxsw_sp_acl_atcam_entry_add(mlxsw_sp, aregion, aentry, rulei); + if (!err) + return 0; + + /* It is possible we failed to add the rule to the A-TCAM due to + * exceeded number of masks. Try to spill into C-TCAM. 
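 * Whether the entry ended up in the A-TCAM or spilled into the C-TCAM is
 * later recovered from its eRP mask (see mlxsw_sp_acl_atcam_is_centry()),
 * so mlxsw_sp_acl_atcam_entry_del() and the action-replace path take the
 * matching C-TCAM or A-TCAM branch for it.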
+ */ + trace_mlxsw_sp_acl_atcam_entry_add_ctcam_spill(mlxsw_sp, aregion); + err = mlxsw_sp_acl_ctcam_entry_add(mlxsw_sp, &aregion->cregion, + &achunk->cchunk, &aentry->centry, + rulei, true); + if (!err) + return 0; + + return err; +} + +void mlxsw_sp_acl_atcam_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_chunk *achunk, + struct mlxsw_sp_acl_atcam_entry *aentry) +{ + if (mlxsw_sp_acl_atcam_is_centry(aentry)) + mlxsw_sp_acl_ctcam_entry_del(mlxsw_sp, &aregion->cregion, + &achunk->cchunk, &aentry->centry); + else + __mlxsw_sp_acl_atcam_entry_del(mlxsw_sp, aregion, aentry); +} + +int +mlxsw_sp_acl_atcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_entry *aentry, + struct mlxsw_sp_acl_rule_info *rulei) +{ + int err; + + if (mlxsw_sp_acl_atcam_is_centry(aentry)) + err = mlxsw_sp_acl_ctcam_entry_action_replace(mlxsw_sp, + &aregion->cregion, + &aentry->centry, + rulei); + else + err = __mlxsw_sp_acl_atcam_entry_action_replace(mlxsw_sp, + aregion, aentry, + rulei); + + return err; +} + +int mlxsw_sp_acl_atcam_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam *atcam) +{ + return mlxsw_sp_acl_erps_init(mlxsw_sp, atcam); +} + +void mlxsw_sp_acl_atcam_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam *atcam) +{ + mlxsw_sp_acl_erps_fini(mlxsw_sp, atcam); +} + +void * +mlxsw_sp_acl_atcam_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion) +{ + return mlxsw_sp_acl_erp_rehash_hints_get(aregion); +} + +void mlxsw_sp_acl_atcam_rehash_hints_put(void *hints_priv) +{ + mlxsw_sp_acl_erp_rehash_hints_put(hints_priv); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c new file mode 100644 index 000000000..95f63fcf4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c @@ -0,0 +1,522 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#include <linux/errno.h> +#include <linux/gfp.h> +#include <linux/kernel.h> +#include <linux/refcount.h> +#include <linux/mutex.h> + +#include "spectrum.h" +#include "spectrum_acl_tcam.h" + +struct mlxsw_sp_acl_bf { + struct mutex lock; /* Protects Bloom Filter updates. */ + unsigned int bank_size; + refcount_t refcnt[]; +}; + +/* Bloom filter uses a crc-16 hash over chunks of data which contain 4 key + * blocks, eRP ID and region ID. In Spectrum-2 and above, region key is combined + * of up to 12 key blocks, so there can be up to 3 chunks in the Bloom filter + * key, depending on the actual number of key blocks used in the region. + * The layout of the Bloom filter key is as follows: + * + * +-------------------------+------------------------+------------------------+ + * | Chunk 2 Key blocks 11-8 | Chunk 1 Key blocks 7-4 | Chunk 0 Key blocks 3-0 | + * +-------------------------+------------------------+------------------------+ + */ +#define MLXSW_BLOOM_KEY_CHUNKS 3 + +/* Spectrum-2 and Spectrum-3 chunks */ +#define MLXSW_SP2_BLOOM_KEY_LEN 69 + +/* Each chunk size is 23 bytes. 18 bytes of it contain 4 key blocks, each is + * 36 bits, 2 bytes which hold eRP ID and region ID, and 3 bytes of zero + * padding. 
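 * In other words, MLXSW_SP2_BLOOM_KEY_CHUNK_BYTES is
 * 3 (pad) + 2 (region ID + eRP ID) + 18 (key blocks) = 23 bytes, and with
 * MLXSW_BLOOM_KEY_CHUNKS = 3 chunks this gives the 69-byte
 * MLXSW_SP2_BLOOM_KEY_LEN defined above.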
+ * The layout of each chunk is as follows: + * + * +---------+----------------------+-----------------------------------+ + * | 3 bytes | 2 bytes | 18 bytes | + * +---------+-----------+----------+-----------------------------------+ + * | 183:158 | 157:148 | 147:144 | 143:0 | + * +---------+-----------+----------+-----------------------------------+ + * | 0 | region ID | eRP ID | 4 Key blocks (18 Bytes) | + * +---------+-----------+----------+-----------------------------------+ + */ +#define MLXSW_SP2_BLOOM_CHUNK_PAD_BYTES 3 +#define MLXSW_SP2_BLOOM_CHUNK_KEY_BYTES 18 +#define MLXSW_SP2_BLOOM_KEY_CHUNK_BYTES 23 + +/* The offset of the key block within a chunk is 5 bytes as it comes after + * 3 bytes of zero padding and 16 bits of region ID and eRP ID. + */ +#define MLXSW_SP2_BLOOM_CHUNK_KEY_OFFSET 5 + +/* This table is just the CRC of each possible byte which is used for + * Spectrum-{2-3}. It is computed, Msbit first, for the Bloom filter + * polynomial which is 0x8529 (1 + x^3 + x^5 + x^8 + x^10 + x^15 and + * the implicit x^16). + */ +static const u16 mlxsw_sp2_acl_bf_crc16_tab[256] = { +0x0000, 0x8529, 0x8f7b, 0x0a52, 0x9bdf, 0x1ef6, 0x14a4, 0x918d, +0xb297, 0x37be, 0x3dec, 0xb8c5, 0x2948, 0xac61, 0xa633, 0x231a, +0xe007, 0x652e, 0x6f7c, 0xea55, 0x7bd8, 0xfef1, 0xf4a3, 0x718a, +0x5290, 0xd7b9, 0xddeb, 0x58c2, 0xc94f, 0x4c66, 0x4634, 0xc31d, +0x4527, 0xc00e, 0xca5c, 0x4f75, 0xdef8, 0x5bd1, 0x5183, 0xd4aa, +0xf7b0, 0x7299, 0x78cb, 0xfde2, 0x6c6f, 0xe946, 0xe314, 0x663d, +0xa520, 0x2009, 0x2a5b, 0xaf72, 0x3eff, 0xbbd6, 0xb184, 0x34ad, +0x17b7, 0x929e, 0x98cc, 0x1de5, 0x8c68, 0x0941, 0x0313, 0x863a, +0x8a4e, 0x0f67, 0x0535, 0x801c, 0x1191, 0x94b8, 0x9eea, 0x1bc3, +0x38d9, 0xbdf0, 0xb7a2, 0x328b, 0xa306, 0x262f, 0x2c7d, 0xa954, +0x6a49, 0xef60, 0xe532, 0x601b, 0xf196, 0x74bf, 0x7eed, 0xfbc4, +0xd8de, 0x5df7, 0x57a5, 0xd28c, 0x4301, 0xc628, 0xcc7a, 0x4953, +0xcf69, 0x4a40, 0x4012, 0xc53b, 0x54b6, 0xd19f, 0xdbcd, 0x5ee4, +0x7dfe, 0xf8d7, 0xf285, 0x77ac, 0xe621, 0x6308, 0x695a, 0xec73, +0x2f6e, 0xaa47, 0xa015, 0x253c, 0xb4b1, 0x3198, 0x3bca, 0xbee3, +0x9df9, 0x18d0, 0x1282, 0x97ab, 0x0626, 0x830f, 0x895d, 0x0c74, +0x91b5, 0x149c, 0x1ece, 0x9be7, 0x0a6a, 0x8f43, 0x8511, 0x0038, +0x2322, 0xa60b, 0xac59, 0x2970, 0xb8fd, 0x3dd4, 0x3786, 0xb2af, +0x71b2, 0xf49b, 0xfec9, 0x7be0, 0xea6d, 0x6f44, 0x6516, 0xe03f, +0xc325, 0x460c, 0x4c5e, 0xc977, 0x58fa, 0xddd3, 0xd781, 0x52a8, +0xd492, 0x51bb, 0x5be9, 0xdec0, 0x4f4d, 0xca64, 0xc036, 0x451f, +0x6605, 0xe32c, 0xe97e, 0x6c57, 0xfdda, 0x78f3, 0x72a1, 0xf788, +0x3495, 0xb1bc, 0xbbee, 0x3ec7, 0xaf4a, 0x2a63, 0x2031, 0xa518, +0x8602, 0x032b, 0x0979, 0x8c50, 0x1ddd, 0x98f4, 0x92a6, 0x178f, +0x1bfb, 0x9ed2, 0x9480, 0x11a9, 0x8024, 0x050d, 0x0f5f, 0x8a76, +0xa96c, 0x2c45, 0x2617, 0xa33e, 0x32b3, 0xb79a, 0xbdc8, 0x38e1, +0xfbfc, 0x7ed5, 0x7487, 0xf1ae, 0x6023, 0xe50a, 0xef58, 0x6a71, +0x496b, 0xcc42, 0xc610, 0x4339, 0xd2b4, 0x579d, 0x5dcf, 0xd8e6, +0x5edc, 0xdbf5, 0xd1a7, 0x548e, 0xc503, 0x402a, 0x4a78, 0xcf51, +0xec4b, 0x6962, 0x6330, 0xe619, 0x7794, 0xf2bd, 0xf8ef, 0x7dc6, +0xbedb, 0x3bf2, 0x31a0, 0xb489, 0x2504, 0xa02d, 0xaa7f, 0x2f56, +0x0c4c, 0x8965, 0x8337, 0x061e, 0x9793, 0x12ba, 0x18e8, 0x9dc1, +}; + +/* Spectrum-4 chunks */ +#define MLXSW_SP4_BLOOM_KEY_LEN 60 + +/* In Spectrum-4, there is no padding. Each chunk size is 20 bytes. + * 18 bytes of it contain 4 key blocks, each is 36 bits, and 2 bytes which hold + * eRP ID and region ID. 
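+ * Without the 3 pad bytes a chunk shrinks from 23 to 20 bytes, so the whole
+ * key is 3 * 20 = 60 bytes, which is MLXSW_SP4_BLOOM_KEY_LEN above.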
+ * The layout of each chunk is as follows: + * + * +----------------------+-----------------------------------+ + * | 2 bytes | 18 bytes | + * +-----------+----------+-----------------------------------+ + * | 157:148 | 147:144 | 143:0 | + * +---------+-----------+----------+-------------------------+ + * | region ID | eRP ID | 4 Key blocks (18 Bytes) | + * +-----------+----------+-----------------------------------+ + */ + +#define MLXSW_SP4_BLOOM_CHUNK_PAD_BYTES 0 +#define MLXSW_SP4_BLOOM_CHUNK_KEY_BYTES 18 +#define MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES 20 + +/* The offset of the key block within a chunk is 2 bytes as it comes after + * 16 bits of region ID and eRP ID. + */ +#define MLXSW_SP4_BLOOM_CHUNK_KEY_OFFSET 2 + +/* For Spectrum-4, two hash functions are used, CRC-10 and CRC-6 based. + * The result is combination of the two calculations - + * 6 bit column are MSB (result of CRC-6), + * 10 bit row are LSB (result of CRC-10). + */ + +/* This table is just the CRC of each possible byte which is used for + * Spectrum-4. It is computed, Msbit first, for the Bloom filter + * polynomial which is 0x1b (1 + x^1 + x^3 + x^4 and the implicit x^10). + */ +static const u16 mlxsw_sp4_acl_bf_crc10_tab[256] = { +0x0000, 0x001b, 0x0036, 0x002d, 0x006c, 0x0077, 0x005a, 0x0041, +0x00d8, 0x00c3, 0x00ee, 0x00f5, 0x00b4, 0x00af, 0x0082, 0x0099, +0x01b0, 0x01ab, 0x0186, 0x019d, 0x01dc, 0x01c7, 0x01ea, 0x01f1, +0x0168, 0x0173, 0x015e, 0x0145, 0x0104, 0x011f, 0x0132, 0x0129, +0x0360, 0x037b, 0x0356, 0x034d, 0x030c, 0x0317, 0x033a, 0x0321, +0x03b8, 0x03a3, 0x038e, 0x0395, 0x03d4, 0x03cf, 0x03e2, 0x03f9, +0x02d0, 0x02cb, 0x02e6, 0x02fd, 0x02bc, 0x02a7, 0x028a, 0x0291, +0x0208, 0x0213, 0x023e, 0x0225, 0x0264, 0x027f, 0x0252, 0x0249, +0x02db, 0x02c0, 0x02ed, 0x02f6, 0x02b7, 0x02ac, 0x0281, 0x029a, +0x0203, 0x0218, 0x0235, 0x022e, 0x026f, 0x0274, 0x0259, 0x0242, +0x036b, 0x0370, 0x035d, 0x0346, 0x0307, 0x031c, 0x0331, 0x032a, +0x03b3, 0x03a8, 0x0385, 0x039e, 0x03df, 0x03c4, 0x03e9, 0x03f2, +0x01bb, 0x01a0, 0x018d, 0x0196, 0x01d7, 0x01cc, 0x01e1, 0x01fa, +0x0163, 0x0178, 0x0155, 0x014e, 0x010f, 0x0114, 0x0139, 0x0122, +0x000b, 0x0010, 0x003d, 0x0026, 0x0067, 0x007c, 0x0051, 0x004a, +0x00d3, 0x00c8, 0x00e5, 0x00fe, 0x00bf, 0x00a4, 0x0089, 0x0092, +0x01ad, 0x01b6, 0x019b, 0x0180, 0x01c1, 0x01da, 0x01f7, 0x01ec, +0x0175, 0x016e, 0x0143, 0x0158, 0x0119, 0x0102, 0x012f, 0x0134, +0x001d, 0x0006, 0x002b, 0x0030, 0x0071, 0x006a, 0x0047, 0x005c, +0x00c5, 0x00de, 0x00f3, 0x00e8, 0x00a9, 0x00b2, 0x009f, 0x0084, +0x02cd, 0x02d6, 0x02fb, 0x02e0, 0x02a1, 0x02ba, 0x0297, 0x028c, +0x0215, 0x020e, 0x0223, 0x0238, 0x0279, 0x0262, 0x024f, 0x0254, +0x037d, 0x0366, 0x034b, 0x0350, 0x0311, 0x030a, 0x0327, 0x033c, +0x03a5, 0x03be, 0x0393, 0x0388, 0x03c9, 0x03d2, 0x03ff, 0x03e4, +0x0376, 0x036d, 0x0340, 0x035b, 0x031a, 0x0301, 0x032c, 0x0337, +0x03ae, 0x03b5, 0x0398, 0x0383, 0x03c2, 0x03d9, 0x03f4, 0x03ef, +0x02c6, 0x02dd, 0x02f0, 0x02eb, 0x02aa, 0x02b1, 0x029c, 0x0287, +0x021e, 0x0205, 0x0228, 0x0233, 0x0272, 0x0269, 0x0244, 0x025f, +0x0016, 0x000d, 0x0020, 0x003b, 0x007a, 0x0061, 0x004c, 0x0057, +0x00ce, 0x00d5, 0x00f8, 0x00e3, 0x00a2, 0x00b9, 0x0094, 0x008f, +0x01a6, 0x01bd, 0x0190, 0x018b, 0x01ca, 0x01d1, 0x01fc, 0x01e7, +0x017e, 0x0165, 0x0148, 0x0153, 0x0112, 0x0109, 0x0124, 0x013f, +}; + +/* This table is just the CRC of each possible byte which is used for + * Spectrum-4. It is computed, Msbit first, for the Bloom filter + * polynomial which is 0x2d (1 + x^2+ x^3 + x^5 and the implicit x^6). 
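+ * For example, the table entry for input byte 0x01 works out to
+ * x^6 mod g(x) = x^5 + x^3 + x^2 + 1 = 0x2d, the second value in the table
+ * below.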
+ */ +static const u8 mlxsw_sp4_acl_bf_crc6_tab[256] = { +0x00, 0x2d, 0x37, 0x1a, 0x03, 0x2e, 0x34, 0x19, +0x06, 0x2b, 0x31, 0x1c, 0x05, 0x28, 0x32, 0x1f, +0x0c, 0x21, 0x3b, 0x16, 0x0f, 0x22, 0x38, 0x15, +0x0a, 0x27, 0x3d, 0x10, 0x09, 0x24, 0x3e, 0x13, +0x18, 0x35, 0x2f, 0x02, 0x1b, 0x36, 0x2c, 0x01, +0x1e, 0x33, 0x29, 0x04, 0x1d, 0x30, 0x2a, 0x07, +0x14, 0x39, 0x23, 0x0e, 0x17, 0x3a, 0x20, 0x0d, +0x12, 0x3f, 0x25, 0x08, 0x11, 0x3c, 0x26, 0x0b, +0x30, 0x1d, 0x07, 0x2a, 0x33, 0x1e, 0x04, 0x29, +0x36, 0x1b, 0x01, 0x2c, 0x35, 0x18, 0x02, 0x2f, +0x3c, 0x11, 0x0b, 0x26, 0x3f, 0x12, 0x08, 0x25, +0x3a, 0x17, 0x0d, 0x20, 0x39, 0x14, 0x0e, 0x23, +0x28, 0x05, 0x1f, 0x32, 0x2b, 0x06, 0x1c, 0x31, +0x2e, 0x03, 0x19, 0x34, 0x2d, 0x00, 0x1a, 0x37, +0x24, 0x09, 0x13, 0x3e, 0x27, 0x0a, 0x10, 0x3d, +0x22, 0x0f, 0x15, 0x38, 0x21, 0x0c, 0x16, 0x3b, +0x0d, 0x20, 0x3a, 0x17, 0x0e, 0x23, 0x39, 0x14, +0x0b, 0x26, 0x3c, 0x11, 0x08, 0x25, 0x3f, 0x12, +0x01, 0x2c, 0x36, 0x1b, 0x02, 0x2f, 0x35, 0x18, +0x07, 0x2a, 0x30, 0x1d, 0x04, 0x29, 0x33, 0x1e, +0x15, 0x38, 0x22, 0x0f, 0x16, 0x3b, 0x21, 0x0c, +0x13, 0x3e, 0x24, 0x09, 0x10, 0x3d, 0x27, 0x0a, +0x19, 0x34, 0x2e, 0x03, 0x1a, 0x37, 0x2d, 0x00, +0x1f, 0x32, 0x28, 0x05, 0x1c, 0x31, 0x2b, 0x06, +0x3d, 0x10, 0x0a, 0x27, 0x3e, 0x13, 0x09, 0x24, +0x3b, 0x16, 0x0c, 0x21, 0x38, 0x15, 0x0f, 0x22, +0x31, 0x1c, 0x06, 0x2b, 0x32, 0x1f, 0x05, 0x28, +0x37, 0x1a, 0x00, 0x2d, 0x34, 0x19, 0x03, 0x2e, +0x25, 0x08, 0x12, 0x3f, 0x26, 0x0b, 0x11, 0x3c, +0x23, 0x0e, 0x14, 0x39, 0x20, 0x0d, 0x17, 0x3a, +0x29, 0x04, 0x1e, 0x33, 0x2a, 0x07, 0x1d, 0x30, +0x2f, 0x02, 0x18, 0x35, 0x2c, 0x01, 0x1b, 0x36, +}; + +/* Each chunk contains 4 key blocks. Chunk 2 uses key blocks 11-8, + * and we need to populate it with 4 key blocks copied from the entry encoded + * key. The original keys layout is same for Spectrum-{2,3,4}. + * Since the encoded key contains a 2 bytes padding, key block 11 starts at + * offset 2. block 7 that is used in chunk 1 starts at offset 20 as 4 key blocks + * take 18 bytes. See 'MLXSW_SP2_AFK_BLOCK_LAYOUT' for more details. + * This array defines key offsets for easy access when copying key blocks from + * entry key to Bloom filter chunk. 
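+ * Hence the offsets below: 2 (past the 2 padding bytes), 20 (= 2 + 18) and
+ * 38 (= 20 + 18).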
+ */ +static const u8 chunk_key_offsets[MLXSW_BLOOM_KEY_CHUNKS] = {2, 20, 38}; + +static u16 mlxsw_sp2_acl_bf_crc16_byte(u16 crc, u8 c) +{ + return (crc << 8) ^ mlxsw_sp2_acl_bf_crc16_tab[(crc >> 8) ^ c]; +} + +static u16 mlxsw_sp2_acl_bf_crc(const u8 *buffer, size_t len) +{ + u16 crc = 0; + + while (len--) + crc = mlxsw_sp2_acl_bf_crc16_byte(crc, *buffer++); + return crc; +} + +static void +__mlxsw_sp_acl_bf_key_encode(struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_entry *aentry, + char *output, u8 *len, u8 max_chunks, u8 pad_bytes, + u8 key_offset, u8 chunk_key_len, u8 chunk_len) +{ + struct mlxsw_afk_key_info *key_info = aregion->region->key_info; + u8 chunk_index, chunk_count, block_count; + char *chunk = output; + __be16 erp_region_id; + + block_count = mlxsw_afk_key_info_blocks_count_get(key_info); + chunk_count = 1 + ((block_count - 1) >> 2); + erp_region_id = cpu_to_be16(aentry->ht_key.erp_id | + (aregion->region->id << 4)); + for (chunk_index = max_chunks - chunk_count; chunk_index < max_chunks; + chunk_index++) { + memset(chunk, 0, pad_bytes); + memcpy(chunk + pad_bytes, &erp_region_id, + sizeof(erp_region_id)); + memcpy(chunk + key_offset, + &aentry->enc_key[chunk_key_offsets[chunk_index]], + chunk_key_len); + chunk += chunk_len; + } + *len = chunk_count * chunk_len; +} + +static void +mlxsw_sp2_acl_bf_key_encode(struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_entry *aentry, + char *output, u8 *len) +{ + __mlxsw_sp_acl_bf_key_encode(aregion, aentry, output, len, + MLXSW_BLOOM_KEY_CHUNKS, + MLXSW_SP2_BLOOM_CHUNK_PAD_BYTES, + MLXSW_SP2_BLOOM_CHUNK_KEY_OFFSET, + MLXSW_SP2_BLOOM_CHUNK_KEY_BYTES, + MLXSW_SP2_BLOOM_KEY_CHUNK_BYTES); +} + +static unsigned int +mlxsw_sp2_acl_bf_index_get(struct mlxsw_sp_acl_bf *bf, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_entry *aentry) +{ + char bf_key[MLXSW_SP2_BLOOM_KEY_LEN]; + u8 bf_size; + + mlxsw_sp2_acl_bf_key_encode(aregion, aentry, bf_key, &bf_size); + return mlxsw_sp2_acl_bf_crc(bf_key, bf_size); +} + +static u16 mlxsw_sp4_acl_bf_crc10_byte(u16 crc, u8 c) +{ + u8 index = ((crc >> 2) ^ c) & 0xff; + + return ((crc << 8) ^ mlxsw_sp4_acl_bf_crc10_tab[index]) & 0x3ff; +} + +static u16 mlxsw_sp4_acl_bf_crc6_byte(u16 crc, u8 c) +{ + u8 index = (crc ^ c) & 0xff; + + return ((crc << 6) ^ (mlxsw_sp4_acl_bf_crc6_tab[index] << 2)) & 0xfc; +} + +static u16 mlxsw_sp4_acl_bf_crc(const u8 *buffer, size_t len) +{ + u16 crc_row = 0, crc_col = 0; + + while (len--) { + crc_row = mlxsw_sp4_acl_bf_crc10_byte(crc_row, *buffer); + crc_col = mlxsw_sp4_acl_bf_crc6_byte(crc_col, *buffer); + buffer++; + } + + crc_col >>= 2; + + /* 6 bit column are MSB, 10 bit row are LSB */ + return (crc_col << 10) | crc_row; +} + +static void right_shift_array(char *arr, u8 len, u8 shift_bits) +{ + u8 byte_mask = 0xff >> shift_bits; + int i; + + if (WARN_ON(!shift_bits || shift_bits >= 8)) + return; + + for (i = len - 1; i >= 0; i--) { + /* The first iteration looks like out-of-bounds access, + * but actually references a buffer that the array is shifted + * into. This move is legal as we never send the last chunk to + * this function. + */ + arr[i + 1] &= byte_mask; + arr[i + 1] |= arr[i] << (8 - shift_bits); + arr[i] = arr[i] >> shift_bits; + } +} + +static void mlxsw_sp4_bf_key_shift_chunks(u8 chunk_count, char *output) +{ + /* The chunks are supposed to be continuous, with no padding.
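+	 * As the chunk layout above shows, the region ID takes 10 bits and the
+	 * eRP ID takes 4 bits, so the two IDs fill only 14 of the 16 bits
+	 * reserved for them.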
+ * Since region ID and eRP ID use 14 bits, and not fully 2 bytes, + * and in Spectrum-4 there is no padding, it is necessary to shift some + * chunks 2 bits right. + */ + switch (chunk_count) { + case 2: + /* The chunks are copied as follow: + * +-------------+-----------------+ + * | Chunk 0 | Chunk 1 | + * | IDs | keys |(**) IDs | keys | + * +-------------+-----------------+ + * In (**), there are two unused bits, therefore, chunk 0 needs + * to be shifted two bits right. + */ + right_shift_array(output, MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES, 2); + break; + case 3: + /* The chunks are copied as follow: + * +-------------+-----------------+-----------------+ + * | Chunk 0 | Chunk 1 | Chunk 2 | + * | IDs | keys |(**) IDs | keys |(**) IDs | keys | + * +-------------+-----------------+-----------------+ + * In (**), there are two unused bits, therefore, chunk 1 needs + * to be shifted two bits right and chunk 0 needs to be shifted + * four bits right. + */ + right_shift_array(output + MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES, + MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES, 2); + right_shift_array(output, MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES, 4); + break; + default: + WARN_ON(chunk_count > MLXSW_BLOOM_KEY_CHUNKS); + } +} + +static void +mlxsw_sp4_acl_bf_key_encode(struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_entry *aentry, + char *output, u8 *len) +{ + struct mlxsw_afk_key_info *key_info = aregion->region->key_info; + u8 block_count = mlxsw_afk_key_info_blocks_count_get(key_info); + u8 chunk_count = 1 + ((block_count - 1) >> 2); + + __mlxsw_sp_acl_bf_key_encode(aregion, aentry, output, len, + MLXSW_BLOOM_KEY_CHUNKS, + MLXSW_SP4_BLOOM_CHUNK_PAD_BYTES, + MLXSW_SP4_BLOOM_CHUNK_KEY_OFFSET, + MLXSW_SP4_BLOOM_CHUNK_KEY_BYTES, + MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES); + mlxsw_sp4_bf_key_shift_chunks(chunk_count, output); +} + +static unsigned int +mlxsw_sp4_acl_bf_index_get(struct mlxsw_sp_acl_bf *bf, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_entry *aentry) +{ + char bf_key[MLXSW_SP4_BLOOM_KEY_LEN] = {}; + u8 bf_size; + + mlxsw_sp4_acl_bf_key_encode(aregion, aentry, bf_key, &bf_size); + return mlxsw_sp4_acl_bf_crc(bf_key, bf_size); +} + +static unsigned int +mlxsw_sp_acl_bf_rule_count_index_get(struct mlxsw_sp_acl_bf *bf, + unsigned int erp_bank, + unsigned int bf_index) +{ + return erp_bank * bf->bank_size + bf_index; +} + +int +mlxsw_sp_acl_bf_entry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_bf *bf, + struct mlxsw_sp_acl_atcam_region *aregion, + unsigned int erp_bank, + struct mlxsw_sp_acl_atcam_entry *aentry) +{ + unsigned int rule_index; + char *peabfe_pl; + u16 bf_index; + int err; + + mutex_lock(&bf->lock); + + bf_index = mlxsw_sp->acl_bf_ops->index_get(bf, aregion, aentry); + rule_index = mlxsw_sp_acl_bf_rule_count_index_get(bf, erp_bank, + bf_index); + + if (refcount_inc_not_zero(&bf->refcnt[rule_index])) { + err = 0; + goto unlock; + } + + peabfe_pl = kmalloc(MLXSW_REG_PEABFE_LEN, GFP_KERNEL); + if (!peabfe_pl) { + err = -ENOMEM; + goto unlock; + } + + mlxsw_reg_peabfe_pack(peabfe_pl); + mlxsw_reg_peabfe_rec_pack(peabfe_pl, 0, 1, erp_bank, bf_index); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(peabfe), peabfe_pl); + kfree(peabfe_pl); + if (err) + goto unlock; + + refcount_set(&bf->refcnt[rule_index], 1); + err = 0; + +unlock: + mutex_unlock(&bf->lock); + return err; +} + +void +mlxsw_sp_acl_bf_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_bf *bf, + struct mlxsw_sp_acl_atcam_region *aregion, + unsigned int erp_bank, + struct 
mlxsw_sp_acl_atcam_entry *aentry) +{ + unsigned int rule_index; + char *peabfe_pl; + u16 bf_index; + + mutex_lock(&bf->lock); + + bf_index = mlxsw_sp->acl_bf_ops->index_get(bf, aregion, aentry); + rule_index = mlxsw_sp_acl_bf_rule_count_index_get(bf, erp_bank, + bf_index); + + if (refcount_dec_and_test(&bf->refcnt[rule_index])) { + peabfe_pl = kmalloc(MLXSW_REG_PEABFE_LEN, GFP_KERNEL); + if (!peabfe_pl) + goto unlock; + + mlxsw_reg_peabfe_pack(peabfe_pl); + mlxsw_reg_peabfe_rec_pack(peabfe_pl, 0, 0, erp_bank, bf_index); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(peabfe), peabfe_pl); + kfree(peabfe_pl); + } + +unlock: + mutex_unlock(&bf->lock); +} + +struct mlxsw_sp_acl_bf * +mlxsw_sp_acl_bf_init(struct mlxsw_sp *mlxsw_sp, unsigned int num_erp_banks) +{ + struct mlxsw_sp_acl_bf *bf; + unsigned int bf_bank_size; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_BF_LOG)) + return ERR_PTR(-EIO); + + /* Bloom filter size per erp_table_bank + * is 2^ACL_MAX_BF_LOG + */ + bf_bank_size = 1 << MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_BF_LOG); + bf = kzalloc(struct_size(bf, refcnt, size_mul(bf_bank_size, num_erp_banks)), + GFP_KERNEL); + if (!bf) + return ERR_PTR(-ENOMEM); + + bf->bank_size = bf_bank_size; + mutex_init(&bf->lock); + + return bf; +} + +void mlxsw_sp_acl_bf_fini(struct mlxsw_sp_acl_bf *bf) +{ + mutex_destroy(&bf->lock); + kfree(bf); +} + +const struct mlxsw_sp_acl_bf_ops mlxsw_sp2_acl_bf_ops = { + .index_get = mlxsw_sp2_acl_bf_index_get, +}; + +const struct mlxsw_sp_acl_bf_ops mlxsw_sp4_acl_bf_ops = { + .index_get = mlxsw_sp4_acl_bf_index_get, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c new file mode 100644 index 000000000..05680a7e6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2018 Mellanox Technologies. 
All rights reserved */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/parman.h> + +#include "reg.h" +#include "core.h" +#include "spectrum.h" +#include "spectrum_acl_tcam.h" + +static int +mlxsw_sp_acl_ctcam_region_resize(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region, + u16 new_size) +{ + char ptar_pl[MLXSW_REG_PTAR_LEN]; + + mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_RESIZE, + region->key_type, new_size, region->id, + region->tcam_region_info); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptar), ptar_pl); +} + +static void +mlxsw_sp_acl_ctcam_region_move(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region, + u16 src_offset, u16 dst_offset, u16 size) +{ + char prcr_pl[MLXSW_REG_PRCR_LEN]; + + mlxsw_reg_prcr_pack(prcr_pl, MLXSW_REG_PRCR_OP_MOVE, + region->tcam_region_info, src_offset, + region->tcam_region_info, dst_offset, size); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(prcr), prcr_pl); +} + +static int +mlxsw_sp_acl_ctcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_entry *centry, + struct mlxsw_sp_acl_rule_info *rulei, + bool fillup_priority) +{ + struct mlxsw_sp_acl_tcam_region *region = cregion->region; + struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl); + char ptce2_pl[MLXSW_REG_PTCE2_LEN]; + char *act_set; + u32 priority; + char *mask; + char *key; + int err; + + err = mlxsw_sp_acl_tcam_priority_get(mlxsw_sp, rulei, &priority, + fillup_priority); + if (err) + return err; + + mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_WRITE_WRITE, + region->tcam_region_info, + centry->parman_item.index, priority); + key = mlxsw_reg_ptce2_flex_key_blocks_data(ptce2_pl); + mask = mlxsw_reg_ptce2_mask_data(ptce2_pl); + mlxsw_afk_encode(afk, region->key_info, &rulei->values, key, mask); + + err = cregion->ops->entry_insert(cregion, centry, mask); + if (err) + return err; + + /* Only the first action set belongs here, the rest is in KVD */ + act_set = mlxsw_afa_block_first_set(rulei->act_block); + mlxsw_reg_ptce2_flex_action_set_memcpy_to(ptce2_pl, act_set); + + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl); + if (err) + goto err_ptce2_write; + + return 0; + +err_ptce2_write: + cregion->ops->entry_remove(cregion, centry); + return err; +} + +static void +mlxsw_sp_acl_ctcam_region_entry_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_entry *centry) +{ + char ptce2_pl[MLXSW_REG_PTCE2_LEN]; + + mlxsw_reg_ptce2_pack(ptce2_pl, false, MLXSW_REG_PTCE2_OP_WRITE_WRITE, + cregion->region->tcam_region_info, + centry->parman_item.index, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl); + cregion->ops->entry_remove(cregion, centry); +} + +static int +mlxsw_sp_acl_ctcam_region_entry_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_entry *centry, + struct mlxsw_afa_block *afa_block, + unsigned int priority) +{ + char ptce2_pl[MLXSW_REG_PTCE2_LEN]; + char *act_set; + + mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_WRITE_UPDATE, + cregion->region->tcam_region_info, + centry->parman_item.index, priority); + + act_set = mlxsw_afa_block_first_set(afa_block); + mlxsw_reg_ptce2_flex_action_set_memcpy_to(ptce2_pl, act_set); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl); +} + + +static int mlxsw_sp_acl_ctcam_region_parman_resize(void *priv, + unsigned long new_count) 
+{ + struct mlxsw_sp_acl_ctcam_region *cregion = priv; + struct mlxsw_sp_acl_tcam_region *region = cregion->region; + struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp; + u64 max_tcam_rules; + + max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES); + if (new_count > max_tcam_rules) + return -EINVAL; + return mlxsw_sp_acl_ctcam_region_resize(mlxsw_sp, region, new_count); +} + +static void mlxsw_sp_acl_ctcam_region_parman_move(void *priv, + unsigned long from_index, + unsigned long to_index, + unsigned long count) +{ + struct mlxsw_sp_acl_ctcam_region *cregion = priv; + struct mlxsw_sp_acl_tcam_region *region = cregion->region; + struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp; + + mlxsw_sp_acl_ctcam_region_move(mlxsw_sp, region, + from_index, to_index, count); +} + +static const struct parman_ops mlxsw_sp_acl_ctcam_region_parman_ops = { + .base_count = MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT, + .resize_step = MLXSW_SP_ACL_TCAM_REGION_RESIZE_STEP, + .resize = mlxsw_sp_acl_ctcam_region_parman_resize, + .move = mlxsw_sp_acl_ctcam_region_parman_move, + .algo = PARMAN_ALGO_TYPE_LSORT, +}; + +int +mlxsw_sp_acl_ctcam_region_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_tcam_region *region, + const struct mlxsw_sp_acl_ctcam_region_ops *ops) +{ + cregion->region = region; + cregion->ops = ops; + cregion->parman = parman_create(&mlxsw_sp_acl_ctcam_region_parman_ops, + cregion); + if (!cregion->parman) + return -ENOMEM; + return 0; +} + +void mlxsw_sp_acl_ctcam_region_fini(struct mlxsw_sp_acl_ctcam_region *cregion) +{ + parman_destroy(cregion->parman); +} + +void mlxsw_sp_acl_ctcam_chunk_init(struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_chunk *cchunk, + unsigned int priority) +{ + parman_prio_init(cregion->parman, &cchunk->parman_prio, priority); +} + +void mlxsw_sp_acl_ctcam_chunk_fini(struct mlxsw_sp_acl_ctcam_chunk *cchunk) +{ + parman_prio_fini(&cchunk->parman_prio); +} + +int mlxsw_sp_acl_ctcam_entry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_chunk *cchunk, + struct mlxsw_sp_acl_ctcam_entry *centry, + struct mlxsw_sp_acl_rule_info *rulei, + bool fillup_priority) +{ + int err; + + err = parman_item_add(cregion->parman, &cchunk->parman_prio, + ¢ry->parman_item); + if (err) + return err; + + err = mlxsw_sp_acl_ctcam_region_entry_insert(mlxsw_sp, cregion, centry, + rulei, fillup_priority); + if (err) + goto err_rule_insert; + return 0; + +err_rule_insert: + parman_item_remove(cregion->parman, &cchunk->parman_prio, + ¢ry->parman_item); + return err; +} + +void mlxsw_sp_acl_ctcam_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_chunk *cchunk, + struct mlxsw_sp_acl_ctcam_entry *centry) +{ + mlxsw_sp_acl_ctcam_region_entry_remove(mlxsw_sp, cregion, centry); + parman_item_remove(cregion->parman, &cchunk->parman_prio, + ¢ry->parman_item); +} + +int mlxsw_sp_acl_ctcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_entry *centry, + struct mlxsw_sp_acl_rule_info *rulei) +{ + return mlxsw_sp_acl_ctcam_region_entry_action_replace(mlxsw_sp, cregion, + centry, + rulei->act_block, + rulei->priority); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c new file mode 100644 index 000000000..d231f4d28 --- /dev/null +++ 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c @@ -0,0 +1,1601 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#include <linux/bitmap.h> +#include <linux/errno.h> +#include <linux/genalloc.h> +#include <linux/gfp.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/objagg.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> + +#include "core.h" +#include "reg.h" +#include "spectrum.h" +#include "spectrum_acl_tcam.h" + +/* gen_pool_alloc() returns 0 when allocation fails, so use an offset */ +#define MLXSW_SP_ACL_ERP_GENALLOC_OFFSET 0x100 +#define MLXSW_SP_ACL_ERP_MAX_PER_REGION 16 + +struct mlxsw_sp_acl_erp_core { + unsigned int erpt_entries_size[MLXSW_SP_ACL_ATCAM_REGION_TYPE_MAX + 1]; + struct gen_pool *erp_tables; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_acl_bf *bf; + unsigned int num_erp_banks; +}; + +struct mlxsw_sp_acl_erp_key { + char mask[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; +#define __MASK_LEN 0x38 +#define __MASK_IDX(i) (__MASK_LEN - (i) - 1) + bool ctcam; +}; + +struct mlxsw_sp_acl_erp { + struct mlxsw_sp_acl_erp_key key; + u8 id; + u8 index; + DECLARE_BITMAP(mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN); + struct list_head list; + struct mlxsw_sp_acl_erp_table *erp_table; +}; + +struct mlxsw_sp_acl_erp_master_mask { + DECLARE_BITMAP(bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN); + unsigned int count[MLXSW_SP_ACL_TCAM_MASK_LEN]; +}; + +struct mlxsw_sp_acl_erp_table { + struct mlxsw_sp_acl_erp_master_mask master_mask; + DECLARE_BITMAP(erp_id_bitmap, MLXSW_SP_ACL_ERP_MAX_PER_REGION); + DECLARE_BITMAP(erp_index_bitmap, MLXSW_SP_ACL_ERP_MAX_PER_REGION); + struct list_head atcam_erps_list; + struct mlxsw_sp_acl_erp_core *erp_core; + struct mlxsw_sp_acl_atcam_region *aregion; + const struct mlxsw_sp_acl_erp_table_ops *ops; + unsigned long base_index; + unsigned int num_atcam_erps; + unsigned int num_max_atcam_erps; + unsigned int num_ctcam_erps; + unsigned int num_deltas; + struct objagg *objagg; + struct mutex objagg_lock; /* guards objagg manipulation */ +}; + +struct mlxsw_sp_acl_erp_table_ops { + struct mlxsw_sp_acl_erp * + (*erp_create)(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp_key *key); + void (*erp_destroy)(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp *erp); +}; + +static struct mlxsw_sp_acl_erp * +mlxsw_sp_acl_erp_mask_create(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp_key *key); +static void +mlxsw_sp_acl_erp_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp *erp); +static struct mlxsw_sp_acl_erp * +mlxsw_sp_acl_erp_second_mask_create(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp_key *key); +static void +mlxsw_sp_acl_erp_second_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp *erp); +static struct mlxsw_sp_acl_erp * +mlxsw_sp_acl_erp_first_mask_create(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp_key *key); +static void +mlxsw_sp_acl_erp_first_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp *erp); +static void +mlxsw_sp_acl_erp_no_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp *erp); + +static const struct mlxsw_sp_acl_erp_table_ops erp_multiple_masks_ops = { + .erp_create = mlxsw_sp_acl_erp_mask_create, + .erp_destroy = mlxsw_sp_acl_erp_mask_destroy, +}; + +static const struct 
mlxsw_sp_acl_erp_table_ops erp_two_masks_ops = { + .erp_create = mlxsw_sp_acl_erp_mask_create, + .erp_destroy = mlxsw_sp_acl_erp_second_mask_destroy, +}; + +static const struct mlxsw_sp_acl_erp_table_ops erp_single_mask_ops = { + .erp_create = mlxsw_sp_acl_erp_second_mask_create, + .erp_destroy = mlxsw_sp_acl_erp_first_mask_destroy, +}; + +static const struct mlxsw_sp_acl_erp_table_ops erp_no_mask_ops = { + .erp_create = mlxsw_sp_acl_erp_first_mask_create, + .erp_destroy = mlxsw_sp_acl_erp_no_mask_destroy, +}; + +static bool +mlxsw_sp_acl_erp_table_is_used(const struct mlxsw_sp_acl_erp_table *erp_table) +{ + return erp_table->ops != &erp_single_mask_ops && + erp_table->ops != &erp_no_mask_ops; +} + +static unsigned int +mlxsw_sp_acl_erp_bank_get(const struct mlxsw_sp_acl_erp *erp) +{ + return erp->index % erp->erp_table->erp_core->num_erp_banks; +} + +static unsigned int +mlxsw_sp_acl_erp_table_entry_size(const struct mlxsw_sp_acl_erp_table *erp_table) +{ + struct mlxsw_sp_acl_atcam_region *aregion = erp_table->aregion; + struct mlxsw_sp_acl_erp_core *erp_core = erp_table->erp_core; + + return erp_core->erpt_entries_size[aregion->type]; +} + +static int mlxsw_sp_acl_erp_id_get(struct mlxsw_sp_acl_erp_table *erp_table, + u8 *p_id) +{ + u8 id; + + id = find_first_zero_bit(erp_table->erp_id_bitmap, + MLXSW_SP_ACL_ERP_MAX_PER_REGION); + if (id < MLXSW_SP_ACL_ERP_MAX_PER_REGION) { + __set_bit(id, erp_table->erp_id_bitmap); + *p_id = id; + return 0; + } + + return -ENOBUFS; +} + +static void mlxsw_sp_acl_erp_id_put(struct mlxsw_sp_acl_erp_table *erp_table, + u8 id) +{ + __clear_bit(id, erp_table->erp_id_bitmap); +} + +static void +mlxsw_sp_acl_erp_master_mask_bit_set(unsigned long bit, + struct mlxsw_sp_acl_erp_master_mask *mask) +{ + if (mask->count[bit]++ == 0) + __set_bit(bit, mask->bitmap); +} + +static void +mlxsw_sp_acl_erp_master_mask_bit_clear(unsigned long bit, + struct mlxsw_sp_acl_erp_master_mask *mask) +{ + if (--mask->count[bit] == 0) + __clear_bit(bit, mask->bitmap); +} + +static int +mlxsw_sp_acl_erp_master_mask_update(struct mlxsw_sp_acl_erp_table *erp_table) +{ + struct mlxsw_sp_acl_tcam_region *region = erp_table->aregion->region; + struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp; + char percr_pl[MLXSW_REG_PERCR_LEN]; + char *master_mask; + + mlxsw_reg_percr_pack(percr_pl, region->id); + master_mask = mlxsw_reg_percr_master_mask_data(percr_pl); + bitmap_to_arr32((u32 *) master_mask, erp_table->master_mask.bitmap, + MLXSW_SP_ACL_TCAM_MASK_LEN); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(percr), percr_pl); +} + +static int +mlxsw_sp_acl_erp_master_mask_set(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp_key *key) +{ + DECLARE_BITMAP(mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN); + unsigned long bit; + int err; + + bitmap_from_arr32(mask_bitmap, (u32 *) key->mask, + MLXSW_SP_ACL_TCAM_MASK_LEN); + for_each_set_bit(bit, mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN) + mlxsw_sp_acl_erp_master_mask_bit_set(bit, + &erp_table->master_mask); + + err = mlxsw_sp_acl_erp_master_mask_update(erp_table); + if (err) + goto err_master_mask_update; + + return 0; + +err_master_mask_update: + for_each_set_bit(bit, mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN) + mlxsw_sp_acl_erp_master_mask_bit_clear(bit, + &erp_table->master_mask); + return err; +} + +static int +mlxsw_sp_acl_erp_master_mask_clear(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp_key *key) +{ + DECLARE_BITMAP(mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN); + unsigned long bit; + int err; + + 
bitmap_from_arr32(mask_bitmap, (u32 *) key->mask, + MLXSW_SP_ACL_TCAM_MASK_LEN); + for_each_set_bit(bit, mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN) + mlxsw_sp_acl_erp_master_mask_bit_clear(bit, + &erp_table->master_mask); + + err = mlxsw_sp_acl_erp_master_mask_update(erp_table); + if (err) + goto err_master_mask_update; + + return 0; + +err_master_mask_update: + for_each_set_bit(bit, mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN) + mlxsw_sp_acl_erp_master_mask_bit_set(bit, + &erp_table->master_mask); + return err; +} + +static struct mlxsw_sp_acl_erp * +mlxsw_sp_acl_erp_generic_create(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp_key *key) +{ + struct mlxsw_sp_acl_erp *erp; + int err; + + erp = kzalloc(sizeof(*erp), GFP_KERNEL); + if (!erp) + return ERR_PTR(-ENOMEM); + + err = mlxsw_sp_acl_erp_id_get(erp_table, &erp->id); + if (err) + goto err_erp_id_get; + + memcpy(&erp->key, key, sizeof(*key)); + list_add(&erp->list, &erp_table->atcam_erps_list); + erp_table->num_atcam_erps++; + erp->erp_table = erp_table; + + err = mlxsw_sp_acl_erp_master_mask_set(erp_table, &erp->key); + if (err) + goto err_master_mask_set; + + return erp; + +err_master_mask_set: + erp_table->num_atcam_erps--; + list_del(&erp->list); + mlxsw_sp_acl_erp_id_put(erp_table, erp->id); +err_erp_id_get: + kfree(erp); + return ERR_PTR(err); +} + +static void +mlxsw_sp_acl_erp_generic_destroy(struct mlxsw_sp_acl_erp *erp) +{ + struct mlxsw_sp_acl_erp_table *erp_table = erp->erp_table; + + mlxsw_sp_acl_erp_master_mask_clear(erp_table, &erp->key); + erp_table->num_atcam_erps--; + list_del(&erp->list); + mlxsw_sp_acl_erp_id_put(erp_table, erp->id); + kfree(erp); +} + +static int +mlxsw_sp_acl_erp_table_alloc(struct mlxsw_sp_acl_erp_core *erp_core, + unsigned int num_erps, + enum mlxsw_sp_acl_atcam_region_type region_type, + unsigned long *p_index) +{ + unsigned int num_rows, entry_size; + unsigned long index; + + /* We only allow allocations of entire rows */ + if (num_erps % erp_core->num_erp_banks != 0) + return -EINVAL; + + entry_size = erp_core->erpt_entries_size[region_type]; + num_rows = num_erps / erp_core->num_erp_banks; + + index = gen_pool_alloc(erp_core->erp_tables, num_rows * entry_size); + if (!index) + return -ENOBUFS; + + *p_index = index - MLXSW_SP_ACL_ERP_GENALLOC_OFFSET; + + return 0; +} + +static void +mlxsw_sp_acl_erp_table_free(struct mlxsw_sp_acl_erp_core *erp_core, + unsigned int num_erps, + enum mlxsw_sp_acl_atcam_region_type region_type, + unsigned long index) +{ + unsigned long base_index; + unsigned int entry_size; + size_t size; + + entry_size = erp_core->erpt_entries_size[region_type]; + base_index = index + MLXSW_SP_ACL_ERP_GENALLOC_OFFSET; + size = num_erps / erp_core->num_erp_banks * entry_size; + gen_pool_free(erp_core->erp_tables, base_index, size); +} + +static struct mlxsw_sp_acl_erp * +mlxsw_sp_acl_erp_table_master_rp(struct mlxsw_sp_acl_erp_table *erp_table) +{ + if (!list_is_singular(&erp_table->atcam_erps_list)) + return NULL; + + return list_first_entry(&erp_table->atcam_erps_list, + struct mlxsw_sp_acl_erp, list); +} + +static int mlxsw_sp_acl_erp_index_get(struct mlxsw_sp_acl_erp_table *erp_table, + u8 *p_index) +{ + u8 index; + + index = find_first_zero_bit(erp_table->erp_index_bitmap, + erp_table->num_max_atcam_erps); + if (index < erp_table->num_max_atcam_erps) { + __set_bit(index, erp_table->erp_index_bitmap); + *p_index = index; + return 0; + } + + return -ENOBUFS; +} + +static void mlxsw_sp_acl_erp_index_put(struct mlxsw_sp_acl_erp_table *erp_table, + u8 index) +{ + 
__clear_bit(index, erp_table->erp_index_bitmap); +} + +static void +mlxsw_sp_acl_erp_table_locate(const struct mlxsw_sp_acl_erp_table *erp_table, + const struct mlxsw_sp_acl_erp *erp, + u8 *p_erpt_bank, u8 *p_erpt_index) +{ + unsigned int entry_size = mlxsw_sp_acl_erp_table_entry_size(erp_table); + struct mlxsw_sp_acl_erp_core *erp_core = erp_table->erp_core; + unsigned int row; + + *p_erpt_bank = erp->index % erp_core->num_erp_banks; + row = erp->index / erp_core->num_erp_banks; + *p_erpt_index = erp_table->base_index + row * entry_size; +} + +static int +mlxsw_sp_acl_erp_table_erp_add(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp *erp) +{ + struct mlxsw_sp *mlxsw_sp = erp_table->erp_core->mlxsw_sp; + enum mlxsw_reg_perpt_key_size key_size; + char perpt_pl[MLXSW_REG_PERPT_LEN]; + u8 erpt_bank, erpt_index; + + mlxsw_sp_acl_erp_table_locate(erp_table, erp, &erpt_bank, &erpt_index); + key_size = (enum mlxsw_reg_perpt_key_size) erp_table->aregion->type; + mlxsw_reg_perpt_pack(perpt_pl, erpt_bank, erpt_index, key_size, erp->id, + 0, erp_table->base_index, erp->index, + erp->key.mask); + mlxsw_reg_perpt_erp_vector_pack(perpt_pl, erp_table->erp_index_bitmap, + MLXSW_SP_ACL_ERP_MAX_PER_REGION); + mlxsw_reg_perpt_erp_vector_set(perpt_pl, erp->index, true); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(perpt), perpt_pl); +} + +static void mlxsw_sp_acl_erp_table_erp_del(struct mlxsw_sp_acl_erp *erp) +{ + char empty_mask[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN] = { 0 }; + struct mlxsw_sp_acl_erp_table *erp_table = erp->erp_table; + struct mlxsw_sp *mlxsw_sp = erp_table->erp_core->mlxsw_sp; + enum mlxsw_reg_perpt_key_size key_size; + char perpt_pl[MLXSW_REG_PERPT_LEN]; + u8 erpt_bank, erpt_index; + + mlxsw_sp_acl_erp_table_locate(erp_table, erp, &erpt_bank, &erpt_index); + key_size = (enum mlxsw_reg_perpt_key_size) erp_table->aregion->type; + mlxsw_reg_perpt_pack(perpt_pl, erpt_bank, erpt_index, key_size, erp->id, + 0, erp_table->base_index, erp->index, empty_mask); + mlxsw_reg_perpt_erp_vector_pack(perpt_pl, erp_table->erp_index_bitmap, + MLXSW_SP_ACL_ERP_MAX_PER_REGION); + mlxsw_reg_perpt_erp_vector_set(perpt_pl, erp->index, false); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(perpt), perpt_pl); +} + +static int +mlxsw_sp_acl_erp_table_enable(struct mlxsw_sp_acl_erp_table *erp_table, + bool ctcam_le) +{ + struct mlxsw_sp_acl_tcam_region *region = erp_table->aregion->region; + struct mlxsw_sp *mlxsw_sp = erp_table->erp_core->mlxsw_sp; + char pererp_pl[MLXSW_REG_PERERP_LEN]; + + mlxsw_reg_pererp_pack(pererp_pl, region->id, ctcam_le, true, 0, + erp_table->base_index, 0); + mlxsw_reg_pererp_erp_vector_pack(pererp_pl, erp_table->erp_index_bitmap, + MLXSW_SP_ACL_ERP_MAX_PER_REGION); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pererp), pererp_pl); +} + +static void +mlxsw_sp_acl_erp_table_disable(struct mlxsw_sp_acl_erp_table *erp_table) +{ + struct mlxsw_sp_acl_tcam_region *region = erp_table->aregion->region; + struct mlxsw_sp *mlxsw_sp = erp_table->erp_core->mlxsw_sp; + char pererp_pl[MLXSW_REG_PERERP_LEN]; + struct mlxsw_sp_acl_erp *master_rp; + + master_rp = mlxsw_sp_acl_erp_table_master_rp(erp_table); + /* It is possible we do not have a master RP when we disable the + * table when there are no rules in the A-TCAM and the last C-TCAM + * rule is deleted + */ + mlxsw_reg_pererp_pack(pererp_pl, region->id, false, false, 0, 0, + master_rp ? 
master_rp->id : 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pererp), pererp_pl); +} + +static int +mlxsw_sp_acl_erp_table_relocate(struct mlxsw_sp_acl_erp_table *erp_table) +{ + struct mlxsw_sp_acl_erp *erp; + int err; + + list_for_each_entry(erp, &erp_table->atcam_erps_list, list) { + err = mlxsw_sp_acl_erp_table_erp_add(erp_table, erp); + if (err) + goto err_table_erp_add; + } + + return 0; + +err_table_erp_add: + list_for_each_entry_continue_reverse(erp, &erp_table->atcam_erps_list, + list) + mlxsw_sp_acl_erp_table_erp_del(erp); + return err; +} + +static int +mlxsw_sp_acl_erp_table_expand(struct mlxsw_sp_acl_erp_table *erp_table) +{ + unsigned int num_erps, old_num_erps = erp_table->num_max_atcam_erps; + struct mlxsw_sp_acl_erp_core *erp_core = erp_table->erp_core; + unsigned long old_base_index = erp_table->base_index; + bool ctcam_le = erp_table->num_ctcam_erps > 0; + int err; + + if (erp_table->num_atcam_erps < erp_table->num_max_atcam_erps) + return 0; + + if (erp_table->num_max_atcam_erps == MLXSW_SP_ACL_ERP_MAX_PER_REGION) + return -ENOBUFS; + + num_erps = old_num_erps + erp_core->num_erp_banks; + err = mlxsw_sp_acl_erp_table_alloc(erp_core, num_erps, + erp_table->aregion->type, + &erp_table->base_index); + if (err) + return err; + erp_table->num_max_atcam_erps = num_erps; + + err = mlxsw_sp_acl_erp_table_relocate(erp_table); + if (err) + goto err_table_relocate; + + err = mlxsw_sp_acl_erp_table_enable(erp_table, ctcam_le); + if (err) + goto err_table_enable; + + mlxsw_sp_acl_erp_table_free(erp_core, old_num_erps, + erp_table->aregion->type, old_base_index); + + return 0; + +err_table_enable: +err_table_relocate: + erp_table->num_max_atcam_erps = old_num_erps; + mlxsw_sp_acl_erp_table_free(erp_core, num_erps, + erp_table->aregion->type, + erp_table->base_index); + erp_table->base_index = old_base_index; + return err; +} + +static int +mlxsw_acl_erp_table_bf_add(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp *erp) +{ + struct mlxsw_sp_acl_atcam_region *aregion = erp_table->aregion; + unsigned int erp_bank = mlxsw_sp_acl_erp_bank_get(erp); + struct mlxsw_sp_acl_atcam_entry *aentry; + int err; + + list_for_each_entry(aentry, &aregion->entries_list, list) { + err = mlxsw_sp_acl_bf_entry_add(aregion->region->mlxsw_sp, + erp_table->erp_core->bf, + aregion, erp_bank, aentry); + if (err) + goto bf_entry_add_err; + } + + return 0; + +bf_entry_add_err: + list_for_each_entry_continue_reverse(aentry, &aregion->entries_list, + list) + mlxsw_sp_acl_bf_entry_del(aregion->region->mlxsw_sp, + erp_table->erp_core->bf, + aregion, erp_bank, aentry); + return err; +} + +static void +mlxsw_acl_erp_table_bf_del(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp *erp) +{ + struct mlxsw_sp_acl_atcam_region *aregion = erp_table->aregion; + unsigned int erp_bank = mlxsw_sp_acl_erp_bank_get(erp); + struct mlxsw_sp_acl_atcam_entry *aentry; + + list_for_each_entry_reverse(aentry, &aregion->entries_list, list) + mlxsw_sp_acl_bf_entry_del(aregion->region->mlxsw_sp, + erp_table->erp_core->bf, + aregion, erp_bank, aentry); +} + +static int +mlxsw_sp_acl_erp_region_table_trans(struct mlxsw_sp_acl_erp_table *erp_table) +{ + struct mlxsw_sp_acl_erp_core *erp_core = erp_table->erp_core; + struct mlxsw_sp_acl_erp *master_rp; + int err; + + /* Initially, allocate a single eRP row. 
Expand later as needed */ + err = mlxsw_sp_acl_erp_table_alloc(erp_core, erp_core->num_erp_banks, + erp_table->aregion->type, + &erp_table->base_index); + if (err) + return err; + erp_table->num_max_atcam_erps = erp_core->num_erp_banks; + + /* Transition the sole RP currently configured (the master RP) + * to the eRP table + */ + master_rp = mlxsw_sp_acl_erp_table_master_rp(erp_table); + if (!master_rp) { + err = -EINVAL; + goto err_table_master_rp; + } + + /* Make sure the master RP is using a valid index, as + * only a single eRP row is currently allocated. + */ + master_rp->index = 0; + __set_bit(master_rp->index, erp_table->erp_index_bitmap); + + err = mlxsw_sp_acl_erp_table_erp_add(erp_table, master_rp); + if (err) + goto err_table_master_rp_add; + + /* Update Bloom filter before enabling eRP table, as rules + * on the master RP were not set to Bloom filter up to this + * point. + */ + err = mlxsw_acl_erp_table_bf_add(erp_table, master_rp); + if (err) + goto err_table_bf_add; + + err = mlxsw_sp_acl_erp_table_enable(erp_table, false); + if (err) + goto err_table_enable; + + return 0; + +err_table_enable: + mlxsw_acl_erp_table_bf_del(erp_table, master_rp); +err_table_bf_add: + mlxsw_sp_acl_erp_table_erp_del(master_rp); +err_table_master_rp_add: + __clear_bit(master_rp->index, erp_table->erp_index_bitmap); +err_table_master_rp: + mlxsw_sp_acl_erp_table_free(erp_core, erp_table->num_max_atcam_erps, + erp_table->aregion->type, + erp_table->base_index); + return err; +} + +static void +mlxsw_sp_acl_erp_region_master_mask_trans(struct mlxsw_sp_acl_erp_table *erp_table) +{ + struct mlxsw_sp_acl_erp_core *erp_core = erp_table->erp_core; + struct mlxsw_sp_acl_erp *master_rp; + + mlxsw_sp_acl_erp_table_disable(erp_table); + master_rp = mlxsw_sp_acl_erp_table_master_rp(erp_table); + if (!master_rp) + return; + mlxsw_acl_erp_table_bf_del(erp_table, master_rp); + mlxsw_sp_acl_erp_table_erp_del(master_rp); + __clear_bit(master_rp->index, erp_table->erp_index_bitmap); + mlxsw_sp_acl_erp_table_free(erp_core, erp_table->num_max_atcam_erps, + erp_table->aregion->type, + erp_table->base_index); +} + +static int +mlxsw_sp_acl_erp_region_erp_add(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp *erp) +{ + struct mlxsw_sp_acl_tcam_region *region = erp_table->aregion->region; + struct mlxsw_sp *mlxsw_sp = erp_table->erp_core->mlxsw_sp; + bool ctcam_le = erp_table->num_ctcam_erps > 0; + char pererp_pl[MLXSW_REG_PERERP_LEN]; + + mlxsw_reg_pererp_pack(pererp_pl, region->id, ctcam_le, true, 0, + erp_table->base_index, 0); + mlxsw_reg_pererp_erp_vector_pack(pererp_pl, erp_table->erp_index_bitmap, + MLXSW_SP_ACL_ERP_MAX_PER_REGION); + mlxsw_reg_pererp_erpt_vector_set(pererp_pl, erp->index, true); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pererp), pererp_pl); +} + +static void mlxsw_sp_acl_erp_region_erp_del(struct mlxsw_sp_acl_erp *erp) +{ + struct mlxsw_sp_acl_erp_table *erp_table = erp->erp_table; + struct mlxsw_sp_acl_tcam_region *region = erp_table->aregion->region; + struct mlxsw_sp *mlxsw_sp = erp_table->erp_core->mlxsw_sp; + bool ctcam_le = erp_table->num_ctcam_erps > 0; + char pererp_pl[MLXSW_REG_PERERP_LEN]; + + mlxsw_reg_pererp_pack(pererp_pl, region->id, ctcam_le, true, 0, + erp_table->base_index, 0); + mlxsw_reg_pererp_erp_vector_pack(pererp_pl, erp_table->erp_index_bitmap, + MLXSW_SP_ACL_ERP_MAX_PER_REGION); + mlxsw_reg_pererp_erpt_vector_set(pererp_pl, erp->index, false); + + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pererp), pererp_pl); +} + +static int 
+mlxsw_sp_acl_erp_region_ctcam_enable(struct mlxsw_sp_acl_erp_table *erp_table) +{ + /* No need to re-enable lookup in the C-TCAM */ + if (erp_table->num_ctcam_erps > 1) + return 0; + + return mlxsw_sp_acl_erp_table_enable(erp_table, true); +} + +static void +mlxsw_sp_acl_erp_region_ctcam_disable(struct mlxsw_sp_acl_erp_table *erp_table) +{ + /* Only disable C-TCAM lookup when last C-TCAM eRP is deleted */ + if (erp_table->num_ctcam_erps > 1) + return; + + mlxsw_sp_acl_erp_table_enable(erp_table, false); +} + +static int +__mlxsw_sp_acl_erp_table_other_inc(struct mlxsw_sp_acl_erp_table *erp_table, + unsigned int *inc_num) +{ + int err; + + /* If there are C-TCAM eRP or deltas in use we need to transition + * the region to use eRP table, if it is not already done + */ + if (!mlxsw_sp_acl_erp_table_is_used(erp_table)) { + err = mlxsw_sp_acl_erp_region_table_trans(erp_table); + if (err) + return err; + } + + /* When C-TCAM or deltas are used, the eRP table must be used */ + if (erp_table->ops != &erp_multiple_masks_ops) + erp_table->ops = &erp_multiple_masks_ops; + + (*inc_num)++; + + return 0; +} + +static int mlxsw_sp_acl_erp_ctcam_inc(struct mlxsw_sp_acl_erp_table *erp_table) +{ + return __mlxsw_sp_acl_erp_table_other_inc(erp_table, + &erp_table->num_ctcam_erps); +} + +static int mlxsw_sp_acl_erp_delta_inc(struct mlxsw_sp_acl_erp_table *erp_table) +{ + return __mlxsw_sp_acl_erp_table_other_inc(erp_table, + &erp_table->num_deltas); +} + +static void +__mlxsw_sp_acl_erp_table_other_dec(struct mlxsw_sp_acl_erp_table *erp_table, + unsigned int *dec_num) +{ + (*dec_num)--; + + /* If there are no C-TCAM eRP or deltas in use, the state we + * transition to depends on the number of A-TCAM eRPs currently + * in use. + */ + if (erp_table->num_ctcam_erps > 0 || erp_table->num_deltas > 0) + return; + + switch (erp_table->num_atcam_erps) { + case 2: + /* Keep using the eRP table, but correctly set the + * operations pointer so that when an A-TCAM eRP is + * deleted we will transition to use the master mask + */ + erp_table->ops = &erp_two_masks_ops; + break; + case 1: + /* We only kept the eRP table because we had C-TCAM + * eRPs in use. 
Now that the last C-TCAM eRP is gone we + * can stop using the table and transition to use the + * master mask + */ + mlxsw_sp_acl_erp_region_master_mask_trans(erp_table); + erp_table->ops = &erp_single_mask_ops; + break; + case 0: + /* There are no more eRPs of any kind used by the region + * so free its eRP table and transition to initial state + */ + mlxsw_sp_acl_erp_table_disable(erp_table); + mlxsw_sp_acl_erp_table_free(erp_table->erp_core, + erp_table->num_max_atcam_erps, + erp_table->aregion->type, + erp_table->base_index); + erp_table->ops = &erp_no_mask_ops; + break; + default: + break; + } +} + +static void mlxsw_sp_acl_erp_ctcam_dec(struct mlxsw_sp_acl_erp_table *erp_table) +{ + __mlxsw_sp_acl_erp_table_other_dec(erp_table, + &erp_table->num_ctcam_erps); +} + +static void mlxsw_sp_acl_erp_delta_dec(struct mlxsw_sp_acl_erp_table *erp_table) +{ + __mlxsw_sp_acl_erp_table_other_dec(erp_table, + &erp_table->num_deltas); +} + +static struct mlxsw_sp_acl_erp * +mlxsw_sp_acl_erp_ctcam_mask_create(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp_key *key) +{ + struct mlxsw_sp_acl_erp *erp; + int err; + + erp = kzalloc(sizeof(*erp), GFP_KERNEL); + if (!erp) + return ERR_PTR(-ENOMEM); + + memcpy(&erp->key, key, sizeof(*key)); + bitmap_from_arr32(erp->mask_bitmap, (u32 *) key->mask, + MLXSW_SP_ACL_TCAM_MASK_LEN); + + err = mlxsw_sp_acl_erp_ctcam_inc(erp_table); + if (err) + goto err_erp_ctcam_inc; + + erp->erp_table = erp_table; + + err = mlxsw_sp_acl_erp_master_mask_set(erp_table, &erp->key); + if (err) + goto err_master_mask_set; + + err = mlxsw_sp_acl_erp_region_ctcam_enable(erp_table); + if (err) + goto err_erp_region_ctcam_enable; + + return erp; + +err_erp_region_ctcam_enable: + mlxsw_sp_acl_erp_master_mask_clear(erp_table, &erp->key); +err_master_mask_set: + mlxsw_sp_acl_erp_ctcam_dec(erp_table); +err_erp_ctcam_inc: + kfree(erp); + return ERR_PTR(err); +} + +static void +mlxsw_sp_acl_erp_ctcam_mask_destroy(struct mlxsw_sp_acl_erp *erp) +{ + struct mlxsw_sp_acl_erp_table *erp_table = erp->erp_table; + + mlxsw_sp_acl_erp_region_ctcam_disable(erp_table); + mlxsw_sp_acl_erp_master_mask_clear(erp_table, &erp->key); + mlxsw_sp_acl_erp_ctcam_dec(erp_table); + kfree(erp); +} + +static struct mlxsw_sp_acl_erp * +mlxsw_sp_acl_erp_mask_create(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp_key *key) +{ + struct mlxsw_sp_acl_erp *erp; + int err; + + if (key->ctcam) + return mlxsw_sp_acl_erp_ctcam_mask_create(erp_table, key); + + /* Expand the eRP table for the new eRP, if needed */ + err = mlxsw_sp_acl_erp_table_expand(erp_table); + if (err) + return ERR_PTR(err); + + erp = mlxsw_sp_acl_erp_generic_create(erp_table, key); + if (IS_ERR(erp)) + return erp; + + err = mlxsw_sp_acl_erp_index_get(erp_table, &erp->index); + if (err) + goto err_erp_index_get; + + err = mlxsw_sp_acl_erp_table_erp_add(erp_table, erp); + if (err) + goto err_table_erp_add; + + err = mlxsw_sp_acl_erp_region_erp_add(erp_table, erp); + if (err) + goto err_region_erp_add; + + erp_table->ops = &erp_multiple_masks_ops; + + return erp; + +err_region_erp_add: + mlxsw_sp_acl_erp_table_erp_del(erp); +err_table_erp_add: + mlxsw_sp_acl_erp_index_put(erp_table, erp->index); +err_erp_index_get: + mlxsw_sp_acl_erp_generic_destroy(erp); + return ERR_PTR(err); +} + +static void +mlxsw_sp_acl_erp_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp *erp) +{ + if (erp->key.ctcam) + return mlxsw_sp_acl_erp_ctcam_mask_destroy(erp); + + mlxsw_sp_acl_erp_region_erp_del(erp); + 
mlxsw_sp_acl_erp_table_erp_del(erp); + mlxsw_sp_acl_erp_index_put(erp_table, erp->index); + mlxsw_sp_acl_erp_generic_destroy(erp); + + if (erp_table->num_atcam_erps == 2 && erp_table->num_ctcam_erps == 0 && + erp_table->num_deltas == 0) + erp_table->ops = &erp_two_masks_ops; +} + +static struct mlxsw_sp_acl_erp * +mlxsw_sp_acl_erp_second_mask_create(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp_key *key) +{ + struct mlxsw_sp_acl_erp *erp; + int err; + + if (key->ctcam) + return mlxsw_sp_acl_erp_ctcam_mask_create(erp_table, key); + + /* Transition to use eRP table instead of master mask */ + err = mlxsw_sp_acl_erp_region_table_trans(erp_table); + if (err) + return ERR_PTR(err); + + erp = mlxsw_sp_acl_erp_generic_create(erp_table, key); + if (IS_ERR(erp)) { + err = PTR_ERR(erp); + goto err_erp_create; + } + + err = mlxsw_sp_acl_erp_index_get(erp_table, &erp->index); + if (err) + goto err_erp_index_get; + + err = mlxsw_sp_acl_erp_table_erp_add(erp_table, erp); + if (err) + goto err_table_erp_add; + + err = mlxsw_sp_acl_erp_region_erp_add(erp_table, erp); + if (err) + goto err_region_erp_add; + + erp_table->ops = &erp_two_masks_ops; + + return erp; + +err_region_erp_add: + mlxsw_sp_acl_erp_table_erp_del(erp); +err_table_erp_add: + mlxsw_sp_acl_erp_index_put(erp_table, erp->index); +err_erp_index_get: + mlxsw_sp_acl_erp_generic_destroy(erp); +err_erp_create: + mlxsw_sp_acl_erp_region_master_mask_trans(erp_table); + return ERR_PTR(err); +} + +static void +mlxsw_sp_acl_erp_second_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp *erp) +{ + if (erp->key.ctcam) + return mlxsw_sp_acl_erp_ctcam_mask_destroy(erp); + + mlxsw_sp_acl_erp_region_erp_del(erp); + mlxsw_sp_acl_erp_table_erp_del(erp); + mlxsw_sp_acl_erp_index_put(erp_table, erp->index); + mlxsw_sp_acl_erp_generic_destroy(erp); + /* Transition to use master mask instead of eRP table */ + mlxsw_sp_acl_erp_region_master_mask_trans(erp_table); + + erp_table->ops = &erp_single_mask_ops; +} + +static struct mlxsw_sp_acl_erp * +mlxsw_sp_acl_erp_first_mask_create(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp_key *key) +{ + struct mlxsw_sp_acl_erp *erp; + + if (key->ctcam) + return ERR_PTR(-EINVAL); + + erp = mlxsw_sp_acl_erp_generic_create(erp_table, key); + if (IS_ERR(erp)) + return erp; + + erp_table->ops = &erp_single_mask_ops; + + return erp; +} + +static void +mlxsw_sp_acl_erp_first_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp *erp) +{ + mlxsw_sp_acl_erp_generic_destroy(erp); + erp_table->ops = &erp_no_mask_ops; +} + +static void +mlxsw_sp_acl_erp_no_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp *erp) +{ + WARN_ON(1); +} + +struct mlxsw_sp_acl_erp_mask * +mlxsw_sp_acl_erp_mask_get(struct mlxsw_sp_acl_atcam_region *aregion, + const char *mask, bool ctcam) +{ + struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table; + struct mlxsw_sp_acl_erp_key key; + struct objagg_obj *objagg_obj; + + memcpy(key.mask, mask, MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN); + key.ctcam = ctcam; + mutex_lock(&erp_table->objagg_lock); + objagg_obj = objagg_obj_get(erp_table->objagg, &key); + mutex_unlock(&erp_table->objagg_lock); + if (IS_ERR(objagg_obj)) + return ERR_CAST(objagg_obj); + return (struct mlxsw_sp_acl_erp_mask *) objagg_obj; +} + +void mlxsw_sp_acl_erp_mask_put(struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_erp_mask *erp_mask) +{ + struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask; 
+ struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table; + + mutex_lock(&erp_table->objagg_lock); + objagg_obj_put(erp_table->objagg, objagg_obj); + mutex_unlock(&erp_table->objagg_lock); +} + +int mlxsw_sp_acl_erp_bf_insert(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_erp_mask *erp_mask, + struct mlxsw_sp_acl_atcam_entry *aentry) +{ + struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask; + const struct mlxsw_sp_acl_erp *erp = objagg_obj_root_priv(objagg_obj); + unsigned int erp_bank; + + if (!mlxsw_sp_acl_erp_table_is_used(erp->erp_table)) + return 0; + + erp_bank = mlxsw_sp_acl_erp_bank_get(erp); + return mlxsw_sp_acl_bf_entry_add(mlxsw_sp, + erp->erp_table->erp_core->bf, + aregion, erp_bank, aentry); +} + +void mlxsw_sp_acl_erp_bf_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_erp_mask *erp_mask, + struct mlxsw_sp_acl_atcam_entry *aentry) +{ + struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask; + const struct mlxsw_sp_acl_erp *erp = objagg_obj_root_priv(objagg_obj); + unsigned int erp_bank; + + if (!mlxsw_sp_acl_erp_table_is_used(erp->erp_table)) + return; + + erp_bank = mlxsw_sp_acl_erp_bank_get(erp); + mlxsw_sp_acl_bf_entry_del(mlxsw_sp, + erp->erp_table->erp_core->bf, + aregion, erp_bank, aentry); +} + +bool +mlxsw_sp_acl_erp_mask_is_ctcam(const struct mlxsw_sp_acl_erp_mask *erp_mask) +{ + struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask; + const struct mlxsw_sp_acl_erp_key *key = objagg_obj_raw(objagg_obj); + + return key->ctcam; +} + +u8 mlxsw_sp_acl_erp_mask_erp_id(const struct mlxsw_sp_acl_erp_mask *erp_mask) +{ + struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask; + const struct mlxsw_sp_acl_erp *erp = objagg_obj_root_priv(objagg_obj); + + return erp->id; +} + +struct mlxsw_sp_acl_erp_delta { + struct mlxsw_sp_acl_erp_key key; + u16 start; + u8 mask; +}; + +u16 mlxsw_sp_acl_erp_delta_start(const struct mlxsw_sp_acl_erp_delta *delta) +{ + return delta->start; +} + +u8 mlxsw_sp_acl_erp_delta_mask(const struct mlxsw_sp_acl_erp_delta *delta) +{ + return delta->mask; +} + +u8 mlxsw_sp_acl_erp_delta_value(const struct mlxsw_sp_acl_erp_delta *delta, + const char *enc_key) +{ + u16 start = delta->start; + u8 mask = delta->mask; + u16 tmp; + + if (!mask) + return 0; + + tmp = (unsigned char) enc_key[__MASK_IDX(start / 8)]; + if (start / 8 + 1 < __MASK_LEN) + tmp |= (unsigned char) enc_key[__MASK_IDX(start / 8 + 1)] << 8; + tmp >>= start % 8; + tmp &= mask; + return tmp; +} + +void mlxsw_sp_acl_erp_delta_clear(const struct mlxsw_sp_acl_erp_delta *delta, + const char *enc_key) +{ + u16 start = delta->start; + u8 mask = delta->mask; + unsigned char *byte; + u16 tmp; + + tmp = mask; + tmp <<= start % 8; + tmp = ~tmp; + + byte = (unsigned char *) &enc_key[__MASK_IDX(start / 8)]; + *byte &= tmp & 0xff; + if (start / 8 + 1 < __MASK_LEN) { + byte = (unsigned char *) &enc_key[__MASK_IDX(start / 8 + 1)]; + *byte &= (tmp >> 8) & 0xff; + } +} + +static const struct mlxsw_sp_acl_erp_delta +mlxsw_sp_acl_erp_delta_default = {}; + +const struct mlxsw_sp_acl_erp_delta * +mlxsw_sp_acl_erp_delta(const struct mlxsw_sp_acl_erp_mask *erp_mask) +{ + struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask; + const struct mlxsw_sp_acl_erp_delta *delta; + + delta = objagg_obj_delta_priv(objagg_obj); + if (!delta) + delta = &mlxsw_sp_acl_erp_delta_default; + return delta; +} + +static int +mlxsw_sp_acl_erp_delta_fill(const struct mlxsw_sp_acl_erp_key 
*parent_key, + const struct mlxsw_sp_acl_erp_key *key, + u16 *delta_start, u8 *delta_mask) +{ + int offset = 0; + int si = -1; + u16 pmask; + u16 mask; + int i; + + /* The difference between 2 masks can be up to 8 consecutive bits. */ + for (i = 0; i < __MASK_LEN; i++) { + if (parent_key->mask[__MASK_IDX(i)] == key->mask[__MASK_IDX(i)]) + continue; + if (si == -1) + si = i; + else if (si != i - 1) + return -EINVAL; + } + if (si == -1) { + /* The masks are the same, this can happen in case eRPs with + * the same mask were created in both A-TCAM and C-TCAM. + * The only possible condition under which this can happen + * is identical rule insertion. Delta is not possible here. + */ + return -EINVAL; + } + pmask = (unsigned char) parent_key->mask[__MASK_IDX(si)]; + mask = (unsigned char) key->mask[__MASK_IDX(si)]; + if (si + 1 < __MASK_LEN) { + pmask |= (unsigned char) parent_key->mask[__MASK_IDX(si + 1)] << 8; + mask |= (unsigned char) key->mask[__MASK_IDX(si + 1)] << 8; + } + + if ((pmask ^ mask) & pmask) + return -EINVAL; + mask &= ~pmask; + while (!(mask & (1 << offset))) + offset++; + while (!(mask & 1)) + mask >>= 1; + if (mask & 0xff00) + return -EINVAL; + + *delta_start = si * 8 + offset; + *delta_mask = mask; + + return 0; +} + +static bool mlxsw_sp_acl_erp_delta_check(void *priv, const void *parent_obj, + const void *obj) +{ + const struct mlxsw_sp_acl_erp_key *parent_key = parent_obj; + const struct mlxsw_sp_acl_erp_key *key = obj; + u16 delta_start; + u8 delta_mask; + int err; + + err = mlxsw_sp_acl_erp_delta_fill(parent_key, key, + &delta_start, &delta_mask); + return err ? false : true; +} + +static int mlxsw_sp_acl_erp_hints_obj_cmp(const void *obj1, const void *obj2) +{ + const struct mlxsw_sp_acl_erp_key *key1 = obj1; + const struct mlxsw_sp_acl_erp_key *key2 = obj2; + + /* For hints purposes, two objects are considered equal + * in case the masks are the same. Does not matter what + * the "ctcam" value is. 
+ */ + return memcmp(key1->mask, key2->mask, sizeof(key1->mask)); +} + +static void *mlxsw_sp_acl_erp_delta_create(void *priv, void *parent_obj, + void *obj) +{ + struct mlxsw_sp_acl_erp_key *parent_key = parent_obj; + struct mlxsw_sp_acl_atcam_region *aregion = priv; + struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table; + struct mlxsw_sp_acl_erp_key *key = obj; + struct mlxsw_sp_acl_erp_delta *delta; + u16 delta_start; + u8 delta_mask; + int err; + + if (parent_key->ctcam || key->ctcam) + return ERR_PTR(-EINVAL); + err = mlxsw_sp_acl_erp_delta_fill(parent_key, key, + &delta_start, &delta_mask); + if (err) + return ERR_PTR(-EINVAL); + + delta = kzalloc(sizeof(*delta), GFP_KERNEL); + if (!delta) + return ERR_PTR(-ENOMEM); + delta->start = delta_start; + delta->mask = delta_mask; + + err = mlxsw_sp_acl_erp_delta_inc(erp_table); + if (err) + goto err_erp_delta_inc; + + memcpy(&delta->key, key, sizeof(*key)); + err = mlxsw_sp_acl_erp_master_mask_set(erp_table, &delta->key); + if (err) + goto err_master_mask_set; + + return delta; + +err_master_mask_set: + mlxsw_sp_acl_erp_delta_dec(erp_table); +err_erp_delta_inc: + kfree(delta); + return ERR_PTR(err); +} + +static void mlxsw_sp_acl_erp_delta_destroy(void *priv, void *delta_priv) +{ + struct mlxsw_sp_acl_erp_delta *delta = delta_priv; + struct mlxsw_sp_acl_atcam_region *aregion = priv; + struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table; + + mlxsw_sp_acl_erp_master_mask_clear(erp_table, &delta->key); + mlxsw_sp_acl_erp_delta_dec(erp_table); + kfree(delta); +} + +static void *mlxsw_sp_acl_erp_root_create(void *priv, void *obj, + unsigned int root_id) +{ + struct mlxsw_sp_acl_atcam_region *aregion = priv; + struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table; + struct mlxsw_sp_acl_erp_key *key = obj; + + if (!key->ctcam && + root_id != OBJAGG_OBJ_ROOT_ID_INVALID && + root_id >= MLXSW_SP_ACL_ERP_MAX_PER_REGION) + return ERR_PTR(-ENOBUFS); + return erp_table->ops->erp_create(erp_table, key); +} + +static void mlxsw_sp_acl_erp_root_destroy(void *priv, void *root_priv) +{ + struct mlxsw_sp_acl_atcam_region *aregion = priv; + struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table; + + erp_table->ops->erp_destroy(erp_table, root_priv); +} + +static const struct objagg_ops mlxsw_sp_acl_erp_objagg_ops = { + .obj_size = sizeof(struct mlxsw_sp_acl_erp_key), + .delta_check = mlxsw_sp_acl_erp_delta_check, + .hints_obj_cmp = mlxsw_sp_acl_erp_hints_obj_cmp, + .delta_create = mlxsw_sp_acl_erp_delta_create, + .delta_destroy = mlxsw_sp_acl_erp_delta_destroy, + .root_create = mlxsw_sp_acl_erp_root_create, + .root_destroy = mlxsw_sp_acl_erp_root_destroy, +}; + +static struct mlxsw_sp_acl_erp_table * +mlxsw_sp_acl_erp_table_create(struct mlxsw_sp_acl_atcam_region *aregion, + struct objagg_hints *hints) +{ + struct mlxsw_sp_acl_erp_table *erp_table; + int err; + + erp_table = kzalloc(sizeof(*erp_table), GFP_KERNEL); + if (!erp_table) + return ERR_PTR(-ENOMEM); + + erp_table->objagg = objagg_create(&mlxsw_sp_acl_erp_objagg_ops, + hints, aregion); + if (IS_ERR(erp_table->objagg)) { + err = PTR_ERR(erp_table->objagg); + goto err_objagg_create; + } + + erp_table->erp_core = aregion->atcam->erp_core; + erp_table->ops = &erp_no_mask_ops; + INIT_LIST_HEAD(&erp_table->atcam_erps_list); + erp_table->aregion = aregion; + mutex_init(&erp_table->objagg_lock); + + return erp_table; + +err_objagg_create: + kfree(erp_table); + return ERR_PTR(err); +} + +static void +mlxsw_sp_acl_erp_table_destroy(struct mlxsw_sp_acl_erp_table *erp_table) +{ + 
WARN_ON(!list_empty(&erp_table->atcam_erps_list)); + mutex_destroy(&erp_table->objagg_lock); + objagg_destroy(erp_table->objagg); + kfree(erp_table); +} + +static int +mlxsw_sp_acl_erp_master_mask_init(struct mlxsw_sp_acl_atcam_region *aregion) +{ + struct mlxsw_sp *mlxsw_sp = aregion->region->mlxsw_sp; + char percr_pl[MLXSW_REG_PERCR_LEN]; + + mlxsw_reg_percr_pack(percr_pl, aregion->region->id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(percr), percr_pl); +} + +static int +mlxsw_sp_acl_erp_region_param_init(struct mlxsw_sp_acl_atcam_region *aregion) +{ + struct mlxsw_sp *mlxsw_sp = aregion->region->mlxsw_sp; + char pererp_pl[MLXSW_REG_PERERP_LEN]; + + mlxsw_reg_pererp_pack(pererp_pl, aregion->region->id, false, false, 0, + 0, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pererp), pererp_pl); +} + +static int +mlxsw_sp_acl_erp_hints_check(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct objagg_hints *hints, bool *p_rehash_needed) +{ + struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table; + const struct objagg_stats *ostats; + const struct objagg_stats *hstats; + int err; + + *p_rehash_needed = false; + + mutex_lock(&erp_table->objagg_lock); + ostats = objagg_stats_get(erp_table->objagg); + mutex_unlock(&erp_table->objagg_lock); + if (IS_ERR(ostats)) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get ERP stats\n"); + return PTR_ERR(ostats); + } + + hstats = objagg_hints_stats_get(hints); + if (IS_ERR(hstats)) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get ERP hints stats\n"); + err = PTR_ERR(hstats); + goto err_hints_stats_get; + } + + /* Very basic criterion for now. */ + if (hstats->root_count < ostats->root_count) + *p_rehash_needed = true; + + err = 0; + + objagg_stats_put(hstats); +err_hints_stats_get: + objagg_stats_put(ostats); + return err; +} + +void * +mlxsw_sp_acl_erp_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion) +{ + struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table; + struct mlxsw_sp *mlxsw_sp = aregion->region->mlxsw_sp; + struct objagg_hints *hints; + bool rehash_needed; + int err; + + mutex_lock(&erp_table->objagg_lock); + hints = objagg_hints_get(erp_table->objagg, + OBJAGG_OPT_ALGO_SIMPLE_GREEDY); + mutex_unlock(&erp_table->objagg_lock); + if (IS_ERR(hints)) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to create ERP hints\n"); + return ERR_CAST(hints); + } + err = mlxsw_sp_acl_erp_hints_check(mlxsw_sp, aregion, hints, + &rehash_needed); + if (err) + goto errout; + + if (!rehash_needed) { + err = -EAGAIN; + goto errout; + } + return hints; + +errout: + objagg_hints_put(hints); + return ERR_PTR(err); +} + +void mlxsw_sp_acl_erp_rehash_hints_put(void *hints_priv) +{ + struct objagg_hints *hints = hints_priv; + + objagg_hints_put(hints); +} + +int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion, + void *hints_priv) +{ + struct mlxsw_sp_acl_erp_table *erp_table; + struct objagg_hints *hints = hints_priv; + int err; + + erp_table = mlxsw_sp_acl_erp_table_create(aregion, hints); + if (IS_ERR(erp_table)) + return PTR_ERR(erp_table); + aregion->erp_table = erp_table; + + /* Initialize the region's master mask to all zeroes */ + err = mlxsw_sp_acl_erp_master_mask_init(aregion); + if (err) + goto err_erp_master_mask_init; + + /* Initialize the region to not use the eRP table */ + err = mlxsw_sp_acl_erp_region_param_init(aregion); + if (err) + goto err_erp_region_param_init; + + return 0; + +err_erp_region_param_init: 
+err_erp_master_mask_init: + mlxsw_sp_acl_erp_table_destroy(erp_table); + return err; +} + +void mlxsw_sp_acl_erp_region_fini(struct mlxsw_sp_acl_atcam_region *aregion) +{ + mlxsw_sp_acl_erp_table_destroy(aregion->erp_table); +} + +static int +mlxsw_sp_acl_erp_tables_sizes_query(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_erp_core *erp_core) +{ + unsigned int size; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_ERPT_ENTRIES_2KB) || + !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_ERPT_ENTRIES_4KB) || + !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_ERPT_ENTRIES_8KB) || + !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_ERPT_ENTRIES_12KB)) + return -EIO; + + size = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_ERPT_ENTRIES_2KB); + erp_core->erpt_entries_size[MLXSW_SP_ACL_ATCAM_REGION_TYPE_2KB] = size; + + size = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_ERPT_ENTRIES_4KB); + erp_core->erpt_entries_size[MLXSW_SP_ACL_ATCAM_REGION_TYPE_4KB] = size; + + size = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_ERPT_ENTRIES_8KB); + erp_core->erpt_entries_size[MLXSW_SP_ACL_ATCAM_REGION_TYPE_8KB] = size; + + size = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_ERPT_ENTRIES_12KB); + erp_core->erpt_entries_size[MLXSW_SP_ACL_ATCAM_REGION_TYPE_12KB] = size; + + return 0; +} + +static int mlxsw_sp_acl_erp_tables_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_erp_core *erp_core) +{ + unsigned int erpt_bank_size; + int err; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_ERPT_BANK_SIZE) || + !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_ERPT_BANKS)) + return -EIO; + erpt_bank_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, + ACL_MAX_ERPT_BANK_SIZE); + erp_core->num_erp_banks = MLXSW_CORE_RES_GET(mlxsw_sp->core, + ACL_MAX_ERPT_BANKS); + + erp_core->erp_tables = gen_pool_create(0, -1); + if (!erp_core->erp_tables) + return -ENOMEM; + gen_pool_set_algo(erp_core->erp_tables, gen_pool_best_fit, NULL); + + err = gen_pool_add(erp_core->erp_tables, + MLXSW_SP_ACL_ERP_GENALLOC_OFFSET, erpt_bank_size, + -1); + if (err) + goto err_gen_pool_add; + + erp_core->bf = mlxsw_sp_acl_bf_init(mlxsw_sp, erp_core->num_erp_banks); + if (IS_ERR(erp_core->bf)) { + err = PTR_ERR(erp_core->bf); + goto err_bf_init; + } + + /* Different regions require masks of different sizes */ + err = mlxsw_sp_acl_erp_tables_sizes_query(mlxsw_sp, erp_core); + if (err) + goto err_erp_tables_sizes_query; + + return 0; + +err_erp_tables_sizes_query: + mlxsw_sp_acl_bf_fini(erp_core->bf); +err_bf_init: +err_gen_pool_add: + gen_pool_destroy(erp_core->erp_tables); + return err; +} + +static void mlxsw_sp_acl_erp_tables_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_erp_core *erp_core) +{ + mlxsw_sp_acl_bf_fini(erp_core->bf); + gen_pool_destroy(erp_core->erp_tables); +} + +int mlxsw_sp_acl_erps_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam *atcam) +{ + struct mlxsw_sp_acl_erp_core *erp_core; + int err; + + erp_core = kzalloc(sizeof(*erp_core), GFP_KERNEL); + if (!erp_core) + return -ENOMEM; + erp_core->mlxsw_sp = mlxsw_sp; + atcam->erp_core = erp_core; + + err = mlxsw_sp_acl_erp_tables_init(mlxsw_sp, erp_core); + if (err) + goto err_erp_tables_init; + + return 0; + +err_erp_tables_init: + kfree(erp_core); + return err; +} + +void mlxsw_sp_acl_erps_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam *atcam) +{ + mlxsw_sp_acl_erp_tables_fini(mlxsw_sp, atcam->erp_core); + kfree(atcam->erp_core); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c new file mode 100644 
index 000000000..50806594d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c @@ -0,0 +1,318 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */ + +#include "spectrum_acl_flex_actions.h" +#include "core_acl_flex_actions.h" +#include "spectrum_span.h" + +static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index, + char *enc_actions, bool is_first, bool ca) +{ + struct mlxsw_sp *mlxsw_sp = priv; + char pefa_pl[MLXSW_REG_PEFA_LEN]; + u32 kvdl_index; + int err; + + /* The first action set of a TCAM entry is stored directly in TCAM, + * not KVD linear area. + */ + if (is_first) + return 0; + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET, + 1, &kvdl_index); + if (err) + return err; + mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, ca, enc_actions); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl); + if (err) + goto err_pefa_write; + *p_kvdl_index = kvdl_index; + return 0; + +err_pefa_write: + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET, + 1, kvdl_index); + return err; +} + +static int mlxsw_sp1_act_kvdl_set_add(void *priv, u32 *p_kvdl_index, + char *enc_actions, bool is_first) +{ + return mlxsw_sp_act_kvdl_set_add(priv, p_kvdl_index, enc_actions, + is_first, false); +} + +static int mlxsw_sp2_act_kvdl_set_add(void *priv, u32 *p_kvdl_index, + char *enc_actions, bool is_first) +{ + return mlxsw_sp_act_kvdl_set_add(priv, p_kvdl_index, enc_actions, + is_first, true); +} + +static void mlxsw_sp_act_kvdl_set_del(void *priv, u32 kvdl_index, + bool is_first) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + if (is_first) + return; + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET, + 1, kvdl_index); +} + +static int mlxsw_sp1_act_kvdl_set_activity_get(void *priv, u32 kvdl_index, + bool *activity) +{ + return -EOPNOTSUPP; +} + +static int mlxsw_sp2_act_kvdl_set_activity_get(void *priv, u32 kvdl_index, + bool *activity) +{ + struct mlxsw_sp *mlxsw_sp = priv; + char pefa_pl[MLXSW_REG_PEFA_LEN]; + int err; + + mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, true, NULL); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl); + if (err) + return err; + mlxsw_reg_pefa_unpack(pefa_pl, activity); + return 0; +} + +static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index, + u16 local_port) +{ + struct mlxsw_sp *mlxsw_sp = priv; + char ppbs_pl[MLXSW_REG_PPBS_LEN]; + u32 kvdl_index; + int err; + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_PBS, + 1, &kvdl_index); + if (err) + return err; + mlxsw_reg_ppbs_pack(ppbs_pl, kvdl_index, local_port); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbs), ppbs_pl); + if (err) + goto err_ppbs_write; + *p_kvdl_index = kvdl_index; + return 0; + +err_ppbs_write: + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_PBS, + 1, kvdl_index); + return err; +} + +static void mlxsw_sp_act_kvdl_fwd_entry_del(void *priv, u32 kvdl_index) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_PBS, + 1, kvdl_index); +} + +static int +mlxsw_sp_act_counter_index_get(void *priv, unsigned int *p_counter_index) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return mlxsw_sp_flow_counter_alloc(mlxsw_sp, p_counter_index); +} + +static void +mlxsw_sp_act_counter_index_put(void *priv, unsigned int counter_index) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp_flow_counter_free(mlxsw_sp, counter_index); +} + +static int 
+mlxsw_sp_act_mirror_add(void *priv, u16 local_in_port, + const struct net_device *out_dev, + bool ingress, int *p_span_id) +{ + struct mlxsw_sp_span_agent_parms agent_parms = {}; + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp = priv; + int err; + + agent_parms.to_dev = out_dev; + err = mlxsw_sp_span_agent_get(mlxsw_sp, p_span_id, &agent_parms); + if (err) + return err; + + mlxsw_sp_port = mlxsw_sp->ports[local_in_port]; + err = mlxsw_sp_span_analyzed_port_get(mlxsw_sp_port, ingress); + if (err) + goto err_analyzed_port_get; + + return 0; + +err_analyzed_port_get: + mlxsw_sp_span_agent_put(mlxsw_sp, *p_span_id); + return err; +} + +static void +mlxsw_sp_act_mirror_del(void *priv, u16 local_in_port, int span_id, bool ingress) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp_port = mlxsw_sp->ports[local_in_port]; + mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, ingress); + mlxsw_sp_span_agent_put(mlxsw_sp, span_id); +} + +static int mlxsw_sp_act_policer_add(void *priv, u64 rate_bytes_ps, u32 burst, + u16 *p_policer_index, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_policer_params params; + struct mlxsw_sp *mlxsw_sp = priv; + + params.rate = rate_bytes_ps; + params.burst = burst; + params.bytes = true; + return mlxsw_sp_policer_add(mlxsw_sp, + MLXSW_SP_POLICER_TYPE_SINGLE_RATE, + &params, extack, p_policer_index); +} + +static void mlxsw_sp_act_policer_del(void *priv, u16 policer_index) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp_policer_del(mlxsw_sp, MLXSW_SP_POLICER_TYPE_SINGLE_RATE, + policer_index); +} + +static int mlxsw_sp1_act_sampler_add(void *priv, u16 local_port, + struct psample_group *psample_group, + u32 rate, u32 trunc_size, bool truncate, + bool ingress, int *p_span_id, + struct netlink_ext_ack *extack) +{ + NL_SET_ERR_MSG_MOD(extack, "Sampling action is not supported on Spectrum-1"); + return -EOPNOTSUPP; +} + +static void mlxsw_sp1_act_sampler_del(void *priv, u16 local_port, int span_id, + bool ingress) +{ + WARN_ON_ONCE(1); +} + +const struct mlxsw_afa_ops mlxsw_sp1_act_afa_ops = { + .kvdl_set_add = mlxsw_sp1_act_kvdl_set_add, + .kvdl_set_del = mlxsw_sp_act_kvdl_set_del, + .kvdl_set_activity_get = mlxsw_sp1_act_kvdl_set_activity_get, + .kvdl_fwd_entry_add = mlxsw_sp_act_kvdl_fwd_entry_add, + .kvdl_fwd_entry_del = mlxsw_sp_act_kvdl_fwd_entry_del, + .counter_index_get = mlxsw_sp_act_counter_index_get, + .counter_index_put = mlxsw_sp_act_counter_index_put, + .mirror_add = mlxsw_sp_act_mirror_add, + .mirror_del = mlxsw_sp_act_mirror_del, + .policer_add = mlxsw_sp_act_policer_add, + .policer_del = mlxsw_sp_act_policer_del, + .sampler_add = mlxsw_sp1_act_sampler_add, + .sampler_del = mlxsw_sp1_act_sampler_del, +}; + +static int mlxsw_sp2_act_sampler_add(void *priv, u16 local_port, + struct psample_group *psample_group, + u32 rate, u32 trunc_size, bool truncate, + bool ingress, int *p_span_id, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_span_agent_parms agent_parms = { + .session_id = MLXSW_SP_SPAN_SESSION_ID_SAMPLING, + }; + struct mlxsw_sp_sample_trigger trigger = { + .type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_POLICY_ENGINE, + }; + struct mlxsw_sp_sample_params params; + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp = priv; + int err; + + params.psample_group = psample_group; + params.trunc_size = trunc_size; + params.rate = rate; + params.truncate = truncate; + err = mlxsw_sp_sample_trigger_params_set(mlxsw_sp, &trigger, &params, + extack); + if (err) + return err; + + err =
mlxsw_sp_span_agent_get(mlxsw_sp, p_span_id, &agent_parms); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to get SPAN agent"); + goto err_span_agent_get; + } + + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + err = mlxsw_sp_span_analyzed_port_get(mlxsw_sp_port, ingress); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to get analyzed port"); + goto err_analyzed_port_get; + } + + return 0; + +err_analyzed_port_get: + mlxsw_sp_span_agent_put(mlxsw_sp, *p_span_id); +err_span_agent_get: + mlxsw_sp_sample_trigger_params_unset(mlxsw_sp, &trigger); + return err; +} + +static void mlxsw_sp2_act_sampler_del(void *priv, u16 local_port, int span_id, + bool ingress) +{ + struct mlxsw_sp_sample_trigger trigger = { + .type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_POLICY_ENGINE, + }; + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, ingress); + mlxsw_sp_span_agent_put(mlxsw_sp, span_id); + mlxsw_sp_sample_trigger_params_unset(mlxsw_sp, &trigger); +} + +const struct mlxsw_afa_ops mlxsw_sp2_act_afa_ops = { + .kvdl_set_add = mlxsw_sp2_act_kvdl_set_add, + .kvdl_set_del = mlxsw_sp_act_kvdl_set_del, + .kvdl_set_activity_get = mlxsw_sp2_act_kvdl_set_activity_get, + .kvdl_fwd_entry_add = mlxsw_sp_act_kvdl_fwd_entry_add, + .kvdl_fwd_entry_del = mlxsw_sp_act_kvdl_fwd_entry_del, + .counter_index_get = mlxsw_sp_act_counter_index_get, + .counter_index_put = mlxsw_sp_act_counter_index_put, + .mirror_add = mlxsw_sp_act_mirror_add, + .mirror_del = mlxsw_sp_act_mirror_del, + .policer_add = mlxsw_sp_act_policer_add, + .policer_del = mlxsw_sp_act_policer_del, + .sampler_add = mlxsw_sp2_act_sampler_add, + .sampler_del = mlxsw_sp2_act_sampler_del, + .dummy_first_set = true, +}; + +int mlxsw_sp_afa_init(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp->afa = mlxsw_afa_create(MLXSW_CORE_RES_GET(mlxsw_sp->core, + ACL_ACTIONS_PER_SET), + mlxsw_sp->afa_ops, mlxsw_sp); + return PTR_ERR_OR_ZERO(mlxsw_sp->afa); +} + +void mlxsw_sp_afa_fini(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_afa_destroy(mlxsw_sp->afa); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h new file mode 100644 index 000000000..fe436d816 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_SPECTRUM_ACL_FLEX_ACTIONS_H +#define _MLXSW_SPECTRUM_ACL_FLEX_ACTIONS_H + +#include "spectrum.h" + +int mlxsw_sp_afa_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_afa_fini(struct mlxsw_sp *mlxsw_sp); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c new file mode 100644 index 000000000..173808c09 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c @@ -0,0 +1,355 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2018 Mellanox Technologies. 
All rights reserved */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include "spectrum.h" +#include "item.h" +#include "core_acl_flex_keys.h" + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_dmac[] = { + MLXSW_AFK_ELEMENT_INST_BUF(DMAC_32_47, 0x00, 2), + MLXSW_AFK_ELEMENT_INST_BUF(DMAC_0_31, 0x02, 4), + MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3), + MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12), + MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac[] = { + MLXSW_AFK_ELEMENT_INST_BUF(SMAC_32_47, 0x00, 2), + MLXSW_AFK_ELEMENT_INST_BUF(SMAC_0_31, 0x02, 4), + MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3), + MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12), + MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac_ex[] = { + MLXSW_AFK_ELEMENT_INST_BUF(SMAC_32_47, 0x02, 2), + MLXSW_AFK_ELEMENT_INST_BUF(SMAC_0_31, 0x04, 4), + MLXSW_AFK_ELEMENT_INST_U32(ETHERTYPE, 0x0C, 0, 16), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_sip[] = { + MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x00, 4), + MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8), + MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = { + MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_0_31, 0x00, 4), + MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8), + MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4[] = { + MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x00, 4), + MLXSW_AFK_ELEMENT_INST_U32(IP_ECN, 0x04, 4, 2), + MLXSW_AFK_ELEMENT_INST_U32(IP_TTL_, 0x04, 24, 8), + MLXSW_AFK_ELEMENT_INST_U32(IP_DSCP, 0x08, 0, 6), + MLXSW_AFK_ELEMENT_INST_U32(TCP_FLAGS, 0x08, 8, 9), /* TCP_CONTROL+TCP_ECN */ +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_ex[] = { + MLXSW_AFK_ELEMENT_INST_U32(VID, 0x00, 0, 12), + MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 29, 3), + MLXSW_AFK_ELEMENT_INST_U32(SRC_L4_PORT, 0x08, 0, 16), + MLXSW_AFK_ELEMENT_INST_U32(DST_L4_PORT, 0x0C, 0, 16), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_dip[] = { + MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_32_63, 0x00, 4), + MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_0_31, 0x04, 4), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_ex1[] = { + MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_96_127, 0x00, 4), + MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_64_95, 0x04, 4), + MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_sip[] = { + MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_32_63, 0x00, 4), + MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x04, 4), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_sip_ex[] = { + MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_96_127, 0x00, 4), + MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_64_95, 0x04, 4), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_packet_type[] = { + MLXSW_AFK_ELEMENT_INST_U32(ETHERTYPE, 0x00, 0, 16), +}; + +static const struct mlxsw_afk_block mlxsw_sp1_afk_blocks[] = { + MLXSW_AFK_BLOCK(0x10, mlxsw_sp_afk_element_info_l2_dmac), + MLXSW_AFK_BLOCK(0x11, mlxsw_sp_afk_element_info_l2_smac), + MLXSW_AFK_BLOCK(0x12, mlxsw_sp_afk_element_info_l2_smac_ex), + MLXSW_AFK_BLOCK(0x30, mlxsw_sp_afk_element_info_ipv4_sip), + MLXSW_AFK_BLOCK(0x31, mlxsw_sp_afk_element_info_ipv4_dip), + 
MLXSW_AFK_BLOCK(0x32, mlxsw_sp_afk_element_info_ipv4), + MLXSW_AFK_BLOCK(0x33, mlxsw_sp_afk_element_info_ipv4_ex), + MLXSW_AFK_BLOCK(0x60, mlxsw_sp_afk_element_info_ipv6_dip), + MLXSW_AFK_BLOCK(0x65, mlxsw_sp_afk_element_info_ipv6_ex1), + MLXSW_AFK_BLOCK(0x62, mlxsw_sp_afk_element_info_ipv6_sip), + MLXSW_AFK_BLOCK(0x63, mlxsw_sp_afk_element_info_ipv6_sip_ex), + MLXSW_AFK_BLOCK(0xB0, mlxsw_sp_afk_element_info_packet_type), +}; + +#define MLXSW_SP1_AFK_KEY_BLOCK_SIZE 16 + +static void mlxsw_sp1_afk_encode_block(char *output, int block_index, + char *block) +{ + unsigned int offset = block_index * MLXSW_SP1_AFK_KEY_BLOCK_SIZE; + char *output_indexed = output + offset; + + memcpy(output_indexed, block, MLXSW_SP1_AFK_KEY_BLOCK_SIZE); +} + +static void mlxsw_sp1_afk_clear_block(char *output, int block_index) +{ + unsigned int offset = block_index * MLXSW_SP1_AFK_KEY_BLOCK_SIZE; + char *output_indexed = output + offset; + + memset(output_indexed, 0, MLXSW_SP1_AFK_KEY_BLOCK_SIZE); +} + +const struct mlxsw_afk_ops mlxsw_sp1_afk_ops = { + .blocks = mlxsw_sp1_afk_blocks, + .blocks_count = ARRAY_SIZE(mlxsw_sp1_afk_blocks), + .encode_block = mlxsw_sp1_afk_encode_block, + .clear_block = mlxsw_sp1_afk_clear_block, +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_0[] = { + MLXSW_AFK_ELEMENT_INST_BUF(DMAC_0_31, 0x04, 4), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_1[] = { + MLXSW_AFK_ELEMENT_INST_BUF(SMAC_0_31, 0x04, 4), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_2[] = { + MLXSW_AFK_ELEMENT_INST_BUF(SMAC_32_47, 0x04, 2), + MLXSW_AFK_ELEMENT_INST_BUF(DMAC_32_47, 0x06, 2), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_3[] = { + MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x00, 0, 3), + MLXSW_AFK_ELEMENT_INST_U32(VID, 0x04, 16, 12), + MLXSW_AFK_ELEMENT_INST_BUF(DMAC_32_47, 0x06, 2), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_4[] = { + MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x00, 0, 3), + MLXSW_AFK_ELEMENT_INST_U32(VID, 0x04, 16, 12), + MLXSW_AFK_ELEMENT_INST_U32(ETHERTYPE, 0x04, 0, 16), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_5[] = { + MLXSW_AFK_ELEMENT_INST_U32(VID, 0x04, 16, 12), + MLXSW_AFK_ELEMENT_INST_EXT_U32(SRC_SYS_PORT, 0x04, 0, 8, -1, true), /* RX_ACL_SYSTEM_PORT */ +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_0[] = { + MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_0_31, 0x04, 4), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_1[] = { + MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x04, 4), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_2[] = { + MLXSW_AFK_ELEMENT_INST_U32(IP_DSCP, 0x04, 0, 6), + MLXSW_AFK_ELEMENT_INST_U32(IP_ECN, 0x04, 6, 2), + MLXSW_AFK_ELEMENT_INST_U32(IP_TTL_, 0x04, 8, 8), + MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x04, 16, 8), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_4[] = { + MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_LSB, 0x04, 24, 8), + MLXSW_AFK_ELEMENT_INST_EXT_U32(VIRT_ROUTER_MSB, 0x00, 0, 3, 0, true), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_0[] = { + MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_32_63, 0x04, 4), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_1[] = { + MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_64_95, 0x04, 4), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_2[] = { + MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_96_127, 0x04, 4), +}; + +static 
struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_3[] = { + MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_32_63, 0x04, 4), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_4[] = { + MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_64_95, 0x04, 4), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_5[] = { + MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_96_127, 0x04, 4), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l4_0[] = { + MLXSW_AFK_ELEMENT_INST_U32(SRC_L4_PORT, 0x04, 16, 16), + MLXSW_AFK_ELEMENT_INST_U32(DST_L4_PORT, 0x04, 0, 16), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l4_2[] = { + MLXSW_AFK_ELEMENT_INST_U32(TCP_FLAGS, 0x04, 16, 9), /* TCP_CONTROL + TCP_ECN */ +}; + +static const struct mlxsw_afk_block mlxsw_sp2_afk_blocks[] = { + MLXSW_AFK_BLOCK(0x10, mlxsw_sp_afk_element_info_mac_0), + MLXSW_AFK_BLOCK(0x11, mlxsw_sp_afk_element_info_mac_1), + MLXSW_AFK_BLOCK(0x12, mlxsw_sp_afk_element_info_mac_2), + MLXSW_AFK_BLOCK(0x13, mlxsw_sp_afk_element_info_mac_3), + MLXSW_AFK_BLOCK(0x14, mlxsw_sp_afk_element_info_mac_4), + MLXSW_AFK_BLOCK(0x15, mlxsw_sp_afk_element_info_mac_5), + MLXSW_AFK_BLOCK(0x38, mlxsw_sp_afk_element_info_ipv4_0), + MLXSW_AFK_BLOCK(0x39, mlxsw_sp_afk_element_info_ipv4_1), + MLXSW_AFK_BLOCK(0x3A, mlxsw_sp_afk_element_info_ipv4_2), + MLXSW_AFK_BLOCK(0x3C, mlxsw_sp_afk_element_info_ipv4_4), + MLXSW_AFK_BLOCK(0x40, mlxsw_sp_afk_element_info_ipv6_0), + MLXSW_AFK_BLOCK(0x41, mlxsw_sp_afk_element_info_ipv6_1), + MLXSW_AFK_BLOCK(0x42, mlxsw_sp_afk_element_info_ipv6_2), + MLXSW_AFK_BLOCK(0x43, mlxsw_sp_afk_element_info_ipv6_3), + MLXSW_AFK_BLOCK(0x44, mlxsw_sp_afk_element_info_ipv6_4), + MLXSW_AFK_BLOCK(0x45, mlxsw_sp_afk_element_info_ipv6_5), + MLXSW_AFK_BLOCK(0x90, mlxsw_sp_afk_element_info_l4_0), + MLXSW_AFK_BLOCK(0x92, mlxsw_sp_afk_element_info_l4_2), +}; + +#define MLXSW_SP2_AFK_BITS_PER_BLOCK 36 + +/* A block in Spectrum-2 is of the following form: + * + * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ + * | | | | | | | | | | | | | | | | | | | | | | | | | | | | |35|34|33|32| + * +-----------------------------------------------------------------------------------------------+ + * |31|30|29|28|27|26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|11|10| 9| 8| 7| 6| 5| 4| 3| 2| 1| 0| + * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ + */ +MLXSW_ITEM64(sp2_afk, block, value, 0x00, 0, MLXSW_SP2_AFK_BITS_PER_BLOCK); + +/* The key / mask block layout in Spectrum-2 is of the following form: + * + * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ + * | | | | | | | | | | | | | | | | | block11_high | + * +-----------------------------------------------------------------------------------------------+ + * | block11_low | block10_high | + * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ + * ... 
+ */ + +struct mlxsw_sp2_afk_block_layout { + unsigned short offset; + struct mlxsw_item item; +}; + +#define MLXSW_SP2_AFK_BLOCK_LAYOUT(_block, _offset, _shift) \ + { \ + .offset = _offset, \ + { \ + .shift = _shift, \ + .size = {.bits = MLXSW_SP2_AFK_BITS_PER_BLOCK}, \ + .name = #_block, \ + } \ + } \ + +static const struct mlxsw_sp2_afk_block_layout mlxsw_sp2_afk_blocks_layout[] = { + MLXSW_SP2_AFK_BLOCK_LAYOUT(block0, 0x30, 0), + MLXSW_SP2_AFK_BLOCK_LAYOUT(block1, 0x2C, 4), + MLXSW_SP2_AFK_BLOCK_LAYOUT(block2, 0x28, 8), + MLXSW_SP2_AFK_BLOCK_LAYOUT(block3, 0x24, 12), + MLXSW_SP2_AFK_BLOCK_LAYOUT(block4, 0x20, 16), + MLXSW_SP2_AFK_BLOCK_LAYOUT(block5, 0x1C, 20), + MLXSW_SP2_AFK_BLOCK_LAYOUT(block6, 0x18, 24), + MLXSW_SP2_AFK_BLOCK_LAYOUT(block7, 0x14, 28), + MLXSW_SP2_AFK_BLOCK_LAYOUT(block8, 0x0C, 0), + MLXSW_SP2_AFK_BLOCK_LAYOUT(block9, 0x08, 4), + MLXSW_SP2_AFK_BLOCK_LAYOUT(block10, 0x04, 8), + MLXSW_SP2_AFK_BLOCK_LAYOUT(block11, 0x00, 12), +}; + +static void __mlxsw_sp2_afk_block_value_set(char *output, int block_index, + u64 block_value) +{ + const struct mlxsw_sp2_afk_block_layout *block_layout; + + if (WARN_ON(block_index < 0 || + block_index >= ARRAY_SIZE(mlxsw_sp2_afk_blocks_layout))) + return; + + block_layout = &mlxsw_sp2_afk_blocks_layout[block_index]; + __mlxsw_item_set64(output + block_layout->offset, + &block_layout->item, 0, block_value); +} + +static void mlxsw_sp2_afk_encode_block(char *output, int block_index, + char *block) +{ + u64 block_value = mlxsw_sp2_afk_block_value_get(block); + + __mlxsw_sp2_afk_block_value_set(output, block_index, block_value); +} + +static void mlxsw_sp2_afk_clear_block(char *output, int block_index) +{ + __mlxsw_sp2_afk_block_value_set(output, block_index, 0); +} + +const struct mlxsw_afk_ops mlxsw_sp2_afk_ops = { + .blocks = mlxsw_sp2_afk_blocks, + .blocks_count = ARRAY_SIZE(mlxsw_sp2_afk_blocks), + .encode_block = mlxsw_sp2_afk_encode_block, + .clear_block = mlxsw_sp2_afk_clear_block, +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_5b[] = { + MLXSW_AFK_ELEMENT_INST_U32(VID, 0x04, 18, 12), + MLXSW_AFK_ELEMENT_INST_EXT_U32(SRC_SYS_PORT, 0x04, 0, 9, -1, true), /* RX_ACL_SYSTEM_PORT */ +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_4b[] = { + MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_LSB, 0x04, 13, 8), + MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_MSB, 0x04, 21, 4), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_2b[] = { + MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_96_127, 0x04, 4), +}; + +static const struct mlxsw_afk_block mlxsw_sp4_afk_blocks[] = { + MLXSW_AFK_BLOCK(0x10, mlxsw_sp_afk_element_info_mac_0), + MLXSW_AFK_BLOCK(0x11, mlxsw_sp_afk_element_info_mac_1), + MLXSW_AFK_BLOCK(0x12, mlxsw_sp_afk_element_info_mac_2), + MLXSW_AFK_BLOCK(0x13, mlxsw_sp_afk_element_info_mac_3), + MLXSW_AFK_BLOCK(0x14, mlxsw_sp_afk_element_info_mac_4), + MLXSW_AFK_BLOCK(0x1A, mlxsw_sp_afk_element_info_mac_5b), + MLXSW_AFK_BLOCK(0x38, mlxsw_sp_afk_element_info_ipv4_0), + MLXSW_AFK_BLOCK(0x39, mlxsw_sp_afk_element_info_ipv4_1), + MLXSW_AFK_BLOCK(0x3A, mlxsw_sp_afk_element_info_ipv4_2), + MLXSW_AFK_BLOCK(0x35, mlxsw_sp_afk_element_info_ipv4_4b), + MLXSW_AFK_BLOCK(0x40, mlxsw_sp_afk_element_info_ipv6_0), + MLXSW_AFK_BLOCK(0x41, mlxsw_sp_afk_element_info_ipv6_1), + MLXSW_AFK_BLOCK(0x47, mlxsw_sp_afk_element_info_ipv6_2b), + MLXSW_AFK_BLOCK(0x43, mlxsw_sp_afk_element_info_ipv6_3), + MLXSW_AFK_BLOCK(0x44, mlxsw_sp_afk_element_info_ipv6_4), + MLXSW_AFK_BLOCK(0x45, mlxsw_sp_afk_element_info_ipv6_5), + 
MLXSW_AFK_BLOCK(0x90, mlxsw_sp_afk_element_info_l4_0), + MLXSW_AFK_BLOCK(0x92, mlxsw_sp_afk_element_info_l4_2), +}; + +const struct mlxsw_afk_ops mlxsw_sp4_afk_ops = { + .blocks = mlxsw_sp4_afk_blocks, + .blocks_count = ARRAY_SIZE(mlxsw_sp4_afk_blocks), + .encode_block = mlxsw_sp2_afk_encode_block, + .clear_block = mlxsw_sp2_afk_clear_block, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c new file mode 100644 index 000000000..3b9ba8fa2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -0,0 +1,1867 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/bitops.h> +#include <linux/list.h> +#include <linux/rhashtable.h> +#include <linux/netdevice.h> +#include <linux/mutex.h> +#include <trace/events/mlxsw.h> + +#include "reg.h" +#include "core.h" +#include "resources.h" +#include "spectrum.h" +#include "spectrum_acl_tcam.h" +#include "core_acl_flex_keys.h" + +size_t mlxsw_sp_acl_tcam_priv_size(struct mlxsw_sp *mlxsw_sp) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + + return ops->priv_size; +} + +#define MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_DFLT 5000 /* ms */ +#define MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_MIN 3000 /* ms */ +#define MLXSW_SP_ACL_TCAM_VREGION_REHASH_CREDITS 100 /* number of entries */ + +int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + u64 max_tcam_regions; + u64 max_regions; + u64 max_groups; + int err; + + mutex_init(&tcam->lock); + tcam->vregion_rehash_intrvl = + MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_DFLT; + INIT_LIST_HEAD(&tcam->vregion_list); + + max_tcam_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core, + ACL_MAX_TCAM_REGIONS); + max_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_REGIONS); + + /* Use 1:1 mapping between ACL region and TCAM region */ + if (max_tcam_regions < max_regions) + max_regions = max_tcam_regions; + + tcam->used_regions = bitmap_zalloc(max_regions, GFP_KERNEL); + if (!tcam->used_regions) + return -ENOMEM; + tcam->max_regions = max_regions; + + max_groups = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUPS); + tcam->used_groups = bitmap_zalloc(max_groups, GFP_KERNEL); + if (!tcam->used_groups) { + err = -ENOMEM; + goto err_alloc_used_groups; + } + tcam->max_groups = max_groups; + tcam->max_group_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, + ACL_MAX_GROUP_SIZE); + + err = ops->init(mlxsw_sp, tcam->priv, tcam); + if (err) + goto err_tcam_init; + + return 0; + +err_tcam_init: + bitmap_free(tcam->used_groups); +err_alloc_used_groups: + bitmap_free(tcam->used_regions); + return err; +} + +void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + + mutex_destroy(&tcam->lock); + ops->fini(mlxsw_sp, tcam->priv); + bitmap_free(tcam->used_groups); + bitmap_free(tcam->used_regions); +} + +int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u32 *priority, bool fillup_priority) +{ + u64 max_priority; + + if (!fillup_priority) { + *priority = 0; + return 0; + } + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, KVD_SIZE)) + return -EIO; + + /* Priority range is 1..cap_kvd_size-1. 
*/ + max_priority = MLXSW_CORE_RES_GET(mlxsw_sp->core, KVD_SIZE) - 1; + if (rulei->priority >= max_priority) + return -EINVAL; + + /* Unlike in TC, in HW, higher number means higher priority. */ + *priority = max_priority - rulei->priority; + return 0; +} + +static int mlxsw_sp_acl_tcam_region_id_get(struct mlxsw_sp_acl_tcam *tcam, + u16 *p_id) +{ + u16 id; + + id = find_first_zero_bit(tcam->used_regions, tcam->max_regions); + if (id < tcam->max_regions) { + __set_bit(id, tcam->used_regions); + *p_id = id; + return 0; + } + return -ENOBUFS; +} + +static void mlxsw_sp_acl_tcam_region_id_put(struct mlxsw_sp_acl_tcam *tcam, + u16 id) +{ + __clear_bit(id, tcam->used_regions); +} + +static int mlxsw_sp_acl_tcam_group_id_get(struct mlxsw_sp_acl_tcam *tcam, + u16 *p_id) +{ + u16 id; + + id = find_first_zero_bit(tcam->used_groups, tcam->max_groups); + if (id < tcam->max_groups) { + __set_bit(id, tcam->used_groups); + *p_id = id; + return 0; + } + return -ENOBUFS; +} + +static void mlxsw_sp_acl_tcam_group_id_put(struct mlxsw_sp_acl_tcam *tcam, + u16 id) +{ + __clear_bit(id, tcam->used_groups); +} + +struct mlxsw_sp_acl_tcam_pattern { + const enum mlxsw_afk_element *elements; + unsigned int elements_count; +}; + +struct mlxsw_sp_acl_tcam_group { + struct mlxsw_sp_acl_tcam *tcam; + u16 id; + struct mutex lock; /* guards region list updates */ + struct list_head region_list; + unsigned int region_count; +}; + +struct mlxsw_sp_acl_tcam_vgroup { + struct mlxsw_sp_acl_tcam_group group; + struct list_head vregion_list; + struct rhashtable vchunk_ht; + const struct mlxsw_sp_acl_tcam_pattern *patterns; + unsigned int patterns_count; + bool tmplt_elusage_set; + struct mlxsw_afk_element_usage tmplt_elusage; + bool vregion_rehash_enabled; + unsigned int *p_min_prio; + unsigned int *p_max_prio; +}; + +struct mlxsw_sp_acl_tcam_rehash_ctx { + void *hints_priv; + bool this_is_rollback; + struct mlxsw_sp_acl_tcam_vchunk *current_vchunk; /* vchunk being + * currently migrated. + */ + struct mlxsw_sp_acl_tcam_ventry *start_ventry; /* ventry to start + * migration from in + * a vchunk being + * currently migrated. + */ + struct mlxsw_sp_acl_tcam_ventry *stop_ventry; /* ventry to stop + * migration at + * a vchunk being + * currently migrated. + */ +}; + +struct mlxsw_sp_acl_tcam_vregion { + struct mutex lock; /* Protects consistency of region, region2 pointers + * and vchunk_list. 
+ */ + struct mlxsw_sp_acl_tcam_region *region; + struct mlxsw_sp_acl_tcam_region *region2; /* Used during migration */ + struct list_head list; /* Member of a TCAM group */ + struct list_head tlist; /* Member of a TCAM */ + struct list_head vchunk_list; /* List of vchunks under this vregion */ + struct mlxsw_afk_key_info *key_info; + struct mlxsw_sp_acl_tcam *tcam; + struct mlxsw_sp_acl_tcam_vgroup *vgroup; + struct { + struct delayed_work dw; + struct mlxsw_sp_acl_tcam_rehash_ctx ctx; + } rehash; + struct mlxsw_sp *mlxsw_sp; + unsigned int ref_count; +}; + +struct mlxsw_sp_acl_tcam_vchunk; + +struct mlxsw_sp_acl_tcam_chunk { + struct mlxsw_sp_acl_tcam_vchunk *vchunk; + struct mlxsw_sp_acl_tcam_region *region; + unsigned long priv[]; + /* priv has to be always the last item */ +}; + +struct mlxsw_sp_acl_tcam_vchunk { + struct mlxsw_sp_acl_tcam_chunk *chunk; + struct mlxsw_sp_acl_tcam_chunk *chunk2; /* Used during migration */ + struct list_head list; /* Member of a TCAM vregion */ + struct rhash_head ht_node; /* Member of a chunk HT */ + struct list_head ventry_list; + unsigned int priority; /* Priority within the vregion and group */ + struct mlxsw_sp_acl_tcam_vgroup *vgroup; + struct mlxsw_sp_acl_tcam_vregion *vregion; + unsigned int ref_count; +}; + +struct mlxsw_sp_acl_tcam_entry { + struct mlxsw_sp_acl_tcam_ventry *ventry; + struct mlxsw_sp_acl_tcam_chunk *chunk; + unsigned long priv[]; + /* priv has to be always the last item */ +}; + +struct mlxsw_sp_acl_tcam_ventry { + struct mlxsw_sp_acl_tcam_entry *entry; + struct list_head list; /* Member of a TCAM vchunk */ + struct mlxsw_sp_acl_tcam_vchunk *vchunk; + struct mlxsw_sp_acl_rule_info *rulei; +}; + +static const struct rhashtable_params mlxsw_sp_acl_tcam_vchunk_ht_params = { + .key_len = sizeof(unsigned int), + .key_offset = offsetof(struct mlxsw_sp_acl_tcam_vchunk, priority), + .head_offset = offsetof(struct mlxsw_sp_acl_tcam_vchunk, ht_node), + .automatic_shrinking = true, +}; + +static int mlxsw_sp_acl_tcam_group_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group) +{ + struct mlxsw_sp_acl_tcam_region *region; + char pagt_pl[MLXSW_REG_PAGT_LEN]; + int acl_index = 0; + + mlxsw_reg_pagt_pack(pagt_pl, group->id); + list_for_each_entry(region, &group->region_list, list) { + bool multi = false; + + /* Check if the next entry in the list has the same vregion. 
*/ + if (region->list.next != &group->region_list && + list_next_entry(region, list)->vregion == region->vregion) + multi = true; + mlxsw_reg_pagt_acl_id_pack(pagt_pl, acl_index++, + region->id, multi); + } + mlxsw_reg_pagt_size_set(pagt_pl, acl_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pagt), pagt_pl); +} + +static int +mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp_acl_tcam *tcam, + struct mlxsw_sp_acl_tcam_group *group) +{ + int err; + + group->tcam = tcam; + INIT_LIST_HEAD(&group->region_list); + + err = mlxsw_sp_acl_tcam_group_id_get(tcam, &group->id); + if (err) + return err; + + mutex_init(&group->lock); + + return 0; +} + +static void mlxsw_sp_acl_tcam_group_del(struct mlxsw_sp_acl_tcam_group *group) +{ + struct mlxsw_sp_acl_tcam *tcam = group->tcam; + + mutex_destroy(&group->lock); + mlxsw_sp_acl_tcam_group_id_put(tcam, group->id); + WARN_ON(!list_empty(&group->region_list)); +} + +static int +mlxsw_sp_acl_tcam_vgroup_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam, + struct mlxsw_sp_acl_tcam_vgroup *vgroup, + const struct mlxsw_sp_acl_tcam_pattern *patterns, + unsigned int patterns_count, + struct mlxsw_afk_element_usage *tmplt_elusage, + bool vregion_rehash_enabled, + unsigned int *p_min_prio, + unsigned int *p_max_prio) +{ + int err; + + vgroup->patterns = patterns; + vgroup->patterns_count = patterns_count; + vgroup->vregion_rehash_enabled = vregion_rehash_enabled; + vgroup->p_min_prio = p_min_prio; + vgroup->p_max_prio = p_max_prio; + + if (tmplt_elusage) { + vgroup->tmplt_elusage_set = true; + memcpy(&vgroup->tmplt_elusage, tmplt_elusage, + sizeof(vgroup->tmplt_elusage)); + } + INIT_LIST_HEAD(&vgroup->vregion_list); + + err = mlxsw_sp_acl_tcam_group_add(tcam, &vgroup->group); + if (err) + return err; + + err = rhashtable_init(&vgroup->vchunk_ht, + &mlxsw_sp_acl_tcam_vchunk_ht_params); + if (err) + goto err_rhashtable_init; + + return 0; + +err_rhashtable_init: + mlxsw_sp_acl_tcam_group_del(&vgroup->group); + return err; +} + +static void +mlxsw_sp_acl_tcam_vgroup_del(struct mlxsw_sp_acl_tcam_vgroup *vgroup) +{ + rhashtable_destroy(&vgroup->vchunk_ht); + mlxsw_sp_acl_tcam_group_del(&vgroup->group); + WARN_ON(!list_empty(&vgroup->vregion_list)); +} + +static int +mlxsw_sp_acl_tcam_group_bind(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group, + struct mlxsw_sp_port *mlxsw_sp_port, + bool ingress) +{ + char ppbt_pl[MLXSW_REG_PPBT_LEN]; + + mlxsw_reg_ppbt_pack(ppbt_pl, ingress ? MLXSW_REG_PXBT_E_IACL : + MLXSW_REG_PXBT_E_EACL, + MLXSW_REG_PXBT_OP_BIND, mlxsw_sp_port->local_port, + group->id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbt), ppbt_pl); +} + +static void +mlxsw_sp_acl_tcam_group_unbind(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group, + struct mlxsw_sp_port *mlxsw_sp_port, + bool ingress) +{ + char ppbt_pl[MLXSW_REG_PPBT_LEN]; + + mlxsw_reg_ppbt_pack(ppbt_pl, ingress ? 
MLXSW_REG_PXBT_E_IACL : + MLXSW_REG_PXBT_E_EACL, + MLXSW_REG_PXBT_OP_UNBIND, mlxsw_sp_port->local_port, + group->id); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbt), ppbt_pl); +} + +static u16 +mlxsw_sp_acl_tcam_group_id(struct mlxsw_sp_acl_tcam_group *group) +{ + return group->id; +} + +static unsigned int +mlxsw_sp_acl_tcam_vregion_prio(struct mlxsw_sp_acl_tcam_vregion *vregion) +{ + struct mlxsw_sp_acl_tcam_vchunk *vchunk; + + if (list_empty(&vregion->vchunk_list)) + return 0; + /* As a priority of a vregion, return priority of the first vchunk */ + vchunk = list_first_entry(&vregion->vchunk_list, + typeof(*vchunk), list); + return vchunk->priority; +} + +static unsigned int +mlxsw_sp_acl_tcam_vregion_max_prio(struct mlxsw_sp_acl_tcam_vregion *vregion) +{ + struct mlxsw_sp_acl_tcam_vchunk *vchunk; + + if (list_empty(&vregion->vchunk_list)) + return 0; + vchunk = list_last_entry(&vregion->vchunk_list, + typeof(*vchunk), list); + return vchunk->priority; +} + +static void +mlxsw_sp_acl_tcam_vgroup_prio_update(struct mlxsw_sp_acl_tcam_vgroup *vgroup) +{ + struct mlxsw_sp_acl_tcam_vregion *vregion; + + if (list_empty(&vgroup->vregion_list)) + return; + vregion = list_first_entry(&vgroup->vregion_list, + typeof(*vregion), list); + *vgroup->p_min_prio = mlxsw_sp_acl_tcam_vregion_prio(vregion); + vregion = list_last_entry(&vgroup->vregion_list, + typeof(*vregion), list); + *vgroup->p_max_prio = mlxsw_sp_acl_tcam_vregion_max_prio(vregion); +} + +static int +mlxsw_sp_acl_tcam_group_region_attach(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group, + struct mlxsw_sp_acl_tcam_region *region, + unsigned int priority, + struct mlxsw_sp_acl_tcam_region *next_region) +{ + struct mlxsw_sp_acl_tcam_region *region2; + struct list_head *pos; + int err; + + mutex_lock(&group->lock); + if (group->region_count == group->tcam->max_group_size) { + err = -ENOBUFS; + goto err_region_count_check; + } + + if (next_region) { + /* If the next region is defined, place the new one + * before it. The next one is a sibling. 
+ */ + pos = &next_region->list; + } else { + /* Position the region inside the list according to priority */ + list_for_each(pos, &group->region_list) { + region2 = list_entry(pos, typeof(*region2), list); + if (mlxsw_sp_acl_tcam_vregion_prio(region2->vregion) > + priority) + break; + } + } + list_add_tail(&region->list, pos); + region->group = group; + + err = mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group); + if (err) + goto err_group_update; + + group->region_count++; + mutex_unlock(&group->lock); + return 0; + +err_group_update: + list_del(&region->list); +err_region_count_check: + mutex_unlock(&group->lock); + return err; +} + +static void +mlxsw_sp_acl_tcam_group_region_detach(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region) +{ + struct mlxsw_sp_acl_tcam_group *group = region->group; + + mutex_lock(&group->lock); + list_del(&region->list); + group->region_count--; + mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group); + mutex_unlock(&group->lock); +} + +static int +mlxsw_sp_acl_tcam_vgroup_vregion_attach(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vgroup *vgroup, + struct mlxsw_sp_acl_tcam_vregion *vregion, + unsigned int priority) +{ + struct mlxsw_sp_acl_tcam_vregion *vregion2; + struct list_head *pos; + int err; + + /* Position the vregion inside the list according to priority */ + list_for_each(pos, &vgroup->vregion_list) { + vregion2 = list_entry(pos, typeof(*vregion2), list); + if (mlxsw_sp_acl_tcam_vregion_prio(vregion2) > priority) + break; + } + list_add_tail(&vregion->list, pos); + + err = mlxsw_sp_acl_tcam_group_region_attach(mlxsw_sp, &vgroup->group, + vregion->region, + priority, NULL); + if (err) + goto err_region_attach; + + return 0; + +err_region_attach: + list_del(&vregion->list); + return err; +} + +static void +mlxsw_sp_acl_tcam_vgroup_vregion_detach(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion) +{ + list_del(&vregion->list); + if (vregion->region2) + mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, + vregion->region2); + mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, vregion->region); +} + +static struct mlxsw_sp_acl_tcam_vregion * +mlxsw_sp_acl_tcam_vgroup_vregion_find(struct mlxsw_sp_acl_tcam_vgroup *vgroup, + unsigned int priority, + struct mlxsw_afk_element_usage *elusage, + bool *p_need_split) +{ + struct mlxsw_sp_acl_tcam_vregion *vregion, *vregion2; + struct list_head *pos; + bool issubset; + + list_for_each(pos, &vgroup->vregion_list) { + vregion = list_entry(pos, typeof(*vregion), list); + + /* First, check if the requested priority does not rather belong + * under some of the next vregions. + */ + if (pos->next != &vgroup->vregion_list) { /* not last */ + vregion2 = list_entry(pos->next, typeof(*vregion2), + list); + if (priority >= + mlxsw_sp_acl_tcam_vregion_prio(vregion2)) + continue; + } + + issubset = mlxsw_afk_key_info_subset(vregion->key_info, + elusage); + + /* If requested element usage would not fit and the priority + * is lower than the currently inspected vregion we cannot + * use this region, so return NULL to indicate new vregion has + * to be created. + */ + if (!issubset && + priority < mlxsw_sp_acl_tcam_vregion_prio(vregion)) + return NULL; + + /* If requested element usage would not fit and the priority + * is higher than the currently inspected vregion we cannot + * use this vregion. There is still some hope that the next + * vregion would be the fit. So let it be processed and + * eventually break at the check right above this.
+ */ + if (!issubset && + priority > mlxsw_sp_acl_tcam_vregion_max_prio(vregion)) + continue; + + /* Indicate if the vregion needs to be split in order to add + * the requested priority. Split is needed when requested + * element usage won't fit into the found vregion. + */ + *p_need_split = !issubset; + return vregion; + } + return NULL; /* New vregion has to be created. */ +} + +static void +mlxsw_sp_acl_tcam_vgroup_use_patterns(struct mlxsw_sp_acl_tcam_vgroup *vgroup, + struct mlxsw_afk_element_usage *elusage, + struct mlxsw_afk_element_usage *out) +{ + const struct mlxsw_sp_acl_tcam_pattern *pattern; + int i; + + /* In case the template is set, we don't have to look up the pattern + * and just use the template. + */ + if (vgroup->tmplt_elusage_set) { + memcpy(out, &vgroup->tmplt_elusage, sizeof(*out)); + WARN_ON(!mlxsw_afk_element_usage_subset(elusage, out)); + return; + } + + for (i = 0; i < vgroup->patterns_count; i++) { + pattern = &vgroup->patterns[i]; + mlxsw_afk_element_usage_fill(out, pattern->elements, + pattern->elements_count); + if (mlxsw_afk_element_usage_subset(elusage, out)) + return; + } + memcpy(out, elusage, sizeof(*out)); +} + +static int +mlxsw_sp_acl_tcam_region_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region) +{ + struct mlxsw_afk_key_info *key_info = region->key_info; + char ptar_pl[MLXSW_REG_PTAR_LEN]; + unsigned int encodings_count; + int i; + int err; + + mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_ALLOC, + region->key_type, + MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT, + region->id, region->tcam_region_info); + encodings_count = mlxsw_afk_key_info_blocks_count_get(key_info); + for (i = 0; i < encodings_count; i++) { + u16 encoding; + + encoding = mlxsw_afk_key_info_block_encoding_get(key_info, i); + mlxsw_reg_ptar_key_id_pack(ptar_pl, i, encoding); + } + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptar), ptar_pl); + if (err) + return err; + mlxsw_reg_ptar_unpack(ptar_pl, region->tcam_region_info); + return 0; +} + +static void +mlxsw_sp_acl_tcam_region_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region) +{ + char ptar_pl[MLXSW_REG_PTAR_LEN]; + + mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_FREE, + region->key_type, 0, region->id, + region->tcam_region_info); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptar), ptar_pl); +} + +static int +mlxsw_sp_acl_tcam_region_enable(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region) +{ + char pacl_pl[MLXSW_REG_PACL_LEN]; + + mlxsw_reg_pacl_pack(pacl_pl, region->id, true, + region->tcam_region_info); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pacl), pacl_pl); +} + +static void +mlxsw_sp_acl_tcam_region_disable(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region) +{ + char pacl_pl[MLXSW_REG_PACL_LEN]; + + mlxsw_reg_pacl_pack(pacl_pl, region->id, false, + region->tcam_region_info); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pacl), pacl_pl); +} + +static struct mlxsw_sp_acl_tcam_region * +mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam, + struct mlxsw_sp_acl_tcam_vregion *vregion, + void *hints_priv) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + struct mlxsw_sp_acl_tcam_region *region; + int err; + + region = kzalloc(sizeof(*region) + ops->region_priv_size, GFP_KERNEL); + if (!region) + return ERR_PTR(-ENOMEM); + region->mlxsw_sp = mlxsw_sp; + region->vregion = vregion; + region->key_info = vregion->key_info; + + err = mlxsw_sp_acl_tcam_region_id_get(tcam, 
&region->id); + if (err) + goto err_region_id_get; + + err = ops->region_associate(mlxsw_sp, region); + if (err) + goto err_tcam_region_associate; + + region->key_type = ops->key_type; + err = mlxsw_sp_acl_tcam_region_alloc(mlxsw_sp, region); + if (err) + goto err_tcam_region_alloc; + + err = mlxsw_sp_acl_tcam_region_enable(mlxsw_sp, region); + if (err) + goto err_tcam_region_enable; + + err = ops->region_init(mlxsw_sp, region->priv, tcam->priv, + region, hints_priv); + if (err) + goto err_tcam_region_init; + + return region; + +err_tcam_region_init: + mlxsw_sp_acl_tcam_region_disable(mlxsw_sp, region); +err_tcam_region_enable: + mlxsw_sp_acl_tcam_region_free(mlxsw_sp, region); +err_tcam_region_alloc: +err_tcam_region_associate: + mlxsw_sp_acl_tcam_region_id_put(tcam, region->id); +err_region_id_get: + kfree(region); + return ERR_PTR(err); +} + +static void +mlxsw_sp_acl_tcam_region_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + + ops->region_fini(mlxsw_sp, region->priv); + mlxsw_sp_acl_tcam_region_disable(mlxsw_sp, region); + mlxsw_sp_acl_tcam_region_free(mlxsw_sp, region); + mlxsw_sp_acl_tcam_region_id_put(region->group->tcam, + region->id); + kfree(region); +} + +static void +mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(struct mlxsw_sp_acl_tcam_vregion *vregion) +{ + unsigned long interval = vregion->tcam->vregion_rehash_intrvl; + + if (!interval) + return; + mlxsw_core_schedule_dw(&vregion->rehash.dw, + msecs_to_jiffies(interval)); +} + +static void +mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion, + int *credits); + +static void mlxsw_sp_acl_tcam_vregion_rehash_work(struct work_struct *work) +{ + struct mlxsw_sp_acl_tcam_vregion *vregion = + container_of(work, struct mlxsw_sp_acl_tcam_vregion, + rehash.dw.work); + int credits = MLXSW_SP_ACL_TCAM_VREGION_REHASH_CREDITS; + + mlxsw_sp_acl_tcam_vregion_rehash(vregion->mlxsw_sp, vregion, &credits); + if (credits < 0) + /* Rehash gone out of credits so it was interrupted. + * Schedule the work as soon as possible to continue. + */ + mlxsw_core_schedule_dw(&vregion->rehash.dw, 0); + else + mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(vregion); +} + +static void +mlxsw_sp_acl_tcam_rehash_ctx_vchunk_changed(struct mlxsw_sp_acl_tcam_vchunk *vchunk) +{ + struct mlxsw_sp_acl_tcam_vregion *vregion = vchunk->vregion; + + /* If a rule was added or deleted from vchunk which is currently + * under rehash migration, we have to reset the ventry pointers + * to make sure all rules are properly migrated. + */ + if (vregion->rehash.ctx.current_vchunk == vchunk) { + vregion->rehash.ctx.start_ventry = NULL; + vregion->rehash.ctx.stop_ventry = NULL; + } +} + +static void +mlxsw_sp_acl_tcam_rehash_ctx_vregion_changed(struct mlxsw_sp_acl_tcam_vregion *vregion) +{ + /* If a chunk was added or deleted from vregion we have to reset + * the current chunk pointer to make sure all chunks + * are properly migrated.
+ */ + vregion->rehash.ctx.current_vchunk = NULL; +} + +static struct mlxsw_sp_acl_tcam_vregion * +mlxsw_sp_acl_tcam_vregion_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vgroup *vgroup, + unsigned int priority, + struct mlxsw_afk_element_usage *elusage) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl); + struct mlxsw_sp_acl_tcam *tcam = vgroup->group.tcam; + struct mlxsw_sp_acl_tcam_vregion *vregion; + int err; + + vregion = kzalloc(sizeof(*vregion), GFP_KERNEL); + if (!vregion) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&vregion->vchunk_list); + mutex_init(&vregion->lock); + vregion->tcam = tcam; + vregion->mlxsw_sp = mlxsw_sp; + vregion->vgroup = vgroup; + vregion->ref_count = 1; + + vregion->key_info = mlxsw_afk_key_info_get(afk, elusage); + if (IS_ERR(vregion->key_info)) { + err = PTR_ERR(vregion->key_info); + goto err_key_info_get; + } + + vregion->region = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, tcam, + vregion, NULL); + if (IS_ERR(vregion->region)) { + err = PTR_ERR(vregion->region); + goto err_region_create; + } + + err = mlxsw_sp_acl_tcam_vgroup_vregion_attach(mlxsw_sp, vgroup, vregion, + priority); + if (err) + goto err_vgroup_vregion_attach; + + if (vgroup->vregion_rehash_enabled && ops->region_rehash_hints_get) { + /* Create the delayed work for vregion periodic rehash */ + INIT_DELAYED_WORK(&vregion->rehash.dw, + mlxsw_sp_acl_tcam_vregion_rehash_work); + mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(vregion); + mutex_lock(&tcam->lock); + list_add_tail(&vregion->tlist, &tcam->vregion_list); + mutex_unlock(&tcam->lock); + } + + return vregion; + +err_vgroup_vregion_attach: + mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, vregion->region); +err_region_create: + mlxsw_afk_key_info_put(vregion->key_info); +err_key_info_get: + kfree(vregion); + return ERR_PTR(err); +} + +static void +mlxsw_sp_acl_tcam_vregion_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + struct mlxsw_sp_acl_tcam_vgroup *vgroup = vregion->vgroup; + struct mlxsw_sp_acl_tcam *tcam = vregion->tcam; + + if (vgroup->vregion_rehash_enabled && ops->region_rehash_hints_get) { + mutex_lock(&tcam->lock); + list_del(&vregion->tlist); + mutex_unlock(&tcam->lock); + cancel_delayed_work_sync(&vregion->rehash.dw); + } + mlxsw_sp_acl_tcam_vgroup_vregion_detach(mlxsw_sp, vregion); + if (vregion->region2) + mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, vregion->region2); + mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, vregion->region); + mlxsw_afk_key_info_put(vregion->key_info); + mutex_destroy(&vregion->lock); + kfree(vregion); +} + +u32 mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + u32 vregion_rehash_intrvl; + + if (WARN_ON(!ops->region_rehash_hints_get)) + return 0; + vregion_rehash_intrvl = tcam->vregion_rehash_intrvl; + return vregion_rehash_intrvl; +} + +int mlxsw_sp_acl_tcam_vregion_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam, + u32 val) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + struct mlxsw_sp_acl_tcam_vregion *vregion; + + if (val < MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_MIN && val) + return -EINVAL; + if (WARN_ON(!ops->region_rehash_hints_get)) + return -EOPNOTSUPP; + tcam->vregion_rehash_intrvl = val; + mutex_lock(&tcam->lock); + 
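+	/* Propagate the new interval to every existing vregion: kick the
+	 * delayed work right away when a non-zero interval is set, cancel
+	 * it when the interval is set to zero.
+	 */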
list_for_each_entry(vregion, &tcam->vregion_list, tlist) { + if (val) + mlxsw_core_schedule_dw(&vregion->rehash.dw, 0); + else + cancel_delayed_work_sync(&vregion->rehash.dw); + } + mutex_unlock(&tcam->lock); + return 0; +} + +static struct mlxsw_sp_acl_tcam_vregion * +mlxsw_sp_acl_tcam_vregion_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vgroup *vgroup, + unsigned int priority, + struct mlxsw_afk_element_usage *elusage) +{ + struct mlxsw_afk_element_usage vregion_elusage; + struct mlxsw_sp_acl_tcam_vregion *vregion; + bool need_split; + + vregion = mlxsw_sp_acl_tcam_vgroup_vregion_find(vgroup, priority, + elusage, &need_split); + if (vregion) { + if (need_split) { + /* According to priority, new vchunk should belong to + * an existing vregion. However, this vchunk needs + * elements that vregion does not contain. We need + * to split the existing vregion into two and create + * a new vregion for the new vchunk in between. + * This is not supported now. + */ + return ERR_PTR(-EOPNOTSUPP); + } + vregion->ref_count++; + return vregion; + } + + mlxsw_sp_acl_tcam_vgroup_use_patterns(vgroup, elusage, + &vregion_elusage); + + return mlxsw_sp_acl_tcam_vregion_create(mlxsw_sp, vgroup, priority, + &vregion_elusage); +} + +static void +mlxsw_sp_acl_tcam_vregion_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion) +{ + if (--vregion->ref_count) + return; + mlxsw_sp_acl_tcam_vregion_destroy(mlxsw_sp, vregion); +} + +static struct mlxsw_sp_acl_tcam_chunk * +mlxsw_sp_acl_tcam_chunk_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vchunk *vchunk, + struct mlxsw_sp_acl_tcam_region *region) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + struct mlxsw_sp_acl_tcam_chunk *chunk; + + chunk = kzalloc(sizeof(*chunk) + ops->chunk_priv_size, GFP_KERNEL); + if (!chunk) + return ERR_PTR(-ENOMEM); + chunk->vchunk = vchunk; + chunk->region = region; + + ops->chunk_init(region->priv, chunk->priv, vchunk->priority); + return chunk; +} + +static void +mlxsw_sp_acl_tcam_chunk_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_chunk *chunk) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + + ops->chunk_fini(chunk->priv); + kfree(chunk); +} + +static struct mlxsw_sp_acl_tcam_vchunk * +mlxsw_sp_acl_tcam_vchunk_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vgroup *vgroup, + unsigned int priority, + struct mlxsw_afk_element_usage *elusage) +{ + struct mlxsw_sp_acl_tcam_vchunk *vchunk, *vchunk2; + struct mlxsw_sp_acl_tcam_vregion *vregion; + struct list_head *pos; + int err; + + if (priority == MLXSW_SP_ACL_TCAM_CATCHALL_PRIO) + return ERR_PTR(-EINVAL); + + vchunk = kzalloc(sizeof(*vchunk), GFP_KERNEL); + if (!vchunk) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&vchunk->ventry_list); + vchunk->priority = priority; + vchunk->vgroup = vgroup; + vchunk->ref_count = 1; + + vregion = mlxsw_sp_acl_tcam_vregion_get(mlxsw_sp, vgroup, + priority, elusage); + if (IS_ERR(vregion)) { + err = PTR_ERR(vregion); + goto err_vregion_get; + } + + vchunk->vregion = vregion; + + err = rhashtable_insert_fast(&vgroup->vchunk_ht, &vchunk->ht_node, + mlxsw_sp_acl_tcam_vchunk_ht_params); + if (err) + goto err_rhashtable_insert; + + mutex_lock(&vregion->lock); + vchunk->chunk = mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, vchunk, + vchunk->vregion->region); + if (IS_ERR(vchunk->chunk)) { + mutex_unlock(&vregion->lock); + err = PTR_ERR(vchunk->chunk); + goto err_chunk_create; + } + + mlxsw_sp_acl_tcam_rehash_ctx_vregion_changed(vregion); 
+ + /* Position the vchunk inside the list according to priority */ + list_for_each(pos, &vregion->vchunk_list) { + vchunk2 = list_entry(pos, typeof(*vchunk2), list); + if (vchunk2->priority > priority) + break; + } + list_add_tail(&vchunk->list, pos); + mutex_unlock(&vregion->lock); + mlxsw_sp_acl_tcam_vgroup_prio_update(vgroup); + + return vchunk; + +err_chunk_create: + rhashtable_remove_fast(&vgroup->vchunk_ht, &vchunk->ht_node, + mlxsw_sp_acl_tcam_vchunk_ht_params); +err_rhashtable_insert: + mlxsw_sp_acl_tcam_vregion_put(mlxsw_sp, vregion); +err_vregion_get: + kfree(vchunk); + return ERR_PTR(err); +} + +static void +mlxsw_sp_acl_tcam_vchunk_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vchunk *vchunk) +{ + struct mlxsw_sp_acl_tcam_vregion *vregion = vchunk->vregion; + struct mlxsw_sp_acl_tcam_vgroup *vgroup = vchunk->vgroup; + + mutex_lock(&vregion->lock); + mlxsw_sp_acl_tcam_rehash_ctx_vregion_changed(vregion); + list_del(&vchunk->list); + if (vchunk->chunk2) + mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk2); + mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk); + mutex_unlock(&vregion->lock); + rhashtable_remove_fast(&vgroup->vchunk_ht, &vchunk->ht_node, + mlxsw_sp_acl_tcam_vchunk_ht_params); + mlxsw_sp_acl_tcam_vregion_put(mlxsw_sp, vchunk->vregion); + kfree(vchunk); + mlxsw_sp_acl_tcam_vgroup_prio_update(vgroup); +} + +static struct mlxsw_sp_acl_tcam_vchunk * +mlxsw_sp_acl_tcam_vchunk_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vgroup *vgroup, + unsigned int priority, + struct mlxsw_afk_element_usage *elusage) +{ + struct mlxsw_sp_acl_tcam_vchunk *vchunk; + + vchunk = rhashtable_lookup_fast(&vgroup->vchunk_ht, &priority, + mlxsw_sp_acl_tcam_vchunk_ht_params); + if (vchunk) { + if (WARN_ON(!mlxsw_afk_key_info_subset(vchunk->vregion->key_info, + elusage))) + return ERR_PTR(-EINVAL); + vchunk->ref_count++; + return vchunk; + } + return mlxsw_sp_acl_tcam_vchunk_create(mlxsw_sp, vgroup, + priority, elusage); +} + +static void +mlxsw_sp_acl_tcam_vchunk_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vchunk *vchunk) +{ + if (--vchunk->ref_count) + return; + mlxsw_sp_acl_tcam_vchunk_destroy(mlxsw_sp, vchunk); +} + +static struct mlxsw_sp_acl_tcam_entry * +mlxsw_sp_acl_tcam_entry_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_ventry *ventry, + struct mlxsw_sp_acl_tcam_chunk *chunk) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + struct mlxsw_sp_acl_tcam_entry *entry; + int err; + + entry = kzalloc(sizeof(*entry) + ops->entry_priv_size, GFP_KERNEL); + if (!entry) + return ERR_PTR(-ENOMEM); + entry->ventry = ventry; + entry->chunk = chunk; + + err = ops->entry_add(mlxsw_sp, chunk->region->priv, chunk->priv, + entry->priv, ventry->rulei); + if (err) + goto err_entry_add; + + return entry; + +err_entry_add: + kfree(entry); + return ERR_PTR(err); +} + +static void mlxsw_sp_acl_tcam_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_entry *entry) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + + ops->entry_del(mlxsw_sp, entry->chunk->region->priv, + entry->chunk->priv, entry->priv); + kfree(entry); +} + +static int +mlxsw_sp_acl_tcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region, + struct mlxsw_sp_acl_tcam_entry *entry, + struct mlxsw_sp_acl_rule_info *rulei) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + + return ops->entry_action_replace(mlxsw_sp, region->priv, + entry->priv, rulei); +} + 
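+/* A ventry (virtual entry) wraps a physical TCAM entry so that the rule
+ * can be re-created in a different chunk during rehash while its users
+ * keep a stable handle; the helpers below implement that indirection.
+ */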
+static int +mlxsw_sp_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_entry *entry, + bool *activity) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + + return ops->entry_activity_get(mlxsw_sp, entry->chunk->region->priv, + entry->priv, activity); +} + +static int mlxsw_sp_acl_tcam_ventry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vgroup *vgroup, + struct mlxsw_sp_acl_tcam_ventry *ventry, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp_acl_tcam_vregion *vregion; + struct mlxsw_sp_acl_tcam_vchunk *vchunk; + int err; + + vchunk = mlxsw_sp_acl_tcam_vchunk_get(mlxsw_sp, vgroup, rulei->priority, + &rulei->values.elusage); + if (IS_ERR(vchunk)) + return PTR_ERR(vchunk); + + ventry->vchunk = vchunk; + ventry->rulei = rulei; + vregion = vchunk->vregion; + + mutex_lock(&vregion->lock); + ventry->entry = mlxsw_sp_acl_tcam_entry_create(mlxsw_sp, ventry, + vchunk->chunk); + if (IS_ERR(ventry->entry)) { + mutex_unlock(&vregion->lock); + err = PTR_ERR(ventry->entry); + goto err_entry_create; + } + + list_add_tail(&ventry->list, &vchunk->ventry_list); + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_changed(vchunk); + mutex_unlock(&vregion->lock); + + return 0; + +err_entry_create: + mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, vchunk); + return err; +} + +static void mlxsw_sp_acl_tcam_ventry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_ventry *ventry) +{ + struct mlxsw_sp_acl_tcam_vchunk *vchunk = ventry->vchunk; + struct mlxsw_sp_acl_tcam_vregion *vregion = vchunk->vregion; + + mutex_lock(&vregion->lock); + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_changed(vchunk); + list_del(&ventry->list); + mlxsw_sp_acl_tcam_entry_destroy(mlxsw_sp, ventry->entry); + mutex_unlock(&vregion->lock); + mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, vchunk); +} + +static int +mlxsw_sp_acl_tcam_ventry_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_ventry *ventry, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp_acl_tcam_vchunk *vchunk = ventry->vchunk; + + return mlxsw_sp_acl_tcam_entry_action_replace(mlxsw_sp, + vchunk->vregion->region, + ventry->entry, rulei); +} + +static int +mlxsw_sp_acl_tcam_ventry_activity_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_ventry *ventry, + bool *activity) +{ + return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, + ventry->entry, activity); +} + +static int +mlxsw_sp_acl_tcam_ventry_migrate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_ventry *ventry, + struct mlxsw_sp_acl_tcam_chunk *chunk, + int *credits) +{ + struct mlxsw_sp_acl_tcam_entry *new_entry; + + /* First check if the entry is not already where we want it to be. 
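+	 * Each entry that actually needs moving consumes one credit; once
+	 * the budget is exhausted we return without migrating this ventry,
+	 * and the caller resumes from it on the next work iteration.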
*/ + if (ventry->entry->chunk == chunk) + return 0; + + if (--(*credits) < 0) + return 0; + + new_entry = mlxsw_sp_acl_tcam_entry_create(mlxsw_sp, ventry, chunk); + if (IS_ERR(new_entry)) + return PTR_ERR(new_entry); + mlxsw_sp_acl_tcam_entry_destroy(mlxsw_sp, ventry->entry); + ventry->entry = new_entry; + return 0; +} + +static int +mlxsw_sp_acl_tcam_vchunk_migrate_start(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vchunk *vchunk, + struct mlxsw_sp_acl_tcam_region *region, + struct mlxsw_sp_acl_tcam_rehash_ctx *ctx) +{ + struct mlxsw_sp_acl_tcam_chunk *new_chunk; + + new_chunk = mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, vchunk, region); + if (IS_ERR(new_chunk)) + return PTR_ERR(new_chunk); + vchunk->chunk2 = vchunk->chunk; + vchunk->chunk = new_chunk; + ctx->current_vchunk = vchunk; + ctx->start_ventry = NULL; + ctx->stop_ventry = NULL; + return 0; +} + +static void +mlxsw_sp_acl_tcam_vchunk_migrate_end(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vchunk *vchunk, + struct mlxsw_sp_acl_tcam_rehash_ctx *ctx) +{ + mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk2); + vchunk->chunk2 = NULL; + ctx->current_vchunk = NULL; +} + +static int +mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vchunk *vchunk, + struct mlxsw_sp_acl_tcam_region *region, + struct mlxsw_sp_acl_tcam_rehash_ctx *ctx, + int *credits) +{ + struct mlxsw_sp_acl_tcam_ventry *ventry; + int err; + + if (vchunk->chunk->region != region) { + err = mlxsw_sp_acl_tcam_vchunk_migrate_start(mlxsw_sp, vchunk, + region, ctx); + if (err) + return err; + } else if (!vchunk->chunk2) { + /* The chunk is already as it should be, nothing to do. */ + return 0; + } + + /* If the migration got interrupted, we have the ventry to start from + * stored in context. + */ + if (ctx->start_ventry) + ventry = ctx->start_ventry; + else + ventry = list_first_entry(&vchunk->ventry_list, + typeof(*ventry), list); + + list_for_each_entry_from(ventry, &vchunk->ventry_list, list) { + /* During rollback, once we reach the ventry that failed + * to migrate, we are done. + */ + if (ventry == ctx->stop_ventry) + break; + + err = mlxsw_sp_acl_tcam_ventry_migrate(mlxsw_sp, ventry, + vchunk->chunk, credits); + if (err) { + if (ctx->this_is_rollback) { + /* Save the ventry which we ended with and try + * to continue later on. + */ + ctx->start_ventry = ventry; + return err; + } + /* Swap the chunk and chunk2 pointers so the follow-up + * rollback call will see the original chunk pointer + * in vchunk->chunk. + */ + swap(vchunk->chunk, vchunk->chunk2); + /* The rollback has to be done from beginning of the + * chunk, that is why we have to null the start_ventry. + * However, we know where to stop the rollback, + * at the current ventry. + */ + ctx->start_ventry = NULL; + ctx->stop_ventry = ventry; + return err; + } else if (*credits < 0) { + /* We are out of credits, the rest of the ventries + * will be migrated later. Save the ventry + * which we ended with. + */ + ctx->start_ventry = ventry; + return 0; + } + } + + mlxsw_sp_acl_tcam_vchunk_migrate_end(mlxsw_sp, vchunk, ctx); + return 0; +} + +static int +mlxsw_sp_acl_tcam_vchunk_migrate_all(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion, + struct mlxsw_sp_acl_tcam_rehash_ctx *ctx, + int *credits) +{ + struct mlxsw_sp_acl_tcam_vchunk *vchunk; + int err; + + /* If the migration got interrupted, we have the vchunk + * we are working on stored in context. 
+ */ + if (ctx->current_vchunk) + vchunk = ctx->current_vchunk; + else + vchunk = list_first_entry(&vregion->vchunk_list, + typeof(*vchunk), list); + + list_for_each_entry_from(vchunk, &vregion->vchunk_list, list) { + err = mlxsw_sp_acl_tcam_vchunk_migrate_one(mlxsw_sp, vchunk, + vregion->region, + ctx, credits); + if (err || *credits < 0) + return err; + } + return 0; +} + +static int +mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion, + struct mlxsw_sp_acl_tcam_rehash_ctx *ctx, + int *credits) +{ + int err, err2; + + trace_mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion); + mutex_lock(&vregion->lock); + err = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion, + ctx, credits); + if (err) { + /* In case migration was not successful, we need to swap + * so the original region pointer is assigned again + * to vregion->region. + */ + swap(vregion->region, vregion->region2); + ctx->current_vchunk = NULL; + ctx->this_is_rollback = true; + err2 = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion, + ctx, credits); + if (err2) { + trace_mlxsw_sp_acl_tcam_vregion_rehash_rollback_failed(mlxsw_sp, + vregion); + dev_err(mlxsw_sp->bus_info->dev, "Failed to rollback during vregion migration fail\n"); + /* Let the rollback to be continued later on. */ + } + } + mutex_unlock(&vregion->lock); + trace_mlxsw_sp_acl_tcam_vregion_migrate_end(mlxsw_sp, vregion); + return err; +} + +static bool +mlxsw_sp_acl_tcam_vregion_rehash_in_progress(const struct mlxsw_sp_acl_tcam_rehash_ctx *ctx) +{ + return ctx->hints_priv; +} + +static int +mlxsw_sp_acl_tcam_vregion_rehash_start(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion, + struct mlxsw_sp_acl_tcam_rehash_ctx *ctx) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + unsigned int priority = mlxsw_sp_acl_tcam_vregion_prio(vregion); + struct mlxsw_sp_acl_tcam_region *new_region; + void *hints_priv; + int err; + + trace_mlxsw_sp_acl_tcam_vregion_rehash(mlxsw_sp, vregion); + + hints_priv = ops->region_rehash_hints_get(vregion->region->priv); + if (IS_ERR(hints_priv)) + return PTR_ERR(hints_priv); + + new_region = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, vregion->tcam, + vregion, hints_priv); + if (IS_ERR(new_region)) { + err = PTR_ERR(new_region); + goto err_region_create; + } + + /* vregion->region contains the pointer to the new region + * we are going to migrate to. 
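+	 * The old region is parked in vregion->region2 until the migration
+	 * either completes or is rolled back.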
+ */ + vregion->region2 = vregion->region; + vregion->region = new_region; + err = mlxsw_sp_acl_tcam_group_region_attach(mlxsw_sp, + vregion->region2->group, + new_region, priority, + vregion->region2); + if (err) + goto err_group_region_attach; + + ctx->hints_priv = hints_priv; + ctx->this_is_rollback = false; + + return 0; + +err_group_region_attach: + vregion->region = vregion->region2; + vregion->region2 = NULL; + mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, new_region); +err_region_create: + ops->region_rehash_hints_put(hints_priv); + return err; +} + +static void +mlxsw_sp_acl_tcam_vregion_rehash_end(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion, + struct mlxsw_sp_acl_tcam_rehash_ctx *ctx) +{ + struct mlxsw_sp_acl_tcam_region *unused_region = vregion->region2; + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + + vregion->region2 = NULL; + mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, unused_region); + mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, unused_region); + ops->region_rehash_hints_put(ctx->hints_priv); + ctx->hints_priv = NULL; +} + +static void +mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion, + int *credits) +{ + struct mlxsw_sp_acl_tcam_rehash_ctx *ctx = &vregion->rehash.ctx; + int err; + + /* Check if the previous rehash work was interrupted + * which means we have to continue it now. + * If not, start a new rehash. + */ + if (!mlxsw_sp_acl_tcam_vregion_rehash_in_progress(ctx)) { + err = mlxsw_sp_acl_tcam_vregion_rehash_start(mlxsw_sp, + vregion, ctx); + if (err) { + if (err != -EAGAIN) + dev_err(mlxsw_sp->bus_info->dev, "Failed get rehash hints\n"); + return; + } + } + + err = mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion, + ctx, credits); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n"); + } + + if (*credits >= 0) + mlxsw_sp_acl_tcam_vregion_rehash_end(mlxsw_sp, vregion, ctx); +} + +static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = { + MLXSW_AFK_ELEMENT_SRC_SYS_PORT, + MLXSW_AFK_ELEMENT_DMAC_32_47, + MLXSW_AFK_ELEMENT_DMAC_0_31, + MLXSW_AFK_ELEMENT_SMAC_32_47, + MLXSW_AFK_ELEMENT_SMAC_0_31, + MLXSW_AFK_ELEMENT_ETHERTYPE, + MLXSW_AFK_ELEMENT_IP_PROTO, + MLXSW_AFK_ELEMENT_SRC_IP_0_31, + MLXSW_AFK_ELEMENT_DST_IP_0_31, + MLXSW_AFK_ELEMENT_DST_L4_PORT, + MLXSW_AFK_ELEMENT_SRC_L4_PORT, + MLXSW_AFK_ELEMENT_VID, + MLXSW_AFK_ELEMENT_PCP, + MLXSW_AFK_ELEMENT_TCP_FLAGS, + MLXSW_AFK_ELEMENT_IP_TTL_, + MLXSW_AFK_ELEMENT_IP_ECN, + MLXSW_AFK_ELEMENT_IP_DSCP, +}; + +static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv6[] = { + MLXSW_AFK_ELEMENT_ETHERTYPE, + MLXSW_AFK_ELEMENT_IP_PROTO, + MLXSW_AFK_ELEMENT_SRC_IP_96_127, + MLXSW_AFK_ELEMENT_SRC_IP_64_95, + MLXSW_AFK_ELEMENT_SRC_IP_32_63, + MLXSW_AFK_ELEMENT_SRC_IP_0_31, + MLXSW_AFK_ELEMENT_DST_IP_96_127, + MLXSW_AFK_ELEMENT_DST_IP_64_95, + MLXSW_AFK_ELEMENT_DST_IP_32_63, + MLXSW_AFK_ELEMENT_DST_IP_0_31, + MLXSW_AFK_ELEMENT_DST_L4_PORT, + MLXSW_AFK_ELEMENT_SRC_L4_PORT, +}; + +static const struct mlxsw_sp_acl_tcam_pattern mlxsw_sp_acl_tcam_patterns[] = { + { + .elements = mlxsw_sp_acl_tcam_pattern_ipv4, + .elements_count = ARRAY_SIZE(mlxsw_sp_acl_tcam_pattern_ipv4), + }, + { + .elements = mlxsw_sp_acl_tcam_pattern_ipv6, + .elements_count = ARRAY_SIZE(mlxsw_sp_acl_tcam_pattern_ipv6), + }, +}; + +#define MLXSW_SP_ACL_TCAM_PATTERNS_COUNT \ + ARRAY_SIZE(mlxsw_sp_acl_tcam_patterns) + +struct mlxsw_sp_acl_tcam_flower_ruleset { + struct mlxsw_sp_acl_tcam_vgroup vgroup; +}; + +struct 
mlxsw_sp_acl_tcam_flower_rule { + struct mlxsw_sp_acl_tcam_ventry ventry; +}; + +static int +mlxsw_sp_acl_tcam_flower_ruleset_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam, + void *ruleset_priv, + struct mlxsw_afk_element_usage *tmplt_elusage, + unsigned int *p_min_prio, + unsigned int *p_max_prio) +{ + struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv; + + return mlxsw_sp_acl_tcam_vgroup_add(mlxsw_sp, tcam, &ruleset->vgroup, + mlxsw_sp_acl_tcam_patterns, + MLXSW_SP_ACL_TCAM_PATTERNS_COUNT, + tmplt_elusage, true, + p_min_prio, p_max_prio); +} + +static void +mlxsw_sp_acl_tcam_flower_ruleset_del(struct mlxsw_sp *mlxsw_sp, + void *ruleset_priv) +{ + struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv; + + mlxsw_sp_acl_tcam_vgroup_del(&ruleset->vgroup); +} + +static int +mlxsw_sp_acl_tcam_flower_ruleset_bind(struct mlxsw_sp *mlxsw_sp, + void *ruleset_priv, + struct mlxsw_sp_port *mlxsw_sp_port, + bool ingress) +{ + struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv; + + return mlxsw_sp_acl_tcam_group_bind(mlxsw_sp, &ruleset->vgroup.group, + mlxsw_sp_port, ingress); +} + +static void +mlxsw_sp_acl_tcam_flower_ruleset_unbind(struct mlxsw_sp *mlxsw_sp, + void *ruleset_priv, + struct mlxsw_sp_port *mlxsw_sp_port, + bool ingress) +{ + struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv; + + mlxsw_sp_acl_tcam_group_unbind(mlxsw_sp, &ruleset->vgroup.group, + mlxsw_sp_port, ingress); +} + +static u16 +mlxsw_sp_acl_tcam_flower_ruleset_group_id(void *ruleset_priv) +{ + struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv; + + return mlxsw_sp_acl_tcam_group_id(&ruleset->vgroup.group); +} + +static int +mlxsw_sp_acl_tcam_flower_rule_add(struct mlxsw_sp *mlxsw_sp, + void *ruleset_priv, void *rule_priv, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv; + struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv; + + return mlxsw_sp_acl_tcam_ventry_add(mlxsw_sp, &ruleset->vgroup, + &rule->ventry, rulei); +} + +static void +mlxsw_sp_acl_tcam_flower_rule_del(struct mlxsw_sp *mlxsw_sp, void *rule_priv) +{ + struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv; + + mlxsw_sp_acl_tcam_ventry_del(mlxsw_sp, &rule->ventry); +} + +static int +mlxsw_sp_acl_tcam_flower_rule_action_replace(struct mlxsw_sp *mlxsw_sp, + void *rule_priv, + struct mlxsw_sp_acl_rule_info *rulei) +{ + return -EOPNOTSUPP; +} + +static int +mlxsw_sp_acl_tcam_flower_rule_activity_get(struct mlxsw_sp *mlxsw_sp, + void *rule_priv, bool *activity) +{ + struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv; + + return mlxsw_sp_acl_tcam_ventry_activity_get(mlxsw_sp, &rule->ventry, + activity); +} + +static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = { + .ruleset_priv_size = sizeof(struct mlxsw_sp_acl_tcam_flower_ruleset), + .ruleset_add = mlxsw_sp_acl_tcam_flower_ruleset_add, + .ruleset_del = mlxsw_sp_acl_tcam_flower_ruleset_del, + .ruleset_bind = mlxsw_sp_acl_tcam_flower_ruleset_bind, + .ruleset_unbind = mlxsw_sp_acl_tcam_flower_ruleset_unbind, + .ruleset_group_id = mlxsw_sp_acl_tcam_flower_ruleset_group_id, + .rule_priv_size = sizeof(struct mlxsw_sp_acl_tcam_flower_rule), + .rule_add = mlxsw_sp_acl_tcam_flower_rule_add, + .rule_del = mlxsw_sp_acl_tcam_flower_rule_del, + .rule_action_replace = mlxsw_sp_acl_tcam_flower_rule_action_replace, + .rule_activity_get = mlxsw_sp_acl_tcam_flower_rule_activity_get, +}; + +struct mlxsw_sp_acl_tcam_mr_ruleset { + struct mlxsw_sp_acl_tcam_vchunk 
*vchunk; + struct mlxsw_sp_acl_tcam_vgroup vgroup; +}; + +struct mlxsw_sp_acl_tcam_mr_rule { + struct mlxsw_sp_acl_tcam_ventry ventry; +}; + +static int +mlxsw_sp_acl_tcam_mr_ruleset_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam, + void *ruleset_priv, + struct mlxsw_afk_element_usage *tmplt_elusage, + unsigned int *p_min_prio, + unsigned int *p_max_prio) +{ + struct mlxsw_sp_acl_tcam_mr_ruleset *ruleset = ruleset_priv; + int err; + + err = mlxsw_sp_acl_tcam_vgroup_add(mlxsw_sp, tcam, &ruleset->vgroup, + mlxsw_sp_acl_tcam_patterns, + MLXSW_SP_ACL_TCAM_PATTERNS_COUNT, + tmplt_elusage, false, + p_min_prio, p_max_prio); + if (err) + return err; + + /* For most of the TCAM clients it would make sense to take a tcam chunk + * only when the first rule is written. This is not the case for + * multicast router as it is required to bind the multicast router to a + * specific ACL Group ID which must exist in HW before multicast router + * is initialized. + */ + ruleset->vchunk = mlxsw_sp_acl_tcam_vchunk_get(mlxsw_sp, + &ruleset->vgroup, 1, + tmplt_elusage); + if (IS_ERR(ruleset->vchunk)) { + err = PTR_ERR(ruleset->vchunk); + goto err_chunk_get; + } + + return 0; + +err_chunk_get: + mlxsw_sp_acl_tcam_vgroup_del(&ruleset->vgroup); + return err; +} + +static void +mlxsw_sp_acl_tcam_mr_ruleset_del(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv) +{ + struct mlxsw_sp_acl_tcam_mr_ruleset *ruleset = ruleset_priv; + + mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, ruleset->vchunk); + mlxsw_sp_acl_tcam_vgroup_del(&ruleset->vgroup); +} + +static int +mlxsw_sp_acl_tcam_mr_ruleset_bind(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, + struct mlxsw_sp_port *mlxsw_sp_port, + bool ingress) +{ + /* Binding is done when initializing multicast router */ + return 0; +} + +static void +mlxsw_sp_acl_tcam_mr_ruleset_unbind(struct mlxsw_sp *mlxsw_sp, + void *ruleset_priv, + struct mlxsw_sp_port *mlxsw_sp_port, + bool ingress) +{ +} + +static u16 +mlxsw_sp_acl_tcam_mr_ruleset_group_id(void *ruleset_priv) +{ + struct mlxsw_sp_acl_tcam_mr_ruleset *ruleset = ruleset_priv; + + return mlxsw_sp_acl_tcam_group_id(&ruleset->vgroup.group); +} + +static int +mlxsw_sp_acl_tcam_mr_rule_add(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, + void *rule_priv, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp_acl_tcam_mr_ruleset *ruleset = ruleset_priv; + struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv; + + return mlxsw_sp_acl_tcam_ventry_add(mlxsw_sp, &ruleset->vgroup, + &rule->ventry, rulei); +} + +static void +mlxsw_sp_acl_tcam_mr_rule_del(struct mlxsw_sp *mlxsw_sp, void *rule_priv) +{ + struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv; + + mlxsw_sp_acl_tcam_ventry_del(mlxsw_sp, &rule->ventry); +} + +static int +mlxsw_sp_acl_tcam_mr_rule_action_replace(struct mlxsw_sp *mlxsw_sp, + void *rule_priv, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv; + + return mlxsw_sp_acl_tcam_ventry_action_replace(mlxsw_sp, &rule->ventry, + rulei); +} + +static int +mlxsw_sp_acl_tcam_mr_rule_activity_get(struct mlxsw_sp *mlxsw_sp, + void *rule_priv, bool *activity) +{ + *activity = false; + + return 0; +} + +static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_mr_ops = { + .ruleset_priv_size = sizeof(struct mlxsw_sp_acl_tcam_mr_ruleset), + .ruleset_add = mlxsw_sp_acl_tcam_mr_ruleset_add, + .ruleset_del = mlxsw_sp_acl_tcam_mr_ruleset_del, + .ruleset_bind = mlxsw_sp_acl_tcam_mr_ruleset_bind, + .ruleset_unbind = mlxsw_sp_acl_tcam_mr_ruleset_unbind, + .ruleset_group_id = 
mlxsw_sp_acl_tcam_mr_ruleset_group_id, + .rule_priv_size = sizeof(struct mlxsw_sp_acl_tcam_mr_rule), + .rule_add = mlxsw_sp_acl_tcam_mr_rule_add, + .rule_del = mlxsw_sp_acl_tcam_mr_rule_del, + .rule_action_replace = mlxsw_sp_acl_tcam_mr_rule_action_replace, + .rule_activity_get = mlxsw_sp_acl_tcam_mr_rule_activity_get, +}; + +static const struct mlxsw_sp_acl_profile_ops * +mlxsw_sp_acl_tcam_profile_ops_arr[] = { + [MLXSW_SP_ACL_PROFILE_FLOWER] = &mlxsw_sp_acl_tcam_flower_ops, + [MLXSW_SP_ACL_PROFILE_MR] = &mlxsw_sp_acl_tcam_mr_ops, +}; + +const struct mlxsw_sp_acl_profile_ops * +mlxsw_sp_acl_tcam_profile_ops(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_acl_profile profile) +{ + const struct mlxsw_sp_acl_profile_ops *ops; + + if (WARN_ON(profile >= ARRAY_SIZE(mlxsw_sp_acl_tcam_profile_ops_arr))) + return NULL; + ops = mlxsw_sp_acl_tcam_profile_ops_arr[profile]; + if (WARN_ON(!ops)) + return NULL; + return ops; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h new file mode 100644 index 000000000..edbbc89e7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h @@ -0,0 +1,312 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_SPECTRUM_ACL_TCAM_H +#define _MLXSW_SPECTRUM_ACL_TCAM_H + +#include <linux/list.h> +#include <linux/parman.h> + +#include "reg.h" +#include "spectrum.h" +#include "core_acl_flex_keys.h" + +struct mlxsw_sp_acl_tcam { + unsigned long *used_regions; /* bit array */ + unsigned int max_regions; + unsigned long *used_groups; /* bit array */ + unsigned int max_groups; + unsigned int max_group_size; + struct mutex lock; /* guards vregion list */ + struct list_head vregion_list; + u32 vregion_rehash_intrvl; /* ms */ + unsigned long priv[]; + /* priv has to be always the last item */ +}; + +size_t mlxsw_sp_acl_tcam_priv_size(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam); +void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam); +u32 mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam); +int mlxsw_sp_acl_tcam_vregion_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam, + u32 val); +int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u32 *priority, bool fillup_priority); + +struct mlxsw_sp_acl_profile_ops { + size_t ruleset_priv_size; + int (*ruleset_add)(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam, void *ruleset_priv, + struct mlxsw_afk_element_usage *tmplt_elusage, + unsigned int *p_min_prio, unsigned int *p_max_prio); + void (*ruleset_del)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv); + int (*ruleset_bind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, + struct mlxsw_sp_port *mlxsw_sp_port, + bool ingress); + void (*ruleset_unbind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, + struct mlxsw_sp_port *mlxsw_sp_port, + bool ingress); + u16 (*ruleset_group_id)(void *ruleset_priv); + size_t rule_priv_size; + int (*rule_add)(struct mlxsw_sp *mlxsw_sp, + void *ruleset_priv, void *rule_priv, + struct mlxsw_sp_acl_rule_info *rulei); + void (*rule_del)(struct mlxsw_sp *mlxsw_sp, void *rule_priv); + int (*rule_action_replace)(struct mlxsw_sp *mlxsw_sp, void *rule_priv, + struct mlxsw_sp_acl_rule_info *rulei); + int (*rule_activity_get)(struct 
mlxsw_sp *mlxsw_sp, void *rule_priv, + bool *activity); +}; + +const struct mlxsw_sp_acl_profile_ops * +mlxsw_sp_acl_tcam_profile_ops(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_acl_profile profile); + +#define MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT 16 +#define MLXSW_SP_ACL_TCAM_REGION_RESIZE_STEP 16 + +#define MLXSW_SP_ACL_TCAM_CATCHALL_PRIO (~0U) + +#define MLXSW_SP_ACL_TCAM_MASK_LEN \ + (MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN * BITS_PER_BYTE) + +struct mlxsw_sp_acl_tcam_group; +struct mlxsw_sp_acl_tcam_vregion; + +struct mlxsw_sp_acl_tcam_region { + struct mlxsw_sp_acl_tcam_vregion *vregion; + struct mlxsw_sp_acl_tcam_group *group; + struct list_head list; /* Member of a TCAM group */ + enum mlxsw_reg_ptar_key_type key_type; + u16 id; /* ACL ID and region ID - they are same */ + char tcam_region_info[MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN]; + struct mlxsw_afk_key_info *key_info; + struct mlxsw_sp *mlxsw_sp; + unsigned long priv[]; + /* priv has to be always the last item */ +}; + +struct mlxsw_sp_acl_ctcam_region { + struct parman *parman; + const struct mlxsw_sp_acl_ctcam_region_ops *ops; + struct mlxsw_sp_acl_tcam_region *region; +}; + +struct mlxsw_sp_acl_ctcam_chunk { + struct parman_prio parman_prio; +}; + +struct mlxsw_sp_acl_ctcam_entry { + struct parman_item parman_item; +}; + +struct mlxsw_sp_acl_ctcam_region_ops { + int (*entry_insert)(struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_entry *centry, + const char *mask); + void (*entry_remove)(struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_entry *centry); +}; + +int +mlxsw_sp_acl_ctcam_region_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_tcam_region *region, + const struct mlxsw_sp_acl_ctcam_region_ops *ops); +void mlxsw_sp_acl_ctcam_region_fini(struct mlxsw_sp_acl_ctcam_region *cregion); +void mlxsw_sp_acl_ctcam_chunk_init(struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_chunk *cchunk, + unsigned int priority); +void mlxsw_sp_acl_ctcam_chunk_fini(struct mlxsw_sp_acl_ctcam_chunk *cchunk); +int mlxsw_sp_acl_ctcam_entry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_chunk *cchunk, + struct mlxsw_sp_acl_ctcam_entry *centry, + struct mlxsw_sp_acl_rule_info *rulei, + bool fillup_priority); +void mlxsw_sp_acl_ctcam_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_chunk *cchunk, + struct mlxsw_sp_acl_ctcam_entry *centry); +int mlxsw_sp_acl_ctcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_entry *centry, + struct mlxsw_sp_acl_rule_info *rulei); +static inline unsigned int +mlxsw_sp_acl_ctcam_entry_offset(struct mlxsw_sp_acl_ctcam_entry *centry) +{ + return centry->parman_item.index; +} + +enum mlxsw_sp_acl_atcam_region_type { + MLXSW_SP_ACL_ATCAM_REGION_TYPE_2KB, + MLXSW_SP_ACL_ATCAM_REGION_TYPE_4KB, + MLXSW_SP_ACL_ATCAM_REGION_TYPE_8KB, + MLXSW_SP_ACL_ATCAM_REGION_TYPE_12KB, + __MLXSW_SP_ACL_ATCAM_REGION_TYPE_MAX, +}; + +#define MLXSW_SP_ACL_ATCAM_REGION_TYPE_MAX \ + (__MLXSW_SP_ACL_ATCAM_REGION_TYPE_MAX - 1) + +struct mlxsw_sp_acl_atcam { + struct mlxsw_sp_acl_erp_core *erp_core; +}; + +struct mlxsw_sp_acl_atcam_region { + struct rhashtable entries_ht; /* A-TCAM only */ + struct list_head entries_list; /* A-TCAM only */ + struct mlxsw_sp_acl_ctcam_region cregion; + const struct mlxsw_sp_acl_atcam_region_ops *ops; + struct 
mlxsw_sp_acl_tcam_region *region; + struct mlxsw_sp_acl_atcam *atcam; + enum mlxsw_sp_acl_atcam_region_type type; + struct mlxsw_sp_acl_erp_table *erp_table; + void *priv; +}; + +struct mlxsw_sp_acl_atcam_entry_ht_key { + char full_enc_key[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; /* Encoded + * key. + */ + u8 erp_id; +}; + +struct mlxsw_sp_acl_atcam_chunk { + struct mlxsw_sp_acl_ctcam_chunk cchunk; +}; + +struct mlxsw_sp_acl_atcam_entry { + struct rhash_head ht_node; + struct list_head list; /* Member in entries_list */ + struct mlxsw_sp_acl_atcam_entry_ht_key ht_key; + char enc_key[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; /* Encoded key, + * minus delta bits. + */ + struct { + u16 start; + u8 mask; + u8 value; + } delta_info; + struct mlxsw_sp_acl_ctcam_entry centry; + struct mlxsw_sp_acl_atcam_lkey_id *lkey_id; + struct mlxsw_sp_acl_erp_mask *erp_mask; +}; + +static inline struct mlxsw_sp_acl_atcam_region * +mlxsw_sp_acl_tcam_cregion_aregion(struct mlxsw_sp_acl_ctcam_region *cregion) +{ + return container_of(cregion, struct mlxsw_sp_acl_atcam_region, cregion); +} + +static inline struct mlxsw_sp_acl_atcam_entry * +mlxsw_sp_acl_tcam_centry_aentry(struct mlxsw_sp_acl_ctcam_entry *centry) +{ + return container_of(centry, struct mlxsw_sp_acl_atcam_entry, centry); +} + +int mlxsw_sp_acl_atcam_region_associate(struct mlxsw_sp *mlxsw_sp, + u16 region_id); +int +mlxsw_sp_acl_atcam_region_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam *atcam, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_tcam_region *region, + void *hints_priv, + const struct mlxsw_sp_acl_ctcam_region_ops *ops); +void mlxsw_sp_acl_atcam_region_fini(struct mlxsw_sp_acl_atcam_region *aregion); +void mlxsw_sp_acl_atcam_chunk_init(struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_chunk *achunk, + unsigned int priority); +void mlxsw_sp_acl_atcam_chunk_fini(struct mlxsw_sp_acl_atcam_chunk *achunk); +int mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_chunk *achunk, + struct mlxsw_sp_acl_atcam_entry *aentry, + struct mlxsw_sp_acl_rule_info *rulei); +void mlxsw_sp_acl_atcam_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_chunk *achunk, + struct mlxsw_sp_acl_atcam_entry *aentry); +int mlxsw_sp_acl_atcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_entry *aentry, + struct mlxsw_sp_acl_rule_info *rulei); +int mlxsw_sp_acl_atcam_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam *atcam); +void mlxsw_sp_acl_atcam_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam *atcam); +void * +mlxsw_sp_acl_atcam_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion); +void mlxsw_sp_acl_atcam_rehash_hints_put(void *hints_priv); + +struct mlxsw_sp_acl_erp_delta; + +u16 mlxsw_sp_acl_erp_delta_start(const struct mlxsw_sp_acl_erp_delta *delta); +u8 mlxsw_sp_acl_erp_delta_mask(const struct mlxsw_sp_acl_erp_delta *delta); +u8 mlxsw_sp_acl_erp_delta_value(const struct mlxsw_sp_acl_erp_delta *delta, + const char *enc_key); +void mlxsw_sp_acl_erp_delta_clear(const struct mlxsw_sp_acl_erp_delta *delta, + const char *enc_key); + +struct mlxsw_sp_acl_erp_mask; + +bool +mlxsw_sp_acl_erp_mask_is_ctcam(const struct mlxsw_sp_acl_erp_mask *erp_mask); +u8 mlxsw_sp_acl_erp_mask_erp_id(const struct mlxsw_sp_acl_erp_mask *erp_mask); +const struct mlxsw_sp_acl_erp_delta * 
+mlxsw_sp_acl_erp_delta(const struct mlxsw_sp_acl_erp_mask *erp_mask); +struct mlxsw_sp_acl_erp_mask * +mlxsw_sp_acl_erp_mask_get(struct mlxsw_sp_acl_atcam_region *aregion, + const char *mask, bool ctcam); +void mlxsw_sp_acl_erp_mask_put(struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_erp_mask *erp_mask); +int mlxsw_sp_acl_erp_bf_insert(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_erp_mask *erp_mask, + struct mlxsw_sp_acl_atcam_entry *aentry); +void mlxsw_sp_acl_erp_bf_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_erp_mask *erp_mask, + struct mlxsw_sp_acl_atcam_entry *aentry); +void * +mlxsw_sp_acl_erp_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion); +void mlxsw_sp_acl_erp_rehash_hints_put(void *hints_priv); +int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion, + void *hints_priv); +void mlxsw_sp_acl_erp_region_fini(struct mlxsw_sp_acl_atcam_region *aregion); +int mlxsw_sp_acl_erps_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam *atcam); +void mlxsw_sp_acl_erps_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam *atcam); + +struct mlxsw_sp_acl_bf; + +struct mlxsw_sp_acl_bf_ops { + unsigned int (*index_get)(struct mlxsw_sp_acl_bf *bf, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_entry *aentry); +}; + +int +mlxsw_sp_acl_bf_entry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_bf *bf, + struct mlxsw_sp_acl_atcam_region *aregion, + unsigned int erp_bank, + struct mlxsw_sp_acl_atcam_entry *aentry); +void +mlxsw_sp_acl_bf_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_bf *bf, + struct mlxsw_sp_acl_atcam_region *aregion, + unsigned int erp_bank, + struct mlxsw_sp_acl_atcam_entry *aentry); +struct mlxsw_sp_acl_bf * +mlxsw_sp_acl_bf_init(struct mlxsw_sp *mlxsw_sp, unsigned int num_erp_banks); +void mlxsw_sp_acl_bf_fini(struct mlxsw_sp_acl_bf *bf); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c new file mode 100644 index 000000000..c9f1c79f3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -0,0 +1,1786 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2015-2018 Mellanox Technologies. 
All rights reserved */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/dcbnl.h> +#include <linux/if_ether.h> +#include <linux/list.h> +#include <linux/netlink.h> + +#include "spectrum.h" +#include "core.h" +#include "port.h" +#include "reg.h" + +struct mlxsw_sp_sb_pr { + enum mlxsw_reg_sbpr_mode mode; + u32 size; + u8 freeze_mode:1, + freeze_size:1; +}; + +struct mlxsw_cp_sb_occ { + u32 cur; + u32 max; +}; + +struct mlxsw_sp_sb_cm { + u32 min_buff; + u32 max_buff; + u16 pool_index; + struct mlxsw_cp_sb_occ occ; + u8 freeze_pool:1, + freeze_thresh:1; +}; + +#define MLXSW_SP_SB_INFI -1U +#define MLXSW_SP_SB_REST -2U + +struct mlxsw_sp_sb_pm { + u32 min_buff; + u32 max_buff; + struct mlxsw_cp_sb_occ occ; +}; + +struct mlxsw_sp_sb_mm { + u32 min_buff; + u32 max_buff; + u16 pool_index; +}; + +struct mlxsw_sp_sb_pool_des { + enum mlxsw_reg_sbxx_dir dir; + u8 pool; +}; + +#define MLXSW_SP_SB_POOL_ING 0 +#define MLXSW_SP_SB_POOL_EGR 4 +#define MLXSW_SP_SB_POOL_EGR_MC 8 +#define MLXSW_SP_SB_POOL_ING_CPU 9 +#define MLXSW_SP_SB_POOL_EGR_CPU 10 + +static const struct mlxsw_sp_sb_pool_des mlxsw_sp1_sb_pool_dess[] = { + {MLXSW_REG_SBXX_DIR_INGRESS, 0}, + {MLXSW_REG_SBXX_DIR_INGRESS, 1}, + {MLXSW_REG_SBXX_DIR_INGRESS, 2}, + {MLXSW_REG_SBXX_DIR_INGRESS, 3}, + {MLXSW_REG_SBXX_DIR_EGRESS, 0}, + {MLXSW_REG_SBXX_DIR_EGRESS, 1}, + {MLXSW_REG_SBXX_DIR_EGRESS, 2}, + {MLXSW_REG_SBXX_DIR_EGRESS, 3}, + {MLXSW_REG_SBXX_DIR_EGRESS, 15}, + {MLXSW_REG_SBXX_DIR_INGRESS, 4}, + {MLXSW_REG_SBXX_DIR_EGRESS, 4}, +}; + +static const struct mlxsw_sp_sb_pool_des mlxsw_sp2_sb_pool_dess[] = { + {MLXSW_REG_SBXX_DIR_INGRESS, 0}, + {MLXSW_REG_SBXX_DIR_INGRESS, 1}, + {MLXSW_REG_SBXX_DIR_INGRESS, 2}, + {MLXSW_REG_SBXX_DIR_INGRESS, 3}, + {MLXSW_REG_SBXX_DIR_EGRESS, 0}, + {MLXSW_REG_SBXX_DIR_EGRESS, 1}, + {MLXSW_REG_SBXX_DIR_EGRESS, 2}, + {MLXSW_REG_SBXX_DIR_EGRESS, 3}, + {MLXSW_REG_SBXX_DIR_EGRESS, 15}, + {MLXSW_REG_SBXX_DIR_INGRESS, 4}, + {MLXSW_REG_SBXX_DIR_EGRESS, 4}, +}; + +#define MLXSW_SP_SB_ING_TC_COUNT 8 +#define MLXSW_SP_SB_EG_TC_COUNT 16 + +struct mlxsw_sp_sb_port { + struct mlxsw_sp_sb_cm ing_cms[MLXSW_SP_SB_ING_TC_COUNT]; + struct mlxsw_sp_sb_cm eg_cms[MLXSW_SP_SB_EG_TC_COUNT]; + struct mlxsw_sp_sb_pm *pms; +}; + +struct mlxsw_sp_sb { + struct mlxsw_sp_sb_pr *prs; + struct mlxsw_sp_sb_port *ports; + u32 cell_size; + u32 max_headroom_cells; + u64 sb_size; +}; + +struct mlxsw_sp_sb_vals { + unsigned int pool_count; + const struct mlxsw_sp_sb_pool_des *pool_dess; + const struct mlxsw_sp_sb_pm *pms; + const struct mlxsw_sp_sb_pm *pms_cpu; + const struct mlxsw_sp_sb_pr *prs; + const struct mlxsw_sp_sb_mm *mms; + const struct mlxsw_sp_sb_cm *cms_ingress; + const struct mlxsw_sp_sb_cm *cms_egress; + const struct mlxsw_sp_sb_cm *cms_cpu; + unsigned int mms_count; + unsigned int cms_ingress_count; + unsigned int cms_egress_count; + unsigned int cms_cpu_count; +}; + +struct mlxsw_sp_sb_ops { + u32 (*int_buf_size_get)(int mtu, u32 speed); +}; + +u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells) +{ + return mlxsw_sp->sb->cell_size * cells; +} + +u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, u32 bytes) +{ + return DIV_ROUND_UP(bytes, mlxsw_sp->sb->cell_size); +} + +static u32 mlxsw_sp_port_headroom_8x_adjust(const struct mlxsw_sp_port *mlxsw_sp_port, + u32 size_cells) +{ + /* Ports with eight lanes use two headroom buffers between which the + * configured headroom size is split. Therefore, multiply the calculated + * headroom size by two. 
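+	 * For example, a computed size of 100 cells is programmed as
+	 * 200 cells on an 8-lane port.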
+ */ + return mlxsw_sp_port->mapping.width == 8 ? 2 * size_cells : size_cells; +} + +static struct mlxsw_sp_sb_pr *mlxsw_sp_sb_pr_get(struct mlxsw_sp *mlxsw_sp, + u16 pool_index) +{ + return &mlxsw_sp->sb->prs[pool_index]; +} + +static bool mlxsw_sp_sb_cm_exists(u8 pg_buff, enum mlxsw_reg_sbxx_dir dir) +{ + if (dir == MLXSW_REG_SBXX_DIR_INGRESS) + return pg_buff < MLXSW_SP_SB_ING_TC_COUNT; + else + return pg_buff < MLXSW_SP_SB_EG_TC_COUNT; +} + +static struct mlxsw_sp_sb_cm *mlxsw_sp_sb_cm_get(struct mlxsw_sp *mlxsw_sp, + u16 local_port, u8 pg_buff, + enum mlxsw_reg_sbxx_dir dir) +{ + struct mlxsw_sp_sb_port *sb_port = &mlxsw_sp->sb->ports[local_port]; + + WARN_ON(!mlxsw_sp_sb_cm_exists(pg_buff, dir)); + if (dir == MLXSW_REG_SBXX_DIR_INGRESS) + return &sb_port->ing_cms[pg_buff]; + else + return &sb_port->eg_cms[pg_buff]; +} + +static struct mlxsw_sp_sb_pm *mlxsw_sp_sb_pm_get(struct mlxsw_sp *mlxsw_sp, + u16 local_port, u16 pool_index) +{ + return &mlxsw_sp->sb->ports[local_port].pms[pool_index]; +} + +static int mlxsw_sp_sb_pr_write(struct mlxsw_sp *mlxsw_sp, u16 pool_index, + enum mlxsw_reg_sbpr_mode mode, + u32 size, bool infi_size) +{ + const struct mlxsw_sp_sb_pool_des *des = + &mlxsw_sp->sb_vals->pool_dess[pool_index]; + char sbpr_pl[MLXSW_REG_SBPR_LEN]; + struct mlxsw_sp_sb_pr *pr; + int err; + + mlxsw_reg_sbpr_pack(sbpr_pl, des->pool, des->dir, mode, + size, infi_size); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpr), sbpr_pl); + if (err) + return err; + + if (infi_size) + size = mlxsw_sp_bytes_cells(mlxsw_sp, mlxsw_sp->sb->sb_size); + pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index); + pr->mode = mode; + pr->size = size; + return 0; +} + +static int mlxsw_sp_sb_pr_desc_write(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_reg_sbxx_dir dir, + enum mlxsw_reg_sbpr_mode mode, + u32 size, bool infi_size) +{ + char sbpr_pl[MLXSW_REG_SBPR_LEN]; + + /* The FW default descriptor buffer configuration uses only pool 14 for + * descriptors. 
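+	 * This helper therefore only adjusts the mode and size of that
+	 * single pool.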
+ */ + mlxsw_reg_sbpr_pack(sbpr_pl, 14, dir, mode, size, infi_size); + mlxsw_reg_sbpr_desc_set(sbpr_pl, true); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpr), sbpr_pl); +} + +static int mlxsw_sp_sb_cm_write(struct mlxsw_sp *mlxsw_sp, u16 local_port, + u8 pg_buff, u32 min_buff, u32 max_buff, + bool infi_max, u16 pool_index) +{ + const struct mlxsw_sp_sb_pool_des *des = + &mlxsw_sp->sb_vals->pool_dess[pool_index]; + char sbcm_pl[MLXSW_REG_SBCM_LEN]; + struct mlxsw_sp_sb_cm *cm; + int err; + + mlxsw_reg_sbcm_pack(sbcm_pl, local_port, pg_buff, des->dir, + min_buff, max_buff, infi_max, des->pool); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbcm), sbcm_pl); + if (err) + return err; + + if (mlxsw_sp_sb_cm_exists(pg_buff, des->dir)) { + if (infi_max) + max_buff = mlxsw_sp_bytes_cells(mlxsw_sp, + mlxsw_sp->sb->sb_size); + + cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, + des->dir); + cm->min_buff = min_buff; + cm->max_buff = max_buff; + cm->pool_index = pool_index; + } + return 0; +} + +static int mlxsw_sp_sb_pm_write(struct mlxsw_sp *mlxsw_sp, u16 local_port, + u16 pool_index, u32 min_buff, u32 max_buff) +{ + const struct mlxsw_sp_sb_pool_des *des = + &mlxsw_sp->sb_vals->pool_dess[pool_index]; + char sbpm_pl[MLXSW_REG_SBPM_LEN]; + struct mlxsw_sp_sb_pm *pm; + int err; + + mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir, false, + min_buff, max_buff); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl); + if (err) + return err; + + pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool_index); + pm->min_buff = min_buff; + pm->max_buff = max_buff; + return 0; +} + +static int mlxsw_sp_sb_pm_occ_clear(struct mlxsw_sp *mlxsw_sp, u16 local_port, + u16 pool_index, struct list_head *bulk_list) +{ + const struct mlxsw_sp_sb_pool_des *des = + &mlxsw_sp->sb_vals->pool_dess[pool_index]; + char sbpm_pl[MLXSW_REG_SBPM_LEN]; + + if (local_port == MLXSW_PORT_CPU_PORT && + des->dir == MLXSW_REG_SBXX_DIR_INGRESS) + return 0; + + mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir, + true, 0, 0); + return mlxsw_reg_trans_query(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl, + bulk_list, NULL, 0); +} + +static void mlxsw_sp_sb_pm_occ_query_cb(struct mlxsw_core *mlxsw_core, + char *sbpm_pl, size_t sbpm_pl_len, + unsigned long cb_priv) +{ + struct mlxsw_sp_sb_pm *pm = (struct mlxsw_sp_sb_pm *) cb_priv; + + mlxsw_reg_sbpm_unpack(sbpm_pl, &pm->occ.cur, &pm->occ.max); +} + +static int mlxsw_sp_sb_pm_occ_query(struct mlxsw_sp *mlxsw_sp, u16 local_port, + u16 pool_index, struct list_head *bulk_list) +{ + const struct mlxsw_sp_sb_pool_des *des = + &mlxsw_sp->sb_vals->pool_dess[pool_index]; + char sbpm_pl[MLXSW_REG_SBPM_LEN]; + struct mlxsw_sp_sb_pm *pm; + + if (local_port == MLXSW_PORT_CPU_PORT && + des->dir == MLXSW_REG_SBXX_DIR_INGRESS) + return 0; + + pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool_index); + mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir, + false, 0, 0); + return mlxsw_reg_trans_query(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl, + bulk_list, + mlxsw_sp_sb_pm_occ_query_cb, + (unsigned long) pm); +} + +void mlxsw_sp_hdroom_prios_reset_buf_idx(struct mlxsw_sp_hdroom *hdroom) +{ + int prio; + + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) { + switch (hdroom->mode) { + case MLXSW_SP_HDROOM_MODE_DCB: + hdroom->prios.prio[prio].buf_idx = hdroom->prios.prio[prio].ets_buf_idx; + break; + case MLXSW_SP_HDROOM_MODE_TC: + hdroom->prios.prio[prio].buf_idx = hdroom->prios.prio[prio].set_buf_idx; + break; + } + } +} + +void 
mlxsw_sp_hdroom_bufs_reset_lossiness(struct mlxsw_sp_hdroom *hdroom) +{ + int prio; + int i; + + for (i = 0; i < DCBX_MAX_BUFFERS; i++) + hdroom->bufs.buf[i].lossy = true; + + for (prio = 0; prio < IEEE_8021Q_MAX_PRIORITIES; prio++) { + if (!hdroom->prios.prio[prio].lossy) + hdroom->bufs.buf[hdroom->prios.prio[prio].buf_idx].lossy = false; + } +} + +static u16 mlxsw_sp_hdroom_buf_threshold_get(const struct mlxsw_sp *mlxsw_sp, int mtu) +{ + return 2 * mlxsw_sp_bytes_cells(mlxsw_sp, mtu); +} + +static void mlxsw_sp_hdroom_buf_pack(char *pbmc_pl, int index, u16 size, u16 thres, bool lossy) +{ + if (lossy) + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, index, size); + else + mlxsw_reg_pbmc_lossless_buffer_pack(pbmc_pl, index, size, + thres); +} + +static u16 mlxsw_sp_hdroom_buf_delay_get(const struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_hdroom *hdroom) +{ + u16 delay_cells; + + delay_cells = mlxsw_sp_bytes_cells(mlxsw_sp, hdroom->delay_bytes); + + /* In the worst case scenario the delay will be made up of packets that + * are all of size CELL_SIZE + 1, which means each packet will require + * almost twice its true size when buffered in the switch. We therefore + * multiply this value by the "cell factor", which is close to 2. + * + * Another MTU is added in case the transmitting host already started + * transmitting a maximum length frame when the PFC packet was received. + */ + return 2 * delay_cells + mlxsw_sp_bytes_cells(mlxsw_sp, hdroom->mtu); +} + +static u32 mlxsw_sp_hdroom_int_buf_size_get(struct mlxsw_sp *mlxsw_sp, int mtu, u32 speed) +{ + u32 buffsize = mlxsw_sp->sb_ops->int_buf_size_get(mtu, speed); + + return mlxsw_sp_bytes_cells(mlxsw_sp, buffsize) + 1; +} + +static bool mlxsw_sp_hdroom_buf_is_used(const struct mlxsw_sp_hdroom *hdroom, int buf) +{ + int prio; + + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) { + if (hdroom->prios.prio[prio].buf_idx == buf) + return true; + } + return false; +} + +void mlxsw_sp_hdroom_bufs_reset_sizes(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_hdroom *hdroom) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u16 reserve_cells; + int i; + + /* Internal buffer. */ + reserve_cells = mlxsw_sp_hdroom_int_buf_size_get(mlxsw_sp, mlxsw_sp_port->max_mtu, + mlxsw_sp_port->max_speed); + reserve_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, reserve_cells); + hdroom->int_buf.reserve_cells = reserve_cells; + + if (hdroom->int_buf.enable) + hdroom->int_buf.size_cells = reserve_cells; + else + hdroom->int_buf.size_cells = 0; + + /* PG buffers. */ + for (i = 0; i < DCBX_MAX_BUFFERS; i++) { + struct mlxsw_sp_hdroom_buf *buf = &hdroom->bufs.buf[i]; + u16 thres_cells; + u16 delay_cells; + + if (!mlxsw_sp_hdroom_buf_is_used(hdroom, i)) { + thres_cells = 0; + delay_cells = 0; + } else if (buf->lossy) { + thres_cells = mlxsw_sp_hdroom_buf_threshold_get(mlxsw_sp, hdroom->mtu); + delay_cells = 0; + } else { + thres_cells = mlxsw_sp_hdroom_buf_threshold_get(mlxsw_sp, hdroom->mtu); + delay_cells = mlxsw_sp_hdroom_buf_delay_get(mlxsw_sp, hdroom); + } + + thres_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, thres_cells); + delay_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, delay_cells); + + buf->thres_cells = thres_cells; + if (hdroom->mode == MLXSW_SP_HDROOM_MODE_DCB) { + buf->size_cells = thres_cells + delay_cells; + } else { + /* Do not allow going below the minimum size, even if + * the user requested it. 
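+			 * The minimum is the worst-case threshold computed
+			 * above.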
+ */ + buf->size_cells = max(buf->set_size_cells, buf->thres_cells); + } + } +} + +#define MLXSW_SP_PB_UNUSED 8 + +static int mlxsw_sp_hdroom_configure_buffers(struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_hdroom *hdroom, bool force) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char pbmc_pl[MLXSW_REG_PBMC_LEN]; + bool dirty; + int err; + int i; + + dirty = memcmp(&mlxsw_sp_port->hdroom->bufs, &hdroom->bufs, sizeof(hdroom->bufs)); + if (!dirty && !force) + return 0; + + mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0xffff, 0xffff / 2); + for (i = 0; i < MLXSW_SP_PB_COUNT; i++) { + const struct mlxsw_sp_hdroom_buf *buf = &hdroom->bufs.buf[i]; + + if (i == MLXSW_SP_PB_UNUSED) + continue; + + mlxsw_sp_hdroom_buf_pack(pbmc_pl, i, buf->size_cells, buf->thres_cells, buf->lossy); + } + + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, MLXSW_REG_PBMC_PORT_SHARED_BUF_IDX, 0); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); + if (err) + return err; + + mlxsw_sp_port->hdroom->bufs = hdroom->bufs; + return 0; +} + +static int mlxsw_sp_hdroom_configure_priomap(struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_hdroom *hdroom, bool force) +{ + char pptb_pl[MLXSW_REG_PPTB_LEN]; + bool dirty; + int prio; + int err; + + dirty = memcmp(&mlxsw_sp_port->hdroom->prios, &hdroom->prios, sizeof(hdroom->prios)); + if (!dirty && !force) + return 0; + + mlxsw_reg_pptb_pack(pptb_pl, mlxsw_sp_port->local_port); + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) + mlxsw_reg_pptb_prio_to_buff_pack(pptb_pl, prio, hdroom->prios.prio[prio].buf_idx); + + err = mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb), pptb_pl); + if (err) + return err; + + mlxsw_sp_port->hdroom->prios = hdroom->prios; + return 0; +} + +static int mlxsw_sp_hdroom_configure_int_buf(struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_hdroom *hdroom, bool force) +{ + char sbib_pl[MLXSW_REG_SBIB_LEN]; + bool dirty; + int err; + + dirty = memcmp(&mlxsw_sp_port->hdroom->int_buf, &hdroom->int_buf, sizeof(hdroom->int_buf)); + if (!dirty && !force) + return 0; + + mlxsw_reg_sbib_pack(sbib_pl, mlxsw_sp_port->local_port, hdroom->int_buf.size_cells); + err = mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + if (err) + return err; + + mlxsw_sp_port->hdroom->int_buf = hdroom->int_buf; + return 0; +} + +static bool mlxsw_sp_hdroom_bufs_fit(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_hdroom *hdroom) +{ + u32 taken_headroom_cells = 0; + int i; + + for (i = 0; i < MLXSW_SP_PB_COUNT; i++) + taken_headroom_cells += hdroom->bufs.buf[i].size_cells; + + taken_headroom_cells += hdroom->int_buf.reserve_cells; + return taken_headroom_cells <= mlxsw_sp->sb->max_headroom_cells; +} + +static int __mlxsw_sp_hdroom_configure(struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_hdroom *hdroom, bool force) +{ + struct mlxsw_sp_hdroom orig_hdroom; + struct mlxsw_sp_hdroom tmp_hdroom; + int err; + int i; + + /* Port buffers need to be configured in three steps. First, all buffers + * with non-zero size are configured. Then, prio-to-buffer map is + * updated, allowing traffic to flow to the now non-zero buffers. + * Finally, zero-sized buffers are configured, because now no traffic + * should be directed to them anymore. This way, in a non-congested + * system, no packet drops are introduced by the reconfiguration. 
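+	 * If any of the steps fails, the ones already applied are undone
+	 * in reverse order so that the original headroom configuration is
+	 * restored.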
+ */ + + orig_hdroom = *mlxsw_sp_port->hdroom; + tmp_hdroom = orig_hdroom; + for (i = 0; i < MLXSW_SP_PB_COUNT; i++) { + if (hdroom->bufs.buf[i].size_cells) + tmp_hdroom.bufs.buf[i] = hdroom->bufs.buf[i]; + } + + if (!mlxsw_sp_hdroom_bufs_fit(mlxsw_sp_port->mlxsw_sp, &tmp_hdroom) || + !mlxsw_sp_hdroom_bufs_fit(mlxsw_sp_port->mlxsw_sp, hdroom)) + return -ENOBUFS; + + err = mlxsw_sp_hdroom_configure_buffers(mlxsw_sp_port, &tmp_hdroom, force); + if (err) + return err; + + err = mlxsw_sp_hdroom_configure_priomap(mlxsw_sp_port, hdroom, force); + if (err) + goto err_configure_priomap; + + err = mlxsw_sp_hdroom_configure_buffers(mlxsw_sp_port, hdroom, false); + if (err) + goto err_configure_buffers; + + err = mlxsw_sp_hdroom_configure_int_buf(mlxsw_sp_port, hdroom, false); + if (err) + goto err_configure_int_buf; + + *mlxsw_sp_port->hdroom = *hdroom; + return 0; + +err_configure_int_buf: + mlxsw_sp_hdroom_configure_buffers(mlxsw_sp_port, &tmp_hdroom, false); +err_configure_buffers: + mlxsw_sp_hdroom_configure_priomap(mlxsw_sp_port, &tmp_hdroom, false); +err_configure_priomap: + mlxsw_sp_hdroom_configure_buffers(mlxsw_sp_port, &orig_hdroom, false); + return err; +} + +int mlxsw_sp_hdroom_configure(struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_hdroom *hdroom) +{ + return __mlxsw_sp_hdroom_configure(mlxsw_sp_port, hdroom, false); +} + +static int mlxsw_sp_port_headroom_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_hdroom hdroom = {}; + u32 size9; + int prio; + + hdroom.mtu = mlxsw_sp_port->dev->mtu; + hdroom.mode = MLXSW_SP_HDROOM_MODE_DCB; + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) + hdroom.prios.prio[prio].lossy = true; + + mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom); + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + + /* Buffer 9 is used for control traffic. 
*/ + size9 = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, mlxsw_sp_port->max_mtu); + hdroom.bufs.buf[9].size_cells = mlxsw_sp_bytes_cells(mlxsw_sp, size9); + + return __mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom, true); +} + +static int mlxsw_sp_sb_port_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_sb_port *sb_port) +{ + struct mlxsw_sp_sb_pm *pms; + + pms = kcalloc(mlxsw_sp->sb_vals->pool_count, sizeof(*pms), + GFP_KERNEL); + if (!pms) + return -ENOMEM; + sb_port->pms = pms; + return 0; +} + +static void mlxsw_sp_sb_port_fini(struct mlxsw_sp_sb_port *sb_port) +{ + kfree(sb_port->pms); +} + +static int mlxsw_sp_sb_ports_init(struct mlxsw_sp *mlxsw_sp) +{ + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); + struct mlxsw_sp_sb_pr *prs; + int i; + int err; + + mlxsw_sp->sb->ports = kcalloc(max_ports, + sizeof(struct mlxsw_sp_sb_port), + GFP_KERNEL); + if (!mlxsw_sp->sb->ports) + return -ENOMEM; + + prs = kcalloc(mlxsw_sp->sb_vals->pool_count, sizeof(*prs), + GFP_KERNEL); + if (!prs) { + err = -ENOMEM; + goto err_alloc_prs; + } + mlxsw_sp->sb->prs = prs; + + for (i = 0; i < max_ports; i++) { + err = mlxsw_sp_sb_port_init(mlxsw_sp, &mlxsw_sp->sb->ports[i]); + if (err) + goto err_sb_port_init; + } + + return 0; + +err_sb_port_init: + for (i--; i >= 0; i--) + mlxsw_sp_sb_port_fini(&mlxsw_sp->sb->ports[i]); + kfree(mlxsw_sp->sb->prs); +err_alloc_prs: + kfree(mlxsw_sp->sb->ports); + return err; +} + +static void mlxsw_sp_sb_ports_fini(struct mlxsw_sp *mlxsw_sp) +{ + int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); + int i; + + for (i = max_ports - 1; i >= 0; i--) + mlxsw_sp_sb_port_fini(&mlxsw_sp->sb->ports[i]); + kfree(mlxsw_sp->sb->prs); + kfree(mlxsw_sp->sb->ports); +} + +#define MLXSW_SP_SB_PR(_mode, _size) \ + { \ + .mode = _mode, \ + .size = _size, \ + } + +#define MLXSW_SP_SB_PR_EXT(_mode, _size, _freeze_mode, _freeze_size) \ + { \ + .mode = _mode, \ + .size = _size, \ + .freeze_mode = _freeze_mode, \ + .freeze_size = _freeze_size, \ + } + +#define MLXSW_SP1_SB_PR_CPU_SIZE (256 * 1000) + +/* Order according to mlxsw_sp1_sb_pool_dess */ +static const struct mlxsw_sp_sb_pr mlxsw_sp1_sb_prs[] = { + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_REST), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), + MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_REST, + true, false), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), + MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_STATIC, MLXSW_SP_SB_INFI, + true, true), + MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC, + MLXSW_SP1_SB_PR_CPU_SIZE, true, false), + MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC, + MLXSW_SP1_SB_PR_CPU_SIZE, true, false), +}; + +#define MLXSW_SP2_SB_PR_CPU_SIZE (256 * 1000) + +/* Order according to mlxsw_sp2_sb_pool_dess */ +static const struct mlxsw_sp_sb_pr mlxsw_sp2_sb_prs[] = { + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_REST), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), + MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_REST, + true, false), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), + MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_STATIC, MLXSW_SP_SB_INFI, + 
true, true), + MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC, + MLXSW_SP2_SB_PR_CPU_SIZE, true, false), + MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC, + MLXSW_SP2_SB_PR_CPU_SIZE, true, false), +}; + +static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sb_pr *prs, + const struct mlxsw_sp_sb_pool_des *pool_dess, + size_t prs_len) +{ + /* Round down, unlike mlxsw_sp_bytes_cells(). */ + u32 sb_cells = div_u64(mlxsw_sp->sb->sb_size, mlxsw_sp->sb->cell_size); + u32 rest_cells[2] = {sb_cells, sb_cells}; + int i; + int err; + + /* Calculate how much space to give to the "REST" pools in either + * direction. + */ + for (i = 0; i < prs_len; i++) { + enum mlxsw_reg_sbxx_dir dir = pool_dess[i].dir; + u32 size = prs[i].size; + u32 size_cells; + + if (size == MLXSW_SP_SB_INFI || size == MLXSW_SP_SB_REST) + continue; + + size_cells = mlxsw_sp_bytes_cells(mlxsw_sp, size); + if (WARN_ON_ONCE(size_cells > rest_cells[dir])) + continue; + + rest_cells[dir] -= size_cells; + } + + for (i = 0; i < prs_len; i++) { + u32 size = prs[i].size; + u32 size_cells; + + if (size == MLXSW_SP_SB_INFI) { + err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode, + 0, true); + } else if (size == MLXSW_SP_SB_REST) { + size_cells = rest_cells[pool_dess[i].dir]; + err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode, + size_cells, false); + } else { + size_cells = mlxsw_sp_bytes_cells(mlxsw_sp, size); + err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode, + size_cells, false); + } + if (err) + return err; + } + + err = mlxsw_sp_sb_pr_desc_write(mlxsw_sp, MLXSW_REG_SBXX_DIR_INGRESS, + MLXSW_REG_SBPR_MODE_DYNAMIC, 0, true); + if (err) + return err; + + err = mlxsw_sp_sb_pr_desc_write(mlxsw_sp, MLXSW_REG_SBXX_DIR_EGRESS, + MLXSW_REG_SBPR_MODE_DYNAMIC, 0, true); + if (err) + return err; + + return 0; +} + +#define MLXSW_SP_SB_CM(_min_buff, _max_buff, _pool) \ + { \ + .min_buff = _min_buff, \ + .max_buff = _max_buff, \ + .pool_index = _pool, \ + } + +#define MLXSW_SP_SB_CM_ING(_min_buff, _max_buff) \ + { \ + .min_buff = _min_buff, \ + .max_buff = _max_buff, \ + .pool_index = MLXSW_SP_SB_POOL_ING, \ + } + +#define MLXSW_SP_SB_CM_EGR(_min_buff, _max_buff) \ + { \ + .min_buff = _min_buff, \ + .max_buff = _max_buff, \ + .pool_index = MLXSW_SP_SB_POOL_EGR, \ + } + +#define MLXSW_SP_SB_CM_EGR_MC(_min_buff, _max_buff) \ + { \ + .min_buff = _min_buff, \ + .max_buff = _max_buff, \ + .pool_index = MLXSW_SP_SB_POOL_EGR_MC, \ + .freeze_pool = true, \ + .freeze_thresh = true, \ + } + +static const struct mlxsw_sp_sb_cm mlxsw_sp1_sb_cms_ingress[] = { + MLXSW_SP_SB_CM_ING(10000, 8), + MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_CM_ING(0, 0), /* dummy, this PG does not exist */ + MLXSW_SP_SB_CM(10000, 8, MLXSW_SP_SB_POOL_ING_CPU), +}; + +static const struct mlxsw_sp_sb_cm mlxsw_sp2_sb_cms_ingress[] = { + MLXSW_SP_SB_CM_ING(0, 7), + MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_CM_ING(0, 
MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_CM_ING(0, 0), /* dummy, this PG does not exist */ + MLXSW_SP_SB_CM(10000, 8, MLXSW_SP_SB_POOL_ING_CPU), +}; + +static const struct mlxsw_sp_sb_cm mlxsw_sp1_sb_cms_egress[] = { + MLXSW_SP_SB_CM_EGR(1500, 9), + MLXSW_SP_SB_CM_EGR(1500, 9), + MLXSW_SP_SB_CM_EGR(1500, 9), + MLXSW_SP_SB_CM_EGR(1500, 9), + MLXSW_SP_SB_CM_EGR(1500, 9), + MLXSW_SP_SB_CM_EGR(1500, 9), + MLXSW_SP_SB_CM_EGR(1500, 9), + MLXSW_SP_SB_CM_EGR(1500, 9), + MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI), + MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI), + MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI), + MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI), + MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI), + MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI), + MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI), + MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI), + MLXSW_SP_SB_CM_EGR(1, 0xff), +}; + +static const struct mlxsw_sp_sb_cm mlxsw_sp2_sb_cms_egress[] = { + MLXSW_SP_SB_CM_EGR(0, 7), + MLXSW_SP_SB_CM_EGR(0, 7), + MLXSW_SP_SB_CM_EGR(0, 7), + MLXSW_SP_SB_CM_EGR(0, 7), + MLXSW_SP_SB_CM_EGR(0, 7), + MLXSW_SP_SB_CM_EGR(0, 7), + MLXSW_SP_SB_CM_EGR(0, 7), + MLXSW_SP_SB_CM_EGR(0, 7), + MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI), + MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI), + MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI), + MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI), + MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI), + MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI), + MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI), + MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI), + MLXSW_SP_SB_CM_EGR(1, 0xff), +}; + +#define MLXSW_SP_CPU_PORT_SB_CM MLXSW_SP_SB_CM(0, 0, MLXSW_SP_SB_POOL_EGR_CPU) + +static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = { + MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU), + MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU), + MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU), + MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU), + MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU), + MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU), + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU), + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, +}; + +static bool +mlxsw_sp_sb_pool_is_static(struct mlxsw_sp *mlxsw_sp, u16 pool_index) +{ + struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index); + + return pr->mode == MLXSW_REG_SBPR_MODE_STATIC; +} + +static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u16 local_port, + enum mlxsw_reg_sbxx_dir dir, + const struct mlxsw_sp_sb_cm *cms, + size_t cms_len) +{ + const struct mlxsw_sp_sb_vals *sb_vals = mlxsw_sp->sb_vals; + int i; + int err; + + for (i = 0; i < cms_len; i++) { + const struct mlxsw_sp_sb_cm *cm; + u32 min_buff; + u32 max_buff; + + if (i == 8 && dir == MLXSW_REG_SBXX_DIR_INGRESS) + continue; /* PG number 8 does not exist, skip it */ + cm = &cms[i]; + if 
(WARN_ON(sb_vals->pool_dess[cm->pool_index].dir != dir)) + continue; + + min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, cm->min_buff); + max_buff = cm->max_buff; + if (max_buff == MLXSW_SP_SB_INFI) { + err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i, + min_buff, 0, + true, cm->pool_index); + } else { + if (mlxsw_sp_sb_pool_is_static(mlxsw_sp, + cm->pool_index)) + max_buff = mlxsw_sp_bytes_cells(mlxsw_sp, + max_buff); + err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i, + min_buff, max_buff, + false, cm->pool_index); + } + if (err) + return err; + } + return 0; +} + +static int mlxsw_sp_port_sb_cms_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + int err; + + err = __mlxsw_sp_sb_cms_init(mlxsw_sp, + mlxsw_sp_port->local_port, + MLXSW_REG_SBXX_DIR_INGRESS, + mlxsw_sp->sb_vals->cms_ingress, + mlxsw_sp->sb_vals->cms_ingress_count); + if (err) + return err; + return __mlxsw_sp_sb_cms_init(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->local_port, + MLXSW_REG_SBXX_DIR_EGRESS, + mlxsw_sp->sb_vals->cms_egress, + mlxsw_sp->sb_vals->cms_egress_count); +} + +static int mlxsw_sp_cpu_port_sb_cms_init(struct mlxsw_sp *mlxsw_sp) +{ + return __mlxsw_sp_sb_cms_init(mlxsw_sp, 0, MLXSW_REG_SBXX_DIR_EGRESS, + mlxsw_sp->sb_vals->cms_cpu, + mlxsw_sp->sb_vals->cms_cpu_count); +} + +#define MLXSW_SP_SB_PM(_min_buff, _max_buff) \ + { \ + .min_buff = _min_buff, \ + .max_buff = _max_buff, \ + } + +/* Order according to mlxsw_sp1_sb_pool_dess */ +static const struct mlxsw_sp_sb_pm mlxsw_sp1_sb_pms[] = { + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_PM(0, 7), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_PM(10000, 90000), + MLXSW_SP_SB_PM(0, 8), /* 50% occupancy */ + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), +}; + +/* Order according to mlxsw_sp2_sb_pool_dess */ +static const struct mlxsw_sp_sb_pm mlxsw_sp2_sb_pms[] = { + MLXSW_SP_SB_PM(0, 7), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(0, 7), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(10000, 90000), + MLXSW_SP_SB_PM(0, 8), /* 50% occupancy */ + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), +}; + +/* Order according to mlxsw_sp*_sb_pool_dess */ +static const struct mlxsw_sp_sb_pm mlxsw_sp_cpu_port_sb_pms[] = { + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(0, 90000), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX), +}; + +static int mlxsw_sp_sb_pms_init(struct mlxsw_sp *mlxsw_sp, u16 local_port, + const struct mlxsw_sp_sb_pm *pms, + bool skip_ingress) +{ + int i, err; + + for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) { + const struct mlxsw_sp_sb_pm *pm = &pms[i]; + const struct mlxsw_sp_sb_pool_des *des; + u32 max_buff; + u32 min_buff; + + des = &mlxsw_sp->sb_vals->pool_dess[i]; + if (skip_ingress && des->dir == MLXSW_REG_SBXX_DIR_INGRESS) + continue; + + min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, pm->min_buff); + max_buff = pm->max_buff; + if (mlxsw_sp_sb_pool_is_static(mlxsw_sp, i)) + max_buff = 
mlxsw_sp_bytes_cells(mlxsw_sp, max_buff); + err = mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, i, min_buff, + max_buff); + if (err) + return err; + } + return 0; +} + +static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + return mlxsw_sp_sb_pms_init(mlxsw_sp, mlxsw_sp_port->local_port, + mlxsw_sp->sb_vals->pms, false); +} + +static int mlxsw_sp_cpu_port_sb_pms_init(struct mlxsw_sp *mlxsw_sp) +{ + return mlxsw_sp_sb_pms_init(mlxsw_sp, 0, mlxsw_sp->sb_vals->pms_cpu, + true); +} + +#define MLXSW_SP_SB_MM(_min_buff, _max_buff) \ + { \ + .min_buff = _min_buff, \ + .max_buff = _max_buff, \ + .pool_index = MLXSW_SP_SB_POOL_EGR, \ + } + +static const struct mlxsw_sp_sb_mm mlxsw_sp_sb_mms[] = { + MLXSW_SP_SB_MM(0, 6), + MLXSW_SP_SB_MM(0, 6), + MLXSW_SP_SB_MM(0, 6), + MLXSW_SP_SB_MM(0, 6), + MLXSW_SP_SB_MM(0, 6), + MLXSW_SP_SB_MM(0, 6), + MLXSW_SP_SB_MM(0, 6), + MLXSW_SP_SB_MM(0, 6), + MLXSW_SP_SB_MM(0, 6), + MLXSW_SP_SB_MM(0, 6), + MLXSW_SP_SB_MM(0, 6), + MLXSW_SP_SB_MM(0, 6), + MLXSW_SP_SB_MM(0, 6), + MLXSW_SP_SB_MM(0, 6), + MLXSW_SP_SB_MM(0, 6), +}; + +static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp) +{ + char sbmm_pl[MLXSW_REG_SBMM_LEN]; + int i; + int err; + + for (i = 0; i < mlxsw_sp->sb_vals->mms_count; i++) { + const struct mlxsw_sp_sb_pool_des *des; + const struct mlxsw_sp_sb_mm *mc; + u32 min_buff; + + mc = &mlxsw_sp->sb_vals->mms[i]; + des = &mlxsw_sp->sb_vals->pool_dess[mc->pool_index]; + /* All pools used by sb_mm's are initialized using dynamic + * thresholds, therefore 'max_buff' isn't specified in cells. + */ + min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, mc->min_buff); + mlxsw_reg_sbmm_pack(sbmm_pl, i, min_buff, mc->max_buff, + des->pool); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbmm), sbmm_pl); + if (err) + return err; + } + return 0; +} + +static void mlxsw_sp_pool_count(struct mlxsw_sp *mlxsw_sp, + u16 *p_ingress_len, u16 *p_egress_len) +{ + int i; + + for (i = 0; i < mlxsw_sp->sb_vals->pool_count; ++i) { + if (mlxsw_sp->sb_vals->pool_dess[i].dir == + MLXSW_REG_SBXX_DIR_INGRESS) + (*p_ingress_len)++; + else + (*p_egress_len)++; + } + + WARN(*p_egress_len == 0, "No egress pools\n"); +} + +const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals = { + .pool_count = ARRAY_SIZE(mlxsw_sp1_sb_pool_dess), + .pool_dess = mlxsw_sp1_sb_pool_dess, + .pms = mlxsw_sp1_sb_pms, + .pms_cpu = mlxsw_sp_cpu_port_sb_pms, + .prs = mlxsw_sp1_sb_prs, + .mms = mlxsw_sp_sb_mms, + .cms_ingress = mlxsw_sp1_sb_cms_ingress, + .cms_egress = mlxsw_sp1_sb_cms_egress, + .cms_cpu = mlxsw_sp_cpu_port_sb_cms, + .mms_count = ARRAY_SIZE(mlxsw_sp_sb_mms), + .cms_ingress_count = ARRAY_SIZE(mlxsw_sp1_sb_cms_ingress), + .cms_egress_count = ARRAY_SIZE(mlxsw_sp1_sb_cms_egress), + .cms_cpu_count = ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms), +}; + +const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals = { + .pool_count = ARRAY_SIZE(mlxsw_sp2_sb_pool_dess), + .pool_dess = mlxsw_sp2_sb_pool_dess, + .pms = mlxsw_sp2_sb_pms, + .pms_cpu = mlxsw_sp_cpu_port_sb_pms, + .prs = mlxsw_sp2_sb_prs, + .mms = mlxsw_sp_sb_mms, + .cms_ingress = mlxsw_sp2_sb_cms_ingress, + .cms_egress = mlxsw_sp2_sb_cms_egress, + .cms_cpu = mlxsw_sp_cpu_port_sb_cms, + .mms_count = ARRAY_SIZE(mlxsw_sp_sb_mms), + .cms_ingress_count = ARRAY_SIZE(mlxsw_sp2_sb_cms_ingress), + .cms_egress_count = ARRAY_SIZE(mlxsw_sp2_sb_cms_egress), + .cms_cpu_count = ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms), +}; + +static u32 mlxsw_sp1_pb_int_buf_size_get(int mtu, u32 speed) +{ + return mtu * 5 / 2; +} + 
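The per-ASIC .int_buf_size_get callbacks — mtu * 5 / 2 on Spectrum-1 above, and 3 * mtu + buffer_factor * speed / 1000 on Spectrum-2/3 just below — are consumed by mlxsw_sp_hdroom_int_buf_size_get() earlier in this file, which converts the byte count to cells (rounding up, as mlxsw_sp_bytes_cells() does) and adds one cell. A minimal standalone sketch of that arithmetic follows; the MTU, speed (taken here as Mb/s) and cell size are illustrative assumptions rather than values read from the driver or hardware, and the helper names are hypothetical.

#include <stdio.h>

/* Round-up bytes-to-cells conversion, mirroring mlxsw_sp_bytes_cells(). */
static unsigned int bytes_to_cells(unsigned int bytes, unsigned int cell_size)
{
	return (bytes + cell_size - 1) / cell_size;
}

/* Spectrum-1 reserves 2.5 * MTU; port speed does not factor in. */
static unsigned int sp1_int_buf_bytes(int mtu, unsigned int speed)
{
	(void)speed;
	return mtu * 5 / 2;
}

/* Spectrum-2/3 reserve 3 * MTU plus a speed-dependent term. */
static unsigned int sp23_int_buf_bytes(int mtu, unsigned int speed,
				       unsigned int buffer_factor)
{
	return 3 * mtu + buffer_factor * speed / 1000;
}

int main(void)
{
	int mtu = 9216;              /* assumed jumbo MTU */
	unsigned int speed = 100000; /* assumed 100 Gb/s link, in Mb/s */
	unsigned int cell = 96;      /* assumed cell size in bytes */

	/* The extra +1 cell matches mlxsw_sp_hdroom_int_buf_size_get(). */
	printf("SP1: %u cells\n",
	       bytes_to_cells(sp1_int_buf_bytes(mtu, speed), cell) + 1);
	printf("SP2 (factor 38): %u cells\n",
	       bytes_to_cells(sp23_int_buf_bytes(mtu, speed, 38), cell) + 1);
	printf("SP3 (factor 50): %u cells\n",
	       bytes_to_cells(sp23_int_buf_bytes(mtu, speed, 50), cell) + 1);
	return 0;
}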
+static u32 __mlxsw_sp_pb_int_buf_size_get(int mtu, u32 speed, u32 buffer_factor) +{ + return 3 * mtu + buffer_factor * speed / 1000; +} + +#define MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR 38 + +static u32 mlxsw_sp2_pb_int_buf_size_get(int mtu, u32 speed) +{ + int factor = MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR; + + return __mlxsw_sp_pb_int_buf_size_get(mtu, speed, factor); +} + +#define MLXSW_SP3_SPAN_EG_MIRROR_BUFFER_FACTOR 50 + +static u32 mlxsw_sp3_pb_int_buf_size_get(int mtu, u32 speed) +{ + int factor = MLXSW_SP3_SPAN_EG_MIRROR_BUFFER_FACTOR; + + return __mlxsw_sp_pb_int_buf_size_get(mtu, speed, factor); +} + +const struct mlxsw_sp_sb_ops mlxsw_sp1_sb_ops = { + .int_buf_size_get = mlxsw_sp1_pb_int_buf_size_get, +}; + +const struct mlxsw_sp_sb_ops mlxsw_sp2_sb_ops = { + .int_buf_size_get = mlxsw_sp2_pb_int_buf_size_get, +}; + +const struct mlxsw_sp_sb_ops mlxsw_sp3_sb_ops = { + .int_buf_size_get = mlxsw_sp3_pb_int_buf_size_get, +}; + +int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) +{ + u32 max_headroom_size; + u16 ing_pool_count = 0; + u16 eg_pool_count = 0; + int err; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, CELL_SIZE)) + return -EIO; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, GUARANTEED_SHARED_BUFFER)) + return -EIO; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_HEADROOM_SIZE)) + return -EIO; + + mlxsw_sp->sb = kzalloc(sizeof(*mlxsw_sp->sb), GFP_KERNEL); + if (!mlxsw_sp->sb) + return -ENOMEM; + mlxsw_sp->sb->cell_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, CELL_SIZE); + mlxsw_sp->sb->sb_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, + GUARANTEED_SHARED_BUFFER); + max_headroom_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, + MAX_HEADROOM_SIZE); + /* Round down, because this limit must not be overstepped. */ + mlxsw_sp->sb->max_headroom_cells = max_headroom_size / + mlxsw_sp->sb->cell_size; + + err = mlxsw_sp_sb_ports_init(mlxsw_sp); + if (err) + goto err_sb_ports_init; + err = mlxsw_sp_sb_prs_init(mlxsw_sp, mlxsw_sp->sb_vals->prs, + mlxsw_sp->sb_vals->pool_dess, + mlxsw_sp->sb_vals->pool_count); + if (err) + goto err_sb_prs_init; + err = mlxsw_sp_cpu_port_sb_cms_init(mlxsw_sp); + if (err) + goto err_sb_cpu_port_sb_cms_init; + err = mlxsw_sp_cpu_port_sb_pms_init(mlxsw_sp); + if (err) + goto err_sb_cpu_port_pms_init; + err = mlxsw_sp_sb_mms_init(mlxsw_sp); + if (err) + goto err_sb_mms_init; + mlxsw_sp_pool_count(mlxsw_sp, &ing_pool_count, &eg_pool_count); + err = devl_sb_register(priv_to_devlink(mlxsw_sp->core), 0, + mlxsw_sp->sb->sb_size, + ing_pool_count, + eg_pool_count, + MLXSW_SP_SB_ING_TC_COUNT, + MLXSW_SP_SB_EG_TC_COUNT); + if (err) + goto err_devlink_sb_register; + + return 0; + +err_devlink_sb_register: +err_sb_mms_init: +err_sb_cpu_port_pms_init: +err_sb_cpu_port_sb_cms_init: +err_sb_prs_init: + mlxsw_sp_sb_ports_fini(mlxsw_sp); +err_sb_ports_init: + kfree(mlxsw_sp->sb); + return err; +} + +void mlxsw_sp_buffers_fini(struct mlxsw_sp *mlxsw_sp) +{ + devl_sb_unregister(priv_to_devlink(mlxsw_sp->core), 0); + mlxsw_sp_sb_ports_fini(mlxsw_sp); + kfree(mlxsw_sp->sb); +} + +int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err; + + mlxsw_sp_port->hdroom = kzalloc(sizeof(*mlxsw_sp_port->hdroom), GFP_KERNEL); + if (!mlxsw_sp_port->hdroom) + return -ENOMEM; + mlxsw_sp_port->hdroom->mtu = mlxsw_sp_port->dev->mtu; + + err = mlxsw_sp_port_headroom_init(mlxsw_sp_port); + if (err) + goto err_headroom_init; + err = mlxsw_sp_port_sb_cms_init(mlxsw_sp_port); + if (err) + goto err_port_sb_cms_init; + err = mlxsw_sp_port_sb_pms_init(mlxsw_sp_port); + if (err) + 
goto err_port_sb_pms_init; + return 0; + +err_port_sb_pms_init: +err_port_sb_cms_init: +err_headroom_init: + kfree(mlxsw_sp_port->hdroom); + return err; +} + +void mlxsw_sp_port_buffers_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ + kfree(mlxsw_sp_port->hdroom); +} + +int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, + struct devlink_sb_pool_info *pool_info) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + enum mlxsw_reg_sbxx_dir dir; + struct mlxsw_sp_sb_pr *pr; + + dir = mlxsw_sp->sb_vals->pool_dess[pool_index].dir; + pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index); + pool_info->pool_type = (enum devlink_sb_pool_type) dir; + pool_info->size = mlxsw_sp_cells_bytes(mlxsw_sp, pr->size); + pool_info->threshold_type = (enum devlink_sb_threshold_type) pr->mode; + pool_info->cell_size = mlxsw_sp->sb->cell_size; + return 0; +} + +int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + u32 pool_size = mlxsw_sp_bytes_cells(mlxsw_sp, size); + const struct mlxsw_sp_sb_pr *pr; + enum mlxsw_reg_sbpr_mode mode; + + mode = (enum mlxsw_reg_sbpr_mode) threshold_type; + pr = &mlxsw_sp->sb_vals->prs[pool_index]; + + if (size > MLXSW_CORE_RES_GET(mlxsw_sp->core, + GUARANTEED_SHARED_BUFFER)) { + NL_SET_ERR_MSG_MOD(extack, "Exceeded shared buffer size"); + return -EINVAL; + } + + if (pr->freeze_mode && pr->mode != mode) { + NL_SET_ERR_MSG_MOD(extack, "Changing this pool's threshold type is forbidden"); + return -EINVAL; + } + + if (pr->freeze_size && pr->size != size) { + NL_SET_ERR_MSG_MOD(extack, "Changing this pool's size is forbidden"); + return -EINVAL; + } + + return mlxsw_sp_sb_pr_write(mlxsw_sp, pool_index, mode, + pool_size, false); +} + +#define MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET (-2) /* 3->1, 16->14 */ + +static u32 mlxsw_sp_sb_threshold_out(struct mlxsw_sp *mlxsw_sp, u16 pool_index, + u32 max_buff) +{ + struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index); + + if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC) + return max_buff - MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET; + return mlxsw_sp_cells_bytes(mlxsw_sp, max_buff); +} + +static int mlxsw_sp_sb_threshold_in(struct mlxsw_sp *mlxsw_sp, u16 pool_index, + u32 threshold, u32 *p_max_buff, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index); + + if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC) { + int val; + + val = threshold + MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET; + if (val < MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN || + val > MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX) { + NL_SET_ERR_MSG_MOD(extack, "Invalid dynamic threshold value"); + return -EINVAL; + } + *p_max_buff = val; + } else { + *p_max_buff = mlxsw_sp_bytes_cells(mlxsw_sp, threshold); + } + return 0; +} + +int mlxsw_sp_sb_port_pool_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u16 local_port = mlxsw_sp_port->local_port; + struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, + pool_index); + + *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, pool_index, + pm->max_buff); + return 0; +} + +int mlxsw_sp_sb_port_pool_set(struct mlxsw_core_port *mlxsw_core_port, 
+ unsigned int sb_index, u16 pool_index, + u32 threshold, struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u16 local_port = mlxsw_sp_port->local_port; + u32 max_buff; + int err; + + if (local_port == MLXSW_PORT_CPU_PORT) { + NL_SET_ERR_MSG_MOD(extack, "Changing CPU port's threshold is forbidden"); + return -EINVAL; + } + + err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool_index, + threshold, &max_buff, extack); + if (err) + return err; + + return mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, pool_index, + 0, max_buff); +} + +int mlxsw_sp_sb_tc_pool_bind_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u16 local_port = mlxsw_sp_port->local_port; + u8 pg_buff = tc_index; + enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type; + struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, + pg_buff, dir); + + *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, cm->pool_index, + cm->max_buff); + *p_pool_index = cm->pool_index; + return 0; +} + +int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u16 local_port = mlxsw_sp_port->local_port; + const struct mlxsw_sp_sb_cm *cm; + u8 pg_buff = tc_index; + enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type; + u32 max_buff; + int err; + + if (local_port == MLXSW_PORT_CPU_PORT) { + NL_SET_ERR_MSG_MOD(extack, "Changing CPU port's binding is forbidden"); + return -EINVAL; + } + + if (dir != mlxsw_sp->sb_vals->pool_dess[pool_index].dir) { + NL_SET_ERR_MSG_MOD(extack, "Binding egress TC to ingress pool and vice versa is forbidden"); + return -EINVAL; + } + + if (dir == MLXSW_REG_SBXX_DIR_INGRESS) + cm = &mlxsw_sp->sb_vals->cms_ingress[tc_index]; + else + cm = &mlxsw_sp->sb_vals->cms_egress[tc_index]; + + if (cm->freeze_pool && cm->pool_index != pool_index) { + NL_SET_ERR_MSG_MOD(extack, "Binding this TC to a different pool is forbidden"); + return -EINVAL; + } + + if (cm->freeze_thresh && cm->max_buff != threshold) { + NL_SET_ERR_MSG_MOD(extack, "Changing this TC's threshold is forbidden"); + return -EINVAL; + } + + err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool_index, + threshold, &max_buff, extack); + if (err) + return err; + + return mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, pg_buff, + 0, max_buff, false, pool_index); +} + +#define MASKED_COUNT_MAX \ + (MLXSW_REG_SBSR_REC_MAX_COUNT / \ + (MLXSW_SP_SB_ING_TC_COUNT + MLXSW_SP_SB_EG_TC_COUNT)) + +struct mlxsw_sp_sb_sr_occ_query_cb_ctx { + u8 masked_count; + u16 local_port_1; +}; + +static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core, + char *sbsr_pl, size_t sbsr_pl_len, + unsigned long cb_priv) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct mlxsw_sp_sb_sr_occ_query_cb_ctx cb_ctx; + u8 masked_count; + u16 local_port; + int rec_index = 0; + struct mlxsw_sp_sb_cm *cm; + int i; + + memcpy(&cb_ctx, &cb_priv, sizeof(cb_ctx)); + + masked_count 
= 0; + for (local_port = cb_ctx.local_port_1; + local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { + if (!mlxsw_sp->ports[local_port]) + continue; + if (local_port == MLXSW_PORT_CPU_PORT) { + /* Ingress quotas are not supported for the CPU port */ + masked_count++; + continue; + } + for (i = 0; i < MLXSW_SP_SB_ING_TC_COUNT; i++) { + cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, i, + MLXSW_REG_SBXX_DIR_INGRESS); + mlxsw_reg_sbsr_rec_unpack(sbsr_pl, rec_index++, + &cm->occ.cur, &cm->occ.max); + } + if (++masked_count == cb_ctx.masked_count) + break; + } + masked_count = 0; + for (local_port = cb_ctx.local_port_1; + local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { + if (!mlxsw_sp->ports[local_port]) + continue; + for (i = 0; i < MLXSW_SP_SB_EG_TC_COUNT; i++) { + cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, i, + MLXSW_REG_SBXX_DIR_EGRESS); + mlxsw_reg_sbsr_rec_unpack(sbsr_pl, rec_index++, + &cm->occ.cur, &cm->occ.max); + } + if (++masked_count == cb_ctx.masked_count) + break; + } +} + +int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core, + unsigned int sb_index) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + u16 local_port, local_port_1, last_local_port; + struct mlxsw_sp_sb_sr_occ_query_cb_ctx cb_ctx; + u8 masked_count, current_page = 0; + unsigned long cb_priv = 0; + LIST_HEAD(bulk_list); + char *sbsr_pl; + int i; + int err; + int err2; + + sbsr_pl = kmalloc(MLXSW_REG_SBSR_LEN, GFP_KERNEL); + if (!sbsr_pl) + return -ENOMEM; + + local_port = MLXSW_PORT_CPU_PORT; +next_batch: + local_port_1 = local_port; + masked_count = 0; + mlxsw_reg_sbsr_pack(sbsr_pl, false); + mlxsw_reg_sbsr_port_page_set(sbsr_pl, current_page); + last_local_port = current_page * MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE + + MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE - 1; + + for (i = 0; i < MLXSW_SP_SB_ING_TC_COUNT; i++) + mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1); + for (i = 0; i < MLXSW_SP_SB_EG_TC_COUNT; i++) + mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1); + for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { + if (!mlxsw_sp->ports[local_port]) + continue; + if (local_port > last_local_port) { + current_page++; + goto do_query; + } + if (local_port != MLXSW_PORT_CPU_PORT) { + /* Ingress quotas are not supported for the CPU port */ + mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, + local_port, 1); + } + mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); + for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) { + err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i, + &bulk_list); + if (err) + goto out; + } + if (++masked_count == MASKED_COUNT_MAX) + goto do_query; + } + +do_query: + cb_ctx.masked_count = masked_count; + cb_ctx.local_port_1 = local_port_1; + memcpy(&cb_priv, &cb_ctx, sizeof(cb_ctx)); + err = mlxsw_reg_trans_query(mlxsw_core, MLXSW_REG(sbsr), sbsr_pl, + &bulk_list, mlxsw_sp_sb_sr_occ_query_cb, + cb_priv); + if (err) + goto out; + if (local_port < mlxsw_core_max_ports(mlxsw_core)) { + local_port++; + goto next_batch; + } + +out: + err2 = mlxsw_reg_trans_bulk_wait(&bulk_list); + if (!err) + err = err2; + kfree(sbsr_pl); + return err; +} + +int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core, + unsigned int sb_index) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + u16 local_port, last_local_port; + LIST_HEAD(bulk_list); + unsigned int masked_count; + u8 current_page = 0; + char *sbsr_pl; + int i; + int err; + int err2; + + sbsr_pl = kmalloc(MLXSW_REG_SBSR_LEN, GFP_KERNEL); + if (!sbsr_pl) + return -ENOMEM; 
+ + local_port = MLXSW_PORT_CPU_PORT; +next_batch: + masked_count = 0; + mlxsw_reg_sbsr_pack(sbsr_pl, true); + mlxsw_reg_sbsr_port_page_set(sbsr_pl, current_page); + last_local_port = current_page * MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE + + MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE - 1; + + for (i = 0; i < MLXSW_SP_SB_ING_TC_COUNT; i++) + mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1); + for (i = 0; i < MLXSW_SP_SB_EG_TC_COUNT; i++) + mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1); + for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { + if (!mlxsw_sp->ports[local_port]) + continue; + if (local_port > last_local_port) { + current_page++; + goto do_query; + } + if (local_port != MLXSW_PORT_CPU_PORT) { + /* Ingress quotas are not supported for the CPU port */ + mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, + local_port, 1); + } + mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); + for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) { + err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i, + &bulk_list); + if (err) + goto out; + } + if (++masked_count == MASKED_COUNT_MAX) + goto do_query; + } + +do_query: + err = mlxsw_reg_trans_query(mlxsw_core, MLXSW_REG(sbsr), sbsr_pl, + &bulk_list, NULL, 0); + if (err) + goto out; + if (local_port < mlxsw_core_max_ports(mlxsw_core)) { + local_port++; + goto next_batch; + } + +out: + err2 = mlxsw_reg_trans_bulk_wait(&bulk_list); + if (!err) + err = err2; + kfree(sbsr_pl); + return err; +} + +int mlxsw_sp_sb_occ_port_pool_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u16 local_port = mlxsw_sp_port->local_port; + struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, + pool_index); + + *p_cur = mlxsw_sp_cells_bytes(mlxsw_sp, pm->occ.cur); + *p_max = mlxsw_sp_cells_bytes(mlxsw_sp, pm->occ.max); + return 0; +} + +int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u16 local_port = mlxsw_sp_port->local_port; + u8 pg_buff = tc_index; + enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type; + struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, + pg_buff, dir); + + *p_cur = mlxsw_sp_cells_bytes(mlxsw_sp, cm->occ.cur); + *p_max = mlxsw_sp_cells_bytes(mlxsw_sp, cm->occ.max); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c new file mode 100644 index 000000000..ee59c7915 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c @@ -0,0 +1,306 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2018 Mellanox Technologies. 
All rights reserved */ + +#include <linux/kernel.h> +#include <linux/bitops.h> +#include <linux/spinlock.h> + +#include "spectrum_cnt.h" + +struct mlxsw_sp_counter_sub_pool { + u64 size; + unsigned int base_index; + enum mlxsw_res_id entry_size_res_id; + const char *resource_name; /* devlink resource name */ + u64 resource_id; /* devlink resource id */ + unsigned int entry_size; + unsigned int bank_count; + atomic_t active_entries_count; +}; + +struct mlxsw_sp_counter_pool { + u64 pool_size; + unsigned long *usage; /* Usage bitmap */ + spinlock_t counter_pool_lock; /* Protects counter pool allocations */ + atomic_t active_entries_count; + unsigned int sub_pools_count; + struct mlxsw_sp_counter_sub_pool sub_pools[]; +}; + +static const struct mlxsw_sp_counter_sub_pool mlxsw_sp_counter_sub_pools[] = { + [MLXSW_SP_COUNTER_SUB_POOL_FLOW] = { + .entry_size_res_id = MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES, + .resource_name = MLXSW_SP_RESOURCE_NAME_COUNTERS_FLOW, + .resource_id = MLXSW_SP_RESOURCE_COUNTERS_FLOW, + .bank_count = 6, + }, + [MLXSW_SP_COUNTER_SUB_POOL_RIF] = { + .entry_size_res_id = MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC, + .resource_name = MLXSW_SP_RESOURCE_NAME_COUNTERS_RIF, + .resource_id = MLXSW_SP_RESOURCE_COUNTERS_RIF, + .bank_count = 2, + } +}; + +static u64 mlxsw_sp_counter_sub_pool_occ_get(void *priv) +{ + const struct mlxsw_sp_counter_sub_pool *sub_pool = priv; + + return atomic_read(&sub_pool->active_entries_count); +} + +static int mlxsw_sp_counter_sub_pools_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp_counter_sub_pool *sub_pool; + unsigned int base_index = 0; + enum mlxsw_res_id res_id; + int err; + int i; + + for (i = 0; i < pool->sub_pools_count; i++) { + sub_pool = &pool->sub_pools[i]; + res_id = sub_pool->entry_size_res_id; + + if (!mlxsw_core_res_valid(mlxsw_sp->core, res_id)) + return -EIO; + sub_pool->entry_size = mlxsw_core_res_get(mlxsw_sp->core, + res_id); + err = devl_resource_size_get(devlink, + sub_pool->resource_id, + &sub_pool->size); + if (err) + goto err_resource_size_get; + + devl_resource_occ_get_register(devlink, + sub_pool->resource_id, + mlxsw_sp_counter_sub_pool_occ_get, + sub_pool); + + sub_pool->base_index = base_index; + base_index += sub_pool->size; + atomic_set(&sub_pool->active_entries_count, 0); + } + return 0; + +err_resource_size_get: + for (i--; i >= 0; i--) { + sub_pool = &pool->sub_pools[i]; + + devl_resource_occ_get_unregister(devlink, + sub_pool->resource_id); + } + return err; +} + +static void mlxsw_sp_counter_sub_pools_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp_counter_sub_pool *sub_pool; + int i; + + for (i = 0; i < pool->sub_pools_count; i++) { + sub_pool = &pool->sub_pools[i]; + + WARN_ON(atomic_read(&sub_pool->active_entries_count)); + devl_resource_occ_get_unregister(devlink, + sub_pool->resource_id); + } +} + +static u64 mlxsw_sp_counter_pool_occ_get(void *priv) +{ + const struct mlxsw_sp_counter_pool *pool = priv; + + return atomic_read(&pool->active_entries_count); +} + +int mlxsw_sp_counter_pool_init(struct mlxsw_sp *mlxsw_sp) +{ + unsigned int sub_pools_count = ARRAY_SIZE(mlxsw_sp_counter_sub_pools); + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp_counter_pool *pool; + int err; + + pool = kzalloc(struct_size(pool, sub_pools, 
sub_pools_count), + GFP_KERNEL); + if (!pool) + return -ENOMEM; + mlxsw_sp->counter_pool = pool; + pool->sub_pools_count = sub_pools_count; + memcpy(pool->sub_pools, mlxsw_sp_counter_sub_pools, + flex_array_size(pool, sub_pools, pool->sub_pools_count)); + spin_lock_init(&pool->counter_pool_lock); + atomic_set(&pool->active_entries_count, 0); + + err = devl_resource_size_get(devlink, MLXSW_SP_RESOURCE_COUNTERS, + &pool->pool_size); + if (err) + goto err_pool_resource_size_get; + devl_resource_occ_get_register(devlink, MLXSW_SP_RESOURCE_COUNTERS, + mlxsw_sp_counter_pool_occ_get, pool); + + pool->usage = bitmap_zalloc(pool->pool_size, GFP_KERNEL); + if (!pool->usage) { + err = -ENOMEM; + goto err_usage_alloc; + } + + err = mlxsw_sp_counter_sub_pools_init(mlxsw_sp); + if (err) + goto err_sub_pools_init; + + return 0; + +err_sub_pools_init: + bitmap_free(pool->usage); +err_usage_alloc: + devl_resource_occ_get_unregister(devlink, + MLXSW_SP_RESOURCE_COUNTERS); +err_pool_resource_size_get: + kfree(pool); + return err; +} + +void mlxsw_sp_counter_pool_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + mlxsw_sp_counter_sub_pools_fini(mlxsw_sp); + WARN_ON(find_first_bit(pool->usage, pool->pool_size) != + pool->pool_size); + WARN_ON(atomic_read(&pool->active_entries_count)); + bitmap_free(pool->usage); + devl_resource_occ_get_unregister(devlink, + MLXSW_SP_RESOURCE_COUNTERS); + kfree(pool); +} + +int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_counter_sub_pool_id sub_pool_id, + unsigned int *p_counter_index) +{ + struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + struct mlxsw_sp_counter_sub_pool *sub_pool; + unsigned int entry_index; + unsigned int stop_index; + int i, err; + + sub_pool = &pool->sub_pools[sub_pool_id]; + stop_index = sub_pool->base_index + sub_pool->size; + entry_index = sub_pool->base_index; + + spin_lock(&pool->counter_pool_lock); + entry_index = find_next_zero_bit(pool->usage, stop_index, entry_index); + if (entry_index == stop_index) { + err = -ENOBUFS; + goto err_alloc; + } + /* The sub-pools can contain non-integer number of entries + * so we must check for overflow + */ + if (entry_index + sub_pool->entry_size > stop_index) { + err = -ENOBUFS; + goto err_alloc; + } + for (i = 0; i < sub_pool->entry_size; i++) + __set_bit(entry_index + i, pool->usage); + spin_unlock(&pool->counter_pool_lock); + + *p_counter_index = entry_index; + atomic_add(sub_pool->entry_size, &sub_pool->active_entries_count); + atomic_add(sub_pool->entry_size, &pool->active_entries_count); + return 0; + +err_alloc: + spin_unlock(&pool->counter_pool_lock); + return err; +} + +void mlxsw_sp_counter_free(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_counter_sub_pool_id sub_pool_id, + unsigned int counter_index) +{ + struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + struct mlxsw_sp_counter_sub_pool *sub_pool; + int i; + + if (WARN_ON(counter_index >= pool->pool_size)) + return; + sub_pool = &pool->sub_pools[sub_pool_id]; + spin_lock(&pool->counter_pool_lock); + for (i = 0; i < sub_pool->entry_size; i++) + __clear_bit(counter_index + i, pool->usage); + spin_unlock(&pool->counter_pool_lock); + atomic_sub(sub_pool->entry_size, &sub_pool->active_entries_count); + atomic_sub(sub_pool->entry_size, &pool->active_entries_count); +} + +int mlxsw_sp_counter_resources_register(struct mlxsw_core *mlxsw_core) +{ + static struct devlink_resource_size_params size_params; + 
struct devlink *devlink = priv_to_devlink(mlxsw_core); + const struct mlxsw_sp_counter_sub_pool *sub_pool; + unsigned int total_bank_config; + u64 sub_pool_size; + u64 base_index; + u64 pool_size; + u64 bank_size; + int err; + int i; + + if (!MLXSW_CORE_RES_VALID(mlxsw_core, COUNTER_POOL_SIZE) || + !MLXSW_CORE_RES_VALID(mlxsw_core, COUNTER_BANK_SIZE)) + return -EIO; + + pool_size = MLXSW_CORE_RES_GET(mlxsw_core, COUNTER_POOL_SIZE); + bank_size = MLXSW_CORE_RES_GET(mlxsw_core, COUNTER_BANK_SIZE); + + devlink_resource_size_params_init(&size_params, pool_size, + pool_size, bank_size, + DEVLINK_RESOURCE_UNIT_ENTRY); + err = devl_resource_register(devlink, + MLXSW_SP_RESOURCE_NAME_COUNTERS, + pool_size, + MLXSW_SP_RESOURCE_COUNTERS, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &size_params); + if (err) + return err; + + /* Allocation is based on bank count which should be + * specified for each sub pool statically. + */ + total_bank_config = 0; + base_index = 0; + for (i = 0; i < ARRAY_SIZE(mlxsw_sp_counter_sub_pools); i++) { + sub_pool = &mlxsw_sp_counter_sub_pools[i]; + sub_pool_size = sub_pool->bank_count * bank_size; + /* The last bank can't be fully used */ + if (base_index + sub_pool_size > pool_size) + sub_pool_size = pool_size - base_index; + base_index += sub_pool_size; + + devlink_resource_size_params_init(&size_params, sub_pool_size, + sub_pool_size, bank_size, + DEVLINK_RESOURCE_UNIT_ENTRY); + err = devl_resource_register(devlink, + sub_pool->resource_name, + sub_pool_size, + sub_pool->resource_id, + MLXSW_SP_RESOURCE_COUNTERS, + &size_params); + if (err) + return err; + total_bank_config += sub_pool->bank_count; + } + + /* Check config is valid, no bank over subscription */ + if (WARN_ON(total_bank_config > div64_u64(pool_size, bank_size) + 1)) + return -EINVAL; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h new file mode 100644 index 000000000..15c8d4de8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_SPECTRUM_CNT_H +#define _MLXSW_SPECTRUM_CNT_H + +#include "core.h" +#include "spectrum.h" + +enum mlxsw_sp_counter_sub_pool_id { + MLXSW_SP_COUNTER_SUB_POOL_RIF, + MLXSW_SP_COUNTER_SUB_POOL_FLOW, +}; + +int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_counter_sub_pool_id sub_pool_id, + unsigned int *p_counter_index); +void mlxsw_sp_counter_free(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_counter_sub_pool_id sub_pool_id, + unsigned int counter_index); +int mlxsw_sp_counter_pool_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_counter_pool_fini(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_counter_resources_register(struct mlxsw_core *mlxsw_core); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c new file mode 100644 index 000000000..aff6d4f35 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c @@ -0,0 +1,740 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2016-2018 Mellanox Technologies. 
All rights reserved */ + +#include <linux/netdevice.h> +#include <linux/string.h> +#include <linux/bitops.h> +#include <net/dcbnl.h> + +#include "spectrum.h" +#include "reg.h" + +static u8 mlxsw_sp_dcbnl_getdcbx(struct net_device __always_unused *dev) +{ + return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE; +} + +static u8 mlxsw_sp_dcbnl_setdcbx(struct net_device __always_unused *dev, + u8 mode) +{ + return (mode != (DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE)) ? 1 : 0; +} + +static int mlxsw_sp_dcbnl_ieee_getets(struct net_device *dev, + struct ieee_ets *ets) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + memcpy(ets, mlxsw_sp_port->dcb.ets, sizeof(*ets)); + + return 0; +} + +static int mlxsw_sp_port_ets_validate(struct mlxsw_sp_port *mlxsw_sp_port, + struct ieee_ets *ets) +{ + struct net_device *dev = mlxsw_sp_port->dev; + bool has_ets_tc = false; + int i, tx_bw_sum = 0; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_STRICT: + break; + case IEEE_8021QAZ_TSA_ETS: + has_ets_tc = true; + tx_bw_sum += ets->tc_tx_bw[i]; + break; + default: + netdev_err(dev, "Only strict priority and ETS are supported\n"); + return -EINVAL; + } + + if (ets->prio_tc[i] >= IEEE_8021QAZ_MAX_TCS) { + netdev_err(dev, "Invalid TC\n"); + return -EINVAL; + } + } + + if (has_ets_tc && tx_bw_sum != 100) { + netdev_err(dev, "Total ETS bandwidth should equal 100\n"); + return -EINVAL; + } + + return 0; +} + +static int mlxsw_sp_port_headroom_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct ieee_ets *ets) +{ + struct net_device *dev = mlxsw_sp_port->dev; + struct mlxsw_sp_hdroom hdroom; + int prio; + int err; + + hdroom = *mlxsw_sp_port->hdroom; + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) + hdroom.prios.prio[prio].ets_buf_idx = ets->prio_tc[prio]; + mlxsw_sp_hdroom_prios_reset_buf_idx(&hdroom); + mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom); + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + + err = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); + if (err) { + netdev_err(dev, "Failed to configure port's headroom\n"); + return err; + } + + return 0; +} + +static int __mlxsw_sp_dcbnl_ieee_setets(struct mlxsw_sp_port *mlxsw_sp_port, + struct ieee_ets *ets) +{ + struct ieee_ets *my_ets = mlxsw_sp_port->dcb.ets; + struct net_device *dev = mlxsw_sp_port->dev; + int i, err; + + /* Egress configuration. */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + bool dwrr = ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS; + u8 weight = ets->tc_tx_bw[i]; + + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, i, + 0, dwrr, weight); + if (err) { + netdev_err(dev, "Failed to link subgroup ETS element %d to group\n", + i); + goto err_port_ets_set; + } + } + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, + ets->prio_tc[i]); + if (err) { + netdev_err(dev, "Failed to map prio %d to TC %d\n", i, + ets->prio_tc[i]); + goto err_port_prio_tc_set; + } + } + + /* Ingress configuration. 
*/ + err = mlxsw_sp_port_headroom_ets_set(mlxsw_sp_port, ets); + if (err) + goto err_port_headroom_set; + + return 0; + +err_port_headroom_set: + i = IEEE_8021QAZ_MAX_TCS; +err_port_prio_tc_set: + for (i--; i >= 0; i--) + mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, my_ets->prio_tc[i]); + i = IEEE_8021QAZ_MAX_TCS; +err_port_ets_set: + for (i--; i >= 0; i--) { + bool dwrr = my_ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS; + u8 weight = my_ets->tc_tx_bw[i]; + + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, i, + 0, dwrr, weight); + } + return err; +} + +static int mlxsw_sp_dcbnl_ieee_setets(struct net_device *dev, + struct ieee_ets *ets) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err; + + err = mlxsw_sp_port_ets_validate(mlxsw_sp_port, ets); + if (err) + return err; + + err = __mlxsw_sp_dcbnl_ieee_setets(mlxsw_sp_port, ets); + if (err) + return err; + + memcpy(mlxsw_sp_port->dcb.ets, ets, sizeof(*ets)); + mlxsw_sp_port->dcb.ets->ets_cap = IEEE_8021QAZ_MAX_TCS; + + return 0; +} + +static int mlxsw_sp_dcbnl_app_validate(struct net_device *dev, + struct dcb_app *app) +{ + if (app->priority >= IEEE_8021QAZ_MAX_TCS) { + netdev_err(dev, "APP entry with priority value %u is invalid\n", + app->priority); + return -EINVAL; + } + + switch (app->selector) { + case IEEE_8021QAZ_APP_SEL_DSCP: + if (app->protocol >= 64) { + netdev_err(dev, "DSCP APP entry with protocol value %u is invalid\n", + app->protocol); + return -EINVAL; + } + break; + + case IEEE_8021QAZ_APP_SEL_ETHERTYPE: + if (app->protocol) { + netdev_err(dev, "EtherType APP entries with protocol value != 0 not supported\n"); + return -EINVAL; + } + break; + + default: + netdev_err(dev, "APP entries with selector %u not supported\n", + app->selector); + return -EINVAL; + } + + return 0; +} + +static u8 +mlxsw_sp_port_dcb_app_default_prio(struct mlxsw_sp_port *mlxsw_sp_port) +{ + u8 prio_mask; + + prio_mask = dcb_ieee_getapp_default_prio_mask(mlxsw_sp_port->dev); + if (prio_mask) + /* Take the highest configured priority. 
*/ + return fls(prio_mask) - 1; + + return 0; +} + +static void +mlxsw_sp_port_dcb_app_dscp_prio_map(struct mlxsw_sp_port *mlxsw_sp_port, + u8 default_prio, + struct dcb_ieee_app_dscp_map *map) +{ + int i; + + dcb_ieee_getapp_dscp_prio_mask_map(mlxsw_sp_port->dev, map); + for (i = 0; i < ARRAY_SIZE(map->map); ++i) { + if (map->map[i]) + map->map[i] = fls(map->map[i]) - 1; + else + map->map[i] = default_prio; + } +} + +static bool +mlxsw_sp_port_dcb_app_prio_dscp_map(struct mlxsw_sp_port *mlxsw_sp_port, + struct dcb_ieee_app_prio_map *map) +{ + bool have_dscp = false; + int i; + + dcb_ieee_getapp_prio_dscp_mask_map(mlxsw_sp_port->dev, map); + for (i = 0; i < ARRAY_SIZE(map->map); ++i) { + if (map->map[i]) { + map->map[i] = fls64(map->map[i]) - 1; + have_dscp = true; + } + } + + return have_dscp; +} + +static int +mlxsw_sp_port_dcb_app_update_qpts(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qpts_trust_state ts) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qpts_pl[MLXSW_REG_QPTS_LEN]; + + mlxsw_reg_qpts_pack(qpts_pl, mlxsw_sp_port->local_port, ts); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpts), qpts_pl); +} + +static int +mlxsw_sp_port_dcb_app_update_qrwe(struct mlxsw_sp_port *mlxsw_sp_port, + bool rewrite_dscp) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qrwe_pl[MLXSW_REG_QRWE_LEN]; + + mlxsw_reg_qrwe_pack(qrwe_pl, mlxsw_sp_port->local_port, + false, rewrite_dscp); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qrwe), qrwe_pl); +} + +static int +mlxsw_sp_port_dcb_toggle_trust(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qpts_trust_state ts) +{ + bool rewrite_dscp = ts == MLXSW_REG_QPTS_TRUST_STATE_DSCP; + int err; + + if (mlxsw_sp_port->dcb.trust_state == ts) + return 0; + + err = mlxsw_sp_port_dcb_app_update_qpts(mlxsw_sp_port, ts); + if (err) + return err; + + err = mlxsw_sp_port_dcb_app_update_qrwe(mlxsw_sp_port, rewrite_dscp); + if (err) + goto err_update_qrwe; + + mlxsw_sp_port->dcb.trust_state = ts; + return 0; + +err_update_qrwe: + mlxsw_sp_port_dcb_app_update_qpts(mlxsw_sp_port, + mlxsw_sp_port->dcb.trust_state); + return err; +} + +static int +mlxsw_sp_port_dcb_app_update_qpdp(struct mlxsw_sp_port *mlxsw_sp_port, + u8 default_prio) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qpdp_pl[MLXSW_REG_QPDP_LEN]; + + mlxsw_reg_qpdp_pack(qpdp_pl, mlxsw_sp_port->local_port, default_prio); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpdp), qpdp_pl); +} + +static int +mlxsw_sp_port_dcb_app_update_qpdpm(struct mlxsw_sp_port *mlxsw_sp_port, + struct dcb_ieee_app_dscp_map *map) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qpdpm_pl[MLXSW_REG_QPDPM_LEN]; + short int i; + + mlxsw_reg_qpdpm_pack(qpdpm_pl, mlxsw_sp_port->local_port); + for (i = 0; i < ARRAY_SIZE(map->map); ++i) + mlxsw_reg_qpdpm_dscp_pack(qpdpm_pl, i, map->map[i]); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpdpm), qpdpm_pl); +} + +static int +mlxsw_sp_port_dcb_app_update_qpdsm(struct mlxsw_sp_port *mlxsw_sp_port, + struct dcb_ieee_app_prio_map *map) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qpdsm_pl[MLXSW_REG_QPDSM_LEN]; + short int i; + + mlxsw_reg_qpdsm_pack(qpdsm_pl, mlxsw_sp_port->local_port); + for (i = 0; i < ARRAY_SIZE(map->map); ++i) + mlxsw_reg_qpdsm_prio_pack(qpdsm_pl, i, map->map[i]); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpdsm), qpdsm_pl); +} + +static int mlxsw_sp_port_dcb_app_update(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct dcb_ieee_app_prio_map 
prio_map; + struct dcb_ieee_app_dscp_map dscp_map; + u8 default_prio; + bool have_dscp; + int err; + + default_prio = mlxsw_sp_port_dcb_app_default_prio(mlxsw_sp_port); + err = mlxsw_sp_port_dcb_app_update_qpdp(mlxsw_sp_port, default_prio); + if (err) { + netdev_err(mlxsw_sp_port->dev, "Couldn't configure port default priority\n"); + return err; + } + + have_dscp = mlxsw_sp_port_dcb_app_prio_dscp_map(mlxsw_sp_port, + &prio_map); + + mlxsw_sp_port_dcb_app_dscp_prio_map(mlxsw_sp_port, default_prio, + &dscp_map); + err = mlxsw_sp_port_dcb_app_update_qpdpm(mlxsw_sp_port, + &dscp_map); + if (err) { + netdev_err(mlxsw_sp_port->dev, "Couldn't configure priority map\n"); + return err; + } + + err = mlxsw_sp_port_dcb_app_update_qpdsm(mlxsw_sp_port, + &prio_map); + if (err) { + netdev_err(mlxsw_sp_port->dev, "Couldn't configure DSCP rewrite map\n"); + return err; + } + + if (!have_dscp) { + err = mlxsw_sp_port_dcb_toggle_trust(mlxsw_sp_port, + MLXSW_REG_QPTS_TRUST_STATE_PCP); + if (err) + netdev_err(mlxsw_sp_port->dev, "Couldn't switch to trust L2\n"); + return err; + } + + err = mlxsw_sp_port_dcb_toggle_trust(mlxsw_sp_port, + MLXSW_REG_QPTS_TRUST_STATE_DSCP); + if (err) { + /* A failure to set trust DSCP means that the QPDPM and QPDSM + * maps installed above are not in effect. And since we are here + * attempting to set trust DSCP, we couldn't have attempted to + * switch trust to PCP. Thus no cleanup is necessary. + */ + netdev_err(mlxsw_sp_port->dev, "Couldn't switch to trust L3\n"); + return err; + } + + return 0; +} + +static int mlxsw_sp_dcbnl_ieee_setapp(struct net_device *dev, + struct dcb_app *app) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err; + + err = mlxsw_sp_dcbnl_app_validate(dev, app); + if (err) + return err; + + err = dcb_ieee_setapp(dev, app); + if (err) + return err; + + err = mlxsw_sp_port_dcb_app_update(mlxsw_sp_port); + if (err) + goto err_update; + + return 0; + +err_update: + dcb_ieee_delapp(dev, app); + return err; +} + +static int mlxsw_sp_dcbnl_ieee_delapp(struct net_device *dev, + struct dcb_app *app) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err; + + err = dcb_ieee_delapp(dev, app); + if (err) + return err; + + err = mlxsw_sp_port_dcb_app_update(mlxsw_sp_port); + if (err) + netdev_err(dev, "Failed to update DCB APP configuration\n"); + return 0; +} + +static int mlxsw_sp_dcbnl_ieee_getmaxrate(struct net_device *dev, + struct ieee_maxrate *maxrate) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + memcpy(maxrate, mlxsw_sp_port->dcb.maxrate, sizeof(*maxrate)); + + return 0; +} + +static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev, + struct ieee_maxrate *maxrate) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct ieee_maxrate *my_maxrate = mlxsw_sp_port->dcb.maxrate; + int err, i; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + i, 0, + maxrate->tc_maxrate[i], 0); + if (err) { + netdev_err(dev, "Failed to set maxrate for TC %d\n", i); + goto err_port_ets_maxrate_set; + } + } + + memcpy(mlxsw_sp_port->dcb.maxrate, maxrate, sizeof(*maxrate)); + + return 0; + +err_port_ets_maxrate_set: + for (i--; i >= 0; i--) + mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + i, 0, + my_maxrate->tc_maxrate[i], 0); + return err; +} + +static int mlxsw_sp_port_pfc_cnt_get(struct mlxsw_sp_port *mlxsw_sp_port, + u8 prio) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct 
ieee_pfc *my_pfc = mlxsw_sp_port->dcb.pfc; + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + int err; + + mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port, + MLXSW_REG_PPCNT_PRIO_CNT, prio); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl); + if (err) + return err; + + my_pfc->requests[prio] = mlxsw_reg_ppcnt_tx_pause_get(ppcnt_pl); + my_pfc->indications[prio] = mlxsw_reg_ppcnt_rx_pause_get(ppcnt_pl); + + return 0; +} + +static int mlxsw_sp_dcbnl_ieee_getpfc(struct net_device *dev, + struct ieee_pfc *pfc) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err, i; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_pfc_cnt_get(mlxsw_sp_port, i); + if (err) { + netdev_err(dev, "Failed to get PFC count for priority %d\n", + i); + return err; + } + } + + memcpy(pfc, mlxsw_sp_port->dcb.pfc, sizeof(*pfc)); + + return 0; +} + +static int mlxsw_sp_port_pfc_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct ieee_pfc *pfc) +{ + char pfcc_pl[MLXSW_REG_PFCC_LEN]; + + mlxsw_reg_pfcc_pack(pfcc_pl, mlxsw_sp_port->local_port); + mlxsw_reg_pfcc_pprx_set(pfcc_pl, mlxsw_sp_port->link.rx_pause); + mlxsw_reg_pfcc_pptx_set(pfcc_pl, mlxsw_sp_port->link.tx_pause); + mlxsw_reg_pfcc_prio_pack(pfcc_pl, pfc->pfc_en); + + return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pfcc), + pfcc_pl); +} + +static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev, + struct ieee_pfc *pfc) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port); + struct mlxsw_sp_hdroom orig_hdroom; + struct mlxsw_sp_hdroom hdroom; + int prio; + int err; + + if (pause_en && pfc->pfc_en) { + netdev_err(dev, "PAUSE frames already enabled on port\n"); + return -EINVAL; + } + + orig_hdroom = *mlxsw_sp_port->hdroom; + + hdroom = orig_hdroom; + if (pfc->pfc_en) + hdroom.delay_bytes = DIV_ROUND_UP(pfc->delay, BITS_PER_BYTE); + else + hdroom.delay_bytes = 0; + + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) + hdroom.prios.prio[prio].lossy = !(pfc->pfc_en & BIT(prio)); + + mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom); + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + + err = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); + if (err) { + netdev_err(dev, "Failed to configure port's headroom for PFC\n"); + return err; + } + + err = mlxsw_sp_port_pfc_set(mlxsw_sp_port, pfc); + if (err) { + netdev_err(dev, "Failed to configure PFC\n"); + goto err_port_pfc_set; + } + + memcpy(mlxsw_sp_port->dcb.pfc, pfc, sizeof(*pfc)); + mlxsw_sp_port->dcb.pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS; + + return 0; + +err_port_pfc_set: + mlxsw_sp_hdroom_configure(mlxsw_sp_port, &orig_hdroom); + return err; +} + +static int mlxsw_sp_dcbnl_getbuffer(struct net_device *dev, struct dcbnl_buffer *buf) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp_hdroom *hdroom = mlxsw_sp_port->hdroom; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + int prio; + int i; + + buf->total_size = 0; + + BUILD_BUG_ON(DCBX_MAX_BUFFERS > MLXSW_SP_PB_COUNT); + for (i = 0; i < MLXSW_SP_PB_COUNT; i++) { + u32 bytes = mlxsw_sp_cells_bytes(mlxsw_sp, hdroom->bufs.buf[i].size_cells); + + if (i < DCBX_MAX_BUFFERS) + buf->buffer_size[i] = bytes; + buf->total_size += bytes; + } + + buf->total_size += mlxsw_sp_cells_bytes(mlxsw_sp, hdroom->int_buf.size_cells); + + for (prio = 0; prio < IEEE_8021Q_MAX_PRIORITIES; prio++) + buf->prio2buffer[prio] = hdroom->prios.prio[prio].buf_idx; + + return 0; +} + +static int 
mlxsw_sp_dcbnl_setbuffer(struct net_device *dev, struct dcbnl_buffer *buf) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_hdroom hdroom; + int prio; + int i; + + hdroom = *mlxsw_sp_port->hdroom; + + if (hdroom.mode != MLXSW_SP_HDROOM_MODE_TC) { + netdev_err(dev, "The use of dcbnl_setbuffer is only allowed if egress is configured using TC\n"); + return -EINVAL; + } + + for (prio = 0; prio < IEEE_8021Q_MAX_PRIORITIES; prio++) + hdroom.prios.prio[prio].set_buf_idx = buf->prio2buffer[prio]; + + BUILD_BUG_ON(DCBX_MAX_BUFFERS > MLXSW_SP_PB_COUNT); + for (i = 0; i < DCBX_MAX_BUFFERS; i++) + hdroom.bufs.buf[i].set_size_cells = mlxsw_sp_bytes_cells(mlxsw_sp, + buf->buffer_size[i]); + + mlxsw_sp_hdroom_prios_reset_buf_idx(&hdroom); + mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom); + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + return mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); +} + +static const struct dcbnl_rtnl_ops mlxsw_sp_dcbnl_ops = { + .ieee_getets = mlxsw_sp_dcbnl_ieee_getets, + .ieee_setets = mlxsw_sp_dcbnl_ieee_setets, + .ieee_getmaxrate = mlxsw_sp_dcbnl_ieee_getmaxrate, + .ieee_setmaxrate = mlxsw_sp_dcbnl_ieee_setmaxrate, + .ieee_getpfc = mlxsw_sp_dcbnl_ieee_getpfc, + .ieee_setpfc = mlxsw_sp_dcbnl_ieee_setpfc, + .ieee_setapp = mlxsw_sp_dcbnl_ieee_setapp, + .ieee_delapp = mlxsw_sp_dcbnl_ieee_delapp, + + .getdcbx = mlxsw_sp_dcbnl_getdcbx, + .setdcbx = mlxsw_sp_dcbnl_setdcbx, + + .dcbnl_getbuffer = mlxsw_sp_dcbnl_getbuffer, + .dcbnl_setbuffer = mlxsw_sp_dcbnl_setbuffer, +}; + +static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_port->dcb.ets = kzalloc(sizeof(*mlxsw_sp_port->dcb.ets), + GFP_KERNEL); + if (!mlxsw_sp_port->dcb.ets) + return -ENOMEM; + + mlxsw_sp_port->dcb.ets->ets_cap = IEEE_8021QAZ_MAX_TCS; + + return 0; +} + +static void mlxsw_sp_port_ets_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ + kfree(mlxsw_sp_port->dcb.ets); +} + +static int mlxsw_sp_port_maxrate_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int i; + + mlxsw_sp_port->dcb.maxrate = kmalloc(sizeof(*mlxsw_sp_port->dcb.maxrate), + GFP_KERNEL); + if (!mlxsw_sp_port->dcb.maxrate) + return -ENOMEM; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + mlxsw_sp_port->dcb.maxrate->tc_maxrate[i] = MLXSW_REG_QEEC_MAS_DIS; + + return 0; +} + +static void mlxsw_sp_port_maxrate_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ + kfree(mlxsw_sp_port->dcb.maxrate); +} + +static int mlxsw_sp_port_pfc_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_port->dcb.pfc = kzalloc(sizeof(*mlxsw_sp_port->dcb.pfc), + GFP_KERNEL); + if (!mlxsw_sp_port->dcb.pfc) + return -ENOMEM; + + mlxsw_sp_port->dcb.pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS; + + return 0; +} + +static void mlxsw_sp_port_pfc_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ + kfree(mlxsw_sp_port->dcb.pfc); +} + +int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err; + + err = mlxsw_sp_port_ets_init(mlxsw_sp_port); + if (err) + return err; + err = mlxsw_sp_port_maxrate_init(mlxsw_sp_port); + if (err) + goto err_port_maxrate_init; + err = mlxsw_sp_port_pfc_init(mlxsw_sp_port); + if (err) + goto err_port_pfc_init; + + mlxsw_sp_port->dcb.trust_state = MLXSW_REG_QPTS_TRUST_STATE_PCP; + mlxsw_sp_port->dev->dcbnl_ops = &mlxsw_sp_dcbnl_ops; + + return 0; + +err_port_pfc_init: + mlxsw_sp_port_maxrate_fini(mlxsw_sp_port); +err_port_maxrate_init: + mlxsw_sp_port_ets_fini(mlxsw_sp_port); + return err; +} + +void mlxsw_sp_port_dcb_fini(struct 
mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_port_pfc_fini(mlxsw_sp_port); + mlxsw_sp_port_maxrate_fini(mlxsw_sp_port); + mlxsw_sp_port_ets_fini(mlxsw_sp_port); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c new file mode 100644 index 000000000..5416093c0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -0,0 +1,1308 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */ + +#include <linux/kernel.h> +#include <linux/mutex.h> +#include <net/devlink.h> + +#include "spectrum.h" +#include "spectrum_dpipe.h" +#include "spectrum_router.h" + +enum mlxsw_sp_field_metadata_id { + MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT, + MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD, + MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP, + MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX, + MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE, + MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX, +}; + +static struct devlink_dpipe_field mlxsw_sp_dpipe_fields_metadata[] = { + { + .name = "erif_port", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT, + .bitwidth = 32, + .mapping_type = DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX, + }, + { + .name = "l3_forward", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD, + .bitwidth = 1, + }, + { + .name = "l3_drop", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP, + .bitwidth = 1, + }, + { + .name = "adj_index", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX, + .bitwidth = 32, + }, + { + .name = "adj_size", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE, + .bitwidth = 32, + }, + { + .name = "adj_hash_index", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX, + .bitwidth = 32, + }, +}; + +enum mlxsw_sp_dpipe_header_id { + MLXSW_SP_DPIPE_HEADER_METADATA, +}; + +static struct devlink_dpipe_header mlxsw_sp_dpipe_header_metadata = { + .name = "mlxsw_meta", + .id = MLXSW_SP_DPIPE_HEADER_METADATA, + .fields = mlxsw_sp_dpipe_fields_metadata, + .fields_count = ARRAY_SIZE(mlxsw_sp_dpipe_fields_metadata), +}; + +static struct devlink_dpipe_header *mlxsw_dpipe_headers[] = { + &mlxsw_sp_dpipe_header_metadata, + &devlink_dpipe_header_ethernet, + &devlink_dpipe_header_ipv4, + &devlink_dpipe_header_ipv6, +}; + +static struct devlink_dpipe_headers mlxsw_sp_dpipe_headers = { + .headers = mlxsw_dpipe_headers, + .headers_count = ARRAY_SIZE(mlxsw_dpipe_headers), +}; + +static int mlxsw_sp_dpipe_table_erif_actions_dump(void *priv, + struct sk_buff *skb) +{ + struct devlink_dpipe_action action = {0}; + int err; + + action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action.header = &mlxsw_sp_dpipe_header_metadata; + action.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD; + + err = devlink_dpipe_action_put(skb, &action); + if (err) + return err; + + action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action.header = &mlxsw_sp_dpipe_header_metadata; + action.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP; + + return devlink_dpipe_action_put(skb, &action); +} + +static int mlxsw_sp_dpipe_table_erif_matches_dump(void *priv, + struct sk_buff *skb) +{ + struct devlink_dpipe_match match = {0}; + + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &mlxsw_sp_dpipe_header_metadata; + match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; + + return devlink_dpipe_match_put(skb, &match); +} + +static void +mlxsw_sp_erif_match_action_prepare(struct devlink_dpipe_match *match, + struct devlink_dpipe_action *action) +{ + action->type = 
DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action->header = &mlxsw_sp_dpipe_header_metadata; + action->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD; + + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match->header = &mlxsw_sp_dpipe_header_metadata; + match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; +} + +static int mlxsw_sp_erif_entry_prepare(struct devlink_dpipe_entry *entry, + struct devlink_dpipe_value *match_value, + struct devlink_dpipe_match *match, + struct devlink_dpipe_value *action_value, + struct devlink_dpipe_action *action) +{ + entry->match_values = match_value; + entry->match_values_count = 1; + + entry->action_values = action_value; + entry->action_values_count = 1; + + match_value->match = match; + match_value->value_size = sizeof(u32); + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + action_value->action = action; + action_value->value_size = sizeof(u32); + action_value->value = kmalloc(action_value->value_size, GFP_KERNEL); + if (!action_value->value) + goto err_action_alloc; + return 0; + +err_action_alloc: + kfree(match_value->value); + return -ENOMEM; +} + +static int mlxsw_sp_erif_entry_get(struct mlxsw_sp *mlxsw_sp, + struct devlink_dpipe_entry *entry, + struct mlxsw_sp_rif *rif, + bool counters_enabled) +{ + u32 *action_value; + u32 *rif_value; + u64 cnt; + int err; + + /* Set Match RIF index */ + rif_value = entry->match_values->value; + *rif_value = mlxsw_sp_rif_index(rif); + entry->match_values->mapping_value = mlxsw_sp_rif_dev_ifindex(rif); + entry->match_values->mapping_valid = true; + + /* Set Action Forwarding */ + action_value = entry->action_values->value; + *action_value = 1; + + entry->counter_valid = false; + entry->counter = 0; + entry->index = mlxsw_sp_rif_index(rif); + + if (!counters_enabled) + return 0; + + err = mlxsw_sp_rif_counter_value_get(mlxsw_sp, rif, + MLXSW_SP_RIF_COUNTER_EGRESS, + &cnt); + if (!err) { + entry->counter = cnt; + entry->counter_valid = true; + } + return 0; +} + +static int +mlxsw_sp_dpipe_table_erif_entries_dump(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct devlink_dpipe_value match_value, action_value; + struct devlink_dpipe_action action = {0}; + struct devlink_dpipe_match match = {0}; + struct devlink_dpipe_entry entry = {0}; + struct mlxsw_sp *mlxsw_sp = priv; + unsigned int rif_count; + int i, j; + int err; + + memset(&match_value, 0, sizeof(match_value)); + memset(&action_value, 0, sizeof(action_value)); + + mlxsw_sp_erif_match_action_prepare(&match, &action); + err = mlxsw_sp_erif_entry_prepare(&entry, &match_value, &match, + &action_value, &action); + if (err) + return err; + + rif_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); + mutex_lock(&mlxsw_sp->router->lock); + i = 0; +start_again: + err = devlink_dpipe_entry_ctx_prepare(dump_ctx); + if (err) + goto err_ctx_prepare; + j = 0; + for (; i < rif_count; i++) { + struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); + + if (!rif || !mlxsw_sp_rif_dev(rif)) + continue; + err = mlxsw_sp_erif_entry_get(mlxsw_sp, &entry, rif, + counters_enabled); + if (err) + goto err_entry_get; + err = devlink_dpipe_entry_ctx_append(dump_ctx, &entry); + if (err) { + if (err == -EMSGSIZE) { + if (!j) + goto err_entry_append; + break; + } + goto err_entry_append; + } + j++; + } + + devlink_dpipe_entry_ctx_close(dump_ctx); + if (i != rif_count) + goto start_again; + mutex_unlock(&mlxsw_sp->router->lock); + + 
devlink_dpipe_entry_clear(&entry); + return 0; +err_entry_append: +err_entry_get: +err_ctx_prepare: + mutex_unlock(&mlxsw_sp->router->lock); + devlink_dpipe_entry_clear(&entry); + return err; +} + +static int mlxsw_sp_dpipe_table_erif_counters_update(void *priv, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = priv; + int i; + + mutex_lock(&mlxsw_sp->router->lock); + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { + struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); + + if (!rif) + continue; + if (enable) + mlxsw_sp_rif_counter_alloc(rif, + MLXSW_SP_RIF_COUNTER_EGRESS); + else + mlxsw_sp_rif_counter_free(rif, + MLXSW_SP_RIF_COUNTER_EGRESS); + } + mutex_unlock(&mlxsw_sp->router->lock); + return 0; +} + +static u64 mlxsw_sp_dpipe_table_erif_size_get(void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); +} + +static struct devlink_dpipe_table_ops mlxsw_sp_erif_ops = { + .matches_dump = mlxsw_sp_dpipe_table_erif_matches_dump, + .actions_dump = mlxsw_sp_dpipe_table_erif_actions_dump, + .entries_dump = mlxsw_sp_dpipe_table_erif_entries_dump, + .counters_set_update = mlxsw_sp_dpipe_table_erif_counters_update, + .size_get = mlxsw_sp_dpipe_table_erif_size_get, +}; + +static int mlxsw_sp_dpipe_erif_table_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + return devl_dpipe_table_register(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_ERIF, + &mlxsw_sp_erif_ops, + mlxsw_sp, false); +} + +static void mlxsw_sp_dpipe_erif_table_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + devl_dpipe_table_unregister(devlink, MLXSW_SP_DPIPE_TABLE_NAME_ERIF); +} + +static int mlxsw_sp_dpipe_table_host_matches_dump(struct sk_buff *skb, int type) +{ + struct devlink_dpipe_match match = {0}; + int err; + + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &mlxsw_sp_dpipe_header_metadata; + match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; + + err = devlink_dpipe_match_put(skb, &match); + if (err) + return err; + + switch (type) { + case AF_INET: + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &devlink_dpipe_header_ipv4; + match.field_id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP; + break; + case AF_INET6: + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &devlink_dpipe_header_ipv6; + match.field_id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP; + break; + default: + WARN_ON(1); + return -EINVAL; + } + + return devlink_dpipe_match_put(skb, &match); +} + +static int +mlxsw_sp_dpipe_table_host4_matches_dump(void *priv, struct sk_buff *skb) +{ + return mlxsw_sp_dpipe_table_host_matches_dump(skb, AF_INET); +} + +static int +mlxsw_sp_dpipe_table_host_actions_dump(void *priv, struct sk_buff *skb) +{ + struct devlink_dpipe_action action = {0}; + + action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action.header = &devlink_dpipe_header_ethernet; + action.field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC; + + return devlink_dpipe_action_put(skb, &action); +} + +enum mlxsw_sp_dpipe_table_host_match { + MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF, + MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP, + MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT, +}; + +static void +mlxsw_sp_dpipe_table_host_match_action_prepare(struct devlink_dpipe_match *matches, + struct devlink_dpipe_action *action, + int type) +{ + struct devlink_dpipe_match *match; + + match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF]; + match->type = 
DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match->header = &mlxsw_sp_dpipe_header_metadata; + match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; + + match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP]; + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + switch (type) { + case AF_INET: + match->header = &devlink_dpipe_header_ipv4; + match->field_id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP; + break; + case AF_INET6: + match->header = &devlink_dpipe_header_ipv6; + match->field_id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP; + break; + default: + WARN_ON(1); + return; + } + + action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action->header = &devlink_dpipe_header_ethernet; + action->field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC; +} + +static int +mlxsw_sp_dpipe_table_host_entry_prepare(struct devlink_dpipe_entry *entry, + struct devlink_dpipe_value *match_values, + struct devlink_dpipe_match *matches, + struct devlink_dpipe_value *action_value, + struct devlink_dpipe_action *action, + int type) +{ + struct devlink_dpipe_value *match_value; + struct devlink_dpipe_match *match; + + entry->match_values = match_values; + entry->match_values_count = MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT; + + entry->action_values = action_value; + entry->action_values_count = 1; + + match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF]; + match_value = &match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF]; + + match_value->match = match; + match_value->value_size = sizeof(u32); + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP]; + match_value = &match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP]; + + match_value->match = match; + switch (type) { + case AF_INET: + match_value->value_size = sizeof(u32); + break; + case AF_INET6: + match_value->value_size = sizeof(struct in6_addr); + break; + default: + WARN_ON(1); + return -EINVAL; + } + + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + action_value->action = action; + action_value->value_size = sizeof(u64); + action_value->value = kmalloc(action_value->value_size, GFP_KERNEL); + if (!action_value->value) + return -ENOMEM; + + return 0; +} + +static void +__mlxsw_sp_dpipe_table_host_entry_fill(struct devlink_dpipe_entry *entry, + struct mlxsw_sp_rif *rif, + unsigned char *ha, void *dip) +{ + struct devlink_dpipe_value *value; + u32 *rif_value; + u8 *ha_value; + + /* Set Match RIF index */ + value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF]; + + rif_value = value->value; + *rif_value = mlxsw_sp_rif_index(rif); + value->mapping_value = mlxsw_sp_rif_dev_ifindex(rif); + value->mapping_valid = true; + + /* Set Match DIP */ + value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP]; + memcpy(value->value, dip, value->value_size); + + /* Set Action DMAC */ + value = entry->action_values; + ha_value = value->value; + ether_addr_copy(ha_value, ha); +} + +static void +mlxsw_sp_dpipe_table_host4_entry_fill(struct devlink_dpipe_entry *entry, + struct mlxsw_sp_neigh_entry *neigh_entry, + struct mlxsw_sp_rif *rif) +{ + unsigned char *ha; + u32 dip; + + ha = mlxsw_sp_neigh_entry_ha(neigh_entry); + dip = mlxsw_sp_neigh4_entry_dip(neigh_entry); + __mlxsw_sp_dpipe_table_host_entry_fill(entry, rif, ha, &dip); +} + +static void +mlxsw_sp_dpipe_table_host6_entry_fill(struct devlink_dpipe_entry *entry, + struct mlxsw_sp_neigh_entry *neigh_entry, + struct mlxsw_sp_rif *rif) +{ + 
struct in6_addr *dip; + unsigned char *ha; + + ha = mlxsw_sp_neigh_entry_ha(neigh_entry); + dip = mlxsw_sp_neigh6_entry_dip(neigh_entry); + + __mlxsw_sp_dpipe_table_host_entry_fill(entry, rif, ha, dip); +} + +static void +mlxsw_sp_dpipe_table_host_entry_fill(struct mlxsw_sp *mlxsw_sp, + struct devlink_dpipe_entry *entry, + struct mlxsw_sp_neigh_entry *neigh_entry, + struct mlxsw_sp_rif *rif, + int type) +{ + int err; + + switch (type) { + case AF_INET: + mlxsw_sp_dpipe_table_host4_entry_fill(entry, neigh_entry, rif); + break; + case AF_INET6: + mlxsw_sp_dpipe_table_host6_entry_fill(entry, neigh_entry, rif); + break; + default: + WARN_ON(1); + return; + } + + err = mlxsw_sp_neigh_counter_get(mlxsw_sp, neigh_entry, + &entry->counter); + if (!err) + entry->counter_valid = true; +} + +static int +mlxsw_sp_dpipe_table_host_entries_get(struct mlxsw_sp *mlxsw_sp, + struct devlink_dpipe_entry *entry, + bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx, + int type) +{ + int rif_neigh_count = 0; + int rif_neigh_skip = 0; + int neigh_count = 0; + int rif_count; + int i, j; + int err; + + mutex_lock(&mlxsw_sp->router->lock); + i = 0; + rif_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); +start_again: + err = devlink_dpipe_entry_ctx_prepare(dump_ctx); + if (err) + goto err_ctx_prepare; + j = 0; + rif_neigh_skip = rif_neigh_count; + for (; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { + struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); + struct mlxsw_sp_neigh_entry *neigh_entry; + + if (!rif) + continue; + + rif_neigh_count = 0; + mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) { + int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry); + + if (neigh_type != type) + continue; + + if (neigh_type == AF_INET6 && + mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) + continue; + + if (rif_neigh_count < rif_neigh_skip) + goto skip; + + mlxsw_sp_dpipe_table_host_entry_fill(mlxsw_sp, entry, + neigh_entry, rif, + type); + entry->index = neigh_count; + err = devlink_dpipe_entry_ctx_append(dump_ctx, entry); + if (err) { + if (err == -EMSGSIZE) { + if (!j) + goto err_entry_append; + else + goto out; + } + goto err_entry_append; + } + neigh_count++; + j++; +skip: + rif_neigh_count++; + } + rif_neigh_skip = 0; + } +out: + devlink_dpipe_entry_ctx_close(dump_ctx); + if (i != rif_count) + goto start_again; + + mutex_unlock(&mlxsw_sp->router->lock); + return 0; + +err_ctx_prepare: +err_entry_append: + mutex_unlock(&mlxsw_sp->router->lock); + return err; +} + +static int +mlxsw_sp_dpipe_table_host_entries_dump(struct mlxsw_sp *mlxsw_sp, + bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx, + int type) +{ + struct devlink_dpipe_value match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT]; + struct devlink_dpipe_match matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT]; + struct devlink_dpipe_value action_value; + struct devlink_dpipe_action action = {0}; + struct devlink_dpipe_entry entry = {0}; + int err; + + memset(matches, 0, MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT * + sizeof(matches[0])); + memset(match_values, 0, MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT * + sizeof(match_values[0])); + memset(&action_value, 0, sizeof(action_value)); + + mlxsw_sp_dpipe_table_host_match_action_prepare(matches, &action, type); + err = mlxsw_sp_dpipe_table_host_entry_prepare(&entry, match_values, + matches, &action_value, + &action, type); + if (err) + goto out; + + err = mlxsw_sp_dpipe_table_host_entries_get(mlxsw_sp, &entry, + counters_enabled, dump_ctx, + type); +out: + 
devlink_dpipe_entry_clear(&entry); + return err; +} + +static int +mlxsw_sp_dpipe_table_host4_entries_dump(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return mlxsw_sp_dpipe_table_host_entries_dump(mlxsw_sp, + counters_enabled, + dump_ctx, AF_INET); +} + +static void +mlxsw_sp_dpipe_table_host_counters_update(struct mlxsw_sp *mlxsw_sp, + bool enable, int type) +{ + int i; + + mutex_lock(&mlxsw_sp->router->lock); + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { + struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); + struct mlxsw_sp_neigh_entry *neigh_entry; + + if (!rif) + continue; + mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) { + int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry); + + if (neigh_type != type) + continue; + + if (neigh_type == AF_INET6 && + mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) + continue; + + mlxsw_sp_neigh_entry_counter_update(mlxsw_sp, + neigh_entry, + enable); + } + } + mutex_unlock(&mlxsw_sp->router->lock); +} + +static int mlxsw_sp_dpipe_table_host4_counters_update(void *priv, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp_dpipe_table_host_counters_update(mlxsw_sp, enable, AF_INET); + return 0; +} + +static u64 +mlxsw_sp_dpipe_table_host_size_get(struct mlxsw_sp *mlxsw_sp, int type) +{ + u64 size = 0; + int i; + + mutex_lock(&mlxsw_sp->router->lock); + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { + struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); + struct mlxsw_sp_neigh_entry *neigh_entry; + + if (!rif) + continue; + mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) { + int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry); + + if (neigh_type != type) + continue; + + if (neigh_type == AF_INET6 && + mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) + continue; + + size++; + } + } + mutex_unlock(&mlxsw_sp->router->lock); + + return size; +} + +static u64 mlxsw_sp_dpipe_table_host4_size_get(void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return mlxsw_sp_dpipe_table_host_size_get(mlxsw_sp, AF_INET); +} + +static struct devlink_dpipe_table_ops mlxsw_sp_host4_ops = { + .matches_dump = mlxsw_sp_dpipe_table_host4_matches_dump, + .actions_dump = mlxsw_sp_dpipe_table_host_actions_dump, + .entries_dump = mlxsw_sp_dpipe_table_host4_entries_dump, + .counters_set_update = mlxsw_sp_dpipe_table_host4_counters_update, + .size_get = mlxsw_sp_dpipe_table_host4_size_get, +}; + +#define MLXSW_SP_DPIPE_TABLE_RESOURCE_UNIT_HOST4 1 + +static int mlxsw_sp_dpipe_host4_table_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + int err; + + err = devl_dpipe_table_register(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST4, + &mlxsw_sp_host4_ops, + mlxsw_sp, false); + if (err) + return err; + + err = devl_dpipe_table_resource_set(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST4, + MLXSW_SP_RESOURCE_KVD_HASH_SINGLE, + MLXSW_SP_DPIPE_TABLE_RESOURCE_UNIT_HOST4); + if (err) + goto err_resource_set; + + return 0; + +err_resource_set: + devl_dpipe_table_unregister(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST4); + return err; +} + +static void mlxsw_sp_dpipe_host4_table_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + devl_dpipe_table_unregister(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST4); +} + +static int +mlxsw_sp_dpipe_table_host6_matches_dump(void *priv, struct sk_buff *skb) +{ + return mlxsw_sp_dpipe_table_host_matches_dump(skb, AF_INET6); +} + +static 
int +mlxsw_sp_dpipe_table_host6_entries_dump(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return mlxsw_sp_dpipe_table_host_entries_dump(mlxsw_sp, + counters_enabled, + dump_ctx, AF_INET6); +} + +static int mlxsw_sp_dpipe_table_host6_counters_update(void *priv, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp_dpipe_table_host_counters_update(mlxsw_sp, enable, AF_INET6); + return 0; +} + +static u64 mlxsw_sp_dpipe_table_host6_size_get(void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return mlxsw_sp_dpipe_table_host_size_get(mlxsw_sp, AF_INET6); +} + +static struct devlink_dpipe_table_ops mlxsw_sp_host6_ops = { + .matches_dump = mlxsw_sp_dpipe_table_host6_matches_dump, + .actions_dump = mlxsw_sp_dpipe_table_host_actions_dump, + .entries_dump = mlxsw_sp_dpipe_table_host6_entries_dump, + .counters_set_update = mlxsw_sp_dpipe_table_host6_counters_update, + .size_get = mlxsw_sp_dpipe_table_host6_size_get, +}; + +#define MLXSW_SP_DPIPE_TABLE_RESOURCE_UNIT_HOST6 2 + +static int mlxsw_sp_dpipe_host6_table_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + int err; + + err = devl_dpipe_table_register(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST6, + &mlxsw_sp_host6_ops, + mlxsw_sp, false); + if (err) + return err; + + err = devl_dpipe_table_resource_set(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST6, + MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE, + MLXSW_SP_DPIPE_TABLE_RESOURCE_UNIT_HOST6); + if (err) + goto err_resource_set; + + return 0; + +err_resource_set: + devl_dpipe_table_unregister(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST6); + return err; +} + +static void mlxsw_sp_dpipe_host6_table_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + devl_dpipe_table_unregister(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST6); +} + +static int mlxsw_sp_dpipe_table_adj_matches_dump(void *priv, + struct sk_buff *skb) +{ + struct devlink_dpipe_match match = {0}; + int err; + + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &mlxsw_sp_dpipe_header_metadata; + match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX; + + err = devlink_dpipe_match_put(skb, &match); + if (err) + return err; + + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &mlxsw_sp_dpipe_header_metadata; + match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE; + + err = devlink_dpipe_match_put(skb, &match); + if (err) + return err; + + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &mlxsw_sp_dpipe_header_metadata; + match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX; + + return devlink_dpipe_match_put(skb, &match); +} + +static int mlxsw_sp_dpipe_table_adj_actions_dump(void *priv, + struct sk_buff *skb) +{ + struct devlink_dpipe_action action = {0}; + int err; + + action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action.header = &devlink_dpipe_header_ethernet; + action.field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC; + + err = devlink_dpipe_action_put(skb, &action); + if (err) + return err; + + action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action.header = &mlxsw_sp_dpipe_header_metadata; + action.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; + + return devlink_dpipe_action_put(skb, &action); +} + +static u64 mlxsw_sp_dpipe_table_adj_size(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_nexthop *nh; + u64 size = 0; + + mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) + if 
(mlxsw_sp_nexthop_is_forward(nh) && + !mlxsw_sp_nexthop_group_has_ipip(nh)) + size++; + return size; +} + +enum mlxsw_sp_dpipe_table_adj_match { + MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX, + MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE, + MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX, + MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT, +}; + +enum mlxsw_sp_dpipe_table_adj_action { + MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC, + MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT, + MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT, +}; + +static void +mlxsw_sp_dpipe_table_adj_match_action_prepare(struct devlink_dpipe_match *matches, + struct devlink_dpipe_action *actions) +{ + struct devlink_dpipe_action *action; + struct devlink_dpipe_match *match; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX]; + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match->header = &mlxsw_sp_dpipe_header_metadata; + match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE]; + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match->header = &mlxsw_sp_dpipe_header_metadata; + match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX]; + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match->header = &mlxsw_sp_dpipe_header_metadata; + match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX; + + action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC]; + action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action->header = &devlink_dpipe_header_ethernet; + action->field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC; + + action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT]; + action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action->header = &mlxsw_sp_dpipe_header_metadata; + action->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; +} + +static int +mlxsw_sp_dpipe_table_adj_entry_prepare(struct devlink_dpipe_entry *entry, + struct devlink_dpipe_value *match_values, + struct devlink_dpipe_match *matches, + struct devlink_dpipe_value *action_values, + struct devlink_dpipe_action *actions) +{ struct devlink_dpipe_value *action_value; + struct devlink_dpipe_value *match_value; + struct devlink_dpipe_action *action; + struct devlink_dpipe_match *match; + + entry->match_values = match_values; + entry->match_values_count = MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT; + + entry->action_values = action_values; + entry->action_values_count = MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX]; + match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX]; + + match_value->match = match; + match_value->value_size = sizeof(u32); + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE]; + match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE]; + + match_value->match = match; + match_value->value_size = sizeof(u32); + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX]; + match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX]; + + match_value->match = match; + match_value->value_size = sizeof(u32); + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC]; + action_value = 
&action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC]; + + action_value->action = action; + action_value->value_size = sizeof(u64); + action_value->value = kmalloc(action_value->value_size, GFP_KERNEL); + if (!action_value->value) + return -ENOMEM; + + action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT]; + action_value = &action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT]; + + action_value->action = action; + action_value->value_size = sizeof(u32); + action_value->value = kmalloc(action_value->value_size, GFP_KERNEL); + if (!action_value->value) + return -ENOMEM; + + return 0; +} + +static void +__mlxsw_sp_dpipe_table_adj_entry_fill(struct devlink_dpipe_entry *entry, + u32 adj_index, u32 adj_size, + u32 adj_hash_index, unsigned char *ha, + struct mlxsw_sp_rif *rif) +{ + struct devlink_dpipe_value *value; + u32 *p_rif_value; + u32 *p_index; + + value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX]; + p_index = value->value; + *p_index = adj_index; + + value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE]; + p_index = value->value; + *p_index = adj_size; + + value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX]; + p_index = value->value; + *p_index = adj_hash_index; + + value = &entry->action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC]; + ether_addr_copy(value->value, ha); + + value = &entry->action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT]; + p_rif_value = value->value; + *p_rif_value = mlxsw_sp_rif_index(rif); + value->mapping_value = mlxsw_sp_rif_dev_ifindex(rif); + value->mapping_valid = true; +} + +static void mlxsw_sp_dpipe_table_adj_entry_fill(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, + struct devlink_dpipe_entry *entry) +{ + struct mlxsw_sp_rif *rif = mlxsw_sp_nexthop_rif(nh); + unsigned char *ha = mlxsw_sp_nexthop_ha(nh); + u32 adj_hash_index = 0; + u32 adj_index = 0; + u32 adj_size = 0; + int err; + + mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size, &adj_hash_index); + __mlxsw_sp_dpipe_table_adj_entry_fill(entry, adj_index, adj_size, + adj_hash_index, ha, rif); + err = mlxsw_sp_nexthop_counter_get(mlxsw_sp, nh, &entry->counter); + if (!err) + entry->counter_valid = true; +} + +static int +mlxsw_sp_dpipe_table_adj_entries_get(struct mlxsw_sp *mlxsw_sp, + struct devlink_dpipe_entry *entry, + bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct mlxsw_sp_nexthop *nh; + int entry_index = 0; + int nh_count_max; + int nh_count = 0; + int nh_skip; + int j; + int err; + + mutex_lock(&mlxsw_sp->router->lock); + nh_count_max = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp); +start_again: + err = devlink_dpipe_entry_ctx_prepare(dump_ctx); + if (err) + goto err_ctx_prepare; + j = 0; + nh_skip = nh_count; + nh_count = 0; + mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) { + if (!mlxsw_sp_nexthop_is_forward(nh) || + mlxsw_sp_nexthop_group_has_ipip(nh)) + continue; + + if (nh_count < nh_skip) + goto skip; + + mlxsw_sp_dpipe_table_adj_entry_fill(mlxsw_sp, nh, entry); + entry->index = entry_index; + err = devlink_dpipe_entry_ctx_append(dump_ctx, entry); + if (err) { + if (err == -EMSGSIZE) { + if (!j) + goto err_entry_append; + break; + } + goto err_entry_append; + } + entry_index++; + j++; +skip: + nh_count++; + } + + devlink_dpipe_entry_ctx_close(dump_ctx); + if (nh_count != nh_count_max) + goto start_again; + mutex_unlock(&mlxsw_sp->router->lock); + + return 0; + +err_ctx_prepare: +err_entry_append: + mutex_unlock(&mlxsw_sp->router->lock); + return err; +} + +static int 
+mlxsw_sp_dpipe_table_adj_entries_dump(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct devlink_dpipe_value action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT]; + struct devlink_dpipe_value match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT]; + struct devlink_dpipe_action actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT]; + struct devlink_dpipe_match matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT]; + struct devlink_dpipe_entry entry = {0}; + struct mlxsw_sp *mlxsw_sp = priv; + int err; + + memset(matches, 0, MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT * + sizeof(matches[0])); + memset(match_values, 0, MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT * + sizeof(match_values[0])); + memset(actions, 0, MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT * + sizeof(actions[0])); + memset(action_values, 0, MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT * + sizeof(action_values[0])); + + mlxsw_sp_dpipe_table_adj_match_action_prepare(matches, actions); + err = mlxsw_sp_dpipe_table_adj_entry_prepare(&entry, + match_values, matches, + action_values, actions); + if (err) + goto out; + + err = mlxsw_sp_dpipe_table_adj_entries_get(mlxsw_sp, &entry, + counters_enabled, dump_ctx); +out: + devlink_dpipe_entry_clear(&entry); + return err; +} + +static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable) +{ + char ratr_pl[MLXSW_REG_RATR_LEN]; + struct mlxsw_sp *mlxsw_sp = priv; + struct mlxsw_sp_nexthop *nh; + u32 adj_hash_index = 0; + u32 adj_index = 0; + u32 adj_size = 0; + + mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) { + if (!mlxsw_sp_nexthop_is_forward(nh) || + mlxsw_sp_nexthop_group_has_ipip(nh)) + continue; + + mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size, + &adj_hash_index); + if (enable) + mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); + else + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); + mlxsw_sp_nexthop_eth_update(mlxsw_sp, + adj_index + adj_hash_index, nh, + true, ratr_pl); + } + return 0; +} + +static u64 +mlxsw_sp_dpipe_table_adj_size_get(void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + u64 size; + + mutex_lock(&mlxsw_sp->router->lock); + size = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp); + mutex_unlock(&mlxsw_sp->router->lock); + + return size; +} + +static struct devlink_dpipe_table_ops mlxsw_sp_dpipe_table_adj_ops = { + .matches_dump = mlxsw_sp_dpipe_table_adj_matches_dump, + .actions_dump = mlxsw_sp_dpipe_table_adj_actions_dump, + .entries_dump = mlxsw_sp_dpipe_table_adj_entries_dump, + .counters_set_update = mlxsw_sp_dpipe_table_adj_counters_update, + .size_get = mlxsw_sp_dpipe_table_adj_size_get, +}; + +#define MLXSW_SP_DPIPE_TABLE_RESOURCE_UNIT_ADJ 1 + +static int mlxsw_sp_dpipe_adj_table_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + int err; + + err = devl_dpipe_table_register(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_ADJ, + &mlxsw_sp_dpipe_table_adj_ops, + mlxsw_sp, false); + if (err) + return err; + + err = devl_dpipe_table_resource_set(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_ADJ, + MLXSW_SP_RESOURCE_KVD_LINEAR, + MLXSW_SP_DPIPE_TABLE_RESOURCE_UNIT_ADJ); + if (err) + goto err_resource_set; + + return 0; + +err_resource_set: + devl_dpipe_table_unregister(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_ADJ); + return err; +} + +static void mlxsw_sp_dpipe_adj_table_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + devl_dpipe_table_unregister(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_ADJ); +} + +int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = 
priv_to_devlink(mlxsw_sp->core); + int err; + + devl_dpipe_headers_register(devlink, &mlxsw_sp_dpipe_headers); + + err = mlxsw_sp_dpipe_erif_table_init(mlxsw_sp); + if (err) + goto err_erif_table_init; + + err = mlxsw_sp_dpipe_host4_table_init(mlxsw_sp); + if (err) + goto err_host4_table_init; + + err = mlxsw_sp_dpipe_host6_table_init(mlxsw_sp); + if (err) + goto err_host6_table_init; + + err = mlxsw_sp_dpipe_adj_table_init(mlxsw_sp); + if (err) + goto err_adj_table_init; + + return 0; +err_adj_table_init: + mlxsw_sp_dpipe_host6_table_fini(mlxsw_sp); +err_host6_table_init: + mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp); +err_host4_table_init: + mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp); +err_erif_table_init: + devl_dpipe_headers_unregister(priv_to_devlink(mlxsw_sp->core)); + return err; +} + +void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + mlxsw_sp_dpipe_adj_table_fini(mlxsw_sp); + mlxsw_sp_dpipe_host6_table_fini(mlxsw_sp); + mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp); + mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp); + devl_dpipe_headers_unregister(devlink); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h new file mode 100644 index 000000000..246dbb3c0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_PIPELINE_H_ +#define _MLXSW_PIPELINE_H_ + +int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp); + +#define MLXSW_SP_DPIPE_TABLE_NAME_ERIF "mlxsw_erif" +#define MLXSW_SP_DPIPE_TABLE_NAME_HOST4 "mlxsw_host4" +#define MLXSW_SP_DPIPE_TABLE_NAME_HOST6 "mlxsw_host6" +#define MLXSW_SP_DPIPE_TABLE_NAME_ADJ "mlxsw_adj" + +#endif /* _MLXSW_PIPELINE_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c new file mode 100644 index 000000000..dcd79d7e2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -0,0 +1,2023 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2020 Mellanox Technologies. 
All rights reserved */ + +#include "reg.h" +#include "core.h" +#include "spectrum.h" +#include "core_env.h" + +static const char mlxsw_sp_driver_version[] = "1.0"; + +static void mlxsw_sp_port_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + strscpy(drvinfo->driver, mlxsw_sp->bus_info->device_kind, + sizeof(drvinfo->driver)); + strscpy(drvinfo->version, mlxsw_sp_driver_version, + sizeof(drvinfo->version)); + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%d", + mlxsw_sp->bus_info->fw_rev.major, + mlxsw_sp->bus_info->fw_rev.minor, + mlxsw_sp->bus_info->fw_rev.subminor); + strscpy(drvinfo->bus_info, mlxsw_sp->bus_info->device_name, + sizeof(drvinfo->bus_info)); +} + +struct mlxsw_sp_ethtool_link_ext_state_opcode_mapping { + u32 status_opcode; + enum ethtool_link_ext_state link_ext_state; + u8 link_ext_substate; +}; + +static const struct mlxsw_sp_ethtool_link_ext_state_opcode_mapping +mlxsw_sp_link_ext_state_opcode_map[] = { + {2, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED}, + {3, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED}, + {4, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NEXT_PAGE_EXCHANGE_FAILED}, + {36, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED_FORCE_MODE}, + {38, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_FEC_MISMATCH_DURING_OVERRIDE}, + {39, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD}, + + {5, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED}, + {6, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_INHIBIT_TIMEOUT}, + {7, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_PARTNER_DID_NOT_SET_RECEIVER_READY}, + {8, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, 0}, + {14, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT}, + + {9, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK}, + {10, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_AM_LOCK}, + {11, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_GET_ALIGN_STATUS}, + {12, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_FC_FEC_IS_NOT_LOCKED}, + {13, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED}, + + {15, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, 0}, + {17, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS}, + {42, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE}, + + {1024, ETHTOOL_LINK_EXT_STATE_NO_CABLE, 0}, + + {16, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {20, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {29, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {1025, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {1029, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {1031, 
ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, 0}, + + {1027, ETHTOOL_LINK_EXT_STATE_EEPROM_ISSUE, 0}, + + {23, ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE, 0}, + + {1032, ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED, 0}, + + {1030, ETHTOOL_LINK_EXT_STATE_OVERHEAT, 0}, + + {1042, ETHTOOL_LINK_EXT_STATE_MODULE, + ETHTOOL_LINK_EXT_SUBSTATE_MODULE_CMIS_NOT_READY}, +}; + +static void +mlxsw_sp_port_set_link_ext_state(struct mlxsw_sp_ethtool_link_ext_state_opcode_mapping + link_ext_state_mapping, + struct ethtool_link_ext_state_info *link_ext_state_info) +{ + switch (link_ext_state_mapping.link_ext_state) { + case ETHTOOL_LINK_EXT_STATE_AUTONEG: + link_ext_state_info->autoneg = + link_ext_state_mapping.link_ext_substate; + break; + case ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE: + link_ext_state_info->link_training = + link_ext_state_mapping.link_ext_substate; + break; + case ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH: + link_ext_state_info->link_logical_mismatch = + link_ext_state_mapping.link_ext_substate; + break; + case ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY: + link_ext_state_info->bad_signal_integrity = + link_ext_state_mapping.link_ext_substate; + break; + case ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE: + link_ext_state_info->cable_issue = + link_ext_state_mapping.link_ext_substate; + break; + case ETHTOOL_LINK_EXT_STATE_MODULE: + link_ext_state_info->module = + link_ext_state_mapping.link_ext_substate; + break; + default: + break; + } + + link_ext_state_info->link_ext_state = link_ext_state_mapping.link_ext_state; +} + +static int +mlxsw_sp_port_get_link_ext_state(struct net_device *dev, + struct ethtool_link_ext_state_info *link_ext_state_info) +{ + struct mlxsw_sp_ethtool_link_ext_state_opcode_mapping link_ext_state_mapping; + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + char pddr_pl[MLXSW_REG_PDDR_LEN]; + int opcode, err, i; + u32 status_opcode; + + if (netif_carrier_ok(dev)) + return -ENODATA; + + mlxsw_reg_pddr_pack(pddr_pl, mlxsw_sp_port->local_port, + MLXSW_REG_PDDR_PAGE_SELECT_TROUBLESHOOTING_INFO); + + opcode = MLXSW_REG_PDDR_TRBLSH_GROUP_OPCODE_MONITOR; + mlxsw_reg_pddr_trblsh_group_opcode_set(pddr_pl, opcode); + + err = mlxsw_reg_query(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pddr), + pddr_pl); + if (err) + return err; + + status_opcode = mlxsw_reg_pddr_trblsh_status_opcode_get(pddr_pl); + if (!status_opcode) + return -ENODATA; + + for (i = 0; i < ARRAY_SIZE(mlxsw_sp_link_ext_state_opcode_map); i++) { + link_ext_state_mapping = mlxsw_sp_link_ext_state_opcode_map[i]; + if (link_ext_state_mapping.status_opcode == status_opcode) { + mlxsw_sp_port_set_link_ext_state(link_ext_state_mapping, + link_ext_state_info); + return 0; + } + } + + return -ENODATA; +} + +static void mlxsw_sp_port_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + pause->rx_pause = mlxsw_sp_port->link.rx_pause; + pause->tx_pause = mlxsw_sp_port->link.tx_pause; +} + +static int mlxsw_sp_port_pause_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct ethtool_pauseparam *pause) +{ + char pfcc_pl[MLXSW_REG_PFCC_LEN]; + + mlxsw_reg_pfcc_pack(pfcc_pl, mlxsw_sp_port->local_port); + mlxsw_reg_pfcc_pprx_set(pfcc_pl, pause->rx_pause); + mlxsw_reg_pfcc_pptx_set(pfcc_pl, pause->tx_pause); + + return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pfcc), + pfcc_pl); +} + +/* Maximum delay buffer needed in case of PAUSE frames. Similar to PFC delay, but is + * measured in bytes. 
Assumes 100m cable and does not take into account MTU. + */ +#define MLXSW_SP_PAUSE_DELAY_BYTES 19476 + +static int mlxsw_sp_port_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + bool pause_en = pause->tx_pause || pause->rx_pause; + struct mlxsw_sp_hdroom orig_hdroom; + struct mlxsw_sp_hdroom hdroom; + int prio; + int err; + + if (mlxsw_sp_port->dcb.pfc && mlxsw_sp_port->dcb.pfc->pfc_en) { + netdev_err(dev, "PFC already enabled on port\n"); + return -EINVAL; + } + + if (pause->autoneg) { + netdev_err(dev, "PAUSE frames autonegotiation isn't supported\n"); + return -EINVAL; + } + + orig_hdroom = *mlxsw_sp_port->hdroom; + + hdroom = orig_hdroom; + if (pause_en) + hdroom.delay_bytes = MLXSW_SP_PAUSE_DELAY_BYTES; + else + hdroom.delay_bytes = 0; + + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) + hdroom.prios.prio[prio].lossy = !pause_en; + + mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom); + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + + err = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); + if (err) { + netdev_err(dev, "Failed to configure port's headroom\n"); + return err; + } + + err = mlxsw_sp_port_pause_set(mlxsw_sp_port, pause); + if (err) { + netdev_err(dev, "Failed to set PAUSE parameters\n"); + goto err_port_pause_configure; + } + + mlxsw_sp_port->link.rx_pause = pause->rx_pause; + mlxsw_sp_port->link.tx_pause = pause->tx_pause; + + return 0; + +err_port_pause_configure: + mlxsw_sp_hdroom_configure(mlxsw_sp_port, &orig_hdroom); + return err; +} + +struct mlxsw_sp_port_hw_stats { + char str[ETH_GSTRING_LEN]; + u64 (*getter)(const char *payload); + bool cells_bytes; +}; + +static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_stats[] = { + { + .str = "a_frames_transmitted_ok", + .getter = mlxsw_reg_ppcnt_a_frames_transmitted_ok_get, + }, + { + .str = "a_frames_received_ok", + .getter = mlxsw_reg_ppcnt_a_frames_received_ok_get, + }, + { + .str = "a_frame_check_sequence_errors", + .getter = mlxsw_reg_ppcnt_a_frame_check_sequence_errors_get, + }, + { + .str = "a_alignment_errors", + .getter = mlxsw_reg_ppcnt_a_alignment_errors_get, + }, + { + .str = "a_octets_transmitted_ok", + .getter = mlxsw_reg_ppcnt_a_octets_transmitted_ok_get, + }, + { + .str = "a_octets_received_ok", + .getter = mlxsw_reg_ppcnt_a_octets_received_ok_get, + }, + { + .str = "a_multicast_frames_xmitted_ok", + .getter = mlxsw_reg_ppcnt_a_multicast_frames_xmitted_ok_get, + }, + { + .str = "a_broadcast_frames_xmitted_ok", + .getter = mlxsw_reg_ppcnt_a_broadcast_frames_xmitted_ok_get, + }, + { + .str = "a_multicast_frames_received_ok", + .getter = mlxsw_reg_ppcnt_a_multicast_frames_received_ok_get, + }, + { + .str = "a_broadcast_frames_received_ok", + .getter = mlxsw_reg_ppcnt_a_broadcast_frames_received_ok_get, + }, + { + .str = "a_in_range_length_errors", + .getter = mlxsw_reg_ppcnt_a_in_range_length_errors_get, + }, + { + .str = "a_out_of_range_length_field", + .getter = mlxsw_reg_ppcnt_a_out_of_range_length_field_get, + }, + { + .str = "a_frame_too_long_errors", + .getter = mlxsw_reg_ppcnt_a_frame_too_long_errors_get, + }, + { + .str = "a_symbol_error_during_carrier", + .getter = mlxsw_reg_ppcnt_a_symbol_error_during_carrier_get, + }, + { + .str = "a_mac_control_frames_transmitted", + .getter = mlxsw_reg_ppcnt_a_mac_control_frames_transmitted_get, + }, + { + .str = "a_mac_control_frames_received", + .getter = mlxsw_reg_ppcnt_a_mac_control_frames_received_get, + }, + { + .str = "a_unsupported_opcodes_received", 
+ .getter = mlxsw_reg_ppcnt_a_unsupported_opcodes_received_get, + }, + { + .str = "a_pause_mac_ctrl_frames_received", + .getter = mlxsw_reg_ppcnt_a_pause_mac_ctrl_frames_received_get, + }, + { + .str = "a_pause_mac_ctrl_frames_xmitted", + .getter = mlxsw_reg_ppcnt_a_pause_mac_ctrl_frames_transmitted_get, + }, +}; + +#define MLXSW_SP_PORT_HW_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_stats) + +static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_rfc_2863_stats[] = { + { + .str = "if_in_discards", + .getter = mlxsw_reg_ppcnt_if_in_discards_get, + }, + { + .str = "if_out_discards", + .getter = mlxsw_reg_ppcnt_if_out_discards_get, + }, + { + .str = "if_out_errors", + .getter = mlxsw_reg_ppcnt_if_out_errors_get, + }, +}; + +#define MLXSW_SP_PORT_HW_RFC_2863_STATS_LEN \ + ARRAY_SIZE(mlxsw_sp_port_hw_rfc_2863_stats) + +static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_rfc_2819_stats[] = { + { + .str = "ether_stats_undersize_pkts", + .getter = mlxsw_reg_ppcnt_ether_stats_undersize_pkts_get, + }, + { + .str = "ether_stats_oversize_pkts", + .getter = mlxsw_reg_ppcnt_ether_stats_oversize_pkts_get, + }, + { + .str = "ether_stats_fragments", + .getter = mlxsw_reg_ppcnt_ether_stats_fragments_get, + }, + { + .str = "ether_pkts64octets", + .getter = mlxsw_reg_ppcnt_ether_stats_pkts64octets_get, + }, + { + .str = "ether_pkts65to127octets", + .getter = mlxsw_reg_ppcnt_ether_stats_pkts65to127octets_get, + }, + { + .str = "ether_pkts128to255octets", + .getter = mlxsw_reg_ppcnt_ether_stats_pkts128to255octets_get, + }, + { + .str = "ether_pkts256to511octets", + .getter = mlxsw_reg_ppcnt_ether_stats_pkts256to511octets_get, + }, + { + .str = "ether_pkts512to1023octets", + .getter = mlxsw_reg_ppcnt_ether_stats_pkts512to1023octets_get, + }, + { + .str = "ether_pkts1024to1518octets", + .getter = mlxsw_reg_ppcnt_ether_stats_pkts1024to1518octets_get, + }, + { + .str = "ether_pkts1519to2047octets", + .getter = mlxsw_reg_ppcnt_ether_stats_pkts1519to2047octets_get, + }, + { + .str = "ether_pkts2048to4095octets", + .getter = mlxsw_reg_ppcnt_ether_stats_pkts2048to4095octets_get, + }, + { + .str = "ether_pkts4096to8191octets", + .getter = mlxsw_reg_ppcnt_ether_stats_pkts4096to8191octets_get, + }, + { + .str = "ether_pkts8192to10239octets", + .getter = mlxsw_reg_ppcnt_ether_stats_pkts8192to10239octets_get, + }, +}; + +#define MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN \ + ARRAY_SIZE(mlxsw_sp_port_hw_rfc_2819_stats) + +static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_rfc_3635_stats[] = { + { + .str = "dot3stats_fcs_errors", + .getter = mlxsw_reg_ppcnt_dot3stats_fcs_errors_get, + }, + { + .str = "dot3stats_symbol_errors", + .getter = mlxsw_reg_ppcnt_dot3stats_symbol_errors_get, + }, + { + .str = "dot3control_in_unknown_opcodes", + .getter = mlxsw_reg_ppcnt_dot3control_in_unknown_opcodes_get, + }, + { + .str = "dot3in_pause_frames", + .getter = mlxsw_reg_ppcnt_dot3in_pause_frames_get, + }, +}; + +#define MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN \ + ARRAY_SIZE(mlxsw_sp_port_hw_rfc_3635_stats) + +static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_ext_stats[] = { + { + .str = "ecn_marked", + .getter = mlxsw_reg_ppcnt_ecn_marked_get, + }, +}; + +#define MLXSW_SP_PORT_HW_EXT_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_ext_stats) + +static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_discard_stats[] = { + { + .str = "discard_ingress_general", + .getter = mlxsw_reg_ppcnt_ingress_general_get, + }, + { + .str = "discard_ingress_policy_engine", + .getter = mlxsw_reg_ppcnt_ingress_policy_engine_get, + }, + { + .str = 
"discard_ingress_vlan_membership", + .getter = mlxsw_reg_ppcnt_ingress_vlan_membership_get, + }, + { + .str = "discard_ingress_tag_frame_type", + .getter = mlxsw_reg_ppcnt_ingress_tag_frame_type_get, + }, + { + .str = "discard_egress_vlan_membership", + .getter = mlxsw_reg_ppcnt_egress_vlan_membership_get, + }, + { + .str = "discard_loopback_filter", + .getter = mlxsw_reg_ppcnt_loopback_filter_get, + }, + { + .str = "discard_egress_general", + .getter = mlxsw_reg_ppcnt_egress_general_get, + }, + { + .str = "discard_egress_hoq", + .getter = mlxsw_reg_ppcnt_egress_hoq_get, + }, + { + .str = "discard_egress_policy_engine", + .getter = mlxsw_reg_ppcnt_egress_policy_engine_get, + }, + { + .str = "discard_ingress_tx_link_down", + .getter = mlxsw_reg_ppcnt_ingress_tx_link_down_get, + }, + { + .str = "discard_egress_stp_filter", + .getter = mlxsw_reg_ppcnt_egress_stp_filter_get, + }, + { + .str = "discard_egress_sll", + .getter = mlxsw_reg_ppcnt_egress_sll_get, + }, +}; + +#define MLXSW_SP_PORT_HW_DISCARD_STATS_LEN \ + ARRAY_SIZE(mlxsw_sp_port_hw_discard_stats) + +static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_prio_stats[] = { + { + .str = "rx_octets_prio", + .getter = mlxsw_reg_ppcnt_rx_octets_get, + }, + { + .str = "rx_frames_prio", + .getter = mlxsw_reg_ppcnt_rx_frames_get, + }, + { + .str = "tx_octets_prio", + .getter = mlxsw_reg_ppcnt_tx_octets_get, + }, + { + .str = "tx_frames_prio", + .getter = mlxsw_reg_ppcnt_tx_frames_get, + }, + { + .str = "rx_pause_prio", + .getter = mlxsw_reg_ppcnt_rx_pause_get, + }, + { + .str = "rx_pause_duration_prio", + .getter = mlxsw_reg_ppcnt_rx_pause_duration_get, + }, + { + .str = "tx_pause_prio", + .getter = mlxsw_reg_ppcnt_tx_pause_get, + }, + { + .str = "tx_pause_duration_prio", + .getter = mlxsw_reg_ppcnt_tx_pause_duration_get, + }, +}; + +#define MLXSW_SP_PORT_HW_PRIO_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_prio_stats) + +static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_tc_stats[] = { + { + .str = "tc_transmit_queue_tc", + .getter = mlxsw_reg_ppcnt_tc_transmit_queue_get, + .cells_bytes = true, + }, + { + .str = "tc_no_buffer_discard_uc_tc", + .getter = mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get, + }, +}; + +#define MLXSW_SP_PORT_HW_TC_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_tc_stats) + +struct mlxsw_sp_port_stats { + char str[ETH_GSTRING_LEN]; + u64 (*getter)(struct mlxsw_sp_port *mlxsw_sp_port); +}; + +static u64 +mlxsw_sp_port_get_transceiver_overheat_stats(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_core *mlxsw_core = mlxsw_sp_port->mlxsw_sp->core; + u8 slot_index = mlxsw_sp_port->mapping.slot_index; + u8 module = mlxsw_sp_port->mapping.module; + u64 stats; + int err; + + err = mlxsw_env_module_overheat_counter_get(mlxsw_core, slot_index, + module, &stats); + if (err) + return mlxsw_sp_port->module_overheat_initial_val; + + return stats - mlxsw_sp_port->module_overheat_initial_val; +} + +static struct mlxsw_sp_port_stats mlxsw_sp_port_transceiver_stats[] = { + { + .str = "transceiver_overheat", + .getter = mlxsw_sp_port_get_transceiver_overheat_stats, + }, +}; + +#define MLXSW_SP_PORT_HW_TRANSCEIVER_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_transceiver_stats) + +#define MLXSW_SP_PORT_ETHTOOL_STATS_LEN (MLXSW_SP_PORT_HW_STATS_LEN + \ + MLXSW_SP_PORT_HW_RFC_2863_STATS_LEN + \ + MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN + \ + MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN + \ + MLXSW_SP_PORT_HW_EXT_STATS_LEN + \ + MLXSW_SP_PORT_HW_DISCARD_STATS_LEN + \ + (MLXSW_SP_PORT_HW_PRIO_STATS_LEN * \ + IEEE_8021QAZ_MAX_TCS) + \ + (MLXSW_SP_PORT_HW_TC_STATS_LEN * \ + 
TC_MAX_QUEUE) + \ + MLXSW_SP_PORT_HW_TRANSCEIVER_STATS_LEN) + +static void mlxsw_sp_port_get_prio_strings(u8 **p, int prio) +{ + int i; + + for (i = 0; i < MLXSW_SP_PORT_HW_PRIO_STATS_LEN; i++) { + snprintf(*p, ETH_GSTRING_LEN, "%.29s_%.1d", + mlxsw_sp_port_hw_prio_stats[i].str, prio); + *p += ETH_GSTRING_LEN; + } +} + +static void mlxsw_sp_port_get_tc_strings(u8 **p, int tc) +{ + int i; + + for (i = 0; i < MLXSW_SP_PORT_HW_TC_STATS_LEN; i++) { + snprintf(*p, ETH_GSTRING_LEN, "%.29s_%.1d", + mlxsw_sp_port_hw_tc_stats[i].str, tc); + *p += ETH_GSTRING_LEN; + } +} + +static void mlxsw_sp_port_get_strings(struct net_device *dev, + u32 stringset, u8 *data) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + u8 *p = data; + int i; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < MLXSW_SP_PORT_HW_STATS_LEN; i++) { + memcpy(p, mlxsw_sp_port_hw_stats[i].str, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + + for (i = 0; i < MLXSW_SP_PORT_HW_RFC_2863_STATS_LEN; i++) { + memcpy(p, mlxsw_sp_port_hw_rfc_2863_stats[i].str, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + + for (i = 0; i < MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN; i++) { + memcpy(p, mlxsw_sp_port_hw_rfc_2819_stats[i].str, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + + for (i = 0; i < MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN; i++) { + memcpy(p, mlxsw_sp_port_hw_rfc_3635_stats[i].str, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + + for (i = 0; i < MLXSW_SP_PORT_HW_EXT_STATS_LEN; i++) { + memcpy(p, mlxsw_sp_port_hw_ext_stats[i].str, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + + for (i = 0; i < MLXSW_SP_PORT_HW_DISCARD_STATS_LEN; i++) { + memcpy(p, mlxsw_sp_port_hw_discard_stats[i].str, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + mlxsw_sp_port_get_prio_strings(&p, i); + + for (i = 0; i < TC_MAX_QUEUE; i++) + mlxsw_sp_port_get_tc_strings(&p, i); + + mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats_strings(&p); + + for (i = 0; i < MLXSW_SP_PORT_HW_TRANSCEIVER_STATS_LEN; i++) { + memcpy(p, mlxsw_sp_port_transceiver_stats[i].str, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + break; + } +} + +static int mlxsw_sp_port_set_phys_id(struct net_device *dev, + enum ethtool_phys_id_state state) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char mlcr_pl[MLXSW_REG_MLCR_LEN]; + bool active; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + active = true; + break; + case ETHTOOL_ID_INACTIVE: + active = false; + break; + default: + return -EOPNOTSUPP; + } + + mlxsw_reg_mlcr_pack(mlcr_pl, mlxsw_sp_port->local_port, active); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mlcr), mlcr_pl); +} + +static int +mlxsw_sp_get_hw_stats_by_group(struct mlxsw_sp_port_hw_stats **p_hw_stats, + int *p_len, enum mlxsw_reg_ppcnt_grp grp) +{ + switch (grp) { + case MLXSW_REG_PPCNT_IEEE_8023_CNT: + *p_hw_stats = mlxsw_sp_port_hw_stats; + *p_len = MLXSW_SP_PORT_HW_STATS_LEN; + break; + case MLXSW_REG_PPCNT_RFC_2863_CNT: + *p_hw_stats = mlxsw_sp_port_hw_rfc_2863_stats; + *p_len = MLXSW_SP_PORT_HW_RFC_2863_STATS_LEN; + break; + case MLXSW_REG_PPCNT_RFC_2819_CNT: + *p_hw_stats = mlxsw_sp_port_hw_rfc_2819_stats; + *p_len = MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN; + break; + case MLXSW_REG_PPCNT_RFC_3635_CNT: + *p_hw_stats = mlxsw_sp_port_hw_rfc_3635_stats; + *p_len = MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN; + break; + case MLXSW_REG_PPCNT_EXT_CNT: + *p_hw_stats = mlxsw_sp_port_hw_ext_stats; + *p_len = MLXSW_SP_PORT_HW_EXT_STATS_LEN; 
+ break; + case MLXSW_REG_PPCNT_DISCARD_CNT: + *p_hw_stats = mlxsw_sp_port_hw_discard_stats; + *p_len = MLXSW_SP_PORT_HW_DISCARD_STATS_LEN; + break; + case MLXSW_REG_PPCNT_PRIO_CNT: + *p_hw_stats = mlxsw_sp_port_hw_prio_stats; + *p_len = MLXSW_SP_PORT_HW_PRIO_STATS_LEN; + break; + case MLXSW_REG_PPCNT_TC_CNT: + *p_hw_stats = mlxsw_sp_port_hw_tc_stats; + *p_len = MLXSW_SP_PORT_HW_TC_STATS_LEN; + break; + default: + WARN_ON(1); + return -EOPNOTSUPP; + } + return 0; +} + +static void __mlxsw_sp_port_get_stats(struct net_device *dev, + enum mlxsw_reg_ppcnt_grp grp, int prio, + u64 *data, int data_index) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_port_hw_stats *hw_stats; + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + int i, len; + int err; + + err = mlxsw_sp_get_hw_stats_by_group(&hw_stats, &len, grp); + if (err) + return; + mlxsw_sp_port_get_stats_raw(dev, grp, prio, ppcnt_pl); + for (i = 0; i < len; i++) { + data[data_index + i] = hw_stats[i].getter(ppcnt_pl); + if (!hw_stats[i].cells_bytes) + continue; + data[data_index + i] = mlxsw_sp_cells_bytes(mlxsw_sp, + data[data_index + i]); + } +} + +static void __mlxsw_sp_port_get_env_stats(struct net_device *dev, u64 *data, int data_index, + struct mlxsw_sp_port_stats *port_stats, + int len) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int i; + + for (i = 0; i < len; i++) + data[data_index + i] = port_stats[i].getter(mlxsw_sp_port); +} + +static void mlxsw_sp_port_get_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int i, data_index = 0; + + /* IEEE 802.3 Counters */ + __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT, 0, + data, data_index); + data_index = MLXSW_SP_PORT_HW_STATS_LEN; + + /* RFC 2863 Counters */ + __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_RFC_2863_CNT, 0, + data, data_index); + data_index += MLXSW_SP_PORT_HW_RFC_2863_STATS_LEN; + + /* RFC 2819 Counters */ + __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_RFC_2819_CNT, 0, + data, data_index); + data_index += MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN; + + /* RFC 3635 Counters */ + __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_RFC_3635_CNT, 0, + data, data_index); + data_index += MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN; + + /* Extended Counters */ + __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_EXT_CNT, 0, + data, data_index); + data_index += MLXSW_SP_PORT_HW_EXT_STATS_LEN; + + /* Discard Counters */ + __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_DISCARD_CNT, 0, + data, data_index); + data_index += MLXSW_SP_PORT_HW_DISCARD_STATS_LEN; + + /* Per-Priority Counters */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_PRIO_CNT, i, + data, data_index); + data_index += MLXSW_SP_PORT_HW_PRIO_STATS_LEN; + } + + /* Per-TC Counters */ + for (i = 0; i < TC_MAX_QUEUE; i++) { + __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_TC_CNT, i, + data, data_index); + data_index += MLXSW_SP_PORT_HW_TC_STATS_LEN; + } + + /* PTP counters */ + mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats(mlxsw_sp_port, + data, data_index); + data_index += mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats_count(); + + /* Transceiver counters */ + __mlxsw_sp_port_get_env_stats(dev, data, data_index, mlxsw_sp_port_transceiver_stats, + MLXSW_SP_PORT_HW_TRANSCEIVER_STATS_LEN); + data_index += MLXSW_SP_PORT_HW_TRANSCEIVER_STATS_LEN; +} + +static int mlxsw_sp_port_get_sset_count(struct net_device *dev, 
int sset) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + switch (sset) { + case ETH_SS_STATS: + return MLXSW_SP_PORT_ETHTOOL_STATS_LEN + + mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats_count(); + default: + return -EOPNOTSUPP; + } +} + +static void +mlxsw_sp_port_get_link_supported(struct mlxsw_sp *mlxsw_sp, u32 eth_proto_cap, + struct ethtool_link_ksettings *cmd) +{ + const struct mlxsw_sp_port_type_speed_ops *ops; + + ops = mlxsw_sp->port_type_speed_ops; + + ethtool_link_ksettings_add_link_mode(cmd, supported, Asym_Pause); + ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); + + ops->from_ptys_supported_port(mlxsw_sp, eth_proto_cap, cmd); + ops->from_ptys_link(mlxsw_sp, eth_proto_cap, + cmd->link_modes.supported); +} + +static void +mlxsw_sp_port_get_link_advertise(struct mlxsw_sp *mlxsw_sp, + u32 eth_proto_admin, bool autoneg, + struct ethtool_link_ksettings *cmd) +{ + const struct mlxsw_sp_port_type_speed_ops *ops; + + ops = mlxsw_sp->port_type_speed_ops; + + if (!autoneg) + return; + + ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg); + ops->from_ptys_link(mlxsw_sp, eth_proto_admin, + cmd->link_modes.advertising); +} + +static u8 +mlxsw_sp_port_connector_port(enum mlxsw_reg_ptys_connector_type connector_type) +{ + switch (connector_type) { + case MLXSW_REG_PTYS_CONNECTOR_TYPE_UNKNOWN_OR_NO_CONNECTOR: + return PORT_OTHER; + case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_NONE: + return PORT_NONE; + case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_TP: + return PORT_TP; + case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_AUI: + return PORT_AUI; + case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_BNC: + return PORT_BNC; + case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_MII: + return PORT_MII; + case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_FIBRE: + return PORT_FIBRE; + case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_DA: + return PORT_DA; + case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_OTHER: + return PORT_OTHER; + default: + WARN_ON_ONCE(1); + return PORT_OTHER; + } +} + +static int mlxsw_sp_port_ptys_query(struct mlxsw_sp_port *mlxsw_sp_port, + u32 *p_eth_proto_cap, u32 *p_eth_proto_admin, + u32 *p_eth_proto_oper, u8 *p_connector_type) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + const struct mlxsw_sp_port_type_speed_ops *ops; + char ptys_pl[MLXSW_REG_PTYS_LEN]; + int err; + + ops = mlxsw_sp->port_type_speed_ops; + + ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port, 0, false); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + if (err) + return err; + + ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, p_eth_proto_cap, p_eth_proto_admin, + p_eth_proto_oper); + if (p_connector_type) + *p_connector_type = mlxsw_reg_ptys_connector_type_get(ptys_pl); + return 0; +} + +static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) +{ + u32 eth_proto_cap, eth_proto_admin, eth_proto_oper; + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + const struct mlxsw_sp_port_type_speed_ops *ops; + u8 connector_type; + bool autoneg; + int err; + + err = mlxsw_sp_port_ptys_query(mlxsw_sp_port, &eth_proto_cap, &eth_proto_admin, + &eth_proto_oper, &connector_type); + if (err) + return err; + + ops = mlxsw_sp->port_type_speed_ops; + autoneg = mlxsw_sp_port->link.autoneg; + + mlxsw_sp_port_get_link_supported(mlxsw_sp, eth_proto_cap, cmd); + + mlxsw_sp_port_get_link_advertise(mlxsw_sp, eth_proto_admin, autoneg, cmd); + +
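/* Note: ops->from_ptys_link_mode() (the mlxsw_sp1/mlxsw_sp2 implementations
 * further down) only fills in cmd->base.speed, cmd->base.duplex and
 * cmd->lanes when the carrier is up; otherwise they are left as
 * SPEED_UNKNOWN, DUPLEX_UNKNOWN and 0.
 */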
cmd->base.autoneg = autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE; + cmd->base.port = mlxsw_sp_port_connector_port(connector_type); + ops->from_ptys_link_mode(mlxsw_sp, netif_carrier_ok(dev), + eth_proto_oper, cmd); + + return 0; +} + +static int +mlxsw_sp_port_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + const struct mlxsw_sp_port_type_speed_ops *ops; + char ptys_pl[MLXSW_REG_PTYS_LEN]; + u32 eth_proto_cap, eth_proto_new; + bool autoneg; + int err; + + ops = mlxsw_sp->port_type_speed_ops; + + ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port, + 0, false); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + if (err) + return err; + ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, &eth_proto_cap, NULL, NULL); + + autoneg = cmd->base.autoneg == AUTONEG_ENABLE; + eth_proto_new = autoneg ? + ops->to_ptys_advert_link(mlxsw_sp, cmd) : + ops->to_ptys_speed_lanes(mlxsw_sp, mlxsw_sp_port->mapping.width, + cmd); + + eth_proto_new = eth_proto_new & eth_proto_cap; + if (!eth_proto_new) { + netdev_err(dev, "No supported speed or lanes requested\n"); + return -EINVAL; + } + + ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port, + eth_proto_new, autoneg); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + if (err) + return err; + + mlxsw_sp_port->link.autoneg = autoneg; + + if (!netif_running(dev)) + return 0; + + mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false); + mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true); + + return 0; +} + +static int mlxsw_sp_get_module_info(struct net_device *netdev, + struct ethtool_modinfo *modinfo) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + return mlxsw_env_get_module_info(netdev, mlxsw_sp->core, + mlxsw_sp_port->mapping.slot_index, + mlxsw_sp_port->mapping.module, + modinfo); +} + +static int mlxsw_sp_get_module_eeprom(struct net_device *netdev, + struct ethtool_eeprom *ee, u8 *data) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 slot_index = mlxsw_sp_port->mapping.slot_index; + u8 module = mlxsw_sp_port->mapping.module; + + return mlxsw_env_get_module_eeprom(netdev, mlxsw_sp->core, slot_index, + module, ee, data); +} + +static int +mlxsw_sp_get_module_eeprom_by_page(struct net_device *dev, + const struct ethtool_module_eeprom *page, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 slot_index = mlxsw_sp_port->mapping.slot_index; + u8 module = mlxsw_sp_port->mapping.module; + + return mlxsw_env_get_module_eeprom_by_page(mlxsw_sp->core, slot_index, + module, page, extack); +} + +static int +mlxsw_sp_get_ts_info(struct net_device *netdev, struct ethtool_ts_info *info) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + return mlxsw_sp->ptp_ops->get_ts_info(mlxsw_sp, info); +} + +static void +mlxsw_sp_get_eth_phy_stats(struct net_device *dev, + struct ethtool_eth_phy_stats *phy_stats) +{ + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + + if (mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT, + 0, ppcnt_pl)) + return; + + phy_stats->SymbolErrorDuringCarrier = + mlxsw_reg_ppcnt_a_symbol_error_during_carrier_get(ppcnt_pl); +}
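The counter groups earlier in this file are all table-driven: each group is an array of { string, getter } entries with an ARRAY_SIZE()-based *_STATS_LEN macro, and mlxsw_sp_port_get_strings(), mlxsw_sp_port_get_sset_count() and mlxsw_sp_port_get_stats() walk the same tables in the same order. As a rough, hypothetical sketch (not part of the driver), one more group would follow the same shape, here reusing an existing PPCNT getter as a stand-in:

/* Hypothetical example group -- the "example" names do not exist upstream. */
static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_example_stats[] = {
	{
		.str = "example_frames_transmitted_ok",
		/* A real group would point at its own PPCNT field getter. */
		.getter = mlxsw_reg_ppcnt_a_frames_transmitted_ok_get,
	},
};

#define MLXSW_SP_PORT_HW_EXAMPLE_STATS_LEN \
	ARRAY_SIZE(mlxsw_sp_port_hw_example_stats)

Such a group would only become visible to ethtool once it is also folded into MLXSW_SP_PORT_ETHTOOL_STATS_LEN and into the copy loops of mlxsw_sp_port_get_strings() and mlxsw_sp_port_get_stats(), since the three callbacks must agree on the count and ordering of strings and values.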
+ +static void +mlxsw_sp_get_eth_mac_stats(struct net_device *dev, + struct ethtool_eth_mac_stats *mac_stats) +{ + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + + if (mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT, + 0, ppcnt_pl)) + return; + + mac_stats->FramesTransmittedOK = + mlxsw_reg_ppcnt_a_frames_transmitted_ok_get(ppcnt_pl); + mac_stats->FramesReceivedOK = + mlxsw_reg_ppcnt_a_frames_received_ok_get(ppcnt_pl); + mac_stats->FrameCheckSequenceErrors = + mlxsw_reg_ppcnt_a_frame_check_sequence_errors_get(ppcnt_pl); + mac_stats->AlignmentErrors = + mlxsw_reg_ppcnt_a_alignment_errors_get(ppcnt_pl); + mac_stats->OctetsTransmittedOK = + mlxsw_reg_ppcnt_a_octets_transmitted_ok_get(ppcnt_pl); + mac_stats->OctetsReceivedOK = + mlxsw_reg_ppcnt_a_octets_received_ok_get(ppcnt_pl); + mac_stats->MulticastFramesXmittedOK = + mlxsw_reg_ppcnt_a_multicast_frames_xmitted_ok_get(ppcnt_pl); + mac_stats->BroadcastFramesXmittedOK = + mlxsw_reg_ppcnt_a_broadcast_frames_xmitted_ok_get(ppcnt_pl); + mac_stats->MulticastFramesReceivedOK = + mlxsw_reg_ppcnt_a_multicast_frames_received_ok_get(ppcnt_pl); + mac_stats->BroadcastFramesReceivedOK = + mlxsw_reg_ppcnt_a_broadcast_frames_received_ok_get(ppcnt_pl); + mac_stats->InRangeLengthErrors = + mlxsw_reg_ppcnt_a_in_range_length_errors_get(ppcnt_pl); + mac_stats->OutOfRangeLengthField = + mlxsw_reg_ppcnt_a_out_of_range_length_field_get(ppcnt_pl); + mac_stats->FrameTooLongErrors = + mlxsw_reg_ppcnt_a_frame_too_long_errors_get(ppcnt_pl); +} + +static void +mlxsw_sp_get_eth_ctrl_stats(struct net_device *dev, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + + if (mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT, + 0, ppcnt_pl)) + return; + + ctrl_stats->MACControlFramesTransmitted = + mlxsw_reg_ppcnt_a_mac_control_frames_transmitted_get(ppcnt_pl); + ctrl_stats->MACControlFramesReceived = + mlxsw_reg_ppcnt_a_mac_control_frames_received_get(ppcnt_pl); + ctrl_stats->UnsupportedOpcodesReceived = + mlxsw_reg_ppcnt_a_unsupported_opcodes_received_get(ppcnt_pl); +} + +static const struct ethtool_rmon_hist_range mlxsw_rmon_ranges[] = { + { 0, 64 }, + { 65, 127 }, + { 128, 255 }, + { 256, 511 }, + { 512, 1023 }, + { 1024, 1518 }, + { 1519, 2047 }, + { 2048, 4095 }, + { 4096, 8191 }, + { 8192, 10239 }, + {} +}; + +static void +mlxsw_sp_get_rmon_stats(struct net_device *dev, + struct ethtool_rmon_stats *rmon, + const struct ethtool_rmon_hist_range **ranges) +{ + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + + if (mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_RFC_2819_CNT, + 0, ppcnt_pl)) + return; + + rmon->undersize_pkts = + mlxsw_reg_ppcnt_ether_stats_undersize_pkts_get(ppcnt_pl); + rmon->oversize_pkts = + mlxsw_reg_ppcnt_ether_stats_oversize_pkts_get(ppcnt_pl); + rmon->fragments = + mlxsw_reg_ppcnt_ether_stats_fragments_get(ppcnt_pl); + + rmon->hist[0] = mlxsw_reg_ppcnt_ether_stats_pkts64octets_get(ppcnt_pl); + rmon->hist[1] = + mlxsw_reg_ppcnt_ether_stats_pkts65to127octets_get(ppcnt_pl); + rmon->hist[2] = + mlxsw_reg_ppcnt_ether_stats_pkts128to255octets_get(ppcnt_pl); + rmon->hist[3] = + mlxsw_reg_ppcnt_ether_stats_pkts256to511octets_get(ppcnt_pl); + rmon->hist[4] = + mlxsw_reg_ppcnt_ether_stats_pkts512to1023octets_get(ppcnt_pl); + rmon->hist[5] = + mlxsw_reg_ppcnt_ether_stats_pkts1024to1518octets_get(ppcnt_pl); + rmon->hist[6] = + mlxsw_reg_ppcnt_ether_stats_pkts1519to2047octets_get(ppcnt_pl); + rmon->hist[7] = + mlxsw_reg_ppcnt_ether_stats_pkts2048to4095octets_get(ppcnt_pl); + rmon->hist[8] = + 
mlxsw_reg_ppcnt_ether_stats_pkts4096to8191octets_get(ppcnt_pl); + rmon->hist[9] = + mlxsw_reg_ppcnt_ether_stats_pkts8192to10239octets_get(ppcnt_pl); + + *ranges = mlxsw_rmon_ranges; +} + +static int mlxsw_sp_reset(struct net_device *dev, u32 *flags) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 slot_index = mlxsw_sp_port->mapping.slot_index; + u8 module = mlxsw_sp_port->mapping.module; + + return mlxsw_env_reset_module(dev, mlxsw_sp->core, slot_index, + module, flags); +} + +static int +mlxsw_sp_get_module_power_mode(struct net_device *dev, + struct ethtool_module_power_mode_params *params, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 slot_index = mlxsw_sp_port->mapping.slot_index; + u8 module = mlxsw_sp_port->mapping.module; + + return mlxsw_env_get_module_power_mode(mlxsw_sp->core, slot_index, + module, params, extack); +} + +static int +mlxsw_sp_set_module_power_mode(struct net_device *dev, + const struct ethtool_module_power_mode_params *params, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 slot_index = mlxsw_sp_port->mapping.slot_index; + u8 module = mlxsw_sp_port->mapping.module; + + return mlxsw_env_set_module_power_mode(mlxsw_sp->core, slot_index, + module, params->policy, extack); +} + +const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { + .cap_link_lanes_supported = true, + .get_drvinfo = mlxsw_sp_port_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_link_ext_state = mlxsw_sp_port_get_link_ext_state, + .get_pauseparam = mlxsw_sp_port_get_pauseparam, + .set_pauseparam = mlxsw_sp_port_set_pauseparam, + .get_strings = mlxsw_sp_port_get_strings, + .set_phys_id = mlxsw_sp_port_set_phys_id, + .get_ethtool_stats = mlxsw_sp_port_get_stats, + .get_sset_count = mlxsw_sp_port_get_sset_count, + .get_link_ksettings = mlxsw_sp_port_get_link_ksettings, + .set_link_ksettings = mlxsw_sp_port_set_link_ksettings, + .get_module_info = mlxsw_sp_get_module_info, + .get_module_eeprom = mlxsw_sp_get_module_eeprom, + .get_module_eeprom_by_page = mlxsw_sp_get_module_eeprom_by_page, + .get_ts_info = mlxsw_sp_get_ts_info, + .get_eth_phy_stats = mlxsw_sp_get_eth_phy_stats, + .get_eth_mac_stats = mlxsw_sp_get_eth_mac_stats, + .get_eth_ctrl_stats = mlxsw_sp_get_eth_ctrl_stats, + .get_rmon_stats = mlxsw_sp_get_rmon_stats, + .reset = mlxsw_sp_reset, + .get_module_power_mode = mlxsw_sp_get_module_power_mode, + .set_module_power_mode = mlxsw_sp_set_module_power_mode, +}; + +struct mlxsw_sp1_port_link_mode { + enum ethtool_link_mode_bit_indices mask_ethtool; + u32 mask; + u32 speed; +}; + +static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = { + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T, + .mask_ethtool = ETHTOOL_LINK_MODE_100baseT_Full_BIT, + .speed = SPEED_100, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_SGMII | + MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX, + .mask_ethtool = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, + .speed = SPEED_1000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_1000BASE_T, + .mask_ethtool = ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + .speed = SPEED_1000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 | + MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4, + .mask_ethtool = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, + .speed = SPEED_10000, + }, + { + .mask = 
MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR | + MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR | + MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR | + MLXSW_REG_PTYS_ETH_SPEED_10GBASE_ER_LR, + .mask_ethtool = ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, + .speed = SPEED_10000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4, + .mask_ethtool = ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, + .speed = SPEED_40000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4, + .mask_ethtool = ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT, + .speed = SPEED_40000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4, + .mask_ethtool = ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT, + .speed = SPEED_40000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_LR4_ER4, + .mask_ethtool = ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT, + .speed = SPEED_40000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR, + .mask_ethtool = ETHTOOL_LINK_MODE_25000baseCR_Full_BIT, + .speed = SPEED_25000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR, + .mask_ethtool = ETHTOOL_LINK_MODE_25000baseKR_Full_BIT, + .speed = SPEED_25000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR, + .mask_ethtool = ETHTOOL_LINK_MODE_25000baseSR_Full_BIT, + .speed = SPEED_25000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_CR2, + .mask_ethtool = ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT, + .speed = SPEED_50000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR2, + .mask_ethtool = ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT, + .speed = SPEED_50000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_SR2, + .mask_ethtool = ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT, + .speed = SPEED_50000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4, + .mask_ethtool = ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, + .speed = SPEED_100000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4, + .mask_ethtool = ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, + .speed = SPEED_100000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4, + .mask_ethtool = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, + .speed = SPEED_100000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4, + .mask_ethtool = ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT, + .speed = SPEED_100000, + }, +}; + +#define MLXSW_SP1_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp1_port_link_mode) + +static void +mlxsw_sp1_from_ptys_supported_port(struct mlxsw_sp *mlxsw_sp, + u32 ptys_eth_proto, + struct ethtool_link_ksettings *cmd) +{ + if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR | + MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR | + MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 | + MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 | + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 | + MLXSW_REG_PTYS_ETH_SPEED_SGMII)) + ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); + + if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR | + MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 | + MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 | + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 | + MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX)) + ethtool_link_ksettings_add_link_mode(cmd, supported, Backplane); +} + +static void +mlxsw_sp1_from_ptys_link(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, + unsigned long *mode) +{ + int i; + + for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sp1_port_link_mode[i].mask) + __set_bit(mlxsw_sp1_port_link_mode[i].mask_ethtool, + mode); + } +} + +static u32 +mlxsw_sp1_from_ptys_speed(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto) +{ + int i; + + for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto 
& mlxsw_sp1_port_link_mode[i].mask) + return mlxsw_sp1_port_link_mode[i].speed; + } + + return SPEED_UNKNOWN; +} + +static void +mlxsw_sp1_from_ptys_link_mode(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, + u32 ptys_eth_proto, + struct ethtool_link_ksettings *cmd) +{ + struct mlxsw_sp1_port_link_mode link; + int i; + + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; + cmd->lanes = 0; + + if (!carrier_ok) + return; + + for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sp1_port_link_mode[i].mask) { + link = mlxsw_sp1_port_link_mode[i]; + ethtool_params_from_link_mode(cmd, + link.mask_ethtool); + } + } +} + +static int mlxsw_sp1_ptys_max_speed(struct mlxsw_sp_port *mlxsw_sp_port, u32 *p_max_speed) +{ + u32 eth_proto_cap; + u32 max_speed = 0; + int err; + int i; + + err = mlxsw_sp_port_ptys_query(mlxsw_sp_port, &eth_proto_cap, NULL, NULL, NULL); + if (err) + return err; + + for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { + if ((eth_proto_cap & mlxsw_sp1_port_link_mode[i].mask) && + mlxsw_sp1_port_link_mode[i].speed > max_speed) + max_speed = mlxsw_sp1_port_link_mode[i].speed; + } + + *p_max_speed = max_speed; + return 0; +} + +static u32 +mlxsw_sp1_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, + const struct ethtool_link_ksettings *cmd) +{ + u32 ptys_proto = 0; + int i; + + for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { + if (test_bit(mlxsw_sp1_port_link_mode[i].mask_ethtool, + cmd->link_modes.advertising)) + ptys_proto |= mlxsw_sp1_port_link_mode[i].mask; + } + return ptys_proto; +} + +static u32 mlxsw_sp1_to_ptys_speed_lanes(struct mlxsw_sp *mlxsw_sp, u8 width, + const struct ethtool_link_ksettings *cmd) +{ + u32 ptys_proto = 0; + int i; + + if (cmd->lanes > width) + return ptys_proto; + + for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { + if (cmd->base.speed == mlxsw_sp1_port_link_mode[i].speed) + ptys_proto |= mlxsw_sp1_port_link_mode[i].mask; + } + return ptys_proto; +} + +static void +mlxsw_sp1_reg_ptys_eth_pack(struct mlxsw_sp *mlxsw_sp, char *payload, + u16 local_port, u32 proto_admin, bool autoneg) +{ + mlxsw_reg_ptys_eth_pack(payload, local_port, proto_admin, autoneg); +} + +static void +mlxsw_sp1_reg_ptys_eth_unpack(struct mlxsw_sp *mlxsw_sp, char *payload, + u32 *p_eth_proto_cap, u32 *p_eth_proto_admin, + u32 *p_eth_proto_oper) +{ + mlxsw_reg_ptys_eth_unpack(payload, p_eth_proto_cap, p_eth_proto_admin, + p_eth_proto_oper); +} + +static u32 mlxsw_sp1_ptys_proto_cap_masked_get(u32 eth_proto_cap) +{ + u32 ptys_proto_cap_masked = 0; + int i; + + for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { + if (mlxsw_sp1_port_link_mode[i].mask & eth_proto_cap) + ptys_proto_cap_masked |= + mlxsw_sp1_port_link_mode[i].mask; + } + + return ptys_proto_cap_masked; +} + +const struct mlxsw_sp_port_type_speed_ops mlxsw_sp1_port_type_speed_ops = { + .from_ptys_supported_port = mlxsw_sp1_from_ptys_supported_port, + .from_ptys_link = mlxsw_sp1_from_ptys_link, + .from_ptys_speed = mlxsw_sp1_from_ptys_speed, + .from_ptys_link_mode = mlxsw_sp1_from_ptys_link_mode, + .ptys_max_speed = mlxsw_sp1_ptys_max_speed, + .to_ptys_advert_link = mlxsw_sp1_to_ptys_advert_link, + .to_ptys_speed_lanes = mlxsw_sp1_to_ptys_speed_lanes, + .reg_ptys_eth_pack = mlxsw_sp1_reg_ptys_eth_pack, + .reg_ptys_eth_unpack = mlxsw_sp1_reg_ptys_eth_unpack, + .ptys_proto_cap_masked_get = mlxsw_sp1_ptys_proto_cap_masked_get, +}; + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_sgmii_100m[] = { + ETHTOOL_LINK_MODE_100baseT_Full_BIT, +}; + +#define 
MLXSW_SP2_MASK_ETHTOOL_SGMII_100M_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_sgmii_100m) + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_1000base_x_sgmii[] = { + ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_1000BASE_X_SGMII_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_1000base_x_sgmii) + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_5gbase_r[] = { + ETHTOOL_LINK_MODE_5000baseT_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_5GBASE_R_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_5gbase_r) + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_xfi_xaui_1_10g[] = { + ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseR_FEC_BIT, + ETHTOOL_LINK_MODE_10000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseLR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseER_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_XFI_XAUI_1_10G_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_xfi_xaui_1_10g) + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_xlaui_4_xlppi_4_40g[] = { + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_XLAUI_4_XLPPI_4_40G_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_xlaui_4_xlppi_4_40g) + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_25gaui_1_25gbase_cr_kr[] = { + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_25GAUI_1_25GBASE_CR_KR_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_25gaui_1_25gbase_cr_kr) + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_50gaui_2_laui_2_50gbase_cr2_kr2[] = { + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_50GAUI_2_LAUI_2_50GBASE_CR2_KR2_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_50gaui_2_laui_2_50gbase_cr2_kr2) + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_50gaui_1_laui_1_50gbase_cr_kr[] = { + ETHTOOL_LINK_MODE_50000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseDR_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_50GAUI_1_LAUI_1_50GBASE_CR_KR_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_50gaui_1_laui_1_50gbase_cr_kr) + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_caui_4_100gbase_cr4_kr4[] = { + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_CAUI_4_100GBASE_CR4_KR4_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_caui_4_100gbase_cr4_kr4) + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_100gaui_2_100gbase_cr2_kr2[] = { + ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_100GAUI_2_100GBASE_CR2_KR2_LEN \ + 
ARRAY_SIZE(mlxsw_sp2_mask_ethtool_100gaui_2_100gbase_cr2_kr2) + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4[] = { + ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_200GAUI_4_200GBASE_CR4_KR4_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4) + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_400gaui_8[] = { + ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT, + ETHTOOL_LINK_MODE_400000baseSR8_Full_BIT, + ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT, + ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT, + ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_400GAUI_8_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_400gaui_8) + +#define MLXSW_SP_PORT_MASK_WIDTH_1X BIT(0) +#define MLXSW_SP_PORT_MASK_WIDTH_2X BIT(1) +#define MLXSW_SP_PORT_MASK_WIDTH_4X BIT(2) +#define MLXSW_SP_PORT_MASK_WIDTH_8X BIT(3) + +static u8 mlxsw_sp_port_mask_width_get(u8 width) +{ + switch (width) { + case 1: + return MLXSW_SP_PORT_MASK_WIDTH_1X; + case 2: + return MLXSW_SP_PORT_MASK_WIDTH_2X; + case 4: + return MLXSW_SP_PORT_MASK_WIDTH_4X; + case 8: + return MLXSW_SP_PORT_MASK_WIDTH_8X; + default: + WARN_ON_ONCE(1); + return 0; + } +} + +struct mlxsw_sp2_port_link_mode { + const enum ethtool_link_mode_bit_indices *mask_ethtool; + int m_ethtool_len; + u32 mask; + u32 speed; + u32 width; + u8 mask_sup_width; +}; + +static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = { + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_SGMII_100M, + .mask_ethtool = mlxsw_sp2_mask_ethtool_sgmii_100m, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_SGMII_100M_LEN, + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_1X | + MLXSW_SP_PORT_MASK_WIDTH_2X | + MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, + .speed = SPEED_100, + .width = 1, + }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_1000BASE_X_SGMII, + .mask_ethtool = mlxsw_sp2_mask_ethtool_1000base_x_sgmii, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_1000BASE_X_SGMII_LEN, + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_1X | + MLXSW_SP_PORT_MASK_WIDTH_2X | + MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, + .speed = SPEED_1000, + .width = 1, + }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_5GBASE_R, + .mask_ethtool = mlxsw_sp2_mask_ethtool_5gbase_r, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_5GBASE_R_LEN, + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_1X | + MLXSW_SP_PORT_MASK_WIDTH_2X | + MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, + .speed = SPEED_5000, + .width = 1, + }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_XFI_XAUI_1_10G, + .mask_ethtool = mlxsw_sp2_mask_ethtool_xfi_xaui_1_10g, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_XFI_XAUI_1_10G_LEN, + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_1X | + MLXSW_SP_PORT_MASK_WIDTH_2X | + MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, + .speed = SPEED_10000, + .width = 1, + }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_XLAUI_4_XLPPI_4_40G, + .mask_ethtool = mlxsw_sp2_mask_ethtool_xlaui_4_xlppi_4_40g, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_XLAUI_4_XLPPI_4_40G_LEN, + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, + .speed = SPEED_40000, + .width = 4, + }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_25GAUI_1_25GBASE_CR_KR, + .mask_ethtool = 
mlxsw_sp2_mask_ethtool_25gaui_1_25gbase_cr_kr, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_25GAUI_1_25GBASE_CR_KR_LEN, + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_1X | + MLXSW_SP_PORT_MASK_WIDTH_2X | + MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, + .speed = SPEED_25000, + .width = 1, + }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_2_LAUI_2_50GBASE_CR2_KR2, + .mask_ethtool = mlxsw_sp2_mask_ethtool_50gaui_2_laui_2_50gbase_cr2_kr2, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_50GAUI_2_LAUI_2_50GBASE_CR2_KR2_LEN, + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_2X | + MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, + .speed = SPEED_50000, + .width = 2, + }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_1_LAUI_1_50GBASE_CR_KR, + .mask_ethtool = mlxsw_sp2_mask_ethtool_50gaui_1_laui_1_50gbase_cr_kr, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_50GAUI_1_LAUI_1_50GBASE_CR_KR_LEN, + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_1X, + .speed = SPEED_50000, + .width = 1, + }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_CAUI_4_100GBASE_CR4_KR4, + .mask_ethtool = mlxsw_sp2_mask_ethtool_caui_4_100gbase_cr4_kr4, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_CAUI_4_100GBASE_CR4_KR4_LEN, + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, + .speed = SPEED_100000, + .width = 4, + }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_100GAUI_2_100GBASE_CR2_KR2, + .mask_ethtool = mlxsw_sp2_mask_ethtool_100gaui_2_100gbase_cr2_kr2, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_100GAUI_2_100GBASE_CR2_KR2_LEN, + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_2X, + .speed = SPEED_100000, + .width = 2, + }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_200GAUI_4_200GBASE_CR4_KR4, + .mask_ethtool = mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_200GAUI_4_200GBASE_CR4_KR4_LEN, + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, + .speed = SPEED_200000, + .width = 4, + }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_400GAUI_8, + .mask_ethtool = mlxsw_sp2_mask_ethtool_400gaui_8, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_400GAUI_8_LEN, + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_8X, + .speed = SPEED_400000, + .width = 8, + }, +}; + +#define MLXSW_SP2_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp2_port_link_mode) + +static void +mlxsw_sp2_from_ptys_supported_port(struct mlxsw_sp *mlxsw_sp, + u32 ptys_eth_proto, + struct ethtool_link_ksettings *cmd) +{ + ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); + ethtool_link_ksettings_add_link_mode(cmd, supported, Backplane); +} + +static void +mlxsw_sp2_set_bit_ethtool(const struct mlxsw_sp2_port_link_mode *link_mode, + unsigned long *mode) +{ + int i; + + for (i = 0; i < link_mode->m_ethtool_len; i++) + __set_bit(link_mode->mask_ethtool[i], mode); +} + +static void +mlxsw_sp2_from_ptys_link(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, + unsigned long *mode) +{ + int i; + + for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) + mlxsw_sp2_set_bit_ethtool(&mlxsw_sp2_port_link_mode[i], + mode); + } +} + +static u32 +mlxsw_sp2_from_ptys_speed(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto) +{ + int i; + + for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) + return mlxsw_sp2_port_link_mode[i].speed; + } + + return SPEED_UNKNOWN; +} + +static void +mlxsw_sp2_from_ptys_link_mode(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, + u32 ptys_eth_proto, + struct 
ethtool_link_ksettings *cmd) +{ + struct mlxsw_sp2_port_link_mode link; + int i; + + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; + cmd->lanes = 0; + + if (!carrier_ok) + return; + + for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) { + link = mlxsw_sp2_port_link_mode[i]; + ethtool_params_from_link_mode(cmd, + link.mask_ethtool[1]); + } + } +} + +static int mlxsw_sp2_ptys_max_speed(struct mlxsw_sp_port *mlxsw_sp_port, u32 *p_max_speed) +{ + u32 eth_proto_cap; + u32 max_speed = 0; + int err; + int i; + + err = mlxsw_sp_port_ptys_query(mlxsw_sp_port, &eth_proto_cap, NULL, NULL, NULL); + if (err) + return err; + + for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { + if ((eth_proto_cap & mlxsw_sp2_port_link_mode[i].mask) && + mlxsw_sp2_port_link_mode[i].speed > max_speed) + max_speed = mlxsw_sp2_port_link_mode[i].speed; + } + + *p_max_speed = max_speed; + return 0; +} + +static bool +mlxsw_sp2_test_bit_ethtool(const struct mlxsw_sp2_port_link_mode *link_mode, + const unsigned long *mode) +{ + int cnt = 0; + int i; + + for (i = 0; i < link_mode->m_ethtool_len; i++) { + if (test_bit(link_mode->mask_ethtool[i], mode)) + cnt++; + } + + return cnt == link_mode->m_ethtool_len; +} + +static u32 +mlxsw_sp2_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, + const struct ethtool_link_ksettings *cmd) +{ + u32 ptys_proto = 0; + int i; + + for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { + if (mlxsw_sp2_test_bit_ethtool(&mlxsw_sp2_port_link_mode[i], + cmd->link_modes.advertising)) + ptys_proto |= mlxsw_sp2_port_link_mode[i].mask; + } + return ptys_proto; +} + +static u32 mlxsw_sp2_to_ptys_speed_lanes(struct mlxsw_sp *mlxsw_sp, u8 width, + const struct ethtool_link_ksettings *cmd) +{ + u8 mask_width = mlxsw_sp_port_mask_width_get(width); + struct mlxsw_sp2_port_link_mode link_mode; + u32 ptys_proto = 0; + int i; + + if (cmd->lanes > width) + return ptys_proto; + + for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { + if (cmd->base.speed == mlxsw_sp2_port_link_mode[i].speed) { + link_mode = mlxsw_sp2_port_link_mode[i]; + + if (!cmd->lanes) { + /* If number of lanes was not set by user space, + * choose the link mode that supports the width + * of the port. + */ + if (mask_width & link_mode.mask_sup_width) + ptys_proto |= link_mode.mask; + } else if (cmd->lanes == link_mode.width) { + /* Else if the number of lanes was set, choose + * the link mode that its actual width equals to + * it. 
+ */ + ptys_proto |= link_mode.mask; + } + } + } + return ptys_proto; +} + +static void +mlxsw_sp2_reg_ptys_eth_pack(struct mlxsw_sp *mlxsw_sp, char *payload, + u16 local_port, u32 proto_admin, + bool autoneg) +{ + mlxsw_reg_ptys_ext_eth_pack(payload, local_port, proto_admin, autoneg); +} + +static void +mlxsw_sp2_reg_ptys_eth_unpack(struct mlxsw_sp *mlxsw_sp, char *payload, + u32 *p_eth_proto_cap, u32 *p_eth_proto_admin, + u32 *p_eth_proto_oper) +{ + mlxsw_reg_ptys_ext_eth_unpack(payload, p_eth_proto_cap, + p_eth_proto_admin, p_eth_proto_oper); +} + +static u32 mlxsw_sp2_ptys_proto_cap_masked_get(u32 eth_proto_cap) +{ + u32 ptys_proto_cap_masked = 0; + int i; + + for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { + if (mlxsw_sp2_port_link_mode[i].mask & eth_proto_cap) + ptys_proto_cap_masked |= + mlxsw_sp2_port_link_mode[i].mask; + } + + return ptys_proto_cap_masked; +} + +const struct mlxsw_sp_port_type_speed_ops mlxsw_sp2_port_type_speed_ops = { + .from_ptys_supported_port = mlxsw_sp2_from_ptys_supported_port, + .from_ptys_link = mlxsw_sp2_from_ptys_link, + .from_ptys_speed = mlxsw_sp2_from_ptys_speed, + .from_ptys_link_mode = mlxsw_sp2_from_ptys_link_mode, + .ptys_max_speed = mlxsw_sp2_ptys_max_speed, + .to_ptys_advert_link = mlxsw_sp2_to_ptys_advert_link, + .to_ptys_speed_lanes = mlxsw_sp2_to_ptys_speed_lanes, + .reg_ptys_eth_pack = mlxsw_sp2_reg_ptys_eth_pack, + .reg_ptys_eth_unpack = mlxsw_sp2_reg_ptys_eth_unpack, + .ptys_proto_cap_masked_get = mlxsw_sp2_ptys_proto_cap_masked_get, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c new file mode 100644 index 000000000..b6ee2d658 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -0,0 +1,1876 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */ + +#include <linux/kernel.h> +#include <linux/bitops.h> +#include <linux/if_vlan.h> +#include <linux/if_bridge.h> +#include <linux/netdevice.h> +#include <linux/rhashtable.h> +#include <linux/rtnetlink.h> +#include <linux/refcount.h> + +#include "spectrum.h" +#include "reg.h" + +struct mlxsw_sp_fid_family; + +struct mlxsw_sp_fid_core { + struct rhashtable fid_ht; + struct rhashtable vni_ht; + struct mlxsw_sp_fid_family *fid_family_arr[MLXSW_SP_FID_TYPE_MAX]; + unsigned int *port_fid_mappings; +}; + +struct mlxsw_sp_fid_port_vid { + struct list_head list; + u16 local_port; + u16 vid; +}; + +struct mlxsw_sp_fid { + struct list_head list; + struct mlxsw_sp_rif *rif; + refcount_t ref_count; + u16 fid_index; + u16 fid_offset; + struct mlxsw_sp_fid_family *fid_family; + struct rhash_head ht_node; + + struct rhash_head vni_ht_node; + enum mlxsw_sp_nve_type nve_type; + __be32 vni; + u32 nve_flood_index; + int nve_ifindex; + u8 vni_valid:1, + nve_flood_index_valid:1; + struct list_head port_vid_list; /* Ordered by local port. 
*/ +}; + +struct mlxsw_sp_fid_8021q { + struct mlxsw_sp_fid common; + u16 vid; +}; + +struct mlxsw_sp_fid_8021d { + struct mlxsw_sp_fid common; + int br_ifindex; +}; + +static const struct rhashtable_params mlxsw_sp_fid_ht_params = { + .key_len = sizeof_field(struct mlxsw_sp_fid, fid_index), + .key_offset = offsetof(struct mlxsw_sp_fid, fid_index), + .head_offset = offsetof(struct mlxsw_sp_fid, ht_node), +}; + +static const struct rhashtable_params mlxsw_sp_fid_vni_ht_params = { + .key_len = sizeof_field(struct mlxsw_sp_fid, vni), + .key_offset = offsetof(struct mlxsw_sp_fid, vni), + .head_offset = offsetof(struct mlxsw_sp_fid, vni_ht_node), +}; + +struct mlxsw_sp_flood_table { + enum mlxsw_sp_flood_type packet_type; + enum mlxsw_flood_table_type table_type; + int table_index; +}; + +struct mlxsw_sp_fid_ops { + void (*setup)(struct mlxsw_sp_fid *fid, const void *arg); + int (*configure)(struct mlxsw_sp_fid *fid); + void (*deconfigure)(struct mlxsw_sp_fid *fid); + int (*index_alloc)(struct mlxsw_sp_fid *fid, const void *arg, + u16 *p_fid_index); + bool (*compare)(const struct mlxsw_sp_fid *fid, + const void *arg); + int (*port_vid_map)(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *port, u16 vid); + void (*port_vid_unmap)(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *port, u16 vid); + int (*vni_set)(struct mlxsw_sp_fid *fid); + void (*vni_clear)(struct mlxsw_sp_fid *fid); + int (*nve_flood_index_set)(struct mlxsw_sp_fid *fid); + void (*nve_flood_index_clear)(struct mlxsw_sp_fid *fid); + void (*fdb_clear_offload)(const struct mlxsw_sp_fid *fid, + const struct net_device *nve_dev); + int (*vid_to_fid_rif_update)(const struct mlxsw_sp_fid *fid, + const struct mlxsw_sp_rif *rif); +}; + +struct mlxsw_sp_fid_family { + enum mlxsw_sp_fid_type type; + size_t fid_size; + u16 start_index; + u16 end_index; + struct list_head fids_list; + unsigned long *fids_bitmap; + const struct mlxsw_sp_flood_table *flood_tables; + int nr_flood_tables; + enum mlxsw_sp_rif_type rif_type; + const struct mlxsw_sp_fid_ops *ops; + struct mlxsw_sp *mlxsw_sp; + bool flood_rsp; + enum mlxsw_reg_bridge_type bridge_type; + u16 pgt_base; + bool smpe_index_valid; +}; + +static const int mlxsw_sp_sfgc_uc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { + [MLXSW_REG_SFGC_TYPE_UNKNOWN_UNICAST] = 1, +}; + +static const int mlxsw_sp_sfgc_bc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { + [MLXSW_REG_SFGC_TYPE_BROADCAST] = 1, + [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_NON_IP] = 1, + [MLXSW_REG_SFGC_TYPE_IPV4_LINK_LOCAL] = 1, + [MLXSW_REG_SFGC_TYPE_IPV6_ALL_HOST] = 1, + [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6] = 1, +}; + +static const int mlxsw_sp_sfgc_mc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { + [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4] = 1, +}; + +static const int *mlxsw_sp_packet_type_sfgc_types[] = { + [MLXSW_SP_FLOOD_TYPE_UC] = mlxsw_sp_sfgc_uc_packet_types, + [MLXSW_SP_FLOOD_TYPE_BC] = mlxsw_sp_sfgc_bc_packet_types, + [MLXSW_SP_FLOOD_TYPE_MC] = mlxsw_sp_sfgc_mc_packet_types, +}; + +bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index) +{ + enum mlxsw_sp_fid_type fid_type = MLXSW_SP_FID_TYPE_DUMMY; + struct mlxsw_sp_fid_family *fid_family; + + fid_family = mlxsw_sp->fid_core->fid_family_arr[fid_type]; + + return fid_family->start_index == fid_index; +} + +struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_index(struct mlxsw_sp *mlxsw_sp, + u16 fid_index) +{ + struct mlxsw_sp_fid *fid; + + fid = rhashtable_lookup_fast(&mlxsw_sp->fid_core->fid_ht, &fid_index, + mlxsw_sp_fid_ht_params); + if (fid) + 
refcount_inc(&fid->ref_count); + + return fid; +} + +int mlxsw_sp_fid_nve_ifindex(const struct mlxsw_sp_fid *fid, int *nve_ifindex) +{ + if (!fid->vni_valid) + return -EINVAL; + + *nve_ifindex = fid->nve_ifindex; + + return 0; +} + +int mlxsw_sp_fid_nve_type(const struct mlxsw_sp_fid *fid, + enum mlxsw_sp_nve_type *p_type) +{ + if (!fid->vni_valid) + return -EINVAL; + + *p_type = fid->nve_type; + + return 0; +} + +struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp, + __be32 vni) +{ + struct mlxsw_sp_fid *fid; + + fid = rhashtable_lookup_fast(&mlxsw_sp->fid_core->vni_ht, &vni, + mlxsw_sp_fid_vni_ht_params); + if (fid) + refcount_inc(&fid->ref_count); + + return fid; +} + +int mlxsw_sp_fid_vni(const struct mlxsw_sp_fid *fid, __be32 *vni) +{ + if (!fid->vni_valid) + return -EINVAL; + + *vni = fid->vni; + + return 0; +} + +int mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid, + u32 nve_flood_index) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + const struct mlxsw_sp_fid_ops *ops = fid_family->ops; + int err; + + if (WARN_ON(fid->nve_flood_index_valid)) + return -EINVAL; + + fid->nve_flood_index = nve_flood_index; + fid->nve_flood_index_valid = true; + err = ops->nve_flood_index_set(fid); + if (err) + goto err_nve_flood_index_set; + + return 0; + +err_nve_flood_index_set: + fid->nve_flood_index_valid = false; + return err; +} + +void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + const struct mlxsw_sp_fid_ops *ops = fid_family->ops; + + if (WARN_ON(!fid->nve_flood_index_valid)) + return; + + fid->nve_flood_index_valid = false; + ops->nve_flood_index_clear(fid); +} + +bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid) +{ + return fid->nve_flood_index_valid; +} + +int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, enum mlxsw_sp_nve_type type, + __be32 vni, int nve_ifindex) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + const struct mlxsw_sp_fid_ops *ops = fid_family->ops; + struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp; + int err; + + if (WARN_ON(fid->vni_valid)) + return -EINVAL; + + fid->nve_type = type; + fid->nve_ifindex = nve_ifindex; + fid->vni = vni; + err = rhashtable_lookup_insert_fast(&mlxsw_sp->fid_core->vni_ht, + &fid->vni_ht_node, + mlxsw_sp_fid_vni_ht_params); + if (err) + return err; + + fid->vni_valid = true; + err = ops->vni_set(fid); + if (err) + goto err_vni_set; + + return 0; + +err_vni_set: + fid->vni_valid = false; + rhashtable_remove_fast(&mlxsw_sp->fid_core->vni_ht, &fid->vni_ht_node, + mlxsw_sp_fid_vni_ht_params); + return err; +} + +void mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + const struct mlxsw_sp_fid_ops *ops = fid_family->ops; + struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp; + + if (WARN_ON(!fid->vni_valid)) + return; + + fid->vni_valid = false; + ops->vni_clear(fid); + rhashtable_remove_fast(&mlxsw_sp->fid_core->vni_ht, &fid->vni_ht_node, + mlxsw_sp_fid_vni_ht_params); +} + +bool mlxsw_sp_fid_vni_is_set(const struct mlxsw_sp_fid *fid) +{ + return fid->vni_valid; +} + +void mlxsw_sp_fid_fdb_clear_offload(const struct mlxsw_sp_fid *fid, + const struct net_device *nve_dev) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + const struct mlxsw_sp_fid_ops *ops = fid_family->ops; + + if (ops->fdb_clear_offload) + ops->fdb_clear_offload(fid, nve_dev); +} + +static const struct mlxsw_sp_flood_table * 
+mlxsw_sp_fid_flood_table_lookup(const struct mlxsw_sp_fid *fid, + enum mlxsw_sp_flood_type packet_type) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + int i; + + for (i = 0; i < fid_family->nr_flood_tables; i++) { + if (fid_family->flood_tables[i].packet_type != packet_type) + continue; + return &fid_family->flood_tables[i]; + } + + return NULL; +} + +static u16 +mlxsw_sp_fid_family_num_fids(const struct mlxsw_sp_fid_family *fid_family) +{ + return fid_family->end_index - fid_family->start_index + 1; +} + +static u16 +mlxsw_sp_fid_flood_table_mid(const struct mlxsw_sp_fid_family *fid_family, + const struct mlxsw_sp_flood_table *flood_table, + u16 fid_offset) +{ + u16 num_fids; + + num_fids = mlxsw_sp_fid_family_num_fids(fid_family); + return fid_family->pgt_base + num_fids * flood_table->table_index + + fid_offset; +} + +int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid, + enum mlxsw_sp_flood_type packet_type, u16 local_port, + bool member) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + const struct mlxsw_sp_flood_table *flood_table; + u16 mid_index; + + if (WARN_ON(!fid_family->flood_tables)) + return -EINVAL; + + flood_table = mlxsw_sp_fid_flood_table_lookup(fid, packet_type); + if (!flood_table) + return -ESRCH; + + mid_index = mlxsw_sp_fid_flood_table_mid(fid_family, flood_table, + fid->fid_offset); + return mlxsw_sp_pgt_entry_port_set(fid_family->mlxsw_sp, mid_index, + fid->fid_index, local_port, member); +} + +int mlxsw_sp_fid_port_vid_map(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) +{ + if (WARN_ON(!fid->fid_family->ops->port_vid_map)) + return -EINVAL; + return fid->fid_family->ops->port_vid_map(fid, mlxsw_sp_port, vid); +} + +void mlxsw_sp_fid_port_vid_unmap(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) +{ + fid->fid_family->ops->port_vid_unmap(fid, mlxsw_sp_port, vid); +} + +u16 mlxsw_sp_fid_index(const struct mlxsw_sp_fid *fid) +{ + return fid->fid_index; +} + +enum mlxsw_sp_fid_type mlxsw_sp_fid_type(const struct mlxsw_sp_fid *fid) +{ + return fid->fid_family->type; +} + +struct mlxsw_sp_rif *mlxsw_sp_fid_rif(const struct mlxsw_sp_fid *fid) +{ + return fid->rif; +} + +enum mlxsw_sp_rif_type +mlxsw_sp_fid_type_rif_type(const struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_fid_type type) +{ + struct mlxsw_sp_fid_core *fid_core = mlxsw_sp->fid_core; + + return fid_core->fid_family_arr[type]->rif_type; +} + +static struct mlxsw_sp_fid_8021q * +mlxsw_sp_fid_8021q_fid(const struct mlxsw_sp_fid *fid) +{ + return container_of(fid, struct mlxsw_sp_fid_8021q, common); +} + +u16 mlxsw_sp_fid_8021q_vid(const struct mlxsw_sp_fid *fid) +{ + return mlxsw_sp_fid_8021q_fid(fid)->vid; +} + +static void mlxsw_sp_fid_8021q_setup(struct mlxsw_sp_fid *fid, const void *arg) +{ + u16 vid = *(u16 *) arg; + + mlxsw_sp_fid_8021q_fid(fid)->vid = vid; + fid->fid_offset = fid->fid_index - fid->fid_family->start_index; +} + +static enum mlxsw_reg_sfmr_op mlxsw_sp_sfmr_op(bool valid) +{ + return valid ? MLXSW_REG_SFMR_OP_CREATE_FID : + MLXSW_REG_SFMR_OP_DESTROY_FID; +} + +static int mlxsw_sp_fid_op(const struct mlxsw_sp_fid *fid, bool valid) +{ + struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; + char sfmr_pl[MLXSW_REG_SFMR_LEN]; + u16 smpe; + + smpe = fid->fid_family->smpe_index_valid ? 
fid->fid_index : 0; + + mlxsw_reg_sfmr_pack(sfmr_pl, mlxsw_sp_sfmr_op(valid), fid->fid_index, + fid->fid_offset, fid->fid_family->flood_rsp, + fid->fid_family->bridge_type, + fid->fid_family->smpe_index_valid, smpe); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); +} + +static int mlxsw_sp_fid_edit_op(const struct mlxsw_sp_fid *fid, + const struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; + char sfmr_pl[MLXSW_REG_SFMR_LEN]; + u16 smpe; + + smpe = fid->fid_family->smpe_index_valid ? fid->fid_index : 0; + + mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, + fid->fid_index, fid->fid_offset, + fid->fid_family->flood_rsp, + fid->fid_family->bridge_type, + fid->fid_family->smpe_index_valid, smpe); + mlxsw_reg_sfmr_vv_set(sfmr_pl, fid->vni_valid); + mlxsw_reg_sfmr_vni_set(sfmr_pl, be32_to_cpu(fid->vni)); + mlxsw_reg_sfmr_vtfp_set(sfmr_pl, fid->nve_flood_index_valid); + mlxsw_reg_sfmr_nve_tunnel_flood_ptr_set(sfmr_pl, fid->nve_flood_index); + + if (rif) { + mlxsw_reg_sfmr_irif_v_set(sfmr_pl, true); + mlxsw_reg_sfmr_irif_set(sfmr_pl, mlxsw_sp_rif_index(rif)); + } + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); +} + +static int mlxsw_sp_fid_vni_to_fid_map(const struct mlxsw_sp_fid *fid, + const struct mlxsw_sp_rif *rif, + bool valid) +{ + struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; + char svfa_pl[MLXSW_REG_SVFA_LEN]; + bool irif_valid; + u16 irif_index; + + irif_valid = !!rif; + irif_index = rif ? mlxsw_sp_rif_index(rif) : 0; + + mlxsw_reg_svfa_vni_pack(svfa_pl, valid, fid->fid_index, + be32_to_cpu(fid->vni), irif_valid, irif_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl); +} + +static int mlxsw_sp_fid_to_fid_rif_update(const struct mlxsw_sp_fid *fid, + const struct mlxsw_sp_rif *rif) +{ + return mlxsw_sp_fid_edit_op(fid, rif); +} + +static int mlxsw_sp_fid_vni_to_fid_rif_update(const struct mlxsw_sp_fid *fid, + const struct mlxsw_sp_rif *rif) +{ + if (!fid->vni_valid) + return 0; + + return mlxsw_sp_fid_vni_to_fid_map(fid, rif, fid->vni_valid); +} + +static int +mlxsw_sp_fid_vid_to_fid_map(const struct mlxsw_sp_fid *fid, u16 vid, bool valid, + const struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; + char svfa_pl[MLXSW_REG_SVFA_LEN]; + bool irif_valid; + u16 irif_index; + + irif_valid = !!rif; + irif_index = rif ? mlxsw_sp_rif_index(rif) : 0; + + mlxsw_reg_svfa_vid_pack(svfa_pl, valid, fid->fid_index, vid, irif_valid, + irif_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl); +} + +static int +mlxsw_sp_fid_8021q_vid_to_fid_rif_update(const struct mlxsw_sp_fid *fid, + const struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_fid_8021q *fid_8021q = mlxsw_sp_fid_8021q_fid(fid); + + /* Update the global VID => FID mapping we created when the FID was + * configured. 
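+	 * Only the ingress RIF carried in the SVFA entry changes here; the
+	 * VID => FID association itself stays the same.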
+ */ + return mlxsw_sp_fid_vid_to_fid_map(fid, fid_8021q->vid, true, rif); +} + +static int +mlxsw_sp_fid_port_vid_to_fid_rif_update_one(const struct mlxsw_sp_fid *fid, + struct mlxsw_sp_fid_port_vid *pv, + bool irif_valid, u16 irif_index) +{ + struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; + char svfa_pl[MLXSW_REG_SVFA_LEN]; + + mlxsw_reg_svfa_port_vid_pack(svfa_pl, pv->local_port, true, + fid->fid_index, pv->vid, irif_valid, + irif_index); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl); +} + +static int mlxsw_sp_fid_vid_to_fid_rif_set(const struct mlxsw_sp_fid *fid, + const struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; + struct mlxsw_sp_fid_port_vid *pv; + u16 irif_index; + int err; + + err = fid->fid_family->ops->vid_to_fid_rif_update(fid, rif); + if (err) + return err; + + irif_index = mlxsw_sp_rif_index(rif); + + list_for_each_entry(pv, &fid->port_vid_list, list) { + /* If port is not in virtual mode, then it does not have any + * {Port, VID}->FID mappings that need to be updated with the + * ingress RIF. + */ + if (!mlxsw_sp->fid_core->port_fid_mappings[pv->local_port]) + continue; + + err = mlxsw_sp_fid_port_vid_to_fid_rif_update_one(fid, pv, + true, + irif_index); + if (err) + goto err_port_vid_to_fid_rif_update_one; + } + + return 0; + +err_port_vid_to_fid_rif_update_one: + list_for_each_entry_continue_reverse(pv, &fid->port_vid_list, list) { + if (!mlxsw_sp->fid_core->port_fid_mappings[pv->local_port]) + continue; + + mlxsw_sp_fid_port_vid_to_fid_rif_update_one(fid, pv, false, 0); + } + + fid->fid_family->ops->vid_to_fid_rif_update(fid, NULL); + return err; +} + +static void mlxsw_sp_fid_vid_to_fid_rif_unset(const struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; + struct mlxsw_sp_fid_port_vid *pv; + + list_for_each_entry(pv, &fid->port_vid_list, list) { + /* If port is not in virtual mode, then it does not have any + * {Port, VID}->FID mappings that need to be updated. + */ + if (!mlxsw_sp->fid_core->port_fid_mappings[pv->local_port]) + continue; + + mlxsw_sp_fid_port_vid_to_fid_rif_update_one(fid, pv, false, 0); + } + + fid->fid_family->ops->vid_to_fid_rif_update(fid, NULL); +} + +static int mlxsw_sp_fid_reiv_handle(struct mlxsw_sp_fid *fid, u16 rif_index, + bool valid, u8 port_page) +{ + u16 local_port_end = (port_page + 1) * MLXSW_REG_REIV_REC_MAX_COUNT - 1; + u16 local_port_start = port_page * MLXSW_REG_REIV_REC_MAX_COUNT; + struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; + struct mlxsw_sp_fid_port_vid *port_vid; + u8 rec_num, entries_num = 0; + char *reiv_pl; + int err; + + reiv_pl = kmalloc(MLXSW_REG_REIV_LEN, GFP_KERNEL); + if (!reiv_pl) + return -ENOMEM; + + mlxsw_reg_reiv_pack(reiv_pl, port_page, rif_index); + + list_for_each_entry(port_vid, &fid->port_vid_list, list) { + /* port_vid_list is sorted by local_port. */ + if (port_vid->local_port < local_port_start) + continue; + + if (port_vid->local_port > local_port_end) + break; + + rec_num = port_vid->local_port % MLXSW_REG_REIV_REC_MAX_COUNT; + mlxsw_reg_reiv_rec_update_set(reiv_pl, rec_num, true); + mlxsw_reg_reiv_rec_evid_set(reiv_pl, rec_num, + valid ? 
port_vid->vid : 0); + entries_num++; + } + + if (!entries_num) { + kfree(reiv_pl); + return 0; + } + + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(reiv), reiv_pl); + if (err) + goto err_reg_write; + + kfree(reiv_pl); + return 0; + +err_reg_write: + kfree(reiv_pl); + return err; +} + +static int mlxsw_sp_fid_erif_eport_to_vid_map(struct mlxsw_sp_fid *fid, + u16 rif_index, bool valid) +{ + struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; + u8 num_port_pages; + int err, i; + + num_port_pages = mlxsw_core_max_ports(mlxsw_sp->core) / + MLXSW_REG_REIV_REC_MAX_COUNT + 1; + + for (i = 0; i < num_port_pages; i++) { + err = mlxsw_sp_fid_reiv_handle(fid, rif_index, valid, i); + if (err) + goto err_reiv_handle; + } + + return 0; + +err_reiv_handle: + for (; i >= 0; i--) + mlxsw_sp_fid_reiv_handle(fid, rif_index, !valid, i); + return err; +} + +int mlxsw_sp_fid_rif_set(struct mlxsw_sp_fid *fid, struct mlxsw_sp_rif *rif) +{ + u16 rif_index = mlxsw_sp_rif_index(rif); + int err; + + err = mlxsw_sp_fid_to_fid_rif_update(fid, rif); + if (err) + return err; + + err = mlxsw_sp_fid_vni_to_fid_rif_update(fid, rif); + if (err) + goto err_vni_to_fid_rif_update; + + err = mlxsw_sp_fid_vid_to_fid_rif_set(fid, rif); + if (err) + goto err_vid_to_fid_rif_set; + + err = mlxsw_sp_fid_erif_eport_to_vid_map(fid, rif_index, true); + if (err) + goto err_erif_eport_to_vid_map; + + fid->rif = rif; + return 0; + +err_erif_eport_to_vid_map: + mlxsw_sp_fid_vid_to_fid_rif_unset(fid); +err_vid_to_fid_rif_set: + mlxsw_sp_fid_vni_to_fid_rif_update(fid, NULL); +err_vni_to_fid_rif_update: + mlxsw_sp_fid_to_fid_rif_update(fid, NULL); + return err; +} + +void mlxsw_sp_fid_rif_unset(struct mlxsw_sp_fid *fid) +{ + u16 rif_index; + + if (!fid->rif) + return; + + rif_index = mlxsw_sp_rif_index(fid->rif); + fid->rif = NULL; + + mlxsw_sp_fid_erif_eport_to_vid_map(fid, rif_index, false); + mlxsw_sp_fid_vid_to_fid_rif_unset(fid); + mlxsw_sp_fid_vni_to_fid_rif_update(fid, NULL); + mlxsw_sp_fid_to_fid_rif_update(fid, NULL); +} + +static int mlxsw_sp_fid_vni_op(const struct mlxsw_sp_fid *fid) +{ + int err; + + err = mlxsw_sp_fid_vni_to_fid_map(fid, fid->rif, fid->vni_valid); + if (err) + return err; + + err = mlxsw_sp_fid_edit_op(fid, fid->rif); + if (err) + goto err_fid_edit_op; + + return 0; + +err_fid_edit_op: + mlxsw_sp_fid_vni_to_fid_map(fid, fid->rif, !fid->vni_valid); + return err; +} + +static int __mlxsw_sp_fid_port_vid_map(const struct mlxsw_sp_fid *fid, + u16 local_port, u16 vid, bool valid) +{ + struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; + char svfa_pl[MLXSW_REG_SVFA_LEN]; + bool irif_valid = false; + u16 irif_index = 0; + + if (fid->rif) { + irif_valid = true; + irif_index = mlxsw_sp_rif_index(fid->rif); + } + + mlxsw_reg_svfa_port_vid_pack(svfa_pl, local_port, valid, fid->fid_index, + vid, irif_valid, irif_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl); +} + +static struct mlxsw_sp_fid_8021d * +mlxsw_sp_fid_8021d_fid(const struct mlxsw_sp_fid *fid) +{ + return container_of(fid, struct mlxsw_sp_fid_8021d, common); +} + +static void mlxsw_sp_fid_8021d_setup(struct mlxsw_sp_fid *fid, const void *arg) +{ + int br_ifindex = *(int *) arg; + + mlxsw_sp_fid_8021d_fid(fid)->br_ifindex = br_ifindex; + fid->fid_offset = fid->fid_index - fid->fid_family->start_index; +} + +static int mlxsw_sp_fid_8021d_configure(struct mlxsw_sp_fid *fid) +{ + return mlxsw_sp_fid_op(fid, true); +} + +static void mlxsw_sp_fid_8021d_deconfigure(struct mlxsw_sp_fid *fid) +{ + if (fid->vni_valid) + 
mlxsw_sp_nve_fid_disable(fid->fid_family->mlxsw_sp, fid); + mlxsw_sp_fid_op(fid, false); +} + +static int mlxsw_sp_fid_8021d_index_alloc(struct mlxsw_sp_fid *fid, + const void *arg, u16 *p_fid_index) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + u16 nr_fids, fid_index; + + nr_fids = fid_family->end_index - fid_family->start_index + 1; + fid_index = find_first_zero_bit(fid_family->fids_bitmap, nr_fids); + if (fid_index == nr_fids) + return -ENOBUFS; + *p_fid_index = fid_family->start_index + fid_index; + + return 0; +} + +static bool +mlxsw_sp_fid_8021d_compare(const struct mlxsw_sp_fid *fid, const void *arg) +{ + int br_ifindex = *(int *) arg; + + return mlxsw_sp_fid_8021d_fid(fid)->br_ifindex == br_ifindex; +} + +static int mlxsw_sp_port_vp_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + int err; + + list_for_each_entry(mlxsw_sp_port_vlan, &mlxsw_sp_port->vlans_list, + list) { + struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid; + u16 vid = mlxsw_sp_port_vlan->vid; + + if (!fid) + continue; + + err = __mlxsw_sp_fid_port_vid_map(fid, + mlxsw_sp_port->local_port, + vid, true); + if (err) + goto err_fid_port_vid_map; + } + + err = mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, true); + if (err) + goto err_port_vp_mode_set; + + return 0; + +err_port_vp_mode_set: +err_fid_port_vid_map: + list_for_each_entry_continue_reverse(mlxsw_sp_port_vlan, + &mlxsw_sp_port->vlans_list, list) { + struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid; + u16 vid = mlxsw_sp_port_vlan->vid; + + if (!fid) + continue; + + __mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port->local_port, vid, + false); + } + return err; +} + +static void mlxsw_sp_port_vlan_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + + mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false); + + list_for_each_entry_reverse(mlxsw_sp_port_vlan, + &mlxsw_sp_port->vlans_list, list) { + struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid; + u16 vid = mlxsw_sp_port_vlan->vid; + + if (!fid) + continue; + + __mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port->local_port, vid, + false); + } +} + +static int +mlxsw_sp_fid_port_vid_list_add(struct mlxsw_sp_fid *fid, u16 local_port, + u16 vid) +{ + struct mlxsw_sp_fid_port_vid *port_vid, *tmp_port_vid; + + port_vid = kzalloc(sizeof(*port_vid), GFP_KERNEL); + if (!port_vid) + return -ENOMEM; + + port_vid->local_port = local_port; + port_vid->vid = vid; + + list_for_each_entry(tmp_port_vid, &fid->port_vid_list, list) { + if (tmp_port_vid->local_port > local_port) + break; + } + + list_add_tail(&port_vid->list, &tmp_port_vid->list); + return 0; +} + +static void +mlxsw_sp_fid_port_vid_list_del(struct mlxsw_sp_fid *fid, u16 local_port, + u16 vid) +{ + struct mlxsw_sp_fid_port_vid *port_vid, *tmp; + + list_for_each_entry_safe(port_vid, tmp, &fid->port_vid_list, list) { + if (port_vid->local_port != local_port || port_vid->vid != vid) + continue; + + list_del(&port_vid->list); + kfree(port_vid); + return; + } +} + +static int +mlxsw_sp_fid_mpe_table_map(const struct mlxsw_sp_fid *fid, u16 local_port, + u16 vid, bool valid) +{ + struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; + char smpe_pl[MLXSW_REG_SMPE_LEN]; + + mlxsw_reg_smpe_pack(smpe_pl, local_port, fid->fid_index, + valid ? 
vid : 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smpe), smpe_pl); +} + +static int +mlxsw_sp_fid_erif_eport_to_vid_map_one(const struct mlxsw_sp_fid *fid, + u16 local_port, u16 vid, bool valid) +{ + u8 port_page = local_port / MLXSW_REG_REIV_REC_MAX_COUNT; + u8 rec_num = local_port % MLXSW_REG_REIV_REC_MAX_COUNT; + struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; + u16 rif_index = mlxsw_sp_rif_index(fid->rif); + char *reiv_pl; + int err; + + reiv_pl = kmalloc(MLXSW_REG_REIV_LEN, GFP_KERNEL); + if (!reiv_pl) + return -ENOMEM; + + mlxsw_reg_reiv_pack(reiv_pl, port_page, rif_index); + mlxsw_reg_reiv_rec_update_set(reiv_pl, rec_num, true); + mlxsw_reg_reiv_rec_evid_set(reiv_pl, rec_num, valid ? vid : 0); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(reiv), reiv_pl); + kfree(reiv_pl); + return err; +} + +static int mlxsw_sp_fid_evid_map(const struct mlxsw_sp_fid *fid, u16 local_port, + u16 vid, bool valid) +{ + int err; + + err = mlxsw_sp_fid_mpe_table_map(fid, local_port, vid, valid); + if (err) + return err; + + if (!fid->rif) + return 0; + + err = mlxsw_sp_fid_erif_eport_to_vid_map_one(fid, local_port, vid, + valid); + if (err) + goto err_erif_eport_to_vid_map_one; + + return 0; + +err_erif_eport_to_vid_map_one: + mlxsw_sp_fid_mpe_table_map(fid, local_port, vid, !valid); + return err; +} + +static int mlxsw_sp_fid_8021d_port_vid_map(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u16 local_port = mlxsw_sp_port->local_port; + int err; + + err = __mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port->local_port, vid, + true); + if (err) + return err; + + err = mlxsw_sp_fid_evid_map(fid, local_port, vid, true); + if (err) + goto err_fid_evid_map; + + err = mlxsw_sp_fid_port_vid_list_add(fid, mlxsw_sp_port->local_port, + vid); + if (err) + goto err_port_vid_list_add; + + if (mlxsw_sp->fid_core->port_fid_mappings[local_port]++ == 0) { + err = mlxsw_sp_port_vp_mode_trans(mlxsw_sp_port); + if (err) + goto err_port_vp_mode_trans; + } + + return 0; + +err_port_vp_mode_trans: + mlxsw_sp->fid_core->port_fid_mappings[local_port]--; + mlxsw_sp_fid_port_vid_list_del(fid, mlxsw_sp_port->local_port, vid); +err_port_vid_list_add: + mlxsw_sp_fid_evid_map(fid, local_port, vid, false); +err_fid_evid_map: + __mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port->local_port, vid, false); + return err; +} + +static void +mlxsw_sp_fid_8021d_port_vid_unmap(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u16 local_port = mlxsw_sp_port->local_port; + + if (mlxsw_sp->fid_core->port_fid_mappings[local_port] == 1) + mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); + mlxsw_sp->fid_core->port_fid_mappings[local_port]--; + mlxsw_sp_fid_port_vid_list_del(fid, mlxsw_sp_port->local_port, vid); + mlxsw_sp_fid_evid_map(fid, local_port, vid, false); + __mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port->local_port, vid, false); +} + +static int mlxsw_sp_fid_8021d_vni_set(struct mlxsw_sp_fid *fid) +{ + return mlxsw_sp_fid_vni_op(fid); +} + +static void mlxsw_sp_fid_8021d_vni_clear(struct mlxsw_sp_fid *fid) +{ + mlxsw_sp_fid_vni_op(fid); +} + +static int mlxsw_sp_fid_8021d_nve_flood_index_set(struct mlxsw_sp_fid *fid) +{ + return mlxsw_sp_fid_edit_op(fid, fid->rif); +} + +static void mlxsw_sp_fid_8021d_nve_flood_index_clear(struct mlxsw_sp_fid *fid) +{ + mlxsw_sp_fid_edit_op(fid, fid->rif); +} + +static void +mlxsw_sp_fid_8021d_fdb_clear_offload(const struct 
mlxsw_sp_fid *fid, + const struct net_device *nve_dev) +{ + br_fdb_clear_offload(nve_dev, 0); +} + +static int +mlxsw_sp_fid_8021d_vid_to_fid_rif_update(const struct mlxsw_sp_fid *fid, + const struct mlxsw_sp_rif *rif) +{ + return 0; +} + +static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021d_ops = { + .setup = mlxsw_sp_fid_8021d_setup, + .configure = mlxsw_sp_fid_8021d_configure, + .deconfigure = mlxsw_sp_fid_8021d_deconfigure, + .index_alloc = mlxsw_sp_fid_8021d_index_alloc, + .compare = mlxsw_sp_fid_8021d_compare, + .port_vid_map = mlxsw_sp_fid_8021d_port_vid_map, + .port_vid_unmap = mlxsw_sp_fid_8021d_port_vid_unmap, + .vni_set = mlxsw_sp_fid_8021d_vni_set, + .vni_clear = mlxsw_sp_fid_8021d_vni_clear, + .nve_flood_index_set = mlxsw_sp_fid_8021d_nve_flood_index_set, + .nve_flood_index_clear = mlxsw_sp_fid_8021d_nve_flood_index_clear, + .fdb_clear_offload = mlxsw_sp_fid_8021d_fdb_clear_offload, + .vid_to_fid_rif_update = mlxsw_sp_fid_8021d_vid_to_fid_rif_update, +}; + +#define MLXSW_SP_FID_8021Q_MAX (VLAN_N_VID - 2) +#define MLXSW_SP_FID_RFID_MAX (11 * 1024) +#define MLXSW_SP_FID_8021Q_PGT_BASE 0 +#define MLXSW_SP_FID_8021D_PGT_BASE (3 * MLXSW_SP_FID_8021Q_MAX) + +static const struct mlxsw_sp_flood_table mlxsw_sp_fid_8021d_flood_tables[] = { + { + .packet_type = MLXSW_SP_FLOOD_TYPE_UC, + .table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFSET, + .table_index = 0, + }, + { + .packet_type = MLXSW_SP_FLOOD_TYPE_MC, + .table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFSET, + .table_index = 1, + }, + { + .packet_type = MLXSW_SP_FLOOD_TYPE_BC, + .table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFSET, + .table_index = 2, + }, +}; + +static bool +mlxsw_sp_fid_8021q_compare(const struct mlxsw_sp_fid *fid, const void *arg) +{ + u16 vid = *(u16 *) arg; + + return mlxsw_sp_fid_8021q_fid(fid)->vid == vid; +} + +static void +mlxsw_sp_fid_8021q_fdb_clear_offload(const struct mlxsw_sp_fid *fid, + const struct net_device *nve_dev) +{ + br_fdb_clear_offload(nve_dev, mlxsw_sp_fid_8021q_vid(fid)); +} + +static void mlxsw_sp_fid_rfid_setup(struct mlxsw_sp_fid *fid, const void *arg) +{ + fid->fid_offset = 0; +} + +static int mlxsw_sp_fid_rfid_configure(struct mlxsw_sp_fid *fid) +{ + return mlxsw_sp_fid_op(fid, true); +} + +static void mlxsw_sp_fid_rfid_deconfigure(struct mlxsw_sp_fid *fid) +{ + mlxsw_sp_fid_op(fid, false); +} + +static int mlxsw_sp_fid_rfid_index_alloc(struct mlxsw_sp_fid *fid, + const void *arg, u16 *p_fid_index) +{ + u16 rif_index = *(u16 *) arg; + + *p_fid_index = fid->fid_family->start_index + rif_index; + + return 0; +} + +static bool mlxsw_sp_fid_rfid_compare(const struct mlxsw_sp_fid *fid, + const void *arg) +{ + u16 rif_index = *(u16 *) arg; + + return fid->fid_index == rif_index + fid->fid_family->start_index; +} + +static int mlxsw_sp_fid_rfid_port_vid_map(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u16 local_port = mlxsw_sp_port->local_port; + int err; + + err = mlxsw_sp_fid_port_vid_list_add(fid, mlxsw_sp_port->local_port, + vid); + if (err) + return err; + + /* Using legacy bridge model, we only need to transition the port to + * virtual mode since {Port, VID} => FID is done by the firmware upon + * RIF creation. Using unified bridge model, we need to map + * {Port, VID} => FID and map egress VID. 
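+	 * The two writes below mirror that: SVFA installs the ingress
+	 * {Port, VID} => FID lookup and, when a RIF is bound, REIV maps the
+	 * egress VID.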
+ */ + err = __mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port->local_port, vid, + true); + if (err) + goto err_port_vid_map; + + if (fid->rif) { + err = mlxsw_sp_fid_erif_eport_to_vid_map_one(fid, local_port, + vid, true); + if (err) + goto err_erif_eport_to_vid_map_one; + } + + if (mlxsw_sp->fid_core->port_fid_mappings[local_port]++ == 0) { + err = mlxsw_sp_port_vp_mode_trans(mlxsw_sp_port); + if (err) + goto err_port_vp_mode_trans; + } + + return 0; + +err_port_vp_mode_trans: + mlxsw_sp->fid_core->port_fid_mappings[local_port]--; + if (fid->rif) + mlxsw_sp_fid_erif_eport_to_vid_map_one(fid, local_port, vid, + false); +err_erif_eport_to_vid_map_one: + __mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port->local_port, vid, false); +err_port_vid_map: + mlxsw_sp_fid_port_vid_list_del(fid, mlxsw_sp_port->local_port, vid); + return err; +} + +static void +mlxsw_sp_fid_rfid_port_vid_unmap(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u16 local_port = mlxsw_sp_port->local_port; + + if (mlxsw_sp->fid_core->port_fid_mappings[local_port] == 1) + mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); + mlxsw_sp->fid_core->port_fid_mappings[local_port]--; + + if (fid->rif) + mlxsw_sp_fid_erif_eport_to_vid_map_one(fid, local_port, vid, + false); + __mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port->local_port, vid, false); + mlxsw_sp_fid_port_vid_list_del(fid, mlxsw_sp_port->local_port, vid); +} + +static int mlxsw_sp_fid_rfid_vni_set(struct mlxsw_sp_fid *fid) +{ + return -EOPNOTSUPP; +} + +static void mlxsw_sp_fid_rfid_vni_clear(struct mlxsw_sp_fid *fid) +{ + WARN_ON_ONCE(1); +} + +static int mlxsw_sp_fid_rfid_nve_flood_index_set(struct mlxsw_sp_fid *fid) +{ + return -EOPNOTSUPP; +} + +static void mlxsw_sp_fid_rfid_nve_flood_index_clear(struct mlxsw_sp_fid *fid) +{ + WARN_ON_ONCE(1); +} + +static int +mlxsw_sp_fid_rfid_vid_to_fid_rif_update(const struct mlxsw_sp_fid *fid, + const struct mlxsw_sp_rif *rif) +{ + return 0; +} + +static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_rfid_ops = { + .setup = mlxsw_sp_fid_rfid_setup, + .configure = mlxsw_sp_fid_rfid_configure, + .deconfigure = mlxsw_sp_fid_rfid_deconfigure, + .index_alloc = mlxsw_sp_fid_rfid_index_alloc, + .compare = mlxsw_sp_fid_rfid_compare, + .port_vid_map = mlxsw_sp_fid_rfid_port_vid_map, + .port_vid_unmap = mlxsw_sp_fid_rfid_port_vid_unmap, + .vni_set = mlxsw_sp_fid_rfid_vni_set, + .vni_clear = mlxsw_sp_fid_rfid_vni_clear, + .nve_flood_index_set = mlxsw_sp_fid_rfid_nve_flood_index_set, + .nve_flood_index_clear = mlxsw_sp_fid_rfid_nve_flood_index_clear, + .vid_to_fid_rif_update = mlxsw_sp_fid_rfid_vid_to_fid_rif_update, +}; + +static void mlxsw_sp_fid_dummy_setup(struct mlxsw_sp_fid *fid, const void *arg) +{ + fid->fid_offset = 0; +} + +static int mlxsw_sp_fid_dummy_configure(struct mlxsw_sp_fid *fid) +{ + return mlxsw_sp_fid_op(fid, true); +} + +static void mlxsw_sp_fid_dummy_deconfigure(struct mlxsw_sp_fid *fid) +{ + mlxsw_sp_fid_op(fid, false); +} + +static int mlxsw_sp_fid_dummy_index_alloc(struct mlxsw_sp_fid *fid, + const void *arg, u16 *p_fid_index) +{ + *p_fid_index = fid->fid_family->start_index; + + return 0; +} + +static bool mlxsw_sp_fid_dummy_compare(const struct mlxsw_sp_fid *fid, + const void *arg) +{ + return true; +} + +static int mlxsw_sp_fid_dummy_vni_set(struct mlxsw_sp_fid *fid) +{ + return -EOPNOTSUPP; +} + +static void mlxsw_sp_fid_dummy_vni_clear(struct mlxsw_sp_fid *fid) +{ + WARN_ON_ONCE(1); +} + +static int 
mlxsw_sp_fid_dummy_nve_flood_index_set(struct mlxsw_sp_fid *fid) +{ + return -EOPNOTSUPP; +} + +static void mlxsw_sp_fid_dummy_nve_flood_index_clear(struct mlxsw_sp_fid *fid) +{ + WARN_ON_ONCE(1); +} + +static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_dummy_ops = { + .setup = mlxsw_sp_fid_dummy_setup, + .configure = mlxsw_sp_fid_dummy_configure, + .deconfigure = mlxsw_sp_fid_dummy_deconfigure, + .index_alloc = mlxsw_sp_fid_dummy_index_alloc, + .compare = mlxsw_sp_fid_dummy_compare, + .vni_set = mlxsw_sp_fid_dummy_vni_set, + .vni_clear = mlxsw_sp_fid_dummy_vni_clear, + .nve_flood_index_set = mlxsw_sp_fid_dummy_nve_flood_index_set, + .nve_flood_index_clear = mlxsw_sp_fid_dummy_nve_flood_index_clear, +}; + +static int mlxsw_sp_fid_8021q_configure(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_fid_8021q *fid_8021q = mlxsw_sp_fid_8021q_fid(fid); + int err; + + err = mlxsw_sp_fid_op(fid, true); + if (err) + return err; + + err = mlxsw_sp_fid_vid_to_fid_map(fid, fid_8021q->vid, true, fid->rif); + if (err) + goto err_vid_to_fid_map; + + return 0; + +err_vid_to_fid_map: + mlxsw_sp_fid_op(fid, false); + return err; +} + +static void mlxsw_sp_fid_8021q_deconfigure(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_fid_8021q *fid_8021q = mlxsw_sp_fid_8021q_fid(fid); + + if (fid->vni_valid) + mlxsw_sp_nve_fid_disable(fid->fid_family->mlxsw_sp, fid); + + mlxsw_sp_fid_vid_to_fid_map(fid, fid_8021q->vid, false, NULL); + mlxsw_sp_fid_op(fid, false); +} + +static int mlxsw_sp_fid_8021q_port_vid_map(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u16 local_port = mlxsw_sp_port->local_port; + int err; + + /* In case there are no {Port, VID} => FID mappings on the port, + * we can use the global VID => FID mapping we created when the + * FID was configured, otherwise, configure new mapping. 
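+	 * A non-zero port_fid_mappings[] count means the port already
+	 * operates in virtual mode and needs an explicit {Port, VID} => FID
+	 * entry.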
+ */ + if (mlxsw_sp->fid_core->port_fid_mappings[local_port]) { + err = __mlxsw_sp_fid_port_vid_map(fid, local_port, vid, true); + if (err) + return err; + } + + err = mlxsw_sp_fid_evid_map(fid, local_port, vid, true); + if (err) + goto err_fid_evid_map; + + err = mlxsw_sp_fid_port_vid_list_add(fid, mlxsw_sp_port->local_port, + vid); + if (err) + goto err_port_vid_list_add; + + return 0; + +err_port_vid_list_add: + mlxsw_sp_fid_evid_map(fid, local_port, vid, false); +err_fid_evid_map: + if (mlxsw_sp->fid_core->port_fid_mappings[local_port]) + __mlxsw_sp_fid_port_vid_map(fid, local_port, vid, false); + return err; +} + +static void +mlxsw_sp_fid_8021q_port_vid_unmap(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u16 local_port = mlxsw_sp_port->local_port; + + mlxsw_sp_fid_port_vid_list_del(fid, mlxsw_sp_port->local_port, vid); + mlxsw_sp_fid_evid_map(fid, local_port, vid, false); + if (mlxsw_sp->fid_core->port_fid_mappings[local_port]) + __mlxsw_sp_fid_port_vid_map(fid, local_port, vid, false); +} + +static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021q_ops = { + .setup = mlxsw_sp_fid_8021q_setup, + .configure = mlxsw_sp_fid_8021q_configure, + .deconfigure = mlxsw_sp_fid_8021q_deconfigure, + .index_alloc = mlxsw_sp_fid_8021d_index_alloc, + .compare = mlxsw_sp_fid_8021q_compare, + .port_vid_map = mlxsw_sp_fid_8021q_port_vid_map, + .port_vid_unmap = mlxsw_sp_fid_8021q_port_vid_unmap, + .vni_set = mlxsw_sp_fid_8021d_vni_set, + .vni_clear = mlxsw_sp_fid_8021d_vni_clear, + .nve_flood_index_set = mlxsw_sp_fid_8021d_nve_flood_index_set, + .nve_flood_index_clear = mlxsw_sp_fid_8021d_nve_flood_index_clear, + .fdb_clear_offload = mlxsw_sp_fid_8021q_fdb_clear_offload, + .vid_to_fid_rif_update = mlxsw_sp_fid_8021q_vid_to_fid_rif_update, +}; + +/* There are 4K-2 802.1Q FIDs */ +#define MLXSW_SP_FID_8021Q_START 1 /* FID 0 is reserved. 
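+ * With the usual 4K VLAN space and the 1K 802.1D FIDs below, the
+ * resulting layout is: 802.1Q FIDs 1..4094, 802.1D FIDs 4095..5118,
+ * the dummy FID at 5119 and rFIDs 5120..16383.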
*/ +#define MLXSW_SP_FID_8021Q_END (MLXSW_SP_FID_8021Q_START + \ + MLXSW_SP_FID_8021Q_MAX - 1) + +/* There are 1K 802.1D FIDs */ +#define MLXSW_SP_FID_8021D_START (MLXSW_SP_FID_8021Q_END + 1) +#define MLXSW_SP_FID_8021D_END (MLXSW_SP_FID_8021D_START + \ + MLXSW_SP_FID_8021D_MAX - 1) + +/* There is one dummy FID */ +#define MLXSW_SP_FID_DUMMY (MLXSW_SP_FID_8021D_END + 1) + +/* There are 11K rFIDs */ +#define MLXSW_SP_RFID_START (MLXSW_SP_FID_DUMMY + 1) +#define MLXSW_SP_RFID_END (MLXSW_SP_RFID_START + \ + MLXSW_SP_FID_RFID_MAX - 1) + +static const struct mlxsw_sp_fid_family mlxsw_sp1_fid_8021q_family = { + .type = MLXSW_SP_FID_TYPE_8021Q, + .fid_size = sizeof(struct mlxsw_sp_fid_8021q), + .start_index = MLXSW_SP_FID_8021Q_START, + .end_index = MLXSW_SP_FID_8021Q_END, + .flood_tables = mlxsw_sp_fid_8021d_flood_tables, + .nr_flood_tables = ARRAY_SIZE(mlxsw_sp_fid_8021d_flood_tables), + .rif_type = MLXSW_SP_RIF_TYPE_VLAN, + .ops = &mlxsw_sp_fid_8021q_ops, + .flood_rsp = false, + .bridge_type = MLXSW_REG_BRIDGE_TYPE_0, + .pgt_base = MLXSW_SP_FID_8021Q_PGT_BASE, + .smpe_index_valid = false, +}; + +static const struct mlxsw_sp_fid_family mlxsw_sp1_fid_8021d_family = { + .type = MLXSW_SP_FID_TYPE_8021D, + .fid_size = sizeof(struct mlxsw_sp_fid_8021d), + .start_index = MLXSW_SP_FID_8021D_START, + .end_index = MLXSW_SP_FID_8021D_END, + .flood_tables = mlxsw_sp_fid_8021d_flood_tables, + .nr_flood_tables = ARRAY_SIZE(mlxsw_sp_fid_8021d_flood_tables), + .rif_type = MLXSW_SP_RIF_TYPE_FID, + .ops = &mlxsw_sp_fid_8021d_ops, + .bridge_type = MLXSW_REG_BRIDGE_TYPE_1, + .pgt_base = MLXSW_SP_FID_8021D_PGT_BASE, + .smpe_index_valid = false, +}; + +static const struct mlxsw_sp_fid_family mlxsw_sp1_fid_dummy_family = { + .type = MLXSW_SP_FID_TYPE_DUMMY, + .fid_size = sizeof(struct mlxsw_sp_fid), + .start_index = MLXSW_SP_FID_DUMMY, + .end_index = MLXSW_SP_FID_DUMMY, + .ops = &mlxsw_sp_fid_dummy_ops, + .smpe_index_valid = false, +}; + +static const struct mlxsw_sp_fid_family mlxsw_sp_fid_rfid_family = { + .type = MLXSW_SP_FID_TYPE_RFID, + .fid_size = sizeof(struct mlxsw_sp_fid), + .start_index = MLXSW_SP_RFID_START, + .end_index = MLXSW_SP_RFID_END, + .rif_type = MLXSW_SP_RIF_TYPE_SUBPORT, + .ops = &mlxsw_sp_fid_rfid_ops, + .flood_rsp = true, + .smpe_index_valid = false, +}; + +const struct mlxsw_sp_fid_family *mlxsw_sp1_fid_family_arr[] = { + [MLXSW_SP_FID_TYPE_8021Q] = &mlxsw_sp1_fid_8021q_family, + [MLXSW_SP_FID_TYPE_8021D] = &mlxsw_sp1_fid_8021d_family, + [MLXSW_SP_FID_TYPE_DUMMY] = &mlxsw_sp1_fid_dummy_family, + [MLXSW_SP_FID_TYPE_RFID] = &mlxsw_sp_fid_rfid_family, +}; + +static const struct mlxsw_sp_fid_family mlxsw_sp2_fid_8021q_family = { + .type = MLXSW_SP_FID_TYPE_8021Q, + .fid_size = sizeof(struct mlxsw_sp_fid_8021q), + .start_index = MLXSW_SP_FID_8021Q_START, + .end_index = MLXSW_SP_FID_8021Q_END, + .flood_tables = mlxsw_sp_fid_8021d_flood_tables, + .nr_flood_tables = ARRAY_SIZE(mlxsw_sp_fid_8021d_flood_tables), + .rif_type = MLXSW_SP_RIF_TYPE_VLAN, + .ops = &mlxsw_sp_fid_8021q_ops, + .flood_rsp = false, + .bridge_type = MLXSW_REG_BRIDGE_TYPE_0, + .pgt_base = MLXSW_SP_FID_8021Q_PGT_BASE, + .smpe_index_valid = true, +}; + +static const struct mlxsw_sp_fid_family mlxsw_sp2_fid_8021d_family = { + .type = MLXSW_SP_FID_TYPE_8021D, + .fid_size = sizeof(struct mlxsw_sp_fid_8021d), + .start_index = MLXSW_SP_FID_8021D_START, + .end_index = MLXSW_SP_FID_8021D_END, + .flood_tables = mlxsw_sp_fid_8021d_flood_tables, + .nr_flood_tables = ARRAY_SIZE(mlxsw_sp_fid_8021d_flood_tables), + .rif_type = 
MLXSW_SP_RIF_TYPE_FID, + .ops = &mlxsw_sp_fid_8021d_ops, + .bridge_type = MLXSW_REG_BRIDGE_TYPE_1, + .pgt_base = MLXSW_SP_FID_8021D_PGT_BASE, + .smpe_index_valid = true, +}; + +static const struct mlxsw_sp_fid_family mlxsw_sp2_fid_dummy_family = { + .type = MLXSW_SP_FID_TYPE_DUMMY, + .fid_size = sizeof(struct mlxsw_sp_fid), + .start_index = MLXSW_SP_FID_DUMMY, + .end_index = MLXSW_SP_FID_DUMMY, + .ops = &mlxsw_sp_fid_dummy_ops, + .smpe_index_valid = false, +}; + +const struct mlxsw_sp_fid_family *mlxsw_sp2_fid_family_arr[] = { + [MLXSW_SP_FID_TYPE_8021Q] = &mlxsw_sp2_fid_8021q_family, + [MLXSW_SP_FID_TYPE_8021D] = &mlxsw_sp2_fid_8021d_family, + [MLXSW_SP_FID_TYPE_DUMMY] = &mlxsw_sp2_fid_dummy_family, + [MLXSW_SP_FID_TYPE_RFID] = &mlxsw_sp_fid_rfid_family, +}; + +static struct mlxsw_sp_fid *mlxsw_sp_fid_lookup(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_fid_type type, + const void *arg) +{ + struct mlxsw_sp_fid_family *fid_family; + struct mlxsw_sp_fid *fid; + + fid_family = mlxsw_sp->fid_core->fid_family_arr[type]; + list_for_each_entry(fid, &fid_family->fids_list, list) { + if (!fid->fid_family->ops->compare(fid, arg)) + continue; + refcount_inc(&fid->ref_count); + return fid; + } + + return NULL; +} + +static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_fid_type type, + const void *arg) +{ + struct mlxsw_sp_fid_family *fid_family; + struct mlxsw_sp_fid *fid; + u16 fid_index; + int err; + + fid = mlxsw_sp_fid_lookup(mlxsw_sp, type, arg); + if (fid) + return fid; + + fid_family = mlxsw_sp->fid_core->fid_family_arr[type]; + fid = kzalloc(fid_family->fid_size, GFP_KERNEL); + if (!fid) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&fid->port_vid_list); + fid->fid_family = fid_family; + + err = fid->fid_family->ops->index_alloc(fid, arg, &fid_index); + if (err) + goto err_index_alloc; + fid->fid_index = fid_index; + __set_bit(fid_index - fid_family->start_index, fid_family->fids_bitmap); + + fid->fid_family->ops->setup(fid, arg); + + err = fid->fid_family->ops->configure(fid); + if (err) + goto err_configure; + + err = rhashtable_insert_fast(&mlxsw_sp->fid_core->fid_ht, &fid->ht_node, + mlxsw_sp_fid_ht_params); + if (err) + goto err_rhashtable_insert; + + list_add(&fid->list, &fid_family->fids_list); + refcount_set(&fid->ref_count, 1); + return fid; + +err_rhashtable_insert: + fid->fid_family->ops->deconfigure(fid); +err_configure: + __clear_bit(fid_index - fid_family->start_index, + fid_family->fids_bitmap); +err_index_alloc: + kfree(fid); + return ERR_PTR(err); +} + +void mlxsw_sp_fid_put(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp; + + if (!refcount_dec_and_test(&fid->ref_count)) + return; + + list_del(&fid->list); + rhashtable_remove_fast(&mlxsw_sp->fid_core->fid_ht, + &fid->ht_node, mlxsw_sp_fid_ht_params); + fid->fid_family->ops->deconfigure(fid); + __clear_bit(fid->fid_index - fid_family->start_index, + fid_family->fids_bitmap); + WARN_ON_ONCE(!list_empty(&fid->port_vid_list)); + kfree(fid); +} + +struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_get(struct mlxsw_sp *mlxsw_sp, u16 vid) +{ + return mlxsw_sp_fid_get(mlxsw_sp, MLXSW_SP_FID_TYPE_8021Q, &vid); +} + +struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp, + int br_ifindex) +{ + return mlxsw_sp_fid_get(mlxsw_sp, MLXSW_SP_FID_TYPE_8021D, &br_ifindex); +} + +struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_lookup(struct mlxsw_sp *mlxsw_sp, + u16 vid) +{ + return mlxsw_sp_fid_lookup(mlxsw_sp, 
MLXSW_SP_FID_TYPE_8021Q, &vid); +} + +struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_lookup(struct mlxsw_sp *mlxsw_sp, + int br_ifindex) +{ + return mlxsw_sp_fid_lookup(mlxsw_sp, MLXSW_SP_FID_TYPE_8021D, + &br_ifindex); +} + +struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp, + u16 rif_index) +{ + return mlxsw_sp_fid_get(mlxsw_sp, MLXSW_SP_FID_TYPE_RFID, &rif_index); +} + +struct mlxsw_sp_fid *mlxsw_sp_fid_dummy_get(struct mlxsw_sp *mlxsw_sp) +{ + return mlxsw_sp_fid_get(mlxsw_sp, MLXSW_SP_FID_TYPE_DUMMY, NULL); +} + +static int +mlxsw_sp_fid_flood_table_init(struct mlxsw_sp_fid_family *fid_family, + const struct mlxsw_sp_flood_table *flood_table) +{ + enum mlxsw_sp_flood_type packet_type = flood_table->packet_type; + struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp; + const int *sfgc_packet_types; + u16 num_fids, mid_base; + int err, i; + + mid_base = mlxsw_sp_fid_flood_table_mid(fid_family, flood_table, 0); + num_fids = mlxsw_sp_fid_family_num_fids(fid_family); + err = mlxsw_sp_pgt_mid_alloc_range(mlxsw_sp, mid_base, num_fids); + if (err) + return err; + + sfgc_packet_types = mlxsw_sp_packet_type_sfgc_types[packet_type]; + for (i = 0; i < MLXSW_REG_SFGC_TYPE_MAX; i++) { + char sfgc_pl[MLXSW_REG_SFGC_LEN]; + + if (!sfgc_packet_types[i]) + continue; + + mlxsw_reg_sfgc_pack(sfgc_pl, i, fid_family->bridge_type, + flood_table->table_type, 0, mid_base); + + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfgc), sfgc_pl); + if (err) + goto err_reg_write; + } + + return 0; + +err_reg_write: + mlxsw_sp_pgt_mid_free_range(mlxsw_sp, mid_base, num_fids); + return err; +} + +static void +mlxsw_sp_fid_flood_table_fini(struct mlxsw_sp_fid_family *fid_family, + const struct mlxsw_sp_flood_table *flood_table) +{ + struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp; + u16 num_fids, mid_base; + + mid_base = mlxsw_sp_fid_flood_table_mid(fid_family, flood_table, 0); + num_fids = mlxsw_sp_fid_family_num_fids(fid_family); + mlxsw_sp_pgt_mid_free_range(mlxsw_sp, mid_base, num_fids); +} + +static int +mlxsw_sp_fid_flood_tables_init(struct mlxsw_sp_fid_family *fid_family) +{ + int i; + + for (i = 0; i < fid_family->nr_flood_tables; i++) { + const struct mlxsw_sp_flood_table *flood_table; + int err; + + flood_table = &fid_family->flood_tables[i]; + err = mlxsw_sp_fid_flood_table_init(fid_family, flood_table); + if (err) + return err; + } + + return 0; +} + +static void +mlxsw_sp_fid_flood_tables_fini(struct mlxsw_sp_fid_family *fid_family) +{ + int i; + + for (i = 0; i < fid_family->nr_flood_tables; i++) { + const struct mlxsw_sp_flood_table *flood_table; + + flood_table = &fid_family->flood_tables[i]; + mlxsw_sp_fid_flood_table_fini(fid_family, flood_table); + } +} + +static int mlxsw_sp_fid_family_register(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_fid_family *tmpl) +{ + u16 nr_fids = tmpl->end_index - tmpl->start_index + 1; + struct mlxsw_sp_fid_family *fid_family; + int err; + + fid_family = kmemdup(tmpl, sizeof(*fid_family), GFP_KERNEL); + if (!fid_family) + return -ENOMEM; + + fid_family->mlxsw_sp = mlxsw_sp; + INIT_LIST_HEAD(&fid_family->fids_list); + fid_family->fids_bitmap = bitmap_zalloc(nr_fids, GFP_KERNEL); + if (!fid_family->fids_bitmap) { + err = -ENOMEM; + goto err_alloc_fids_bitmap; + } + + if (fid_family->flood_tables) { + err = mlxsw_sp_fid_flood_tables_init(fid_family); + if (err) + goto err_fid_flood_tables_init; + } + + mlxsw_sp->fid_core->fid_family_arr[tmpl->type] = fid_family; + + return 0; + +err_fid_flood_tables_init: + bitmap_free(fid_family->fids_bitmap); 
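+	/* Jumped to when bitmap_zalloc() fails; only the kmemdup()ed family
+	 * needs to be freed.
+	 */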
+err_alloc_fids_bitmap: + kfree(fid_family); + return err; +} + +static void +mlxsw_sp_fid_family_unregister(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid_family *fid_family) +{ + mlxsw_sp->fid_core->fid_family_arr[fid_family->type] = NULL; + + if (fid_family->flood_tables) + mlxsw_sp_fid_flood_tables_fini(fid_family); + + bitmap_free(fid_family->fids_bitmap); + WARN_ON_ONCE(!list_empty(&fid_family->fids_list)); + kfree(fid_family); +} + +int mlxsw_sp_port_fids_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + /* Track number of FIDs configured on the port with mapping type + * PORT_VID_TO_FID, so that we know when to transition the port + * back to non-virtual (VLAN) mode. + */ + mlxsw_sp->fid_core->port_fid_mappings[mlxsw_sp_port->local_port] = 0; + + return mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false); +} + +void mlxsw_sp_port_fids_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + mlxsw_sp->fid_core->port_fid_mappings[mlxsw_sp_port->local_port] = 0; +} + +int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp) +{ + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); + struct mlxsw_sp_fid_core *fid_core; + int err, i; + + fid_core = kzalloc(sizeof(*mlxsw_sp->fid_core), GFP_KERNEL); + if (!fid_core) + return -ENOMEM; + mlxsw_sp->fid_core = fid_core; + + err = rhashtable_init(&fid_core->fid_ht, &mlxsw_sp_fid_ht_params); + if (err) + goto err_rhashtable_fid_init; + + err = rhashtable_init(&fid_core->vni_ht, &mlxsw_sp_fid_vni_ht_params); + if (err) + goto err_rhashtable_vni_init; + + fid_core->port_fid_mappings = kcalloc(max_ports, sizeof(unsigned int), + GFP_KERNEL); + if (!fid_core->port_fid_mappings) { + err = -ENOMEM; + goto err_alloc_port_fid_mappings; + } + + for (i = 0; i < MLXSW_SP_FID_TYPE_MAX; i++) { + err = mlxsw_sp_fid_family_register(mlxsw_sp, + mlxsw_sp->fid_family_arr[i]); + + if (err) + goto err_fid_ops_register; + } + + return 0; + +err_fid_ops_register: + for (i--; i >= 0; i--) { + struct mlxsw_sp_fid_family *fid_family; + + fid_family = fid_core->fid_family_arr[i]; + mlxsw_sp_fid_family_unregister(mlxsw_sp, fid_family); + } + kfree(fid_core->port_fid_mappings); +err_alloc_port_fid_mappings: + rhashtable_destroy(&fid_core->vni_ht); +err_rhashtable_vni_init: + rhashtable_destroy(&fid_core->fid_ht); +err_rhashtable_fid_init: + kfree(fid_core); + return err; +} + +void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_fid_core *fid_core = mlxsw_sp->fid_core; + int i; + + for (i = 0; i < MLXSW_SP_FID_TYPE_MAX; i++) + mlxsw_sp_fid_family_unregister(mlxsw_sp, + fid_core->fid_family_arr[i]); + kfree(fid_core->port_fid_mappings); + rhashtable_destroy(&fid_core->vni_ht); + rhashtable_destroy(&fid_core->fid_ht); + kfree(fid_core); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c new file mode 100644 index 000000000..9e50c823a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c @@ -0,0 +1,295 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2020 Mellanox Technologies. 
All rights reserved */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/list.h> +#include <net/net_namespace.h> + +#include "spectrum.h" + +struct mlxsw_sp_flow_block * +mlxsw_sp_flow_block_create(struct mlxsw_sp *mlxsw_sp, struct net *net) +{ + struct mlxsw_sp_flow_block *block; + + block = kzalloc(sizeof(*block), GFP_KERNEL); + if (!block) + return NULL; + INIT_LIST_HEAD(&block->binding_list); + INIT_LIST_HEAD(&block->mall.list); + block->mlxsw_sp = mlxsw_sp; + block->net = net; + return block; +} + +void mlxsw_sp_flow_block_destroy(struct mlxsw_sp_flow_block *block) +{ + WARN_ON(!list_empty(&block->binding_list)); + kfree(block); +} + +static struct mlxsw_sp_flow_block_binding * +mlxsw_sp_flow_block_lookup(struct mlxsw_sp_flow_block *block, + struct mlxsw_sp_port *mlxsw_sp_port, bool ingress) +{ + struct mlxsw_sp_flow_block_binding *binding; + + list_for_each_entry(binding, &block->binding_list, list) + if (binding->mlxsw_sp_port == mlxsw_sp_port && + binding->ingress == ingress) + return binding; + return NULL; +} + +static bool +mlxsw_sp_flow_block_ruleset_bound(const struct mlxsw_sp_flow_block *block) +{ + return block->ruleset_zero; +} + +static int mlxsw_sp_flow_block_bind(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, + struct mlxsw_sp_port *mlxsw_sp_port, + bool ingress, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_flow_block_binding *binding; + int err; + + if (WARN_ON(mlxsw_sp_flow_block_lookup(block, mlxsw_sp_port, ingress))) + return -EEXIST; + + if (ingress && block->ingress_blocker_rule_count) { + NL_SET_ERR_MSG_MOD(extack, "Block cannot be bound to ingress because it contains unsupported rules"); + return -EOPNOTSUPP; + } + + if (!ingress && block->egress_blocker_rule_count) { + NL_SET_ERR_MSG_MOD(extack, "Block cannot be bound to egress because it contains unsupported rules"); + return -EOPNOTSUPP; + } + + err = mlxsw_sp_mall_port_bind(block, mlxsw_sp_port, extack); + if (err) + return err; + + binding = kzalloc(sizeof(*binding), GFP_KERNEL); + if (!binding) { + err = -ENOMEM; + goto err_binding_alloc; + } + binding->mlxsw_sp_port = mlxsw_sp_port; + binding->ingress = ingress; + + if (mlxsw_sp_flow_block_ruleset_bound(block)) { + err = mlxsw_sp_acl_ruleset_bind(mlxsw_sp, block, binding); + if (err) + goto err_ruleset_bind; + } + + if (ingress) + block->ingress_binding_count++; + else + block->egress_binding_count++; + list_add(&binding->list, &block->binding_list); + return 0; + +err_ruleset_bind: + kfree(binding); +err_binding_alloc: + mlxsw_sp_mall_port_unbind(block, mlxsw_sp_port); + + return err; +} + +static int mlxsw_sp_flow_block_unbind(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, + struct mlxsw_sp_port *mlxsw_sp_port, + bool ingress) +{ + struct mlxsw_sp_flow_block_binding *binding; + + binding = mlxsw_sp_flow_block_lookup(block, mlxsw_sp_port, ingress); + if (!binding) + return -ENOENT; + + list_del(&binding->list); + + if (ingress) + block->ingress_binding_count--; + else + block->egress_binding_count--; + + if (mlxsw_sp_flow_block_ruleset_bound(block)) + mlxsw_sp_acl_ruleset_unbind(mlxsw_sp, block, binding); + + kfree(binding); + + mlxsw_sp_mall_port_unbind(block, mlxsw_sp_port); + + return 0; +} + +static int mlxsw_sp_flow_block_mall_cb(struct mlxsw_sp_flow_block *flow_block, + struct tc_cls_matchall_offload *f) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_flow_block_mlxsw_sp(flow_block); + + switch (f->command) { + case TC_CLSMATCHALL_REPLACE: + return 
mlxsw_sp_mall_replace(mlxsw_sp, flow_block, f); + case TC_CLSMATCHALL_DESTROY: + mlxsw_sp_mall_destroy(flow_block, f); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int mlxsw_sp_flow_block_flower_cb(struct mlxsw_sp_flow_block *flow_block, + struct flow_cls_offload *f) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_flow_block_mlxsw_sp(flow_block); + + switch (f->command) { + case FLOW_CLS_REPLACE: + return mlxsw_sp_flower_replace(mlxsw_sp, flow_block, f); + case FLOW_CLS_DESTROY: + mlxsw_sp_flower_destroy(mlxsw_sp, flow_block, f); + return 0; + case FLOW_CLS_STATS: + return mlxsw_sp_flower_stats(mlxsw_sp, flow_block, f); + case FLOW_CLS_TMPLT_CREATE: + return mlxsw_sp_flower_tmplt_create(mlxsw_sp, flow_block, f); + case FLOW_CLS_TMPLT_DESTROY: + mlxsw_sp_flower_tmplt_destroy(mlxsw_sp, flow_block, f); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int mlxsw_sp_flow_block_cb(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + struct mlxsw_sp_flow_block *flow_block = cb_priv; + + if (mlxsw_sp_flow_block_disabled(flow_block)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSMATCHALL: + return mlxsw_sp_flow_block_mall_cb(flow_block, type_data); + case TC_SETUP_CLSFLOWER: + return mlxsw_sp_flow_block_flower_cb(flow_block, type_data); + default: + return -EOPNOTSUPP; + } +} + +static void mlxsw_sp_tc_block_release(void *cb_priv) +{ + struct mlxsw_sp_flow_block *flow_block = cb_priv; + + mlxsw_sp_flow_block_destroy(flow_block); +} + +static LIST_HEAD(mlxsw_sp_block_cb_list); + +static int mlxsw_sp_setup_tc_block_bind(struct mlxsw_sp_port *mlxsw_sp_port, + struct flow_block_offload *f, + bool ingress) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_flow_block *flow_block; + struct flow_block_cb *block_cb; + bool register_block = false; + int err; + + block_cb = flow_block_cb_lookup(f->block, mlxsw_sp_flow_block_cb, + mlxsw_sp); + if (!block_cb) { + flow_block = mlxsw_sp_flow_block_create(mlxsw_sp, f->net); + if (!flow_block) + return -ENOMEM; + block_cb = flow_block_cb_alloc(mlxsw_sp_flow_block_cb, + mlxsw_sp, flow_block, + mlxsw_sp_tc_block_release); + if (IS_ERR(block_cb)) { + mlxsw_sp_flow_block_destroy(flow_block); + return PTR_ERR(block_cb); + } + register_block = true; + } else { + flow_block = flow_block_cb_priv(block_cb); + } + flow_block_cb_incref(block_cb); + err = mlxsw_sp_flow_block_bind(mlxsw_sp, flow_block, + mlxsw_sp_port, ingress, f->extack); + if (err) + goto err_block_bind; + + if (ingress) + mlxsw_sp_port->ing_flow_block = flow_block; + else + mlxsw_sp_port->eg_flow_block = flow_block; + + if (register_block) { + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &mlxsw_sp_block_cb_list); + } + + return 0; + +err_block_bind: + if (!flow_block_cb_decref(block_cb)) + flow_block_cb_free(block_cb); + return err; +} + +static void mlxsw_sp_setup_tc_block_unbind(struct mlxsw_sp_port *mlxsw_sp_port, + struct flow_block_offload *f, + bool ingress) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_flow_block *flow_block; + struct flow_block_cb *block_cb; + int err; + + block_cb = flow_block_cb_lookup(f->block, mlxsw_sp_flow_block_cb, + mlxsw_sp); + if (!block_cb) + return; + + if (ingress) + mlxsw_sp_port->ing_flow_block = NULL; + else + mlxsw_sp_port->eg_flow_block = NULL; + + flow_block = flow_block_cb_priv(block_cb); + err = mlxsw_sp_flow_block_unbind(mlxsw_sp, flow_block, + mlxsw_sp_port, ingress); + if (!err && !flow_block_cb_decref(block_cb)) { + 
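+		/* No bindings reference the block callback anymore, so drop
+		 * it from the flow block and from the driver's list.
+		 */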
flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); + } +} + +int mlxsw_sp_setup_tc_block_clsact(struct mlxsw_sp_port *mlxsw_sp_port, + struct flow_block_offload *f, + bool ingress) +{ + f->driver_block_list = &mlxsw_sp_block_cb_list; + + switch (f->command) { + case FLOW_BLOCK_BIND: + return mlxsw_sp_setup_tc_block_bind(mlxsw_sp_port, f, ingress); + case FLOW_BLOCK_UNBIND: + mlxsw_sp_setup_tc_block_unbind(mlxsw_sp_port, f, ingress); + return 0; + default: + return -EOPNOTSUPP; + } +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c new file mode 100644 index 000000000..e91fb205e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -0,0 +1,796 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/netdevice.h> +#include <linux/log2.h> +#include <net/net_namespace.h> +#include <net/flow_dissector.h> +#include <net/pkt_cls.h> +#include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_mirred.h> +#include <net/tc_act/tc_vlan.h> + +#include "spectrum.h" +#include "core_acl_flex_keys.h" + +static int mlxsw_sp_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not pipe or ok"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + if (act->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, + "QoS offload not support packets per second"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, + struct mlxsw_sp_acl_rule_info *rulei, + struct flow_action *flow_action, + struct netlink_ext_ack *extack) +{ + const struct flow_action_entry *act; + int mirror_act_count = 0; + int police_act_count = 0; + int sample_act_count = 0; + int err, i; + + if (!flow_action_has_entries(flow_action)) + return 0; + if (!flow_action_mixed_hw_stats_check(flow_action, extack)) + return -EOPNOTSUPP; + + act = flow_action_first_entry_get(flow_action); + if (act->hw_stats & FLOW_ACTION_HW_STATS_DISABLED) { + /* Nothing to do */ + } else if (act->hw_stats & FLOW_ACTION_HW_STATS_IMMEDIATE) { + /* Count action is inserted first */ + err = mlxsw_sp_acl_rulei_act_count(mlxsw_sp, rulei, extack); + if (err) + return err; + } else { + NL_SET_ERR_MSG_MOD(extack, "Unsupported action HW stats type"); + return -EOPNOTSUPP; + } + + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_ACCEPT: + err = mlxsw_sp_acl_rulei_act_terminate(rulei); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append 
terminate action"); + return err; + } + break; + case FLOW_ACTION_DROP: { + bool ingress; + + if (mlxsw_sp_flow_block_is_mixed_bound(block)) { + NL_SET_ERR_MSG_MOD(extack, "Drop action is not supported when block is bound to ingress and egress"); + return -EOPNOTSUPP; + } + ingress = mlxsw_sp_flow_block_is_ingress_bound(block); + err = mlxsw_sp_acl_rulei_act_drop(rulei, ingress, + act->cookie, extack); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append drop action"); + return err; + } + + /* Forbid block with this rulei to be bound + * to ingress/egress in future. Ingress rule is + * a blocker for egress and vice versa. + */ + if (ingress) + rulei->egress_bind_blocker = 1; + else + rulei->ingress_bind_blocker = 1; + } + break; + case FLOW_ACTION_TRAP: + err = mlxsw_sp_acl_rulei_act_trap(rulei); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append trap action"); + return err; + } + break; + case FLOW_ACTION_GOTO: { + u32 chain_index = act->chain_index; + struct mlxsw_sp_acl_ruleset *ruleset; + u16 group_id; + + ruleset = mlxsw_sp_acl_ruleset_lookup(mlxsw_sp, block, + chain_index, + MLXSW_SP_ACL_PROFILE_FLOWER); + if (IS_ERR(ruleset)) + return PTR_ERR(ruleset); + + group_id = mlxsw_sp_acl_ruleset_group_id(ruleset); + err = mlxsw_sp_acl_rulei_act_jump(rulei, group_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append jump action"); + return err; + } + } + break; + case FLOW_ACTION_REDIRECT: { + struct net_device *out_dev; + struct mlxsw_sp_fid *fid; + u16 fid_index; + + if (mlxsw_sp_flow_block_is_egress_bound(block)) { + NL_SET_ERR_MSG_MOD(extack, "Redirect action is not supported on egress"); + return -EOPNOTSUPP; + } + + /* Forbid block with this rulei to be bound + * to egress in future. + */ + rulei->egress_bind_blocker = 1; + + fid = mlxsw_sp_acl_dummy_fid(mlxsw_sp); + fid_index = mlxsw_sp_fid_index(fid); + err = mlxsw_sp_acl_rulei_act_fid_set(mlxsw_sp, rulei, + fid_index, extack); + if (err) + return err; + + out_dev = act->dev; + err = mlxsw_sp_acl_rulei_act_fwd(mlxsw_sp, rulei, + out_dev, extack); + if (err) + return err; + } + break; + case FLOW_ACTION_MIRRED: { + struct net_device *out_dev = act->dev; + + if (mirror_act_count++) { + NL_SET_ERR_MSG_MOD(extack, "Multiple mirror actions per rule are not supported"); + return -EOPNOTSUPP; + } + + err = mlxsw_sp_acl_rulei_act_mirror(mlxsw_sp, rulei, + block, out_dev, + extack); + if (err) + return err; + } + break; + case FLOW_ACTION_VLAN_MANGLE: { + u16 proto = be16_to_cpu(act->vlan.proto); + u8 prio = act->vlan.prio; + u16 vid = act->vlan.vid; + + err = mlxsw_sp_acl_rulei_act_vlan(mlxsw_sp, rulei, + act->id, vid, + proto, prio, extack); + if (err) + return err; + break; + } + case FLOW_ACTION_PRIORITY: + err = mlxsw_sp_acl_rulei_act_priority(mlxsw_sp, rulei, + act->priority, + extack); + if (err) + return err; + break; + case FLOW_ACTION_MANGLE: { + enum flow_action_mangle_base htype = act->mangle.htype; + __be32 be_mask = (__force __be32) act->mangle.mask; + __be32 be_val = (__force __be32) act->mangle.val; + u32 offset = act->mangle.offset; + u32 mask = be32_to_cpu(be_mask); + u32 val = be32_to_cpu(be_val); + + err = mlxsw_sp_acl_rulei_act_mangle(mlxsw_sp, rulei, + htype, offset, + mask, val, extack); + if (err) + return err; + break; + } + case FLOW_ACTION_POLICE: { + u32 burst; + + if (police_act_count++) { + NL_SET_ERR_MSG_MOD(extack, "Multiple police actions per rule are not supported"); + return -EOPNOTSUPP; + } + + err = mlxsw_sp_policer_validate(flow_action, act, extack); + if (err) + return err; + + /* The 
kernel might adjust the requested burst size so + * that it is not exactly a power of two. Re-adjust it + * here since the hardware only supports burst sizes + * that are a power of two. + */ + burst = roundup_pow_of_two(act->police.burst); + err = mlxsw_sp_acl_rulei_act_police(mlxsw_sp, rulei, + act->hw_index, + act->police.rate_bytes_ps, + burst, extack); + if (err) + return err; + break; + } + case FLOW_ACTION_SAMPLE: { + if (sample_act_count++) { + NL_SET_ERR_MSG_MOD(extack, "Multiple sample actions per rule are not supported"); + return -EOPNOTSUPP; + } + + err = mlxsw_sp_acl_rulei_act_sample(mlxsw_sp, rulei, + block, + act->sample.psample_group, + act->sample.rate, + act->sample.trunc_size, + act->sample.truncate, + extack); + if (err) + return err; + break; + } + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported action"); + dev_err(mlxsw_sp->bus_info->dev, "Unsupported action\n"); + return -EOPNOTSUPP; + } + } + + if (rulei->ipv6_valid) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported mangle field"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int mlxsw_sp_flower_parse_meta(struct mlxsw_sp_acl_rule_info *rulei, + struct flow_cls_offload *f, + struct mlxsw_sp_flow_block *block) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct mlxsw_sp_port *mlxsw_sp_port; + struct net_device *ingress_dev; + struct flow_match_meta match; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) + return 0; + + flow_rule_match_meta(rule, &match); + if (match.mask->ingress_ifindex != 0xFFFFFFFF) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported ingress ifindex mask"); + return -EINVAL; + } + + ingress_dev = __dev_get_by_index(block->net, + match.key->ingress_ifindex); + if (!ingress_dev) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Can't find specified ingress port to match on"); + return -EINVAL; + } + + if (!mlxsw_sp_port_dev_check(ingress_dev)) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on non-mlxsw ingress port"); + return -EINVAL; + } + + mlxsw_sp_port = netdev_priv(ingress_dev); + if (mlxsw_sp_port->mlxsw_sp != block->mlxsw_sp) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on a port from different device"); + return -EINVAL; + } + + mlxsw_sp_acl_rulei_keymask_u32(rulei, + MLXSW_AFK_ELEMENT_SRC_SYS_PORT, + mlxsw_sp_port->local_port, + 0xFFFFFFFF); + return 0; +} + +static void mlxsw_sp_flower_parse_ipv4(struct mlxsw_sp_acl_rule_info *rulei, + struct flow_cls_offload *f) +{ + struct flow_match_ipv4_addrs match; + + flow_rule_match_ipv4_addrs(f->rule, &match); + + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_0_31, + (char *) &match.key->src, + (char *) &match.mask->src, 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_0_31, + (char *) &match.key->dst, + (char *) &match.mask->dst, 4); +} + +static void mlxsw_sp_flower_parse_ipv6(struct mlxsw_sp_acl_rule_info *rulei, + struct flow_cls_offload *f) +{ + struct flow_match_ipv6_addrs match; + + flow_rule_match_ipv6_addrs(f->rule, &match); + + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_96_127, + &match.key->src.s6_addr[0x0], + &match.mask->src.s6_addr[0x0], 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_64_95, + &match.key->src.s6_addr[0x4], + &match.mask->src.s6_addr[0x4], 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_32_63, + &match.key->src.s6_addr[0x8], + &match.mask->src.s6_addr[0x8], 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_0_31, + &match.key->src.s6_addr[0xC], + 
&match.mask->src.s6_addr[0xC], 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_96_127, + &match.key->dst.s6_addr[0x0], + &match.mask->dst.s6_addr[0x0], 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_64_95, + &match.key->dst.s6_addr[0x4], + &match.mask->dst.s6_addr[0x4], 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_32_63, + &match.key->dst.s6_addr[0x8], + &match.mask->dst.s6_addr[0x8], 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_0_31, + &match.key->dst.s6_addr[0xC], + &match.mask->dst.s6_addr[0xC], 4); +} + +static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct flow_cls_offload *f, + u8 ip_proto) +{ + const struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_match_ports match; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) + return 0; + + if (ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Only UDP and TCP keys are supported"); + dev_err(mlxsw_sp->bus_info->dev, "Only UDP and TCP keys are supported\n"); + return -EINVAL; + } + + flow_rule_match_ports(rule, &match); + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_DST_L4_PORT, + ntohs(match.key->dst), + ntohs(match.mask->dst)); + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_SRC_L4_PORT, + ntohs(match.key->src), + ntohs(match.mask->src)); + return 0; +} + +static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct flow_cls_offload *f, + u8 ip_proto) +{ + const struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_match_tcp match; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) + return 0; + + if (ip_proto != IPPROTO_TCP) { + NL_SET_ERR_MSG_MOD(f->common.extack, "TCP keys supported only for TCP"); + dev_err(mlxsw_sp->bus_info->dev, "TCP keys supported only for TCP\n"); + return -EINVAL; + } + + flow_rule_match_tcp(rule, &match); + + if (match.mask->flags & htons(0x0E00)) { + NL_SET_ERR_MSG_MOD(f->common.extack, "TCP flags match not supported on reserved bits"); + dev_err(mlxsw_sp->bus_info->dev, "TCP flags match not supported on reserved bits\n"); + return -EINVAL; + } + + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_TCP_FLAGS, + ntohs(match.key->flags), + ntohs(match.mask->flags)); + return 0; +} + +static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct flow_cls_offload *f, + u16 n_proto) +{ + const struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_match_ip match; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) + return 0; + + if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6) { + NL_SET_ERR_MSG_MOD(f->common.extack, "IP keys supported only for IPv4/6"); + dev_err(mlxsw_sp->bus_info->dev, "IP keys supported only for IPv4/6\n"); + return -EINVAL; + } + + flow_rule_match_ip(rule, &match); + + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_TTL_, + match.key->ttl, match.mask->ttl); + + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_ECN, + match.key->tos & 0x3, + match.mask->tos & 0x3); + + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_DSCP, + match.key->tos >> 2, + match.mask->tos >> 2); + + return 0; +} + +static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, + struct mlxsw_sp_acl_rule_info *rulei, + struct flow_cls_offload *f) +{ + struct 
flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_dissector *dissector = rule->match.dissector; + u16 n_proto_mask = 0; + u16 n_proto_key = 0; + u16 addr_type = 0; + u8 ip_proto = 0; + int err; + + if (dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_META) | + BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_PORTS) | + BIT(FLOW_DISSECTOR_KEY_TCP) | + BIT(FLOW_DISSECTOR_KEY_IP) | + BIT(FLOW_DISSECTOR_KEY_VLAN))) { + dev_err(mlxsw_sp->bus_info->dev, "Unsupported key\n"); + NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported key"); + return -EOPNOTSUPP; + } + + mlxsw_sp_acl_rulei_priority(rulei, f->common.prio); + + err = mlxsw_sp_flower_parse_meta(rulei, f, block); + if (err) + return err; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + n_proto_key = ntohs(match.key->n_proto); + n_proto_mask = ntohs(match.mask->n_proto); + + if (n_proto_key == ETH_P_ALL) { + n_proto_key = 0; + n_proto_mask = 0; + } + mlxsw_sp_acl_rulei_keymask_u32(rulei, + MLXSW_AFK_ELEMENT_ETHERTYPE, + n_proto_key, n_proto_mask); + + ip_proto = match.key->ip_proto; + mlxsw_sp_acl_rulei_keymask_u32(rulei, + MLXSW_AFK_ELEMENT_IP_PROTO, + match.key->ip_proto, + match.mask->ip_proto); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + + flow_rule_match_eth_addrs(rule, &match); + mlxsw_sp_acl_rulei_keymask_buf(rulei, + MLXSW_AFK_ELEMENT_DMAC_32_47, + match.key->dst, + match.mask->dst, 2); + mlxsw_sp_acl_rulei_keymask_buf(rulei, + MLXSW_AFK_ELEMENT_DMAC_0_31, + match.key->dst + 2, + match.mask->dst + 2, 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, + MLXSW_AFK_ELEMENT_SMAC_32_47, + match.key->src, + match.mask->src, 2); + mlxsw_sp_acl_rulei_keymask_buf(rulei, + MLXSW_AFK_ELEMENT_SMAC_0_31, + match.key->src + 2, + match.mask->src + 2, 4); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + + flow_rule_match_vlan(rule, &match); + if (mlxsw_sp_flow_block_is_egress_bound(block) && + match.mask->vlan_id) { + NL_SET_ERR_MSG_MOD(f->common.extack, "vlan_id key is not supported on egress"); + return -EOPNOTSUPP; + } + + /* Forbid block with this rulei to be bound + * to egress in future. 
+ */ + rulei->egress_bind_blocker = 1; + + if (match.mask->vlan_id != 0) + mlxsw_sp_acl_rulei_keymask_u32(rulei, + MLXSW_AFK_ELEMENT_VID, + match.key->vlan_id, + match.mask->vlan_id); + if (match.mask->vlan_priority != 0) + mlxsw_sp_acl_rulei_keymask_u32(rulei, + MLXSW_AFK_ELEMENT_PCP, + match.key->vlan_priority, + match.mask->vlan_priority); + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) + mlxsw_sp_flower_parse_ipv4(rulei, f); + + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) + mlxsw_sp_flower_parse_ipv6(rulei, f); + + err = mlxsw_sp_flower_parse_ports(mlxsw_sp, rulei, f, ip_proto); + if (err) + return err; + err = mlxsw_sp_flower_parse_tcp(mlxsw_sp, rulei, f, ip_proto); + if (err) + return err; + + err = mlxsw_sp_flower_parse_ip(mlxsw_sp, rulei, f, n_proto_key & n_proto_mask); + if (err) + return err; + + return mlxsw_sp_flower_parse_actions(mlxsw_sp, block, rulei, + &f->rule->action, + f->common.extack); +} + +static int mlxsw_sp_flower_mall_prio_check(struct mlxsw_sp_flow_block *block, + struct flow_cls_offload *f) +{ + bool ingress = mlxsw_sp_flow_block_is_ingress_bound(block); + unsigned int mall_min_prio; + unsigned int mall_max_prio; + int err; + + err = mlxsw_sp_mall_prio_get(block, f->common.chain_index, + &mall_min_prio, &mall_max_prio); + if (err) { + if (err == -ENOENT) + /* No matchall filters installed on this chain. */ + return 0; + NL_SET_ERR_MSG(f->common.extack, "Failed to get matchall priorities"); + return err; + } + if (ingress && f->common.prio <= mall_min_prio) { + NL_SET_ERR_MSG(f->common.extack, "Failed to add in front of existing matchall rules"); + return -EOPNOTSUPP; + } + if (!ingress && f->common.prio >= mall_max_prio) { + NL_SET_ERR_MSG(f->common.extack, "Failed to add behind of existing matchall rules"); + return -EOPNOTSUPP; + } + return 0; +} + +int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, + struct flow_cls_offload *f) +{ + struct mlxsw_sp_acl_rule_info *rulei; + struct mlxsw_sp_acl_ruleset *ruleset; + struct mlxsw_sp_acl_rule *rule; + int err; + + err = mlxsw_sp_flower_mall_prio_check(block, f); + if (err) + return err; + + ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, block, + f->common.chain_index, + MLXSW_SP_ACL_PROFILE_FLOWER, NULL); + if (IS_ERR(ruleset)) + return PTR_ERR(ruleset); + + rule = mlxsw_sp_acl_rule_create(mlxsw_sp, ruleset, f->cookie, NULL, + f->common.extack); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto err_rule_create; + } + + rulei = mlxsw_sp_acl_rule_rulei(rule); + err = mlxsw_sp_flower_parse(mlxsw_sp, block, rulei, f); + if (err) + goto err_flower_parse; + + err = mlxsw_sp_acl_rulei_commit(rulei); + if (err) + goto err_rulei_commit; + + err = mlxsw_sp_acl_rule_add(mlxsw_sp, rule); + if (err) + goto err_rule_add; + + mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset); + return 0; + +err_rule_add: +err_rulei_commit: +err_flower_parse: + mlxsw_sp_acl_rule_destroy(mlxsw_sp, rule); +err_rule_create: + mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset); + return err; +} + +void mlxsw_sp_flower_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, + struct flow_cls_offload *f) +{ + struct mlxsw_sp_acl_ruleset *ruleset; + struct mlxsw_sp_acl_rule *rule; + + ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, block, + f->common.chain_index, + MLXSW_SP_ACL_PROFILE_FLOWER, NULL); + if (IS_ERR(ruleset)) + return; + + rule = mlxsw_sp_acl_rule_lookup(mlxsw_sp, ruleset, f->cookie); + if (rule) { + mlxsw_sp_acl_rule_del(mlxsw_sp, rule); + mlxsw_sp_acl_rule_destroy(mlxsw_sp, rule); + } + 
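	/* Drop the reference taken by mlxsw_sp_acl_ruleset_get() at the
	 * top of this function; the rule itself was already deleted and
	 * destroyed above if it was found.
	 */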
+ mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset); +} + +int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, + struct flow_cls_offload *f) +{ + enum flow_action_hw_stats used_hw_stats = FLOW_ACTION_HW_STATS_DISABLED; + struct mlxsw_sp_acl_ruleset *ruleset; + struct mlxsw_sp_acl_rule *rule; + u64 packets; + u64 lastuse; + u64 bytes; + u64 drops; + int err; + + ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, block, + f->common.chain_index, + MLXSW_SP_ACL_PROFILE_FLOWER, NULL); + if (WARN_ON(IS_ERR(ruleset))) + return -EINVAL; + + rule = mlxsw_sp_acl_rule_lookup(mlxsw_sp, ruleset, f->cookie); + if (!rule) + return -EINVAL; + + err = mlxsw_sp_acl_rule_get_stats(mlxsw_sp, rule, &packets, &bytes, + &drops, &lastuse, &used_hw_stats); + if (err) + goto err_rule_get_stats; + + flow_stats_update(&f->stats, bytes, packets, drops, lastuse, + used_hw_stats); + + mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset); + return 0; + +err_rule_get_stats: + mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset); + return err; +} + +int mlxsw_sp_flower_tmplt_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, + struct flow_cls_offload *f) +{ + struct mlxsw_sp_acl_ruleset *ruleset; + struct mlxsw_sp_acl_rule_info rulei; + int err; + + memset(&rulei, 0, sizeof(rulei)); + err = mlxsw_sp_flower_parse(mlxsw_sp, block, &rulei, f); + if (err) + return err; + ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, block, + f->common.chain_index, + MLXSW_SP_ACL_PROFILE_FLOWER, + &rulei.values.elusage); + + /* keep the reference to the ruleset */ + return PTR_ERR_OR_ZERO(ruleset); +} + +void mlxsw_sp_flower_tmplt_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, + struct flow_cls_offload *f) +{ + struct mlxsw_sp_acl_ruleset *ruleset; + + ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, block, + f->common.chain_index, + MLXSW_SP_ACL_PROFILE_FLOWER, NULL); + if (IS_ERR(ruleset)) + return; + /* put the reference to the ruleset kept in create */ + mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset); + mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset); +} + +int mlxsw_sp_flower_prio_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, + u32 chain_index, unsigned int *p_min_prio, + unsigned int *p_max_prio) +{ + struct mlxsw_sp_acl_ruleset *ruleset; + + ruleset = mlxsw_sp_acl_ruleset_lookup(mlxsw_sp, block, + chain_index, + MLXSW_SP_ACL_PROFILE_FLOWER); + if (IS_ERR(ruleset)) + /* In case there are no flower rules, the caller + * receives -ENOENT to indicate there is no need + * to check the priorities. + */ + return PTR_ERR(ruleset); + mlxsw_sp_acl_ruleset_prio_get(ruleset, p_min_prio, p_max_prio); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c new file mode 100644 index 000000000..a2ee695a3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -0,0 +1,676 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2018 Mellanox Technologies. 
All rights reserved */ + +#include <net/ip_tunnels.h> +#include <net/ip6_tunnel.h> +#include <net/inet_ecn.h> + +#include "spectrum_ipip.h" +#include "reg.h" + +struct ip_tunnel_parm +mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + + return tun->parms; +} + +struct __ip6_tnl_parm +mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev) +{ + struct ip6_tnl *tun = netdev_priv(ol_dev); + + return tun->parms; +} + +static bool mlxsw_sp_ipip_parms4_has_ikey(const struct ip_tunnel_parm *parms) +{ + return !!(parms->i_flags & TUNNEL_KEY); +} + +static bool mlxsw_sp_ipip_parms6_has_ikey(const struct __ip6_tnl_parm *parms) +{ + return !!(parms->i_flags & TUNNEL_KEY); +} + +static bool mlxsw_sp_ipip_parms4_has_okey(const struct ip_tunnel_parm *parms) +{ + return !!(parms->o_flags & TUNNEL_KEY); +} + +static bool mlxsw_sp_ipip_parms6_has_okey(const struct __ip6_tnl_parm *parms) +{ + return !!(parms->o_flags & TUNNEL_KEY); +} + +static u32 mlxsw_sp_ipip_parms4_ikey(const struct ip_tunnel_parm *parms) +{ + return mlxsw_sp_ipip_parms4_has_ikey(parms) ? + be32_to_cpu(parms->i_key) : 0; +} + +static u32 mlxsw_sp_ipip_parms6_ikey(const struct __ip6_tnl_parm *parms) +{ + return mlxsw_sp_ipip_parms6_has_ikey(parms) ? + be32_to_cpu(parms->i_key) : 0; +} + +static u32 mlxsw_sp_ipip_parms4_okey(const struct ip_tunnel_parm *parms) +{ + return mlxsw_sp_ipip_parms4_has_okey(parms) ? + be32_to_cpu(parms->o_key) : 0; +} + +static u32 mlxsw_sp_ipip_parms6_okey(const struct __ip6_tnl_parm *parms) +{ + return mlxsw_sp_ipip_parms6_has_okey(parms) ? + be32_to_cpu(parms->o_key) : 0; +} + +static union mlxsw_sp_l3addr +mlxsw_sp_ipip_parms4_saddr(const struct ip_tunnel_parm *parms) +{ + return (union mlxsw_sp_l3addr) { .addr4 = parms->iph.saddr }; +} + +static union mlxsw_sp_l3addr +mlxsw_sp_ipip_parms6_saddr(const struct __ip6_tnl_parm *parms) +{ + return (union mlxsw_sp_l3addr) { .addr6 = parms->laddr }; +} + +static union mlxsw_sp_l3addr +mlxsw_sp_ipip_parms4_daddr(const struct ip_tunnel_parm *parms) +{ + return (union mlxsw_sp_l3addr) { .addr4 = parms->iph.daddr }; +} + +static union mlxsw_sp_l3addr +mlxsw_sp_ipip_parms6_daddr(const struct __ip6_tnl_parm *parms) +{ + return (union mlxsw_sp_l3addr) { .addr6 = parms->raddr }; +} + +union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev) +{ + struct ip_tunnel_parm parms4; + struct __ip6_tnl_parm parms6; + + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev); + return mlxsw_sp_ipip_parms4_saddr(&parms4); + case MLXSW_SP_L3_PROTO_IPV6: + parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev); + return mlxsw_sp_ipip_parms6_saddr(&parms6); + } + + WARN_ON(1); + return (union mlxsw_sp_l3addr) {0}; +} + +static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev) +{ + + struct ip_tunnel_parm parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev); + + return mlxsw_sp_ipip_parms4_daddr(&parms4).addr4; +} + +static union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev) +{ + struct ip_tunnel_parm parms4; + struct __ip6_tnl_parm parms6; + + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev); + return mlxsw_sp_ipip_parms4_daddr(&parms4); + case MLXSW_SP_L3_PROTO_IPV6: + parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev); + return mlxsw_sp_ipip_parms6_daddr(&parms6); + } + + WARN_ON(1); + return (union mlxsw_sp_l3addr) {0}; +} 
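/* Illustrative note, not part of the original patch: both address
 * families are carried in union mlxsw_sp_l3addr, so the helpers below
 * can compare endpoints with a single memcmp()-based check regardless
 * of protocol, e.g.:
 *
 *	union mlxsw_sp_l3addr saddr =
 *		mlxsw_sp_ipip_netdev_saddr(proto, ol_dev);
 *	bool unset = mlxsw_sp_l3addr_is_zero(saddr);
 *
 * mlxsw_sp_ipip_tunnel_complete() further down uses exactly this to
 * reject tunnels with an unset local or remote address, and the
 * WARN_ON(1) fallbacks above return (union mlxsw_sp_l3addr) {0} so that
 * an unhandled protocol also reads as "no address".
 */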
+ +bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr) +{ + union mlxsw_sp_l3addr naddr = {0}; + + return !memcmp(&addr, &naddr, sizeof(naddr)); +} + +static struct mlxsw_sp_ipip_parms +mlxsw_sp_ipip_netdev_parms_init_gre4(const struct net_device *ol_dev) +{ + struct ip_tunnel_parm parms = mlxsw_sp_ipip_netdev_parms4(ol_dev); + + return (struct mlxsw_sp_ipip_parms) { + .proto = MLXSW_SP_L3_PROTO_IPV4, + .saddr = mlxsw_sp_ipip_parms4_saddr(&parms), + .daddr = mlxsw_sp_ipip_parms4_daddr(&parms), + .link = parms.link, + .ikey = mlxsw_sp_ipip_parms4_ikey(&parms), + .okey = mlxsw_sp_ipip_parms4_okey(&parms), + }; +} + +static int +mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_ipip_entry *ipip_entry, + bool force, char *ratr_pl) +{ + u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); + __be32 daddr4 = mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev); + enum mlxsw_reg_ratr_op op; + + op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY : + MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY; + mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_IPIP, + adj_index, rif_index); + mlxsw_reg_ratr_ipip4_entry_pack(ratr_pl, be32_to_cpu(daddr4)); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); +} + +static int +mlxsw_sp_ipip_decap_config_gre4(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + u32 tunnel_index) +{ + u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); + u16 ul_rif_id = mlxsw_sp_ipip_lb_ul_rif_id(ipip_entry->ol_lb); + char rtdp_pl[MLXSW_REG_RTDP_LEN]; + struct ip_tunnel_parm parms; + unsigned int type_check; + bool has_ikey; + u32 daddr4; + u32 ikey; + + parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev); + has_ikey = mlxsw_sp_ipip_parms4_has_ikey(&parms); + ikey = mlxsw_sp_ipip_parms4_ikey(&parms); + + mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index); + mlxsw_reg_rtdp_egress_router_interface_set(rtdp_pl, ul_rif_id); + + type_check = has_ikey ? + MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY : + MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE; + + /* Linux demuxes tunnels based on packet SIP (which must match tunnel + * remote IP). Thus configure decap so that it filters out packets that + * are not IPv4 or have the wrong SIP. IPIP_DECAP_ERROR trap is + * generated for packets that fail this criterion. Linux then handles + * such packets in slow path and generates ICMP destination unreachable. + */ + daddr4 = be32_to_cpu(mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev)); + mlxsw_reg_rtdp_ipip4_pack(rtdp_pl, rif_index, + MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV4, + type_check, has_ikey, daddr4, ikey); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl); +} + +static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev) +{ + union mlxsw_sp_l3addr saddr = mlxsw_sp_ipip_netdev_saddr(proto, ol_dev); + union mlxsw_sp_l3addr daddr = mlxsw_sp_ipip_netdev_daddr(proto, ol_dev); + + /* Tunnels with unset local or remote address are valid in Linux and + * used for lightweight tunnels (LWT) and Non-Broadcast Multi-Access + * (NBMA) tunnels. In principle these can be offloaded, but the driver + * currently doesn't support this. So punt. 
+ */ + return !mlxsw_sp_l3addr_is_zero(saddr) && + !mlxsw_sp_l3addr_is_zero(daddr); +} + +static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev) +{ + struct ip_tunnel *tunnel = netdev_priv(ol_dev); + __be16 okflags = TUNNEL_KEY; /* We can't offload any other features. */ + bool inherit_ttl = tunnel->parms.iph.ttl == 0; + bool inherit_tos = tunnel->parms.iph.tos & 0x1; + + return (tunnel->parms.i_flags & ~okflags) == 0 && + (tunnel->parms.o_flags & ~okflags) == 0 && + inherit_ttl && inherit_tos && + mlxsw_sp_ipip_tunnel_complete(MLXSW_SP_L3_PROTO_IPV4, ol_dev); +} + +static struct mlxsw_sp_rif_ipip_lb_config +mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev) +{ + struct ip_tunnel_parm parms = mlxsw_sp_ipip_netdev_parms4(ol_dev); + enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt; + + lb_ipipt = mlxsw_sp_ipip_parms4_has_okey(&parms) ? + MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP : + MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP; + return (struct mlxsw_sp_rif_ipip_lb_config){ + .lb_ipipt = lb_ipipt, + .okey = mlxsw_sp_ipip_parms4_okey(&parms), + .ul_protocol = MLXSW_SP_L3_PROTO_IPV4, + .saddr = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4, + ol_dev), + }; +} + +static int +mlxsw_sp_ipip_ol_netdev_change_gre(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + const struct mlxsw_sp_ipip_parms *new_parms, + struct netlink_ext_ack *extack) +{ + const struct mlxsw_sp_ipip_parms *old_parms = &ipip_entry->parms; + bool update_tunnel = false; + bool update_decap = false; + bool update_nhs = false; + int err = 0; + + if (!mlxsw_sp_l3addr_eq(&new_parms->saddr, &old_parms->saddr)) { + u16 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev); + + /* Since the local address has changed, if there is another + * tunnel with a matching saddr, both need to be demoted. 
+ */ + if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, + new_parms->proto, + new_parms->saddr, + ul_tb_id, + ipip_entry)) { + mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); + return 0; + } + + update_tunnel = true; + } else if (old_parms->okey != new_parms->okey || + old_parms->link != new_parms->link) { + update_tunnel = true; + } else if (!mlxsw_sp_l3addr_eq(&new_parms->daddr, &old_parms->daddr)) { + update_nhs = true; + } else if (old_parms->ikey != new_parms->ikey) { + update_decap = true; + } + + if (update_tunnel) + err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, + true, true, true, + extack); + else if (update_nhs) + err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, + false, false, true, + extack); + else if (update_decap) + err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, + false, false, false, + extack); + if (err) + return err; + + ipip_entry->parms = *new_parms; + return 0; +} + +static int +mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_ipip_parms new_parms; + + new_parms = mlxsw_sp_ipip_netdev_parms_init_gre4(ipip_entry->ol_dev); + return mlxsw_sp_ipip_ol_netdev_change_gre(mlxsw_sp, ipip_entry, + &new_parms, extack); +} + +static int +mlxsw_sp_ipip_rem_addr_set_gre4(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + return 0; +} + +static void +mlxsw_sp_ipip_rem_addr_unset_gre4(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_ipip_entry *ipip_entry) +{ +} + +static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = { + .dev_type = ARPHRD_IPGRE, + .ul_proto = MLXSW_SP_L3_PROTO_IPV4, + .inc_parsing_depth = false, + .parms_init = mlxsw_sp_ipip_netdev_parms_init_gre4, + .nexthop_update = mlxsw_sp_ipip_nexthop_update_gre4, + .decap_config = mlxsw_sp_ipip_decap_config_gre4, + .can_offload = mlxsw_sp_ipip_can_offload_gre4, + .ol_loopback_config = mlxsw_sp_ipip_ol_loopback_config_gre4, + .ol_netdev_change = mlxsw_sp_ipip_ol_netdev_change_gre4, + .rem_ip_addr_set = mlxsw_sp_ipip_rem_addr_set_gre4, + .rem_ip_addr_unset = mlxsw_sp_ipip_rem_addr_unset_gre4, +}; + +static struct mlxsw_sp_ipip_parms +mlxsw_sp1_ipip_netdev_parms_init_gre6(const struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_parms parms = {0}; + + WARN_ON_ONCE(1); + return parms; +} + +static int +mlxsw_sp1_ipip_nexthop_update_gre6(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_ipip_entry *ipip_entry, + bool force, char *ratr_pl) +{ + WARN_ON_ONCE(1); + return -EINVAL; +} + +static int +mlxsw_sp1_ipip_decap_config_gre6(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + u32 tunnel_index) +{ + WARN_ON_ONCE(1); + return -EINVAL; +} + +static bool mlxsw_sp1_ipip_can_offload_gre6(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev) +{ + return false; +} + +static struct mlxsw_sp_rif_ipip_lb_config +mlxsw_sp1_ipip_ol_loopback_config_gre6(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev) +{ + struct mlxsw_sp_rif_ipip_lb_config config = {0}; + + WARN_ON_ONCE(1); + return config; +} + +static int +mlxsw_sp1_ipip_ol_netdev_change_gre6(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct netlink_ext_ack *extack) +{ + WARN_ON_ONCE(1); + return -EINVAL; +} + +static int +mlxsw_sp1_ipip_rem_addr_set_gre6(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + WARN_ON_ONCE(1); + return -EINVAL; +} + +static void 
+mlxsw_sp1_ipip_rem_addr_unset_gre6(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_ipip_entry *ipip_entry) +{ + WARN_ON_ONCE(1); +} + +static const struct mlxsw_sp_ipip_ops mlxsw_sp1_ipip_gre6_ops = { + .dev_type = ARPHRD_IP6GRE, + .ul_proto = MLXSW_SP_L3_PROTO_IPV6, + .inc_parsing_depth = true, + .parms_init = mlxsw_sp1_ipip_netdev_parms_init_gre6, + .nexthop_update = mlxsw_sp1_ipip_nexthop_update_gre6, + .decap_config = mlxsw_sp1_ipip_decap_config_gre6, + .can_offload = mlxsw_sp1_ipip_can_offload_gre6, + .ol_loopback_config = mlxsw_sp1_ipip_ol_loopback_config_gre6, + .ol_netdev_change = mlxsw_sp1_ipip_ol_netdev_change_gre6, + .rem_ip_addr_set = mlxsw_sp1_ipip_rem_addr_set_gre6, + .rem_ip_addr_unset = mlxsw_sp1_ipip_rem_addr_unset_gre6, +}; + +const struct mlxsw_sp_ipip_ops *mlxsw_sp1_ipip_ops_arr[] = { + [MLXSW_SP_IPIP_TYPE_GRE4] = &mlxsw_sp_ipip_gre4_ops, + [MLXSW_SP_IPIP_TYPE_GRE6] = &mlxsw_sp1_ipip_gre6_ops, +}; + +static struct mlxsw_sp_ipip_parms +mlxsw_sp2_ipip_netdev_parms_init_gre6(const struct net_device *ol_dev) +{ + struct __ip6_tnl_parm parms = mlxsw_sp_ipip_netdev_parms6(ol_dev); + + return (struct mlxsw_sp_ipip_parms) { + .proto = MLXSW_SP_L3_PROTO_IPV6, + .saddr = mlxsw_sp_ipip_parms6_saddr(&parms), + .daddr = mlxsw_sp_ipip_parms6_daddr(&parms), + .link = parms.link, + .ikey = mlxsw_sp_ipip_parms6_ikey(&parms), + .okey = mlxsw_sp_ipip_parms6_okey(&parms), + }; +} + +static int +mlxsw_sp2_ipip_nexthop_update_gre6(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_ipip_entry *ipip_entry, + bool force, char *ratr_pl) +{ + u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); + enum mlxsw_reg_ratr_op op; + + op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY : + MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY; + mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_IPIP, + adj_index, rif_index); + mlxsw_reg_ratr_ipip6_entry_pack(ratr_pl, + ipip_entry->dip_kvdl_index); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); +} + +static int +mlxsw_sp2_ipip_decap_config_gre6(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + u32 tunnel_index) +{ + u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); + u16 ul_rif_id = mlxsw_sp_ipip_lb_ul_rif_id(ipip_entry->ol_lb); + char rtdp_pl[MLXSW_REG_RTDP_LEN]; + struct __ip6_tnl_parm parms; + unsigned int type_check; + bool has_ikey; + u32 ikey; + + parms = mlxsw_sp_ipip_netdev_parms6(ipip_entry->ol_dev); + has_ikey = mlxsw_sp_ipip_parms6_has_ikey(&parms); + ikey = mlxsw_sp_ipip_parms6_ikey(&parms); + + mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index); + mlxsw_reg_rtdp_egress_router_interface_set(rtdp_pl, ul_rif_id); + + type_check = has_ikey ? + MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY : + MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE; + + /* Linux demuxes tunnels based on packet SIP (which must match tunnel + * remote IP). Thus configure decap so that it filters out packets that + * are not IPv6 or have the wrong SIP. IPIP_DECAP_ERROR trap is + * generated for packets that fail this criterion. Linux then handles + * such packets in slow path and generates ICMP destination unreachable. 
+ */ + mlxsw_reg_rtdp_ipip6_pack(rtdp_pl, rif_index, + MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV6, + type_check, has_ikey, + ipip_entry->dip_kvdl_index, ikey); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl); +} + +static bool mlxsw_sp2_ipip_can_offload_gre6(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev) +{ + struct __ip6_tnl_parm tparm = mlxsw_sp_ipip_netdev_parms6(ol_dev); + bool inherit_tos = tparm.flags & IP6_TNL_F_USE_ORIG_TCLASS; + bool inherit_ttl = tparm.hop_limit == 0; + __be16 okflags = TUNNEL_KEY; /* We can't offload any other features. */ + + return (tparm.i_flags & ~okflags) == 0 && + (tparm.o_flags & ~okflags) == 0 && + inherit_ttl && inherit_tos && + mlxsw_sp_ipip_tunnel_complete(MLXSW_SP_L3_PROTO_IPV6, ol_dev); +} + +static struct mlxsw_sp_rif_ipip_lb_config +mlxsw_sp2_ipip_ol_loopback_config_gre6(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev) +{ + struct __ip6_tnl_parm parms = mlxsw_sp_ipip_netdev_parms6(ol_dev); + enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt; + + lb_ipipt = mlxsw_sp_ipip_parms6_has_okey(&parms) ? + MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP : + MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP; + return (struct mlxsw_sp_rif_ipip_lb_config){ + .lb_ipipt = lb_ipipt, + .okey = mlxsw_sp_ipip_parms6_okey(&parms), + .ul_protocol = MLXSW_SP_L3_PROTO_IPV6, + .saddr = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV6, + ol_dev), + }; +} + +static int +mlxsw_sp2_ipip_ol_netdev_change_gre6(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_ipip_parms new_parms; + + new_parms = mlxsw_sp2_ipip_netdev_parms_init_gre6(ipip_entry->ol_dev); + return mlxsw_sp_ipip_ol_netdev_change_gre(mlxsw_sp, ipip_entry, + &new_parms, extack); +} + +static int +mlxsw_sp2_ipip_rem_addr_set_gre6(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + return mlxsw_sp_ipv6_addr_kvdl_index_get(mlxsw_sp, + &ipip_entry->parms.daddr.addr6, + &ipip_entry->dip_kvdl_index); +} + +static void +mlxsw_sp2_ipip_rem_addr_unset_gre6(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_ipip_entry *ipip_entry) +{ + mlxsw_sp_ipv6_addr_put(mlxsw_sp, &ipip_entry->parms.daddr.addr6); +} + +static const struct mlxsw_sp_ipip_ops mlxsw_sp2_ipip_gre6_ops = { + .dev_type = ARPHRD_IP6GRE, + .ul_proto = MLXSW_SP_L3_PROTO_IPV6, + .inc_parsing_depth = true, + .parms_init = mlxsw_sp2_ipip_netdev_parms_init_gre6, + .nexthop_update = mlxsw_sp2_ipip_nexthop_update_gre6, + .decap_config = mlxsw_sp2_ipip_decap_config_gre6, + .can_offload = mlxsw_sp2_ipip_can_offload_gre6, + .ol_loopback_config = mlxsw_sp2_ipip_ol_loopback_config_gre6, + .ol_netdev_change = mlxsw_sp2_ipip_ol_netdev_change_gre6, + .rem_ip_addr_set = mlxsw_sp2_ipip_rem_addr_set_gre6, + .rem_ip_addr_unset = mlxsw_sp2_ipip_rem_addr_unset_gre6, +}; + +const struct mlxsw_sp_ipip_ops *mlxsw_sp2_ipip_ops_arr[] = { + [MLXSW_SP_IPIP_TYPE_GRE4] = &mlxsw_sp_ipip_gre4_ops, + [MLXSW_SP_IPIP_TYPE_GRE6] = &mlxsw_sp2_ipip_gre6_ops, +}; + +static int mlxsw_sp_ipip_ecn_encap_init_one(struct mlxsw_sp *mlxsw_sp, + u8 inner_ecn, u8 outer_ecn) +{ + char tieem_pl[MLXSW_REG_TIEEM_LEN]; + + mlxsw_reg_tieem_pack(tieem_pl, inner_ecn, outer_ecn); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tieem), tieem_pl); +} + +int mlxsw_sp_ipip_ecn_encap_init(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + /* Iterate over inner ECN values */ + for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) { + u8 outer_ecn = INET_ECN_encapsulate(0, i); + 
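		/* Program the (inner = i, outer = outer_ecn) pair below;
		 * INET_ECN_encapsulate(0, i) copies the inner ECN codepoint
		 * to the outer header, mapping CE to ECT(0) on encapsulation.
		 */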
int err; + + err = mlxsw_sp_ipip_ecn_encap_init_one(mlxsw_sp, i, outer_ecn); + if (err) + return err; + } + + return 0; +} + +static int mlxsw_sp_ipip_ecn_decap_init_one(struct mlxsw_sp *mlxsw_sp, + u8 inner_ecn, u8 outer_ecn) +{ + char tidem_pl[MLXSW_REG_TIDEM_LEN]; + u8 new_inner_ecn; + bool trap_en; + + new_inner_ecn = mlxsw_sp_tunnel_ecn_decap(outer_ecn, inner_ecn, + &trap_en); + mlxsw_reg_tidem_pack(tidem_pl, outer_ecn, inner_ecn, new_inner_ecn, + trap_en, trap_en ? MLXSW_TRAP_ID_DECAP_ECN0 : 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tidem), tidem_pl); +} + +int mlxsw_sp_ipip_ecn_decap_init(struct mlxsw_sp *mlxsw_sp) +{ + int i, j, err; + + /* Iterate over inner ECN values */ + for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) { + /* Iterate over outer ECN values */ + for (j = INET_ECN_NOT_ECT; j <= INET_ECN_CE; j++) { + err = mlxsw_sp_ipip_ecn_decap_init_one(mlxsw_sp, i, j); + if (err) + return err; + } + } + + return 0; +} + +struct net_device * +mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev) +{ + struct net *net = dev_net(ol_dev); + struct ip_tunnel *tun4; + struct ip6_tnl *tun6; + + switch (ol_dev->type) { + case ARPHRD_IPGRE: + tun4 = netdev_priv(ol_dev); + return dev_get_by_index_rcu(net, tun4->parms.link); + case ARPHRD_IP6GRE: + tun6 = netdev_priv(ol_dev); + return dev_get_by_index_rcu(net, tun6->parms.link); + default: + return NULL; + } +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h new file mode 100644 index 000000000..8cc259dcc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_IPIP_H_ +#define _MLXSW_IPIP_H_ + +#include "spectrum_router.h" +#include <net/ip_fib.h> +#include <linux/if_tunnel.h> +#include <net/ip6_tunnel.h> + +struct ip_tunnel_parm +mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev); +struct __ip6_tnl_parm +mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev); + +union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev); + +bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr); + +enum mlxsw_sp_ipip_type { + MLXSW_SP_IPIP_TYPE_GRE4, + MLXSW_SP_IPIP_TYPE_GRE6, + MLXSW_SP_IPIP_TYPE_MAX, +}; + +struct mlxsw_sp_ipip_parms { + enum mlxsw_sp_l3proto proto; + union mlxsw_sp_l3addr saddr; + union mlxsw_sp_l3addr daddr; + int link; + u32 ikey; + u32 okey; +}; + +struct mlxsw_sp_ipip_entry { + enum mlxsw_sp_ipip_type ipipt; + struct net_device *ol_dev; /* Overlay. */ + struct mlxsw_sp_rif_ipip_lb *ol_lb; + struct mlxsw_sp_fib_entry *decap_fib_entry; + struct list_head ipip_list_node; + struct mlxsw_sp_ipip_parms parms; + u32 dip_kvdl_index; +}; + +struct mlxsw_sp_ipip_ops { + int dev_type; + enum mlxsw_sp_l3proto ul_proto; /* Underlay. */ + bool inc_parsing_depth; + + struct mlxsw_sp_ipip_parms + (*parms_init)(const struct net_device *ol_dev); + + int (*nexthop_update)(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_ipip_entry *ipip_entry, + bool force, char *ratr_pl); + + bool (*can_offload)(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev); + + /* Return a configuration for creating an overlay loopback RIF. 
*/ + struct mlxsw_sp_rif_ipip_lb_config + (*ol_loopback_config)(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev); + + int (*decap_config)(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + u32 tunnel_index); + + int (*ol_netdev_change)(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct netlink_ext_ack *extack); + int (*rem_ip_addr_set)(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry); + void (*rem_ip_addr_unset)(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_ipip_entry *ipip_entry); +}; + +extern const struct mlxsw_sp_ipip_ops *mlxsw_sp1_ipip_ops_arr[]; +extern const struct mlxsw_sp_ipip_ops *mlxsw_sp2_ipip_ops_arr[]; + +#endif /* _MLXSW_IPIP_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c new file mode 100644 index 000000000..20d72f1c0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */ + +#include <linux/kernel.h> +#include <linux/mutex.h> +#include <linux/slab.h> + +#include "spectrum.h" + +struct mlxsw_sp_kvdl { + const struct mlxsw_sp_kvdl_ops *kvdl_ops; + struct mutex kvdl_lock; /* Protects kvdl allocations */ + unsigned long priv[]; + /* priv has to be always the last item */ +}; + +int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp) +{ + const struct mlxsw_sp_kvdl_ops *kvdl_ops = mlxsw_sp->kvdl_ops; + struct mlxsw_sp_kvdl *kvdl; + int err; + + kvdl = kzalloc(sizeof(*mlxsw_sp->kvdl) + kvdl_ops->priv_size, + GFP_KERNEL); + if (!kvdl) + return -ENOMEM; + mutex_init(&kvdl->kvdl_lock); + kvdl->kvdl_ops = kvdl_ops; + mlxsw_sp->kvdl = kvdl; + + err = kvdl_ops->init(mlxsw_sp, kvdl->priv); + if (err) + goto err_init; + return 0; + +err_init: + mutex_destroy(&kvdl->kvdl_lock); + kfree(kvdl); + return err; +} + +void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl; + + kvdl->kvdl_ops->fini(mlxsw_sp, kvdl->priv); + mutex_destroy(&kvdl->kvdl_lock); + kfree(kvdl); +} + +int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, u32 *p_entry_index) +{ + struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl; + int err; + + mutex_lock(&kvdl->kvdl_lock); + err = kvdl->kvdl_ops->alloc(mlxsw_sp, kvdl->priv, type, + entry_count, p_entry_index); + mutex_unlock(&kvdl->kvdl_lock); + + return err; +} + +void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, int entry_index) +{ + struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl; + + mutex_lock(&kvdl->kvdl_lock); + kvdl->kvdl_ops->free(mlxsw_sp, kvdl->priv, type, + entry_count, entry_index); + mutex_unlock(&kvdl->kvdl_lock); +} + +int mlxsw_sp_kvdl_alloc_count_query(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, + unsigned int *p_alloc_count) +{ + struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl; + + return kvdl->kvdl_ops->alloc_size_query(mlxsw_sp, kvdl->priv, type, + entry_count, p_alloc_count); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c new file mode 100644 index 000000000..07b371cd9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c @@ -0,0 +1,478 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2020 Mellanox 
Technologies. All rights reserved */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/netdevice.h> +#include <net/flow_offload.h> + +#include "spectrum.h" +#include "spectrum_span.h" +#include "reg.h" + +static struct mlxsw_sp_mall_entry * +mlxsw_sp_mall_entry_find(struct mlxsw_sp_flow_block *block, unsigned long cookie) +{ + struct mlxsw_sp_mall_entry *mall_entry; + + list_for_each_entry(mall_entry, &block->mall.list, list) + if (mall_entry->cookie == cookie) + return mall_entry; + + return NULL; +} + +static int +mlxsw_sp_mall_port_mirror_add(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_span_agent_parms agent_parms = {}; + struct mlxsw_sp_span_trigger_parms parms; + enum mlxsw_sp_span_trigger trigger; + int err; + + if (!mall_entry->mirror.to_dev) { + NL_SET_ERR_MSG(extack, "Could not find requested device"); + return -EINVAL; + } + + agent_parms.to_dev = mall_entry->mirror.to_dev; + err = mlxsw_sp_span_agent_get(mlxsw_sp, &mall_entry->mirror.span_id, + &agent_parms); + if (err) { + NL_SET_ERR_MSG(extack, "Failed to get SPAN agent"); + return err; + } + + err = mlxsw_sp_span_analyzed_port_get(mlxsw_sp_port, + mall_entry->ingress); + if (err) { + NL_SET_ERR_MSG(extack, "Failed to get analyzed port"); + goto err_analyzed_port_get; + } + + trigger = mall_entry->ingress ? MLXSW_SP_SPAN_TRIGGER_INGRESS : + MLXSW_SP_SPAN_TRIGGER_EGRESS; + parms.span_id = mall_entry->mirror.span_id; + parms.probability_rate = 1; + err = mlxsw_sp_span_agent_bind(mlxsw_sp, trigger, mlxsw_sp_port, + &parms); + if (err) { + NL_SET_ERR_MSG(extack, "Failed to bind SPAN agent"); + goto err_agent_bind; + } + + return 0; + +err_agent_bind: + mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, mall_entry->ingress); +err_analyzed_port_get: + mlxsw_sp_span_agent_put(mlxsw_sp, mall_entry->mirror.span_id); + return err; +} + +static void +mlxsw_sp_mall_port_mirror_del(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_span_trigger_parms parms; + enum mlxsw_sp_span_trigger trigger; + + trigger = mall_entry->ingress ? 
MLXSW_SP_SPAN_TRIGGER_INGRESS : + MLXSW_SP_SPAN_TRIGGER_EGRESS; + parms.span_id = mall_entry->mirror.span_id; + mlxsw_sp_span_agent_unbind(mlxsw_sp, trigger, mlxsw_sp_port, &parms); + mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, mall_entry->ingress); + mlxsw_sp_span_agent_put(mlxsw_sp, mall_entry->mirror.span_id); +} + +static int mlxsw_sp_mall_port_sample_set(struct mlxsw_sp_port *mlxsw_sp_port, + bool enable, u32 rate) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char mpsc_pl[MLXSW_REG_MPSC_LEN]; + + mlxsw_reg_mpsc_pack(mpsc_pl, mlxsw_sp_port->local_port, enable, rate); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpsc), mpsc_pl); +} + +static int +mlxsw_sp_mall_port_sample_add(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_sample_trigger trigger; + int err; + + if (mall_entry->ingress) + trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_INGRESS; + else + trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_EGRESS; + trigger.local_port = mlxsw_sp_port->local_port; + err = mlxsw_sp_sample_trigger_params_set(mlxsw_sp, &trigger, + &mall_entry->sample.params, + extack); + if (err) + return err; + + err = mlxsw_sp->mall_ops->sample_add(mlxsw_sp, mlxsw_sp_port, + mall_entry, extack); + if (err) + goto err_port_sample_set; + return 0; + +err_port_sample_set: + mlxsw_sp_sample_trigger_params_unset(mlxsw_sp, &trigger); + return err; +} + +static void +mlxsw_sp_mall_port_sample_del(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_sample_trigger trigger; + + if (mall_entry->ingress) + trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_INGRESS; + else + trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_EGRESS; + trigger.local_port = mlxsw_sp_port->local_port; + + mlxsw_sp->mall_ops->sample_del(mlxsw_sp, mlxsw_sp_port, mall_entry); + mlxsw_sp_sample_trigger_params_unset(mlxsw_sp, &trigger); +} + +static int +mlxsw_sp_mall_port_rule_add(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry, + struct netlink_ext_ack *extack) +{ + switch (mall_entry->type) { + case MLXSW_SP_MALL_ACTION_TYPE_MIRROR: + return mlxsw_sp_mall_port_mirror_add(mlxsw_sp_port, mall_entry, + extack); + case MLXSW_SP_MALL_ACTION_TYPE_SAMPLE: + return mlxsw_sp_mall_port_sample_add(mlxsw_sp_port, mall_entry, + extack); + default: + WARN_ON(1); + return -EINVAL; + } +} + +static void +mlxsw_sp_mall_port_rule_del(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry) +{ + switch (mall_entry->type) { + case MLXSW_SP_MALL_ACTION_TYPE_MIRROR: + mlxsw_sp_mall_port_mirror_del(mlxsw_sp_port, mall_entry); + break; + case MLXSW_SP_MALL_ACTION_TYPE_SAMPLE: + mlxsw_sp_mall_port_sample_del(mlxsw_sp_port, mall_entry); + break; + default: + WARN_ON(1); + } +} + +static void mlxsw_sp_mall_prio_update(struct mlxsw_sp_flow_block *block) +{ + struct mlxsw_sp_mall_entry *mall_entry; + + if (list_empty(&block->mall.list)) + return; + block->mall.min_prio = UINT_MAX; + block->mall.max_prio = 0; + list_for_each_entry(mall_entry, &block->mall.list, list) { + if (mall_entry->priority < block->mall.min_prio) + block->mall.min_prio = mall_entry->priority; + if (mall_entry->priority > block->mall.max_prio) + block->mall.max_prio = mall_entry->priority; + } +} + +int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_flow_block *block, + struct 
tc_cls_matchall_offload *f) +{ + struct mlxsw_sp_flow_block_binding *binding; + struct mlxsw_sp_mall_entry *mall_entry; + __be16 protocol = f->common.protocol; + struct flow_action_entry *act; + unsigned int flower_min_prio; + unsigned int flower_max_prio; + bool flower_prio_valid; + int err; + + if (!flow_offload_has_one_action(&f->rule->action)) { + NL_SET_ERR_MSG(f->common.extack, "Only singular actions are supported"); + return -EOPNOTSUPP; + } + + if (f->common.chain_index) { + NL_SET_ERR_MSG(f->common.extack, "Only chain 0 is supported"); + return -EOPNOTSUPP; + } + + if (mlxsw_sp_flow_block_is_mixed_bound(block)) { + NL_SET_ERR_MSG(f->common.extack, "Only not mixed bound blocks are supported"); + return -EOPNOTSUPP; + } + + err = mlxsw_sp_flower_prio_get(mlxsw_sp, block, f->common.chain_index, + &flower_min_prio, &flower_max_prio); + if (err) { + if (err != -ENOENT) { + NL_SET_ERR_MSG(f->common.extack, "Failed to get flower priorities"); + return err; + } + flower_prio_valid = false; + /* No flower filters are installed in specified chain. */ + } else { + flower_prio_valid = true; + } + + if (protocol != htons(ETH_P_ALL)) { + NL_SET_ERR_MSG(f->common.extack, "matchall rules only supported with 'all' protocol"); + return -EOPNOTSUPP; + } + + mall_entry = kzalloc(sizeof(*mall_entry), GFP_KERNEL); + if (!mall_entry) + return -ENOMEM; + mall_entry->cookie = f->cookie; + mall_entry->priority = f->common.prio; + mall_entry->ingress = mlxsw_sp_flow_block_is_ingress_bound(block); + + if (flower_prio_valid && mall_entry->ingress && + mall_entry->priority >= flower_min_prio) { + NL_SET_ERR_MSG(f->common.extack, "Failed to add behind existing flower rules"); + err = -EOPNOTSUPP; + goto errout; + } + if (flower_prio_valid && !mall_entry->ingress && + mall_entry->priority <= flower_max_prio) { + NL_SET_ERR_MSG(f->common.extack, "Failed to add in front of existing flower rules"); + err = -EOPNOTSUPP; + goto errout; + } + + act = &f->rule->action.entries[0]; + + switch (act->id) { + case FLOW_ACTION_MIRRED: + mall_entry->type = MLXSW_SP_MALL_ACTION_TYPE_MIRROR; + mall_entry->mirror.to_dev = act->dev; + break; + case FLOW_ACTION_SAMPLE: + mall_entry->type = MLXSW_SP_MALL_ACTION_TYPE_SAMPLE; + mall_entry->sample.params.psample_group = act->sample.psample_group; + mall_entry->sample.params.truncate = act->sample.truncate; + mall_entry->sample.params.trunc_size = act->sample.trunc_size; + mall_entry->sample.params.rate = act->sample.rate; + break; + default: + err = -EOPNOTSUPP; + goto errout; + } + + list_for_each_entry(binding, &block->binding_list, list) { + err = mlxsw_sp_mall_port_rule_add(binding->mlxsw_sp_port, + mall_entry, f->common.extack); + if (err) + goto rollback; + } + + block->rule_count++; + if (mall_entry->ingress) + block->egress_blocker_rule_count++; + else + block->ingress_blocker_rule_count++; + list_add_tail(&mall_entry->list, &block->mall.list); + mlxsw_sp_mall_prio_update(block); + return 0; + +rollback: + list_for_each_entry_continue_reverse(binding, &block->binding_list, + list) + mlxsw_sp_mall_port_rule_del(binding->mlxsw_sp_port, mall_entry); +errout: + kfree(mall_entry); + return err; +} + +void mlxsw_sp_mall_destroy(struct mlxsw_sp_flow_block *block, + struct tc_cls_matchall_offload *f) +{ + struct mlxsw_sp_flow_block_binding *binding; + struct mlxsw_sp_mall_entry *mall_entry; + + mall_entry = mlxsw_sp_mall_entry_find(block, f->cookie); + if (!mall_entry) { + NL_SET_ERR_MSG(f->common.extack, "Entry not found"); + return; + } + + list_del(&mall_entry->list); + if 
(mall_entry->ingress) + block->egress_blocker_rule_count--; + else + block->ingress_blocker_rule_count--; + block->rule_count--; + list_for_each_entry(binding, &block->binding_list, list) + mlxsw_sp_mall_port_rule_del(binding->mlxsw_sp_port, mall_entry); + kfree_rcu(mall_entry, rcu); /* sample RX packets may be in-flight */ + mlxsw_sp_mall_prio_update(block); +} + +int mlxsw_sp_mall_port_bind(struct mlxsw_sp_flow_block *block, + struct mlxsw_sp_port *mlxsw_sp_port, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_mall_entry *mall_entry; + int err; + + list_for_each_entry(mall_entry, &block->mall.list, list) { + err = mlxsw_sp_mall_port_rule_add(mlxsw_sp_port, mall_entry, + extack); + if (err) + goto rollback; + } + return 0; + +rollback: + list_for_each_entry_continue_reverse(mall_entry, &block->mall.list, + list) + mlxsw_sp_mall_port_rule_del(mlxsw_sp_port, mall_entry); + return err; +} + +void mlxsw_sp_mall_port_unbind(struct mlxsw_sp_flow_block *block, + struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp_mall_entry *mall_entry; + + list_for_each_entry(mall_entry, &block->mall.list, list) + mlxsw_sp_mall_port_rule_del(mlxsw_sp_port, mall_entry); +} + +int mlxsw_sp_mall_prio_get(struct mlxsw_sp_flow_block *block, u32 chain_index, + unsigned int *p_min_prio, unsigned int *p_max_prio) +{ + if (chain_index || list_empty(&block->mall.list)) + /* In case there are no matchall rules, the caller + * receives -ENOENT to indicate there is no need + * to check the priorities. + */ + return -ENOENT; + *p_min_prio = block->mall.min_prio; + *p_max_prio = block->mall.max_prio; + return 0; +} + +static int mlxsw_sp1_mall_sample_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry, + struct netlink_ext_ack *extack) +{ + u32 rate = mall_entry->sample.params.rate; + + if (!mall_entry->ingress) { + NL_SET_ERR_MSG(extack, "Sampling is not supported on egress"); + return -EOPNOTSUPP; + } + + if (rate > MLXSW_REG_MPSC_RATE_MAX) { + NL_SET_ERR_MSG(extack, "Unsupported sampling rate"); + return -EOPNOTSUPP; + } + + return mlxsw_sp_mall_port_sample_set(mlxsw_sp_port, true, rate); +} + +static void mlxsw_sp1_mall_sample_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry) +{ + mlxsw_sp_mall_port_sample_set(mlxsw_sp_port, false, 1); +} + +const struct mlxsw_sp_mall_ops mlxsw_sp1_mall_ops = { + .sample_add = mlxsw_sp1_mall_sample_add, + .sample_del = mlxsw_sp1_mall_sample_del, +}; + +static int mlxsw_sp2_mall_sample_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_span_trigger_parms trigger_parms = {}; + struct mlxsw_sp_span_agent_parms agent_parms = { + .to_dev = NULL, /* Mirror to CPU. */ + .session_id = MLXSW_SP_SPAN_SESSION_ID_SAMPLING, + }; + u32 rate = mall_entry->sample.params.rate; + enum mlxsw_sp_span_trigger span_trigger; + int err; + + err = mlxsw_sp_span_agent_get(mlxsw_sp, &mall_entry->sample.span_id, + &agent_parms); + if (err) { + NL_SET_ERR_MSG(extack, "Failed to get SPAN agent"); + return err; + } + + err = mlxsw_sp_span_analyzed_port_get(mlxsw_sp_port, + mall_entry->ingress); + if (err) { + NL_SET_ERR_MSG(extack, "Failed to get analyzed port"); + goto err_analyzed_port_get; + } + + span_trigger = mall_entry->ingress ? 
MLXSW_SP_SPAN_TRIGGER_INGRESS : + MLXSW_SP_SPAN_TRIGGER_EGRESS; + trigger_parms.span_id = mall_entry->sample.span_id; + trigger_parms.probability_rate = rate; + err = mlxsw_sp_span_agent_bind(mlxsw_sp, span_trigger, mlxsw_sp_port, + &trigger_parms); + if (err) { + NL_SET_ERR_MSG(extack, "Failed to bind SPAN agent"); + goto err_agent_bind; + } + + return 0; + +err_agent_bind: + mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, mall_entry->ingress); +err_analyzed_port_get: + mlxsw_sp_span_agent_put(mlxsw_sp, mall_entry->sample.span_id); + return err; +} + +static void mlxsw_sp2_mall_sample_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry) +{ + struct mlxsw_sp_span_trigger_parms trigger_parms = {}; + enum mlxsw_sp_span_trigger span_trigger; + + span_trigger = mall_entry->ingress ? MLXSW_SP_SPAN_TRIGGER_INGRESS : + MLXSW_SP_SPAN_TRIGGER_EGRESS; + trigger_parms.span_id = mall_entry->sample.span_id; + mlxsw_sp_span_agent_unbind(mlxsw_sp, span_trigger, mlxsw_sp_port, + &trigger_parms); + mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, mall_entry->ingress); + mlxsw_sp_span_agent_put(mlxsw_sp, mall_entry->sample.span_id); +} + +const struct mlxsw_sp_mall_ops mlxsw_sp2_mall_ops = { + .sample_add = mlxsw_sp2_mall_sample_add, + .sample_del = mlxsw_sp2_mall_sample_del, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c new file mode 100644 index 000000000..1f6bc0c7e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c @@ -0,0 +1,1075 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */ + +#include <linux/mutex.h> +#include <linux/rhashtable.h> +#include <net/ipv6.h> + +#include "spectrum_mr.h" +#include "spectrum_router.h" + +struct mlxsw_sp_mr { + const struct mlxsw_sp_mr_ops *mr_ops; + void *catchall_route_priv; + struct delayed_work stats_update_dw; + struct list_head table_list; + struct mutex table_list_lock; /* Protects table_list */ +#define MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL 5000 /* ms */ + unsigned long priv[]; + /* priv has to be always the last item */ +}; + +struct mlxsw_sp_mr_vif; +struct mlxsw_sp_mr_vif_ops { + bool (*is_regular)(const struct mlxsw_sp_mr_vif *vif); +}; + +struct mlxsw_sp_mr_vif { + struct net_device *dev; + const struct mlxsw_sp_rif *rif; + unsigned long vif_flags; + + /* A list of route_vif_entry structs that point to routes that the VIF + * instance is used as one of the egress VIFs + */ + struct list_head route_evif_list; + + /* A list of route_vif_entry structs that point to routes that the VIF + * instance is used as an ingress VIF + */ + struct list_head route_ivif_list; + + /* Protocol specific operations for a VIF */ + const struct mlxsw_sp_mr_vif_ops *ops; +}; + +struct mlxsw_sp_mr_route_vif_entry { + struct list_head vif_node; + struct list_head route_node; + struct mlxsw_sp_mr_vif *mr_vif; + struct mlxsw_sp_mr_route *mr_route; +}; + +struct mlxsw_sp_mr_table; +struct mlxsw_sp_mr_table_ops { + bool (*is_route_valid)(const struct mlxsw_sp_mr_table *mr_table, + const struct mr_mfc *mfc); + void (*key_create)(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_key *key, + struct mr_mfc *mfc); + bool (*is_route_starg)(const struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_mr_route *mr_route); +}; + +struct mlxsw_sp_mr_table { + struct list_head node; + enum mlxsw_sp_l3proto proto; + struct mlxsw_sp *mlxsw_sp; + u32 
vr_id; + struct mlxsw_sp_mr_vif vifs[MAXVIFS]; + struct list_head route_list; + struct mutex route_list_lock; /* Protects route_list */ + struct rhashtable route_ht; + const struct mlxsw_sp_mr_table_ops *ops; + char catchall_route_priv[]; + /* catchall_route_priv has to be always the last item */ +}; + +struct mlxsw_sp_mr_route { + struct list_head node; + struct rhash_head ht_node; + struct mlxsw_sp_mr_route_key key; + enum mlxsw_sp_mr_route_action route_action; + u16 min_mtu; + struct mr_mfc *mfc; + void *route_priv; + const struct mlxsw_sp_mr_table *mr_table; + /* A list of route_vif_entry structs that point to the egress VIFs */ + struct list_head evif_list; + /* A route_vif_entry struct that point to the ingress VIF */ + struct mlxsw_sp_mr_route_vif_entry ivif; +}; + +static const struct rhashtable_params mlxsw_sp_mr_route_ht_params = { + .key_len = sizeof(struct mlxsw_sp_mr_route_key), + .key_offset = offsetof(struct mlxsw_sp_mr_route, key), + .head_offset = offsetof(struct mlxsw_sp_mr_route, ht_node), + .automatic_shrinking = true, +}; + +static bool mlxsw_sp_mr_vif_valid(const struct mlxsw_sp_mr_vif *vif) +{ + return vif->ops->is_regular(vif) && vif->dev && vif->rif; +} + +static bool mlxsw_sp_mr_vif_exists(const struct mlxsw_sp_mr_vif *vif) +{ + return vif->dev; +} + +static bool +mlxsw_sp_mr_route_ivif_in_evifs(const struct mlxsw_sp_mr_route *mr_route) +{ + vifi_t ivif = mr_route->mfc->mfc_parent; + + return mr_route->mfc->mfc_un.res.ttls[ivif] != 255; +} + +static int +mlxsw_sp_mr_route_valid_evifs_num(const struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + int valid_evifs; + + valid_evifs = 0; + list_for_each_entry(rve, &mr_route->evif_list, route_node) + if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) + valid_evifs++; + return valid_evifs; +} + +static enum mlxsw_sp_mr_route_action +mlxsw_sp_mr_route_action(const struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + + /* If the ingress port is not regular and resolved, trap the route */ + if (!mlxsw_sp_mr_vif_valid(mr_route->ivif.mr_vif)) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP; + + /* The kernel does not match a (*,G) route that the ingress interface is + * not one of the egress interfaces, so trap these kind of routes. + */ + if (mr_route->mr_table->ops->is_route_starg(mr_route->mr_table, + mr_route) && + !mlxsw_sp_mr_route_ivif_in_evifs(mr_route)) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP; + + /* If the route has no valid eVIFs, trap it. */ + if (!mlxsw_sp_mr_route_valid_evifs_num(mr_route)) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP; + + /* If one of the eVIFs has no RIF, trap-and-forward the route as there + * is some more routing to do in software too. + */ + list_for_each_entry(rve, &mr_route->evif_list, route_node) + if (mlxsw_sp_mr_vif_exists(rve->mr_vif) && !rve->mr_vif->rif) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD; + + return MLXSW_SP_MR_ROUTE_ACTION_FORWARD; +} + +static enum mlxsw_sp_mr_route_prio +mlxsw_sp_mr_route_prio(const struct mlxsw_sp_mr_route *mr_route) +{ + return mr_route->mr_table->ops->is_route_starg(mr_route->mr_table, + mr_route) ? 
+ MLXSW_SP_MR_ROUTE_PRIO_STARG : MLXSW_SP_MR_ROUTE_PRIO_SG; +} + +static int mlxsw_sp_mr_route_evif_link(struct mlxsw_sp_mr_route *mr_route, + struct mlxsw_sp_mr_vif *mr_vif) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + + rve = kzalloc(sizeof(*rve), GFP_KERNEL); + if (!rve) + return -ENOMEM; + rve->mr_route = mr_route; + rve->mr_vif = mr_vif; + list_add_tail(&rve->route_node, &mr_route->evif_list); + list_add_tail(&rve->vif_node, &mr_vif->route_evif_list); + return 0; +} + +static void +mlxsw_sp_mr_route_evif_unlink(struct mlxsw_sp_mr_route_vif_entry *rve) +{ + list_del(&rve->route_node); + list_del(&rve->vif_node); + kfree(rve); +} + +static void mlxsw_sp_mr_route_ivif_link(struct mlxsw_sp_mr_route *mr_route, + struct mlxsw_sp_mr_vif *mr_vif) +{ + mr_route->ivif.mr_route = mr_route; + mr_route->ivif.mr_vif = mr_vif; + list_add_tail(&mr_route->ivif.vif_node, &mr_vif->route_ivif_list); +} + +static void mlxsw_sp_mr_route_ivif_unlink(struct mlxsw_sp_mr_route *mr_route) +{ + list_del(&mr_route->ivif.vif_node); +} + +static int +mlxsw_sp_mr_route_info_create(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route, + struct mlxsw_sp_mr_route_info *route_info) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + u16 *erif_indices; + u16 irif_index; + u16 erif = 0; + + erif_indices = kmalloc_array(MAXVIFS, sizeof(*erif_indices), + GFP_KERNEL); + if (!erif_indices) + return -ENOMEM; + + list_for_each_entry(rve, &mr_route->evif_list, route_node) { + if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) { + u16 rifi = mlxsw_sp_rif_index(rve->mr_vif->rif); + + erif_indices[erif++] = rifi; + } + } + + if (mlxsw_sp_mr_vif_valid(mr_route->ivif.mr_vif)) + irif_index = mlxsw_sp_rif_index(mr_route->ivif.mr_vif->rif); + else + irif_index = 0; + + route_info->irif_index = irif_index; + route_info->erif_indices = erif_indices; + route_info->min_mtu = mr_route->min_mtu; + route_info->route_action = mr_route->route_action; + route_info->erif_num = erif; + return 0; +} + +static void +mlxsw_sp_mr_route_info_destroy(struct mlxsw_sp_mr_route_info *route_info) +{ + kfree(route_info->erif_indices); +} + +static int mlxsw_sp_mr_route_write(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route, + bool replace) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr_route_info route_info; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + int err; + + err = mlxsw_sp_mr_route_info_create(mr_table, mr_route, &route_info); + if (err) + return err; + + if (!replace) { + struct mlxsw_sp_mr_route_params route_params; + + mr_route->route_priv = kzalloc(mr->mr_ops->route_priv_size, + GFP_KERNEL); + if (!mr_route->route_priv) { + err = -ENOMEM; + goto out; + } + + route_params.key = mr_route->key; + route_params.value = route_info; + route_params.prio = mlxsw_sp_mr_route_prio(mr_route); + err = mr->mr_ops->route_create(mlxsw_sp, mr->priv, + mr_route->route_priv, + &route_params); + if (err) + kfree(mr_route->route_priv); + } else { + err = mr->mr_ops->route_update(mlxsw_sp, mr_route->route_priv, + &route_info); + } +out: + mlxsw_sp_mr_route_info_destroy(&route_info); + return err; +} + +static void mlxsw_sp_mr_route_erase(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + + mr->mr_ops->route_destroy(mlxsw_sp, mr->priv, mr_route->route_priv); + kfree(mr_route->route_priv); +} + +static struct mlxsw_sp_mr_route * +mlxsw_sp_mr_route_create(struct mlxsw_sp_mr_table *mr_table, + struct 
mr_mfc *mfc) +{ + struct mlxsw_sp_mr_route_vif_entry *rve, *tmp; + struct mlxsw_sp_mr_route *mr_route; + int err = 0; + int i; + + /* Allocate and init a new route and fill it with parameters */ + mr_route = kzalloc(sizeof(*mr_route), GFP_KERNEL); + if (!mr_route) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&mr_route->evif_list); + + /* Find min_mtu and link iVIF and eVIFs */ + mr_route->min_mtu = ETH_MAX_MTU; + mr_cache_hold(mfc); + mr_route->mfc = mfc; + mr_table->ops->key_create(mr_table, &mr_route->key, mr_route->mfc); + + mr_route->mr_table = mr_table; + for (i = 0; i < MAXVIFS; i++) { + if (mfc->mfc_un.res.ttls[i] != 255) { + err = mlxsw_sp_mr_route_evif_link(mr_route, + &mr_table->vifs[i]); + if (err) + goto err; + if (mr_table->vifs[i].dev && + mr_table->vifs[i].dev->mtu < mr_route->min_mtu) + mr_route->min_mtu = mr_table->vifs[i].dev->mtu; + } + } + mlxsw_sp_mr_route_ivif_link(mr_route, + &mr_table->vifs[mfc->mfc_parent]); + + mr_route->route_action = mlxsw_sp_mr_route_action(mr_route); + return mr_route; +err: + mr_cache_put(mfc); + list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node) + mlxsw_sp_mr_route_evif_unlink(rve); + kfree(mr_route); + return ERR_PTR(err); +} + +static void mlxsw_sp_mr_route_destroy(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp_mr_route_vif_entry *rve, *tmp; + + mlxsw_sp_mr_route_ivif_unlink(mr_route); + mr_cache_put(mr_route->mfc); + list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node) + mlxsw_sp_mr_route_evif_unlink(rve); + kfree(mr_route); +} + +static void mlxsw_sp_mr_mfc_offload_set(struct mlxsw_sp_mr_route *mr_route, + bool offload) +{ + if (offload) + mr_route->mfc->mfc_flags |= MFC_OFFLOAD; + else + mr_route->mfc->mfc_flags &= ~MFC_OFFLOAD; +} + +static void mlxsw_sp_mr_mfc_offload_update(struct mlxsw_sp_mr_route *mr_route) +{ + bool offload; + + offload = mr_route->route_action != MLXSW_SP_MR_ROUTE_ACTION_TRAP; + mlxsw_sp_mr_mfc_offload_set(mr_route, offload); +} + +static void __mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route) +{ + WARN_ON_ONCE(!mutex_is_locked(&mr_table->route_list_lock)); + + mlxsw_sp_mr_mfc_offload_set(mr_route, false); + rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node, + mlxsw_sp_mr_route_ht_params); + list_del(&mr_route->node); + mlxsw_sp_mr_route_erase(mr_table, mr_route); + mlxsw_sp_mr_route_destroy(mr_table, mr_route); +} + +int mlxsw_sp_mr_route_add(struct mlxsw_sp_mr_table *mr_table, + struct mr_mfc *mfc, bool replace) +{ + struct mlxsw_sp_mr_route *mr_orig_route = NULL; + struct mlxsw_sp_mr_route *mr_route; + int err; + + if (!mr_table->ops->is_route_valid(mr_table, mfc)) + return -EINVAL; + + /* Create a new route */ + mr_route = mlxsw_sp_mr_route_create(mr_table, mfc); + if (IS_ERR(mr_route)) + return PTR_ERR(mr_route); + + /* Find any route with a matching key */ + mr_orig_route = rhashtable_lookup_fast(&mr_table->route_ht, + &mr_route->key, + mlxsw_sp_mr_route_ht_params); + if (replace) { + /* On replace case, make the route point to the new route_priv. + */ + if (WARN_ON(!mr_orig_route)) { + err = -ENOENT; + goto err_no_orig_route; + } + mr_route->route_priv = mr_orig_route->route_priv; + } else if (mr_orig_route) { + /* On non replace case, if another route with the same key was + * found, abort, as duplicate routes are used for proxy routes. 
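mlxsw_sp_mr_route_create() above derives the egress set and the route MTU straight from the kernel's mr_mfc entry: every VIF whose TTL threshold is not 255 is linked as an eVIF, and the smallest MTU among the linked egress netdevs becomes the route's min_mtu, starting from ETH_MAX_MTU. The standalone sketch below models only that selection step; the constants mirror the kernel ones (MAXVIFS, the 255 sentinel, ETH_MAX_MTU) and everything else is invented for illustration.

#include <stdio.h>

#define MODEL_MAXVIFS     32      /* MAXVIFS in <linux/mroute.h> */
#define MODEL_TTL_UNUSED  255     /* "not an egress VIF" sentinel */
#define MODEL_ETH_MAX_MTU 0xFFFFu /* ETH_MAX_MTU start value */

struct model_vif {
	int present;		/* VIF has a netdev attached */
	unsigned int mtu;	/* netdev MTU */
};

/* Returns the number of egress VIFs and writes the route min MTU. */
static int model_route_evifs(const unsigned char ttls[MODEL_MAXVIFS],
			     const struct model_vif vifs[MODEL_MAXVIFS],
			     unsigned int *min_mtu)
{
	unsigned int mtu = MODEL_ETH_MAX_MTU;
	int num_evifs = 0;
	int i;

	for (i = 0; i < MODEL_MAXVIFS; i++) {
		if (ttls[i] == MODEL_TTL_UNUSED)
			continue;	/* not an egress VIF for this route */
		num_evifs++;
		if (vifs[i].present && vifs[i].mtu < mtu)
			mtu = vifs[i].mtu;
	}
	*min_mtu = mtu;
	return num_evifs;
}

int main(void)
{
	unsigned char ttls[MODEL_MAXVIFS];
	struct model_vif vifs[MODEL_MAXVIFS] = { 0 };
	unsigned int min_mtu;
	int i, n;

	for (i = 0; i < MODEL_MAXVIFS; i++)
		ttls[i] = MODEL_TTL_UNUSED;

	/* Two egress VIFs, one with a smaller MTU. */
	ttls[1] = 1; vifs[1] = (struct model_vif){ .present = 1, .mtu = 9000 };
	ttls[3] = 1; vifs[3] = (struct model_vif){ .present = 1, .mtu = 1500 };

	n = model_route_evifs(ttls, vifs, &min_mtu);
	printf("egress VIFs: %d, route min MTU: %u\n", n, min_mtu);
	return 0;
}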
+ */ + dev_warn(mr_table->mlxsw_sp->bus_info->dev, + "Offloading proxy routes is not supported.\n"); + err = -EINVAL; + goto err_duplicate_route; + } + + /* Write the route to the hardware */ + err = mlxsw_sp_mr_route_write(mr_table, mr_route, replace); + if (err) + goto err_mr_route_write; + + /* Put it in the table data-structures */ + mutex_lock(&mr_table->route_list_lock); + list_add_tail(&mr_route->node, &mr_table->route_list); + mutex_unlock(&mr_table->route_list_lock); + err = rhashtable_insert_fast(&mr_table->route_ht, + &mr_route->ht_node, + mlxsw_sp_mr_route_ht_params); + if (err) + goto err_rhashtable_insert; + + /* Destroy the original route */ + if (replace) { + rhashtable_remove_fast(&mr_table->route_ht, + &mr_orig_route->ht_node, + mlxsw_sp_mr_route_ht_params); + list_del(&mr_orig_route->node); + mlxsw_sp_mr_route_destroy(mr_table, mr_orig_route); + } + + mlxsw_sp_mr_mfc_offload_update(mr_route); + return 0; + +err_rhashtable_insert: + mutex_lock(&mr_table->route_list_lock); + list_del(&mr_route->node); + mutex_unlock(&mr_table->route_list_lock); + mlxsw_sp_mr_route_erase(mr_table, mr_route); +err_mr_route_write: +err_no_orig_route: +err_duplicate_route: + mlxsw_sp_mr_route_destroy(mr_table, mr_route); + return err; +} + +void mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table, + struct mr_mfc *mfc) +{ + struct mlxsw_sp_mr_route *mr_route; + struct mlxsw_sp_mr_route_key key; + + mr_table->ops->key_create(mr_table, &key, mfc); + mr_route = rhashtable_lookup_fast(&mr_table->route_ht, &key, + mlxsw_sp_mr_route_ht_params); + if (mr_route) { + mutex_lock(&mr_table->route_list_lock); + __mlxsw_sp_mr_route_del(mr_table, mr_route); + mutex_unlock(&mr_table->route_list_lock); + } +} + +/* Should be called after the VIF struct is updated */ +static int +mlxsw_sp_mr_route_ivif_resolve(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_vif_entry *rve) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + enum mlxsw_sp_mr_route_action route_action; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + u16 irif_index; + int err; + + route_action = mlxsw_sp_mr_route_action(rve->mr_route); + if (route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP) + return 0; + + /* rve->mr_vif->rif is guaranteed to be valid at this stage */ + irif_index = mlxsw_sp_rif_index(rve->mr_vif->rif); + err = mr->mr_ops->route_irif_update(mlxsw_sp, rve->mr_route->route_priv, + irif_index); + if (err) + return err; + + err = mr->mr_ops->route_action_update(mlxsw_sp, + rve->mr_route->route_priv, + route_action); + if (err) + /* No need to rollback here because the iRIF change only takes + * place after the action has been updated. 
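mlxsw_sp_mr_route_add() above, like most functions in this file, relies on the kernel's staged-cleanup idiom: each setup step has a matching error label, and a failure jumps to the label that undoes exactly the steps already completed, in reverse order. The generic standalone illustration below only shows that shape; the three malloc() calls roughly stand in for the hardware write and the list and hashtable insertions.

#include <stdio.h>
#include <stdlib.h>

/* Generic shape of the goto-based unwind used throughout this file: each
 * successful step adds one cleanup obligation, and the error labels are
 * ordered so a failure releases exactly what was already acquired.
 */
static int model_setup(void)
{
	void *a, *b, *c;

	a = malloc(16);
	if (!a)
		return -1;

	b = malloc(16);
	if (!b)
		goto err_b;

	c = malloc(16);
	if (!c)
		goto err_c;

	printf("all three steps succeeded\n");
	free(c);
	free(b);
	free(a);
	return 0;

err_c:
	free(b);
err_b:
	free(a);
	return -1;
}

int main(void)
{
	return model_setup() ? 1 : 0;
}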
+ */ + return err; + + rve->mr_route->route_action = route_action; + mlxsw_sp_mr_mfc_offload_update(rve->mr_route); + return 0; +} + +static void +mlxsw_sp_mr_route_ivif_unresolve(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_vif_entry *rve) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + + mr->mr_ops->route_action_update(mlxsw_sp, rve->mr_route->route_priv, + MLXSW_SP_MR_ROUTE_ACTION_TRAP); + rve->mr_route->route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP; + mlxsw_sp_mr_mfc_offload_update(rve->mr_route); +} + +/* Should be called after the RIF struct is updated */ +static int +mlxsw_sp_mr_route_evif_resolve(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_vif_entry *rve) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + enum mlxsw_sp_mr_route_action route_action; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + u16 erif_index = 0; + int err; + + /* Add the eRIF */ + if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) { + erif_index = mlxsw_sp_rif_index(rve->mr_vif->rif); + err = mr->mr_ops->route_erif_add(mlxsw_sp, + rve->mr_route->route_priv, + erif_index); + if (err) + return err; + } + + /* Update the route action, as the new eVIF can be a tunnel or a pimreg + * device which will require updating the action. + */ + route_action = mlxsw_sp_mr_route_action(rve->mr_route); + if (route_action != rve->mr_route->route_action) { + err = mr->mr_ops->route_action_update(mlxsw_sp, + rve->mr_route->route_priv, + route_action); + if (err) + goto err_route_action_update; + } + + /* Update the minimum MTU */ + if (rve->mr_vif->dev->mtu < rve->mr_route->min_mtu) { + rve->mr_route->min_mtu = rve->mr_vif->dev->mtu; + err = mr->mr_ops->route_min_mtu_update(mlxsw_sp, + rve->mr_route->route_priv, + rve->mr_route->min_mtu); + if (err) + goto err_route_min_mtu_update; + } + + rve->mr_route->route_action = route_action; + mlxsw_sp_mr_mfc_offload_update(rve->mr_route); + return 0; + +err_route_min_mtu_update: + if (route_action != rve->mr_route->route_action) + mr->mr_ops->route_action_update(mlxsw_sp, + rve->mr_route->route_priv, + rve->mr_route->route_action); +err_route_action_update: + if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) + mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv, + erif_index); + return err; +} + +/* Should be called before the RIF struct is updated */ +static void +mlxsw_sp_mr_route_evif_unresolve(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_vif_entry *rve) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + enum mlxsw_sp_mr_route_action route_action; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + u16 rifi; + + /* If the unresolved RIF was not valid, no need to delete it */ + if (!mlxsw_sp_mr_vif_valid(rve->mr_vif)) + return; + + /* Update the route action: if there is only one valid eVIF in the + * route, set the action to trap as the VIF deletion will lead to zero + * valid eVIFs. On any other case, use the mlxsw_sp_mr_route_action to + * determine the route action. 
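The comment above captures the rule the rest of this helper applies: the action programmed just before an eVIF is unresolved must be TRAP when that eVIF is the only valid one left, and otherwise follows the same derivation as mlxsw_sp_mr_route_action() (trap when the iVIF is unresolved or no valid eVIF exists, trap-and-forward when some eVIF still lacks a RIF, forward otherwise). Below is a compact standalone model of that decision; the struct and its fields are invented, and the (*,G) iif-in-oifs check is left out for brevity.

#include <stdio.h>
#include <stdbool.h>

enum model_action { MODEL_FORWARD, MODEL_TRAP, MODEL_TRAP_AND_FORWARD };

struct model_route_state {
	bool ivif_valid;	/* ingress VIF is regular and has a RIF */
	int valid_evifs;	/* egress VIFs that are regular and have a RIF */
	int evifs_without_rif;	/* egress VIFs with a netdev but no RIF yet */
};

/* Follows the decision order of mlxsw_sp_mr_route_action(); the (*,G)
 * special case is omitted here.
 */
static enum model_action model_route_action(const struct model_route_state *s)
{
	if (!s->ivif_valid)
		return MODEL_TRAP;
	if (!s->valid_evifs)
		return MODEL_TRAP;
	if (s->evifs_without_rif)
		return MODEL_TRAP_AND_FORWARD;
	return MODEL_FORWARD;
}

/* Action to program right before one valid eVIF is unresolved. */
static enum model_action
model_pre_unresolve_action(const struct model_route_state *s)
{
	if (s->valid_evifs == 1)
		return MODEL_TRAP;	/* the removal leaves no valid eVIF */
	return model_route_action(s);
}

int main(void)
{
	struct model_route_state two = { .ivif_valid = true, .valid_evifs = 2 };
	struct model_route_state one = { .ivif_valid = true, .valid_evifs = 1 };

	printf("two valid eVIFs left -> %d\n", model_pre_unresolve_action(&two));
	printf("one valid eVIF left  -> %d\n", model_pre_unresolve_action(&one));
	return 0;
}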
+ */ + if (mlxsw_sp_mr_route_valid_evifs_num(rve->mr_route) == 1) + route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP; + else + route_action = mlxsw_sp_mr_route_action(rve->mr_route); + if (route_action != rve->mr_route->route_action) + mr->mr_ops->route_action_update(mlxsw_sp, + rve->mr_route->route_priv, + route_action); + + /* Delete the erif from the route */ + rifi = mlxsw_sp_rif_index(rve->mr_vif->rif); + mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv, rifi); + rve->mr_route->route_action = route_action; + mlxsw_sp_mr_mfc_offload_update(rve->mr_route); +} + +static int mlxsw_sp_mr_vif_resolve(struct mlxsw_sp_mr_table *mr_table, + struct net_device *dev, + struct mlxsw_sp_mr_vif *mr_vif, + unsigned long vif_flags, + const struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_mr_route_vif_entry *irve, *erve; + int err; + + /* Update the VIF */ + mr_vif->dev = dev; + mr_vif->rif = rif; + mr_vif->vif_flags = vif_flags; + + /* Update all routes where this VIF is used as an unresolved iRIF */ + list_for_each_entry(irve, &mr_vif->route_ivif_list, vif_node) { + err = mlxsw_sp_mr_route_ivif_resolve(mr_table, irve); + if (err) + goto err_irif_unresolve; + } + + /* Update all routes where this VIF is used as an unresolved eRIF */ + list_for_each_entry(erve, &mr_vif->route_evif_list, vif_node) { + err = mlxsw_sp_mr_route_evif_resolve(mr_table, erve); + if (err) + goto err_erif_unresolve; + } + return 0; + +err_erif_unresolve: + list_for_each_entry_continue_reverse(erve, &mr_vif->route_evif_list, + vif_node) + mlxsw_sp_mr_route_evif_unresolve(mr_table, erve); +err_irif_unresolve: + list_for_each_entry_continue_reverse(irve, &mr_vif->route_ivif_list, + vif_node) + mlxsw_sp_mr_route_ivif_unresolve(mr_table, irve); + mr_vif->rif = NULL; + return err; +} + +static void mlxsw_sp_mr_vif_unresolve(struct mlxsw_sp_mr_table *mr_table, + struct net_device *dev, + struct mlxsw_sp_mr_vif *mr_vif) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + + /* Update all routes where this VIF is used as an unresolved eRIF */ + list_for_each_entry(rve, &mr_vif->route_evif_list, vif_node) + mlxsw_sp_mr_route_evif_unresolve(mr_table, rve); + + /* Update all routes where this VIF is used as an unresolved iRIF */ + list_for_each_entry(rve, &mr_vif->route_ivif_list, vif_node) + mlxsw_sp_mr_route_ivif_unresolve(mr_table, rve); + + /* Update the VIF */ + mr_vif->dev = dev; + mr_vif->rif = NULL; +} + +int mlxsw_sp_mr_vif_add(struct mlxsw_sp_mr_table *mr_table, + struct net_device *dev, vifi_t vif_index, + unsigned long vif_flags, const struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_mr_vif *mr_vif = &mr_table->vifs[vif_index]; + + if (WARN_ON(vif_index >= MAXVIFS)) + return -EINVAL; + if (mr_vif->dev) + return -EEXIST; + return mlxsw_sp_mr_vif_resolve(mr_table, dev, mr_vif, vif_flags, rif); +} + +void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index) +{ + struct mlxsw_sp_mr_vif *mr_vif = &mr_table->vifs[vif_index]; + + if (WARN_ON(vif_index >= MAXVIFS)) + return; + if (WARN_ON(!mr_vif->dev)) + return; + mlxsw_sp_mr_vif_unresolve(mr_table, NULL, mr_vif); +} + +static struct mlxsw_sp_mr_vif * +mlxsw_sp_mr_dev_vif_lookup(struct mlxsw_sp_mr_table *mr_table, + const struct net_device *dev) +{ + vifi_t vif_index; + + for (vif_index = 0; vif_index < MAXVIFS; vif_index++) + if (mr_table->vifs[vif_index].dev == dev) + return &mr_table->vifs[vif_index]; + return NULL; +} + +int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif) +{ + const struct net_device *rif_dev = 
mlxsw_sp_rif_dev(rif); + struct mlxsw_sp_mr_vif *mr_vif; + + if (!rif_dev) + return 0; + + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); + if (!mr_vif) + return 0; + return mlxsw_sp_mr_vif_resolve(mr_table, mr_vif->dev, mr_vif, + mr_vif->vif_flags, rif); +} + +void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif) +{ + const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); + struct mlxsw_sp_mr_vif *mr_vif; + + if (!rif_dev) + return; + + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); + if (!mr_vif) + return; + mlxsw_sp_mr_vif_unresolve(mr_table, mr_vif->dev, mr_vif); +} + +void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif, int mtu) +{ + const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr_route_vif_entry *rve; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + struct mlxsw_sp_mr_vif *mr_vif; + + if (!rif_dev) + return; + + /* Search for a VIF that use that RIF */ + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); + if (!mr_vif) + return; + + /* Update all the routes that uses that VIF as eVIF */ + list_for_each_entry(rve, &mr_vif->route_evif_list, vif_node) { + if (mtu < rve->mr_route->min_mtu) { + rve->mr_route->min_mtu = mtu; + mr->mr_ops->route_min_mtu_update(mlxsw_sp, + rve->mr_route->route_priv, + mtu); + } + } +} + +/* Protocol specific functions */ +static bool +mlxsw_sp_mr_route4_validate(const struct mlxsw_sp_mr_table *mr_table, + const struct mr_mfc *c) +{ + struct mfc_cache *mfc = (struct mfc_cache *) c; + + /* If the route is a (*,*) route, abort, as these kind of routes are + * used for proxy routes. + */ + if (mfc->mfc_origin == htonl(INADDR_ANY) && + mfc->mfc_mcastgrp == htonl(INADDR_ANY)) { + dev_warn(mr_table->mlxsw_sp->bus_info->dev, + "Offloading proxy routes is not supported.\n"); + return false; + } + return true; +} + +static void mlxsw_sp_mr_route4_key(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_key *key, + struct mr_mfc *c) +{ + const struct mfc_cache *mfc = (struct mfc_cache *) c; + bool starg; + + starg = (mfc->mfc_origin == htonl(INADDR_ANY)); + + memset(key, 0, sizeof(*key)); + key->vrid = mr_table->vr_id; + key->proto = MLXSW_SP_L3_PROTO_IPV4; + key->group.addr4 = mfc->mfc_mcastgrp; + key->group_mask.addr4 = htonl(0xffffffff); + key->source.addr4 = mfc->mfc_origin; + key->source_mask.addr4 = htonl(starg ? 0 : 0xffffffff); +} + +static bool mlxsw_sp_mr_route4_starg(const struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_mr_route *mr_route) +{ + return mr_route->key.source_mask.addr4 == htonl(INADDR_ANY); +} + +static bool mlxsw_sp_mr_vif4_is_regular(const struct mlxsw_sp_mr_vif *vif) +{ + return !(vif->vif_flags & (VIFF_TUNNEL | VIFF_REGISTER)); +} + +static bool +mlxsw_sp_mr_route6_validate(const struct mlxsw_sp_mr_table *mr_table, + const struct mr_mfc *c) +{ + struct mfc6_cache *mfc = (struct mfc6_cache *) c; + + /* If the route is a (*,*) route, abort, as these kind of routes are + * used for proxy routes. 
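mlxsw_sp_mr_vif_resolve() further up uses the list counterpart of the same unwind idea: routes are resolved one by one and, if any step fails, the entries already resolved are walked back with list_for_each_entry_continue_reverse() and unresolved, leaving the VIF unchanged. The sketch below shows that apply-then-rollback shape over a plain array; the names and the simulated failure are invented.

#include <stdio.h>

#define MODEL_N 5

/* Pretend "resolve" step that fails for one item to trigger the rollback. */
static int model_resolve(int i)
{
	if (i == 3)
		return -1;
	printf("resolved %d\n", i);
	return 0;
}

static void model_unresolve(int i)
{
	printf("unresolved %d\n", i);
}

/* Same shape as the reverse unwind in mlxsw_sp_mr_vif_resolve(): on failure,
 * undo only what was already applied, in reverse order.
 */
static int model_resolve_all(void)
{
	int i, err;

	for (i = 0; i < MODEL_N; i++) {
		err = model_resolve(i);
		if (err)
			goto err_rollback;
	}
	return 0;

err_rollback:
	while (--i >= 0)
		model_unresolve(i);
	return err;
}

int main(void)
{
	return model_resolve_all() ? 1 : 0;
}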
+ */ + if (ipv6_addr_any(&mfc->mf6c_origin) && + ipv6_addr_any(&mfc->mf6c_mcastgrp)) { + dev_warn(mr_table->mlxsw_sp->bus_info->dev, + "Offloading proxy routes is not supported.\n"); + return false; + } + return true; +} + +static void mlxsw_sp_mr_route6_key(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_key *key, + struct mr_mfc *c) +{ + const struct mfc6_cache *mfc = (struct mfc6_cache *) c; + + memset(key, 0, sizeof(*key)); + key->vrid = mr_table->vr_id; + key->proto = MLXSW_SP_L3_PROTO_IPV6; + key->group.addr6 = mfc->mf6c_mcastgrp; + memset(&key->group_mask.addr6, 0xff, sizeof(key->group_mask.addr6)); + key->source.addr6 = mfc->mf6c_origin; + if (!ipv6_addr_any(&mfc->mf6c_origin)) + memset(&key->source_mask.addr6, 0xff, + sizeof(key->source_mask.addr6)); +} + +static bool mlxsw_sp_mr_route6_starg(const struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_mr_route *mr_route) +{ + return ipv6_addr_any(&mr_route->key.source_mask.addr6); +} + +static bool mlxsw_sp_mr_vif6_is_regular(const struct mlxsw_sp_mr_vif *vif) +{ + return !(vif->vif_flags & MIFF_REGISTER); +} + +static struct +mlxsw_sp_mr_vif_ops mlxsw_sp_mr_vif_ops_arr[] = { + { + .is_regular = mlxsw_sp_mr_vif4_is_regular, + }, + { + .is_regular = mlxsw_sp_mr_vif6_is_regular, + }, +}; + +static struct +mlxsw_sp_mr_table_ops mlxsw_sp_mr_table_ops_arr[] = { + { + .is_route_valid = mlxsw_sp_mr_route4_validate, + .key_create = mlxsw_sp_mr_route4_key, + .is_route_starg = mlxsw_sp_mr_route4_starg, + }, + { + .is_route_valid = mlxsw_sp_mr_route6_validate, + .key_create = mlxsw_sp_mr_route6_key, + .is_route_starg = mlxsw_sp_mr_route6_starg, + }, + +}; + +struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp, + u32 vr_id, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_mr_route_params catchall_route_params = { + .prio = MLXSW_SP_MR_ROUTE_PRIO_CATCHALL, + .key = { + .vrid = vr_id, + .proto = proto, + }, + .value = { + .route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP, + } + }; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + struct mlxsw_sp_mr_table *mr_table; + int err; + int i; + + mr_table = kzalloc(sizeof(*mr_table) + mr->mr_ops->route_priv_size, + GFP_KERNEL); + if (!mr_table) + return ERR_PTR(-ENOMEM); + + mr_table->vr_id = vr_id; + mr_table->mlxsw_sp = mlxsw_sp; + mr_table->proto = proto; + mr_table->ops = &mlxsw_sp_mr_table_ops_arr[proto]; + INIT_LIST_HEAD(&mr_table->route_list); + mutex_init(&mr_table->route_list_lock); + + err = rhashtable_init(&mr_table->route_ht, + &mlxsw_sp_mr_route_ht_params); + if (err) + goto err_route_rhashtable_init; + + for (i = 0; i < MAXVIFS; i++) { + INIT_LIST_HEAD(&mr_table->vifs[i].route_evif_list); + INIT_LIST_HEAD(&mr_table->vifs[i].route_ivif_list); + mr_table->vifs[i].ops = &mlxsw_sp_mr_vif_ops_arr[proto]; + } + + err = mr->mr_ops->route_create(mlxsw_sp, mr->priv, + mr_table->catchall_route_priv, + &catchall_route_params); + if (err) + goto err_ops_route_create; + mutex_lock(&mr->table_list_lock); + list_add_tail(&mr_table->node, &mr->table_list); + mutex_unlock(&mr->table_list_lock); + return mr_table; + +err_ops_route_create: + rhashtable_destroy(&mr_table->route_ht); +err_route_rhashtable_init: + mutex_destroy(&mr_table->route_list_lock); + kfree(mr_table); + return ERR_PTR(err); +} + +void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + + WARN_ON(!mlxsw_sp_mr_table_empty(mr_table)); + mutex_lock(&mr->table_list_lock); + list_del(&mr_table->node); 
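The key helpers above encode the route type purely through masks: mlxsw_sp_mr_route4_key() and its IPv6 twin always match the group exactly, a wildcard source becomes an all-zero source mask, and the catch-all route installed by mlxsw_sp_mr_table_create() leaves both masks zero so it matches every flow in the VRF. Below is a small standalone model of that masking scheme; the struct is invented and carries only the IPv4 group/source fields.

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

/* Invented stand-in for the driver's IPv4 route key: each field is matched
 * under its mask, so a zero mask turns the field into a wildcard.
 */
struct model_key {
	uint32_t group, group_mask;	/* network byte order */
	uint32_t source, source_mask;
};

/* (S,G): exact group and source, as in mlxsw_sp_mr_route4_key(). */
static struct model_key model_sg_key(const char *group, const char *source)
{
	return (struct model_key){
		.group = inet_addr(group),   .group_mask = htonl(0xffffffff),
		.source = inet_addr(source), .source_mask = htonl(0xffffffff),
	};
}

/* (*,G): exact group, wildcard source (zero source mask). */
static struct model_key model_starg_key(const char *group)
{
	return (struct model_key){
		.group = inet_addr(group), .group_mask = htonl(0xffffffff),
	};
}

static int model_key_match(const struct model_key *key,
			   uint32_t group, uint32_t source)
{
	return (group & key->group_mask) == (key->group & key->group_mask) &&
	       (source & key->source_mask) == (key->source & key->source_mask);
}

int main(void)
{
	struct model_key sg = model_sg_key("239.1.1.1", "192.0.2.1");
	struct model_key starg = model_starg_key("239.1.1.1");
	struct model_key catchall = { 0 };	/* all-zero masks: matches all */
	uint32_t g = inet_addr("239.1.1.1"), s = inet_addr("198.51.100.7");

	printf("(S,G) match: %d\n", model_key_match(&sg, g, s));	/* 0 */
	printf("(*,G) match: %d\n", model_key_match(&starg, g, s));	/* 1 */
	printf("catch-all:   %d\n", model_key_match(&catchall, g, s));	/* 1 */
	return 0;
}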
+ mutex_unlock(&mr->table_list_lock); + mr->mr_ops->route_destroy(mlxsw_sp, mr->priv, + &mr_table->catchall_route_priv); + rhashtable_destroy(&mr_table->route_ht); + mutex_destroy(&mr_table->route_list_lock); + kfree(mr_table); +} + +void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table) +{ + struct mlxsw_sp_mr_route *mr_route, *tmp; + int i; + + mutex_lock(&mr_table->route_list_lock); + list_for_each_entry_safe(mr_route, tmp, &mr_table->route_list, node) + __mlxsw_sp_mr_route_del(mr_table, mr_route); + mutex_unlock(&mr_table->route_list_lock); + + for (i = 0; i < MAXVIFS; i++) { + mr_table->vifs[i].dev = NULL; + mr_table->vifs[i].rif = NULL; + } +} + +bool mlxsw_sp_mr_table_empty(const struct mlxsw_sp_mr_table *mr_table) +{ + int i; + + for (i = 0; i < MAXVIFS; i++) + if (mr_table->vifs[i].dev) + return false; + return list_empty(&mr_table->route_list); +} + +static void mlxsw_sp_mr_route_stats_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + u64 packets, bytes; + + if (mr_route->route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP) + return; + + mr->mr_ops->route_stats(mlxsw_sp, mr_route->route_priv, &packets, + &bytes); + + if (mr_route->mfc->mfc_un.res.pkt != packets) + mr_route->mfc->mfc_un.res.lastuse = jiffies; + mr_route->mfc->mfc_un.res.pkt = packets; + mr_route->mfc->mfc_un.res.bytes = bytes; +} + +static void mlxsw_sp_mr_stats_update(struct work_struct *work) +{ + struct mlxsw_sp_mr *mr = container_of(work, struct mlxsw_sp_mr, + stats_update_dw.work); + struct mlxsw_sp_mr_table *mr_table; + struct mlxsw_sp_mr_route *mr_route; + unsigned long interval; + + mutex_lock(&mr->table_list_lock); + list_for_each_entry(mr_table, &mr->table_list, node) { + mutex_lock(&mr_table->route_list_lock); + list_for_each_entry(mr_route, &mr_table->route_list, node) + mlxsw_sp_mr_route_stats_update(mr_table->mlxsw_sp, + mr_route); + mutex_unlock(&mr_table->route_list_lock); + } + mutex_unlock(&mr->table_list_lock); + + interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL); + mlxsw_core_schedule_dw(&mr->stats_update_dw, interval); +} + +int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_mr_ops *mr_ops) +{ + struct mlxsw_sp_mr *mr; + unsigned long interval; + int err; + + mr = kzalloc(sizeof(*mr) + mr_ops->priv_size, GFP_KERNEL); + if (!mr) + return -ENOMEM; + mr->mr_ops = mr_ops; + mlxsw_sp->mr = mr; + INIT_LIST_HEAD(&mr->table_list); + mutex_init(&mr->table_list_lock); + + err = mr_ops->init(mlxsw_sp, mr->priv); + if (err) + goto err; + + /* Create the delayed work for counter updates */ + INIT_DELAYED_WORK(&mr->stats_update_dw, mlxsw_sp_mr_stats_update); + interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL); + mlxsw_core_schedule_dw(&mr->stats_update_dw, interval); + return 0; +err: + mutex_destroy(&mr->table_list_lock); + kfree(mr); + return err; +} + +void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + + cancel_delayed_work_sync(&mr->stats_update_dw); + mr->mr_ops->fini(mlxsw_sp, mr->priv); + mutex_destroy(&mr->table_list_lock); + kfree(mr); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h new file mode 100644 index 000000000..3cde3671f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2018 Mellanox Technologies. 
All rights reserved */ + +#ifndef _MLXSW_SPECTRUM_MCROUTER_H +#define _MLXSW_SPECTRUM_MCROUTER_H + +#include <linux/mroute.h> +#include <linux/mroute6.h> +#include "spectrum_router.h" +#include "spectrum.h" + +enum mlxsw_sp_mr_route_action { + MLXSW_SP_MR_ROUTE_ACTION_FORWARD, + MLXSW_SP_MR_ROUTE_ACTION_TRAP, + MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD, +}; + +struct mlxsw_sp_mr_route_key { + int vrid; + enum mlxsw_sp_l3proto proto; + union mlxsw_sp_l3addr group; + union mlxsw_sp_l3addr group_mask; + union mlxsw_sp_l3addr source; + union mlxsw_sp_l3addr source_mask; +}; + +struct mlxsw_sp_mr_route_info { + enum mlxsw_sp_mr_route_action route_action; + u16 irif_index; + u16 *erif_indices; + size_t erif_num; + u16 min_mtu; +}; + +struct mlxsw_sp_mr_route_params { + struct mlxsw_sp_mr_route_key key; + struct mlxsw_sp_mr_route_info value; + enum mlxsw_sp_mr_route_prio prio; +}; + +struct mlxsw_sp_mr_ops { + int priv_size; + int route_priv_size; + int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv); + int (*route_create)(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv, + struct mlxsw_sp_mr_route_params *route_params); + int (*route_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + struct mlxsw_sp_mr_route_info *route_info); + int (*route_stats)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u64 *packets, u64 *bytes); + int (*route_action_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + enum mlxsw_sp_mr_route_action route_action); + int (*route_min_mtu_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u16 min_mtu); + int (*route_irif_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u16 irif_index); + int (*route_erif_add)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u16 erif_index); + int (*route_erif_del)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u16 erif_index); + void (*route_destroy)(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv); + void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv); +}; + +struct mlxsw_sp_mr; +struct mlxsw_sp_mr_table; + +int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_mr_ops *mr_ops); +void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_mr_route_add(struct mlxsw_sp_mr_table *mr_table, + struct mr_mfc *mfc, bool replace); +void mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table, + struct mr_mfc *mfc); +int mlxsw_sp_mr_vif_add(struct mlxsw_sp_mr_table *mr_table, + struct net_device *dev, vifi_t vif_index, + unsigned long vif_flags, + const struct mlxsw_sp_rif *rif); +void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index); +int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif); +void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif); +void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif, int mtu); +struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp, + u32 tb_id, + enum mlxsw_sp_l3proto proto); +void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table); +void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table); +bool mlxsw_sp_mr_table_empty(const struct mlxsw_sp_mr_table *mr_table); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c new file mode 100644 index 000000000..221aa6a47 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c @@ -0,0 +1,615 @@ +// SPDX-License-Identifier: BSD-3-Clause 
OR GPL-2.0 +/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */ + +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/netdevice.h> + +#include "spectrum_mr_tcam.h" +#include "reg.h" +#include "spectrum.h" +#include "core_acl_flex_actions.h" +#include "spectrum_mr.h" + +struct mlxsw_sp_mr_tcam { + void *priv; +}; + +/* This struct maps to one RIGR2 register entry */ +struct mlxsw_sp_mr_erif_sublist { + struct list_head list; + u32 rigr2_kvdl_index; + int num_erifs; + u16 erif_indices[MLXSW_REG_RIGR2_MAX_ERIFS]; + bool synced; +}; + +struct mlxsw_sp_mr_tcam_erif_list { + struct list_head erif_sublists; + u32 kvdl_index; +}; + +static bool +mlxsw_sp_mr_erif_sublist_full(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_erif_sublist *erif_sublist) +{ + int erif_list_entries = MLXSW_CORE_RES_GET(mlxsw_sp->core, + MC_ERIF_LIST_ENTRIES); + + return erif_sublist->num_erifs == erif_list_entries; +} + +static void +mlxsw_sp_mr_erif_list_init(struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + INIT_LIST_HEAD(&erif_list->erif_sublists); +} + +static struct mlxsw_sp_mr_erif_sublist * +mlxsw_sp_mr_erif_sublist_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + struct mlxsw_sp_mr_erif_sublist *erif_sublist; + int err; + + erif_sublist = kzalloc(sizeof(*erif_sublist), GFP_KERNEL); + if (!erif_sublist) + return ERR_PTR(-ENOMEM); + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR, + 1, &erif_sublist->rigr2_kvdl_index); + if (err) { + kfree(erif_sublist); + return ERR_PTR(err); + } + + list_add_tail(&erif_sublist->list, &erif_list->erif_sublists); + return erif_sublist; +} + +static void +mlxsw_sp_mr_erif_sublist_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_erif_sublist *erif_sublist) +{ + list_del(&erif_sublist->list); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR, + 1, erif_sublist->rigr2_kvdl_index); + kfree(erif_sublist); +} + +static int +mlxsw_sp_mr_erif_list_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list, + u16 erif_index) +{ + struct mlxsw_sp_mr_erif_sublist *sublist; + + /* If either there is no erif_entry or the last one is full, allocate a + * new one. 
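Each mlxsw_sp_mr_erif_sublist above maps to a single RIGR2 register entry, so an egress RIF list longer than one entry's capacity is kept as a chain of sublists, and mlxsw_sp_mr_erif_list_add() appends to the last sublist, allocating a fresh one when it is full or when the list is still empty. Below is a freestanding sketch of that append logic; the capacity of 4 is made up, since the real limit comes from the MC_ERIF_LIST_ENTRIES resource, and the explicit next pointer stands in for the chaining the driver programs through the register.

#include <stdio.h>
#include <stdlib.h>

#define MODEL_SUBLIST_CAP 4	/* illustrative; the device reports the real value */

struct model_sublist {
	struct model_sublist *next;	/* stands in for the RIGR2 next pointer */
	int num_erifs;
	unsigned short erifs[MODEL_SUBLIST_CAP];
};

struct model_erif_list {
	struct model_sublist *head, *tail;
};

static int model_erif_list_add(struct model_erif_list *list, unsigned short erif)
{
	struct model_sublist *sub = list->tail;

	if (!sub || sub->num_erifs == MODEL_SUBLIST_CAP) {
		sub = calloc(1, sizeof(*sub));
		if (!sub)
			return -1;
		if (list->tail)
			list->tail->next = sub;
		else
			list->head = sub;
		list->tail = sub;
	}
	sub->erifs[sub->num_erifs++] = erif;
	return 0;
}

int main(void)
{
	struct model_erif_list list = { 0 };
	struct model_sublist *sub, *next;
	int i;

	for (i = 0; i < 10; i++)
		model_erif_list_add(&list, (unsigned short)(100 + i));

	for (sub = list.head; sub; sub = sub->next)
		printf("sublist holds %d eRIFs\n", sub->num_erifs);

	for (sub = list.head; sub; sub = next) {
		next = sub->next;
		free(sub);
	}
	return 0;
}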
+ */ + if (list_empty(&erif_list->erif_sublists)) { + sublist = mlxsw_sp_mr_erif_sublist_create(mlxsw_sp, erif_list); + if (IS_ERR(sublist)) + return PTR_ERR(sublist); + erif_list->kvdl_index = sublist->rigr2_kvdl_index; + } else { + sublist = list_last_entry(&erif_list->erif_sublists, + struct mlxsw_sp_mr_erif_sublist, + list); + sublist->synced = false; + if (mlxsw_sp_mr_erif_sublist_full(mlxsw_sp, sublist)) { + sublist = mlxsw_sp_mr_erif_sublist_create(mlxsw_sp, + erif_list); + if (IS_ERR(sublist)) + return PTR_ERR(sublist); + } + } + + /* Add the eRIF to the last entry's last index */ + sublist->erif_indices[sublist->num_erifs++] = erif_index; + return 0; +} + +static void +mlxsw_sp_mr_erif_list_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + struct mlxsw_sp_mr_erif_sublist *erif_sublist, *tmp; + + list_for_each_entry_safe(erif_sublist, tmp, &erif_list->erif_sublists, + list) + mlxsw_sp_mr_erif_sublist_destroy(mlxsw_sp, erif_sublist); +} + +static int +mlxsw_sp_mr_erif_list_commit(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + struct mlxsw_sp_mr_erif_sublist *curr_sublist; + char rigr2_pl[MLXSW_REG_RIGR2_LEN]; + int err; + int i; + + list_for_each_entry(curr_sublist, &erif_list->erif_sublists, list) { + if (curr_sublist->synced) + continue; + + /* If the sublist is not the last one, pack the next index */ + if (list_is_last(&curr_sublist->list, + &erif_list->erif_sublists)) { + mlxsw_reg_rigr2_pack(rigr2_pl, + curr_sublist->rigr2_kvdl_index, + false, 0); + } else { + struct mlxsw_sp_mr_erif_sublist *next_sublist; + + next_sublist = list_next_entry(curr_sublist, list); + mlxsw_reg_rigr2_pack(rigr2_pl, + curr_sublist->rigr2_kvdl_index, + true, + next_sublist->rigr2_kvdl_index); + } + + /* Pack all the erifs */ + for (i = 0; i < curr_sublist->num_erifs; i++) { + u16 erif_index = curr_sublist->erif_indices[i]; + + mlxsw_reg_rigr2_erif_entry_pack(rigr2_pl, i, true, + erif_index); + } + + /* Write the entry */ + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rigr2), + rigr2_pl); + if (err) + /* No need of a rollback here because this + * hardware entry should not be pointed yet. 
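mlxsw_sp_mr_erif_list_commit() above then walks that chain in order, packs every entry except the last with a pointer to the next sublist's KVD linear index, skips sublists already marked synced, and can bail out on a write error without rollback because nothing committed so far is referenced by an active route yet. The toy walk below only prints what would be written; the struct, indices and scenario are invented.

#include <stdio.h>
#include <stdbool.h>

/* Invented stand-in for one RIGR2-backed sublist in the chain. */
struct model_sublist {
	struct model_sublist *next;
	unsigned int kvdl_index;	/* location of the entry in KVD linear */
	bool synced;			/* already written to the device */
	int num_erifs;
};

/* Mirrors the shape of mlxsw_sp_mr_erif_list_commit(): every entry but the
 * last carries a valid next pointer, and already-synced entries are skipped.
 */
static void model_erif_list_commit(struct model_sublist *head)
{
	struct model_sublist *sub;

	for (sub = head; sub; sub = sub->next) {
		if (sub->synced)
			continue;
		if (sub->next)
			printf("write kvdl %u: %d eRIFs, next -> kvdl %u\n",
			       sub->kvdl_index, sub->num_erifs,
			       sub->next->kvdl_index);
		else
			printf("write kvdl %u: %d eRIFs, no next\n",
			       sub->kvdl_index, sub->num_erifs);
		sub->synced = true;
	}
}

int main(void)
{
	/* c is newly appended; b was re-opened so it can point at c. */
	struct model_sublist c = { .kvdl_index = 30, .num_erifs = 2 };
	struct model_sublist b = { .next = &c, .kvdl_index = 20,
				   .num_erifs = 4 };
	struct model_sublist a = { .next = &b, .kvdl_index = 10,
				   .num_erifs = 4, .synced = true };

	model_erif_list_commit(&a);
	return 0;
}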
+ */ + return err; + curr_sublist->synced = true; + } + return 0; +} + +static void mlxsw_sp_mr_erif_list_move(struct mlxsw_sp_mr_tcam_erif_list *to, + struct mlxsw_sp_mr_tcam_erif_list *from) +{ + list_splice(&from->erif_sublists, &to->erif_sublists); + to->kvdl_index = from->kvdl_index; +} + +struct mlxsw_sp_mr_tcam_route { + struct mlxsw_sp_mr_tcam_erif_list erif_list; + struct mlxsw_afa_block *afa_block; + u32 counter_index; + enum mlxsw_sp_mr_route_action action; + struct mlxsw_sp_mr_route_key key; + u16 irif_index; + u16 min_mtu; + void *priv; +}; + +static struct mlxsw_afa_block * +mlxsw_sp_mr_tcam_afa_block_create(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_mr_route_action route_action, + u16 irif_index, u32 counter_index, + u16 min_mtu, + struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + struct mlxsw_afa_block *afa_block; + int err; + + afa_block = mlxsw_afa_block_create(mlxsw_sp->afa); + if (IS_ERR(afa_block)) + return afa_block; + + err = mlxsw_afa_block_append_allocated_counter(afa_block, + counter_index); + if (err) + goto err; + + switch (route_action) { + case MLXSW_SP_MR_ROUTE_ACTION_TRAP: + err = mlxsw_afa_block_append_trap(afa_block, + MLXSW_TRAP_ID_ACL1); + if (err) + goto err; + break; + case MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD: + case MLXSW_SP_MR_ROUTE_ACTION_FORWARD: + /* If we are about to append a multicast router action, commit + * the erif_list. + */ + err = mlxsw_sp_mr_erif_list_commit(mlxsw_sp, erif_list); + if (err) + goto err; + + err = mlxsw_afa_block_append_mcrouter(afa_block, irif_index, + min_mtu, false, + erif_list->kvdl_index); + if (err) + goto err; + + if (route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD) { + err = mlxsw_afa_block_append_trap_and_forward(afa_block, + MLXSW_TRAP_ID_ACL2); + if (err) + goto err; + } + break; + default: + err = -EINVAL; + goto err; + } + + err = mlxsw_afa_block_commit(afa_block); + if (err) + goto err; + return afa_block; +err: + mlxsw_afa_block_destroy(afa_block); + return ERR_PTR(err); +} + +static void +mlxsw_sp_mr_tcam_afa_block_destroy(struct mlxsw_afa_block *afa_block) +{ + mlxsw_afa_block_destroy(afa_block); +} + +static int +mlxsw_sp_mr_tcam_erif_populate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list, + struct mlxsw_sp_mr_route_info *route_info) +{ + int err; + int i; + + for (i = 0; i < route_info->erif_num; i++) { + u16 erif_index = route_info->erif_indices[i]; + + err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, erif_list, + erif_index); + if (err) + return err; + } + return 0; +} + +static int +mlxsw_sp_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv, + struct mlxsw_sp_mr_route_params *route_params) +{ + const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops; + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_sp_mr_tcam *mr_tcam = priv; + int err; + + route->key = route_params->key; + route->irif_index = route_params->value.irif_index; + route->min_mtu = route_params->value.min_mtu; + route->action = route_params->value.route_action; + + /* Create the egress RIFs list */ + mlxsw_sp_mr_erif_list_init(&route->erif_list); + err = mlxsw_sp_mr_tcam_erif_populate(mlxsw_sp, &route->erif_list, + &route_params->value); + if (err) + goto err_erif_populate; + + /* Create the flow counter */ + err = mlxsw_sp_flow_counter_alloc(mlxsw_sp, &route->counter_index); + if (err) + goto err_counter_alloc; + + /* Create the flexible action block */ + route->afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, + route->action, + 
route->irif_index, + route->counter_index, + route->min_mtu, + &route->erif_list); + if (IS_ERR(route->afa_block)) { + err = PTR_ERR(route->afa_block); + goto err_afa_block_create; + } + + route->priv = kzalloc(ops->route_priv_size, GFP_KERNEL); + if (!route->priv) { + err = -ENOMEM; + goto err_route_priv_alloc; + } + + /* Write the route to the TCAM */ + err = ops->route_create(mlxsw_sp, mr_tcam->priv, route->priv, + &route->key, route->afa_block, + route_params->prio); + if (err) + goto err_route_create; + return 0; + +err_route_create: + kfree(route->priv); +err_route_priv_alloc: + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); +err_afa_block_create: + mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index); +err_erif_populate: +err_counter_alloc: + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list); + return err; +} + +static void mlxsw_sp_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp, + void *priv, void *route_priv) +{ + const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops; + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_sp_mr_tcam *mr_tcam = priv; + + ops->route_destroy(mlxsw_sp, mr_tcam->priv, route->priv, &route->key); + kfree(route->priv); + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index); + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list); +} + +static int mlxsw_sp_mr_tcam_route_stats(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u64 *packets, + u64 *bytes) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + + return mlxsw_sp_flow_counter_get(mlxsw_sp, route->counter_index, + packets, bytes); +} + +static int +mlxsw_sp_mr_tcam_route_action_update(struct mlxsw_sp *mlxsw_sp, + void *route_priv, + enum mlxsw_sp_mr_route_action route_action) +{ + const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops; + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_afa_block *afa_block; + int err; + + /* Create a new flexible action block */ + afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, route_action, + route->irif_index, + route->counter_index, + route->min_mtu, + &route->erif_list); + if (IS_ERR(afa_block)) + return PTR_ERR(afa_block); + + /* Update the TCAM route entry */ + err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block); + if (err) + goto err; + + /* Delete the old one */ + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + route->afa_block = afa_block; + route->action = route_action; + return 0; +err: + mlxsw_sp_mr_tcam_afa_block_destroy(afa_block); + return err; +} + +static int mlxsw_sp_mr_tcam_route_min_mtu_update(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u16 min_mtu) +{ + const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops; + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_afa_block *afa_block; + int err; + + /* Create a new flexible action block */ + afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, + route->action, + route->irif_index, + route->counter_index, + min_mtu, + &route->erif_list); + if (IS_ERR(afa_block)) + return PTR_ERR(afa_block); + + /* Update the TCAM route entry */ + err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block); + if (err) + goto err; + + /* Delete the old one */ + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + route->afa_block = afa_block; + route->min_mtu = min_mtu; + return 0; +err: + mlxsw_sp_mr_tcam_afa_block_destroy(afa_block); + return err; +} + +static int mlxsw_sp_mr_tcam_route_irif_update(struct 
mlxsw_sp *mlxsw_sp, + void *route_priv, u16 irif_index) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + + if (route->action != MLXSW_SP_MR_ROUTE_ACTION_TRAP) + return -EINVAL; + route->irif_index = irif_index; + return 0; +} + +static int mlxsw_sp_mr_tcam_route_erif_add(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u16 erif_index) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + int err; + + err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, &route->erif_list, + erif_index); + if (err) + return err; + + /* Commit the action only if the route action is not TRAP */ + if (route->action != MLXSW_SP_MR_ROUTE_ACTION_TRAP) + return mlxsw_sp_mr_erif_list_commit(mlxsw_sp, + &route->erif_list); + return 0; +} + +static int mlxsw_sp_mr_tcam_route_erif_del(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u16 erif_index) +{ + const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops; + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_sp_mr_erif_sublist *erif_sublist; + struct mlxsw_sp_mr_tcam_erif_list erif_list; + struct mlxsw_afa_block *afa_block; + int err; + int i; + + /* Create a copy of the original erif_list without the deleted entry */ + mlxsw_sp_mr_erif_list_init(&erif_list); + list_for_each_entry(erif_sublist, &route->erif_list.erif_sublists, list) { + for (i = 0; i < erif_sublist->num_erifs; i++) { + u16 curr_erif = erif_sublist->erif_indices[i]; + + if (curr_erif == erif_index) + continue; + err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, &erif_list, + curr_erif); + if (err) + goto err_erif_list_add; + } + } + + /* Create the flexible action block pointing to the new erif_list */ + afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, route->action, + route->irif_index, + route->counter_index, + route->min_mtu, + &erif_list); + if (IS_ERR(afa_block)) { + err = PTR_ERR(afa_block); + goto err_afa_block_create; + } + + /* Update the TCAM route entry */ + err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block); + if (err) + goto err_route_write; + + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list); + route->afa_block = afa_block; + mlxsw_sp_mr_erif_list_move(&route->erif_list, &erif_list); + return 0; + +err_route_write: + mlxsw_sp_mr_tcam_afa_block_destroy(afa_block); +err_afa_block_create: +err_erif_list_add: + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &erif_list); + return err; +} + +static int +mlxsw_sp_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp, void *route_priv, + struct mlxsw_sp_mr_route_info *route_info) +{ + const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops; + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_sp_mr_tcam_erif_list erif_list; + struct mlxsw_afa_block *afa_block; + int err; + + /* Create a new erif_list */ + mlxsw_sp_mr_erif_list_init(&erif_list); + err = mlxsw_sp_mr_tcam_erif_populate(mlxsw_sp, &erif_list, route_info); + if (err) + goto err_erif_populate; + + /* Create the flexible action block pointing to the new erif_list */ + afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, + route_info->route_action, + route_info->irif_index, + route->counter_index, + route_info->min_mtu, + &erif_list); + if (IS_ERR(afa_block)) { + err = PTR_ERR(afa_block); + goto err_afa_block_create; + } + + /* Update the TCAM route entry */ + err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block); + if (err) + goto err_route_write; + + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, 
&route->erif_list); + route->afa_block = afa_block; + mlxsw_sp_mr_erif_list_move(&route->erif_list, &erif_list); + route->action = route_info->route_action; + route->irif_index = route_info->irif_index; + route->min_mtu = route_info->min_mtu; + return 0; + +err_route_write: + mlxsw_sp_mr_tcam_afa_block_destroy(afa_block); +err_afa_block_create: +err_erif_populate: + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &erif_list); + return err; +} + +static int mlxsw_sp_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv) +{ + const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops; + struct mlxsw_sp_mr_tcam *mr_tcam = priv; + int err; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MC_ERIF_LIST_ENTRIES)) + return -EIO; + + mr_tcam->priv = kzalloc(ops->priv_size, GFP_KERNEL); + if (!mr_tcam->priv) + return -ENOMEM; + + err = ops->init(mlxsw_sp, mr_tcam->priv); + if (err) + goto err_init; + return 0; + +err_init: + kfree(mr_tcam->priv); + return err; +} + +static void mlxsw_sp_mr_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv) +{ + const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops; + struct mlxsw_sp_mr_tcam *mr_tcam = priv; + + ops->fini(mr_tcam->priv); + kfree(mr_tcam->priv); +} + +const struct mlxsw_sp_mr_ops mlxsw_sp_mr_tcam_ops = { + .priv_size = sizeof(struct mlxsw_sp_mr_tcam), + .route_priv_size = sizeof(struct mlxsw_sp_mr_tcam_route), + .init = mlxsw_sp_mr_tcam_init, + .route_create = mlxsw_sp_mr_tcam_route_create, + .route_update = mlxsw_sp_mr_tcam_route_update, + .route_stats = mlxsw_sp_mr_tcam_route_stats, + .route_action_update = mlxsw_sp_mr_tcam_route_action_update, + .route_min_mtu_update = mlxsw_sp_mr_tcam_route_min_mtu_update, + .route_irif_update = mlxsw_sp_mr_tcam_route_irif_update, + .route_erif_add = mlxsw_sp_mr_tcam_route_erif_add, + .route_erif_del = mlxsw_sp_mr_tcam_route_erif_del, + .route_destroy = mlxsw_sp_mr_tcam_route_destroy, + .fini = mlxsw_sp_mr_tcam_fini, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h new file mode 100644 index 000000000..3c84151b4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_SPECTRUM_MCROUTER_TCAM_H +#define _MLXSW_SPECTRUM_MCROUTER_TCAM_H + +#include "spectrum.h" +#include "spectrum_mr.h" + +extern const struct mlxsw_sp_mr_ops mlxsw_sp_mr_tcam_ops; + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c new file mode 100644 index 000000000..d2b57a045 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c @@ -0,0 +1,1173 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. 
All rights reserved */ + +#include <linux/err.h> +#include <linux/gfp.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> +#include <net/inet_ecn.h> +#include <net/ipv6.h> + +#include "reg.h" +#include "spectrum.h" +#include "spectrum_nve.h" + +const struct mlxsw_sp_nve_ops *mlxsw_sp1_nve_ops_arr[] = { + [MLXSW_SP_NVE_TYPE_VXLAN] = &mlxsw_sp1_nve_vxlan_ops, +}; + +const struct mlxsw_sp_nve_ops *mlxsw_sp2_nve_ops_arr[] = { + [MLXSW_SP_NVE_TYPE_VXLAN] = &mlxsw_sp2_nve_vxlan_ops, +}; + +struct mlxsw_sp_nve_mc_entry; +struct mlxsw_sp_nve_mc_record; +struct mlxsw_sp_nve_mc_list; + +struct mlxsw_sp_nve_mc_record_ops { + enum mlxsw_reg_tnumt_record_type type; + int (*entry_add)(struct mlxsw_sp_nve_mc_record *mc_record, + struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr); + void (*entry_del)(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry); + void (*entry_set)(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + char *tnumt_pl, unsigned int entry_index); + bool (*entry_compare)(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr); +}; + +struct mlxsw_sp_nve_mc_list_key { + u16 fid_index; +}; + +struct mlxsw_sp_nve_mc_ipv6_entry { + struct in6_addr addr6; + u32 addr6_kvdl_index; +}; + +struct mlxsw_sp_nve_mc_entry { + union { + __be32 addr4; + struct mlxsw_sp_nve_mc_ipv6_entry ipv6_entry; + }; + u8 valid:1; +}; + +struct mlxsw_sp_nve_mc_record { + struct list_head list; + enum mlxsw_sp_l3proto proto; + unsigned int num_entries; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_nve_mc_list *mc_list; + const struct mlxsw_sp_nve_mc_record_ops *ops; + u32 kvdl_index; + struct mlxsw_sp_nve_mc_entry entries[]; +}; + +struct mlxsw_sp_nve_mc_list { + struct list_head records_list; + struct rhash_head ht_node; + struct mlxsw_sp_nve_mc_list_key key; +}; + +static const struct rhashtable_params mlxsw_sp_nve_mc_list_ht_params = { + .key_len = sizeof(struct mlxsw_sp_nve_mc_list_key), + .key_offset = offsetof(struct mlxsw_sp_nve_mc_list, key), + .head_offset = offsetof(struct mlxsw_sp_nve_mc_list, ht_node), +}; + +static int +mlxsw_sp_nve_mc_record_ipv4_entry_add(struct mlxsw_sp_nve_mc_record *mc_record, + struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr) +{ + mc_entry->addr4 = addr->addr4; + + return 0; +} + +static void +mlxsw_sp_nve_mc_record_ipv4_entry_del(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry) +{ +} + +static void +mlxsw_sp_nve_mc_record_ipv4_entry_set(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + char *tnumt_pl, unsigned int entry_index) +{ + u32 udip = be32_to_cpu(mc_entry->addr4); + + mlxsw_reg_tnumt_udip_set(tnumt_pl, entry_index, udip); +} + +static bool +mlxsw_sp_nve_mc_record_ipv4_entry_compare(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr) +{ + return mc_entry->addr4 == addr->addr4; +} + +static const struct mlxsw_sp_nve_mc_record_ops +mlxsw_sp_nve_mc_record_ipv4_ops = { + .type = MLXSW_REG_TNUMT_RECORD_TYPE_IPV4, + .entry_add = &mlxsw_sp_nve_mc_record_ipv4_entry_add, + .entry_del = &mlxsw_sp_nve_mc_record_ipv4_entry_del, + .entry_set = &mlxsw_sp_nve_mc_record_ipv4_entry_set, + .entry_compare = 
&mlxsw_sp_nve_mc_record_ipv4_entry_compare, +}; + +static int +mlxsw_sp_nve_mc_record_ipv6_entry_add(struct mlxsw_sp_nve_mc_record *mc_record, + struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr) +{ + u32 kvdl_index; + int err; + + err = mlxsw_sp_ipv6_addr_kvdl_index_get(mc_record->mlxsw_sp, + &addr->addr6, &kvdl_index); + if (err) + return err; + + mc_entry->ipv6_entry.addr6 = addr->addr6; + mc_entry->ipv6_entry.addr6_kvdl_index = kvdl_index; + return 0; +} + +static void +mlxsw_sp_nve_mc_record_ipv6_entry_del(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry) +{ + mlxsw_sp_ipv6_addr_put(mc_record->mlxsw_sp, + &mc_entry->ipv6_entry.addr6); +} + +static void +mlxsw_sp_nve_mc_record_ipv6_entry_set(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + char *tnumt_pl, unsigned int entry_index) +{ + u32 udip_ptr = mc_entry->ipv6_entry.addr6_kvdl_index; + + mlxsw_reg_tnumt_udip_ptr_set(tnumt_pl, entry_index, udip_ptr); +} + +static bool +mlxsw_sp_nve_mc_record_ipv6_entry_compare(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr) +{ + return ipv6_addr_equal(&mc_entry->ipv6_entry.addr6, &addr->addr6); +} + +static const struct mlxsw_sp_nve_mc_record_ops +mlxsw_sp_nve_mc_record_ipv6_ops = { + .type = MLXSW_REG_TNUMT_RECORD_TYPE_IPV6, + .entry_add = &mlxsw_sp_nve_mc_record_ipv6_entry_add, + .entry_del = &mlxsw_sp_nve_mc_record_ipv6_entry_del, + .entry_set = &mlxsw_sp_nve_mc_record_ipv6_entry_set, + .entry_compare = &mlxsw_sp_nve_mc_record_ipv6_entry_compare, +}; + +static const struct mlxsw_sp_nve_mc_record_ops * +mlxsw_sp_nve_mc_record_ops_arr[] = { + [MLXSW_SP_L3_PROTO_IPV4] = &mlxsw_sp_nve_mc_record_ipv4_ops, + [MLXSW_SP_L3_PROTO_IPV6] = &mlxsw_sp_nve_mc_record_ipv6_ops, +}; + +int mlxsw_sp_nve_learned_ip_resolve(struct mlxsw_sp *mlxsw_sp, u32 uip, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr) +{ + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + addr->addr4 = cpu_to_be32(uip); + return 0; + default: + WARN_ON(1); + return -EINVAL; + } +} + +static struct mlxsw_sp_nve_mc_list * +mlxsw_sp_nve_mc_list_find(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_mc_list_key *key) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + + return rhashtable_lookup_fast(&nve->mc_list_ht, key, + mlxsw_sp_nve_mc_list_ht_params); +} + +static struct mlxsw_sp_nve_mc_list * +mlxsw_sp_nve_mc_list_create(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_mc_list_key *key) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + struct mlxsw_sp_nve_mc_list *mc_list; + int err; + + mc_list = kmalloc(sizeof(*mc_list), GFP_KERNEL); + if (!mc_list) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&mc_list->records_list); + mc_list->key = *key; + + err = rhashtable_insert_fast(&nve->mc_list_ht, &mc_list->ht_node, + mlxsw_sp_nve_mc_list_ht_params); + if (err) + goto err_rhashtable_insert; + + return mc_list; + +err_rhashtable_insert: + kfree(mc_list); + return ERR_PTR(err); +} + +static void mlxsw_sp_nve_mc_list_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + + rhashtable_remove_fast(&nve->mc_list_ht, &mc_list->ht_node, + mlxsw_sp_nve_mc_list_ht_params); + WARN_ON(!list_empty(&mc_list->records_list)); + kfree(mc_list); +} + +static struct mlxsw_sp_nve_mc_list * +mlxsw_sp_nve_mc_list_get(struct mlxsw_sp *mlxsw_sp, + const struct 
mlxsw_sp_nve_mc_list_key *key) +{ + struct mlxsw_sp_nve_mc_list *mc_list; + + mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, key); + if (mc_list) + return mc_list; + + return mlxsw_sp_nve_mc_list_create(mlxsw_sp, key); +} + +static void +mlxsw_sp_nve_mc_list_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list) +{ + if (!list_empty(&mc_list->records_list)) + return; + mlxsw_sp_nve_mc_list_destroy(mlxsw_sp, mc_list); +} + +static struct mlxsw_sp_nve_mc_record * +mlxsw_sp_nve_mc_record_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto) +{ + unsigned int num_max_entries = mlxsw_sp->nve->num_max_mc_entries[proto]; + struct mlxsw_sp_nve_mc_record *mc_record; + int err; + + mc_record = kzalloc(struct_size(mc_record, entries, num_max_entries), + GFP_KERNEL); + if (!mc_record) + return ERR_PTR(-ENOMEM); + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT, 1, + &mc_record->kvdl_index); + if (err) + goto err_kvdl_alloc; + + mc_record->ops = mlxsw_sp_nve_mc_record_ops_arr[proto]; + mc_record->mlxsw_sp = mlxsw_sp; + mc_record->mc_list = mc_list; + mc_record->proto = proto; + list_add_tail(&mc_record->list, &mc_list->records_list); + + return mc_record; + +err_kvdl_alloc: + kfree(mc_record); + return ERR_PTR(err); +} + +static void +mlxsw_sp_nve_mc_record_destroy(struct mlxsw_sp_nve_mc_record *mc_record) +{ + struct mlxsw_sp *mlxsw_sp = mc_record->mlxsw_sp; + + list_del(&mc_record->list); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT, 1, + mc_record->kvdl_index); + WARN_ON(mc_record->num_entries); + kfree(mc_record); +} + +static struct mlxsw_sp_nve_mc_record * +mlxsw_sp_nve_mc_record_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + + list_for_each_entry_reverse(mc_record, &mc_list->records_list, list) { + unsigned int num_entries = mc_record->num_entries; + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + + if (mc_record->proto == proto && + num_entries < nve->num_max_mc_entries[proto]) + return mc_record; + } + + return mlxsw_sp_nve_mc_record_create(mlxsw_sp, mc_list, proto); +} + +static void +mlxsw_sp_nve_mc_record_put(struct mlxsw_sp_nve_mc_record *mc_record) +{ + if (mc_record->num_entries != 0) + return; + + mlxsw_sp_nve_mc_record_destroy(mc_record); +} + +static struct mlxsw_sp_nve_mc_entry * +mlxsw_sp_nve_mc_free_entry_find(struct mlxsw_sp_nve_mc_record *mc_record) +{ + struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve; + unsigned int num_max_entries; + int i; + + num_max_entries = nve->num_max_mc_entries[mc_record->proto]; + for (i = 0; i < num_max_entries; i++) { + if (mc_record->entries[i].valid) + continue; + return &mc_record->entries[i]; + } + + return NULL; +} + +static int +mlxsw_sp_nve_mc_record_refresh(struct mlxsw_sp_nve_mc_record *mc_record) +{ + enum mlxsw_reg_tnumt_record_type type = mc_record->ops->type; + struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list; + struct mlxsw_sp *mlxsw_sp = mc_record->mlxsw_sp; + char tnumt_pl[MLXSW_REG_TNUMT_LEN]; + unsigned int num_max_entries; + unsigned int num_entries = 0; + u32 next_kvdl_index = 0; + bool next_valid = false; + int i; + + if (!list_is_last(&mc_record->list, &mc_list->records_list)) { + struct mlxsw_sp_nve_mc_record *next_record; + + next_record = list_next_entry(mc_record, list); + next_kvdl_index = next_record->kvdl_index; + next_valid = true; + } + + mlxsw_reg_tnumt_pack(tnumt_pl, type, MLXSW_REG_TUNNEL_PORT_NVE, + 
mc_record->kvdl_index, next_valid, + next_kvdl_index, mc_record->num_entries); + + num_max_entries = mlxsw_sp->nve->num_max_mc_entries[mc_record->proto]; + for (i = 0; i < num_max_entries; i++) { + struct mlxsw_sp_nve_mc_entry *mc_entry; + + mc_entry = &mc_record->entries[i]; + if (!mc_entry->valid) + continue; + mc_record->ops->entry_set(mc_record, mc_entry, tnumt_pl, + num_entries++); + } + + WARN_ON(num_entries != mc_record->num_entries); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnumt), tnumt_pl); +} + +static bool +mlxsw_sp_nve_mc_record_is_first(struct mlxsw_sp_nve_mc_record *mc_record) +{ + struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list; + struct mlxsw_sp_nve_mc_record *first_record; + + first_record = list_first_entry(&mc_list->records_list, + struct mlxsw_sp_nve_mc_record, list); + + return mc_record == first_record; +} + +static struct mlxsw_sp_nve_mc_entry * +mlxsw_sp_nve_mc_entry_find(struct mlxsw_sp_nve_mc_record *mc_record, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve; + unsigned int num_max_entries; + int i; + + num_max_entries = nve->num_max_mc_entries[mc_record->proto]; + for (i = 0; i < num_max_entries; i++) { + struct mlxsw_sp_nve_mc_entry *mc_entry; + + mc_entry = &mc_record->entries[i]; + if (!mc_entry->valid) + continue; + if (mc_record->ops->entry_compare(mc_record, mc_entry, addr)) + return mc_entry; + } + + return NULL; +} + +static int +mlxsw_sp_nve_mc_record_ip_add(struct mlxsw_sp_nve_mc_record *mc_record, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_entry *mc_entry = NULL; + int err; + + mc_entry = mlxsw_sp_nve_mc_free_entry_find(mc_record); + if (WARN_ON(!mc_entry)) + return -EINVAL; + + err = mc_record->ops->entry_add(mc_record, mc_entry, addr); + if (err) + return err; + mc_record->num_entries++; + mc_entry->valid = true; + + err = mlxsw_sp_nve_mc_record_refresh(mc_record); + if (err) + goto err_record_refresh; + + /* If this is a new record and not the first one, then we need to + * update the next pointer of the previous entry + */ + if (mc_record->num_entries != 1 || + mlxsw_sp_nve_mc_record_is_first(mc_record)) + return 0; + + err = mlxsw_sp_nve_mc_record_refresh(list_prev_entry(mc_record, list)); + if (err) + goto err_prev_record_refresh; + + return 0; + +err_prev_record_refresh: +err_record_refresh: + mc_entry->valid = false; + mc_record->num_entries--; + mc_record->ops->entry_del(mc_record, mc_entry); + return err; +} + +static void +mlxsw_sp_nve_mc_record_entry_del(struct mlxsw_sp_nve_mc_record *mc_record, + struct mlxsw_sp_nve_mc_entry *mc_entry) +{ + struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list; + + mc_entry->valid = false; + mc_record->num_entries--; + + /* When the record continues to exist we only need to invalidate + * the requested entry + */ + if (mc_record->num_entries != 0) { + mlxsw_sp_nve_mc_record_refresh(mc_record); + mc_record->ops->entry_del(mc_record, mc_entry); + return; + } + + /* If the record needs to be deleted, but it is not the first, + * then we need to make sure that the previous record no longer + * points to it. 
Remove deleted record from the list to reflect + * that and then re-add it at the end, so that it could be + * properly removed by the record destruction code + */ + if (!mlxsw_sp_nve_mc_record_is_first(mc_record)) { + struct mlxsw_sp_nve_mc_record *prev_record; + + prev_record = list_prev_entry(mc_record, list); + list_del(&mc_record->list); + mlxsw_sp_nve_mc_record_refresh(prev_record); + list_add_tail(&mc_record->list, &mc_list->records_list); + mc_record->ops->entry_del(mc_record, mc_entry); + return; + } + + /* If the first record needs to be deleted, but the list is not + * singular, then the second record needs to be written in the + * first record's address, as this address is stored as a property + * of the FID + */ + if (mlxsw_sp_nve_mc_record_is_first(mc_record) && + !list_is_singular(&mc_list->records_list)) { + struct mlxsw_sp_nve_mc_record *next_record; + + next_record = list_next_entry(mc_record, list); + swap(mc_record->kvdl_index, next_record->kvdl_index); + mlxsw_sp_nve_mc_record_refresh(next_record); + mc_record->ops->entry_del(mc_record, mc_entry); + return; + } + + /* This is the last case where the last remaining record needs to + * be deleted. Simply delete the entry + */ + mc_record->ops->entry_del(mc_record, mc_entry); +} + +static struct mlxsw_sp_nve_mc_record * +mlxsw_sp_nve_mc_record_find(struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr, + struct mlxsw_sp_nve_mc_entry **mc_entry) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + + list_for_each_entry(mc_record, &mc_list->records_list, list) { + if (mc_record->proto != proto) + continue; + + *mc_entry = mlxsw_sp_nve_mc_entry_find(mc_record, addr); + if (*mc_entry) + return mc_record; + } + + return NULL; +} + +static int mlxsw_sp_nve_mc_list_ip_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + int err; + + mc_record = mlxsw_sp_nve_mc_record_get(mlxsw_sp, mc_list, proto); + if (IS_ERR(mc_record)) + return PTR_ERR(mc_record); + + err = mlxsw_sp_nve_mc_record_ip_add(mc_record, addr); + if (err) + goto err_ip_add; + + return 0; + +err_ip_add: + mlxsw_sp_nve_mc_record_put(mc_record); + return err; +} + +static void mlxsw_sp_nve_mc_list_ip_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + struct mlxsw_sp_nve_mc_entry *mc_entry; + + mc_record = mlxsw_sp_nve_mc_record_find(mc_list, proto, addr, + &mc_entry); + if (!mc_record) + return; + + mlxsw_sp_nve_mc_record_entry_del(mc_record, mc_entry); + mlxsw_sp_nve_mc_record_put(mc_record); +} + +static int +mlxsw_sp_nve_fid_flood_index_set(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_nve_mc_list *mc_list) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + + /* The address of the first record in the list is a property of + * the FID and we never change it. 
It only needs to be set when + * a new list is created + */ + if (mlxsw_sp_fid_nve_flood_index_is_set(fid)) + return 0; + + mc_record = list_first_entry(&mc_list->records_list, + struct mlxsw_sp_nve_mc_record, list); + + return mlxsw_sp_fid_nve_flood_index_set(fid, mc_record->kvdl_index); +} + +static void +mlxsw_sp_nve_fid_flood_index_clear(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_nve_mc_list *mc_list) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + + /* The address of the first record needs to be invalidated only when + * the last record is about to be removed + */ + if (!list_is_singular(&mc_list->records_list)) + return; + + mc_record = list_first_entry(&mc_list->records_list, + struct mlxsw_sp_nve_mc_record, list); + if (mc_record->num_entries != 1) + return; + + return mlxsw_sp_fid_nve_flood_index_clear(fid); +} + +int mlxsw_sp_nve_flood_ip_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_list_key key = { 0 }; + struct mlxsw_sp_nve_mc_list *mc_list; + int err; + + key.fid_index = mlxsw_sp_fid_index(fid); + mc_list = mlxsw_sp_nve_mc_list_get(mlxsw_sp, &key); + if (IS_ERR(mc_list)) + return PTR_ERR(mc_list); + + err = mlxsw_sp_nve_mc_list_ip_add(mlxsw_sp, mc_list, proto, addr); + if (err) + goto err_add_ip; + + err = mlxsw_sp_nve_fid_flood_index_set(fid, mc_list); + if (err) + goto err_fid_flood_index_set; + + return 0; + +err_fid_flood_index_set: + mlxsw_sp_nve_mc_list_ip_del(mlxsw_sp, mc_list, proto, addr); +err_add_ip: + mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list); + return err; +} + +void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_list_key key = { 0 }; + struct mlxsw_sp_nve_mc_list *mc_list; + + key.fid_index = mlxsw_sp_fid_index(fid); + mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, &key); + if (!mc_list) + return; + + mlxsw_sp_nve_fid_flood_index_clear(fid, mc_list); + mlxsw_sp_nve_mc_list_ip_del(mlxsw_sp, mc_list, proto, addr); + mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list); +} + +static void +mlxsw_sp_nve_mc_record_delete(struct mlxsw_sp_nve_mc_record *mc_record) +{ + struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve; + unsigned int num_max_entries; + int i; + + num_max_entries = nve->num_max_mc_entries[mc_record->proto]; + for (i = 0; i < num_max_entries; i++) { + struct mlxsw_sp_nve_mc_entry *mc_entry = &mc_record->entries[i]; + + if (!mc_entry->valid) + continue; + mlxsw_sp_nve_mc_record_entry_del(mc_record, mc_entry); + } + + WARN_ON(mc_record->num_entries); + mlxsw_sp_nve_mc_record_put(mc_record); +} + +static void mlxsw_sp_nve_flood_ip_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_nve_mc_record *mc_record, *tmp; + struct mlxsw_sp_nve_mc_list_key key = { 0 }; + struct mlxsw_sp_nve_mc_list *mc_list; + + if (!mlxsw_sp_fid_nve_flood_index_is_set(fid)) + return; + + mlxsw_sp_fid_nve_flood_index_clear(fid); + + key.fid_index = mlxsw_sp_fid_index(fid); + mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, &key); + if (WARN_ON(!mc_list)) + return; + + list_for_each_entry_safe(mc_record, tmp, &mc_list->records_list, list) + mlxsw_sp_nve_mc_record_delete(mc_record); + + WARN_ON(!list_empty(&mc_list->records_list)); + mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list); +} + +static int mlxsw_sp_nve_tunnel_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_config *config) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + const 
struct mlxsw_sp_nve_ops *ops; + int err; + + if (nve->num_nve_tunnels++ != 0) + return 0; + + nve->config = *config; + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + &nve->tunnel_index); + if (err) + goto err_kvdl_alloc; + + ops = nve->nve_ops_arr[config->type]; + err = ops->init(nve, config); + if (err) + goto err_ops_init; + + return 0; + +err_ops_init: + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + nve->tunnel_index); +err_kvdl_alloc: + memset(&nve->config, 0, sizeof(nve->config)); + nve->num_nve_tunnels--; + return err; +} + +static void mlxsw_sp_nve_tunnel_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + const struct mlxsw_sp_nve_ops *ops; + + ops = nve->nve_ops_arr[nve->config.type]; + + if (mlxsw_sp->nve->num_nve_tunnels == 1) { + ops->fini(nve); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + nve->tunnel_index); + memset(&nve->config, 0, sizeof(nve->config)); + } + nve->num_nve_tunnels--; +} + +static void mlxsw_sp_nve_fdb_flush_by_fid(struct mlxsw_sp *mlxsw_sp, + u16 fid_index) +{ + char sfdf_pl[MLXSW_REG_SFDF_LEN]; + + mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_NVE_AND_FID); + mlxsw_reg_sfdf_fid_set(sfdf_pl, fid_index); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl); +} + +static void mlxsw_sp_nve_fdb_clear_offload(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_fid *fid, + const struct net_device *nve_dev, + __be32 vni) +{ + const struct mlxsw_sp_nve_ops *ops; + enum mlxsw_sp_nve_type type; + + if (WARN_ON(mlxsw_sp_fid_nve_type(fid, &type))) + return; + + ops = mlxsw_sp->nve->nve_ops_arr[type]; + ops->fdb_clear_offload(nve_dev, vni); +} + +struct mlxsw_sp_nve_ipv6_ht_key { + u8 mac[ETH_ALEN]; + u16 fid_index; +}; + +struct mlxsw_sp_nve_ipv6_ht_node { + struct rhash_head ht_node; + struct list_head list; + struct mlxsw_sp_nve_ipv6_ht_key key; + struct in6_addr addr6; +}; + +static const struct rhashtable_params mlxsw_sp_nve_ipv6_ht_params = { + .key_len = sizeof(struct mlxsw_sp_nve_ipv6_ht_key), + .key_offset = offsetof(struct mlxsw_sp_nve_ipv6_ht_node, key), + .head_offset = offsetof(struct mlxsw_sp_nve_ipv6_ht_node, ht_node), +}; + +int mlxsw_sp_nve_ipv6_addr_kvdl_set(struct mlxsw_sp *mlxsw_sp, + const struct in6_addr *addr6, + u32 *p_kvdl_index) +{ + return mlxsw_sp_ipv6_addr_kvdl_index_get(mlxsw_sp, addr6, p_kvdl_index); +} + +void mlxsw_sp_nve_ipv6_addr_kvdl_unset(struct mlxsw_sp *mlxsw_sp, + const struct in6_addr *addr6) +{ + mlxsw_sp_ipv6_addr_put(mlxsw_sp, addr6); +} + +static struct mlxsw_sp_nve_ipv6_ht_node * +mlxsw_sp_nve_ipv6_ht_node_lookup(struct mlxsw_sp *mlxsw_sp, const char *mac, + u16 fid_index) +{ + struct mlxsw_sp_nve_ipv6_ht_key key = {}; + + ether_addr_copy(key.mac, mac); + key.fid_index = fid_index; + return rhashtable_lookup_fast(&mlxsw_sp->nve->ipv6_ht, &key, + mlxsw_sp_nve_ipv6_ht_params); +} + +static int mlxsw_sp_nve_ipv6_ht_insert(struct mlxsw_sp *mlxsw_sp, + const char *mac, u16 fid_index, + const struct in6_addr *addr6) +{ + struct mlxsw_sp_nve_ipv6_ht_node *ipv6_ht_node; + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + int err; + + ipv6_ht_node = kzalloc(sizeof(*ipv6_ht_node), GFP_KERNEL); + if (!ipv6_ht_node) + return -ENOMEM; + + ether_addr_copy(ipv6_ht_node->key.mac, mac); + ipv6_ht_node->key.fid_index = fid_index; + ipv6_ht_node->addr6 = *addr6; + + err = rhashtable_insert_fast(&nve->ipv6_ht, &ipv6_ht_node->ht_node, + mlxsw_sp_nve_ipv6_ht_params); + if (err) + goto err_rhashtable_insert; + + list_add(&ipv6_ht_node->list, 
&nve->ipv6_addr_list); + + return 0; + +err_rhashtable_insert: + kfree(ipv6_ht_node); + return err; +} + +static void +mlxsw_sp_nve_ipv6_ht_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_ipv6_ht_node *ipv6_ht_node) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + + list_del(&ipv6_ht_node->list); + rhashtable_remove_fast(&nve->ipv6_ht, &ipv6_ht_node->ht_node, + mlxsw_sp_nve_ipv6_ht_params); + kfree(ipv6_ht_node); +} + +int +mlxsw_sp_nve_ipv6_addr_map_replace(struct mlxsw_sp *mlxsw_sp, const char *mac, + u16 fid_index, + const struct in6_addr *new_addr6) +{ + struct mlxsw_sp_nve_ipv6_ht_node *ipv6_ht_node; + + ASSERT_RTNL(); + + ipv6_ht_node = mlxsw_sp_nve_ipv6_ht_node_lookup(mlxsw_sp, mac, + fid_index); + if (!ipv6_ht_node) + return mlxsw_sp_nve_ipv6_ht_insert(mlxsw_sp, mac, fid_index, + new_addr6); + + mlxsw_sp_ipv6_addr_put(mlxsw_sp, &ipv6_ht_node->addr6); + ipv6_ht_node->addr6 = *new_addr6; + return 0; +} + +void mlxsw_sp_nve_ipv6_addr_map_del(struct mlxsw_sp *mlxsw_sp, const char *mac, + u16 fid_index) +{ + struct mlxsw_sp_nve_ipv6_ht_node *ipv6_ht_node; + + ASSERT_RTNL(); + + ipv6_ht_node = mlxsw_sp_nve_ipv6_ht_node_lookup(mlxsw_sp, mac, + fid_index); + if (WARN_ON(!ipv6_ht_node)) + return; + + mlxsw_sp_nve_ipv6_ht_remove(mlxsw_sp, ipv6_ht_node); +} + +static void mlxsw_sp_nve_ipv6_addr_flush_by_fid(struct mlxsw_sp *mlxsw_sp, + u16 fid_index) +{ + struct mlxsw_sp_nve_ipv6_ht_node *ipv6_ht_node, *tmp; + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + + list_for_each_entry_safe(ipv6_ht_node, tmp, &nve->ipv6_addr_list, + list) { + if (ipv6_ht_node->key.fid_index != fid_index) + continue; + + mlxsw_sp_ipv6_addr_put(mlxsw_sp, &ipv6_ht_node->addr6); + mlxsw_sp_nve_ipv6_ht_remove(mlxsw_sp, ipv6_ht_node); + } +} + +int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, + struct mlxsw_sp_nve_params *params, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + const struct mlxsw_sp_nve_ops *ops; + struct mlxsw_sp_nve_config config; + int err; + + ops = nve->nve_ops_arr[params->type]; + + if (!ops->can_offload(nve, params, extack)) + return -EINVAL; + + memset(&config, 0, sizeof(config)); + ops->nve_config(nve, params, &config); + if (nve->num_nve_tunnels && + memcmp(&config, &nve->config, sizeof(config))) { + NL_SET_ERR_MSG_MOD(extack, "Conflicting NVE tunnels configuration"); + return -EINVAL; + } + + err = mlxsw_sp_nve_tunnel_init(mlxsw_sp, &config); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to initialize NVE tunnel"); + return err; + } + + err = mlxsw_sp_fid_vni_set(fid, params->type, params->vni, + params->dev->ifindex); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to set VNI on FID"); + goto err_fid_vni_set; + } + + err = ops->fdb_replay(params->dev, params->vni, extack); + if (err) + goto err_fdb_replay; + + return 0; + +err_fdb_replay: + mlxsw_sp_fid_vni_clear(fid); +err_fid_vni_set: + mlxsw_sp_nve_tunnel_fini(mlxsw_sp); + return err; +} + +void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid) +{ + u16 fid_index = mlxsw_sp_fid_index(fid); + struct net_device *nve_dev; + int nve_ifindex; + __be32 vni; + + mlxsw_sp_nve_flood_ip_flush(mlxsw_sp, fid); + mlxsw_sp_nve_fdb_flush_by_fid(mlxsw_sp, fid_index); + mlxsw_sp_nve_ipv6_addr_flush_by_fid(mlxsw_sp, fid_index); + + if (WARN_ON(mlxsw_sp_fid_nve_ifindex(fid, &nve_ifindex) || + mlxsw_sp_fid_vni(fid, &vni))) + goto out; + + nve_dev = dev_get_by_index(mlxsw_sp_net(mlxsw_sp), nve_ifindex); + if (!nve_dev) + goto out; + + 
mlxsw_sp_nve_fdb_clear_offload(mlxsw_sp, fid, nve_dev, vni); + mlxsw_sp_fid_fdb_clear_offload(fid, nve_dev); + + dev_put(nve_dev); + +out: + mlxsw_sp_fid_vni_clear(fid); + mlxsw_sp_nve_tunnel_fini(mlxsw_sp); +} + +int mlxsw_sp_port_nve_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char tnqdr_pl[MLXSW_REG_TNQDR_LEN]; + + mlxsw_reg_tnqdr_pack(tnqdr_pl, mlxsw_sp_port->local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnqdr), tnqdr_pl); +} + +void mlxsw_sp_port_nve_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ +} + +static int mlxsw_sp_nve_qos_init(struct mlxsw_sp *mlxsw_sp) +{ + char tnqcr_pl[MLXSW_REG_TNQCR_LEN]; + + mlxsw_reg_tnqcr_pack(tnqcr_pl); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnqcr), tnqcr_pl); +} + +static int mlxsw_sp_nve_ecn_encap_init(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + /* Iterate over inner ECN values */ + for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) { + u8 outer_ecn = INET_ECN_encapsulate(0, i); + char tneem_pl[MLXSW_REG_TNEEM_LEN]; + int err; + + mlxsw_reg_tneem_pack(tneem_pl, i, outer_ecn); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tneem), + tneem_pl); + if (err) + return err; + } + + return 0; +} + +static int __mlxsw_sp_nve_ecn_decap_init(struct mlxsw_sp *mlxsw_sp, + u8 inner_ecn, u8 outer_ecn) +{ + char tndem_pl[MLXSW_REG_TNDEM_LEN]; + u8 new_inner_ecn; + bool trap_en; + + new_inner_ecn = mlxsw_sp_tunnel_ecn_decap(outer_ecn, inner_ecn, + &trap_en); + mlxsw_reg_tndem_pack(tndem_pl, outer_ecn, inner_ecn, new_inner_ecn, + trap_en, trap_en ? MLXSW_TRAP_ID_DECAP_ECN0 : 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tndem), tndem_pl); +} + +static int mlxsw_sp_nve_ecn_decap_init(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + /* Iterate over inner ECN values */ + for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) { + int j; + + /* Iterate over outer ECN values */ + for (j = INET_ECN_NOT_ECT; j <= INET_ECN_CE; j++) { + int err; + + err = __mlxsw_sp_nve_ecn_decap_init(mlxsw_sp, i, j); + if (err) + return err; + } + } + + return 0; +} + +static int mlxsw_sp_nve_ecn_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + err = mlxsw_sp_nve_ecn_encap_init(mlxsw_sp); + if (err) + return err; + + return mlxsw_sp_nve_ecn_decap_init(mlxsw_sp); +} + +static int mlxsw_sp_nve_resources_query(struct mlxsw_sp *mlxsw_sp) +{ + unsigned int max; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV4) || + !MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV6)) + return -EIO; + max = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV4); + mlxsw_sp->nve->num_max_mc_entries[MLXSW_SP_L3_PROTO_IPV4] = max; + max = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV6); + mlxsw_sp->nve->num_max_mc_entries[MLXSW_SP_L3_PROTO_IPV6] = max; + + return 0; +} + +int mlxsw_sp_nve_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_nve *nve; + int err; + + nve = kzalloc(sizeof(*mlxsw_sp->nve), GFP_KERNEL); + if (!nve) + return -ENOMEM; + mlxsw_sp->nve = nve; + nve->mlxsw_sp = mlxsw_sp; + nve->nve_ops_arr = mlxsw_sp->nve_ops_arr; + + err = rhashtable_init(&nve->mc_list_ht, + &mlxsw_sp_nve_mc_list_ht_params); + if (err) + goto err_mc_rhashtable_init; + + err = rhashtable_init(&nve->ipv6_ht, &mlxsw_sp_nve_ipv6_ht_params); + if (err) + goto err_ipv6_rhashtable_init; + + INIT_LIST_HEAD(&nve->ipv6_addr_list); + + err = mlxsw_sp_nve_qos_init(mlxsw_sp); + if (err) + goto err_nve_qos_init; + + err = mlxsw_sp_nve_ecn_init(mlxsw_sp); + if (err) + goto err_nve_ecn_init; + + err = 
mlxsw_sp_nve_resources_query(mlxsw_sp); + if (err) + goto err_nve_resources_query; + + return 0; + +err_nve_resources_query: +err_nve_ecn_init: +err_nve_qos_init: + rhashtable_destroy(&nve->ipv6_ht); +err_ipv6_rhashtable_init: + rhashtable_destroy(&nve->mc_list_ht); +err_mc_rhashtable_init: + mlxsw_sp->nve = NULL; + kfree(nve); + return err; +} + +void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp) +{ + WARN_ON(mlxsw_sp->nve->num_nve_tunnels); + WARN_ON(!list_empty(&mlxsw_sp->nve->ipv6_addr_list)); + rhashtable_destroy(&mlxsw_sp->nve->ipv6_ht); + rhashtable_destroy(&mlxsw_sp->nve->mc_list_ht); + kfree(mlxsw_sp->nve); + mlxsw_sp->nve = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h new file mode 100644 index 000000000..0d21de1d0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_SPECTRUM_NVE_H +#define _MLXSW_SPECTRUM_NVE_H + +#include <linux/netlink.h> +#include <linux/rhashtable.h> + +#include "spectrum.h" + +struct mlxsw_sp_nve_config { + enum mlxsw_sp_nve_type type; + u8 ttl; + u8 learning_en:1; + __be16 udp_dport; + __be32 flowlabel; + u32 ul_tb_id; + enum mlxsw_sp_l3proto ul_proto; + union mlxsw_sp_l3addr ul_sip; +}; + +struct mlxsw_sp_nve { + struct mlxsw_sp_nve_config config; + struct rhashtable mc_list_ht; + struct rhashtable ipv6_ht; + struct list_head ipv6_addr_list; /* Saves hash table nodes. */ + struct mlxsw_sp *mlxsw_sp; + const struct mlxsw_sp_nve_ops **nve_ops_arr; + unsigned int num_nve_tunnels; /* Protected by RTNL */ + unsigned int num_max_mc_entries[MLXSW_SP_L3_PROTO_MAX]; + u32 tunnel_index; + u16 ul_rif_index; /* Reserved for Spectrum */ +}; + +struct mlxsw_sp_nve_ops { + enum mlxsw_sp_nve_type type; + bool (*can_offload)(const struct mlxsw_sp_nve *nve, + const struct mlxsw_sp_nve_params *params, + struct netlink_ext_ack *extack); + void (*nve_config)(const struct mlxsw_sp_nve *nve, + const struct mlxsw_sp_nve_params *params, + struct mlxsw_sp_nve_config *config); + int (*init)(struct mlxsw_sp_nve *nve, + const struct mlxsw_sp_nve_config *config); + void (*fini)(struct mlxsw_sp_nve *nve); + int (*fdb_replay)(const struct net_device *nve_dev, __be32 vni, + struct netlink_ext_ack *extack); + void (*fdb_clear_offload)(const struct net_device *nve_dev, __be32 vni); +}; + +extern const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops; +extern const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops; + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c new file mode 100644 index 000000000..cdd8818b4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c @@ -0,0 +1,473 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. 
All rights reserved */ + +#include <linux/netdevice.h> +#include <linux/netlink.h> +#include <linux/random.h> +#include <net/vxlan.h> + +#include "reg.h" +#include "spectrum.h" +#include "spectrum_nve.h" + +#define MLXSW_SP_NVE_VXLAN_IPV4_SUPPORTED_FLAGS (VXLAN_F_UDP_ZERO_CSUM_TX | \ + VXLAN_F_LEARN) +#define MLXSW_SP_NVE_VXLAN_IPV6_SUPPORTED_FLAGS (VXLAN_F_IPV6 | \ + VXLAN_F_UDP_ZERO_CSUM6_TX | \ + VXLAN_F_UDP_ZERO_CSUM6_RX) + +static bool mlxsw_sp_nve_vxlan_ipv4_flags_check(const struct vxlan_config *cfg, + struct netlink_ext_ack *extack) +{ + if (!(cfg->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Zero UDP checksum must be allowed for TX"); + return false; + } + + if (cfg->flags & ~MLXSW_SP_NVE_VXLAN_IPV4_SUPPORTED_FLAGS) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Unsupported flag"); + return false; + } + + return true; +} + +static bool mlxsw_sp_nve_vxlan_ipv6_flags_check(const struct vxlan_config *cfg, + struct netlink_ext_ack *extack) +{ + if (!(cfg->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Zero UDP checksum must be allowed for TX"); + return false; + } + + if (!(cfg->flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Zero UDP checksum must be allowed for RX"); + return false; + } + + if (cfg->flags & ~MLXSW_SP_NVE_VXLAN_IPV6_SUPPORTED_FLAGS) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Unsupported flag"); + return false; + } + + return true; +} + +static bool mlxsw_sp_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve, + const struct mlxsw_sp_nve_params *params, + struct netlink_ext_ack *extack) +{ + struct vxlan_dev *vxlan = netdev_priv(params->dev); + struct vxlan_config *cfg = &vxlan->cfg; + + if (vxlan_addr_multicast(&cfg->remote_ip)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Multicast destination IP is not supported"); + return false; + } + + if (vxlan_addr_any(&cfg->saddr)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Source address must be specified"); + return false; + } + + if (cfg->remote_ifindex) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Local interface is not supported"); + return false; + } + + if (cfg->port_min || cfg->port_max) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Only default UDP source port range is supported"); + return false; + } + + if (cfg->tos != 1) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: TOS must be configured to inherit"); + return false; + } + + if (cfg->flags & VXLAN_F_TTL_INHERIT) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: TTL must not be configured to inherit"); + return false; + } + + switch (cfg->saddr.sa.sa_family) { + case AF_INET: + if (!mlxsw_sp_nve_vxlan_ipv4_flags_check(cfg, extack)) + return false; + break; + case AF_INET6: + if (!mlxsw_sp_nve_vxlan_ipv6_flags_check(cfg, extack)) + return false; + break; + } + + if (cfg->ttl == 0) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: TTL must not be configured to 0"); + return false; + } + + if (cfg->label != 0) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Flow label must be configured to 0"); + return false; + } + + return true; +} + +static bool mlxsw_sp1_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve, + const struct mlxsw_sp_nve_params *params, + struct netlink_ext_ack *extack) +{ + if (params->ethertype == ETH_P_8021AD) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: 802.1ad bridge is not supported with VxLAN"); + return false; + } + + return mlxsw_sp_nve_vxlan_can_offload(nve, params, extack); +} + +static void +mlxsw_sp_nve_vxlan_ul_proto_sip_config(const struct vxlan_config *cfg, + struct mlxsw_sp_nve_config *config) +{ + switch 
(cfg->saddr.sa.sa_family) { + case AF_INET: + config->ul_proto = MLXSW_SP_L3_PROTO_IPV4; + config->ul_sip.addr4 = cfg->saddr.sin.sin_addr.s_addr; + break; + case AF_INET6: + config->ul_proto = MLXSW_SP_L3_PROTO_IPV6; + config->ul_sip.addr6 = cfg->saddr.sin6.sin6_addr; + break; + } +} + +static void mlxsw_sp_nve_vxlan_config(const struct mlxsw_sp_nve *nve, + const struct mlxsw_sp_nve_params *params, + struct mlxsw_sp_nve_config *config) +{ + struct vxlan_dev *vxlan = netdev_priv(params->dev); + struct vxlan_config *cfg = &vxlan->cfg; + + config->type = MLXSW_SP_NVE_TYPE_VXLAN; + config->ttl = cfg->ttl; + config->flowlabel = cfg->label; + config->learning_en = cfg->flags & VXLAN_F_LEARN ? 1 : 0; + config->ul_tb_id = RT_TABLE_MAIN; + mlxsw_sp_nve_vxlan_ul_proto_sip_config(cfg, config); + config->udp_dport = cfg->dst_port; +} + +static void +mlxsw_sp_nve_vxlan_config_prepare(char *tngcr_pl, + const struct mlxsw_sp_nve_config *config) +{ + struct in6_addr addr6; + u8 udp_sport; + + mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, true, + config->ttl); + /* VxLAN driver's default UDP source port range is 32768 (0x8000) + * to 60999 (0xee47). Set the upper 8 bits of the UDP source port + * to a random number between 0x80 and 0xee + */ + get_random_bytes(&udp_sport, sizeof(udp_sport)); + udp_sport = (udp_sport % (0xee - 0x80 + 1)) + 0x80; + mlxsw_reg_tngcr_nve_udp_sport_prefix_set(tngcr_pl, udp_sport); + + switch (config->ul_proto) { + case MLXSW_SP_L3_PROTO_IPV4: + mlxsw_reg_tngcr_usipv4_set(tngcr_pl, + be32_to_cpu(config->ul_sip.addr4)); + break; + case MLXSW_SP_L3_PROTO_IPV6: + addr6 = config->ul_sip.addr6; + mlxsw_reg_tngcr_usipv6_memcpy_to(tngcr_pl, + (const char *)&addr6); + break; + } +} + +static int +mlxsw_sp1_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_config *config) +{ + char tngcr_pl[MLXSW_REG_TNGCR_LEN]; + u16 ul_vr_id; + int err; + + err = mlxsw_sp_router_tb_id_vr_id(mlxsw_sp, config->ul_tb_id, + &ul_vr_id); + if (err) + return err; + + mlxsw_sp_nve_vxlan_config_prepare(tngcr_pl, config); + mlxsw_reg_tngcr_learn_enable_set(tngcr_pl, config->learning_en); + mlxsw_reg_tngcr_underlay_virtual_router_set(tngcr_pl, ul_vr_id); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl); +} + +static void mlxsw_sp1_nve_vxlan_config_clear(struct mlxsw_sp *mlxsw_sp) +{ + char tngcr_pl[MLXSW_REG_TNGCR_LEN]; + + mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, false, 0); + + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl); +} + +static int mlxsw_sp1_nve_vxlan_rtdp_set(struct mlxsw_sp *mlxsw_sp, + unsigned int tunnel_index) +{ + char rtdp_pl[MLXSW_REG_RTDP_LEN]; + + mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_NVE, tunnel_index); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl); +} + +static int mlxsw_sp1_nve_vxlan_init(struct mlxsw_sp_nve *nve, + const struct mlxsw_sp_nve_config *config) +{ + struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp; + int err; + + err = mlxsw_sp_parsing_vxlan_udp_dport_set(mlxsw_sp, config->udp_dport); + if (err) + return err; + + err = mlxsw_sp_parsing_depth_inc(mlxsw_sp); + if (err) + goto err_parsing_depth_inc; + + err = mlxsw_sp1_nve_vxlan_config_set(mlxsw_sp, config); + if (err) + goto err_config_set; + + err = mlxsw_sp1_nve_vxlan_rtdp_set(mlxsw_sp, nve->tunnel_index); + if (err) + goto err_rtdp_set; + + err = mlxsw_sp_router_nve_promote_decap(mlxsw_sp, config->ul_tb_id, + config->ul_proto, + &config->ul_sip, + nve->tunnel_index); + if (err) + goto err_promote_decap; + + 
return 0; + +err_promote_decap: +err_rtdp_set: + mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp); +err_config_set: + mlxsw_sp_parsing_depth_dec(mlxsw_sp); +err_parsing_depth_inc: + mlxsw_sp_parsing_vxlan_udp_dport_set(mlxsw_sp, 0); + return err; +} + +static void mlxsw_sp1_nve_vxlan_fini(struct mlxsw_sp_nve *nve) +{ + struct mlxsw_sp_nve_config *config = &nve->config; + struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp; + + mlxsw_sp_router_nve_demote_decap(mlxsw_sp, config->ul_tb_id, + config->ul_proto, &config->ul_sip); + mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp); + mlxsw_sp_parsing_depth_dec(mlxsw_sp); + mlxsw_sp_parsing_vxlan_udp_dport_set(mlxsw_sp, 0); +} + +static int +mlxsw_sp_nve_vxlan_fdb_replay(const struct net_device *nve_dev, __be32 vni, + struct netlink_ext_ack *extack) +{ + if (WARN_ON(!netif_is_vxlan(nve_dev))) + return -EINVAL; + return vxlan_fdb_replay(nve_dev, vni, &mlxsw_sp_switchdev_notifier, + extack); +} + +static void +mlxsw_sp_nve_vxlan_clear_offload(const struct net_device *nve_dev, __be32 vni) +{ + if (WARN_ON(!netif_is_vxlan(nve_dev))) + return; + vxlan_fdb_clear_offload(nve_dev, vni); +} + +const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops = { + .type = MLXSW_SP_NVE_TYPE_VXLAN, + .can_offload = mlxsw_sp1_nve_vxlan_can_offload, + .nve_config = mlxsw_sp_nve_vxlan_config, + .init = mlxsw_sp1_nve_vxlan_init, + .fini = mlxsw_sp1_nve_vxlan_fini, + .fdb_replay = mlxsw_sp_nve_vxlan_fdb_replay, + .fdb_clear_offload = mlxsw_sp_nve_vxlan_clear_offload, +}; + +static int mlxsw_sp2_nve_vxlan_learning_set(struct mlxsw_sp *mlxsw_sp, + bool learning_en) +{ + char tnpc_pl[MLXSW_REG_TNPC_LEN]; + + mlxsw_reg_tnpc_pack(tnpc_pl, MLXSW_REG_TUNNEL_PORT_NVE, + learning_en); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnpc), tnpc_pl); +} + +static int +mlxsw_sp2_nve_decap_ethertype_set(struct mlxsw_sp *mlxsw_sp) +{ + char spvid_pl[MLXSW_REG_SPVID_LEN] = {}; + + mlxsw_reg_spvid_tport_set(spvid_pl, true); + mlxsw_reg_spvid_local_port_set(spvid_pl, + MLXSW_REG_TUNNEL_PORT_NVE); + mlxsw_reg_spvid_egr_et_set_set(spvid_pl, true); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvid), spvid_pl); +} + +static int +mlxsw_sp2_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_config *config) +{ + char tngcr_pl[MLXSW_REG_TNGCR_LEN]; + char spvtr_pl[MLXSW_REG_SPVTR_LEN]; + u16 ul_rif_index; + int err; + + err = mlxsw_sp_router_ul_rif_get(mlxsw_sp, config->ul_tb_id, + &ul_rif_index); + if (err) + return err; + mlxsw_sp->nve->ul_rif_index = ul_rif_index; + + err = mlxsw_sp2_nve_vxlan_learning_set(mlxsw_sp, config->learning_en); + if (err) + goto err_vxlan_learning_set; + + mlxsw_sp_nve_vxlan_config_prepare(tngcr_pl, config); + mlxsw_reg_tngcr_underlay_rif_set(tngcr_pl, ul_rif_index); + + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl); + if (err) + goto err_tngcr_write; + + mlxsw_reg_spvtr_pack(spvtr_pl, true, MLXSW_REG_TUNNEL_PORT_NVE, + MLXSW_REG_SPVTR_IPVID_MODE_ALWAYS_PUSH_VLAN); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvtr), spvtr_pl); + if (err) + goto err_spvtr_write; + + err = mlxsw_sp2_nve_decap_ethertype_set(mlxsw_sp); + if (err) + goto err_decap_ethertype_set; + + return 0; + +err_decap_ethertype_set: + mlxsw_reg_spvtr_pack(spvtr_pl, true, MLXSW_REG_TUNNEL_PORT_NVE, + MLXSW_REG_SPVTR_IPVID_MODE_IEEE_COMPLIANT_PVID); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvtr), spvtr_pl); +err_spvtr_write: + mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, false, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl); 
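+ /* Descriptive note (not in the upstream source): the unwind path below
+  * mirrors the setup order above. After the SPVTR tunnel-port mode and the
+  * TNGCR NVE configuration have been reverted, the remaining labels disable
+  * learning again and release the underlay RIF reference taken at the start
+  * of this function before returning the error.
+  */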
+err_tngcr_write: + mlxsw_sp2_nve_vxlan_learning_set(mlxsw_sp, false); +err_vxlan_learning_set: + mlxsw_sp_router_ul_rif_put(mlxsw_sp, ul_rif_index); + return err; +} + +static void mlxsw_sp2_nve_vxlan_config_clear(struct mlxsw_sp *mlxsw_sp) +{ + char spvtr_pl[MLXSW_REG_SPVTR_LEN]; + char tngcr_pl[MLXSW_REG_TNGCR_LEN]; + + mlxsw_reg_spvtr_pack(spvtr_pl, true, MLXSW_REG_TUNNEL_PORT_NVE, + MLXSW_REG_SPVTR_IPVID_MODE_IEEE_COMPLIANT_PVID); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvtr), spvtr_pl); + mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, false, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl); + mlxsw_sp2_nve_vxlan_learning_set(mlxsw_sp, false); + mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->nve->ul_rif_index); +} + +static int mlxsw_sp2_nve_vxlan_rtdp_set(struct mlxsw_sp *mlxsw_sp, + unsigned int tunnel_index, + u16 ul_rif_index) +{ + char rtdp_pl[MLXSW_REG_RTDP_LEN]; + + mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_NVE, tunnel_index); + mlxsw_reg_rtdp_egress_router_interface_set(rtdp_pl, ul_rif_index); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl); +} + +static int mlxsw_sp2_nve_vxlan_init(struct mlxsw_sp_nve *nve, + const struct mlxsw_sp_nve_config *config) +{ + struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp; + int err; + + err = mlxsw_sp_parsing_vxlan_udp_dport_set(mlxsw_sp, config->udp_dport); + if (err) + return err; + + err = mlxsw_sp_parsing_depth_inc(mlxsw_sp); + if (err) + goto err_parsing_depth_inc; + + err = mlxsw_sp2_nve_vxlan_config_set(mlxsw_sp, config); + if (err) + goto err_config_set; + + err = mlxsw_sp2_nve_vxlan_rtdp_set(mlxsw_sp, nve->tunnel_index, + nve->ul_rif_index); + if (err) + goto err_rtdp_set; + + err = mlxsw_sp_router_nve_promote_decap(mlxsw_sp, config->ul_tb_id, + config->ul_proto, + &config->ul_sip, + nve->tunnel_index); + if (err) + goto err_promote_decap; + + return 0; + +err_promote_decap: +err_rtdp_set: + mlxsw_sp2_nve_vxlan_config_clear(mlxsw_sp); +err_config_set: + mlxsw_sp_parsing_depth_dec(mlxsw_sp); +err_parsing_depth_inc: + mlxsw_sp_parsing_vxlan_udp_dport_set(mlxsw_sp, 0); + return err; +} + +static void mlxsw_sp2_nve_vxlan_fini(struct mlxsw_sp_nve *nve) +{ + struct mlxsw_sp_nve_config *config = &nve->config; + struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp; + + mlxsw_sp_router_nve_demote_decap(mlxsw_sp, config->ul_tb_id, + config->ul_proto, &config->ul_sip); + mlxsw_sp2_nve_vxlan_config_clear(mlxsw_sp); + mlxsw_sp_parsing_depth_dec(mlxsw_sp); + mlxsw_sp_parsing_vxlan_udp_dport_set(mlxsw_sp, 0); +} + +const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops = { + .type = MLXSW_SP_NVE_TYPE_VXLAN, + .can_offload = mlxsw_sp_nve_vxlan_can_offload, + .nve_config = mlxsw_sp_nve_vxlan_config, + .init = mlxsw_sp2_nve_vxlan_init, + .fini = mlxsw_sp2_nve_vxlan_fini, + .fdb_replay = mlxsw_sp_nve_vxlan_fdb_replay, + .fdb_clear_offload = mlxsw_sp_nve_vxlan_clear_offload, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_pgt.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_pgt.c new file mode 100644 index 000000000..7dd3dba0f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_pgt.c @@ -0,0 +1,346 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include <linux/refcount.h> +#include <linux/idr.h> + +#include "spectrum.h" +#include "reg.h" + +struct mlxsw_sp_pgt { + struct idr pgt_idr; + u16 end_index; /* Exclusive. */ + struct mutex lock; /* Protects PGT. 
*/ + bool smpe_index_valid; +}; + +struct mlxsw_sp_pgt_entry { + struct list_head ports_list; + u16 index; + u16 smpe_index; +}; + +struct mlxsw_sp_pgt_entry_port { + struct list_head list; /* Member of 'ports_list'. */ + u16 local_port; +}; + +int mlxsw_sp_pgt_mid_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_mid) +{ + int index, err = 0; + + mutex_lock(&mlxsw_sp->pgt->lock); + index = idr_alloc(&mlxsw_sp->pgt->pgt_idr, NULL, 0, + mlxsw_sp->pgt->end_index, GFP_KERNEL); + + if (index < 0) { + err = index; + goto err_idr_alloc; + } + + *p_mid = index; + mutex_unlock(&mlxsw_sp->pgt->lock); + return 0; + +err_idr_alloc: + mutex_unlock(&mlxsw_sp->pgt->lock); + return err; +} + +void mlxsw_sp_pgt_mid_free(struct mlxsw_sp *mlxsw_sp, u16 mid_base) +{ + mutex_lock(&mlxsw_sp->pgt->lock); + WARN_ON(idr_remove(&mlxsw_sp->pgt->pgt_idr, mid_base)); + mutex_unlock(&mlxsw_sp->pgt->lock); +} + +int +mlxsw_sp_pgt_mid_alloc_range(struct mlxsw_sp *mlxsw_sp, u16 mid_base, u16 count) +{ + unsigned int idr_cursor; + int i, err; + + mutex_lock(&mlxsw_sp->pgt->lock); + + /* This function is supposed to be called several times as part of + * driver init, in specific order. Verify that the mid_index is the + * first free index in the idr, to be able to free the indexes in case + * of error. + */ + idr_cursor = idr_get_cursor(&mlxsw_sp->pgt->pgt_idr); + if (WARN_ON(idr_cursor != mid_base)) { + err = -EINVAL; + goto err_idr_cursor; + } + + for (i = 0; i < count; i++) { + err = idr_alloc_cyclic(&mlxsw_sp->pgt->pgt_idr, NULL, + mid_base, mid_base + count, GFP_KERNEL); + if (err < 0) + goto err_idr_alloc_cyclic; + } + + mutex_unlock(&mlxsw_sp->pgt->lock); + return 0; + +err_idr_alloc_cyclic: + for (i--; i >= 0; i--) + idr_remove(&mlxsw_sp->pgt->pgt_idr, mid_base + i); +err_idr_cursor: + mutex_unlock(&mlxsw_sp->pgt->lock); + return err; +} + +void +mlxsw_sp_pgt_mid_free_range(struct mlxsw_sp *mlxsw_sp, u16 mid_base, u16 count) +{ + struct idr *pgt_idr = &mlxsw_sp->pgt->pgt_idr; + int i; + + mutex_lock(&mlxsw_sp->pgt->lock); + + for (i = 0; i < count; i++) + WARN_ON_ONCE(idr_remove(pgt_idr, mid_base + i)); + + mutex_unlock(&mlxsw_sp->pgt->lock); +} + +static struct mlxsw_sp_pgt_entry_port * +mlxsw_sp_pgt_entry_port_lookup(struct mlxsw_sp_pgt_entry *pgt_entry, + u16 local_port) +{ + struct mlxsw_sp_pgt_entry_port *pgt_entry_port; + + list_for_each_entry(pgt_entry_port, &pgt_entry->ports_list, list) { + if (pgt_entry_port->local_port == local_port) + return pgt_entry_port; + } + + return NULL; +} + +static struct mlxsw_sp_pgt_entry * +mlxsw_sp_pgt_entry_create(struct mlxsw_sp_pgt *pgt, u16 mid, u16 smpe) +{ + struct mlxsw_sp_pgt_entry *pgt_entry; + void *ret; + int err; + + pgt_entry = kzalloc(sizeof(*pgt_entry), GFP_KERNEL); + if (!pgt_entry) + return ERR_PTR(-ENOMEM); + + ret = idr_replace(&pgt->pgt_idr, pgt_entry, mid); + if (IS_ERR(ret)) { + err = PTR_ERR(ret); + goto err_idr_replace; + } + + INIT_LIST_HEAD(&pgt_entry->ports_list); + pgt_entry->index = mid; + pgt_entry->smpe_index = smpe; + return pgt_entry; + +err_idr_replace: + kfree(pgt_entry); + return ERR_PTR(err); +} + +static void mlxsw_sp_pgt_entry_destroy(struct mlxsw_sp_pgt *pgt, + struct mlxsw_sp_pgt_entry *pgt_entry) +{ + WARN_ON(!list_empty(&pgt_entry->ports_list)); + + pgt_entry = idr_replace(&pgt->pgt_idr, NULL, pgt_entry->index); + if (WARN_ON(IS_ERR(pgt_entry))) + return; + + kfree(pgt_entry); +} + +static struct mlxsw_sp_pgt_entry * +mlxsw_sp_pgt_entry_get(struct mlxsw_sp_pgt *pgt, u16 mid, u16 smpe) +{ + struct mlxsw_sp_pgt_entry *pgt_entry; + + pgt_entry = 
idr_find(&pgt->pgt_idr, mid); + if (pgt_entry) + return pgt_entry; + + return mlxsw_sp_pgt_entry_create(pgt, mid, smpe); +} + +static void mlxsw_sp_pgt_entry_put(struct mlxsw_sp_pgt *pgt, u16 mid) +{ + struct mlxsw_sp_pgt_entry *pgt_entry; + + pgt_entry = idr_find(&pgt->pgt_idr, mid); + if (WARN_ON(!pgt_entry)) + return; + + if (list_empty(&pgt_entry->ports_list)) + mlxsw_sp_pgt_entry_destroy(pgt, pgt_entry); +} + +static void mlxsw_sp_pgt_smid2_port_set(char *smid2_pl, u16 local_port, + bool member) +{ + mlxsw_reg_smid2_port_set(smid2_pl, local_port, member); + mlxsw_reg_smid2_port_mask_set(smid2_pl, local_port, 1); +} + +static int +mlxsw_sp_pgt_entry_port_write(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_pgt_entry *pgt_entry, + u16 local_port, bool member) +{ + char *smid2_pl; + int err; + + smid2_pl = kmalloc(MLXSW_REG_SMID2_LEN, GFP_KERNEL); + if (!smid2_pl) + return -ENOMEM; + + mlxsw_reg_smid2_pack(smid2_pl, pgt_entry->index, 0, 0, + mlxsw_sp->pgt->smpe_index_valid, + pgt_entry->smpe_index); + + mlxsw_sp_pgt_smid2_port_set(smid2_pl, local_port, member); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid2), smid2_pl); + + kfree(smid2_pl); + + return err; +} + +static struct mlxsw_sp_pgt_entry_port * +mlxsw_sp_pgt_entry_port_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_pgt_entry *pgt_entry, + u16 local_port) +{ + struct mlxsw_sp_pgt_entry_port *pgt_entry_port; + int err; + + pgt_entry_port = kzalloc(sizeof(*pgt_entry_port), GFP_KERNEL); + if (!pgt_entry_port) + return ERR_PTR(-ENOMEM); + + err = mlxsw_sp_pgt_entry_port_write(mlxsw_sp, pgt_entry, local_port, + true); + if (err) + goto err_pgt_entry_port_write; + + pgt_entry_port->local_port = local_port; + list_add(&pgt_entry_port->list, &pgt_entry->ports_list); + + return pgt_entry_port; + +err_pgt_entry_port_write: + kfree(pgt_entry_port); + return ERR_PTR(err); +} + +static void +mlxsw_sp_pgt_entry_port_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_pgt_entry *pgt_entry, + struct mlxsw_sp_pgt_entry_port *pgt_entry_port) + +{ + list_del(&pgt_entry_port->list); + mlxsw_sp_pgt_entry_port_write(mlxsw_sp, pgt_entry, + pgt_entry_port->local_port, false); + kfree(pgt_entry_port); +} + +static int mlxsw_sp_pgt_entry_port_add(struct mlxsw_sp *mlxsw_sp, u16 mid, + u16 smpe, u16 local_port) +{ + struct mlxsw_sp_pgt_entry_port *pgt_entry_port; + struct mlxsw_sp_pgt_entry *pgt_entry; + int err; + + mutex_lock(&mlxsw_sp->pgt->lock); + + pgt_entry = mlxsw_sp_pgt_entry_get(mlxsw_sp->pgt, mid, smpe); + if (IS_ERR(pgt_entry)) { + err = PTR_ERR(pgt_entry); + goto err_pgt_entry_get; + } + + pgt_entry_port = mlxsw_sp_pgt_entry_port_create(mlxsw_sp, pgt_entry, + local_port); + if (IS_ERR(pgt_entry_port)) { + err = PTR_ERR(pgt_entry_port); + goto err_pgt_entry_port_get; + } + + mutex_unlock(&mlxsw_sp->pgt->lock); + return 0; + +err_pgt_entry_port_get: + mlxsw_sp_pgt_entry_put(mlxsw_sp->pgt, mid); +err_pgt_entry_get: + mutex_unlock(&mlxsw_sp->pgt->lock); + return err; +} + +static void mlxsw_sp_pgt_entry_port_del(struct mlxsw_sp *mlxsw_sp, + u16 mid, u16 smpe, u16 local_port) +{ + struct mlxsw_sp_pgt_entry_port *pgt_entry_port; + struct mlxsw_sp_pgt_entry *pgt_entry; + + mutex_lock(&mlxsw_sp->pgt->lock); + + pgt_entry = idr_find(&mlxsw_sp->pgt->pgt_idr, mid); + if (!pgt_entry) + goto out; + + pgt_entry_port = mlxsw_sp_pgt_entry_port_lookup(pgt_entry, local_port); + if (!pgt_entry_port) + goto out; + + mlxsw_sp_pgt_entry_port_destroy(mlxsw_sp, pgt_entry, pgt_entry_port); + mlxsw_sp_pgt_entry_put(mlxsw_sp->pgt, mid); + +out: + 
mutex_unlock(&mlxsw_sp->pgt->lock); +} + +int mlxsw_sp_pgt_entry_port_set(struct mlxsw_sp *mlxsw_sp, u16 mid, + u16 smpe, u16 local_port, bool member) +{ + if (member) + return mlxsw_sp_pgt_entry_port_add(mlxsw_sp, mid, smpe, + local_port); + + mlxsw_sp_pgt_entry_port_del(mlxsw_sp, mid, smpe, local_port); + return 0; +} + +int mlxsw_sp_pgt_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_pgt *pgt; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, PGT_SIZE)) + return -EIO; + + pgt = kzalloc(sizeof(*mlxsw_sp->pgt), GFP_KERNEL); + if (!pgt) + return -ENOMEM; + + idr_init(&pgt->pgt_idr); + pgt->end_index = MLXSW_CORE_RES_GET(mlxsw_sp->core, PGT_SIZE); + mutex_init(&pgt->lock); + pgt->smpe_index_valid = mlxsw_sp->pgt_smpe_index_valid; + mlxsw_sp->pgt = pgt; + return 0; +} + +void mlxsw_sp_pgt_fini(struct mlxsw_sp *mlxsw_sp) +{ + mutex_destroy(&mlxsw_sp->pgt->lock); + WARN_ON(!idr_is_empty(&mlxsw_sp->pgt->pgt_idr)); + idr_destroy(&mlxsw_sp->pgt->pgt_idr); + kfree(mlxsw_sp->pgt); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_policer.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_policer.c new file mode 100644 index 000000000..22ebb207c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_policer.c @@ -0,0 +1,468 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2020 Mellanox Technologies. All rights reserved */ + +#include <linux/idr.h> +#include <linux/log2.h> +#include <linux/mutex.h> +#include <linux/netlink.h> +#include <net/devlink.h> + +#include "spectrum.h" + +struct mlxsw_sp_policer_family { + enum mlxsw_sp_policer_type type; + enum mlxsw_reg_qpcr_g qpcr_type; + struct mlxsw_sp *mlxsw_sp; + u16 start_index; /* Inclusive */ + u16 end_index; /* Exclusive */ + struct idr policer_idr; + struct mutex lock; /* Protects policer_idr */ + atomic_t policers_count; + const struct mlxsw_sp_policer_family_ops *ops; +}; + +struct mlxsw_sp_policer { + struct mlxsw_sp_policer_params params; + u16 index; +}; + +struct mlxsw_sp_policer_family_ops { + int (*init)(struct mlxsw_sp_policer_family *family); + void (*fini)(struct mlxsw_sp_policer_family *family); + int (*policer_index_alloc)(struct mlxsw_sp_policer_family *family, + struct mlxsw_sp_policer *policer); + struct mlxsw_sp_policer * (*policer_index_free)(struct mlxsw_sp_policer_family *family, + u16 policer_index); + int (*policer_init)(struct mlxsw_sp_policer_family *family, + const struct mlxsw_sp_policer *policer); + int (*policer_params_check)(const struct mlxsw_sp_policer_family *family, + const struct mlxsw_sp_policer_params *params, + struct netlink_ext_ack *extack); +}; + +struct mlxsw_sp_policer_core { + struct mlxsw_sp_policer_family *family_arr[MLXSW_SP_POLICER_TYPE_MAX + 1]; + const struct mlxsw_sp_policer_core_ops *ops; + u8 lowest_bs_bits; + u8 highest_bs_bits; +}; + +struct mlxsw_sp_policer_core_ops { + int (*init)(struct mlxsw_sp_policer_core *policer_core); +}; + +static u64 mlxsw_sp_policer_rate_bytes_ps_kbps(u64 rate_bytes_ps) +{ + return div_u64(rate_bytes_ps, 1000) * BITS_PER_BYTE; +} + +static u8 mlxsw_sp_policer_burst_bytes_hw_units(u64 burst_bytes) +{ + /* Provided burst size is in bytes. The ASIC burst size value is + * (2 ^ bs) * 512 bits. Convert the provided size to 512-bit units. 
+ */ + u64 bs512 = div_u64(burst_bytes, 64); + + if (!bs512) + return 0; + + return fls64(bs512) - 1; +} + +static u64 mlxsw_sp_policer_single_rate_occ_get(void *priv) +{ + struct mlxsw_sp_policer_family *family = priv; + + return atomic_read(&family->policers_count); +} + +static int +mlxsw_sp_policer_single_rate_family_init(struct mlxsw_sp_policer_family *family) +{ + struct mlxsw_core *core = family->mlxsw_sp->core; + struct devlink *devlink; + + /* CPU policers are allocated from the first N policers in the global + * range, so skip them. + */ + if (!MLXSW_CORE_RES_VALID(core, MAX_GLOBAL_POLICERS) || + !MLXSW_CORE_RES_VALID(core, MAX_CPU_POLICERS)) + return -EIO; + + family->start_index = MLXSW_CORE_RES_GET(core, MAX_CPU_POLICERS); + family->end_index = MLXSW_CORE_RES_GET(core, MAX_GLOBAL_POLICERS); + + atomic_set(&family->policers_count, 0); + devlink = priv_to_devlink(core); + devl_resource_occ_get_register(devlink, + MLXSW_SP_RESOURCE_SINGLE_RATE_POLICERS, + mlxsw_sp_policer_single_rate_occ_get, + family); + + return 0; +} + +static void +mlxsw_sp_policer_single_rate_family_fini(struct mlxsw_sp_policer_family *family) +{ + struct devlink *devlink = priv_to_devlink(family->mlxsw_sp->core); + + devl_resource_occ_get_unregister(devlink, + MLXSW_SP_RESOURCE_SINGLE_RATE_POLICERS); + WARN_ON(atomic_read(&family->policers_count) != 0); +} + +static int +mlxsw_sp_policer_single_rate_index_alloc(struct mlxsw_sp_policer_family *family, + struct mlxsw_sp_policer *policer) +{ + int id; + + mutex_lock(&family->lock); + id = idr_alloc(&family->policer_idr, policer, family->start_index, + family->end_index, GFP_KERNEL); + mutex_unlock(&family->lock); + + if (id < 0) + return id; + + atomic_inc(&family->policers_count); + policer->index = id; + + return 0; +} + +static struct mlxsw_sp_policer * +mlxsw_sp_policer_single_rate_index_free(struct mlxsw_sp_policer_family *family, + u16 policer_index) +{ + struct mlxsw_sp_policer *policer; + + atomic_dec(&family->policers_count); + + mutex_lock(&family->lock); + policer = idr_remove(&family->policer_idr, policer_index); + mutex_unlock(&family->lock); + + WARN_ON(!policer); + + return policer; +} + +static int +mlxsw_sp_policer_single_rate_init(struct mlxsw_sp_policer_family *family, + const struct mlxsw_sp_policer *policer) +{ + u64 rate_kbps = mlxsw_sp_policer_rate_bytes_ps_kbps(policer->params.rate); + u8 bs = mlxsw_sp_policer_burst_bytes_hw_units(policer->params.burst); + struct mlxsw_sp *mlxsw_sp = family->mlxsw_sp; + char qpcr_pl[MLXSW_REG_QPCR_LEN]; + + mlxsw_reg_qpcr_pack(qpcr_pl, policer->index, MLXSW_REG_QPCR_IR_UNITS_K, + true, rate_kbps, bs); + mlxsw_reg_qpcr_clear_counter_set(qpcr_pl, true); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpcr), qpcr_pl); +} + +static int +mlxsw_sp_policer_single_rate_params_check(const struct mlxsw_sp_policer_family *family, + const struct mlxsw_sp_policer_params *params, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_policer_core *policer_core = family->mlxsw_sp->policer_core; + u64 rate_bps = params->rate * BITS_PER_BYTE; + u8 bs; + + if (!params->bytes) { + NL_SET_ERR_MSG_MOD(extack, "Only bandwidth policing is currently supported by single rate policers"); + return -EINVAL; + } + + if (!is_power_of_2(params->burst)) { + NL_SET_ERR_MSG_MOD(extack, "Policer burst size is not power of two"); + return -EINVAL; + } + + bs = mlxsw_sp_policer_burst_bytes_hw_units(params->burst); + + if (bs < policer_core->lowest_bs_bits) { + NL_SET_ERR_MSG_MOD(extack, "Policer burst size lower than limit"); + return 
-EINVAL; + } + + if (bs > policer_core->highest_bs_bits) { + NL_SET_ERR_MSG_MOD(extack, "Policer burst size higher than limit"); + return -EINVAL; + } + + if (rate_bps < MLXSW_REG_QPCR_LOWEST_CIR_BITS) { + NL_SET_ERR_MSG_MOD(extack, "Policer rate lower than limit"); + return -EINVAL; + } + + if (rate_bps > MLXSW_REG_QPCR_HIGHEST_CIR_BITS) { + NL_SET_ERR_MSG_MOD(extack, "Policer rate higher than limit"); + return -EINVAL; + } + + return 0; +} + +static const struct mlxsw_sp_policer_family_ops mlxsw_sp_policer_single_rate_ops = { + .init = mlxsw_sp_policer_single_rate_family_init, + .fini = mlxsw_sp_policer_single_rate_family_fini, + .policer_index_alloc = mlxsw_sp_policer_single_rate_index_alloc, + .policer_index_free = mlxsw_sp_policer_single_rate_index_free, + .policer_init = mlxsw_sp_policer_single_rate_init, + .policer_params_check = mlxsw_sp_policer_single_rate_params_check, +}; + +static const struct mlxsw_sp_policer_family mlxsw_sp_policer_single_rate_family = { + .type = MLXSW_SP_POLICER_TYPE_SINGLE_RATE, + .qpcr_type = MLXSW_REG_QPCR_G_GLOBAL, + .ops = &mlxsw_sp_policer_single_rate_ops, +}; + +static const struct mlxsw_sp_policer_family *mlxsw_sp_policer_family_arr[] = { + [MLXSW_SP_POLICER_TYPE_SINGLE_RATE] = &mlxsw_sp_policer_single_rate_family, +}; + +int mlxsw_sp_policer_add(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_policer_type type, + const struct mlxsw_sp_policer_params *params, + struct netlink_ext_ack *extack, u16 *p_policer_index) +{ + struct mlxsw_sp_policer_family *family; + struct mlxsw_sp_policer *policer; + int err; + + family = mlxsw_sp->policer_core->family_arr[type]; + + err = family->ops->policer_params_check(family, params, extack); + if (err) + return err; + + policer = kmalloc(sizeof(*policer), GFP_KERNEL); + if (!policer) + return -ENOMEM; + policer->params = *params; + + err = family->ops->policer_index_alloc(family, policer); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to allocate policer index"); + goto err_policer_index_alloc; + } + + err = family->ops->policer_init(family, policer); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to initialize policer"); + goto err_policer_init; + } + + *p_policer_index = policer->index; + + return 0; + +err_policer_init: + family->ops->policer_index_free(family, policer->index); +err_policer_index_alloc: + kfree(policer); + return err; +} + +void mlxsw_sp_policer_del(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_policer_type type, u16 policer_index) +{ + struct mlxsw_sp_policer_family *family; + struct mlxsw_sp_policer *policer; + + family = mlxsw_sp->policer_core->family_arr[type]; + policer = family->ops->policer_index_free(family, policer_index); + kfree(policer); +} + +int mlxsw_sp_policer_drops_counter_get(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_policer_type type, + u16 policer_index, u64 *p_drops) +{ + struct mlxsw_sp_policer_family *family; + char qpcr_pl[MLXSW_REG_QPCR_LEN]; + int err; + + family = mlxsw_sp->policer_core->family_arr[type]; + + MLXSW_REG_ZERO(qpcr, qpcr_pl); + mlxsw_reg_qpcr_pid_set(qpcr_pl, policer_index); + mlxsw_reg_qpcr_g_set(qpcr_pl, family->qpcr_type); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(qpcr), qpcr_pl); + if (err) + return err; + + *p_drops = mlxsw_reg_qpcr_violate_count_get(qpcr_pl); + + return 0; +} + +static int +mlxsw_sp_policer_family_register(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_policer_family *tmpl) +{ + struct mlxsw_sp_policer_family *family; + int err; + + family = kmemdup(tmpl, sizeof(*family), GFP_KERNEL); + if (!family) + return -ENOMEM; + + 
family->mlxsw_sp = mlxsw_sp; + idr_init(&family->policer_idr); + mutex_init(&family->lock); + + err = family->ops->init(family); + if (err) + goto err_family_init; + + if (WARN_ON(family->start_index >= family->end_index)) { + err = -EINVAL; + goto err_index_check; + } + + mlxsw_sp->policer_core->family_arr[tmpl->type] = family; + + return 0; + +err_index_check: + family->ops->fini(family); +err_family_init: + mutex_destroy(&family->lock); + idr_destroy(&family->policer_idr); + kfree(family); + return err; +} + +static void +mlxsw_sp_policer_family_unregister(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_policer_family *family) +{ + family->ops->fini(family); + mutex_destroy(&family->lock); + WARN_ON(!idr_is_empty(&family->policer_idr)); + idr_destroy(&family->policer_idr); + kfree(family); +} + +int mlxsw_sp_policers_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_policer_core *policer_core; + int i, err; + + policer_core = kzalloc(sizeof(*policer_core), GFP_KERNEL); + if (!policer_core) + return -ENOMEM; + mlxsw_sp->policer_core = policer_core; + policer_core->ops = mlxsw_sp->policer_core_ops; + + err = policer_core->ops->init(policer_core); + if (err) + goto err_init; + + for (i = 0; i < MLXSW_SP_POLICER_TYPE_MAX + 1; i++) { + err = mlxsw_sp_policer_family_register(mlxsw_sp, mlxsw_sp_policer_family_arr[i]); + if (err) + goto err_family_register; + } + + return 0; + +err_family_register: + for (i--; i >= 0; i--) { + struct mlxsw_sp_policer_family *family; + + family = mlxsw_sp->policer_core->family_arr[i]; + mlxsw_sp_policer_family_unregister(mlxsw_sp, family); + } +err_init: + kfree(mlxsw_sp->policer_core); + return err; +} + +void mlxsw_sp_policers_fini(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + for (i = MLXSW_SP_POLICER_TYPE_MAX; i >= 0; i--) { + struct mlxsw_sp_policer_family *family; + + family = mlxsw_sp->policer_core->family_arr[i]; + mlxsw_sp_policer_family_unregister(mlxsw_sp, family); + } + + kfree(mlxsw_sp->policer_core); +} + +int mlxsw_sp_policer_resources_register(struct mlxsw_core *mlxsw_core) +{ + u64 global_policers, cpu_policers, single_rate_policers; + struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct devlink_resource_size_params size_params; + int err; + + if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_GLOBAL_POLICERS) || + !MLXSW_CORE_RES_VALID(mlxsw_core, MAX_CPU_POLICERS)) + return -EIO; + + global_policers = MLXSW_CORE_RES_GET(mlxsw_core, MAX_GLOBAL_POLICERS); + cpu_policers = MLXSW_CORE_RES_GET(mlxsw_core, MAX_CPU_POLICERS); + single_rate_policers = global_policers - cpu_policers; + + devlink_resource_size_params_init(&size_params, global_policers, + global_policers, 1, + DEVLINK_RESOURCE_UNIT_ENTRY); + err = devl_resource_register(devlink, "global_policers", + global_policers, + MLXSW_SP_RESOURCE_GLOBAL_POLICERS, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &size_params); + if (err) + return err; + + devlink_resource_size_params_init(&size_params, single_rate_policers, + single_rate_policers, 1, + DEVLINK_RESOURCE_UNIT_ENTRY); + err = devl_resource_register(devlink, "single_rate_policers", + single_rate_policers, + MLXSW_SP_RESOURCE_SINGLE_RATE_POLICERS, + MLXSW_SP_RESOURCE_GLOBAL_POLICERS, + &size_params); + if (err) + return err; + + return 0; +} + +static int +mlxsw_sp1_policer_core_init(struct mlxsw_sp_policer_core *policer_core) +{ + policer_core->lowest_bs_bits = MLXSW_REG_QPCR_LOWEST_CBS_BITS_SP1; + policer_core->highest_bs_bits = MLXSW_REG_QPCR_HIGHEST_CBS_BITS_SP1; + + return 0; +} + +const struct mlxsw_sp_policer_core_ops mlxsw_sp1_policer_core_ops 
= { + .init = mlxsw_sp1_policer_core_init, +}; + +static int +mlxsw_sp2_policer_core_init(struct mlxsw_sp_policer_core *policer_core) +{ + policer_core->lowest_bs_bits = MLXSW_REG_QPCR_LOWEST_CBS_BITS_SP2; + policer_core->highest_bs_bits = MLXSW_REG_QPCR_HIGHEST_CBS_BITS_SP2; + + return 0; +} + +const struct mlxsw_sp_policer_core_ops mlxsw_sp2_policer_core_ops = { + .init = mlxsw_sp2_policer_core_init, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c new file mode 100644 index 000000000..7b01b9c20 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -0,0 +1,1726 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */ + +#include <linux/ptp_clock_kernel.h> +#include <linux/clocksource.h> +#include <linux/timecounter.h> +#include <linux/spinlock.h> +#include <linux/device.h> +#include <linux/rhashtable.h> +#include <linux/ptp_classify.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/net_tstamp.h> +#include <linux/refcount.h> + +#include "spectrum.h" +#include "spectrum_ptp.h" +#include "core.h" + +#define MLXSW_SP1_PTP_CLOCK_CYCLES_SHIFT 29 +#define MLXSW_SP1_PTP_CLOCK_FREQ_KHZ 156257 /* 6.4nSec */ +#define MLXSW_SP1_PTP_CLOCK_MASK 64 + +#define MLXSW_SP1_PTP_HT_GC_INTERVAL 500 /* ms */ + +/* How long, approximately, should the unmatched entries stay in the hash table + * before they are collected. Should be evenly divisible by the GC interval. + */ +#define MLXSW_SP1_PTP_HT_GC_TIMEOUT 1000 /* ms */ + +struct mlxsw_sp_ptp_state { + struct mlxsw_sp *mlxsw_sp; +}; + +struct mlxsw_sp1_ptp_state { + struct mlxsw_sp_ptp_state common; + struct rhltable unmatched_ht; + spinlock_t unmatched_lock; /* protects the HT */ + struct delayed_work ht_gc_dw; + u32 gc_cycle; +}; + +struct mlxsw_sp2_ptp_state { + struct mlxsw_sp_ptp_state common; + refcount_t ptp_port_enabled_ref; /* Number of ports with time stamping + * enabled. + */ + struct hwtstamp_config config; + struct mutex lock; /* Protects 'config' and HW configuration. 
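+ * Time stamping on Spectrum-2 and newer ASICs is configured for the whole + * ASIC rather than per port, which is why a single configuration and a + * reference count of enabled ports are kept here instead of per-port state.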
*/ +}; + +struct mlxsw_sp1_ptp_key { + u16 local_port; + u8 message_type; + u16 sequence_id; + u8 domain_number; + bool ingress; +}; + +struct mlxsw_sp1_ptp_unmatched { + struct mlxsw_sp1_ptp_key key; + struct rhlist_head ht_node; + struct rcu_head rcu; + struct sk_buff *skb; + u64 timestamp; + u32 gc_cycle; +}; + +static const struct rhashtable_params mlxsw_sp1_ptp_unmatched_ht_params = { + .key_len = sizeof_field(struct mlxsw_sp1_ptp_unmatched, key), + .key_offset = offsetof(struct mlxsw_sp1_ptp_unmatched, key), + .head_offset = offsetof(struct mlxsw_sp1_ptp_unmatched, ht_node), +}; + +struct mlxsw_sp_ptp_clock { + struct mlxsw_core *core; + struct ptp_clock *ptp; + struct ptp_clock_info ptp_info; +}; + +struct mlxsw_sp1_ptp_clock { + struct mlxsw_sp_ptp_clock common; + spinlock_t lock; /* protect this structure */ + struct cyclecounter cycles; + struct timecounter tc; + u32 nominal_c_mult; + unsigned long overflow_period; + struct delayed_work overflow_work; +}; + +static struct mlxsw_sp1_ptp_state * +mlxsw_sp1_ptp_state(struct mlxsw_sp *mlxsw_sp) +{ + return container_of(mlxsw_sp->ptp_state, struct mlxsw_sp1_ptp_state, + common); +} + +static struct mlxsw_sp2_ptp_state * +mlxsw_sp2_ptp_state(struct mlxsw_sp *mlxsw_sp) +{ + return container_of(mlxsw_sp->ptp_state, struct mlxsw_sp2_ptp_state, + common); +} + +static struct mlxsw_sp1_ptp_clock * +mlxsw_sp1_ptp_clock(struct ptp_clock_info *ptp) +{ + return container_of(ptp, struct mlxsw_sp1_ptp_clock, common.ptp_info); +} + +static u64 __mlxsw_sp1_ptp_read_frc(struct mlxsw_sp1_ptp_clock *clock, + struct ptp_system_timestamp *sts) +{ + struct mlxsw_core *mlxsw_core = clock->common.core; + u32 frc_h1, frc_h2, frc_l; + + frc_h1 = mlxsw_core_read_frc_h(mlxsw_core); + ptp_read_system_prets(sts); + frc_l = mlxsw_core_read_frc_l(mlxsw_core); + ptp_read_system_postts(sts); + frc_h2 = mlxsw_core_read_frc_h(mlxsw_core); + + if (frc_h1 != frc_h2) { + /* wrap around */ + ptp_read_system_prets(sts); + frc_l = mlxsw_core_read_frc_l(mlxsw_core); + ptp_read_system_postts(sts); + } + + return (u64) frc_l | (u64) frc_h2 << 32; +} + +static u64 mlxsw_sp1_ptp_read_frc(const struct cyclecounter *cc) +{ + struct mlxsw_sp1_ptp_clock *clock = + container_of(cc, struct mlxsw_sp1_ptp_clock, cycles); + + return __mlxsw_sp1_ptp_read_frc(clock, NULL) & cc->mask; +} + +static int +mlxsw_sp_ptp_phc_adjfreq(struct mlxsw_sp_ptp_clock *clock, int freq_adj) +{ + struct mlxsw_core *mlxsw_core = clock->core; + char mtutc_pl[MLXSW_REG_MTUTC_LEN]; + + mlxsw_reg_mtutc_pack(mtutc_pl, MLXSW_REG_MTUTC_OPERATION_ADJUST_FREQ, + freq_adj, 0, 0, 0); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtutc), mtutc_pl); +} + +static u64 mlxsw_sp1_ptp_ns2cycles(const struct timecounter *tc, u64 nsec) +{ + u64 cycles = (u64) nsec; + + cycles <<= tc->cc->shift; + cycles = div_u64(cycles, tc->cc->mult); + + return cycles; +} + +static int +mlxsw_sp1_ptp_phc_settime(struct mlxsw_sp1_ptp_clock *clock, u64 nsec) +{ + struct mlxsw_core *mlxsw_core = clock->common.core; + u64 next_sec, next_sec_in_nsec, cycles; + char mtutc_pl[MLXSW_REG_MTUTC_LEN]; + char mtpps_pl[MLXSW_REG_MTPPS_LEN]; + int err; + + next_sec = div_u64(nsec, NSEC_PER_SEC) + 1; + next_sec_in_nsec = next_sec * NSEC_PER_SEC; + + spin_lock_bh(&clock->lock); + cycles = mlxsw_sp1_ptp_ns2cycles(&clock->tc, next_sec_in_nsec); + spin_unlock_bh(&clock->lock); + + mlxsw_reg_mtpps_vpin_pack(mtpps_pl, cycles); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtpps), mtpps_pl); + if (err) + return err; + + mlxsw_reg_mtutc_pack(mtutc_pl, + 
MLXSW_REG_MTUTC_OPERATION_SET_TIME_AT_NEXT_SEC, + 0, next_sec, 0, 0); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtutc), mtutc_pl); +} + +static int mlxsw_sp1_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) +{ + struct mlxsw_sp1_ptp_clock *clock = mlxsw_sp1_ptp_clock(ptp); + int neg_adj = 0; + u32 diff; + u64 adj; + s32 ppb; + + ppb = scaled_ppm_to_ppb(scaled_ppm); + + if (ppb < 0) { + neg_adj = 1; + ppb = -ppb; + } + + adj = clock->nominal_c_mult; + adj *= ppb; + diff = div_u64(adj, NSEC_PER_SEC); + + spin_lock_bh(&clock->lock); + timecounter_read(&clock->tc); + clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff : + clock->nominal_c_mult + diff; + spin_unlock_bh(&clock->lock); + + return mlxsw_sp_ptp_phc_adjfreq(&clock->common, neg_adj ? -ppb : ppb); +} + +static int mlxsw_sp1_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct mlxsw_sp1_ptp_clock *clock = mlxsw_sp1_ptp_clock(ptp); + u64 nsec; + + spin_lock_bh(&clock->lock); + timecounter_adjtime(&clock->tc, delta); + nsec = timecounter_read(&clock->tc); + spin_unlock_bh(&clock->lock); + + return mlxsw_sp1_ptp_phc_settime(clock, nsec); +} + +static int mlxsw_sp1_ptp_gettimex(struct ptp_clock_info *ptp, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + struct mlxsw_sp1_ptp_clock *clock = mlxsw_sp1_ptp_clock(ptp); + u64 cycles, nsec; + + spin_lock_bh(&clock->lock); + cycles = __mlxsw_sp1_ptp_read_frc(clock, sts); + nsec = timecounter_cyc2time(&clock->tc, cycles); + spin_unlock_bh(&clock->lock); + + *ts = ns_to_timespec64(nsec); + + return 0; +} + +static int mlxsw_sp1_ptp_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct mlxsw_sp1_ptp_clock *clock = mlxsw_sp1_ptp_clock(ptp); + u64 nsec = timespec64_to_ns(ts); + + spin_lock_bh(&clock->lock); + timecounter_init(&clock->tc, &clock->cycles, nsec); + nsec = timecounter_read(&clock->tc); + spin_unlock_bh(&clock->lock); + + return mlxsw_sp1_ptp_phc_settime(clock, nsec); +} + +static const struct ptp_clock_info mlxsw_sp1_ptp_clock_info = { + .owner = THIS_MODULE, + .name = "mlxsw_sp_clock", + .max_adj = 100000000, + .adjfine = mlxsw_sp1_ptp_adjfine, + .adjtime = mlxsw_sp1_ptp_adjtime, + .gettimex64 = mlxsw_sp1_ptp_gettimex, + .settime64 = mlxsw_sp1_ptp_settime, +}; + +static void mlxsw_sp1_ptp_clock_overflow(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlxsw_sp1_ptp_clock *clock; + + clock = container_of(dwork, struct mlxsw_sp1_ptp_clock, overflow_work); + + spin_lock_bh(&clock->lock); + timecounter_read(&clock->tc); + spin_unlock_bh(&clock->lock); + mlxsw_core_schedule_dw(&clock->overflow_work, clock->overflow_period); +} + +struct mlxsw_sp_ptp_clock * +mlxsw_sp1_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev) +{ + u64 overflow_cycles, nsec, frac = 0; + struct mlxsw_sp1_ptp_clock *clock; + int err; + + clock = kzalloc(sizeof(*clock), GFP_KERNEL); + if (!clock) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&clock->lock); + clock->cycles.read = mlxsw_sp1_ptp_read_frc; + clock->cycles.shift = MLXSW_SP1_PTP_CLOCK_CYCLES_SHIFT; + clock->cycles.mult = clocksource_khz2mult(MLXSW_SP1_PTP_CLOCK_FREQ_KHZ, + clock->cycles.shift); + clock->nominal_c_mult = clock->cycles.mult; + clock->cycles.mask = CLOCKSOURCE_MASK(MLXSW_SP1_PTP_CLOCK_MASK); + clock->common.core = mlxsw_sp->core; + + timecounter_init(&clock->tc, &clock->cycles, 0); + + /* Calculate period in seconds to call the overflow watchdog - to make + * sure counter is checked at least twice every wrap around. 
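+ * Each watchdog run reads the timecounter, which folds the cycles elapsed + * since the previous read into the cached nanosecond value before the + * free-running counter can alias.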
+ * The period is calculated as the minimum between max HW cycles count + * (The clock source mask) and max amount of cycles that can be + * multiplied by clock multiplier where the result doesn't exceed + * 64bits. + */ + overflow_cycles = div64_u64(~0ULL >> 1, clock->cycles.mult); + overflow_cycles = min(overflow_cycles, div_u64(clock->cycles.mask, 3)); + + nsec = cyclecounter_cyc2ns(&clock->cycles, overflow_cycles, 0, &frac); + clock->overflow_period = nsecs_to_jiffies(nsec); + + INIT_DELAYED_WORK(&clock->overflow_work, mlxsw_sp1_ptp_clock_overflow); + mlxsw_core_schedule_dw(&clock->overflow_work, 0); + + clock->common.ptp_info = mlxsw_sp1_ptp_clock_info; + clock->common.ptp = ptp_clock_register(&clock->common.ptp_info, dev); + if (IS_ERR(clock->common.ptp)) { + err = PTR_ERR(clock->common.ptp); + dev_err(dev, "ptp_clock_register failed %d\n", err); + goto err_ptp_clock_register; + } + + return &clock->common; + +err_ptp_clock_register: + cancel_delayed_work_sync(&clock->overflow_work); + kfree(clock); + return ERR_PTR(err); +} + +void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock_common) +{ + struct mlxsw_sp1_ptp_clock *clock = + container_of(clock_common, struct mlxsw_sp1_ptp_clock, common); + + ptp_clock_unregister(clock_common->ptp); + cancel_delayed_work_sync(&clock->overflow_work); + kfree(clock); +} + +static u64 mlxsw_sp2_ptp_read_utc(struct mlxsw_sp_ptp_clock *clock, + struct ptp_system_timestamp *sts) +{ + struct mlxsw_core *mlxsw_core = clock->core; + u32 utc_sec1, utc_sec2, utc_nsec; + + utc_sec1 = mlxsw_core_read_utc_sec(mlxsw_core); + ptp_read_system_prets(sts); + utc_nsec = mlxsw_core_read_utc_nsec(mlxsw_core); + ptp_read_system_postts(sts); + utc_sec2 = mlxsw_core_read_utc_sec(mlxsw_core); + + if (utc_sec1 != utc_sec2) { + /* Wrap around. */ + ptp_read_system_prets(sts); + utc_nsec = mlxsw_core_read_utc_nsec(mlxsw_core); + ptp_read_system_postts(sts); + } + + return (u64)utc_sec2 * NSEC_PER_SEC + utc_nsec; +} + +static int +mlxsw_sp2_ptp_phc_settime(struct mlxsw_sp_ptp_clock *clock, u64 nsec) +{ + struct mlxsw_core *mlxsw_core = clock->core; + char mtutc_pl[MLXSW_REG_MTUTC_LEN]; + u32 sec, nsec_rem; + + sec = div_u64_rem(nsec, NSEC_PER_SEC, &nsec_rem); + mlxsw_reg_mtutc_pack(mtutc_pl, + MLXSW_REG_MTUTC_OPERATION_SET_TIME_IMMEDIATE, + 0, sec, nsec_rem, 0); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtutc), mtutc_pl); +} + +static int mlxsw_sp2_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); + s32 ppb = scaled_ppm_to_ppb(scaled_ppm); + + /* In Spectrum-2 and newer ASICs, the frequency adjustment in MTUTC is + * reversed, positive values mean to decrease the frequency. Adjust the + * sign of PPB to this behavior. + */ + return mlxsw_sp_ptp_phc_adjfreq(clock, -ppb); +} + +static int mlxsw_sp2_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); + struct mlxsw_core *mlxsw_core = clock->core; + char mtutc_pl[MLXSW_REG_MTUTC_LEN]; + + /* HW time adjustment range is s16. If out of range, set time instead. 
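+ * The fallback below reads the current UTC time, applies the delta in + * software and programs the result back through MTUTC with the + * SET_TIME_IMMEDIATE operation.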
*/ + if (delta < S16_MIN || delta > S16_MAX) { + u64 nsec; + + nsec = mlxsw_sp2_ptp_read_utc(clock, NULL); + nsec += delta; + + return mlxsw_sp2_ptp_phc_settime(clock, nsec); + } + + mlxsw_reg_mtutc_pack(mtutc_pl, + MLXSW_REG_MTUTC_OPERATION_ADJUST_TIME, + 0, 0, 0, delta); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtutc), mtutc_pl); +} + +static int mlxsw_sp2_ptp_gettimex(struct ptp_clock_info *ptp, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); + u64 nsec; + + nsec = mlxsw_sp2_ptp_read_utc(clock, sts); + *ts = ns_to_timespec64(nsec); + + return 0; +} + +static int mlxsw_sp2_ptp_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); + u64 nsec = timespec64_to_ns(ts); + + return mlxsw_sp2_ptp_phc_settime(clock, nsec); +} + +static const struct ptp_clock_info mlxsw_sp2_ptp_clock_info = { + .owner = THIS_MODULE, + .name = "mlxsw_sp_clock", + .max_adj = MLXSW_REG_MTUTC_MAX_FREQ_ADJ, + .adjfine = mlxsw_sp2_ptp_adjfine, + .adjtime = mlxsw_sp2_ptp_adjtime, + .gettimex64 = mlxsw_sp2_ptp_gettimex, + .settime64 = mlxsw_sp2_ptp_settime, +}; + +struct mlxsw_sp_ptp_clock * +mlxsw_sp2_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev) +{ + struct mlxsw_sp_ptp_clock *clock; + int err; + + clock = kzalloc(sizeof(*clock), GFP_KERNEL); + if (!clock) + return ERR_PTR(-ENOMEM); + + clock->core = mlxsw_sp->core; + + clock->ptp_info = mlxsw_sp2_ptp_clock_info; + + err = mlxsw_sp2_ptp_phc_settime(clock, 0); + if (err) { + dev_err(dev, "setting UTC time failed %d\n", err); + goto err_ptp_phc_settime; + } + + clock->ptp = ptp_clock_register(&clock->ptp_info, dev); + if (IS_ERR(clock->ptp)) { + err = PTR_ERR(clock->ptp); + dev_err(dev, "ptp_clock_register failed %d\n", err); + goto err_ptp_clock_register; + } + + return clock; + +err_ptp_clock_register: +err_ptp_phc_settime: + kfree(clock); + return ERR_PTR(err); +} + +void mlxsw_sp2_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock) +{ + ptp_clock_unregister(clock->ptp); + kfree(clock); +} + +static int mlxsw_sp_ptp_parse(struct sk_buff *skb, + u8 *p_domain_number, + u8 *p_message_type, + u16 *p_sequence_id) +{ + unsigned int ptp_class; + struct ptp_header *hdr; + + ptp_class = ptp_classify_raw(skb); + + switch (ptp_class & PTP_CLASS_VMASK) { + case PTP_CLASS_V1: + case PTP_CLASS_V2: + break; + default: + return -ERANGE; + } + + hdr = ptp_parse_header(skb, ptp_class); + if (!hdr) + return -EINVAL; + + *p_message_type = ptp_get_msgtype(hdr, ptp_class); + *p_domain_number = hdr->domain_number; + *p_sequence_id = be16_to_cpu(hdr->sequence_id); + + return 0; +} + +/* Returns NULL on successful insertion, a pointer on conflict, or an ERR_PTR on + * error. 
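+ * (In this version the result is reported as an int: 0 on successful + * insertion, -ENOMEM when the tracking entry cannot be allocated, or the + * error code returned by rhltable_insert().)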
+ */ +static int +mlxsw_sp1_ptp_unmatched_save(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_key key, + struct sk_buff *skb, + u64 timestamp) +{ + int cycles = MLXSW_SP1_PTP_HT_GC_TIMEOUT / MLXSW_SP1_PTP_HT_GC_INTERVAL; + struct mlxsw_sp1_ptp_state *ptp_state = mlxsw_sp1_ptp_state(mlxsw_sp); + struct mlxsw_sp1_ptp_unmatched *unmatched; + int err; + + unmatched = kzalloc(sizeof(*unmatched), GFP_ATOMIC); + if (!unmatched) + return -ENOMEM; + + unmatched->key = key; + unmatched->skb = skb; + unmatched->timestamp = timestamp; + unmatched->gc_cycle = ptp_state->gc_cycle + cycles; + + err = rhltable_insert(&ptp_state->unmatched_ht, &unmatched->ht_node, + mlxsw_sp1_ptp_unmatched_ht_params); + if (err) + kfree(unmatched); + + return err; +} + +static struct mlxsw_sp1_ptp_unmatched * +mlxsw_sp1_ptp_unmatched_lookup(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_key key, int *p_length) +{ + struct mlxsw_sp1_ptp_state *ptp_state = mlxsw_sp1_ptp_state(mlxsw_sp); + struct mlxsw_sp1_ptp_unmatched *unmatched, *last = NULL; + struct rhlist_head *tmp, *list; + int length = 0; + + list = rhltable_lookup(&ptp_state->unmatched_ht, &key, + mlxsw_sp1_ptp_unmatched_ht_params); + rhl_for_each_entry_rcu(unmatched, tmp, list, ht_node) { + last = unmatched; + length++; + } + + *p_length = length; + return last; +} + +static int +mlxsw_sp1_ptp_unmatched_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_unmatched *unmatched) +{ + struct mlxsw_sp1_ptp_state *ptp_state = mlxsw_sp1_ptp_state(mlxsw_sp); + + return rhltable_remove(&ptp_state->unmatched_ht, + &unmatched->ht_node, + mlxsw_sp1_ptp_unmatched_ht_params); +} + +/* This function is called in the following scenarios: + * + * 1) When a packet is matched with its timestamp. + * 2) In several situation when it is necessary to immediately pass on + * an SKB without a timestamp. + * 3) From GC indirectly through mlxsw_sp1_ptp_unmatched_finish(). + * This case is similar to 2) above. + */ +static void mlxsw_sp1_ptp_packet_finish(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u16 local_port, + bool ingress, + struct skb_shared_hwtstamps *hwtstamps) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + + /* Between capturing the packet and finishing it, there is a window of + * opportunity for the originating port to go away (e.g. due to a + * split). Also make sure the SKB device reference is still valid. + */ + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!(mlxsw_sp_port && (!skb->dev || skb->dev == mlxsw_sp_port->dev))) { + dev_kfree_skb_any(skb); + return; + } + + if (ingress) { + if (hwtstamps) + *skb_hwtstamps(skb) = *hwtstamps; + mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp); + } else { + /* skb_tstamp_tx() allows hwtstamps to be NULL. 
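+ * Callers in scenarios 2) and 3) above pass NULL here, since no time + * stamp is available for the SKB.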
*/ + skb_tstamp_tx(skb, hwtstamps); + dev_kfree_skb_any(skb); + } +} + +static void mlxsw_sp1_packet_timestamp(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_key key, + struct sk_buff *skb, + u64 timestamp) +{ + struct mlxsw_sp_ptp_clock *clock_common = mlxsw_sp->clock; + struct mlxsw_sp1_ptp_clock *clock = + container_of(clock_common, struct mlxsw_sp1_ptp_clock, common); + + struct skb_shared_hwtstamps hwtstamps; + u64 nsec; + + spin_lock_bh(&clock->lock); + nsec = timecounter_cyc2time(&clock->tc, timestamp); + spin_unlock_bh(&clock->lock); + + hwtstamps.hwtstamp = ns_to_ktime(nsec); + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, + key.local_port, key.ingress, &hwtstamps); +} + +static void +mlxsw_sp1_ptp_unmatched_finish(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_unmatched *unmatched) +{ + if (unmatched->skb && unmatched->timestamp) + mlxsw_sp1_packet_timestamp(mlxsw_sp, unmatched->key, + unmatched->skb, + unmatched->timestamp); + else if (unmatched->skb) + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, unmatched->skb, + unmatched->key.local_port, + unmatched->key.ingress, NULL); + kfree_rcu(unmatched, rcu); +} + +static void mlxsw_sp1_ptp_unmatched_free_fn(void *ptr, void *arg) +{ + struct mlxsw_sp1_ptp_unmatched *unmatched = ptr; + + /* This is invoked at a point where the ports are gone already. Nothing + * to do with whatever is left in the HT but to free it. + */ + if (unmatched->skb) + dev_kfree_skb_any(unmatched->skb); + kfree_rcu(unmatched, rcu); +} + +static void mlxsw_sp1_ptp_got_piece(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_key key, + struct sk_buff *skb, u64 timestamp) +{ + struct mlxsw_sp1_ptp_state *ptp_state = mlxsw_sp1_ptp_state(mlxsw_sp); + struct mlxsw_sp1_ptp_unmatched *unmatched; + int length; + int err; + + rcu_read_lock(); + + spin_lock(&ptp_state->unmatched_lock); + + unmatched = mlxsw_sp1_ptp_unmatched_lookup(mlxsw_sp, key, &length); + if (skb && unmatched && unmatched->timestamp) { + unmatched->skb = skb; + } else if (timestamp && unmatched && unmatched->skb) { + unmatched->timestamp = timestamp; + } else { + /* Either there is no entry to match, or one that is there is + * incompatible. + */ + if (length < 100) + err = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key, + skb, timestamp); + else + err = -E2BIG; + if (err && skb) + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, + key.local_port, + key.ingress, NULL); + unmatched = NULL; + } + + if (unmatched) { + err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp, unmatched); + WARN_ON_ONCE(err); + } + + spin_unlock(&ptp_state->unmatched_lock); + + if (unmatched) + mlxsw_sp1_ptp_unmatched_finish(mlxsw_sp, unmatched); + + rcu_read_unlock(); +} + +static void mlxsw_sp1_ptp_got_packet(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u16 local_port, + bool ingress) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp1_ptp_key key; + u8 types; + int err; + + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) + goto immediate; + + types = ingress ? mlxsw_sp_port->ptp.ing_types : + mlxsw_sp_port->ptp.egr_types; + if (!types) + goto immediate; + + memset(&key, 0, sizeof(key)); + key.local_port = local_port; + key.ingress = ingress; + + err = mlxsw_sp_ptp_parse(skb, &key.domain_number, &key.message_type, + &key.sequence_id); + if (err) + goto immediate; + + /* For packets whose timestamping was not enabled on this port, don't + * bother trying to match the timestamp. 
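+ * 'types' is a bit mask indexed by PTP message type, reflecting what was + * requested for this port through the hwtstamp configuration.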
+ */ + if (!((1 << key.message_type) & types)) + goto immediate; + + mlxsw_sp1_ptp_got_piece(mlxsw_sp, key, skb, 0); + return; + +immediate: + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, local_port, ingress, NULL); +} + +void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, + u16 local_port, u8 message_type, + u8 domain_number, u16 sequence_id, + u64 timestamp) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp1_ptp_key key; + u8 types; + + if (WARN_ON_ONCE(!mlxsw_sp_local_port_is_valid(mlxsw_sp, local_port))) + return; + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) + return; + + types = ingress ? mlxsw_sp_port->ptp.ing_types : + mlxsw_sp_port->ptp.egr_types; + + /* For message types whose timestamping was not enabled on this port, + * don't bother with the timestamp. + */ + if (!((1 << message_type) & types)) + return; + + memset(&key, 0, sizeof(key)); + key.local_port = local_port; + key.domain_number = domain_number; + key.message_type = message_type; + key.sequence_id = sequence_id; + key.ingress = ingress; + + mlxsw_sp1_ptp_got_piece(mlxsw_sp, key, NULL, timestamp); +} + +void mlxsw_sp1_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u16 local_port) +{ + skb_reset_mac_header(skb); + mlxsw_sp1_ptp_got_packet(mlxsw_sp, skb, local_port, true); +} + +void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u16 local_port) +{ + mlxsw_sp1_ptp_got_packet(mlxsw_sp, skb, local_port, false); +} + +static void +mlxsw_sp1_ptp_ht_gc_collect(struct mlxsw_sp1_ptp_state *ptp_state, + struct mlxsw_sp1_ptp_unmatched *unmatched) +{ + struct mlxsw_sp *mlxsw_sp = ptp_state->common.mlxsw_sp; + struct mlxsw_sp_ptp_port_dir_stats *stats; + struct mlxsw_sp_port *mlxsw_sp_port; + int err; + + /* If an unmatched entry has an SKB, it has to be handed over to the + * networking stack. This is usually done from a trap handler, which is + * invoked in a softirq context. Here we are going to do it in process + * context. If that were to be interrupted by a softirq, it could cause + * a deadlock when an attempt is made to take an already-taken lock + * somewhere along the sending path. Disable softirqs to prevent this. + */ + local_bh_disable(); + + spin_lock(&ptp_state->unmatched_lock); + err = rhltable_remove(&ptp_state->unmatched_ht, &unmatched->ht_node, + mlxsw_sp1_ptp_unmatched_ht_params); + spin_unlock(&ptp_state->unmatched_lock); + + if (err) + /* The packet was matched with timestamp during the walk. */ + goto out; + + mlxsw_sp_port = mlxsw_sp->ports[unmatched->key.local_port]; + if (mlxsw_sp_port) { + stats = unmatched->key.ingress ? + &mlxsw_sp_port->ptp.stats.rx_gcd : + &mlxsw_sp_port->ptp.stats.tx_gcd; + if (unmatched->skb) + stats->packets++; + else + stats->timestamps++; + } + + /* mlxsw_sp1_ptp_unmatched_finish() invokes netif_receive_skb(). While + * the comment at that function states that it can only be called in + * soft IRQ context, this pattern of local_bh_disable() + + * netif_receive_skb(), in process context, is seen elsewhere in the + * kernel, notably in pktgen. 
+ */ + mlxsw_sp1_ptp_unmatched_finish(mlxsw_sp, unmatched); + +out: + local_bh_enable(); +} + +static void mlxsw_sp1_ptp_ht_gc(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlxsw_sp1_ptp_unmatched *unmatched; + struct mlxsw_sp1_ptp_state *ptp_state; + struct rhashtable_iter iter; + u32 gc_cycle; + void *obj; + + ptp_state = container_of(dwork, struct mlxsw_sp1_ptp_state, ht_gc_dw); + gc_cycle = ptp_state->gc_cycle++; + + rhltable_walk_enter(&ptp_state->unmatched_ht, &iter); + rhashtable_walk_start(&iter); + while ((obj = rhashtable_walk_next(&iter))) { + if (IS_ERR(obj)) + continue; + + unmatched = obj; + if (unmatched->gc_cycle <= gc_cycle) + mlxsw_sp1_ptp_ht_gc_collect(ptp_state, unmatched); + } + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); + + mlxsw_core_schedule_dw(&ptp_state->ht_gc_dw, + MLXSW_SP1_PTP_HT_GC_INTERVAL); +} + +static int mlxsw_sp_ptp_mtptpt_set(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_reg_mtptpt_trap_id trap_id, + u16 message_type) +{ + char mtptpt_pl[MLXSW_REG_MTPTPT_LEN]; + + mlxsw_reg_mtptpt_pack(mtptpt_pl, trap_id, message_type); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mtptpt), mtptpt_pl); +} + +static int mlxsw_sp1_ptp_set_fifo_clr_on_trap(struct mlxsw_sp *mlxsw_sp, + bool clr) +{ + char mogcr_pl[MLXSW_REG_MOGCR_LEN] = {0}; + int err; + + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mogcr), mogcr_pl); + if (err) + return err; + + mlxsw_reg_mogcr_ptp_iftc_set(mogcr_pl, clr); + mlxsw_reg_mogcr_ptp_eftc_set(mogcr_pl, clr); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mogcr), mogcr_pl); +} + +static int mlxsw_sp1_ptp_mtpppc_set(struct mlxsw_sp *mlxsw_sp, + u16 ing_types, u16 egr_types) +{ + char mtpppc_pl[MLXSW_REG_MTPPPC_LEN]; + + mlxsw_reg_mtpppc_pack(mtpppc_pl, ing_types, egr_types); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mtpppc), mtpppc_pl); +} + +struct mlxsw_sp1_ptp_shaper_params { + u32 ethtool_speed; + enum mlxsw_reg_qpsc_port_speed port_speed; + u8 shaper_time_exp; + u8 shaper_time_mantissa; + u8 shaper_inc; + u8 shaper_bs; + u8 port_to_shaper_credits; + int ing_timestamp_inc; + int egr_timestamp_inc; +}; + +static const struct mlxsw_sp1_ptp_shaper_params +mlxsw_sp1_ptp_shaper_params[] = { + { + .ethtool_speed = SPEED_100, + .port_speed = MLXSW_REG_QPSC_PORT_SPEED_100M, + .shaper_time_exp = 4, + .shaper_time_mantissa = 12, + .shaper_inc = 9, + .shaper_bs = 1, + .port_to_shaper_credits = 1, + .ing_timestamp_inc = -313, + .egr_timestamp_inc = 313, + }, + { + .ethtool_speed = SPEED_1000, + .port_speed = MLXSW_REG_QPSC_PORT_SPEED_1G, + .shaper_time_exp = 0, + .shaper_time_mantissa = 12, + .shaper_inc = 6, + .shaper_bs = 0, + .port_to_shaper_credits = 1, + .ing_timestamp_inc = -35, + .egr_timestamp_inc = 35, + }, + { + .ethtool_speed = SPEED_10000, + .port_speed = MLXSW_REG_QPSC_PORT_SPEED_10G, + .shaper_time_exp = 0, + .shaper_time_mantissa = 2, + .shaper_inc = 14, + .shaper_bs = 1, + .port_to_shaper_credits = 1, + .ing_timestamp_inc = -11, + .egr_timestamp_inc = 11, + }, + { + .ethtool_speed = SPEED_25000, + .port_speed = MLXSW_REG_QPSC_PORT_SPEED_25G, + .shaper_time_exp = 0, + .shaper_time_mantissa = 0, + .shaper_inc = 11, + .shaper_bs = 1, + .port_to_shaper_credits = 1, + .ing_timestamp_inc = -14, + .egr_timestamp_inc = 14, + }, +}; + +#define MLXSW_SP1_PTP_SHAPER_PARAMS_LEN ARRAY_SIZE(mlxsw_sp1_ptp_shaper_params) + +static int mlxsw_sp1_ptp_shaper_params_set(struct mlxsw_sp *mlxsw_sp) +{ + const struct mlxsw_sp1_ptp_shaper_params *params; + char 
qpsc_pl[MLXSW_REG_QPSC_LEN]; + int i, err; + + for (i = 0; i < MLXSW_SP1_PTP_SHAPER_PARAMS_LEN; i++) { + params = &mlxsw_sp1_ptp_shaper_params[i]; + mlxsw_reg_qpsc_pack(qpsc_pl, params->port_speed, + params->shaper_time_exp, + params->shaper_time_mantissa, + params->shaper_inc, params->shaper_bs, + params->port_to_shaper_credits, + params->ing_timestamp_inc, + params->egr_timestamp_inc); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpsc), qpsc_pl); + if (err) + return err; + } + + return 0; +} + +static int mlxsw_sp_ptp_traps_set(struct mlxsw_sp *mlxsw_sp) +{ + u16 event_message_type; + int err; + + /* Deliver these message types as PTP0. */ + event_message_type = BIT(PTP_MSGTYPE_SYNC) | + BIT(PTP_MSGTYPE_DELAY_REQ) | + BIT(PTP_MSGTYPE_PDELAY_REQ) | + BIT(PTP_MSGTYPE_PDELAY_RESP); + + err = mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, + event_message_type); + if (err) + return err; + + /* Everything else is PTP1. */ + err = mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, + ~event_message_type); + if (err) + goto err_mtptpt1_set; + + return 0; + +err_mtptpt1_set: + mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0); + return err; +} + +static void mlxsw_sp_ptp_traps_unset(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0); + mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0); +} + +struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp1_ptp_state *ptp_state; + int err; + + err = mlxsw_sp1_ptp_shaper_params_set(mlxsw_sp); + if (err) + return ERR_PTR(err); + + ptp_state = kzalloc(sizeof(*ptp_state), GFP_KERNEL); + if (!ptp_state) + return ERR_PTR(-ENOMEM); + ptp_state->common.mlxsw_sp = mlxsw_sp; + + spin_lock_init(&ptp_state->unmatched_lock); + + err = rhltable_init(&ptp_state->unmatched_ht, + &mlxsw_sp1_ptp_unmatched_ht_params); + if (err) + goto err_hashtable_init; + + err = mlxsw_sp_ptp_traps_set(mlxsw_sp); + if (err) + goto err_ptp_traps_set; + + err = mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, true); + if (err) + goto err_fifo_clr; + + INIT_DELAYED_WORK(&ptp_state->ht_gc_dw, mlxsw_sp1_ptp_ht_gc); + mlxsw_core_schedule_dw(&ptp_state->ht_gc_dw, + MLXSW_SP1_PTP_HT_GC_INTERVAL); + return &ptp_state->common; + +err_fifo_clr: + mlxsw_sp_ptp_traps_unset(mlxsw_sp); +err_ptp_traps_set: + rhltable_destroy(&ptp_state->unmatched_ht); +err_hashtable_init: + kfree(ptp_state); + return ERR_PTR(err); +} + +void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state_common) +{ + struct mlxsw_sp *mlxsw_sp = ptp_state_common->mlxsw_sp; + struct mlxsw_sp1_ptp_state *ptp_state; + + ptp_state = mlxsw_sp1_ptp_state(mlxsw_sp); + + cancel_delayed_work_sync(&ptp_state->ht_gc_dw); + mlxsw_sp1_ptp_mtpppc_set(mlxsw_sp, 0, 0); + mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, false); + mlxsw_sp_ptp_traps_unset(mlxsw_sp); + rhltable_free_and_destroy(&ptp_state->unmatched_ht, + &mlxsw_sp1_ptp_unmatched_free_fn, NULL); + kfree(ptp_state); +} + +int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + *config = mlxsw_sp_port->ptp.hwtstamp_config; + return 0; +} + +static int +mlxsw_sp1_ptp_get_message_types(const struct hwtstamp_config *config, + u16 *p_ing_types, u16 *p_egr_types, + enum hwtstamp_rx_filters *p_rx_filter) +{ + enum hwtstamp_rx_filters rx_filter = config->rx_filter; + enum hwtstamp_tx_types tx_type = config->tx_type; + u16 ing_types = 0x00; + u16 egr_types = 0x00; + + switch (tx_type) { + case 
HWTSTAMP_TX_OFF: + egr_types = 0x00; + break; + case HWTSTAMP_TX_ON: + egr_types = 0xff; + break; + case HWTSTAMP_TX_ONESTEP_SYNC: + case HWTSTAMP_TX_ONESTEP_P2P: + return -ERANGE; + default: + return -EINVAL; + } + + switch (rx_filter) { + case HWTSTAMP_FILTER_NONE: + ing_types = 0x00; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + ing_types = 0x01; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + ing_types = 0x02; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + ing_types = 0x0f; + break; + case HWTSTAMP_FILTER_ALL: + ing_types = 0xff; + break; + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_NTP_ALL: + return -ERANGE; + default: + return -EINVAL; + } + + *p_ing_types = ing_types; + *p_egr_types = egr_types; + *p_rx_filter = rx_filter; + return 0; +} + +static int mlxsw_sp1_ptp_mtpppc_update(struct mlxsw_sp_port *mlxsw_sp_port, + u16 ing_types, u16 egr_types) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_port *tmp; + u16 orig_ing_types = 0; + u16 orig_egr_types = 0; + int err; + int i; + + /* MTPPPC configures timestamping globally, not per port. Find the + * configuration that contains all configured timestamping requests. + */ + for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) { + tmp = mlxsw_sp->ports[i]; + if (tmp) { + orig_ing_types |= tmp->ptp.ing_types; + orig_egr_types |= tmp->ptp.egr_types; + } + if (tmp && tmp != mlxsw_sp_port) { + ing_types |= tmp->ptp.ing_types; + egr_types |= tmp->ptp.egr_types; + } + } + + if ((ing_types || egr_types) && !(orig_ing_types || orig_egr_types)) { + err = mlxsw_sp_parsing_depth_inc(mlxsw_sp); + if (err) { + netdev_err(mlxsw_sp_port->dev, "Failed to increase parsing depth"); + return err; + } + } + if (!(ing_types || egr_types) && (orig_ing_types || orig_egr_types)) + mlxsw_sp_parsing_depth_dec(mlxsw_sp); + + return mlxsw_sp1_ptp_mtpppc_set(mlxsw_sp_port->mlxsw_sp, + ing_types, egr_types); +} + +static bool mlxsw_sp1_ptp_hwtstamp_enabled(struct mlxsw_sp_port *mlxsw_sp_port) +{ + return mlxsw_sp_port->ptp.ing_types || mlxsw_sp_port->ptp.egr_types; +} + +static int +mlxsw_sp1_ptp_port_shaper_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qeec_pl[MLXSW_REG_QEEC_LEN]; + + mlxsw_reg_qeec_ptps_pack(qeec_pl, mlxsw_sp_port->local_port, enable); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl); +} + +static int mlxsw_sp1_ptp_port_shaper_check(struct mlxsw_sp_port *mlxsw_sp_port) +{ + bool ptps = false; + int err, i; + u32 speed; + + if (!mlxsw_sp1_ptp_hwtstamp_enabled(mlxsw_sp_port)) + return mlxsw_sp1_ptp_port_shaper_set(mlxsw_sp_port, false); + + err = mlxsw_sp_port_speed_get(mlxsw_sp_port, &speed); + if (err) + return err; + + for (i = 0; i < MLXSW_SP1_PTP_SHAPER_PARAMS_LEN; i++) { + if (mlxsw_sp1_ptp_shaper_params[i].ethtool_speed == speed) { + ptps = true; + break; + } + } + + return mlxsw_sp1_ptp_port_shaper_set(mlxsw_sp_port, ptps); +} + +void mlxsw_sp1_ptp_shaper_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlxsw_sp_port *mlxsw_sp_port; + int err; + + mlxsw_sp_port = container_of(dwork, struct 
mlxsw_sp_port, + ptp.shaper_dw); + + if (!mlxsw_sp1_ptp_hwtstamp_enabled(mlxsw_sp_port)) + return; + + err = mlxsw_sp1_ptp_port_shaper_check(mlxsw_sp_port); + if (err) + netdev_err(mlxsw_sp_port->dev, "Failed to set up PTP shaper\n"); +} + +int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + enum hwtstamp_rx_filters rx_filter; + u16 ing_types; + u16 egr_types; + int err; + + err = mlxsw_sp1_ptp_get_message_types(config, &ing_types, &egr_types, + &rx_filter); + if (err) + return err; + + err = mlxsw_sp1_ptp_mtpppc_update(mlxsw_sp_port, ing_types, egr_types); + if (err) + return err; + + mlxsw_sp_port->ptp.hwtstamp_config = *config; + mlxsw_sp_port->ptp.ing_types = ing_types; + mlxsw_sp_port->ptp.egr_types = egr_types; + + err = mlxsw_sp1_ptp_port_shaper_check(mlxsw_sp_port); + if (err) + return err; + + /* Notify the ioctl caller what we are actually timestamping. */ + config->rx_filter = rx_filter; + + return 0; +} + +int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info) +{ + info->phc_index = ptp_clock_index(mlxsw_sp->clock->ptp); + + info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->tx_types = BIT(HWTSTAMP_TX_OFF) | + BIT(HWTSTAMP_TX_ON); + + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_ALL); + + return 0; +} + +struct mlxsw_sp_ptp_port_stat { + char str[ETH_GSTRING_LEN]; + ptrdiff_t offset; +}; + +#define MLXSW_SP_PTP_PORT_STAT(NAME, FIELD) \ + { \ + .str = NAME, \ + .offset = offsetof(struct mlxsw_sp_ptp_port_stats, \ + FIELD), \ + } + +static const struct mlxsw_sp_ptp_port_stat mlxsw_sp_ptp_port_stats[] = { + MLXSW_SP_PTP_PORT_STAT("ptp_rx_gcd_packets", rx_gcd.packets), + MLXSW_SP_PTP_PORT_STAT("ptp_rx_gcd_timestamps", rx_gcd.timestamps), + MLXSW_SP_PTP_PORT_STAT("ptp_tx_gcd_packets", tx_gcd.packets), + MLXSW_SP_PTP_PORT_STAT("ptp_tx_gcd_timestamps", tx_gcd.timestamps), +}; + +#undef MLXSW_SP_PTP_PORT_STAT + +#define MLXSW_SP_PTP_PORT_STATS_LEN \ + ARRAY_SIZE(mlxsw_sp_ptp_port_stats) + +int mlxsw_sp1_get_stats_count(void) +{ + return MLXSW_SP_PTP_PORT_STATS_LEN; +} + +void mlxsw_sp1_get_stats_strings(u8 **p) +{ + int i; + + for (i = 0; i < MLXSW_SP_PTP_PORT_STATS_LEN; i++) { + memcpy(*p, mlxsw_sp_ptp_port_stats[i].str, + ETH_GSTRING_LEN); + *p += ETH_GSTRING_LEN; + } +} + +void mlxsw_sp1_get_stats(struct mlxsw_sp_port *mlxsw_sp_port, + u64 *data, int data_index) +{ + void *stats = &mlxsw_sp_port->ptp.stats; + ptrdiff_t offset; + int i; + + data += data_index; + for (i = 0; i < MLXSW_SP_PTP_PORT_STATS_LEN; i++) { + offset = mlxsw_sp_ptp_port_stats[i].offset; + *data++ = *(u64 *)(stats + offset); + } +} + +struct mlxsw_sp_ptp_state *mlxsw_sp2_ptp_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp2_ptp_state *ptp_state; + int err; + + ptp_state = kzalloc(sizeof(*ptp_state), GFP_KERNEL); + if (!ptp_state) + return ERR_PTR(-ENOMEM); + + ptp_state->common.mlxsw_sp = mlxsw_sp; + + err = mlxsw_sp_ptp_traps_set(mlxsw_sp); + if (err) + goto err_ptp_traps_set; + + refcount_set(&ptp_state->ptp_port_enabled_ref, 0); + mutex_init(&ptp_state->lock); + return &ptp_state->common; + +err_ptp_traps_set: + kfree(ptp_state); + return ERR_PTR(err); +} + +void mlxsw_sp2_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state_common) +{ + struct mlxsw_sp *mlxsw_sp = ptp_state_common->mlxsw_sp; + struct mlxsw_sp2_ptp_state *ptp_state; + + ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp); + + mutex_destroy(&ptp_state->lock); + 
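/* Clear the PTP0/PTP1 trap message types that were configured at init time. */ +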
mlxsw_sp_ptp_traps_unset(mlxsw_sp); + kfree(ptp_state); +} + +static u32 mlxsw_ptp_utc_time_stamp_sec_get(struct mlxsw_core *mlxsw_core, + u8 cqe_ts_sec) +{ + u32 utc_sec = mlxsw_core_read_utc_sec(mlxsw_core); + + if (cqe_ts_sec > (utc_sec & 0xff)) + /* Time stamp above the last bits of UTC (UTC & 0xff) means the + * latter has wrapped after the time stamp was collected. + */ + utc_sec -= 256; + + utc_sec &= ~0xff; + utc_sec |= cqe_ts_sec; + + return utc_sec; +} + +static void mlxsw_sp2_ptp_hwtstamp_fill(struct mlxsw_core *mlxsw_core, + const struct mlxsw_skb_cb *cb, + struct skb_shared_hwtstamps *hwtstamps) +{ + u64 ts_sec, ts_nsec, nsec; + + WARN_ON_ONCE(!cb->cqe_ts.sec && !cb->cqe_ts.nsec); + + /* The time stamp in the CQE is represented by 38 bits, which is a short + * representation of UTC time. Software should create the full time + * stamp using the global UTC clock. The seconds have only 8 bits in the + * CQE, to create the full time stamp, use the current UTC time and fix + * the seconds according to the relation between UTC seconds and CQE + * seconds. + */ + ts_sec = mlxsw_ptp_utc_time_stamp_sec_get(mlxsw_core, cb->cqe_ts.sec); + ts_nsec = cb->cqe_ts.nsec; + + nsec = ts_sec * NSEC_PER_SEC + ts_nsec; + + hwtstamps->hwtstamp = ns_to_ktime(nsec); +} + +void mlxsw_sp2_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u16 local_port) +{ + struct skb_shared_hwtstamps hwtstamps; + + mlxsw_sp2_ptp_hwtstamp_fill(mlxsw_sp->core, mlxsw_skb_cb(skb), + &hwtstamps); + *skb_hwtstamps(skb) = hwtstamps; + mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp); +} + +void mlxsw_sp2_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u16 local_port) +{ + struct skb_shared_hwtstamps hwtstamps; + + mlxsw_sp2_ptp_hwtstamp_fill(mlxsw_sp->core, mlxsw_skb_cb(skb), + &hwtstamps); + skb_tstamp_tx(skb, &hwtstamps); + dev_kfree_skb_any(skb); +} + +int mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + struct mlxsw_sp2_ptp_state *ptp_state; + + ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp_port->mlxsw_sp); + + mutex_lock(&ptp_state->lock); + *config = ptp_state->config; + mutex_unlock(&ptp_state->lock); + + return 0; +} + +static int +mlxsw_sp2_ptp_get_message_types(const struct hwtstamp_config *config, + u16 *p_ing_types, u16 *p_egr_types, + enum hwtstamp_rx_filters *p_rx_filter) +{ + enum hwtstamp_rx_filters rx_filter = config->rx_filter; + enum hwtstamp_tx_types tx_type = config->tx_type; + u16 ing_types = 0x00; + u16 egr_types = 0x00; + + *p_rx_filter = rx_filter; + + switch (rx_filter) { + case HWTSTAMP_FILTER_NONE: + ing_types = 0x00; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + /* In Spectrum-2 and above, all packets get time stamp by + * default and the driver fill the time stamp only for event + * packets. Return all event types even if only specific types + * were required. 
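+ * The filter reported back to user space is therefore HWTSTAMP_FILTER_SOME.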
+ */ + ing_types = 0x0f; + *p_rx_filter = HWTSTAMP_FILTER_SOME; + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_NTP_ALL: + return -ERANGE; + default: + return -EINVAL; + } + + switch (tx_type) { + case HWTSTAMP_TX_OFF: + egr_types = 0x00; + break; + case HWTSTAMP_TX_ON: + egr_types = 0x0f; + break; + case HWTSTAMP_TX_ONESTEP_SYNC: + case HWTSTAMP_TX_ONESTEP_P2P: + return -ERANGE; + default: + return -EINVAL; + } + + if ((ing_types && !egr_types) || (!ing_types && egr_types)) + return -EINVAL; + + *p_ing_types = ing_types; + *p_egr_types = egr_types; + return 0; +} + +static int mlxsw_sp2_ptp_mtpcpc_set(struct mlxsw_sp *mlxsw_sp, bool ptp_trap_en, + u16 ing_types, u16 egr_types) +{ + char mtpcpc_pl[MLXSW_REG_MTPCPC_LEN]; + + mlxsw_reg_mtpcpc_pack(mtpcpc_pl, false, 0, ptp_trap_en, ing_types, + egr_types); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mtpcpc), mtpcpc_pl); +} + +static int mlxsw_sp2_ptp_enable(struct mlxsw_sp *mlxsw_sp, u16 ing_types, + u16 egr_types, + struct hwtstamp_config new_config) +{ + struct mlxsw_sp2_ptp_state *ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp); + int err; + + err = mlxsw_sp2_ptp_mtpcpc_set(mlxsw_sp, true, ing_types, egr_types); + if (err) + return err; + + ptp_state->config = new_config; + return 0; +} + +static int mlxsw_sp2_ptp_disable(struct mlxsw_sp *mlxsw_sp, + struct hwtstamp_config new_config) +{ + struct mlxsw_sp2_ptp_state *ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp); + int err; + + err = mlxsw_sp2_ptp_mtpcpc_set(mlxsw_sp, false, 0, 0); + if (err) + return err; + + ptp_state->config = new_config; + return 0; +} + +static int mlxsw_sp2_ptp_configure_port(struct mlxsw_sp_port *mlxsw_sp_port, + u16 ing_types, u16 egr_types, + struct hwtstamp_config new_config) +{ + struct mlxsw_sp2_ptp_state *ptp_state; + int err; + + ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp_port->mlxsw_sp); + + if (refcount_inc_not_zero(&ptp_state->ptp_port_enabled_ref)) + return 0; + + err = mlxsw_sp2_ptp_enable(mlxsw_sp_port->mlxsw_sp, ing_types, + egr_types, new_config); + if (err) + return err; + + refcount_set(&ptp_state->ptp_port_enabled_ref, 1); + + return 0; +} + +static int mlxsw_sp2_ptp_deconfigure_port(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config new_config) +{ + struct mlxsw_sp2_ptp_state *ptp_state; + int err; + + ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp_port->mlxsw_sp); + + if (!refcount_dec_and_test(&ptp_state->ptp_port_enabled_ref)) + return 0; + + err = mlxsw_sp2_ptp_disable(mlxsw_sp_port->mlxsw_sp, new_config); + if (err) + goto err_ptp_disable; + + return 0; + +err_ptp_disable: + refcount_set(&ptp_state->ptp_port_enabled_ref, 1); + return err; +} + +int mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + struct mlxsw_sp2_ptp_state *ptp_state; + enum hwtstamp_rx_filters rx_filter; + struct hwtstamp_config new_config; + u16 new_ing_types, new_egr_types; + bool ptp_enabled; + int err; + + ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp_port->mlxsw_sp); + mutex_lock(&ptp_state->lock); + + err = mlxsw_sp2_ptp_get_message_types(config, &new_ing_types, + &new_egr_types, &rx_filter); + if (err) + goto err_get_message_types; + + new_config.flags = config->flags; + new_config.tx_type = config->tx_type; + new_config.rx_filter = rx_filter; + + ptp_enabled = mlxsw_sp_port->ptp.ing_types || + mlxsw_sp_port->ptp.egr_types; + + if ((new_ing_types || new_egr_types) && !ptp_enabled) { + err = mlxsw_sp2_ptp_configure_port(mlxsw_sp_port, new_ing_types, + new_egr_types, new_config); + 
if (err) + goto err_configure_port; + } else if (!new_ing_types && !new_egr_types && ptp_enabled) { + err = mlxsw_sp2_ptp_deconfigure_port(mlxsw_sp_port, new_config); + if (err) + goto err_deconfigure_port; + } + + mlxsw_sp_port->ptp.ing_types = new_ing_types; + mlxsw_sp_port->ptp.egr_types = new_egr_types; + + /* Notify the ioctl caller what we are actually timestamping. */ + config->rx_filter = rx_filter; + mutex_unlock(&ptp_state->lock); + + return 0; + +err_deconfigure_port: +err_configure_port: +err_get_message_types: + mutex_unlock(&ptp_state->lock); + return err; +} + +int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info) +{ + info->phc_index = ptp_clock_index(mlxsw_sp->clock->ptp); + + info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->tx_types = BIT(HWTSTAMP_TX_OFF) | + BIT(HWTSTAMP_TX_ON); + + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_PTP_V1_L4_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V2_EVENT); + + return 0; +} + +int mlxsw_sp_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, + struct mlxsw_sp_port *mlxsw_sp_port, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + mlxsw_sp_txhdr_construct(skb, tx_info); + return 0; +} + +int mlxsw_sp2_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, + struct mlxsw_sp_port *mlxsw_sp_port, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + /* In Spectrum-2 and Spectrum-3, in order for PTP event packets to have + * their correction field correctly set on the egress port they must be + * transmitted as data packets. Such packets ingress the ASIC via the + * CPU port and must have a VLAN tag, as the CPU port is not configured + * with a PVID. Push the default VLAN (4095), which is configured as + * egress untagged on all the ports. + */ + if (!skb_vlan_tagged(skb)) { + skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q), + MLXSW_SP_DEFAULT_VID); + if (!skb) { + this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); + return -ENOMEM; + } + } + + return mlxsw_sp_txhdr_ptp_data_construct(mlxsw_core, mlxsw_sp_port, skb, + tx_info); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h new file mode 100644 index 000000000..a8b882309 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h @@ -0,0 +1,263 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2019 Mellanox Technologies. 
All rights reserved */ + +#ifndef _MLXSW_SPECTRUM_PTP_H +#define _MLXSW_SPECTRUM_PTP_H + +#include <linux/device.h> +#include <linux/rhashtable.h> + +struct mlxsw_sp; +struct mlxsw_sp_port; +struct mlxsw_sp_ptp_clock; + +static inline int mlxsw_sp_ptp_get_ts_info_noptp(struct ethtool_ts_info *info) +{ + info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + info->phc_index = -1; + return 0; +} + +#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK) + +struct mlxsw_sp_ptp_clock * +mlxsw_sp1_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev); + +void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock); + +struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp); + +void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state); + +void mlxsw_sp1_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u16 local_port); + +void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u16 local_port); + +void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, + u16 local_port, u8 message_type, + u8 domain_number, u16 sequence_id, + u64 timestamp); + +int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + +int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + +void mlxsw_sp1_ptp_shaper_work(struct work_struct *work); + +int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info); + +int mlxsw_sp1_get_stats_count(void); +void mlxsw_sp1_get_stats_strings(u8 **p); +void mlxsw_sp1_get_stats(struct mlxsw_sp_port *mlxsw_sp_port, + u64 *data, int data_index); + +int mlxsw_sp_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, + struct mlxsw_sp_port *mlxsw_sp_port, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info); + +struct mlxsw_sp_ptp_clock * +mlxsw_sp2_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev); + +void mlxsw_sp2_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock); + +struct mlxsw_sp_ptp_state *mlxsw_sp2_ptp_init(struct mlxsw_sp *mlxsw_sp); + +void mlxsw_sp2_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state); + +void mlxsw_sp2_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u16 local_port); + +void mlxsw_sp2_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u16 local_port); + +int mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + +int mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + +int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info); + +int mlxsw_sp2_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, + struct mlxsw_sp_port *mlxsw_sp_port, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info); + +#else + +static inline struct mlxsw_sp_ptp_clock * +mlxsw_sp1_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev) +{ + return NULL; +} + +static inline void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock) +{ +} + +static inline struct mlxsw_sp_ptp_state * +mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp) +{ + return NULL; +} + +static inline void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state) +{ +} + +static inline void mlxsw_sp1_ptp_receive(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u16 local_port) +{ + mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp); +} + +static inline void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct 
sk_buff *skb, u16 local_port) +{ + dev_kfree_skb_any(skb); +} + +static inline void +mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, + u16 local_port, u8 message_type, + u8 domain_number, + u16 sequence_id, u64 timestamp) +{ +} + +static inline int +mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline int +mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline void mlxsw_sp1_ptp_shaper_work(struct work_struct *work) +{ +} + +static inline int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info) +{ + return mlxsw_sp_ptp_get_ts_info_noptp(info); +} + +static inline int mlxsw_sp1_get_stats_count(void) +{ + return 0; +} + +static inline void mlxsw_sp1_get_stats_strings(u8 **p) +{ +} + +static inline void mlxsw_sp1_get_stats(struct mlxsw_sp_port *mlxsw_sp_port, + u64 *data, int data_index) +{ +} + +static inline int +mlxsw_sp_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, + struct mlxsw_sp_port *mlxsw_sp_port, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + return -EOPNOTSUPP; +} + +static inline struct mlxsw_sp_ptp_clock * +mlxsw_sp2_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev) +{ + return NULL; +} + +static inline void mlxsw_sp2_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock) +{ +} + +static inline struct mlxsw_sp_ptp_state * +mlxsw_sp2_ptp_init(struct mlxsw_sp *mlxsw_sp) +{ + return NULL; +} + +static inline void mlxsw_sp2_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state) +{ +} + +static inline void mlxsw_sp2_ptp_receive(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u16 local_port) +{ + mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp); +} + +static inline void mlxsw_sp2_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u16 local_port) +{ + dev_kfree_skb_any(skb); +} + +static inline int +mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline int +mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info) +{ + return mlxsw_sp_ptp_get_ts_info_noptp(info); +} + +static inline int +mlxsw_sp2_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, + struct mlxsw_sp_port *mlxsw_sp_port, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + return -EOPNOTSUPP; +} +#endif + +static inline void mlxsw_sp2_ptp_shaper_work(struct work_struct *work) +{ +} + +static inline int mlxsw_sp2_get_stats_count(void) +{ + return 0; +} + +static inline void mlxsw_sp2_get_stats_strings(u8 **p) +{ +} + +static inline void mlxsw_sp2_get_stats(struct mlxsw_sp_port *mlxsw_sp_port, + u64 *data, int data_index) +{ +} + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c new file mode 100644 index 000000000..4243d3b88 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -0,0 +1,2337 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2018 Mellanox Technologies. 
All rights reserved */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/netdevice.h> +#include <net/pkt_cls.h> +#include <net/red.h> + +#include "spectrum.h" +#include "spectrum_span.h" +#include "reg.h" + +#define MLXSW_SP_PRIO_BAND_TO_TCLASS(band) (IEEE_8021QAZ_MAX_TCS - band - 1) +#define MLXSW_SP_PRIO_CHILD_TO_TCLASS(child) \ + MLXSW_SP_PRIO_BAND_TO_TCLASS((child - 1)) + +enum mlxsw_sp_qdisc_type { + MLXSW_SP_QDISC_NO_QDISC, + MLXSW_SP_QDISC_RED, + MLXSW_SP_QDISC_PRIO, + MLXSW_SP_QDISC_ETS, + MLXSW_SP_QDISC_TBF, + MLXSW_SP_QDISC_FIFO, +}; + +struct mlxsw_sp_qdisc; + +struct mlxsw_sp_qdisc_ops { + enum mlxsw_sp_qdisc_type type; + int (*check_params)(struct mlxsw_sp_port *mlxsw_sp_port, + void *params); + int (*replace)(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params); + int (*destroy)(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc); + int (*get_stats)(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_qopt_offload_stats *stats_ptr); + int (*get_xstats)(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *xstats_ptr); + void (*clean_stats)(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc); + /* unoffload - to be used for a qdisc that stops being offloaded without + * being destroyed. + */ + void (*unoffload)(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params); + struct mlxsw_sp_qdisc *(*find_class)(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u32 parent); + unsigned int num_classes; + + u8 (*get_prio_bitmap)(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc *child); + int (*get_tclass_num)(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc *child); +}; + +struct mlxsw_sp_qdisc_ets_band { + u8 prio_bitmap; + int tclass_num; +}; + +struct mlxsw_sp_qdisc_ets_data { + struct mlxsw_sp_qdisc_ets_band bands[IEEE_8021QAZ_MAX_TCS]; +}; + +struct mlxsw_sp_qdisc { + u32 handle; + union { + struct red_stats red; + } xstats_base; + struct mlxsw_sp_qdisc_stats { + u64 tx_bytes; + u64 tx_packets; + u64 drops; + u64 overlimits; + u64 backlog; + } stats_base; + + union { + struct mlxsw_sp_qdisc_ets_data *ets_data; + }; + + struct mlxsw_sp_qdisc_ops *ops; + struct mlxsw_sp_qdisc *parent; + struct mlxsw_sp_qdisc *qdiscs; + unsigned int num_classes; +}; + +struct mlxsw_sp_qdisc_state { + struct mlxsw_sp_qdisc root_qdisc; + + /* When a PRIO or ETS are added, the invisible FIFOs in their bands are + * created first. When notifications for these FIFOs arrive, it is not + * known what qdisc their parent handle refers to. It could be a + * newly-created PRIO that will replace the currently-offloaded one, or + * it could be e.g. a RED that will be attached below it. + * + * As the notifications start to arrive, use them to note what the + * future parent handle is, and keep track of which child FIFOs were + * seen. Then when the parent is known, retroactively offload those + * FIFOs. + */ + u32 future_handle; + bool future_fifos[IEEE_8021QAZ_MAX_TCS]; + struct mutex lock; /* Protects qdisc state. 
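+ * That is, the qdisc tree rooted at 'root_qdisc' and the future_handle / + * future_fifos bookkeeping above.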
*/ +}; + +static bool +mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle) +{ + return mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->handle == handle; +} + +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_walk(struct mlxsw_sp_qdisc *qdisc, + struct mlxsw_sp_qdisc *(*pre)(struct mlxsw_sp_qdisc *, + void *), + void *data) +{ + struct mlxsw_sp_qdisc *tmp; + unsigned int i; + + if (pre) { + tmp = pre(qdisc, data); + if (tmp) + return tmp; + } + + if (qdisc->ops) { + for (i = 0; i < qdisc->num_classes; i++) { + tmp = &qdisc->qdiscs[i]; + if (qdisc->ops) { + tmp = mlxsw_sp_qdisc_walk(tmp, pre, data); + if (tmp) + return tmp; + } + } + } + + return NULL; +} + +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_walk_cb_find(struct mlxsw_sp_qdisc *qdisc, void *data) +{ + u32 parent = *(u32 *)data; + + if (qdisc->ops && TC_H_MAJ(qdisc->handle) == TC_H_MAJ(parent)) { + if (qdisc->ops->find_class) + return qdisc->ops->find_class(qdisc, parent); + } + + return NULL; +} + +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent) +{ + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; + + if (!qdisc_state) + return NULL; + if (parent == TC_H_ROOT) + return &qdisc_state->root_qdisc; + return mlxsw_sp_qdisc_walk(&qdisc_state->root_qdisc, + mlxsw_sp_qdisc_walk_cb_find, &parent); +} + +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_walk_cb_find_by_handle(struct mlxsw_sp_qdisc *qdisc, void *data) +{ + u32 handle = *(u32 *)data; + + if (qdisc->ops && qdisc->handle == handle) + return qdisc; + return NULL; +} + +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_find_by_handle(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle) +{ + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; + + if (!qdisc_state) + return NULL; + return mlxsw_sp_qdisc_walk(&qdisc_state->root_qdisc, + mlxsw_sp_qdisc_walk_cb_find_by_handle, + &handle); +} + +static void +mlxsw_sp_qdisc_reduce_parent_backlog(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + struct mlxsw_sp_qdisc *tmp; + + for (tmp = mlxsw_sp_qdisc->parent; tmp; tmp = tmp->parent) + tmp->stats_base.backlog -= mlxsw_sp_qdisc->stats_base.backlog; +} + +static u8 mlxsw_sp_qdisc_get_prio_bitmap(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + struct mlxsw_sp_qdisc *parent = mlxsw_sp_qdisc->parent; + + if (!parent) + return 0xff; + if (!parent->ops->get_prio_bitmap) + return mlxsw_sp_qdisc_get_prio_bitmap(mlxsw_sp_port, parent); + return parent->ops->get_prio_bitmap(parent, mlxsw_sp_qdisc); +} + +#define MLXSW_SP_PORT_DEFAULT_TCLASS 0 + +static int mlxsw_sp_qdisc_get_tclass_num(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + struct mlxsw_sp_qdisc *parent = mlxsw_sp_qdisc->parent; + + if (!parent) + return MLXSW_SP_PORT_DEFAULT_TCLASS; + if (!parent->ops->get_tclass_num) + return mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, parent); + return parent->ops->get_tclass_num(parent, mlxsw_sp_qdisc); +} + +static int +mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + struct mlxsw_sp_qdisc *root_qdisc = &mlxsw_sp_port->qdisc->root_qdisc; + int err_hdroom = 0; + int err = 0; + int i; + + if (!mlxsw_sp_qdisc) + return 0; + + if (root_qdisc == mlxsw_sp_qdisc) { + struct mlxsw_sp_hdroom hdroom = *mlxsw_sp_port->hdroom; + + hdroom.mode = MLXSW_SP_HDROOM_MODE_DCB; + mlxsw_sp_hdroom_prios_reset_buf_idx(&hdroom); + mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom); + 
mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + err_hdroom = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); + } + + if (!mlxsw_sp_qdisc->ops) + return 0; + + for (i = 0; i < mlxsw_sp_qdisc->num_classes; i++) + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, + &mlxsw_sp_qdisc->qdiscs[i]); + mlxsw_sp_qdisc_reduce_parent_backlog(mlxsw_sp_qdisc); + if (mlxsw_sp_qdisc->ops->destroy) + err = mlxsw_sp_qdisc->ops->destroy(mlxsw_sp_port, + mlxsw_sp_qdisc); + if (mlxsw_sp_qdisc->ops->clean_stats) + mlxsw_sp_qdisc->ops->clean_stats(mlxsw_sp_port, mlxsw_sp_qdisc); + + mlxsw_sp_qdisc->handle = TC_H_UNSPEC; + mlxsw_sp_qdisc->ops = NULL; + mlxsw_sp_qdisc->num_classes = 0; + kfree(mlxsw_sp_qdisc->qdiscs); + mlxsw_sp_qdisc->qdiscs = NULL; + return err_hdroom ?: err; +} + +struct mlxsw_sp_qdisc_tree_validate { + bool forbid_ets; + bool forbid_root_tbf; + bool forbid_tbf; + bool forbid_red; +}; + +static int +__mlxsw_sp_qdisc_tree_validate(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc_tree_validate validate); + +static int +mlxsw_sp_qdisc_tree_validate_children(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc_tree_validate validate) +{ + unsigned int i; + int err; + + for (i = 0; i < mlxsw_sp_qdisc->num_classes; i++) { + err = __mlxsw_sp_qdisc_tree_validate(&mlxsw_sp_qdisc->qdiscs[i], + validate); + if (err) + return err; + } + + return 0; +} + +static int +__mlxsw_sp_qdisc_tree_validate(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc_tree_validate validate) +{ + if (!mlxsw_sp_qdisc->ops) + return 0; + + switch (mlxsw_sp_qdisc->ops->type) { + case MLXSW_SP_QDISC_FIFO: + break; + case MLXSW_SP_QDISC_RED: + if (validate.forbid_red) + return -EINVAL; + validate.forbid_red = true; + validate.forbid_root_tbf = true; + validate.forbid_ets = true; + break; + case MLXSW_SP_QDISC_TBF: + if (validate.forbid_root_tbf) { + if (validate.forbid_tbf) + return -EINVAL; + /* This is a TC TBF. */ + validate.forbid_tbf = true; + validate.forbid_ets = true; + } else { + /* This is root TBF. 
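+ * A TBF reaching this branch is necessarily the root qdisc and
+ * is offloaded as a port-level shaper (see
+ * mlxsw_sp_qdisc_tbf_hr()); a TBF anywhere deeper in the tree
+ * takes the branch above and shapes a single traffic class.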
*/ + validate.forbid_root_tbf = true; + } + break; + case MLXSW_SP_QDISC_PRIO: + case MLXSW_SP_QDISC_ETS: + if (validate.forbid_ets) + return -EINVAL; + validate.forbid_root_tbf = true; + validate.forbid_ets = true; + break; + default: + WARN_ON(1); + return -EINVAL; + } + + return mlxsw_sp_qdisc_tree_validate_children(mlxsw_sp_qdisc, validate); +} + +static int mlxsw_sp_qdisc_tree_validate(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp_qdisc_tree_validate validate = {}; + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + + mlxsw_sp_qdisc = &mlxsw_sp_port->qdisc->root_qdisc; + return __mlxsw_sp_qdisc_tree_validate(mlxsw_sp_qdisc, validate); +} + +static int mlxsw_sp_qdisc_create(struct mlxsw_sp_port *mlxsw_sp_port, + u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc_ops *ops, void *params) +{ + struct mlxsw_sp_qdisc *root_qdisc = &mlxsw_sp_port->qdisc->root_qdisc; + struct mlxsw_sp_hdroom orig_hdroom; + unsigned int i; + int err; + + err = ops->check_params(mlxsw_sp_port, params); + if (err) + return err; + + if (ops->num_classes) { + mlxsw_sp_qdisc->qdiscs = kcalloc(ops->num_classes, + sizeof(*mlxsw_sp_qdisc->qdiscs), + GFP_KERNEL); + if (!mlxsw_sp_qdisc->qdiscs) + return -ENOMEM; + + for (i = 0; i < ops->num_classes; i++) + mlxsw_sp_qdisc->qdiscs[i].parent = mlxsw_sp_qdisc; + } + + orig_hdroom = *mlxsw_sp_port->hdroom; + if (root_qdisc == mlxsw_sp_qdisc) { + struct mlxsw_sp_hdroom hdroom = orig_hdroom; + + hdroom.mode = MLXSW_SP_HDROOM_MODE_TC; + mlxsw_sp_hdroom_prios_reset_buf_idx(&hdroom); + mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom); + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + + err = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); + if (err) + goto err_hdroom_configure; + } + + mlxsw_sp_qdisc->num_classes = ops->num_classes; + mlxsw_sp_qdisc->ops = ops; + mlxsw_sp_qdisc->handle = handle; + err = mlxsw_sp_qdisc_tree_validate(mlxsw_sp_port); + if (err) + goto err_replace; + + err = ops->replace(mlxsw_sp_port, handle, mlxsw_sp_qdisc, params); + if (err) + goto err_replace; + + return 0; + +err_replace: + mlxsw_sp_qdisc->handle = TC_H_UNSPEC; + mlxsw_sp_qdisc->ops = NULL; + mlxsw_sp_qdisc->num_classes = 0; + mlxsw_sp_hdroom_configure(mlxsw_sp_port, &orig_hdroom); +err_hdroom_configure: + kfree(mlxsw_sp_qdisc->qdiscs); + mlxsw_sp_qdisc->qdiscs = NULL; + return err; +} + +static int +mlxsw_sp_qdisc_change(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) +{ + struct mlxsw_sp_qdisc_ops *ops = mlxsw_sp_qdisc->ops; + int err; + + err = ops->check_params(mlxsw_sp_port, params); + if (err) + goto unoffload; + + err = ops->replace(mlxsw_sp_port, handle, mlxsw_sp_qdisc, params); + if (err) + goto unoffload; + + /* Check if the Qdisc changed. That includes a situation where an + * invisible Qdisc replaces another one, or is being added for the + * first time. 
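+ * In both cases the statistics baseline is stale, so clean_stats
+ * is called below to re-snapshot the hardware counters.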
+ */ + if (mlxsw_sp_qdisc->handle != handle) { + if (ops->clean_stats) + ops->clean_stats(mlxsw_sp_port, mlxsw_sp_qdisc); + } + + mlxsw_sp_qdisc->handle = handle; + return 0; + +unoffload: + if (ops->unoffload) + ops->unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, params); + + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + return err; +} + +static int +mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc_ops *ops, void *params) +{ + if (mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->ops->type != ops->type) + /* In case this location contained a different qdisc of the + * same type we can override the old qdisc configuration. + * Otherwise, we need to remove the old qdisc before setting the + * new one. + */ + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + + if (!mlxsw_sp_qdisc->ops) + return mlxsw_sp_qdisc_create(mlxsw_sp_port, handle, + mlxsw_sp_qdisc, ops, params); + else + return mlxsw_sp_qdisc_change(mlxsw_sp_port, handle, + mlxsw_sp_qdisc, params); +} + +static int +mlxsw_sp_qdisc_get_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_qopt_offload_stats *stats_ptr) +{ + if (mlxsw_sp_qdisc && mlxsw_sp_qdisc->ops && + mlxsw_sp_qdisc->ops->get_stats) + return mlxsw_sp_qdisc->ops->get_stats(mlxsw_sp_port, + mlxsw_sp_qdisc, + stats_ptr); + + return -EOPNOTSUPP; +} + +static int +mlxsw_sp_qdisc_get_xstats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *xstats_ptr) +{ + if (mlxsw_sp_qdisc && mlxsw_sp_qdisc->ops && + mlxsw_sp_qdisc->ops->get_xstats) + return mlxsw_sp_qdisc->ops->get_xstats(mlxsw_sp_port, + mlxsw_sp_qdisc, + xstats_ptr); + + return -EOPNOTSUPP; +} + +static u64 +mlxsw_sp_xstats_backlog(struct mlxsw_sp_port_xstats *xstats, int tclass_num) +{ + return xstats->backlog[tclass_num] + + xstats->backlog[tclass_num + 8]; +} + +static u64 +mlxsw_sp_xstats_tail_drop(struct mlxsw_sp_port_xstats *xstats, int tclass_num) +{ + return xstats->tail_drop[tclass_num] + + xstats->tail_drop[tclass_num + 8]; +} + +static void +mlxsw_sp_qdisc_bstats_per_priority_get(struct mlxsw_sp_port_xstats *xstats, + u8 prio_bitmap, u64 *tx_packets, + u64 *tx_bytes) +{ + int i; + + *tx_packets = 0; + *tx_bytes = 0; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (prio_bitmap & BIT(i)) { + *tx_packets += xstats->tx_packets[i]; + *tx_bytes += xstats->tx_bytes[i]; + } + } +} + +static void +mlxsw_sp_qdisc_collect_tc_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u64 *p_tx_bytes, u64 *p_tx_packets, + u64 *p_drops, u64 *p_backlog) +{ + struct mlxsw_sp_port_xstats *xstats; + u64 tx_bytes, tx_packets; + u8 prio_bitmap; + int tclass_num; + + prio_bitmap = mlxsw_sp_qdisc_get_prio_bitmap(mlxsw_sp_port, + mlxsw_sp_qdisc); + tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, + mlxsw_sp_qdisc); + xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; + mlxsw_sp_qdisc_bstats_per_priority_get(xstats, prio_bitmap, + &tx_packets, &tx_bytes); + + *p_tx_packets += tx_packets; + *p_tx_bytes += tx_bytes; + *p_drops += xstats->wred_drop[tclass_num] + + mlxsw_sp_xstats_tail_drop(xstats, tclass_num); + *p_backlog += mlxsw_sp_xstats_backlog(xstats, tclass_num); +} + +static void +mlxsw_sp_qdisc_update_stats(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u64 tx_bytes, u64 tx_packets, + u64 drops, u64 backlog, + struct tc_qopt_offload_stats *stats_ptr) +{ + struct mlxsw_sp_qdisc_stats *stats_base = 
&mlxsw_sp_qdisc->stats_base; + + tx_bytes -= stats_base->tx_bytes; + tx_packets -= stats_base->tx_packets; + drops -= stats_base->drops; + backlog -= stats_base->backlog; + + _bstats_update(stats_ptr->bstats, tx_bytes, tx_packets); + stats_ptr->qstats->drops += drops; + stats_ptr->qstats->backlog += mlxsw_sp_cells_bytes(mlxsw_sp, backlog); + + stats_base->backlog += backlog; + stats_base->drops += drops; + stats_base->tx_bytes += tx_bytes; + stats_base->tx_packets += tx_packets; +} + +static void +mlxsw_sp_qdisc_get_tc_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_qopt_offload_stats *stats_ptr) +{ + u64 tx_packets = 0; + u64 tx_bytes = 0; + u64 backlog = 0; + u64 drops = 0; + + mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &tx_bytes, &tx_packets, + &drops, &backlog); + mlxsw_sp_qdisc_update_stats(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_qdisc, + tx_bytes, tx_packets, drops, backlog, + stats_ptr); +} + +static int +mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port, + int tclass_num, u32 min, u32 max, + u32 probability, bool is_wred, bool is_ecn) +{ + char cwtpm_cmd[MLXSW_REG_CWTPM_LEN]; + char cwtp_cmd[MLXSW_REG_CWTP_LEN]; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + int err; + + mlxsw_reg_cwtp_pack(cwtp_cmd, mlxsw_sp_port->local_port, tclass_num); + mlxsw_reg_cwtp_profile_pack(cwtp_cmd, MLXSW_REG_CWTP_DEFAULT_PROFILE, + roundup(min, MLXSW_REG_CWTP_MIN_VALUE), + roundup(max, MLXSW_REG_CWTP_MIN_VALUE), + probability); + + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtp), cwtp_cmd); + if (err) + return err; + + mlxsw_reg_cwtpm_pack(cwtpm_cmd, mlxsw_sp_port->local_port, tclass_num, + MLXSW_REG_CWTP_DEFAULT_PROFILE, is_wred, is_ecn); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtpm_cmd); +} + +static int +mlxsw_sp_tclass_congestion_disable(struct mlxsw_sp_port *mlxsw_sp_port, + int tclass_num) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char cwtpm_cmd[MLXSW_REG_CWTPM_LEN]; + + mlxsw_reg_cwtpm_pack(cwtpm_cmd, mlxsw_sp_port->local_port, tclass_num, + MLXSW_REG_CWTPM_RESET_PROFILE, false, false); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtpm_cmd); +} + +static void +mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + struct mlxsw_sp_qdisc_stats *stats_base; + struct mlxsw_sp_port_xstats *xstats; + struct red_stats *red_base; + u8 prio_bitmap; + int tclass_num; + + prio_bitmap = mlxsw_sp_qdisc_get_prio_bitmap(mlxsw_sp_port, + mlxsw_sp_qdisc); + tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, + mlxsw_sp_qdisc); + xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; + stats_base = &mlxsw_sp_qdisc->stats_base; + red_base = &mlxsw_sp_qdisc->xstats_base.red; + + mlxsw_sp_qdisc_bstats_per_priority_get(xstats, prio_bitmap, + &stats_base->tx_packets, + &stats_base->tx_bytes); + red_base->prob_mark = xstats->tc_ecn[tclass_num]; + red_base->prob_drop = xstats->wred_drop[tclass_num]; + red_base->pdrop = mlxsw_sp_xstats_tail_drop(xstats, tclass_num); + + stats_base->overlimits = red_base->prob_drop + red_base->prob_mark; + stats_base->drops = red_base->prob_drop + red_base->pdrop; + + stats_base->backlog = 0; +} + +static int +mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + int tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, + mlxsw_sp_qdisc); + + return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port, 
tclass_num); +} + +static int +mlxsw_sp_qdisc_red_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + void *params) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct tc_red_qopt_offload_params *p = params; + + if (p->min > p->max) { + dev_err(mlxsw_sp->bus_info->dev, + "spectrum: RED: min %u is bigger then max %u\n", p->min, + p->max); + return -EINVAL; + } + if (p->max > MLXSW_CORE_RES_GET(mlxsw_sp->core, + GUARANTEED_SHARED_BUFFER)) { + dev_err(mlxsw_sp->bus_info->dev, + "spectrum: RED: max value %u is too big\n", p->max); + return -EINVAL; + } + if (p->min == 0 || p->max == 0) { + dev_err(mlxsw_sp->bus_info->dev, + "spectrum: RED: 0 value is illegal for min and max\n"); + return -EINVAL; + } + return 0; +} + +static int +mlxsw_sp_qdisc_future_fifo_replace(struct mlxsw_sp_port *mlxsw_sp_port, + u32 handle, unsigned int band, + struct mlxsw_sp_qdisc *child_qdisc); +static void +mlxsw_sp_qdisc_future_fifos_init(struct mlxsw_sp_port *mlxsw_sp_port, + u32 handle); + +static int +mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct tc_red_qopt_offload_params *p = params; + int tclass_num; + u32 min, max; + u64 prob; + int err; + + err = mlxsw_sp_qdisc_future_fifo_replace(mlxsw_sp_port, handle, 0, + &mlxsw_sp_qdisc->qdiscs[0]); + if (err) + return err; + mlxsw_sp_qdisc_future_fifos_init(mlxsw_sp_port, TC_H_UNSPEC); + + tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, + mlxsw_sp_qdisc); + + /* calculate probability in percentage */ + prob = p->probability; + prob *= 100; + prob = DIV_ROUND_UP(prob, 1 << 16); + prob = DIV_ROUND_UP(prob, 1 << 16); + min = mlxsw_sp_bytes_cells(mlxsw_sp, p->min); + max = mlxsw_sp_bytes_cells(mlxsw_sp, p->max); + return mlxsw_sp_tclass_congestion_enable(mlxsw_sp_port, tclass_num, + min, max, prob, + !p->is_nodrop, p->is_ecn); +} + +static void +mlxsw_sp_qdisc_leaf_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct gnet_stats_queue *qstats) +{ + u64 backlog; + + backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_qdisc->stats_base.backlog); + qstats->backlog -= backlog; + mlxsw_sp_qdisc->stats_base.backlog = 0; +} + +static void +mlxsw_sp_qdisc_red_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_red_qopt_offload_params *p = params; + + mlxsw_sp_qdisc_leaf_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, p->qstats); +} + +static int +mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *xstats_ptr) +{ + struct red_stats *xstats_base = &mlxsw_sp_qdisc->xstats_base.red; + struct mlxsw_sp_port_xstats *xstats; + struct red_stats *res = xstats_ptr; + int early_drops, marks, pdrops; + int tclass_num; + + tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, + mlxsw_sp_qdisc); + xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; + + early_drops = xstats->wred_drop[tclass_num] - xstats_base->prob_drop; + marks = xstats->tc_ecn[tclass_num] - xstats_base->prob_mark; + pdrops = mlxsw_sp_xstats_tail_drop(xstats, tclass_num) - + xstats_base->pdrop; + + res->pdrop += pdrops; + res->prob_drop += early_drops; + res->prob_mark += marks; + + xstats_base->pdrop += pdrops; + xstats_base->prob_drop += early_drops; + xstats_base->prob_mark += marks; + return 0; +} + +static int +mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port 
*mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_qopt_offload_stats *stats_ptr) +{ + struct mlxsw_sp_qdisc_stats *stats_base; + struct mlxsw_sp_port_xstats *xstats; + u64 overlimits; + int tclass_num; + + tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, + mlxsw_sp_qdisc); + xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; + stats_base = &mlxsw_sp_qdisc->stats_base; + + mlxsw_sp_qdisc_get_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, stats_ptr); + overlimits = xstats->wred_drop[tclass_num] + + xstats->tc_ecn[tclass_num] - stats_base->overlimits; + + stats_ptr->qstats->overlimits += overlimits; + stats_base->overlimits += overlimits; + + return 0; +} + +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_leaf_find_class(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u32 parent) +{ + /* RED and TBF are formally classful qdiscs, but all class references, + * including X:0, just refer to the same one class. + */ + return &mlxsw_sp_qdisc->qdiscs[0]; +} + +static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_red = { + .type = MLXSW_SP_QDISC_RED, + .check_params = mlxsw_sp_qdisc_red_check_params, + .replace = mlxsw_sp_qdisc_red_replace, + .unoffload = mlxsw_sp_qdisc_red_unoffload, + .destroy = mlxsw_sp_qdisc_red_destroy, + .get_stats = mlxsw_sp_qdisc_get_red_stats, + .get_xstats = mlxsw_sp_qdisc_get_red_xstats, + .clean_stats = mlxsw_sp_setup_tc_qdisc_red_clean_stats, + .find_class = mlxsw_sp_qdisc_leaf_find_class, + .num_classes = 1, +}; + +static int mlxsw_sp_qdisc_graft(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u8 band, u32 child_handle); + +static int __mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_red_qopt_offload *p) +{ + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent); + if (!mlxsw_sp_qdisc) + return -EOPNOTSUPP; + + if (p->command == TC_RED_REPLACE) + return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, + &mlxsw_sp_qdisc_ops_red, + &p->set); + + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle)) + return -EOPNOTSUPP; + + switch (p->command) { + case TC_RED_DESTROY: + return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + case TC_RED_XSTATS: + return mlxsw_sp_qdisc_get_xstats(mlxsw_sp_port, mlxsw_sp_qdisc, + p->xstats); + case TC_RED_STATS: + return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &p->stats); + case TC_RED_GRAFT: + return mlxsw_sp_qdisc_graft(mlxsw_sp_port, mlxsw_sp_qdisc, 0, + p->child_handle); + default: + return -EOPNOTSUPP; + } +} + +int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_red_qopt_offload *p) +{ + int err; + + mutex_lock(&mlxsw_sp_port->qdisc->lock); + err = __mlxsw_sp_setup_tc_red(mlxsw_sp_port, p); + mutex_unlock(&mlxsw_sp_port->qdisc->lock); + + return err; +} + +static void +mlxsw_sp_setup_tc_qdisc_leaf_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + u64 backlog_cells = 0; + u64 tx_packets = 0; + u64 tx_bytes = 0; + u64 drops = 0; + + mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &tx_bytes, &tx_packets, + &drops, &backlog_cells); + + mlxsw_sp_qdisc->stats_base.tx_packets = tx_packets; + mlxsw_sp_qdisc->stats_base.tx_bytes = tx_bytes; + mlxsw_sp_qdisc->stats_base.drops = drops; + mlxsw_sp_qdisc->stats_base.backlog = 0; +} + +static enum mlxsw_reg_qeec_hr +mlxsw_sp_qdisc_tbf_hr(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + if (mlxsw_sp_qdisc == 
&mlxsw_sp_port->qdisc->root_qdisc) + return MLXSW_REG_QEEC_HR_PORT; + + /* Configure subgroup shaper, so that both UC and MC traffic is subject + * to shaping. That is unlike RED, however UC queue lengths are going to + * be different than MC ones due to different pool and quota + * configurations, so the configuration is not applicable. For shaper on + * the other hand, subjecting the overall stream to the configured + * shaper makes sense. Also note that that is what we do for + * ieee_setmaxrate(). + */ + return MLXSW_REG_QEEC_HR_SUBGROUP; +} + +static int +mlxsw_sp_qdisc_tbf_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + enum mlxsw_reg_qeec_hr hr = mlxsw_sp_qdisc_tbf_hr(mlxsw_sp_port, + mlxsw_sp_qdisc); + int tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, + mlxsw_sp_qdisc); + + return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, hr, tclass_num, 0, + MLXSW_REG_QEEC_MAS_DIS, 0); +} + +static int +mlxsw_sp_qdisc_tbf_bs(struct mlxsw_sp_port *mlxsw_sp_port, + u32 max_size, u8 *p_burst_size) +{ + /* TBF burst size is configured in bytes. The ASIC burst size value is + * ((2 ^ bs) * 512 bits. Convert the TBF bytes to 512-bit units. + */ + u32 bs512 = max_size / 64; + u8 bs = fls(bs512); + + if (!bs) + return -EINVAL; + --bs; + + /* Demand a power of two. */ + if ((1 << bs) != bs512) + return -EINVAL; + + if (bs < mlxsw_sp_port->mlxsw_sp->lowest_shaper_bs || + bs > MLXSW_REG_QEEC_HIGHEST_SHAPER_BS) + return -EINVAL; + + *p_burst_size = bs; + return 0; +} + +static u32 +mlxsw_sp_qdisc_tbf_max_size(u8 bs) +{ + return (1U << bs) * 64; +} + +static u64 +mlxsw_sp_qdisc_tbf_rate_kbps(struct tc_tbf_qopt_offload_replace_params *p) +{ + /* TBF interface is in bytes/s, whereas Spectrum ASIC is configured in + * Kbits/s. 
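+ * E.g. a TBF rate of 1250000 bytes/s becomes
+ * 1250000 / 1000 * 8 = 10000 Kbits/s, i.e. 10 Mbit/s.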
+ */ + return div_u64(p->rate.rate_bytes_ps, 1000) * 8; +} + +static int +mlxsw_sp_qdisc_tbf_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + void *params) +{ + struct tc_tbf_qopt_offload_replace_params *p = params; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u64 rate_kbps = mlxsw_sp_qdisc_tbf_rate_kbps(p); + u8 burst_size; + int err; + + if (rate_kbps >= MLXSW_REG_QEEC_MAS_DIS) { + dev_err(mlxsw_sp_port->mlxsw_sp->bus_info->dev, + "spectrum: TBF: rate of %lluKbps must be below %u\n", + rate_kbps, MLXSW_REG_QEEC_MAS_DIS); + return -EINVAL; + } + + err = mlxsw_sp_qdisc_tbf_bs(mlxsw_sp_port, p->max_size, &burst_size); + if (err) { + u8 highest_shaper_bs = MLXSW_REG_QEEC_HIGHEST_SHAPER_BS; + + dev_err(mlxsw_sp->bus_info->dev, + "spectrum: TBF: invalid burst size of %u, must be a power of two between %u and %u", + p->max_size, + mlxsw_sp_qdisc_tbf_max_size(mlxsw_sp->lowest_shaper_bs), + mlxsw_sp_qdisc_tbf_max_size(highest_shaper_bs)); + return -EINVAL; + } + + return 0; +} + +static int +mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + enum mlxsw_reg_qeec_hr hr = mlxsw_sp_qdisc_tbf_hr(mlxsw_sp_port, + mlxsw_sp_qdisc); + struct tc_tbf_qopt_offload_replace_params *p = params; + u64 rate_kbps = mlxsw_sp_qdisc_tbf_rate_kbps(p); + int tclass_num; + u8 burst_size; + int err; + + err = mlxsw_sp_qdisc_future_fifo_replace(mlxsw_sp_port, handle, 0, + &mlxsw_sp_qdisc->qdiscs[0]); + if (err) + return err; + mlxsw_sp_qdisc_future_fifos_init(mlxsw_sp_port, TC_H_UNSPEC); + + tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, + mlxsw_sp_qdisc); + + err = mlxsw_sp_qdisc_tbf_bs(mlxsw_sp_port, p->max_size, &burst_size); + if (WARN_ON_ONCE(err)) + /* check_params above was supposed to reject this value. 
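+ * (mlxsw_sp_qdisc_create() and mlxsw_sp_qdisc_change() always run
+ * check_params before replace, so this is purely defensive.)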
*/ + return -EINVAL; + + return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, hr, tclass_num, 0, + rate_kbps, burst_size); +} + +static void +mlxsw_sp_qdisc_tbf_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_tbf_qopt_offload_replace_params *p = params; + + mlxsw_sp_qdisc_leaf_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, p->qstats); +} + +static int +mlxsw_sp_qdisc_get_tbf_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_qopt_offload_stats *stats_ptr) +{ + mlxsw_sp_qdisc_get_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + stats_ptr); + return 0; +} + +static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_tbf = { + .type = MLXSW_SP_QDISC_TBF, + .check_params = mlxsw_sp_qdisc_tbf_check_params, + .replace = mlxsw_sp_qdisc_tbf_replace, + .unoffload = mlxsw_sp_qdisc_tbf_unoffload, + .destroy = mlxsw_sp_qdisc_tbf_destroy, + .get_stats = mlxsw_sp_qdisc_get_tbf_stats, + .clean_stats = mlxsw_sp_setup_tc_qdisc_leaf_clean_stats, + .find_class = mlxsw_sp_qdisc_leaf_find_class, + .num_classes = 1, +}; + +static int __mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_tbf_qopt_offload *p) +{ + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent); + if (!mlxsw_sp_qdisc) + return -EOPNOTSUPP; + + if (p->command == TC_TBF_REPLACE) + return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, + &mlxsw_sp_qdisc_ops_tbf, + &p->replace_params); + + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle)) + return -EOPNOTSUPP; + + switch (p->command) { + case TC_TBF_DESTROY: + return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + case TC_TBF_STATS: + return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &p->stats); + case TC_TBF_GRAFT: + return mlxsw_sp_qdisc_graft(mlxsw_sp_port, mlxsw_sp_qdisc, 0, + p->child_handle); + default: + return -EOPNOTSUPP; + } +} + +int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_tbf_qopt_offload *p) +{ + int err; + + mutex_lock(&mlxsw_sp_port->qdisc->lock); + err = __mlxsw_sp_setup_tc_tbf(mlxsw_sp_port, p); + mutex_unlock(&mlxsw_sp_port->qdisc->lock); + + return err; +} + +static int +mlxsw_sp_qdisc_fifo_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + void *params) +{ + return 0; +} + +static int +mlxsw_sp_qdisc_fifo_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + return 0; +} + +static int +mlxsw_sp_qdisc_get_fifo_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_qopt_offload_stats *stats_ptr) +{ + mlxsw_sp_qdisc_get_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + stats_ptr); + return 0; +} + +static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_fifo = { + .type = MLXSW_SP_QDISC_FIFO, + .check_params = mlxsw_sp_qdisc_fifo_check_params, + .replace = mlxsw_sp_qdisc_fifo_replace, + .get_stats = mlxsw_sp_qdisc_get_fifo_stats, + .clean_stats = mlxsw_sp_setup_tc_qdisc_leaf_clean_stats, +}; + +static int +mlxsw_sp_qdisc_future_fifo_replace(struct mlxsw_sp_port *mlxsw_sp_port, + u32 handle, unsigned int band, + struct mlxsw_sp_qdisc *child_qdisc) +{ + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; + + if (handle == qdisc_state->future_handle && + qdisc_state->future_fifos[band]) + return mlxsw_sp_qdisc_replace(mlxsw_sp_port, TC_H_UNSPEC, + child_qdisc, + &mlxsw_sp_qdisc_ops_fifo, + NULL); + return 0; +} + +static void 
+mlxsw_sp_qdisc_future_fifos_init(struct mlxsw_sp_port *mlxsw_sp_port, + u32 handle) +{ + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; + + qdisc_state->future_handle = handle; + memset(qdisc_state->future_fifos, 0, sizeof(qdisc_state->future_fifos)); +} + +static int __mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_fifo_qopt_offload *p) +{ + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + unsigned int band; + u32 parent_handle; + + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent); + if (!mlxsw_sp_qdisc && p->handle == TC_H_UNSPEC) { + parent_handle = TC_H_MAJ(p->parent); + if (parent_handle != qdisc_state->future_handle) { + /* This notifications is for a different Qdisc than + * previously. Wipe the future cache. + */ + mlxsw_sp_qdisc_future_fifos_init(mlxsw_sp_port, + parent_handle); + } + + band = TC_H_MIN(p->parent) - 1; + if (band < IEEE_8021QAZ_MAX_TCS) { + if (p->command == TC_FIFO_REPLACE) + qdisc_state->future_fifos[band] = true; + else if (p->command == TC_FIFO_DESTROY) + qdisc_state->future_fifos[band] = false; + } + } + if (!mlxsw_sp_qdisc) + return -EOPNOTSUPP; + + if (p->command == TC_FIFO_REPLACE) { + return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, + &mlxsw_sp_qdisc_ops_fifo, NULL); + } + + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle)) + return -EOPNOTSUPP; + + switch (p->command) { + case TC_FIFO_DESTROY: + return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + case TC_FIFO_STATS: + return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &p->stats); + case TC_FIFO_REPLACE: /* Handled above. */ + break; + } + + return -EOPNOTSUPP; +} + +int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_fifo_qopt_offload *p) +{ + int err; + + mutex_lock(&mlxsw_sp_port->qdisc->lock); + err = __mlxsw_sp_setup_tc_fifo(mlxsw_sp_port, p); + mutex_unlock(&mlxsw_sp_port->qdisc->lock); + + return err; +} + +static int __mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + int i; + + for (i = 0; i < mlxsw_sp_qdisc->num_classes; i++) { + mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, + MLXSW_SP_PORT_DEFAULT_TCLASS); + mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + i, 0, false, 0); + } + + kfree(mlxsw_sp_qdisc->ets_data); + mlxsw_sp_qdisc->ets_data = NULL; + return 0; +} + +static int +mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); +} + +static int +__mlxsw_sp_qdisc_ets_check_params(unsigned int nbands) +{ + if (nbands > IEEE_8021QAZ_MAX_TCS) + return -EOPNOTSUPP; + + return 0; +} + +static int +mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + void *params) +{ + struct tc_prio_qopt_offload_params *p = params; + + return __mlxsw_sp_qdisc_ets_check_params(p->bands); +} + +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_walk_cb_clean_stats(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *mlxsw_sp_port) +{ + u64 backlog; + + if (mlxsw_sp_qdisc->ops) { + backlog = mlxsw_sp_qdisc->stats_base.backlog; + if (mlxsw_sp_qdisc->ops->clean_stats) + mlxsw_sp_qdisc->ops->clean_stats(mlxsw_sp_port, + mlxsw_sp_qdisc); + mlxsw_sp_qdisc->stats_base.backlog = backlog; + } + + return NULL; +} + +static void +mlxsw_sp_qdisc_tree_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc 
*mlxsw_sp_qdisc) +{ + mlxsw_sp_qdisc_walk(mlxsw_sp_qdisc, mlxsw_sp_qdisc_walk_cb_clean_stats, + mlxsw_sp_port); +} + +static int +__mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u32 handle, unsigned int nbands, + const unsigned int *quanta, + const unsigned int *weights, + const u8 *priomap) +{ + struct mlxsw_sp_qdisc_ets_data *ets_data = mlxsw_sp_qdisc->ets_data; + struct mlxsw_sp_qdisc_ets_band *ets_band; + struct mlxsw_sp_qdisc *child_qdisc; + u8 old_priomap, new_priomap; + int i, band; + int err; + + if (!ets_data) { + ets_data = kzalloc(sizeof(*ets_data), GFP_KERNEL); + if (!ets_data) + return -ENOMEM; + mlxsw_sp_qdisc->ets_data = ets_data; + + for (band = 0; band < mlxsw_sp_qdisc->num_classes; band++) { + int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); + + ets_band = &ets_data->bands[band]; + ets_band->tclass_num = tclass_num; + } + } + + for (band = 0; band < nbands; band++) { + int tclass_num; + + child_qdisc = &mlxsw_sp_qdisc->qdiscs[band]; + ets_band = &ets_data->bands[band]; + + tclass_num = ets_band->tclass_num; + old_priomap = ets_band->prio_bitmap; + new_priomap = 0; + + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + tclass_num, 0, !!quanta[band], + weights[band]); + if (err) + return err; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (priomap[i] == band) { + new_priomap |= BIT(i); + if (BIT(i) & old_priomap) + continue; + err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, + i, tclass_num); + if (err) + return err; + } + } + + ets_band->prio_bitmap = new_priomap; + + if (old_priomap != new_priomap) + mlxsw_sp_qdisc_tree_clean_stats(mlxsw_sp_port, + child_qdisc); + + err = mlxsw_sp_qdisc_future_fifo_replace(mlxsw_sp_port, handle, + band, child_qdisc); + if (err) + return err; + } + for (; band < IEEE_8021QAZ_MAX_TCS; band++) { + ets_band = &ets_data->bands[band]; + ets_band->prio_bitmap = 0; + + child_qdisc = &mlxsw_sp_qdisc->qdiscs[band]; + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc); + + mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + ets_band->tclass_num, 0, false, 0); + } + + mlxsw_sp_qdisc_future_fifos_init(mlxsw_sp_port, TC_H_UNSPEC); + return 0; +} + +static int +mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_prio_qopt_offload_params *p = params; + unsigned int zeroes[TCQ_ETS_MAX_BANDS] = {0}; + + return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, mlxsw_sp_qdisc, + handle, p->bands, zeroes, + zeroes, p->priomap); +} + +static void +__mlxsw_sp_qdisc_ets_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct gnet_stats_queue *qstats) +{ + u64 backlog; + + backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_qdisc->stats_base.backlog); + qstats->backlog -= backlog; +} + +static void +mlxsw_sp_qdisc_prio_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_prio_qopt_offload_params *p = params; + + __mlxsw_sp_qdisc_ets_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, + p->qstats); +} + +static int +mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_qopt_offload_stats *stats_ptr) +{ + struct mlxsw_sp_qdisc *tc_qdisc; + u64 tx_packets = 0; + u64 tx_bytes = 0; + u64 backlog = 0; + u64 drops = 0; + int i; + + for (i = 0; i < mlxsw_sp_qdisc->num_classes; i++) { + tc_qdisc = 
&mlxsw_sp_qdisc->qdiscs[i]; + mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, tc_qdisc, + &tx_bytes, &tx_packets, + &drops, &backlog); + } + + mlxsw_sp_qdisc_update_stats(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_qdisc, + tx_bytes, tx_packets, drops, backlog, + stats_ptr); + return 0; +} + +static void +mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + struct mlxsw_sp_qdisc_stats *stats_base; + struct mlxsw_sp_port_xstats *xstats; + struct rtnl_link_stats64 *stats; + int i; + + xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; + stats = &mlxsw_sp_port->periodic_hw_stats.stats; + stats_base = &mlxsw_sp_qdisc->stats_base; + + stats_base->tx_packets = stats->tx_packets; + stats_base->tx_bytes = stats->tx_bytes; + + stats_base->drops = 0; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + stats_base->drops += mlxsw_sp_xstats_tail_drop(xstats, i); + stats_base->drops += xstats->wred_drop[i]; + } + + mlxsw_sp_qdisc->stats_base.backlog = 0; +} + +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_prio_find_class(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u32 parent) +{ + int child_index = TC_H_MIN(parent); + int band = child_index - 1; + + if (band < 0 || band >= mlxsw_sp_qdisc->num_classes) + return NULL; + return &mlxsw_sp_qdisc->qdiscs[band]; +} + +static struct mlxsw_sp_qdisc_ets_band * +mlxsw_sp_qdisc_ets_get_band(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc *child) +{ + unsigned int band = child - mlxsw_sp_qdisc->qdiscs; + + if (WARN_ON(band >= IEEE_8021QAZ_MAX_TCS)) + band = 0; + return &mlxsw_sp_qdisc->ets_data->bands[band]; +} + +static u8 +mlxsw_sp_qdisc_ets_get_prio_bitmap(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc *child) +{ + return mlxsw_sp_qdisc_ets_get_band(mlxsw_sp_qdisc, child)->prio_bitmap; +} + +static int +mlxsw_sp_qdisc_ets_get_tclass_num(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc *child) +{ + return mlxsw_sp_qdisc_ets_get_band(mlxsw_sp_qdisc, child)->tclass_num; +} + +static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = { + .type = MLXSW_SP_QDISC_PRIO, + .check_params = mlxsw_sp_qdisc_prio_check_params, + .replace = mlxsw_sp_qdisc_prio_replace, + .unoffload = mlxsw_sp_qdisc_prio_unoffload, + .destroy = mlxsw_sp_qdisc_prio_destroy, + .get_stats = mlxsw_sp_qdisc_get_prio_stats, + .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, + .find_class = mlxsw_sp_qdisc_prio_find_class, + .num_classes = IEEE_8021QAZ_MAX_TCS, + .get_prio_bitmap = mlxsw_sp_qdisc_ets_get_prio_bitmap, + .get_tclass_num = mlxsw_sp_qdisc_ets_get_tclass_num, +}; + +static int +mlxsw_sp_qdisc_ets_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + void *params) +{ + struct tc_ets_qopt_offload_replace_params *p = params; + + return __mlxsw_sp_qdisc_ets_check_params(p->bands); +} + +static int +mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_ets_qopt_offload_replace_params *p = params; + + return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, mlxsw_sp_qdisc, + handle, p->bands, p->quanta, + p->weights, p->priomap); +} + +static void +mlxsw_sp_qdisc_ets_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_ets_qopt_offload_replace_params *p = params; + + __mlxsw_sp_qdisc_ets_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, + p->qstats); +} + +static int +mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct 
mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); +} + +static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_ets = { + .type = MLXSW_SP_QDISC_ETS, + .check_params = mlxsw_sp_qdisc_ets_check_params, + .replace = mlxsw_sp_qdisc_ets_replace, + .unoffload = mlxsw_sp_qdisc_ets_unoffload, + .destroy = mlxsw_sp_qdisc_ets_destroy, + .get_stats = mlxsw_sp_qdisc_get_prio_stats, + .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, + .find_class = mlxsw_sp_qdisc_prio_find_class, + .num_classes = IEEE_8021QAZ_MAX_TCS, + .get_prio_bitmap = mlxsw_sp_qdisc_ets_get_prio_bitmap, + .get_tclass_num = mlxsw_sp_qdisc_ets_get_tclass_num, +}; + +/* Linux allows linking of Qdiscs to arbitrary classes (so long as the resulting + * graph is free of cycles). These operations do not change the parent handle + * though, which means it can be incomplete (if there is more than one class + * where the Qdisc in question is grafted) or outright wrong (if the Qdisc was + * linked to a different class and then removed from the original class). + * + * E.g. consider this sequence of operations: + * + * # tc qdisc add dev swp1 root handle 1: prio + * # tc qdisc add dev swp1 parent 1:3 handle 13: red limit 1000000 avpkt 10000 + * RED: set bandwidth to 10Mbit + * # tc qdisc link dev swp1 handle 13: parent 1:2 + * + * At this point, both 1:2 and 1:3 have the same RED Qdisc instance as their + * child. But RED will still only claim that 1:3 is its parent. If it's removed + * from that band, its only parent will be 1:2, but it will continue to claim + * that it is in fact 1:3. + * + * The notification for child Qdisc replace (e.g. TC_RED_REPLACE) comes before + * the notification for parent graft (e.g. TC_PRIO_GRAFT). We take the replace + * notification to offload the child Qdisc, based on its parent handle, and use + * the graft operation to validate that the class where the child is actually + * grafted corresponds to the parent handle. If the two don't match, we + * unoffload the child. + */ +static int mlxsw_sp_qdisc_graft(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u8 band, u32 child_handle) +{ + struct mlxsw_sp_qdisc *old_qdisc; + u32 parent; + + if (band < mlxsw_sp_qdisc->num_classes && + mlxsw_sp_qdisc->qdiscs[band].handle == child_handle) + return 0; + + if (!child_handle) { + /* This is an invisible FIFO replacing the original Qdisc. + * Ignore it--the original Qdisc's destroy will follow. + */ + return 0; + } + + /* See if the grafted qdisc is already offloaded on any tclass. If so, + * unoffload it. 
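+ * A qdisc can be offloaded in only one location at a time, so the
+ * stale offload of child_handle is removed first; the class being
+ * grafted onto is then unoffloaded as well and the graft is
+ * reported as unsupported.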
+ */ + old_qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port, + child_handle); + if (old_qdisc) + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc); + + parent = TC_H_MAKE(mlxsw_sp_qdisc->handle, band + 1); + mlxsw_sp_qdisc = mlxsw_sp_qdisc->ops->find_class(mlxsw_sp_qdisc, + parent); + if (!WARN_ON(!mlxsw_sp_qdisc)) + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + + return -EOPNOTSUPP; +} + +static int __mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_prio_qopt_offload *p) +{ + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent); + if (!mlxsw_sp_qdisc) + return -EOPNOTSUPP; + + if (p->command == TC_PRIO_REPLACE) + return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, + &mlxsw_sp_qdisc_ops_prio, + &p->replace_params); + + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle)) + return -EOPNOTSUPP; + + switch (p->command) { + case TC_PRIO_DESTROY: + return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + case TC_PRIO_STATS: + return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &p->stats); + case TC_PRIO_GRAFT: + return mlxsw_sp_qdisc_graft(mlxsw_sp_port, mlxsw_sp_qdisc, + p->graft_params.band, + p->graft_params.child_handle); + default: + return -EOPNOTSUPP; + } +} + +int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_prio_qopt_offload *p) +{ + int err; + + mutex_lock(&mlxsw_sp_port->qdisc->lock); + err = __mlxsw_sp_setup_tc_prio(mlxsw_sp_port, p); + mutex_unlock(&mlxsw_sp_port->qdisc->lock); + + return err; +} + +static int __mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_ets_qopt_offload *p) +{ + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent); + if (!mlxsw_sp_qdisc) + return -EOPNOTSUPP; + + if (p->command == TC_ETS_REPLACE) + return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, + &mlxsw_sp_qdisc_ops_ets, + &p->replace_params); + + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle)) + return -EOPNOTSUPP; + + switch (p->command) { + case TC_ETS_DESTROY: + return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + case TC_ETS_STATS: + return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &p->stats); + case TC_ETS_GRAFT: + return mlxsw_sp_qdisc_graft(mlxsw_sp_port, mlxsw_sp_qdisc, + p->graft_params.band, + p->graft_params.child_handle); + default: + return -EOPNOTSUPP; + } +} + +int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_ets_qopt_offload *p) +{ + int err; + + mutex_lock(&mlxsw_sp_port->qdisc->lock); + err = __mlxsw_sp_setup_tc_ets(mlxsw_sp_port, p); + mutex_unlock(&mlxsw_sp_port->qdisc->lock); + + return err; +} + +struct mlxsw_sp_qevent_block { + struct list_head binding_list; + struct list_head mall_entry_list; + struct mlxsw_sp *mlxsw_sp; +}; + +struct mlxsw_sp_qevent_binding { + struct list_head list; + struct mlxsw_sp_port *mlxsw_sp_port; + u32 handle; + int tclass_num; + enum mlxsw_sp_span_trigger span_trigger; + unsigned int action_mask; +}; + +static LIST_HEAD(mlxsw_sp_qevent_block_cb_list); + +static int mlxsw_sp_qevent_span_configure(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mall_entry *mall_entry, + struct mlxsw_sp_qevent_binding *qevent_binding, + const struct mlxsw_sp_span_agent_parms *agent_parms, + int *p_span_id) +{ + enum mlxsw_sp_span_trigger span_trigger = qevent_binding->span_trigger; + struct mlxsw_sp_port *mlxsw_sp_port = qevent_binding->mlxsw_sp_port; + struct 
mlxsw_sp_span_trigger_parms trigger_parms = {}; + bool ingress; + int span_id; + int err; + + err = mlxsw_sp_span_agent_get(mlxsw_sp, &span_id, agent_parms); + if (err) + return err; + + ingress = mlxsw_sp_span_trigger_is_ingress(span_trigger); + err = mlxsw_sp_span_analyzed_port_get(mlxsw_sp_port, ingress); + if (err) + goto err_analyzed_port_get; + + trigger_parms.span_id = span_id; + trigger_parms.probability_rate = 1; + err = mlxsw_sp_span_agent_bind(mlxsw_sp, span_trigger, mlxsw_sp_port, + &trigger_parms); + if (err) + goto err_agent_bind; + + err = mlxsw_sp_span_trigger_enable(mlxsw_sp_port, span_trigger, + qevent_binding->tclass_num); + if (err) + goto err_trigger_enable; + + *p_span_id = span_id; + return 0; + +err_trigger_enable: + mlxsw_sp_span_agent_unbind(mlxsw_sp, span_trigger, mlxsw_sp_port, + &trigger_parms); +err_agent_bind: + mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, ingress); +err_analyzed_port_get: + mlxsw_sp_span_agent_put(mlxsw_sp, span_id); + return err; +} + +static void mlxsw_sp_qevent_span_deconfigure(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_qevent_binding *qevent_binding, + int span_id) +{ + enum mlxsw_sp_span_trigger span_trigger = qevent_binding->span_trigger; + struct mlxsw_sp_port *mlxsw_sp_port = qevent_binding->mlxsw_sp_port; + struct mlxsw_sp_span_trigger_parms trigger_parms = { + .span_id = span_id, + }; + bool ingress; + + ingress = mlxsw_sp_span_trigger_is_ingress(span_trigger); + + mlxsw_sp_span_trigger_disable(mlxsw_sp_port, span_trigger, + qevent_binding->tclass_num); + mlxsw_sp_span_agent_unbind(mlxsw_sp, span_trigger, mlxsw_sp_port, + &trigger_parms); + mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, ingress); + mlxsw_sp_span_agent_put(mlxsw_sp, span_id); +} + +static int mlxsw_sp_qevent_mirror_configure(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mall_entry *mall_entry, + struct mlxsw_sp_qevent_binding *qevent_binding) +{ + struct mlxsw_sp_span_agent_parms agent_parms = { + .to_dev = mall_entry->mirror.to_dev, + }; + + return mlxsw_sp_qevent_span_configure(mlxsw_sp, mall_entry, qevent_binding, + &agent_parms, &mall_entry->mirror.span_id); +} + +static void mlxsw_sp_qevent_mirror_deconfigure(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mall_entry *mall_entry, + struct mlxsw_sp_qevent_binding *qevent_binding) +{ + mlxsw_sp_qevent_span_deconfigure(mlxsw_sp, qevent_binding, mall_entry->mirror.span_id); +} + +static int mlxsw_sp_qevent_trap_configure(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mall_entry *mall_entry, + struct mlxsw_sp_qevent_binding *qevent_binding) +{ + struct mlxsw_sp_span_agent_parms agent_parms = { + .session_id = MLXSW_SP_SPAN_SESSION_ID_BUFFER, + }; + int err; + + err = mlxsw_sp_trap_group_policer_hw_id_get(mlxsw_sp, + DEVLINK_TRAP_GROUP_GENERIC_ID_BUFFER_DROPS, + &agent_parms.policer_enable, + &agent_parms.policer_id); + if (err) + return err; + + return mlxsw_sp_qevent_span_configure(mlxsw_sp, mall_entry, qevent_binding, + &agent_parms, &mall_entry->trap.span_id); +} + +static void mlxsw_sp_qevent_trap_deconfigure(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mall_entry *mall_entry, + struct mlxsw_sp_qevent_binding *qevent_binding) +{ + mlxsw_sp_qevent_span_deconfigure(mlxsw_sp, qevent_binding, mall_entry->trap.span_id); +} + +static int +mlxsw_sp_qevent_entry_configure(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mall_entry *mall_entry, + struct mlxsw_sp_qevent_binding *qevent_binding, + struct netlink_ext_ack *extack) +{ + if (!(BIT(mall_entry->type) & qevent_binding->action_mask)) { + NL_SET_ERR_MSG(extack, "Action 
not supported at this qevent"); + return -EOPNOTSUPP; + } + + switch (mall_entry->type) { + case MLXSW_SP_MALL_ACTION_TYPE_MIRROR: + return mlxsw_sp_qevent_mirror_configure(mlxsw_sp, mall_entry, qevent_binding); + case MLXSW_SP_MALL_ACTION_TYPE_TRAP: + return mlxsw_sp_qevent_trap_configure(mlxsw_sp, mall_entry, qevent_binding); + default: + /* This should have been validated away. */ + WARN_ON(1); + return -EOPNOTSUPP; + } +} + +static void mlxsw_sp_qevent_entry_deconfigure(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mall_entry *mall_entry, + struct mlxsw_sp_qevent_binding *qevent_binding) +{ + switch (mall_entry->type) { + case MLXSW_SP_MALL_ACTION_TYPE_MIRROR: + return mlxsw_sp_qevent_mirror_deconfigure(mlxsw_sp, mall_entry, qevent_binding); + case MLXSW_SP_MALL_ACTION_TYPE_TRAP: + return mlxsw_sp_qevent_trap_deconfigure(mlxsw_sp, mall_entry, qevent_binding); + default: + WARN_ON(1); + return; + } +} + +static int +mlxsw_sp_qevent_binding_configure(struct mlxsw_sp_qevent_block *qevent_block, + struct mlxsw_sp_qevent_binding *qevent_binding, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_mall_entry *mall_entry; + int err; + + list_for_each_entry(mall_entry, &qevent_block->mall_entry_list, list) { + err = mlxsw_sp_qevent_entry_configure(qevent_block->mlxsw_sp, mall_entry, + qevent_binding, extack); + if (err) + goto err_entry_configure; + } + + return 0; + +err_entry_configure: + list_for_each_entry_continue_reverse(mall_entry, &qevent_block->mall_entry_list, list) + mlxsw_sp_qevent_entry_deconfigure(qevent_block->mlxsw_sp, mall_entry, + qevent_binding); + return err; +} + +static void mlxsw_sp_qevent_binding_deconfigure(struct mlxsw_sp_qevent_block *qevent_block, + struct mlxsw_sp_qevent_binding *qevent_binding) +{ + struct mlxsw_sp_mall_entry *mall_entry; + + list_for_each_entry(mall_entry, &qevent_block->mall_entry_list, list) + mlxsw_sp_qevent_entry_deconfigure(qevent_block->mlxsw_sp, mall_entry, + qevent_binding); +} + +static int +mlxsw_sp_qevent_block_configure(struct mlxsw_sp_qevent_block *qevent_block, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_qevent_binding *qevent_binding; + int err; + + list_for_each_entry(qevent_binding, &qevent_block->binding_list, list) { + err = mlxsw_sp_qevent_binding_configure(qevent_block, + qevent_binding, + extack); + if (err) + goto err_binding_configure; + } + + return 0; + +err_binding_configure: + list_for_each_entry_continue_reverse(qevent_binding, &qevent_block->binding_list, list) + mlxsw_sp_qevent_binding_deconfigure(qevent_block, qevent_binding); + return err; +} + +static void mlxsw_sp_qevent_block_deconfigure(struct mlxsw_sp_qevent_block *qevent_block) +{ + struct mlxsw_sp_qevent_binding *qevent_binding; + + list_for_each_entry(qevent_binding, &qevent_block->binding_list, list) + mlxsw_sp_qevent_binding_deconfigure(qevent_block, qevent_binding); +} + +static struct mlxsw_sp_mall_entry * +mlxsw_sp_qevent_mall_entry_find(struct mlxsw_sp_qevent_block *block, unsigned long cookie) +{ + struct mlxsw_sp_mall_entry *mall_entry; + + list_for_each_entry(mall_entry, &block->mall_entry_list, list) + if (mall_entry->cookie == cookie) + return mall_entry; + + return NULL; +} + +static int mlxsw_sp_qevent_mall_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_qevent_block *qevent_block, + struct tc_cls_matchall_offload *f) +{ + struct mlxsw_sp_mall_entry *mall_entry; + struct flow_action_entry *act; + int err; + + /* It should not currently be possible to replace a matchall rule. So + * this must be a new rule. 
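+ * The checks below only admit the supported model: a single
+ * matchall filter on chain 0, matching all protocols, with exactly
+ * one mirror or trap action and no hardware counters.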
+ */ + if (!list_empty(&qevent_block->mall_entry_list)) { + NL_SET_ERR_MSG(f->common.extack, "At most one filter supported"); + return -EOPNOTSUPP; + } + if (f->rule->action.num_entries != 1) { + NL_SET_ERR_MSG(f->common.extack, "Only singular actions supported"); + return -EOPNOTSUPP; + } + if (f->common.chain_index) { + NL_SET_ERR_MSG(f->common.extack, "Only chain 0 is supported"); + return -EOPNOTSUPP; + } + if (f->common.protocol != htons(ETH_P_ALL)) { + NL_SET_ERR_MSG(f->common.extack, "Protocol matching not supported"); + return -EOPNOTSUPP; + } + + act = &f->rule->action.entries[0]; + if (!(act->hw_stats & FLOW_ACTION_HW_STATS_DISABLED)) { + NL_SET_ERR_MSG(f->common.extack, "HW counters not supported on qevents"); + return -EOPNOTSUPP; + } + + mall_entry = kzalloc(sizeof(*mall_entry), GFP_KERNEL); + if (!mall_entry) + return -ENOMEM; + mall_entry->cookie = f->cookie; + + if (act->id == FLOW_ACTION_MIRRED) { + mall_entry->type = MLXSW_SP_MALL_ACTION_TYPE_MIRROR; + mall_entry->mirror.to_dev = act->dev; + } else if (act->id == FLOW_ACTION_TRAP) { + mall_entry->type = MLXSW_SP_MALL_ACTION_TYPE_TRAP; + } else { + NL_SET_ERR_MSG(f->common.extack, "Unsupported action"); + err = -EOPNOTSUPP; + goto err_unsupported_action; + } + + list_add_tail(&mall_entry->list, &qevent_block->mall_entry_list); + + err = mlxsw_sp_qevent_block_configure(qevent_block, f->common.extack); + if (err) + goto err_block_configure; + + return 0; + +err_block_configure: + list_del(&mall_entry->list); +err_unsupported_action: + kfree(mall_entry); + return err; +} + +static void mlxsw_sp_qevent_mall_destroy(struct mlxsw_sp_qevent_block *qevent_block, + struct tc_cls_matchall_offload *f) +{ + struct mlxsw_sp_mall_entry *mall_entry; + + mall_entry = mlxsw_sp_qevent_mall_entry_find(qevent_block, f->cookie); + if (!mall_entry) + return; + + mlxsw_sp_qevent_block_deconfigure(qevent_block); + + list_del(&mall_entry->list); + kfree(mall_entry); +} + +static int mlxsw_sp_qevent_block_mall_cb(struct mlxsw_sp_qevent_block *qevent_block, + struct tc_cls_matchall_offload *f) +{ + struct mlxsw_sp *mlxsw_sp = qevent_block->mlxsw_sp; + + switch (f->command) { + case TC_CLSMATCHALL_REPLACE: + return mlxsw_sp_qevent_mall_replace(mlxsw_sp, qevent_block, f); + case TC_CLSMATCHALL_DESTROY: + mlxsw_sp_qevent_mall_destroy(qevent_block, f); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int mlxsw_sp_qevent_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) +{ + struct mlxsw_sp_qevent_block *qevent_block = cb_priv; + + switch (type) { + case TC_SETUP_CLSMATCHALL: + return mlxsw_sp_qevent_block_mall_cb(qevent_block, type_data); + default: + return -EOPNOTSUPP; + } +} + +static struct mlxsw_sp_qevent_block *mlxsw_sp_qevent_block_create(struct mlxsw_sp *mlxsw_sp, + struct net *net) +{ + struct mlxsw_sp_qevent_block *qevent_block; + + qevent_block = kzalloc(sizeof(*qevent_block), GFP_KERNEL); + if (!qevent_block) + return NULL; + + INIT_LIST_HEAD(&qevent_block->binding_list); + INIT_LIST_HEAD(&qevent_block->mall_entry_list); + qevent_block->mlxsw_sp = mlxsw_sp; + return qevent_block; +} + +static void +mlxsw_sp_qevent_block_destroy(struct mlxsw_sp_qevent_block *qevent_block) +{ + WARN_ON(!list_empty(&qevent_block->binding_list)); + WARN_ON(!list_empty(&qevent_block->mall_entry_list)); + kfree(qevent_block); +} + +static void mlxsw_sp_qevent_block_release(void *cb_priv) +{ + struct mlxsw_sp_qevent_block *qevent_block = cb_priv; + + mlxsw_sp_qevent_block_destroy(qevent_block); +} + +static struct 
mlxsw_sp_qevent_binding * +mlxsw_sp_qevent_binding_create(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, int tclass_num, + enum mlxsw_sp_span_trigger span_trigger, + unsigned int action_mask) +{ + struct mlxsw_sp_qevent_binding *binding; + + binding = kzalloc(sizeof(*binding), GFP_KERNEL); + if (!binding) + return ERR_PTR(-ENOMEM); + + binding->mlxsw_sp_port = mlxsw_sp_port; + binding->handle = handle; + binding->tclass_num = tclass_num; + binding->span_trigger = span_trigger; + binding->action_mask = action_mask; + return binding; +} + +static void +mlxsw_sp_qevent_binding_destroy(struct mlxsw_sp_qevent_binding *binding) +{ + kfree(binding); +} + +static struct mlxsw_sp_qevent_binding * +mlxsw_sp_qevent_binding_lookup(struct mlxsw_sp_qevent_block *block, + struct mlxsw_sp_port *mlxsw_sp_port, + u32 handle, + enum mlxsw_sp_span_trigger span_trigger) +{ + struct mlxsw_sp_qevent_binding *qevent_binding; + + list_for_each_entry(qevent_binding, &block->binding_list, list) + if (qevent_binding->mlxsw_sp_port == mlxsw_sp_port && + qevent_binding->handle == handle && + qevent_binding->span_trigger == span_trigger) + return qevent_binding; + return NULL; +} + +static int +mlxsw_sp_setup_tc_block_qevent_bind(struct mlxsw_sp_port *mlxsw_sp_port, + struct flow_block_offload *f, + enum mlxsw_sp_span_trigger span_trigger, + unsigned int action_mask) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_qevent_binding *qevent_binding; + struct mlxsw_sp_qevent_block *qevent_block; + struct flow_block_cb *block_cb; + struct mlxsw_sp_qdisc *qdisc; + bool register_block = false; + int tclass_num; + int err; + + block_cb = flow_block_cb_lookup(f->block, mlxsw_sp_qevent_block_cb, mlxsw_sp); + if (!block_cb) { + qevent_block = mlxsw_sp_qevent_block_create(mlxsw_sp, f->net); + if (!qevent_block) + return -ENOMEM; + block_cb = flow_block_cb_alloc(mlxsw_sp_qevent_block_cb, mlxsw_sp, qevent_block, + mlxsw_sp_qevent_block_release); + if (IS_ERR(block_cb)) { + mlxsw_sp_qevent_block_destroy(qevent_block); + return PTR_ERR(block_cb); + } + register_block = true; + } else { + qevent_block = flow_block_cb_priv(block_cb); + } + flow_block_cb_incref(block_cb); + + qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port, f->sch->handle); + if (!qdisc) { + NL_SET_ERR_MSG(f->extack, "Qdisc not offloaded"); + err = -ENOENT; + goto err_find_qdisc; + } + + if (WARN_ON(mlxsw_sp_qevent_binding_lookup(qevent_block, mlxsw_sp_port, f->sch->handle, + span_trigger))) { + err = -EEXIST; + goto err_binding_exists; + } + + tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, qdisc); + qevent_binding = mlxsw_sp_qevent_binding_create(mlxsw_sp_port, + f->sch->handle, + tclass_num, + span_trigger, + action_mask); + if (IS_ERR(qevent_binding)) { + err = PTR_ERR(qevent_binding); + goto err_binding_create; + } + + err = mlxsw_sp_qevent_binding_configure(qevent_block, qevent_binding, + f->extack); + if (err) + goto err_binding_configure; + + list_add(&qevent_binding->list, &qevent_block->binding_list); + + if (register_block) { + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &mlxsw_sp_qevent_block_cb_list); + } + + return 0; + +err_binding_configure: + mlxsw_sp_qevent_binding_destroy(qevent_binding); +err_binding_create: +err_binding_exists: +err_find_qdisc: + if (!flow_block_cb_decref(block_cb)) + flow_block_cb_free(block_cb); + return err; +} + +static void mlxsw_sp_setup_tc_block_qevent_unbind(struct mlxsw_sp_port *mlxsw_sp_port, + struct flow_block_offload *f, + enum mlxsw_sp_span_trigger 
span_trigger) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_qevent_binding *qevent_binding; + struct mlxsw_sp_qevent_block *qevent_block; + struct flow_block_cb *block_cb; + + block_cb = flow_block_cb_lookup(f->block, mlxsw_sp_qevent_block_cb, mlxsw_sp); + if (!block_cb) + return; + qevent_block = flow_block_cb_priv(block_cb); + + qevent_binding = mlxsw_sp_qevent_binding_lookup(qevent_block, mlxsw_sp_port, f->sch->handle, + span_trigger); + if (!qevent_binding) + return; + + list_del(&qevent_binding->list); + mlxsw_sp_qevent_binding_deconfigure(qevent_block, qevent_binding); + mlxsw_sp_qevent_binding_destroy(qevent_binding); + + if (!flow_block_cb_decref(block_cb)) { + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); + } +} + +static int +mlxsw_sp_setup_tc_block_qevent(struct mlxsw_sp_port *mlxsw_sp_port, + struct flow_block_offload *f, + enum mlxsw_sp_span_trigger span_trigger, + unsigned int action_mask) +{ + f->driver_block_list = &mlxsw_sp_qevent_block_cb_list; + + switch (f->command) { + case FLOW_BLOCK_BIND: + return mlxsw_sp_setup_tc_block_qevent_bind(mlxsw_sp_port, f, + span_trigger, + action_mask); + case FLOW_BLOCK_UNBIND: + mlxsw_sp_setup_tc_block_qevent_unbind(mlxsw_sp_port, f, span_trigger); + return 0; + default: + return -EOPNOTSUPP; + } +} + +int mlxsw_sp_setup_tc_block_qevent_early_drop(struct mlxsw_sp_port *mlxsw_sp_port, + struct flow_block_offload *f) +{ + unsigned int action_mask = BIT(MLXSW_SP_MALL_ACTION_TYPE_MIRROR) | + BIT(MLXSW_SP_MALL_ACTION_TYPE_TRAP); + + return mlxsw_sp_setup_tc_block_qevent(mlxsw_sp_port, f, + MLXSW_SP_SPAN_TRIGGER_EARLY_DROP, + action_mask); +} + +int mlxsw_sp_setup_tc_block_qevent_mark(struct mlxsw_sp_port *mlxsw_sp_port, + struct flow_block_offload *f) +{ + unsigned int action_mask = BIT(MLXSW_SP_MALL_ACTION_TYPE_MIRROR); + + return mlxsw_sp_setup_tc_block_qevent(mlxsw_sp_port, f, + MLXSW_SP_SPAN_TRIGGER_ECN, + action_mask); +} + +int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp_qdisc_state *qdisc_state; + + qdisc_state = kzalloc(sizeof(*qdisc_state), GFP_KERNEL); + if (!qdisc_state) + return -ENOMEM; + + mutex_init(&qdisc_state->lock); + mlxsw_sp_port->qdisc = qdisc_state; + return 0; +} + +void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ + mutex_destroy(&mlxsw_sp_port->qdisc->lock); + kfree(mlxsw_sp_port->qdisc); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c new file mode 100644 index 000000000..ab0aa1a61 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -0,0 +1,10617 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2016-2018 Mellanox Technologies. 
All rights reserved */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/rhashtable.h> +#include <linux/bitops.h> +#include <linux/in6.h> +#include <linux/notifier.h> +#include <linux/inetdevice.h> +#include <linux/netdevice.h> +#include <linux/if_bridge.h> +#include <linux/socket.h> +#include <linux/route.h> +#include <linux/gcd.h> +#include <linux/if_macvlan.h> +#include <linux/refcount.h> +#include <linux/jhash.h> +#include <linux/net_namespace.h> +#include <linux/mutex.h> +#include <net/netevent.h> +#include <net/neighbour.h> +#include <net/arp.h> +#include <net/inet_dscp.h> +#include <net/ip_fib.h> +#include <net/ip6_fib.h> +#include <net/nexthop.h> +#include <net/fib_rules.h> +#include <net/ip_tunnels.h> +#include <net/l3mdev.h> +#include <net/addrconf.h> +#include <net/ndisc.h> +#include <net/ipv6.h> +#include <net/fib_notifier.h> +#include <net/switchdev.h> + +#include "spectrum.h" +#include "core.h" +#include "reg.h" +#include "spectrum_cnt.h" +#include "spectrum_dpipe.h" +#include "spectrum_ipip.h" +#include "spectrum_mr.h" +#include "spectrum_mr_tcam.h" +#include "spectrum_router.h" +#include "spectrum_span.h" + +struct mlxsw_sp_fib; +struct mlxsw_sp_vr; +struct mlxsw_sp_lpm_tree; +struct mlxsw_sp_rif_ops; + +struct mlxsw_sp_rif { + struct list_head nexthop_list; + struct list_head neigh_list; + struct net_device *dev; /* NULL for underlay RIF */ + struct mlxsw_sp_fid *fid; + unsigned char addr[ETH_ALEN]; + int mtu; + u16 rif_index; + u8 mac_profile_id; + u16 vr_id; + const struct mlxsw_sp_rif_ops *ops; + struct mlxsw_sp *mlxsw_sp; + + unsigned int counter_ingress; + bool counter_ingress_valid; + unsigned int counter_egress; + bool counter_egress_valid; +}; + +struct mlxsw_sp_rif_params { + struct net_device *dev; + union { + u16 system_port; + u16 lag_id; + }; + u16 vid; + bool lag; +}; + +struct mlxsw_sp_rif_subport { + struct mlxsw_sp_rif common; + refcount_t ref_count; + union { + u16 system_port; + u16 lag_id; + }; + u16 vid; + bool lag; +}; + +struct mlxsw_sp_rif_ipip_lb { + struct mlxsw_sp_rif common; + struct mlxsw_sp_rif_ipip_lb_config lb_config; + u16 ul_vr_id; /* Reserved for Spectrum-2. */ + u16 ul_rif_id; /* Reserved for Spectrum. 
*/ +}; + +struct mlxsw_sp_rif_params_ipip_lb { + struct mlxsw_sp_rif_params common; + struct mlxsw_sp_rif_ipip_lb_config lb_config; +}; + +struct mlxsw_sp_rif_ops { + enum mlxsw_sp_rif_type type; + size_t rif_size; + + void (*setup)(struct mlxsw_sp_rif *rif, + const struct mlxsw_sp_rif_params *params); + int (*configure)(struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack); + void (*deconfigure)(struct mlxsw_sp_rif *rif); + struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack); + void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac); +}; + +struct mlxsw_sp_rif_mac_profile { + unsigned char mac_prefix[ETH_ALEN]; + refcount_t ref_count; + u8 id; +}; + +struct mlxsw_sp_router_ops { + int (*init)(struct mlxsw_sp *mlxsw_sp); + int (*ipips_init)(struct mlxsw_sp *mlxsw_sp); +}; + +static struct mlxsw_sp_rif * +mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev); +static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); +static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree); +static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree); +static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_fib *fib, + u8 tree_id); +static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_fib *fib); + +static unsigned int * +mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir) +{ + switch (dir) { + case MLXSW_SP_RIF_COUNTER_EGRESS: + return &rif->counter_egress; + case MLXSW_SP_RIF_COUNTER_INGRESS: + return &rif->counter_ingress; + } + return NULL; +} + +static bool +mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir) +{ + switch (dir) { + case MLXSW_SP_RIF_COUNTER_EGRESS: + return rif->counter_egress_valid; + case MLXSW_SP_RIF_COUNTER_INGRESS: + return rif->counter_ingress_valid; + } + return false; +} + +static void +mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir, + bool valid) +{ + switch (dir) { + case MLXSW_SP_RIF_COUNTER_EGRESS: + rif->counter_egress_valid = valid; + break; + case MLXSW_SP_RIF_COUNTER_INGRESS: + rif->counter_ingress_valid = valid; + break; + } +} + +static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index, + unsigned int counter_index, bool enable, + enum mlxsw_sp_rif_counter_dir dir) +{ + char ritr_pl[MLXSW_REG_RITR_LEN]; + bool is_egress = false; + int err; + + if (dir == MLXSW_SP_RIF_COUNTER_EGRESS) + is_egress = true; + mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); + if (err) + return err; + + mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable, + is_egress); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir, u64 *cnt) +{ + char ricnt_pl[MLXSW_REG_RICNT_LEN]; + unsigned int *p_counter_index; + bool valid; + int err; + + valid = mlxsw_sp_rif_counter_valid_get(rif, dir); + if (!valid) + return -EINVAL; + + p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir); + if (!p_counter_index) + return -EINVAL; + mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index, + MLXSW_REG_RICNT_OPCODE_NOP); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl); + if (err) + return err; + *cnt = 
mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl); + return 0; +} + +struct mlxsw_sp_rif_counter_set_basic { + u64 good_unicast_packets; + u64 good_multicast_packets; + u64 good_broadcast_packets; + u64 good_unicast_bytes; + u64 good_multicast_bytes; + u64 good_broadcast_bytes; + u64 error_packets; + u64 discard_packets; + u64 error_bytes; + u64 discard_bytes; +}; + +static int +mlxsw_sp_rif_counter_fetch_clear(struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir, + struct mlxsw_sp_rif_counter_set_basic *set) +{ + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + char ricnt_pl[MLXSW_REG_RICNT_LEN]; + unsigned int *p_counter_index; + int err; + + if (!mlxsw_sp_rif_counter_valid_get(rif, dir)) + return -EINVAL; + + p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir); + if (!p_counter_index) + return -EINVAL; + + mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index, + MLXSW_REG_RICNT_OPCODE_CLEAR); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl); + if (err) + return err; + + if (!set) + return 0; + +#define MLXSW_SP_RIF_COUNTER_EXTRACT(NAME) \ + (set->NAME = mlxsw_reg_ricnt_ ## NAME ## _get(ricnt_pl)) + + MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_packets); + MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_packets); + MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_packets); + MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_bytes); + MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_bytes); + MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_bytes); + MLXSW_SP_RIF_COUNTER_EXTRACT(error_packets); + MLXSW_SP_RIF_COUNTER_EXTRACT(discard_packets); + MLXSW_SP_RIF_COUNTER_EXTRACT(error_bytes); + MLXSW_SP_RIF_COUNTER_EXTRACT(discard_bytes); + +#undef MLXSW_SP_RIF_COUNTER_EXTRACT + + return 0; +} + +static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp, + unsigned int counter_index) +{ + char ricnt_pl[MLXSW_REG_RICNT_LEN]; + + mlxsw_reg_ricnt_pack(ricnt_pl, counter_index, + MLXSW_REG_RICNT_OPCODE_CLEAR); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl); +} + +int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir) +{ + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + unsigned int *p_counter_index; + int err; + + if (mlxsw_sp_rif_counter_valid_get(rif, dir)) + return 0; + + p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir); + if (!p_counter_index) + return -EINVAL; + + err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF, + p_counter_index); + if (err) + return err; + + err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index); + if (err) + goto err_counter_clear; + + err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index, + *p_counter_index, true, dir); + if (err) + goto err_counter_edit; + mlxsw_sp_rif_counter_valid_set(rif, dir, true); + return 0; + +err_counter_edit: +err_counter_clear: + mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF, + *p_counter_index); + return err; +} + +void mlxsw_sp_rif_counter_free(struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir) +{ + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + unsigned int *p_counter_index; + + if (!mlxsw_sp_rif_counter_valid_get(rif, dir)) + return; + + p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir); + if (WARN_ON(!p_counter_index)) + return; + mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index, + *p_counter_index, false, dir); + mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF, + *p_counter_index); + mlxsw_sp_rif_counter_valid_set(rif, dir, false); +} + +static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif) 
+{ + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct devlink *devlink; + + devlink = priv_to_devlink(mlxsw_sp->core); + if (!devlink_dpipe_table_counter_enabled(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_ERIF)) + return; + mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS); +} + +static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif) +{ + mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS); +} + +#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1) + +struct mlxsw_sp_prefix_usage { + DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT); +}; + +#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \ + for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT) + +static bool +mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1, + struct mlxsw_sp_prefix_usage *prefix_usage2) +{ + return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1)); +} + +static void +mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1, + struct mlxsw_sp_prefix_usage *prefix_usage2) +{ + memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1)); +} + +static void +mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage, + unsigned char prefix_len) +{ + set_bit(prefix_len, prefix_usage->b); +} + +static void +mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage, + unsigned char prefix_len) +{ + clear_bit(prefix_len, prefix_usage->b); +} + +struct mlxsw_sp_fib_key { + unsigned char addr[sizeof(struct in6_addr)]; + unsigned char prefix_len; +}; + +enum mlxsw_sp_fib_entry_type { + MLXSW_SP_FIB_ENTRY_TYPE_REMOTE, + MLXSW_SP_FIB_ENTRY_TYPE_LOCAL, + MLXSW_SP_FIB_ENTRY_TYPE_TRAP, + MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE, + MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE, + + /* This is a special case of local delivery, where a packet should be + * decapsulated on reception. Note that there is no corresponding ENCAP, + * because that's a type of next hop, not of FIB entry. (There can be + * several next hops in a REMOTE entry, and some of them may be + * encapsulating entries.) + */ + MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP, + MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP, +}; + +struct mlxsw_sp_nexthop_group_info; +struct mlxsw_sp_nexthop_group; +struct mlxsw_sp_fib_entry; + +struct mlxsw_sp_fib_node { + struct mlxsw_sp_fib_entry *fib_entry; + struct list_head list; + struct rhash_head ht_node; + struct mlxsw_sp_fib *fib; + struct mlxsw_sp_fib_key key; +}; + +struct mlxsw_sp_fib_entry_decap { + struct mlxsw_sp_ipip_entry *ipip_entry; + u32 tunnel_index; +}; + +struct mlxsw_sp_fib_entry { + struct mlxsw_sp_fib_node *fib_node; + enum mlxsw_sp_fib_entry_type type; + struct list_head nexthop_group_node; + struct mlxsw_sp_nexthop_group *nh_group; + struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. 
 */
+};
+
+struct mlxsw_sp_fib4_entry {
+	struct mlxsw_sp_fib_entry common;
+	struct fib_info *fi;
+	u32 tb_id;
+	dscp_t dscp;
+	u8 type;
+};
+
+struct mlxsw_sp_fib6_entry {
+	struct mlxsw_sp_fib_entry common;
+	struct list_head rt6_list;
+	unsigned int nrt6;
+};
+
+struct mlxsw_sp_rt6 {
+	struct list_head list;
+	struct fib6_info *rt;
+};
+
+struct mlxsw_sp_lpm_tree {
+	u8 id; /* tree ID */
+	unsigned int ref_count;
+	enum mlxsw_sp_l3proto proto;
+	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
+	struct mlxsw_sp_prefix_usage prefix_usage;
+};
+
+struct mlxsw_sp_fib {
+	struct rhashtable ht;
+	struct list_head node_list;
+	struct mlxsw_sp_vr *vr;
+	struct mlxsw_sp_lpm_tree *lpm_tree;
+	enum mlxsw_sp_l3proto proto;
+};
+
+struct mlxsw_sp_vr {
+	u16 id; /* virtual router ID */
+	u32 tb_id; /* kernel fib table id */
+	unsigned int rif_count;
+	struct mlxsw_sp_fib *fib4;
+	struct mlxsw_sp_fib *fib6;
+	struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
+	struct mlxsw_sp_rif *ul_rif;
+	refcount_t ul_rif_refcnt;
+};
+
+static const struct rhashtable_params mlxsw_sp_fib_ht_params;
+
+static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
+						struct mlxsw_sp_vr *vr,
+						enum mlxsw_sp_l3proto proto)
+{
+	struct mlxsw_sp_lpm_tree *lpm_tree;
+	struct mlxsw_sp_fib *fib;
+	int err;
+
+	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
+	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
+	if (!fib)
+		return ERR_PTR(-ENOMEM);
+	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
+	if (err)
+		goto err_rhashtable_init;
+	INIT_LIST_HEAD(&fib->node_list);
+	fib->proto = proto;
+	fib->vr = vr;
+	fib->lpm_tree = lpm_tree;
+	mlxsw_sp_lpm_tree_hold(lpm_tree);
+	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
+	if (err)
+		goto err_lpm_tree_bind;
+	return fib;
+
+err_lpm_tree_bind:
+	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
+err_rhashtable_init:
+	kfree(fib);
+	return ERR_PTR(err);
+}
+
+static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_fib *fib)
+{
+	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
+	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
+	WARN_ON(!list_empty(&fib->node_list));
+	rhashtable_destroy(&fib->ht);
+	kfree(fib);
+}
+
+static struct mlxsw_sp_lpm_tree *
+mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
+{
+	struct mlxsw_sp_lpm_tree *lpm_tree;
+	int i;
+
+	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
+		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
+		if (lpm_tree->ref_count == 0)
+			return lpm_tree;
+	}
+	return NULL;
+}
+
+static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_lpm_tree *lpm_tree)
+{
+	char ralta_pl[MLXSW_REG_RALTA_LEN];
+
+	mlxsw_reg_ralta_pack(ralta_pl, true,
+			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
+			     lpm_tree->id);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
+}
+
+static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_lpm_tree *lpm_tree)
+{
+	char ralta_pl[MLXSW_REG_RALTA_LEN];
+
+	mlxsw_reg_ralta_pack(ralta_pl, false,
+			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
+			     lpm_tree->id);
+	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
+}
+
+static int
+mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_prefix_usage *prefix_usage,
+				  struct mlxsw_sp_lpm_tree *lpm_tree)
+{
+	char ralst_pl[MLXSW_REG_RALST_LEN];
+	u8 root_bin = 0;
+	u8 prefix;
+	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
+
+	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
+		root_bin =
prefix; + + mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id); + mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) { + if (prefix == 0) + continue; + mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix, + MLXSW_REG_RALST_BIN_NO_CHILD); + last_prefix = prefix; + } + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl); +} + +static struct mlxsw_sp_lpm_tree * +mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_prefix_usage *prefix_usage, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_lpm_tree *lpm_tree; + int err; + + lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp); + if (!lpm_tree) + return ERR_PTR(-EBUSY); + lpm_tree->proto = proto; + err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree); + if (err) + return ERR_PTR(err); + + err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage, + lpm_tree); + if (err) + goto err_left_struct_set; + memcpy(&lpm_tree->prefix_usage, prefix_usage, + sizeof(lpm_tree->prefix_usage)); + memset(&lpm_tree->prefix_ref_count, 0, + sizeof(lpm_tree->prefix_ref_count)); + lpm_tree->ref_count = 1; + return lpm_tree; + +err_left_struct_set: + mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree); + return ERR_PTR(err); +} + +static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) +{ + mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree); +} + +static struct mlxsw_sp_lpm_tree * +mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_prefix_usage *prefix_usage, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_lpm_tree *lpm_tree; + int i; + + for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) { + lpm_tree = &mlxsw_sp->router->lpm.trees[i]; + if (lpm_tree->ref_count != 0 && + lpm_tree->proto == proto && + mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage, + prefix_usage)) { + mlxsw_sp_lpm_tree_hold(lpm_tree); + return lpm_tree; + } + } + return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto); +} + +static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree) +{ + lpm_tree->ref_count++; +} + +static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) +{ + if (--lpm_tree->ref_count == 0) + mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree); +} + +#define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */ + +static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } }; + struct mlxsw_sp_lpm_tree *lpm_tree; + u64 max_trees; + int err, i; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES)) + return -EIO; + + max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES); + mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN; + mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count, + sizeof(struct mlxsw_sp_lpm_tree), + GFP_KERNEL); + if (!mlxsw_sp->router->lpm.trees) + return -ENOMEM; + + for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) { + lpm_tree = &mlxsw_sp->router->lpm.trees[i]; + lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN; + } + + lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, + MLXSW_SP_L3_PROTO_IPV4); + if (IS_ERR(lpm_tree)) { + err = PTR_ERR(lpm_tree); + goto err_ipv4_tree_get; + } + mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree; + + lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, + MLXSW_SP_L3_PROTO_IPV6); + if (IS_ERR(lpm_tree)) { + err = PTR_ERR(lpm_tree); + goto err_ipv6_tree_get; + } + mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree; + + return 
0; + +err_ipv6_tree_get: + lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4]; + mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); +err_ipv4_tree_get: + kfree(mlxsw_sp->router->lpm.trees); + return err; +} + +static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_lpm_tree *lpm_tree; + + lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6]; + mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); + + lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4]; + mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); + + kfree(mlxsw_sp->router->lpm.trees); +} + +static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr) +{ + return !!vr->fib4 || !!vr->fib6 || + !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] || + !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]; +} + +static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_vr *vr; + int i; + + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + vr = &mlxsw_sp->router->vrs[i]; + if (!mlxsw_sp_vr_is_used(vr)) + return vr; + } + return NULL; +} + +static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_fib *fib, u8 tree_id) +{ + char raltb_pl[MLXSW_REG_RALTB_LEN]; + + mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id, + (enum mlxsw_reg_ralxx_protocol) fib->proto, + tree_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); +} + +static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_fib *fib) +{ + char raltb_pl[MLXSW_REG_RALTB_LEN]; + + /* Bind to tree 0 which is default */ + mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id, + (enum mlxsw_reg_ralxx_protocol) fib->proto, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); +} + +static u32 mlxsw_sp_fix_tb_id(u32 tb_id) +{ + /* For our purpose, squash main, default and local tables into one */ + if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT) + tb_id = RT_TABLE_MAIN; + return tb_id; +} + +static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp, + u32 tb_id) +{ + struct mlxsw_sp_vr *vr; + int i; + + tb_id = mlxsw_sp_fix_tb_id(tb_id); + + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + vr = &mlxsw_sp->router->vrs[i]; + if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id) + return vr; + } + return NULL; +} + +int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + u16 *vr_id) +{ + struct mlxsw_sp_vr *vr; + int err = 0; + + mutex_lock(&mlxsw_sp->router->lock); + vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id); + if (!vr) { + err = -ESRCH; + goto out; + } + *vr_id = vr->id; +out: + mutex_unlock(&mlxsw_sp->router->lock); + return err; +} + +static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr, + enum mlxsw_sp_l3proto proto) +{ + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return vr->fib4; + case MLXSW_SP_L3_PROTO_IPV6: + return vr->fib6; + } + return NULL; +} + +static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, + u32 tb_id, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_mr_table *mr4_table, *mr6_table; + struct mlxsw_sp_fib *fib4; + struct mlxsw_sp_fib *fib6; + struct mlxsw_sp_vr *vr; + int err; + + vr = mlxsw_sp_vr_find_unused(mlxsw_sp); + if (!vr) { + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers"); + return ERR_PTR(-EBUSY); + } + fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4); + if (IS_ERR(fib4)) + return ERR_CAST(fib4); + fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, 
MLXSW_SP_L3_PROTO_IPV6); + if (IS_ERR(fib6)) { + err = PTR_ERR(fib6); + goto err_fib6_create; + } + mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id, + MLXSW_SP_L3_PROTO_IPV4); + if (IS_ERR(mr4_table)) { + err = PTR_ERR(mr4_table); + goto err_mr4_table_create; + } + mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id, + MLXSW_SP_L3_PROTO_IPV6); + if (IS_ERR(mr6_table)) { + err = PTR_ERR(mr6_table); + goto err_mr6_table_create; + } + + vr->fib4 = fib4; + vr->fib6 = fib6; + vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table; + vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table; + vr->tb_id = tb_id; + return vr; + +err_mr6_table_create: + mlxsw_sp_mr_table_destroy(mr4_table); +err_mr4_table_create: + mlxsw_sp_fib_destroy(mlxsw_sp, fib6); +err_fib6_create: + mlxsw_sp_fib_destroy(mlxsw_sp, fib4); + return ERR_PTR(err); +} + +static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vr *vr) +{ + mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]); + vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL; + mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]); + vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL; + mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6); + vr->fib6 = NULL; + mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4); + vr->fib4 = NULL; +} + +static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_vr *vr; + + tb_id = mlxsw_sp_fix_tb_id(tb_id); + vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id); + if (!vr) + vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack); + return vr; +} + +static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr) +{ + if (!vr->rif_count && list_empty(&vr->fib4->node_list) && + list_empty(&vr->fib6->node_list) && + mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) && + mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6])) + mlxsw_sp_vr_destroy(mlxsw_sp, vr); +} + +static bool +mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr, + enum mlxsw_sp_l3proto proto, u8 tree_id) +{ + struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto); + + if (!mlxsw_sp_vr_is_used(vr)) + return false; + if (fib->lpm_tree->id == tree_id) + return true; + return false; +} + +static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib *fib, + struct mlxsw_sp_lpm_tree *new_tree) +{ + struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree; + int err; + + fib->lpm_tree = new_tree; + mlxsw_sp_lpm_tree_hold(new_tree); + err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id); + if (err) + goto err_tree_bind; + mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree); + return 0; + +err_tree_bind: + mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree); + fib->lpm_tree = old_tree; + return err; +} + +static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib *fib, + struct mlxsw_sp_lpm_tree *new_tree) +{ + enum mlxsw_sp_l3proto proto = fib->proto; + struct mlxsw_sp_lpm_tree *old_tree; + u8 old_id, new_id = new_tree->id; + struct mlxsw_sp_vr *vr; + int i, err; + + old_tree = mlxsw_sp->router->lpm.proto_trees[proto]; + old_id = old_tree->id; + + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + vr = &mlxsw_sp->router->vrs[i]; + if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id)) + continue; + err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp, + mlxsw_sp_vr_fib(vr, proto), + new_tree); + if (err) + goto err_tree_replace; + } + + memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count, + 
	       sizeof(new_tree->prefix_ref_count));
+	mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
+	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
+
+	return 0;
+
+err_tree_replace:
+	for (i--; i >= 0; i--) {
+		vr = &mlxsw_sp->router->vrs[i];
+		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
+			continue;
+		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
+					     mlxsw_sp_vr_fib(vr, proto),
+					     old_tree);
+	}
+	return err;
+}
+
+static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
+{
+	struct mlxsw_sp_vr *vr;
+	u64 max_vrs;
+	int i;
+
+	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
+		return -EIO;
+
+	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
+	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
+					GFP_KERNEL);
+	if (!mlxsw_sp->router->vrs)
+		return -ENOMEM;
+
+	for (i = 0; i < max_vrs; i++) {
+		vr = &mlxsw_sp->router->vrs[i];
+		vr->id = i;
+	}
+
+	return 0;
+}
+
+static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
+
+static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	/* At this stage we're guaranteed not to have new incoming
+	 * FIB notifications and the work queue is free from FIBs
+	 * sitting on top of mlxsw netdevs. However, we can still
+	 * have other FIBs queued. Flush the queue before flushing
+	 * the device's tables. No need for locks, as we're the only
+	 * writer.
+	 */
+	mlxsw_core_flush_owq();
+	mlxsw_sp_router_fib_flush(mlxsw_sp);
+	kfree(mlxsw_sp->router->vrs);
+}
+
+u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
+{
+	struct net_device *d;
+	u32 tb_id;
+
+	rcu_read_lock();
+	d = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
+	if (d)
+		tb_id = l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
+	else
+		tb_id = RT_TABLE_MAIN;
+	rcu_read_unlock();
+
+	return tb_id;
+}
+
+static struct mlxsw_sp_rif *
+mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
+		    const struct mlxsw_sp_rif_params *params,
+		    struct netlink_ext_ack *extack);
+
+static struct mlxsw_sp_rif_ipip_lb *
+mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
+				enum mlxsw_sp_ipip_type ipipt,
+				struct net_device *ol_dev,
+				struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_rif_params_ipip_lb lb_params;
+	const struct mlxsw_sp_ipip_ops *ipip_ops;
+	struct mlxsw_sp_rif *rif;
+
+	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
+	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
+		.common.dev = ol_dev,
+		.common.lag = false,
+		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
+	};
+
+	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
+	if (IS_ERR(rif))
+		return ERR_CAST(rif);
+	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
+}
+
+static struct mlxsw_sp_ipip_entry *
+mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
+			  enum mlxsw_sp_ipip_type ipipt,
+			  struct net_device *ol_dev)
+{
+	const struct mlxsw_sp_ipip_ops *ipip_ops;
+	struct mlxsw_sp_ipip_entry *ipip_entry;
+	struct mlxsw_sp_ipip_entry *ret = NULL;
+	int err;
+
+	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
+	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
+	if (!ipip_entry)
+		return ERR_PTR(-ENOMEM);
+
+	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
+							    ol_dev, NULL);
+	if (IS_ERR(ipip_entry->ol_lb)) {
+		ret = ERR_CAST(ipip_entry->ol_lb);
+		goto err_ol_ipip_lb_create;
+	}
+
+	ipip_entry->ipipt = ipipt;
+	ipip_entry->ol_dev = ol_dev;
+	ipip_entry->parms = ipip_ops->parms_init(ol_dev);
+
+	err = ipip_ops->rem_ip_addr_set(mlxsw_sp, ipip_entry);
+	if (err) {
+		ret = ERR_PTR(err);
+		goto err_rem_ip_addr_set;
+	}
+
+	return ipip_entry;
+
+err_rem_ip_addr_set:
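+	/* Setting the tunnel's remote IP failed: release the loopback RIF
+	 * created above and fall through to free the entry itself.
+	 */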
+	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
+err_ol_ipip_lb_create:
+	kfree(ipip_entry);
+	return ret;
+}
+
+static void mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+	const struct mlxsw_sp_ipip_ops *ipip_ops =
+		mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
+
+	ipip_ops->rem_ip_addr_unset(mlxsw_sp, ipip_entry);
+	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
+	kfree(ipip_entry);
+}
+
+static bool
+mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
+				  const enum mlxsw_sp_l3proto ul_proto,
+				  union mlxsw_sp_l3addr saddr,
+				  u32 ul_tb_id,
+				  struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
+	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
+	union mlxsw_sp_l3addr tun_saddr;
+
+	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
+		return false;
+
+	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
+	return tun_ul_tb_id == ul_tb_id &&
+	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
+}
+
+static int mlxsw_sp_ipip_decap_parsing_depth_inc(struct mlxsw_sp *mlxsw_sp,
+						 enum mlxsw_sp_ipip_type ipipt)
+{
+	const struct mlxsw_sp_ipip_ops *ipip_ops;
+
+	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
+
+	/* Not all tunnels require increasing the default parsing depth
+	 * (96 bytes).
+	 */
+	if (ipip_ops->inc_parsing_depth)
+		return mlxsw_sp_parsing_depth_inc(mlxsw_sp);
+
+	return 0;
+}
+
+static void mlxsw_sp_ipip_decap_parsing_depth_dec(struct mlxsw_sp *mlxsw_sp,
+						  enum mlxsw_sp_ipip_type ipipt)
+{
+	const struct mlxsw_sp_ipip_ops *ipip_ops =
+		mlxsw_sp->router->ipip_ops_arr[ipipt];
+
+	if (ipip_ops->inc_parsing_depth)
+		mlxsw_sp_parsing_depth_dec(mlxsw_sp);
+}
+
+static int
+mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
+			      struct mlxsw_sp_fib_entry *fib_entry,
+			      struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+	u32 tunnel_index;
+	int err;
+
+	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+				  1, &tunnel_index);
+	if (err)
+		return err;
+
+	err = mlxsw_sp_ipip_decap_parsing_depth_inc(mlxsw_sp,
+						    ipip_entry->ipipt);
+	if (err)
+		goto err_parsing_depth_inc;
+
+	ipip_entry->decap_fib_entry = fib_entry;
+	fib_entry->decap.ipip_entry = ipip_entry;
+	fib_entry->decap.tunnel_index = tunnel_index;
+
+	return 0;
+
+err_parsing_depth_inc:
+	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
+			   fib_entry->decap.tunnel_index);
+	return err;
+}
+
+static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
+					  struct mlxsw_sp_fib_entry *fib_entry)
+{
+	enum mlxsw_sp_ipip_type ipipt = fib_entry->decap.ipip_entry->ipipt;
+
+	/* Unlink this node from the IPIP entry that it's the decap entry of.
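+	 * The adjacency entry taken from the KVD linear area and the extra
+	 * parsing depth requested in mlxsw_sp_fib_entry_decap_init() are
+	 * released below as well.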
*/ + fib_entry->decap.ipip_entry->decap_fib_entry = NULL; + fib_entry->decap.ipip_entry = NULL; + mlxsw_sp_ipip_decap_parsing_depth_dec(mlxsw_sp, ipipt); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, + 1, fib_entry->decap.tunnel_index); +} + +static struct mlxsw_sp_fib_node * +mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr, + size_t addr_len, unsigned char prefix_len); +static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry); + +static void +mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry; + + mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry); + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + + mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); +} + +static void +mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct mlxsw_sp_fib_entry *decap_fib_entry) +{ + if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry, + ipip_entry)) + return; + decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP; + + if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry)) + mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); +} + +static struct mlxsw_sp_fib_entry * +mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + enum mlxsw_sp_l3proto proto, + const union mlxsw_sp_l3addr *addr, + enum mlxsw_sp_fib_entry_type type) +{ + struct mlxsw_sp_fib_node *fib_node; + unsigned char addr_prefix_len; + struct mlxsw_sp_fib *fib; + struct mlxsw_sp_vr *vr; + const void *addrp; + size_t addr_len; + u32 addr4; + + vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id); + if (!vr) + return NULL; + fib = mlxsw_sp_vr_fib(vr, proto); + + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + addr4 = be32_to_cpu(addr->addr4); + addrp = &addr4; + addr_len = 4; + addr_prefix_len = 32; + break; + case MLXSW_SP_L3_PROTO_IPV6: + addrp = &addr->addr6; + addr_len = 16; + addr_prefix_len = 128; + break; + default: + WARN_ON(1); + return NULL; + } + + fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len, + addr_prefix_len); + if (!fib_node || fib_node->fib_entry->type != type) + return NULL; + + return fib_node->fib_entry; +} + +/* Given an IPIP entry, find the corresponding decap route. 
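+ * It is looked up in the FIB of the tunnel's underlay table, under the
+ * tunnel's local (underlay source) address as a /32 or /128 prefix; only
+ * a route that currently traps to the CPU qualifies.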
 */
+static struct mlxsw_sp_fib_entry *
+mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
+			       struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+	struct mlxsw_sp_fib_node *fib_node;
+	const struct mlxsw_sp_ipip_ops *ipip_ops;
+	unsigned char saddr_prefix_len;
+	union mlxsw_sp_l3addr saddr;
+	struct mlxsw_sp_fib *ul_fib;
+	struct mlxsw_sp_vr *ul_vr;
+	const void *saddrp;
+	size_t saddr_len;
+	u32 ul_tb_id;
+	u32 saddr4;
+
+	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
+
+	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
+	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
+	if (!ul_vr)
+		return NULL;
+
+	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
+	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
+					   ipip_entry->ol_dev);
+
+	switch (ipip_ops->ul_proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		saddr4 = be32_to_cpu(saddr.addr4);
+		saddrp = &saddr4;
+		saddr_len = 4;
+		saddr_prefix_len = 32;
+		break;
+	case MLXSW_SP_L3_PROTO_IPV6:
+		saddrp = &saddr.addr6;
+		saddr_len = 16;
+		saddr_prefix_len = 128;
+		break;
+	default:
+		WARN_ON(1);
+		return NULL;
+	}
+
+	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
+					    saddr_prefix_len);
+	if (!fib_node ||
+	    fib_node->fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
+		return NULL;
+
+	return fib_node->fib_entry;
+}
+
+static struct mlxsw_sp_ipip_entry *
+mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
+			   enum mlxsw_sp_ipip_type ipipt,
+			   struct net_device *ol_dev)
+{
+	struct mlxsw_sp_ipip_entry *ipip_entry;
+
+	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
+	if (IS_ERR(ipip_entry))
+		return ipip_entry;
+
+	list_add_tail(&ipip_entry->ipip_list_node,
+		      &mlxsw_sp->router->ipip_list);
+
+	return ipip_entry;
+}
+
+static void
+mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
+			    struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+	list_del(&ipip_entry->ipip_list_node);
+	mlxsw_sp_ipip_entry_dealloc(mlxsw_sp, ipip_entry);
+}
+
+static bool
+mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
+				  const struct net_device *ul_dev,
+				  enum mlxsw_sp_l3proto ul_proto,
+				  union mlxsw_sp_l3addr ul_dip,
+				  struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
+	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
+
+	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
+		return false;
+
+	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
+						 ul_tb_id, ipip_entry);
+}
+
+/* Given decap parameters, find the corresponding IPIP entry.
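+ * A tunnel matches when its underlay protocol and underlay table agree
+ * with the given underlay device, and its local address equals the given
+ * underlay destination IP.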
*/ +static struct mlxsw_sp_ipip_entry * +mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, int ul_dev_ifindex, + enum mlxsw_sp_l3proto ul_proto, + union mlxsw_sp_l3addr ul_dip) +{ + struct mlxsw_sp_ipip_entry *ipip_entry = NULL; + struct net_device *ul_dev; + + rcu_read_lock(); + + ul_dev = dev_get_by_index_rcu(mlxsw_sp_net(mlxsw_sp), ul_dev_ifindex); + if (!ul_dev) + goto out_unlock; + + list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list, + ipip_list_node) + if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev, + ul_proto, ul_dip, + ipip_entry)) + goto out_unlock; + + rcu_read_unlock(); + + return NULL; + +out_unlock: + rcu_read_unlock(); + return ipip_entry; +} + +static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev, + enum mlxsw_sp_ipip_type *p_type) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + const struct mlxsw_sp_ipip_ops *ipip_ops; + enum mlxsw_sp_ipip_type ipipt; + + for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) { + ipip_ops = router->ipip_ops_arr[ipipt]; + if (dev->type == ipip_ops->dev_type) { + if (p_type) + *p_type = ipipt; + return true; + } + } + return false; +} + +static bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) +{ + return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL); +} + +static struct mlxsw_sp_ipip_entry * +mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list, + ipip_list_node) + if (ipip_entry->ol_dev == ol_dev) + return ipip_entry; + + return NULL; +} + +static struct mlxsw_sp_ipip_entry * +mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *ul_dev, + struct mlxsw_sp_ipip_entry *start) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list, + ipip_list_node); + list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list, + ipip_list_node) { + struct net_device *ol_dev = ipip_entry->ol_dev; + struct net_device *ipip_ul_dev; + + rcu_read_lock(); + ipip_ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + rcu_read_unlock(); + + if (ipip_ul_dev == ul_dev) + return ipip_entry; + } + + return NULL; +} + +static bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) +{ + return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL); +} + +static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev, + enum mlxsw_sp_ipip_type ipipt) +{ + const struct mlxsw_sp_ipip_ops *ops + = mlxsw_sp->router->ipip_ops_arr[ipipt]; + + return ops->can_offload(mlxsw_sp, ol_dev); +} + +static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + enum mlxsw_sp_ipip_type ipipt = MLXSW_SP_IPIP_TYPE_MAX; + struct mlxsw_sp_ipip_entry *ipip_entry; + enum mlxsw_sp_l3proto ul_proto; + union mlxsw_sp_l3addr saddr; + u32 ul_tb_id; + + mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt); + if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) { + ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev); + ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto; + saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev); + if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto, + saddr, ul_tb_id, + NULL)) { + ipip_entry = 
mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt, + ol_dev); + if (IS_ERR(ipip_entry)) + return PTR_ERR(ipip_entry); + } + } + + return 0; +} + +static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (ipip_entry) + mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry); +} + +static void +mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + struct mlxsw_sp_fib_entry *decap_fib_entry; + + decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry); + if (decap_fib_entry) + mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, + decap_fib_entry); +} + +static int +mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id, + u16 ul_rif_id, bool enable) +{ + struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config; + enum mlxsw_reg_ritr_loopback_ipip_options ipip_options; + struct mlxsw_sp_rif *rif = &lb_rif->common; + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + char ritr_pl[MLXSW_REG_RITR_LEN]; + struct in6_addr *saddr6; + u32 saddr4; + + ipip_options = MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET; + switch (lb_cf.ul_protocol) { + case MLXSW_SP_L3_PROTO_IPV4: + saddr4 = be32_to_cpu(lb_cf.saddr.addr4); + mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF, + rif->rif_index, rif->vr_id, rif->dev->mtu); + mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt, + ipip_options, ul_vr_id, + ul_rif_id, saddr4, + lb_cf.okey); + break; + + case MLXSW_SP_L3_PROTO_IPV6: + saddr6 = &lb_cf.saddr.addr6; + mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF, + rif->rif_index, rif->vr_id, rif->dev->mtu); + mlxsw_reg_ritr_loopback_ipip6_pack(ritr_pl, lb_cf.lb_ipipt, + ipip_options, ul_vr_id, + ul_rif_id, saddr6, + lb_cf.okey); + break; + } + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + struct mlxsw_sp_rif_ipip_lb *lb_rif; + int err = 0; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (ipip_entry) { + lb_rif = ipip_entry->ol_lb; + err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id, + lb_rif->ul_rif_id, true); + if (err) + goto out; + lb_rif->common.mtu = ol_dev->mtu; + } + +out: + return err; +} + +static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (ipip_entry) + mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry); +} + +static void +mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + if (ipip_entry->decap_fib_entry) + mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); +} + +static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (ipip_entry) + mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry); +} + +static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *old_rif, + struct mlxsw_sp_rif *new_rif); +static int +mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp, + struct 
mlxsw_sp_ipip_entry *ipip_entry, + bool keep_encap, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb; + struct mlxsw_sp_rif_ipip_lb *new_lb_rif; + + new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, + ipip_entry->ipipt, + ipip_entry->ol_dev, + extack); + if (IS_ERR(new_lb_rif)) + return PTR_ERR(new_lb_rif); + ipip_entry->ol_lb = new_lb_rif; + + if (keep_encap) + mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common, + &new_lb_rif->common); + + mlxsw_sp_rif_destroy(&old_lb_rif->common); + + return 0; +} + +static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif); + +/** + * __mlxsw_sp_ipip_entry_update_tunnel - Update offload related to IPIP entry. + * @mlxsw_sp: mlxsw_sp. + * @ipip_entry: IPIP entry. + * @recreate_loopback: Recreates the associated loopback RIF. + * @keep_encap: Updates next hops that use the tunnel netdevice. This is only + * relevant when recreate_loopback is true. + * @update_nexthops: Updates next hops, keeping the current loopback RIF. This + * is only relevant when recreate_loopback is false. + * @extack: extack. + * + * Return: Non-zero value on failure. + */ +int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + bool recreate_loopback, + bool keep_encap, + bool update_nexthops, + struct netlink_ext_ack *extack) +{ + int err; + + /* RIFs can't be edited, so to update loopback, we need to destroy and + * recreate it. That creates a window of opportunity where RALUE and + * RATR registers end up referencing a RIF that's already gone. RATRs + * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care + * of RALUE, demote the decap route back. + */ + if (ipip_entry->decap_fib_entry) + mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); + + if (recreate_loopback) { + err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry, + keep_encap, extack); + if (err) + return err; + } else if (update_nexthops) { + mlxsw_sp_nexthop_rif_update(mlxsw_sp, + &ipip_entry->ol_lb->common); + } + + if (ipip_entry->ol_dev->flags & IFF_UP) + mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry); + + return 0; +} + +static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_ipip_entry *ipip_entry = + mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + + if (!ipip_entry) + return 0; + + return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, + true, false, false, extack); +} + +static int +mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct net_device *ul_dev, + bool *demote_this, + struct netlink_ext_ack *extack) +{ + u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN; + enum mlxsw_sp_l3proto ul_proto; + union mlxsw_sp_l3addr saddr; + + /* Moving underlay to a different VRF might cause local address + * conflict, and the conflicting tunnels need to be demoted. 
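+	 * Two offloaded tunnels may not share a local address within one
+	 * underlay table (see mlxsw_sp_ipip_demote_tunnel_by_saddr()).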
+ */ + ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto; + saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev); + if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto, + saddr, ul_tb_id, + ipip_entry)) { + *demote_this = true; + return 0; + } + + return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, + true, true, false, extack); +} + +static int +mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct net_device *ul_dev) +{ + return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, + false, false, true, NULL); +} + +static int +mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct net_device *ul_dev) +{ + /* A down underlay device causes encapsulated packets to not be + * forwarded, but decap still works. So refresh next hops without + * touching anything else. + */ + return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, + false, false, true, NULL); +} + +static int +mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev, + struct netlink_ext_ack *extack) +{ + const struct mlxsw_sp_ipip_ops *ipip_ops; + struct mlxsw_sp_ipip_entry *ipip_entry; + int err; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (!ipip_entry) + /* A change might make a tunnel eligible for offloading, but + * that is currently not implemented. What falls to slow path + * stays there. + */ + return 0; + + /* A change might make a tunnel not eligible for offloading. */ + if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, + ipip_entry->ipipt)) { + mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); + return 0; + } + + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; + err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack); + return err; +} + +void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + struct net_device *ol_dev = ipip_entry->ol_dev; + + if (ol_dev->flags & IFF_UP) + mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry); + mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry); +} + +/* The configuration where several tunnels have the same local address in the + * same underlay table needs special treatment in the HW. That is currently not + * implemented in the driver. This function finds and demotes the first tunnel + * with a given source address, except the one passed in the argument + * `except'. 
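+ * Returns true if such a tunnel was found and demoted.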
+ */ +bool +mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_l3proto ul_proto, + union mlxsw_sp_l3addr saddr, + u32 ul_tb_id, + const struct mlxsw_sp_ipip_entry *except) +{ + struct mlxsw_sp_ipip_entry *ipip_entry, *tmp; + + list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list, + ipip_list_node) { + if (ipip_entry != except && + mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr, + ul_tb_id, ipip_entry)) { + mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); + return true; + } + } + + return false; +} + +static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp, + struct net_device *ul_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry, *tmp; + + list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list, + ipip_list_node) { + struct net_device *ol_dev = ipip_entry->ol_dev; + struct net_device *ipip_ul_dev; + + rcu_read_lock(); + ipip_ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + rcu_read_unlock(); + if (ipip_ul_dev == ul_dev) + mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); + } +} + +static int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev, + unsigned long event, + struct netdev_notifier_info *info) +{ + struct netdev_notifier_changeupper_info *chup; + struct netlink_ext_ack *extack; + int err = 0; + + switch (event) { + case NETDEV_REGISTER: + err = mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev); + break; + case NETDEV_UNREGISTER: + mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev); + break; + case NETDEV_UP: + mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev); + break; + case NETDEV_DOWN: + mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev); + break; + case NETDEV_CHANGEUPPER: + chup = container_of(info, typeof(*chup), info); + extack = info->extack; + if (netif_is_l3_master(chup->upper_dev)) + err = mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp, + ol_dev, + extack); + break; + case NETDEV_CHANGE: + extack = info->extack; + err = mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp, + ol_dev, extack); + break; + case NETDEV_CHANGEMTU: + err = mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev); + break; + } + return err; +} + +static int +__mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct net_device *ul_dev, + bool *demote_this, + unsigned long event, + struct netdev_notifier_info *info) +{ + struct netdev_notifier_changeupper_info *chup; + struct netlink_ext_ack *extack; + + switch (event) { + case NETDEV_CHANGEUPPER: + chup = container_of(info, typeof(*chup), info); + extack = info->extack; + if (netif_is_l3_master(chup->upper_dev)) + return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp, + ipip_entry, + ul_dev, + demote_this, + extack); + break; + + case NETDEV_UP: + return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry, + ul_dev); + case NETDEV_DOWN: + return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp, + ipip_entry, + ul_dev); + } + return 0; +} + +static int +mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ul_dev, + unsigned long event, + struct netdev_notifier_info *info) +{ + struct mlxsw_sp_ipip_entry *ipip_entry = NULL; + int err; + + while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, + ul_dev, + ipip_entry))) { + struct mlxsw_sp_ipip_entry *prev; + bool demote_this = false; + + err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry, + ul_dev, &demote_this, + event, info); + 
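+		/* On error, every tunnel whose underlay is ul_dev is demoted.
+		 * If only this tunnel became invalid, it alone is demoted, but
+		 * its predecessor on the IPIP list is remembered first so that
+		 * the walk can continue safely.
+		 */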
if (err) { + mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp, + ul_dev); + return err; + } + + if (demote_this) { + if (list_is_first(&ipip_entry->ipip_list_node, + &mlxsw_sp->router->ipip_list)) + prev = NULL; + else + /* This can't be cached from previous iteration, + * because that entry could be gone now. + */ + prev = list_prev_entry(ipip_entry, + ipip_list_node); + mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); + ipip_entry = prev; + } + } + + return 0; +} + +int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip, + u32 tunnel_index) +{ + enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + struct mlxsw_sp_router *router = mlxsw_sp->router; + struct mlxsw_sp_fib_entry *fib_entry; + int err = 0; + + mutex_lock(&mlxsw_sp->router->lock); + + if (WARN_ON_ONCE(router->nve_decap_config.valid)) { + err = -EINVAL; + goto out; + } + + router->nve_decap_config.ul_tb_id = ul_tb_id; + router->nve_decap_config.tunnel_index = tunnel_index; + router->nve_decap_config.ul_proto = ul_proto; + router->nve_decap_config.ul_sip = *ul_sip; + router->nve_decap_config.valid = true; + + /* It is valid to create a tunnel with a local IP and only later + * assign this IP address to a local interface + */ + fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id, + ul_proto, ul_sip, + type); + if (!fib_entry) + goto out; + + fib_entry->decap.tunnel_index = tunnel_index; + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; + + err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); + if (err) + goto err_fib_entry_update; + + goto out; + +err_fib_entry_update: + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); +out: + mutex_unlock(&mlxsw_sp->router->lock); + return err; +} + +void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip) +{ + enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; + struct mlxsw_sp_router *router = mlxsw_sp->router; + struct mlxsw_sp_fib_entry *fib_entry; + + mutex_lock(&mlxsw_sp->router->lock); + + if (WARN_ON_ONCE(!router->nve_decap_config.valid)) + goto out; + + router->nve_decap_config.valid = false; + + fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id, + ul_proto, ul_sip, + type); + if (!fib_entry) + goto out; + + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); +out: + mutex_unlock(&mlxsw_sp->router->lock); +} + +static bool mlxsw_sp_router_nve_is_decap(struct mlxsw_sp *mlxsw_sp, + u32 ul_tb_id, + enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + + return router->nve_decap_config.valid && + router->nve_decap_config.ul_tb_id == ul_tb_id && + router->nve_decap_config.ul_proto == ul_proto && + !memcmp(&router->nve_decap_config.ul_sip, ul_sip, + sizeof(*ul_sip)); +} + +struct mlxsw_sp_neigh_key { + struct neighbour *n; +}; + +struct mlxsw_sp_neigh_entry { + struct list_head rif_list_node; + struct rhash_head ht_node; + struct mlxsw_sp_neigh_key key; + u16 rif; + bool connected; + unsigned char ha[ETH_ALEN]; + struct list_head nexthop_list; /* list of nexthops using + * this neigh entry + */ + struct list_head nexthop_neighs_list_node; + unsigned int counter_index; + bool counter_valid; +}; + +static const struct rhashtable_params 
mlxsw_sp_neigh_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key), + .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node), + .key_len = sizeof(struct mlxsw_sp_neigh_key), +}; + +struct mlxsw_sp_neigh_entry * +mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + if (!neigh_entry) { + if (list_empty(&rif->neigh_list)) + return NULL; + else + return list_first_entry(&rif->neigh_list, + typeof(*neigh_entry), + rif_list_node); + } + if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list)) + return NULL; + return list_next_entry(neigh_entry, rif_list_node); +} + +int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + return neigh_entry->key.n->tbl->family; +} + +unsigned char * +mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + return neigh_entry->ha; +} + +u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct neighbour *n; + + n = neigh_entry->key.n; + return ntohl(*((__be32 *) n->primary_key)); +} + +struct in6_addr * +mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct neighbour *n; + + n = neigh_entry->key.n; + return (struct in6_addr *) &n->primary_key; +} + +int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + u64 *p_counter) +{ + if (!neigh_entry->counter_valid) + return -EINVAL; + + return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index, + p_counter, NULL); +} + +static struct mlxsw_sp_neigh_entry * +mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n, + u16 rif) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + + neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL); + if (!neigh_entry) + return NULL; + + neigh_entry->key.n = n; + neigh_entry->rif = rif; + INIT_LIST_HEAD(&neigh_entry->nexthop_list); + + return neigh_entry; +} + +static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + kfree(neigh_entry); +} + +static int +mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht, + &neigh_entry->ht_node, + mlxsw_sp_neigh_ht_params); +} + +static void +mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht, + &neigh_entry->ht_node, + mlxsw_sp_neigh_ht_params); +} + +static bool +mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct devlink *devlink; + const char *table_name; + + switch (mlxsw_sp_neigh_entry_type(neigh_entry)) { + case AF_INET: + table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4; + break; + case AF_INET6: + table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6; + break; + default: + WARN_ON(1); + return false; + } + + devlink = priv_to_devlink(mlxsw_sp->core); + return devlink_dpipe_table_counter_enabled(devlink, table_name); +} + +static void +mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry)) + return; + + if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index)) + return; + + neigh_entry->counter_valid = true; +} + +static void +mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + if (!neigh_entry->counter_valid) + return; + 
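	/* Hand the counter back to the shared flow counter pool. */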
mlxsw_sp_flow_counter_free(mlxsw_sp, + neigh_entry->counter_index); + neigh_entry->counter_valid = false; +} + +static struct mlxsw_sp_neigh_entry * +mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + struct mlxsw_sp_rif *rif; + int err; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev); + if (!rif) + return ERR_PTR(-EINVAL); + + neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index); + if (!neigh_entry) + return ERR_PTR(-ENOMEM); + + err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry); + if (err) + goto err_neigh_entry_insert; + + mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry); + atomic_inc(&mlxsw_sp->router->neighs_update.neigh_count); + list_add(&neigh_entry->rif_list_node, &rif->neigh_list); + + return neigh_entry; + +err_neigh_entry_insert: + mlxsw_sp_neigh_entry_free(neigh_entry); + return ERR_PTR(err); +} + +static void +mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + list_del(&neigh_entry->rif_list_node); + atomic_dec(&mlxsw_sp->router->neighs_update.neigh_count); + mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry); + mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); + mlxsw_sp_neigh_entry_free(neigh_entry); +} + +static struct mlxsw_sp_neigh_entry * +mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) +{ + struct mlxsw_sp_neigh_key key; + + key.n = n; + return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht, + &key, mlxsw_sp_neigh_ht_params); +} + +static void +mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp) +{ + unsigned long interval; + +#if IS_ENABLED(CONFIG_IPV6) + interval = min_t(unsigned long, + NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME), + NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME)); +#else + interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); +#endif + mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval); +} + +static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int ent_index) +{ + u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); + struct net_device *dev; + struct neighbour *n; + __be32 dipn; + u32 dip; + u16 rif; + + mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip); + + if (WARN_ON_ONCE(rif >= max_rifs)) + return; + if (!mlxsw_sp->router->rifs[rif]) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n"); + return; + } + + dipn = htonl(dip); + dev = mlxsw_sp->router->rifs[rif]->dev; + n = neigh_lookup(&arp_tbl, &dipn, dev); + if (!n) + return; + + netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip); + neigh_event_send(n, NULL); + neigh_release(n); +} + +#if IS_ENABLED(CONFIG_IPV6) +static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int rec_index) +{ + struct net_device *dev; + struct neighbour *n; + struct in6_addr dip; + u16 rif; + + mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif, + (char *) &dip); + + if (!mlxsw_sp->router->rifs[rif]) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n"); + return; + } + + dev = mlxsw_sp->router->rifs[rif]->dev; + n = neigh_lookup(&nd_tbl, &dip, dev); + if (!n) + return; + + netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip); + neigh_event_send(n, NULL); + neigh_release(n); +} +#else +static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int 
rec_index) +{ +} +#endif + +static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int rec_index) +{ + u8 num_entries; + int i; + + num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl, + rec_index); + /* Hardware starts counting at 0, so add 1. */ + num_entries++; + + /* Each record consists of several neighbour entries. */ + for (i = 0; i < num_entries; i++) { + int ent_index; + + ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i; + mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl, + ent_index); + } + +} + +static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int rec_index) +{ + /* One record contains one entry. */ + mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl, + rec_index); +} + +static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, int rec_index) +{ + switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) { + case MLXSW_REG_RAUHTD_TYPE_IPV4: + mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl, + rec_index); + break; + case MLXSW_REG_RAUHTD_TYPE_IPV6: + mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl, + rec_index); + break; + } +} + +static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl) +{ + u8 num_rec, last_rec_index, num_entries; + + num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl); + last_rec_index = num_rec - 1; + + if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM) + return false; + if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) == + MLXSW_REG_RAUHTD_TYPE_IPV6) + return true; + + num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl, + last_rec_index); + if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC) + return true; + return false; +} + +static int +__mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + enum mlxsw_reg_rauhtd_type type) +{ + int i, num_rec; + int err; + + /* Ensure the RIF we read from the device does not change mid-dump. 
*/ + mutex_lock(&mlxsw_sp->router->lock); + do { + mlxsw_reg_rauhtd_pack(rauhtd_pl, type); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd), + rauhtd_pl); + if (err) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n"); + break; + } + num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl); + for (i = 0; i < num_rec; i++) + mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl, + i); + } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl)); + mutex_unlock(&mlxsw_sp->router->lock); + + return err; +} + +static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) +{ + enum mlxsw_reg_rauhtd_type type; + char *rauhtd_pl; + int err; + + if (!atomic_read(&mlxsw_sp->router->neighs_update.neigh_count)) + return 0; + + rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL); + if (!rauhtd_pl) + return -ENOMEM; + + type = MLXSW_REG_RAUHTD_TYPE_IPV4; + err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type); + if (err) + goto out; + + type = MLXSW_REG_RAUHTD_TYPE_IPV6; + err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type); +out: + kfree(rauhtd_pl); + return err; +} + +static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + + mutex_lock(&mlxsw_sp->router->lock); + list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list, + nexthop_neighs_list_node) + /* If this neigh have nexthops, make the kernel think this neigh + * is active regardless of the traffic. + */ + neigh_event_send(neigh_entry->key.n, NULL); + mutex_unlock(&mlxsw_sp->router->lock); +} + +static void +mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp) +{ + unsigned long interval = mlxsw_sp->router->neighs_update.interval; + + mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, + msecs_to_jiffies(interval)); +} + +static void mlxsw_sp_router_neighs_update_work(struct work_struct *work) +{ + struct mlxsw_sp_router *router; + int err; + + router = container_of(work, struct mlxsw_sp_router, + neighs_update.dw.work); + err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp); + if (err) + dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity"); + + mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp); + + mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp); +} + +static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + struct mlxsw_sp_router *router; + + router = container_of(work, struct mlxsw_sp_router, + nexthop_probe_dw.work); + /* Iterate over nexthop neighbours, find those who are unresolved and + * send arp on them. This solves the chicken-egg problem when + * the nexthop wouldn't get offloaded until the neighbor is resolved + * but it wouldn't get resolved ever in case traffic is flowing in HW + * using different nexthop. + */ + mutex_lock(&router->lock); + list_for_each_entry(neigh_entry, &router->nexthop_neighs_list, + nexthop_neighs_list_node) + if (!neigh_entry->connected) + neigh_event_send(neigh_entry->key.n, NULL); + mutex_unlock(&router->lock); + + mlxsw_core_schedule_dw(&router->nexthop_probe_dw, + MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL); +} + +static void +mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + bool removing, bool dead); + +static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding) +{ + return adding ? 
MLXSW_REG_RAUHT_OP_WRITE_ADD : + MLXSW_REG_RAUHT_OP_WRITE_DELETE; +} + +static int +mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + enum mlxsw_reg_rauht_op op) +{ + struct neighbour *n = neigh_entry->key.n; + u32 dip = ntohl(*((__be32 *) n->primary_key)); + char rauht_pl[MLXSW_REG_RAUHT_LEN]; + + mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha, + dip); + if (neigh_entry->counter_valid) + mlxsw_reg_rauht_pack_counter(rauht_pl, + neigh_entry->counter_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); +} + +static int +mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + enum mlxsw_reg_rauht_op op) +{ + struct neighbour *n = neigh_entry->key.n; + char rauht_pl[MLXSW_REG_RAUHT_LEN]; + const char *dip = n->primary_key; + + mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha, + dip); + if (neigh_entry->counter_valid) + mlxsw_reg_rauht_pack_counter(rauht_pl, + neigh_entry->counter_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); +} + +bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct neighbour *n = neigh_entry->key.n; + + /* Packets with a link-local destination address are trapped + * after LPM lookup and never reach the neighbour table, so + * there is no need to program such neighbours to the device. + */ + if (ipv6_addr_type((struct in6_addr *) &n->primary_key) & + IPV6_ADDR_LINKLOCAL) + return true; + return false; +} + +static void +mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + bool adding) +{ + enum mlxsw_reg_rauht_op op = mlxsw_sp_rauht_op(adding); + int err; + + if (!adding && !neigh_entry->connected) + return; + neigh_entry->connected = adding; + if (neigh_entry->key.n->tbl->family == AF_INET) { + err = mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry, + op); + if (err) + return; + } else if (neigh_entry->key.n->tbl->family == AF_INET6) { + if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) + return; + err = mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry, + op); + if (err) + return; + } else { + WARN_ON_ONCE(1); + return; + } + + if (adding) + neigh_entry->key.n->flags |= NTF_OFFLOADED; + else + neigh_entry->key.n->flags &= ~NTF_OFFLOADED; +} + +void +mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + bool adding) +{ + if (adding) + mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry); + else + mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry); + mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true); +} + +struct mlxsw_sp_netevent_work { + struct work_struct work; + struct mlxsw_sp *mlxsw_sp; + struct neighbour *n; +}; + +static void mlxsw_sp_router_neigh_event_work(struct work_struct *work) +{ + struct mlxsw_sp_netevent_work *net_work = + container_of(work, struct mlxsw_sp_netevent_work, work); + struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp; + struct mlxsw_sp_neigh_entry *neigh_entry; + struct neighbour *n = net_work->n; + unsigned char ha[ETH_ALEN]; + bool entry_connected; + u8 nud_state, dead; + + /* If these parameters are changed after we release the lock, + * then we are guaranteed to receive another event letting us + * know about it. 
+ */ + read_lock_bh(&n->lock); + memcpy(ha, n->ha, ETH_ALEN); + nud_state = n->nud_state; + dead = n->dead; + read_unlock_bh(&n->lock); + + mutex_lock(&mlxsw_sp->router->lock); + mlxsw_sp_span_respin(mlxsw_sp); + + entry_connected = nud_state & NUD_VALID && !dead; + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); + if (!entry_connected && !neigh_entry) + goto out; + if (!neigh_entry) { + neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n); + if (IS_ERR(neigh_entry)) + goto out; + } + + if (neigh_entry->connected && entry_connected && + !memcmp(neigh_entry->ha, ha, ETH_ALEN)) + goto out; + + memcpy(neigh_entry->ha, ha, ETH_ALEN); + mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected); + mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected, + dead); + + if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list)) + mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); + +out: + mutex_unlock(&mlxsw_sp->router->lock); + neigh_release(n); + kfree(net_work); +} + +static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp); + +static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work) +{ + struct mlxsw_sp_netevent_work *net_work = + container_of(work, struct mlxsw_sp_netevent_work, work); + struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp; + + mlxsw_sp_mp_hash_init(mlxsw_sp); + kfree(net_work); +} + +static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp); + +static void mlxsw_sp_router_update_priority_work(struct work_struct *work) +{ + struct mlxsw_sp_netevent_work *net_work = + container_of(work, struct mlxsw_sp_netevent_work, work); + struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp; + + __mlxsw_sp_router_init(mlxsw_sp); + kfree(net_work); +} + +static int mlxsw_sp_router_schedule_work(struct net *net, + struct notifier_block *nb, + void (*cb)(struct work_struct *)) +{ + struct mlxsw_sp_netevent_work *net_work; + struct mlxsw_sp_router *router; + + router = container_of(nb, struct mlxsw_sp_router, netevent_nb); + if (!net_eq(net, mlxsw_sp_net(router->mlxsw_sp))) + return NOTIFY_DONE; + + net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC); + if (!net_work) + return NOTIFY_BAD; + + INIT_WORK(&net_work->work, cb); + net_work->mlxsw_sp = router->mlxsw_sp; + mlxsw_core_schedule_work(&net_work->work); + return NOTIFY_DONE; +} + +static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlxsw_sp_netevent_work *net_work; + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp; + unsigned long interval; + struct neigh_parms *p; + struct neighbour *n; + + switch (event) { + case NETEVENT_DELAY_PROBE_TIME_UPDATE: + p = ptr; + + /* We don't care about changes in the default table. */ + if (!p->dev || (p->tbl->family != AF_INET && + p->tbl->family != AF_INET6)) + return NOTIFY_DONE; + + /* We are in atomic context and can't take RTNL mutex, + * so use RCU variant to walk the device chain. 
+ */ + mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev); + if (!mlxsw_sp_port) + return NOTIFY_DONE; + + mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME)); + mlxsw_sp->router->neighs_update.interval = interval; + + mlxsw_sp_port_dev_put(mlxsw_sp_port); + break; + case NETEVENT_NEIGH_UPDATE: + n = ptr; + + if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6) + return NOTIFY_DONE; + + mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev); + if (!mlxsw_sp_port) + return NOTIFY_DONE; + + net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC); + if (!net_work) { + mlxsw_sp_port_dev_put(mlxsw_sp_port); + return NOTIFY_BAD; + } + + INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work); + net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + net_work->n = n; + + /* Take a reference to ensure the neighbour won't be + * destructed until we drop the reference in delayed + * work. + */ + neigh_clone(n); + mlxsw_core_schedule_work(&net_work->work); + mlxsw_sp_port_dev_put(mlxsw_sp_port); + break; + case NETEVENT_IPV4_MPATH_HASH_UPDATE: + case NETEVENT_IPV6_MPATH_HASH_UPDATE: + return mlxsw_sp_router_schedule_work(ptr, nb, + mlxsw_sp_router_mp_hash_event_work); + + case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE: + return mlxsw_sp_router_schedule_work(ptr, nb, + mlxsw_sp_router_update_priority_work); + } + + return NOTIFY_DONE; +} + +static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + err = rhashtable_init(&mlxsw_sp->router->neigh_ht, + &mlxsw_sp_neigh_ht_params); + if (err) + return err; + + /* Initialize the polling interval according to the default + * table. + */ + mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp); + + /* Create the delayed works for the activity_update */ + INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw, + mlxsw_sp_router_neighs_update_work); + INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw, + mlxsw_sp_router_probe_unresolved_nexthops); + atomic_set(&mlxsw_sp->router->neighs_update.neigh_count, 0); + mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0); + mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0); + return 0; +} + +static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp) +{ + cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw); + cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw); + rhashtable_destroy(&mlxsw_sp->router->neigh_ht); +} + +static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_neigh_entry *neigh_entry, *tmp; + + list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list, + rif_list_node) { + mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false); + mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); + } +} + +enum mlxsw_sp_nexthop_type { + MLXSW_SP_NEXTHOP_TYPE_ETH, + MLXSW_SP_NEXTHOP_TYPE_IPIP, +}; + +enum mlxsw_sp_nexthop_action { + /* Nexthop forwards packets to an egress RIF */ + MLXSW_SP_NEXTHOP_ACTION_FORWARD, + /* Nexthop discards packets */ + MLXSW_SP_NEXTHOP_ACTION_DISCARD, + /* Nexthop traps packets */ + MLXSW_SP_NEXTHOP_ACTION_TRAP, +}; + +struct mlxsw_sp_nexthop_key { + struct fib_nh *fib_nh; +}; + +struct mlxsw_sp_nexthop { + struct list_head neigh_list_node; /* member of neigh entry list */ + struct list_head rif_list_node; + struct list_head router_list_node; + struct mlxsw_sp_nexthop_group_info *nhgi; /* pointer back to the group + * this nexthop belongs to + */ + struct rhash_head ht_node; + struct neigh_table *neigh_tbl; + struct 
mlxsw_sp_nexthop_key key; + unsigned char gw_addr[sizeof(struct in6_addr)]; + int ifindex; + int nh_weight; + int norm_nh_weight; + int num_adj_entries; + struct mlxsw_sp_rif *rif; + u8 should_offload:1, /* set indicates this nexthop should be written + * to the adjacency table. + */ + offloaded:1, /* set indicates this nexthop was written to the + * adjacency table. + */ + update:1; /* set indicates this nexthop should be updated in the + * adjacency table (f.e., its MAC changed). + */ + enum mlxsw_sp_nexthop_action action; + enum mlxsw_sp_nexthop_type type; + union { + struct mlxsw_sp_neigh_entry *neigh_entry; + struct mlxsw_sp_ipip_entry *ipip_entry; + }; + unsigned int counter_index; + bool counter_valid; +}; + +enum mlxsw_sp_nexthop_group_type { + MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4, + MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6, + MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ, +}; + +struct mlxsw_sp_nexthop_group_info { + struct mlxsw_sp_nexthop_group *nh_grp; + u32 adj_index; + u16 ecmp_size; + u16 count; + int sum_norm_weight; + u8 adj_index_valid:1, + gateway:1, /* routes using the group use a gateway */ + is_resilient:1; + struct list_head list; /* member in nh_res_grp_list */ + struct mlxsw_sp_nexthop nexthops[0]; +#define nh_rif nexthops[0].rif +}; + +struct mlxsw_sp_nexthop_group_vr_key { + u16 vr_id; + enum mlxsw_sp_l3proto proto; +}; + +struct mlxsw_sp_nexthop_group_vr_entry { + struct list_head list; /* member in vr_list */ + struct rhash_head ht_node; /* member in vr_ht */ + refcount_t ref_count; + struct mlxsw_sp_nexthop_group_vr_key key; +}; + +struct mlxsw_sp_nexthop_group { + struct rhash_head ht_node; + struct list_head fib_list; /* list of fib entries that use this group */ + union { + struct { + struct fib_info *fi; + } ipv4; + struct { + u32 id; + } obj; + }; + struct mlxsw_sp_nexthop_group_info *nhgi; + struct list_head vr_list; + struct rhashtable vr_ht; + enum mlxsw_sp_nexthop_group_type type; + bool can_destroy; +}; + +void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + struct devlink *devlink; + + devlink = priv_to_devlink(mlxsw_sp->core); + if (!devlink_dpipe_table_counter_enabled(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_ADJ)) + return; + + if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index)) + return; + + nh->counter_valid = true; +} + +void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + if (!nh->counter_valid) + return; + mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index); + nh->counter_valid = false; +} + +int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, u64 *p_counter) +{ + if (!nh->counter_valid) + return -EINVAL; + + return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index, + p_counter, NULL); +} + +struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router, + struct mlxsw_sp_nexthop *nh) +{ + if (!nh) { + if (list_empty(&router->nexthop_list)) + return NULL; + else + return list_first_entry(&router->nexthop_list, + typeof(*nh), router_list_node); + } + if (list_is_last(&nh->router_list_node, &router->nexthop_list)) + return NULL; + return list_next_entry(nh, router_list_node); +} + +bool mlxsw_sp_nexthop_is_forward(const struct mlxsw_sp_nexthop *nh) +{ + return nh->offloaded && nh->action == MLXSW_SP_NEXTHOP_ACTION_FORWARD; +} + +unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh) +{ + if (nh->type != MLXSW_SP_NEXTHOP_TYPE_ETH || + !mlxsw_sp_nexthop_is_forward(nh)) + return NULL; + return 
nh->neigh_entry->ha; +} + +int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index, + u32 *p_adj_size, u32 *p_adj_hash_index) +{ + struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi; + u32 adj_hash_index = 0; + int i; + + if (!nh->offloaded || !nhgi->adj_index_valid) + return -EINVAL; + + *p_adj_index = nhgi->adj_index; + *p_adj_size = nhgi->ecmp_size; + + for (i = 0; i < nhgi->count; i++) { + struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i]; + + if (nh_iter == nh) + break; + if (nh_iter->offloaded) + adj_hash_index += nh_iter->num_adj_entries; + } + + *p_adj_hash_index = adj_hash_index; + return 0; +} + +struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh) +{ + return nh->rif; +} + +bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh) +{ + struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi; + int i; + + for (i = 0; i < nhgi->count; i++) { + struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i]; + + if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP) + return true; + } + return false; +} + +static const struct rhashtable_params mlxsw_sp_nexthop_group_vr_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, key), + .head_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, ht_node), + .key_len = sizeof(struct mlxsw_sp_nexthop_group_vr_key), + .automatic_shrinking = true, +}; + +static struct mlxsw_sp_nexthop_group_vr_entry * +mlxsw_sp_nexthop_group_vr_entry_lookup(struct mlxsw_sp_nexthop_group *nh_grp, + const struct mlxsw_sp_fib *fib) +{ + struct mlxsw_sp_nexthop_group_vr_key key; + + memset(&key, 0, sizeof(key)); + key.vr_id = fib->vr->id; + key.proto = fib->proto; + return rhashtable_lookup_fast(&nh_grp->vr_ht, &key, + mlxsw_sp_nexthop_group_vr_ht_params); +} + +static int +mlxsw_sp_nexthop_group_vr_entry_create(struct mlxsw_sp_nexthop_group *nh_grp, + const struct mlxsw_sp_fib *fib) +{ + struct mlxsw_sp_nexthop_group_vr_entry *vr_entry; + int err; + + vr_entry = kzalloc(sizeof(*vr_entry), GFP_KERNEL); + if (!vr_entry) + return -ENOMEM; + + vr_entry->key.vr_id = fib->vr->id; + vr_entry->key.proto = fib->proto; + refcount_set(&vr_entry->ref_count, 1); + + err = rhashtable_insert_fast(&nh_grp->vr_ht, &vr_entry->ht_node, + mlxsw_sp_nexthop_group_vr_ht_params); + if (err) + goto err_hashtable_insert; + + list_add(&vr_entry->list, &nh_grp->vr_list); + + return 0; + +err_hashtable_insert: + kfree(vr_entry); + return err; +} + +static void +mlxsw_sp_nexthop_group_vr_entry_destroy(struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop_group_vr_entry *vr_entry) +{ + list_del(&vr_entry->list); + rhashtable_remove_fast(&nh_grp->vr_ht, &vr_entry->ht_node, + mlxsw_sp_nexthop_group_vr_ht_params); + kfree(vr_entry); +} + +static int +mlxsw_sp_nexthop_group_vr_link(struct mlxsw_sp_nexthop_group *nh_grp, + const struct mlxsw_sp_fib *fib) +{ + struct mlxsw_sp_nexthop_group_vr_entry *vr_entry; + + vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib); + if (vr_entry) { + refcount_inc(&vr_entry->ref_count); + return 0; + } + + return mlxsw_sp_nexthop_group_vr_entry_create(nh_grp, fib); +} + +static void +mlxsw_sp_nexthop_group_vr_unlink(struct mlxsw_sp_nexthop_group *nh_grp, + const struct mlxsw_sp_fib *fib) +{ + struct mlxsw_sp_nexthop_group_vr_entry *vr_entry; + + vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib); + if (WARN_ON_ONCE(!vr_entry)) + return; + + if (!refcount_dec_and_test(&vr_entry->ref_count)) + return; + + mlxsw_sp_nexthop_group_vr_entry_destroy(nh_grp, vr_entry); +} + +struct 
mlxsw_sp_nexthop_group_cmp_arg { + enum mlxsw_sp_nexthop_group_type type; + union { + struct fib_info *fi; + struct mlxsw_sp_fib6_entry *fib6_entry; + u32 id; + }; +}; + +static bool +mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp, + const struct in6_addr *gw, int ifindex, + int weight) +{ + int i; + + for (i = 0; i < nh_grp->nhgi->count; i++) { + const struct mlxsw_sp_nexthop *nh; + + nh = &nh_grp->nhgi->nexthops[i]; + if (nh->ifindex == ifindex && nh->nh_weight == weight && + ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr)) + return true; + } + + return false; +} + +static bool +mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp, + const struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + if (nh_grp->nhgi->count != fib6_entry->nrt6) + return false; + + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; + struct in6_addr *gw; + int ifindex, weight; + + ifindex = fib6_nh->fib_nh_dev->ifindex; + weight = fib6_nh->fib_nh_weight; + gw = &fib6_nh->fib_nh_gw6; + if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex, + weight)) + return false; + } + + return true; +} + +static int +mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr) +{ + const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key; + const struct mlxsw_sp_nexthop_group *nh_grp = ptr; + + if (nh_grp->type != cmp_arg->type) + return 1; + + switch (cmp_arg->type) { + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4: + return cmp_arg->fi != nh_grp->ipv4.fi; + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6: + return !mlxsw_sp_nexthop6_group_cmp(nh_grp, + cmp_arg->fib6_entry); + case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ: + return cmp_arg->id != nh_grp->obj.id; + default: + WARN_ON(1); + return 1; + } +} + +static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed) +{ + const struct mlxsw_sp_nexthop_group *nh_grp = data; + const struct mlxsw_sp_nexthop *nh; + struct fib_info *fi; + unsigned int val; + int i; + + switch (nh_grp->type) { + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4: + fi = nh_grp->ipv4.fi; + return jhash(&fi, sizeof(fi), seed); + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6: + val = nh_grp->nhgi->count; + for (i = 0; i < nh_grp->nhgi->count; i++) { + nh = &nh_grp->nhgi->nexthops[i]; + val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed); + val ^= jhash(&nh->gw_addr, sizeof(nh->gw_addr), seed); + } + return jhash(&val, sizeof(val), seed); + case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ: + return jhash(&nh_grp->obj.id, sizeof(nh_grp->obj.id), seed); + default: + WARN_ON(1); + return 0; + } +} + +static u32 +mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed) +{ + unsigned int val = fib6_entry->nrt6; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; + struct net_device *dev = fib6_nh->fib_nh_dev; + struct in6_addr *gw = &fib6_nh->fib_nh_gw6; + + val ^= jhash(&dev->ifindex, sizeof(dev->ifindex), seed); + val ^= jhash(gw, sizeof(*gw), seed); + } + + return jhash(&val, sizeof(val), seed); +} + +static u32 +mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed) +{ + const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data; + + switch (cmp_arg->type) { + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4: + return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed); + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6: + return 
mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed); + case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ: + return jhash(&cmp_arg->id, sizeof(cmp_arg->id), seed); + default: + WARN_ON(1); + return 0; + } +} + +static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = { + .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node), + .hashfn = mlxsw_sp_nexthop_group_hash, + .obj_hashfn = mlxsw_sp_nexthop_group_hash_obj, + .obj_cmpfn = mlxsw_sp_nexthop_group_cmp, +}; + +static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 && + !nh_grp->nhgi->gateway) + return 0; + + return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht, + &nh_grp->ht_node, + mlxsw_sp_nexthop_group_ht_params); +} + +static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 && + !nh_grp->nhgi->gateway) + return; + + rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht, + &nh_grp->ht_node, + mlxsw_sp_nexthop_group_ht_params); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp, + struct fib_info *fi) +{ + struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg; + + cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4; + cmp_arg.fi = fi; + return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht, + &cmp_arg, + mlxsw_sp_nexthop_group_ht_params); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg; + + cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6; + cmp_arg.fib6_entry = fib6_entry; + return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht, + &cmp_arg, + mlxsw_sp_nexthop_group_ht_params); +} + +static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_nexthop, key), + .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node), + .key_len = sizeof(struct mlxsw_sp_nexthop_key), +}; + +static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht, + &nh->ht_node, mlxsw_sp_nexthop_ht_params); +} + +static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node, + mlxsw_sp_nexthop_ht_params); +} + +static struct mlxsw_sp_nexthop * +mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_key key) +{ + return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key, + mlxsw_sp_nexthop_ht_params); +} + +static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_l3proto proto, + u16 vr_id, + u32 adj_index, u16 ecmp_size, + u32 new_adj_index, + u16 new_ecmp_size) +{ + char raleu_pl[MLXSW_REG_RALEU_LEN]; + + mlxsw_reg_raleu_pack(raleu_pl, + (enum mlxsw_reg_ralxx_protocol) proto, vr_id, + adj_index, ecmp_size, new_adj_index, + new_ecmp_size); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl); +} + +static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + u32 old_adj_index, u16 old_ecmp_size) +{ + struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi; + struct mlxsw_sp_nexthop_group_vr_entry *vr_entry; + int err; + + 
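/*
 * The loop below rewrites the adjacency index for every virtual router that
 * still references the old index and, if any rewrite fails, walks back over
 * the routers already rewritten to restore the old index. A minimal
 * standalone sketch of this apply-then-unwind idiom, with a hypothetical
 * apply() helper that fails on demand (illustration only, not driver code):
 */
#include <stdio.h>

#define N_ITEMS 4

static int applied[N_ITEMS];

/* Pretend to reprogram item 'i'; fail when i == fail_at. */
static int apply(int i, int fail_at)
{
	if (i == fail_at)
		return -1;
	applied[i] = 1;
	return 0;
}

static void revert(int i)
{
	applied[i] = 0;
}

static int mass_update(int fail_at)
{
	int i, err;

	for (i = 0; i < N_ITEMS; i++) {
		err = apply(i, fail_at);
		if (err)
			goto err_unwind;
	}
	return 0;

err_unwind:
	/* Undo only what was already applied, in reverse order, the same way
	 * list_for_each_entry_continue_reverse() is used below. */
	while (--i >= 0)
		revert(i);
	return err;
}

int main(void)
{
	int i;

	if (mass_update(2))
		printf("update failed, rolled back\n");
	for (i = 0; i < N_ITEMS; i++)
		printf("item %d applied=%d\n", i, applied[i]);
	return 0;
}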
list_for_each_entry(vr_entry, &nh_grp->vr_list, list) { + err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, + vr_entry->key.proto, + vr_entry->key.vr_id, + old_adj_index, + old_ecmp_size, + nhgi->adj_index, + nhgi->ecmp_size); + if (err) + goto err_mass_update_vr; + } + return 0; + +err_mass_update_vr: + list_for_each_entry_continue_reverse(vr_entry, &nh_grp->vr_list, list) + mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr_entry->key.proto, + vr_entry->key.vr_id, + nhgi->adj_index, + nhgi->ecmp_size, + old_adj_index, old_ecmp_size); + return err; +} + +static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, + u32 adj_index, + struct mlxsw_sp_nexthop *nh, + bool force, char *ratr_pl) +{ + struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; + enum mlxsw_reg_ratr_op op; + u16 rif_index; + + rif_index = nh->rif ? nh->rif->rif_index : + mlxsw_sp->router->lb_rif_index; + op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY : + MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY; + mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_ETHERNET, + adj_index, rif_index); + switch (nh->action) { + case MLXSW_SP_NEXTHOP_ACTION_FORWARD: + mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha); + break; + case MLXSW_SP_NEXTHOP_ACTION_DISCARD: + mlxsw_reg_ratr_trap_action_set(ratr_pl, + MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS); + break; + case MLXSW_SP_NEXTHOP_ACTION_TRAP: + mlxsw_reg_ratr_trap_action_set(ratr_pl, + MLXSW_REG_RATR_TRAP_ACTION_TRAP); + mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0); + break; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } + if (nh->counter_valid) + mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true); + else + mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); +} + +int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh, bool force, + char *ratr_pl) +{ + int i; + + for (i = 0; i < nh->num_adj_entries; i++) { + int err; + + err = __mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index + i, + nh, force, ratr_pl); + if (err) + return err; + } + + return 0; +} + +static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, + u32 adj_index, + struct mlxsw_sp_nexthop *nh, + bool force, char *ratr_pl) +{ + const struct mlxsw_sp_ipip_ops *ipip_ops; + + ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt]; + return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry, + force, ratr_pl); +} + +static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, + u32 adj_index, + struct mlxsw_sp_nexthop *nh, bool force, + char *ratr_pl) +{ + int i; + + for (i = 0; i < nh->num_adj_entries; i++) { + int err; + + err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i, + nh, force, ratr_pl); + if (err) + return err; + } + + return 0; +} + +static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh, bool force, + char *ratr_pl) +{ + /* When action is discard or trap, the nexthop must be + * programmed as an Ethernet nexthop. 
+ */ + if (nh->type == MLXSW_SP_NEXTHOP_TYPE_ETH || + nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD || + nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP) + return mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index, nh, + force, ratr_pl); + else + return mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index, nh, + force, ratr_pl); +} + +static int +mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group_info *nhgi, + bool reallocate) +{ + char ratr_pl[MLXSW_REG_RATR_LEN]; + u32 adj_index = nhgi->adj_index; /* base */ + struct mlxsw_sp_nexthop *nh; + int i; + + for (i = 0; i < nhgi->count; i++) { + nh = &nhgi->nexthops[i]; + + if (!nh->should_offload) { + nh->offloaded = 0; + continue; + } + + if (nh->update || reallocate) { + int err = 0; + + err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh, + true, ratr_pl); + if (err) + return err; + nh->update = 0; + nh->offloaded = 1; + } + adj_index += nh->num_adj_entries; + } + return 0; +} + +static int +mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_fib_entry *fib_entry; + int err; + + list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { + err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); + if (err) + return err; + } + return 0; +} + +struct mlxsw_sp_adj_grp_size_range { + u16 start; /* Inclusive */ + u16 end; /* Inclusive */ +}; + +/* Ordered by range start value */ +static const struct mlxsw_sp_adj_grp_size_range +mlxsw_sp1_adj_grp_size_ranges[] = { + { .start = 1, .end = 64 }, + { .start = 512, .end = 512 }, + { .start = 1024, .end = 1024 }, + { .start = 2048, .end = 2048 }, + { .start = 4096, .end = 4096 }, +}; + +/* Ordered by range start value */ +static const struct mlxsw_sp_adj_grp_size_range +mlxsw_sp2_adj_grp_size_ranges[] = { + { .start = 1, .end = 128 }, + { .start = 256, .end = 256 }, + { .start = 512, .end = 512 }, + { .start = 1024, .end = 1024 }, + { .start = 2048, .end = 2048 }, + { .start = 4096, .end = 4096 }, +}; + +static void mlxsw_sp_adj_grp_size_round_up(const struct mlxsw_sp *mlxsw_sp, + u16 *p_adj_grp_size) +{ + int i; + + for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) { + const struct mlxsw_sp_adj_grp_size_range *size_range; + + size_range = &mlxsw_sp->router->adj_grp_size_ranges[i]; + + if (*p_adj_grp_size >= size_range->start && + *p_adj_grp_size <= size_range->end) + return; + + if (*p_adj_grp_size <= size_range->end) { + *p_adj_grp_size = size_range->end; + return; + } + } +} + +static void mlxsw_sp_adj_grp_size_round_down(const struct mlxsw_sp *mlxsw_sp, + u16 *p_adj_grp_size, + unsigned int alloc_size) +{ + int i; + + for (i = mlxsw_sp->router->adj_grp_size_ranges_count - 1; i >= 0; i--) { + const struct mlxsw_sp_adj_grp_size_range *size_range; + + size_range = &mlxsw_sp->router->adj_grp_size_ranges[i]; + + if (alloc_size >= size_range->end) { + *p_adj_grp_size = size_range->end; + return; + } + } +} + +static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp, + u16 *p_adj_grp_size) +{ + unsigned int alloc_size; + int err; + + /* Round up the requested group size to the next size supported + * by the device and make sure the request can be satisfied. + */ + mlxsw_sp_adj_grp_size_round_up(mlxsw_sp, p_adj_grp_size); + err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp, + MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, + *p_adj_grp_size, &alloc_size); + if (err) + return err; + /* It is possible the allocation results in more allocated + * entries than requested. 
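	 * For example, a request that was rounded up to a 512-entry group
	 * might come back from the allocator as a larger block; the rounding
	 * below then grows the group to the largest supported size that still
	 * fits in what was actually allocated.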
Try to use as much of them as + * possible. + */ + mlxsw_sp_adj_grp_size_round_down(mlxsw_sp, p_adj_grp_size, alloc_size); + + return 0; +} + +static void +mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group_info *nhgi) +{ + int i, g = 0, sum_norm_weight = 0; + struct mlxsw_sp_nexthop *nh; + + for (i = 0; i < nhgi->count; i++) { + nh = &nhgi->nexthops[i]; + + if (!nh->should_offload) + continue; + if (g > 0) + g = gcd(nh->nh_weight, g); + else + g = nh->nh_weight; + } + + for (i = 0; i < nhgi->count; i++) { + nh = &nhgi->nexthops[i]; + + if (!nh->should_offload) + continue; + nh->norm_nh_weight = nh->nh_weight / g; + sum_norm_weight += nh->norm_nh_weight; + } + + nhgi->sum_norm_weight = sum_norm_weight; +} + +static void +mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group_info *nhgi) +{ + int i, weight = 0, lower_bound = 0; + int total = nhgi->sum_norm_weight; + u16 ecmp_size = nhgi->ecmp_size; + + for (i = 0; i < nhgi->count; i++) { + struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i]; + int upper_bound; + + if (!nh->should_offload) + continue; + weight += nh->norm_nh_weight; + upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total); + nh->num_adj_entries = upper_bound - lower_bound; + lower_bound = upper_bound; + } +} + +static struct mlxsw_sp_nexthop * +mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, + const struct mlxsw_sp_rt6 *mlxsw_sp_rt6); + +static void +mlxsw_sp_nexthop4_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + int i; + + for (i = 0; i < nh_grp->nhgi->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i]; + + if (nh->offloaded) + nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD; + else + nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; + } +} + +static void +__mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; + struct mlxsw_sp_nexthop *nh; + + nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); + if (nh && nh->offloaded) + fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD; + else + fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; + } +} + +static void +mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + + /* Unfortunately, in IPv6 the route and the nexthop are described by + * the same struct, so we need to iterate over all the routes using the + * nexthop group and set / clear the offload indication for them. + */ + list_for_each_entry(fib6_entry, &nh_grp->fib_list, + common.nexthop_group_node) + __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry); +} + +static void +mlxsw_sp_nexthop_bucket_offload_refresh(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nexthop *nh, + u16 bucket_index) +{ + struct mlxsw_sp_nexthop_group *nh_grp = nh->nhgi->nh_grp; + bool offload = false, trap = false; + + if (nh->offloaded) { + if (nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP) + trap = true; + else + offload = true; + } + nexthop_bucket_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id, + bucket_index, offload, trap); +} + +static void +mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + int i; + + /* Do not update the flags if the nexthop group is being destroyed + * since: + * 1. 
The nexthop objects is being deleted, in which case the flags are + * irrelevant. + * 2. The nexthop group was replaced by a newer group, in which case + * the flags of the nexthop object were already updated based on the + * new group. + */ + if (nh_grp->can_destroy) + return; + + nexthop_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id, + nh_grp->nhgi->adj_index_valid, false); + + /* Update flags of individual nexthop buckets in case of a resilient + * nexthop group. + */ + if (!nh_grp->nhgi->is_resilient) + return; + + for (i = 0; i < nh_grp->nhgi->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i]; + + mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, i); + } +} + +static void +mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + switch (nh_grp->type) { + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4: + mlxsw_sp_nexthop4_group_offload_refresh(mlxsw_sp, nh_grp); + break; + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6: + mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp); + break; + case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ: + mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, nh_grp); + break; + } +} + +static int +mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi; + u16 ecmp_size, old_ecmp_size; + struct mlxsw_sp_nexthop *nh; + bool offload_change = false; + u32 adj_index; + bool old_adj_index_valid; + u32 old_adj_index; + int i, err2, err; + + if (!nhgi->gateway) + return mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); + + for (i = 0; i < nhgi->count; i++) { + nh = &nhgi->nexthops[i]; + + if (nh->should_offload != nh->offloaded) { + offload_change = true; + if (nh->should_offload) + nh->update = 1; + } + } + if (!offload_change) { + /* Nothing was added or removed, so no need to reallocate. Just + * update MAC on existing adjacency indexes. + */ + err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, false); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); + goto set_trap; + } + /* Flags of individual nexthop buckets might need to be + * updated. + */ + mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp); + return 0; + } + mlxsw_sp_nexthop_group_normalize(nhgi); + if (!nhgi->sum_norm_weight) { + /* No neigh of this group is connected so we just set + * the trap and let everthing flow through kernel. + */ + err = 0; + goto set_trap; + } + + ecmp_size = nhgi->sum_norm_weight; + err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size); + if (err) + /* No valid allocation size available. */ + goto set_trap; + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, + ecmp_size, &adj_index); + if (err) { + /* We ran out of KVD linear space, just set the + * trap and let everything flow through kernel. 
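	 * That is, the affected fib entries are programmed to trap to the CPU
	 * and the kernel forwards the traffic in software.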
+ */ + dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n"); + goto set_trap; + } + old_adj_index_valid = nhgi->adj_index_valid; + old_adj_index = nhgi->adj_index; + old_ecmp_size = nhgi->ecmp_size; + nhgi->adj_index_valid = 1; + nhgi->adj_index = adj_index; + nhgi->ecmp_size = ecmp_size; + mlxsw_sp_nexthop_group_rebalance(nhgi); + err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, true); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); + goto set_trap; + } + + mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp); + + if (!old_adj_index_valid) { + /* The trap was set for fib entries, so we have to call + * fib entry update to unset it and use adjacency index. + */ + err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n"); + goto set_trap; + } + return 0; + } + + err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp, + old_adj_index, old_ecmp_size); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, + old_ecmp_size, old_adj_index); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n"); + goto set_trap; + } + + return 0; + +set_trap: + old_adj_index_valid = nhgi->adj_index_valid; + nhgi->adj_index_valid = 0; + for (i = 0; i < nhgi->count; i++) { + nh = &nhgi->nexthops[i]; + nh->offloaded = 0; + } + err2 = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); + if (err2) + dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n"); + mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp); + if (old_adj_index_valid) + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, + nhgi->ecmp_size, nhgi->adj_index); + return err; +} + +static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, + bool removing) +{ + if (!removing) { + nh->action = MLXSW_SP_NEXTHOP_ACTION_FORWARD; + nh->should_offload = 1; + } else if (nh->nhgi->is_resilient) { + nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP; + nh->should_offload = 1; + } else { + nh->should_offload = 0; + } + nh->update = 1; +} + +static int +mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct neighbour *n, *old_n = neigh_entry->key.n; + struct mlxsw_sp_nexthop *nh; + bool entry_connected; + u8 nud_state, dead; + int err; + + nh = list_first_entry(&neigh_entry->nexthop_list, + struct mlxsw_sp_nexthop, neigh_list_node); + + n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev); + if (!n) { + n = neigh_create(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev); + if (IS_ERR(n)) + return PTR_ERR(n); + neigh_event_send(n, NULL); + } + + mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); + neigh_entry->key.n = n; + err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry); + if (err) + goto err_neigh_entry_insert; + + read_lock_bh(&n->lock); + nud_state = n->nud_state; + dead = n->dead; + read_unlock_bh(&n->lock); + entry_connected = nud_state & NUD_VALID && !dead; + + list_for_each_entry(nh, &neigh_entry->nexthop_list, + neigh_list_node) { + neigh_release(old_n); + neigh_clone(n); + __mlxsw_sp_nexthop_neigh_update(nh, !entry_connected); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); + } + + neigh_release(n); + + return 0; + +err_neigh_entry_insert: + neigh_entry->key.n = old_n; + mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry); + neigh_release(n); + return err; +} + +static 
void +mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + bool removing, bool dead) +{ + struct mlxsw_sp_nexthop *nh; + + if (list_empty(&neigh_entry->nexthop_list)) + return; + + if (dead) { + int err; + + err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp, + neigh_entry); + if (err) + dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n"); + return; + } + + list_for_each_entry(nh, &neigh_entry->nexthop_list, + neigh_list_node) { + __mlxsw_sp_nexthop_neigh_update(nh, removing); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); + } +} + +static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh, + struct mlxsw_sp_rif *rif) +{ + if (nh->rif) + return; + + nh->rif = rif; + list_add(&nh->rif_list_node, &rif->nexthop_list); +} + +static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh) +{ + if (!nh->rif) + return; + + list_del(&nh->rif_list_node); + nh->rif = NULL; +} + +static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + struct neighbour *n; + u8 nud_state, dead; + int err; + + if (!nh->nhgi->gateway || nh->neigh_entry) + return 0; + + /* Take a reference of neigh here ensuring that neigh would + * not be destructed before the nexthop entry is finished. + * The reference is taken either in neigh_lookup() or + * in neigh_create() in case n is not found. + */ + n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev); + if (!n) { + n = neigh_create(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev); + if (IS_ERR(n)) + return PTR_ERR(n); + neigh_event_send(n, NULL); + } + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); + if (!neigh_entry) { + neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n); + if (IS_ERR(neigh_entry)) { + err = -EINVAL; + goto err_neigh_entry_create; + } + } + + /* If that is the first nexthop connected to that neigh, add to + * nexthop_neighs_list + */ + if (list_empty(&neigh_entry->nexthop_list)) + list_add_tail(&neigh_entry->nexthop_neighs_list_node, + &mlxsw_sp->router->nexthop_neighs_list); + + nh->neigh_entry = neigh_entry; + list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list); + read_lock_bh(&n->lock); + nud_state = n->nud_state; + dead = n->dead; + read_unlock_bh(&n->lock); + __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead)); + + return 0; + +err_neigh_entry_create: + neigh_release(n); + return err; +} + +static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; + struct neighbour *n; + + if (!neigh_entry) + return; + n = neigh_entry->key.n; + + __mlxsw_sp_nexthop_neigh_update(nh, true); + list_del(&nh->neigh_list_node); + nh->neigh_entry = NULL; + + /* If that is the last nexthop connected to that neigh, remove from + * nexthop_neighs_list + */ + if (list_empty(&neigh_entry->nexthop_list)) + list_del(&neigh_entry->nexthop_neighs_list_node); + + if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list)) + mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); + + neigh_release(n); +} + +static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev) +{ + struct net_device *ul_dev; + bool is_up; + + rcu_read_lock(); + ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + is_up = ul_dev ? 
(ul_dev->flags & IFF_UP) : true; + rcu_read_unlock(); + + return is_up; +} + +static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + bool removing; + + if (!nh->nhgi->gateway || nh->ipip_entry) + return; + + nh->ipip_entry = ipip_entry; + removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev); + __mlxsw_sp_nexthop_neigh_update(nh, removing); + mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common); +} + +static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry; + + if (!ipip_entry) + return; + + __mlxsw_sp_nexthop_neigh_update(nh, true); + nh->ipip_entry = NULL; +} + +static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp, + const struct fib_nh *fib_nh, + enum mlxsw_sp_ipip_type *p_ipipt) +{ + struct net_device *dev = fib_nh->fib_nh_dev; + + return dev && + fib_nh->nh_parent->fib_type == RTN_UNICAST && + mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt); +} + +static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, + const struct net_device *dev) +{ + const struct mlxsw_sp_ipip_ops *ipip_ops; + struct mlxsw_sp_ipip_entry *ipip_entry; + struct mlxsw_sp_rif *rif; + int err; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev); + if (ipip_entry) { + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; + if (ipip_ops->can_offload(mlxsw_sp, dev)) { + nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; + mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry); + return 0; + } + } + + nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) + return 0; + + mlxsw_sp_nexthop_rif_init(nh, rif); + err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); + if (err) + goto err_neigh_init; + + return 0; + +err_neigh_init: + mlxsw_sp_nexthop_rif_fini(nh); + return err; +} + +static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + switch (nh->type) { + case MLXSW_SP_NEXTHOP_TYPE_ETH: + mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop_rif_fini(nh); + break; + case MLXSW_SP_NEXTHOP_TYPE_IPIP: + mlxsw_sp_nexthop_rif_fini(nh); + mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh); + break; + } +} + +static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop *nh, + struct fib_nh *fib_nh) +{ + struct net_device *dev = fib_nh->fib_nh_dev; + struct in_device *in_dev; + int err; + + nh->nhgi = nh_grp->nhgi; + nh->key.fib_nh = fib_nh; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + nh->nh_weight = fib_nh->fib_nh_weight; +#else + nh->nh_weight = 1; +#endif + memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4)); + nh->neigh_tbl = &arp_tbl; + err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh); + if (err) + return err; + + mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); + list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); + + if (!dev) + return 0; + nh->ifindex = dev->ifindex; + + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); + if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && + fib_nh->fib_nh_flags & RTNH_F_LINKDOWN) { + rcu_read_unlock(); + return 0; + } + rcu_read_unlock(); + + err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev); + if (err) + goto err_nexthop_neigh_init; + + return 0; + +err_nexthop_neigh_init: + list_del(&nh->router_list_node); + 
mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); + mlxsw_sp_nexthop_remove(mlxsw_sp, nh); + return err; +} + +static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); + list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); + mlxsw_sp_nexthop_remove(mlxsw_sp, nh); +} + +static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp, + unsigned long event, struct fib_nh *fib_nh) +{ + struct mlxsw_sp_nexthop_key key; + struct mlxsw_sp_nexthop *nh; + + key.fib_nh = fib_nh; + nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key); + if (!nh) + return; + + switch (event) { + case FIB_EVENT_NH_ADD: + mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, fib_nh->fib_nh_dev); + break; + case FIB_EVENT_NH_DEL: + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); + break; + } + + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); +} + +static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_nexthop *nh; + bool removing; + + list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) { + switch (nh->type) { + case MLXSW_SP_NEXTHOP_TYPE_ETH: + removing = false; + break; + case MLXSW_SP_NEXTHOP_TYPE_IPIP: + removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev); + break; + default: + WARN_ON(1); + continue; + } + + __mlxsw_sp_nexthop_neigh_update(nh, removing); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); + } +} + +static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *old_rif, + struct mlxsw_sp_rif *new_rif) +{ + struct mlxsw_sp_nexthop *nh; + + list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list); + list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node) + nh->rif = new_rif; + mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif); +} + +static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_nexthop *nh, *tmp; + + list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) { + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); + } +} + +static int mlxsw_sp_adj_trap_entry_init(struct mlxsw_sp *mlxsw_sp) +{ + enum mlxsw_reg_ratr_trap_action trap_action; + char ratr_pl[MLXSW_REG_RATR_LEN]; + int err; + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + &mlxsw_sp->router->adj_trap_index); + if (err) + return err; + + trap_action = MLXSW_REG_RATR_TRAP_ACTION_TRAP; + mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true, + MLXSW_REG_RATR_TYPE_ETHERNET, + mlxsw_sp->router->adj_trap_index, + mlxsw_sp->router->lb_rif_index); + mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action); + mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); + if (err) + goto err_ratr_write; + + return 0; + +err_ratr_write: + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + mlxsw_sp->router->adj_trap_index); + return err; +} + +static void mlxsw_sp_adj_trap_entry_fini(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + mlxsw_sp->router->adj_trap_index); +} + +static int mlxsw_sp_nexthop_group_inc(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + if (refcount_inc_not_zero(&mlxsw_sp->router->num_groups)) + return 0; + + err = mlxsw_sp_adj_trap_entry_init(mlxsw_sp); + if (err) + return err; + + refcount_set(&mlxsw_sp->router->num_groups, 1); + + return 
0; +} + +static void mlxsw_sp_nexthop_group_dec(struct mlxsw_sp *mlxsw_sp) +{ + if (!refcount_dec_and_test(&mlxsw_sp->router->num_groups)) + return; + + mlxsw_sp_adj_trap_entry_fini(mlxsw_sp); +} + +static void +mlxsw_sp_nh_grp_activity_get(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nexthop_group *nh_grp, + unsigned long *activity) +{ + char *ratrad_pl; + int i, err; + + ratrad_pl = kmalloc(MLXSW_REG_RATRAD_LEN, GFP_KERNEL); + if (!ratrad_pl) + return; + + mlxsw_reg_ratrad_pack(ratrad_pl, nh_grp->nhgi->adj_index, + nh_grp->nhgi->count); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratrad), ratrad_pl); + if (err) + goto out; + + for (i = 0; i < nh_grp->nhgi->count; i++) { + if (!mlxsw_reg_ratrad_activity_vector_get(ratrad_pl, i)) + continue; + bitmap_set(activity, i, 1); + } + +out: + kfree(ratrad_pl); +} + +#define MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL 1000 /* ms */ + +static void +mlxsw_sp_nh_grp_activity_update(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nexthop_group *nh_grp) +{ + unsigned long *activity; + + activity = bitmap_zalloc(nh_grp->nhgi->count, GFP_KERNEL); + if (!activity) + return; + + mlxsw_sp_nh_grp_activity_get(mlxsw_sp, nh_grp, activity); + nexthop_res_grp_activity_update(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id, + nh_grp->nhgi->count, activity); + + bitmap_free(activity); +} + +static void +mlxsw_sp_nh_grp_activity_work_schedule(struct mlxsw_sp *mlxsw_sp) +{ + unsigned int interval = MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL; + + mlxsw_core_schedule_dw(&mlxsw_sp->router->nh_grp_activity_dw, + msecs_to_jiffies(interval)); +} + +static void mlxsw_sp_nh_grp_activity_work(struct work_struct *work) +{ + struct mlxsw_sp_nexthop_group_info *nhgi; + struct mlxsw_sp_router *router; + bool reschedule = false; + + router = container_of(work, struct mlxsw_sp_router, + nh_grp_activity_dw.work); + + mutex_lock(&router->lock); + + list_for_each_entry(nhgi, &router->nh_res_grp_list, list) { + mlxsw_sp_nh_grp_activity_update(router->mlxsw_sp, nhgi->nh_grp); + reschedule = true; + } + + mutex_unlock(&router->lock); + + if (!reschedule) + return; + mlxsw_sp_nh_grp_activity_work_schedule(router->mlxsw_sp); +} + +static int +mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp, + const struct nh_notifier_single_info *nh, + struct netlink_ext_ack *extack) +{ + int err = -EINVAL; + + if (nh->is_fdb) + NL_SET_ERR_MSG_MOD(extack, "FDB nexthops are not supported"); + else if (nh->has_encap) + NL_SET_ERR_MSG_MOD(extack, "Encapsulating nexthops are not supported"); + else + err = 0; + + return err; +} + +static int +mlxsw_sp_nexthop_obj_group_entry_validate(struct mlxsw_sp *mlxsw_sp, + const struct nh_notifier_single_info *nh, + struct netlink_ext_ack *extack) +{ + int err; + + err = mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, nh, extack); + if (err) + return err; + + /* Device only nexthops with an IPIP device are programmed as + * encapsulating adjacency entries. 
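+	 * Reject any other device-only (gateway-less) nexthop below.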
+ */ + if (!nh->gw_family && !nh->is_reject && + !mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) { + NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway"); + return -EINVAL; + } + + return 0; +} + +static int +mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp, + const struct nh_notifier_grp_info *nh_grp, + struct netlink_ext_ack *extack) +{ + int i; + + if (nh_grp->is_fdb) { + NL_SET_ERR_MSG_MOD(extack, "FDB nexthop groups are not supported"); + return -EINVAL; + } + + for (i = 0; i < nh_grp->num_nh; i++) { + const struct nh_notifier_single_info *nh; + int err; + + nh = &nh_grp->nh_entries[i].nh; + err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh, + extack); + if (err) + return err; + } + + return 0; +} + +static int +mlxsw_sp_nexthop_obj_res_group_size_validate(struct mlxsw_sp *mlxsw_sp, + const struct nh_notifier_res_table_info *nh_res_table, + struct netlink_ext_ack *extack) +{ + unsigned int alloc_size; + bool valid_size = false; + int err, i; + + if (nh_res_table->num_nh_buckets < 32) { + NL_SET_ERR_MSG_MOD(extack, "Minimum number of buckets is 32"); + return -EINVAL; + } + + for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) { + const struct mlxsw_sp_adj_grp_size_range *size_range; + + size_range = &mlxsw_sp->router->adj_grp_size_ranges[i]; + + if (nh_res_table->num_nh_buckets >= size_range->start && + nh_res_table->num_nh_buckets <= size_range->end) { + valid_size = true; + break; + } + } + + if (!valid_size) { + NL_SET_ERR_MSG_MOD(extack, "Invalid number of buckets"); + return -EINVAL; + } + + err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp, + MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, + nh_res_table->num_nh_buckets, + &alloc_size); + if (err || nh_res_table->num_nh_buckets != alloc_size) { + NL_SET_ERR_MSG_MOD(extack, "Number of buckets does not fit allocation size of any KVDL partition"); + return -EINVAL; + } + + return 0; +} + +static int +mlxsw_sp_nexthop_obj_res_group_validate(struct mlxsw_sp *mlxsw_sp, + const struct nh_notifier_res_table_info *nh_res_table, + struct netlink_ext_ack *extack) +{ + int err; + u16 i; + + err = mlxsw_sp_nexthop_obj_res_group_size_validate(mlxsw_sp, + nh_res_table, + extack); + if (err) + return err; + + for (i = 0; i < nh_res_table->num_nh_buckets; i++) { + const struct nh_notifier_single_info *nh; + int err; + + nh = &nh_res_table->nhs[i]; + err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh, + extack); + if (err) + return err; + } + + return 0; +} + +static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp, + unsigned long event, + struct nh_notifier_info *info) +{ + struct nh_notifier_single_info *nh; + + if (event != NEXTHOP_EVENT_REPLACE && + event != NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE && + event != NEXTHOP_EVENT_BUCKET_REPLACE) + return 0; + + switch (info->type) { + case NH_NOTIFIER_INFO_TYPE_SINGLE: + return mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, info->nh, + info->extack); + case NH_NOTIFIER_INFO_TYPE_GRP: + return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp, + info->nh_grp, + info->extack); + case NH_NOTIFIER_INFO_TYPE_RES_TABLE: + return mlxsw_sp_nexthop_obj_res_group_validate(mlxsw_sp, + info->nh_res_table, + info->extack); + case NH_NOTIFIER_INFO_TYPE_RES_BUCKET: + nh = &info->nh_res_bucket->new_nh; + return mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh, + info->extack); + default: + NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type"); + return -EOPNOTSUPP; + } +} + +static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp, 
+ const struct nh_notifier_info *info) +{ + const struct net_device *dev; + + switch (info->type) { + case NH_NOTIFIER_INFO_TYPE_SINGLE: + dev = info->nh->dev; + return info->nh->gw_family || info->nh->is_reject || + mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL); + case NH_NOTIFIER_INFO_TYPE_GRP: + case NH_NOTIFIER_INFO_TYPE_RES_TABLE: + /* Already validated earlier. */ + return true; + default: + return false; + } +} + +static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + u16 lb_rif_index = mlxsw_sp->router->lb_rif_index; + + nh->action = MLXSW_SP_NEXTHOP_ACTION_DISCARD; + nh->should_offload = 1; + /* While nexthops that discard packets do not forward packets + * via an egress RIF, they still need to be programmed using a + * valid RIF, so use the loopback RIF created during init. + */ + nh->rif = mlxsw_sp->router->rifs[lb_rif_index]; +} + +static void mlxsw_sp_nexthop_obj_blackhole_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + nh->rif = NULL; + nh->should_offload = 0; +} + +static int +mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop *nh, + struct nh_notifier_single_info *nh_obj, int weight) +{ + struct net_device *dev = nh_obj->dev; + int err; + + nh->nhgi = nh_grp->nhgi; + nh->nh_weight = weight; + + switch (nh_obj->gw_family) { + case AF_INET: + memcpy(&nh->gw_addr, &nh_obj->ipv4, sizeof(nh_obj->ipv4)); + nh->neigh_tbl = &arp_tbl; + break; + case AF_INET6: + memcpy(&nh->gw_addr, &nh_obj->ipv6, sizeof(nh_obj->ipv6)); +#if IS_ENABLED(CONFIG_IPV6) + nh->neigh_tbl = &nd_tbl; +#endif + break; + } + + mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); + list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); + nh->ifindex = dev->ifindex; + + err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev); + if (err) + goto err_type_init; + + if (nh_obj->is_reject) + mlxsw_sp_nexthop_obj_blackhole_init(mlxsw_sp, nh); + + /* In a resilient nexthop group, all the nexthops must be written to + * the adjacency table. Even if they do not have a valid neighbour or + * RIF. 
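+	 * Such nexthops are programmed to trap packets to the CPU below.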
+ */ + if (nh_grp->nhgi->is_resilient && !nh->should_offload) { + nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP; + nh->should_offload = 1; + } + + return 0; + +err_type_init: + list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); + return err; +} + +static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + if (nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD) + mlxsw_sp_nexthop_obj_blackhole_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); + list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); + nh->should_offload = 0; +} + +static int +mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct nh_notifier_info *info) +{ + struct mlxsw_sp_nexthop_group_info *nhgi; + struct mlxsw_sp_nexthop *nh; + bool is_resilient = false; + unsigned int nhs; + int err, i; + + switch (info->type) { + case NH_NOTIFIER_INFO_TYPE_SINGLE: + nhs = 1; + break; + case NH_NOTIFIER_INFO_TYPE_GRP: + nhs = info->nh_grp->num_nh; + break; + case NH_NOTIFIER_INFO_TYPE_RES_TABLE: + nhs = info->nh_res_table->num_nh_buckets; + is_resilient = true; + break; + default: + return -EINVAL; + } + + nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL); + if (!nhgi) + return -ENOMEM; + nh_grp->nhgi = nhgi; + nhgi->nh_grp = nh_grp; + nhgi->gateway = mlxsw_sp_nexthop_obj_is_gateway(mlxsw_sp, info); + nhgi->is_resilient = is_resilient; + nhgi->count = nhs; + for (i = 0; i < nhgi->count; i++) { + struct nh_notifier_single_info *nh_obj; + int weight; + + nh = &nhgi->nexthops[i]; + switch (info->type) { + case NH_NOTIFIER_INFO_TYPE_SINGLE: + nh_obj = info->nh; + weight = 1; + break; + case NH_NOTIFIER_INFO_TYPE_GRP: + nh_obj = &info->nh_grp->nh_entries[i].nh; + weight = info->nh_grp->nh_entries[i].weight; + break; + case NH_NOTIFIER_INFO_TYPE_RES_TABLE: + nh_obj = &info->nh_res_table->nhs[i]; + weight = 1; + break; + default: + err = -EINVAL; + goto err_nexthop_obj_init; + } + err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, + weight); + if (err) + goto err_nexthop_obj_init; + } + err = mlxsw_sp_nexthop_group_inc(mlxsw_sp); + if (err) + goto err_group_inc; + err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + if (err) { + NL_SET_ERR_MSG_MOD(info->extack, "Failed to write adjacency entries to the device"); + goto err_group_refresh; + } + + /* Add resilient nexthop groups to a list so that the activity of their + * nexthop buckets will be periodically queried and cleared. 
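+	 * The activity work is scheduled only when the first such group is
+	 * added and cancelled when the last one is removed.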
+ */ + if (nhgi->is_resilient) { + if (list_empty(&mlxsw_sp->router->nh_res_grp_list)) + mlxsw_sp_nh_grp_activity_work_schedule(mlxsw_sp); + list_add(&nhgi->list, &mlxsw_sp->router->nh_res_grp_list); + } + + return 0; + +err_group_refresh: + mlxsw_sp_nexthop_group_dec(mlxsw_sp); +err_group_inc: + i = nhgi->count; +err_nexthop_obj_init: + for (i--; i >= 0; i--) { + nh = &nhgi->nexthops[i]; + mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh); + } + kfree(nhgi); + return err; +} + +static void +mlxsw_sp_nexthop_obj_group_info_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi; + struct mlxsw_sp_router *router = mlxsw_sp->router; + int i; + + if (nhgi->is_resilient) { + list_del(&nhgi->list); + if (list_empty(&mlxsw_sp->router->nh_res_grp_list)) + cancel_delayed_work(&router->nh_grp_activity_dw); + } + + mlxsw_sp_nexthop_group_dec(mlxsw_sp); + for (i = nhgi->count - 1; i >= 0; i--) { + struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i]; + + mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh); + } + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + WARN_ON_ONCE(nhgi->adj_index_valid); + kfree(nhgi); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop_obj_group_create(struct mlxsw_sp *mlxsw_sp, + struct nh_notifier_info *info) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + int err; + + nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL); + if (!nh_grp) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&nh_grp->vr_list); + err = rhashtable_init(&nh_grp->vr_ht, + &mlxsw_sp_nexthop_group_vr_ht_params); + if (err) + goto err_nexthop_group_vr_ht_init; + INIT_LIST_HEAD(&nh_grp->fib_list); + nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ; + nh_grp->obj.id = info->id; + + err = mlxsw_sp_nexthop_obj_group_info_init(mlxsw_sp, nh_grp, info); + if (err) + goto err_nexthop_group_info_init; + + nh_grp->can_destroy = false; + + return nh_grp; + +err_nexthop_group_info_init: + rhashtable_destroy(&nh_grp->vr_ht); +err_nexthop_group_vr_ht_init: + kfree(nh_grp); + return ERR_PTR(err); +} + +static void +mlxsw_sp_nexthop_obj_group_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + if (!nh_grp->can_destroy) + return; + mlxsw_sp_nexthop_obj_group_info_fini(mlxsw_sp, nh_grp); + WARN_ON_ONCE(!list_empty(&nh_grp->fib_list)); + WARN_ON_ONCE(!list_empty(&nh_grp->vr_list)); + rhashtable_destroy(&nh_grp->vr_ht); + kfree(nh_grp); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop_obj_group_lookup(struct mlxsw_sp *mlxsw_sp, u32 id) +{ + struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg; + + cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ; + cmp_arg.id = id; + return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht, + &cmp_arg, + mlxsw_sp_nexthop_group_ht_params); +} + +static int mlxsw_sp_nexthop_obj_group_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + return mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp); +} + +static int +mlxsw_sp_nexthop_obj_group_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop_group *old_nh_grp, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_nexthop_group_info *old_nhgi = old_nh_grp->nhgi; + struct mlxsw_sp_nexthop_group_info *new_nhgi = nh_grp->nhgi; + int err; + + old_nh_grp->nhgi = new_nhgi; + new_nhgi->nh_grp = old_nh_grp; + nh_grp->nhgi = old_nhgi; + old_nhgi->nh_grp = nh_grp; + + if (old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) { + /* Both the old adjacency index and the new one are valid. 
+ * Routes are currently using the old one. Tell the device to + * replace the old adjacency index with the new one. + */ + err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, old_nh_grp, + old_nhgi->adj_index, + old_nhgi->ecmp_size); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to replace old adjacency index with new one"); + goto err_out; + } + } else if (old_nhgi->adj_index_valid && !new_nhgi->adj_index_valid) { + /* The old adjacency index is valid, while the new one is not. + * Iterate over all the routes using the group and change them + * to trap packets to the CPU. + */ + err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to trap packets"); + goto err_out; + } + } else if (!old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) { + /* The old adjacency index is invalid, while the new one is. + * Iterate over all the routes using the group and change them + * to forward packets using the new valid index. + */ + err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to forward packets"); + goto err_out; + } + } + + /* Make sure the flags are set / cleared based on the new nexthop group + * information. + */ + mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, old_nh_grp); + + /* At this point 'nh_grp' is just a shell that is not used by anyone + * and its nexthop group info is the old info that was just replaced + * with the new one. Remove it. + */ + nh_grp->can_destroy = true; + mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp); + + return 0; + +err_out: + old_nhgi->nh_grp = old_nh_grp; + nh_grp->nhgi = new_nhgi; + new_nhgi->nh_grp = nh_grp; + old_nh_grp->nhgi = old_nhgi; + return err; +} + +static int mlxsw_sp_nexthop_obj_new(struct mlxsw_sp *mlxsw_sp, + struct nh_notifier_info *info) +{ + struct mlxsw_sp_nexthop_group *nh_grp, *old_nh_grp; + struct netlink_ext_ack *extack = info->extack; + int err; + + nh_grp = mlxsw_sp_nexthop_obj_group_create(mlxsw_sp, info); + if (IS_ERR(nh_grp)) + return PTR_ERR(nh_grp); + + old_nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id); + if (!old_nh_grp) + err = mlxsw_sp_nexthop_obj_group_add(mlxsw_sp, nh_grp); + else + err = mlxsw_sp_nexthop_obj_group_replace(mlxsw_sp, nh_grp, + old_nh_grp, extack); + + if (err) { + nh_grp->can_destroy = true; + mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp); + } + + return err; +} + +static void mlxsw_sp_nexthop_obj_del(struct mlxsw_sp *mlxsw_sp, + struct nh_notifier_info *info) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + + nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id); + if (!nh_grp) + return; + + nh_grp->can_destroy = true; + mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp); + + /* If the group still has routes using it, then defer the delete + * operation until the last route using it is deleted. 
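+	 * can_destroy was set above, so the last route to stop using the
+	 * group will destroy it.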
+ */ + if (!list_empty(&nh_grp->fib_list)) + return; + mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp); +} + +static int mlxsw_sp_nexthop_obj_bucket_query(struct mlxsw_sp *mlxsw_sp, + u32 adj_index, char *ratr_pl) +{ + MLXSW_REG_ZERO(ratr, ratr_pl); + mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ); + mlxsw_reg_ratr_adjacency_index_low_set(ratr_pl, adj_index); + mlxsw_reg_ratr_adjacency_index_high_set(ratr_pl, adj_index >> 16); + + return mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); +} + +static int mlxsw_sp_nexthop_obj_bucket_compare(char *ratr_pl, char *ratr_pl_new) +{ + /* Clear the opcode and activity on both the old and new payload as + * they are irrelevant for the comparison. + */ + mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ); + mlxsw_reg_ratr_a_set(ratr_pl, 0); + mlxsw_reg_ratr_op_set(ratr_pl_new, MLXSW_REG_RATR_OP_QUERY_READ); + mlxsw_reg_ratr_a_set(ratr_pl_new, 0); + + /* If the contents of the adjacency entry are consistent with the + * replacement request, then replacement was successful. + */ + if (!memcmp(ratr_pl, ratr_pl_new, MLXSW_REG_RATR_LEN)) + return 0; + + return -EINVAL; +} + +static int +mlxsw_sp_nexthop_obj_bucket_adj_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, + struct nh_notifier_info *info) +{ + u16 bucket_index = info->nh_res_bucket->bucket_index; + struct netlink_ext_ack *extack = info->extack; + bool force = info->nh_res_bucket->force; + char ratr_pl_new[MLXSW_REG_RATR_LEN]; + char ratr_pl[MLXSW_REG_RATR_LEN]; + u32 adj_index; + int err; + + /* No point in trying an atomic replacement if the idle timer interval + * is smaller than the interval in which we query and clear activity. + */ + if (!force && info->nh_res_bucket->idle_timer_ms < + MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL) + force = true; + + adj_index = nh->nhgi->adj_index + bucket_index; + err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh, force, ratr_pl); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to overwrite nexthop bucket"); + return err; + } + + if (!force) { + err = mlxsw_sp_nexthop_obj_bucket_query(mlxsw_sp, adj_index, + ratr_pl_new); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to query nexthop bucket state after replacement. 
State might be inconsistent"); + return err; + } + + err = mlxsw_sp_nexthop_obj_bucket_compare(ratr_pl, ratr_pl_new); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket was not replaced because it was active during replacement"); + return err; + } + } + + nh->update = 0; + nh->offloaded = 1; + mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, bucket_index); + + return 0; +} + +static int mlxsw_sp_nexthop_obj_bucket_replace(struct mlxsw_sp *mlxsw_sp, + struct nh_notifier_info *info) +{ + u16 bucket_index = info->nh_res_bucket->bucket_index; + struct netlink_ext_ack *extack = info->extack; + struct mlxsw_sp_nexthop_group_info *nhgi; + struct nh_notifier_single_info *nh_obj; + struct mlxsw_sp_nexthop_group *nh_grp; + struct mlxsw_sp_nexthop *nh; + int err; + + nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id); + if (!nh_grp) { + NL_SET_ERR_MSG_MOD(extack, "Nexthop group was not found"); + return -EINVAL; + } + + nhgi = nh_grp->nhgi; + + if (bucket_index >= nhgi->count) { + NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket index out of range"); + return -EINVAL; + } + + nh = &nhgi->nexthops[bucket_index]; + mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh); + + nh_obj = &info->nh_res_bucket->new_nh; + err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to initialize nexthop object for nexthop bucket replacement"); + goto err_nexthop_obj_init; + } + + err = mlxsw_sp_nexthop_obj_bucket_adj_update(mlxsw_sp, nh, info); + if (err) + goto err_nexthop_obj_bucket_adj_update; + + return 0; + +err_nexthop_obj_bucket_adj_update: + mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh); +err_nexthop_obj_init: + nh_obj = &info->nh_res_bucket->old_nh; + mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1); + /* The old adjacency entry was not overwritten */ + nh->update = 0; + nh->offloaded = 1; + return err; +} + +static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct nh_notifier_info *info = ptr; + struct mlxsw_sp_router *router; + int err = 0; + + router = container_of(nb, struct mlxsw_sp_router, nexthop_nb); + err = mlxsw_sp_nexthop_obj_validate(router->mlxsw_sp, event, info); + if (err) + goto out; + + mutex_lock(&router->lock); + + switch (event) { + case NEXTHOP_EVENT_REPLACE: + err = mlxsw_sp_nexthop_obj_new(router->mlxsw_sp, info); + break; + case NEXTHOP_EVENT_DEL: + mlxsw_sp_nexthop_obj_del(router->mlxsw_sp, info); + break; + case NEXTHOP_EVENT_BUCKET_REPLACE: + err = mlxsw_sp_nexthop_obj_bucket_replace(router->mlxsw_sp, + info); + break; + default: + break; + } + + mutex_unlock(&router->lock); + +out: + return notifier_from_errno(err); +} + +static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp, + struct fib_info *fi) +{ + const struct fib_nh *nh = fib_info_nh(fi, 0); + + return nh->fib_nh_gw_family || + mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL); +} + +static int +mlxsw_sp_nexthop4_group_info_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + unsigned int nhs = fib_info_num_path(nh_grp->ipv4.fi); + struct mlxsw_sp_nexthop_group_info *nhgi; + struct mlxsw_sp_nexthop *nh; + int err, i; + + nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL); + if (!nhgi) + return -ENOMEM; + nh_grp->nhgi = nhgi; + nhgi->nh_grp = nh_grp; + nhgi->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, nh_grp->ipv4.fi); + nhgi->count = nhs; + for (i = 0; i < nhgi->count; i++) { + struct fib_nh *fib_nh; + + nh = &nhgi->nexthops[i]; + fib_nh = 
fib_info_nh(nh_grp->ipv4.fi, i); + err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh); + if (err) + goto err_nexthop4_init; + } + err = mlxsw_sp_nexthop_group_inc(mlxsw_sp); + if (err) + goto err_group_inc; + err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + if (err) + goto err_group_refresh; + + return 0; + +err_group_refresh: + mlxsw_sp_nexthop_group_dec(mlxsw_sp); +err_group_inc: + i = nhgi->count; +err_nexthop4_init: + for (i--; i >= 0; i--) { + nh = &nhgi->nexthops[i]; + mlxsw_sp_nexthop4_fini(mlxsw_sp, nh); + } + kfree(nhgi); + return err; +} + +static void +mlxsw_sp_nexthop4_group_info_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi; + int i; + + mlxsw_sp_nexthop_group_dec(mlxsw_sp); + for (i = nhgi->count - 1; i >= 0; i--) { + struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i]; + + mlxsw_sp_nexthop4_fini(mlxsw_sp, nh); + } + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + WARN_ON_ONCE(nhgi->adj_index_valid); + kfree(nhgi); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + int err; + + nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL); + if (!nh_grp) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&nh_grp->vr_list); + err = rhashtable_init(&nh_grp->vr_ht, + &mlxsw_sp_nexthop_group_vr_ht_params); + if (err) + goto err_nexthop_group_vr_ht_init; + INIT_LIST_HEAD(&nh_grp->fib_list); + nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4; + nh_grp->ipv4.fi = fi; + fib_info_hold(fi); + + err = mlxsw_sp_nexthop4_group_info_init(mlxsw_sp, nh_grp); + if (err) + goto err_nexthop_group_info_init; + + err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp); + if (err) + goto err_nexthop_group_insert; + + nh_grp->can_destroy = true; + + return nh_grp; + +err_nexthop_group_insert: + mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp); +err_nexthop_group_info_init: + fib_info_put(fi); + rhashtable_destroy(&nh_grp->vr_ht); +err_nexthop_group_vr_ht_init: + kfree(nh_grp); + return ERR_PTR(err); +} + +static void +mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + if (!nh_grp->can_destroy) + return; + mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp); + mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp); + fib_info_put(nh_grp->ipv4.fi); + WARN_ON_ONCE(!list_empty(&nh_grp->vr_list)); + rhashtable_destroy(&nh_grp->vr_ht); + kfree(nh_grp); +} + +static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + struct fib_info *fi) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + + if (fi->nh) { + nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, + fi->nh->id); + if (WARN_ON_ONCE(!nh_grp)) + return -EINVAL; + goto out; + } + + nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi); + if (!nh_grp) { + nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi); + if (IS_ERR(nh_grp)) + return PTR_ERR(nh_grp); + } +out: + list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list); + fib_entry->nh_group = nh_grp; + return 0; +} + +static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; + + list_del(&fib_entry->nexthop_group_node); + if (!list_empty(&nh_grp->fib_list)) + return; + + if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) { + mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp); + 
return; + } + + mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp); +} + +static bool +mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_fib4_entry *fib4_entry; + + fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry, + common); + return !fib4_entry->dscp; +} + +static bool +mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group; + + switch (fib_entry->fib_node->fib->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + if (!mlxsw_sp_fib4_entry_should_offload(fib_entry)) + return false; + break; + case MLXSW_SP_L3_PROTO_IPV6: + break; + } + + switch (fib_entry->type) { + case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: + return !!nh_group->nhgi->adj_index_valid; + case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: + return !!nh_group->nhgi->nh_rif; + case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE: + case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: + case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP: + return true; + default: + return false; + } +} + +static struct mlxsw_sp_nexthop * +mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, + const struct mlxsw_sp_rt6 *mlxsw_sp_rt6) +{ + int i; + + for (i = 0; i < nh_grp->nhgi->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i]; + struct fib6_info *rt = mlxsw_sp_rt6->rt; + + if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev && + ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr, + &rt->fib6_nh->fib_nh_gw6)) + return nh; + } + + return NULL; +} + +static void +mlxsw_sp_fib4_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp, + struct fib_entry_notifier_info *fen_info) +{ + u32 *p_dst = (u32 *) &fen_info->dst; + struct fib_rt_info fri; + + fri.fi = fen_info->fi; + fri.tb_id = fen_info->tb_id; + fri.dst = cpu_to_be32(*p_dst); + fri.dst_len = fen_info->dst_len; + fri.dscp = fen_info->dscp; + fri.type = fen_info->type; + fri.offload = false; + fri.trap = false; + fri.offload_failed = true; + fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri); +} + +static void +mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr; + int dst_len = fib_entry->fib_node->key.prefix_len; + struct mlxsw_sp_fib4_entry *fib4_entry; + struct fib_rt_info fri; + bool should_offload; + + should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry); + fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry, + common); + fri.fi = fib4_entry->fi; + fri.tb_id = fib4_entry->tb_id; + fri.dst = cpu_to_be32(*p_dst); + fri.dst_len = dst_len; + fri.dscp = fib4_entry->dscp; + fri.type = fib4_entry->type; + fri.offload = should_offload; + fri.trap = !should_offload; + fri.offload_failed = false; + fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri); +} + +static void +mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr; + int dst_len = fib_entry->fib_node->key.prefix_len; + struct mlxsw_sp_fib4_entry *fib4_entry; + struct fib_rt_info fri; + + fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry, + common); + fri.fi = fib4_entry->fi; + fri.tb_id = fib4_entry->tb_id; + fri.dst = cpu_to_be32(*p_dst); + fri.dst_len = dst_len; + fri.dscp = fib4_entry->dscp; + fri.type = fib4_entry->type; + fri.offload = false; + fri.trap = false; + fri.offload_failed = false; + fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri); +} + +#if 
IS_ENABLED(CONFIG_IPV6) +static void +mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp, + struct fib6_info **rt_arr, + unsigned int nrt6) +{ + int i; + + /* In IPv6 a multipath route is represented using multiple routes, so + * we need to set the flags on all of them. + */ + for (i = 0; i < nrt6; i++) + fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), rt_arr[i], + false, false, true); +} +#else +static void +mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp, + struct fib6_info **rt_arr, + unsigned int nrt6) +{ +} +#endif + +#if IS_ENABLED(CONFIG_IPV6) +static void +mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + bool should_offload; + + should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry); + + /* In IPv6 a multipath route is represented using multiple routes, so + * we need to set the flags on all of them. + */ + fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, + common); + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) + fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt, + should_offload, !should_offload, false); +} +#else +static void +mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ +} +#endif + +#if IS_ENABLED(CONFIG_IPV6) +static void +mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, + common); + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) + fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt, + false, false, false); +} +#else +static void +mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ +} +#endif + +static void +mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + switch (fib_entry->fib_node->fib->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + mlxsw_sp_fib4_entry_hw_flags_set(mlxsw_sp, fib_entry); + break; + case MLXSW_SP_L3_PROTO_IPV6: + mlxsw_sp_fib6_entry_hw_flags_set(mlxsw_sp, fib_entry); + break; + } +} + +static void +mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + switch (fib_entry->fib_node->fib->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + mlxsw_sp_fib4_entry_hw_flags_clear(mlxsw_sp, fib_entry); + break; + case MLXSW_SP_L3_PROTO_IPV6: + mlxsw_sp_fib6_entry_hw_flags_clear(mlxsw_sp, fib_entry); + break; + } +} + +static void +mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + switch (op) { + case MLXSW_REG_RALUE_OP_WRITE_WRITE: + mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry); + break; + case MLXSW_REG_RALUE_OP_WRITE_DELETE: + mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry); + break; + default: + break; + } +} + +static void +mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl, + const struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; + enum mlxsw_reg_ralxx_protocol proto; + u32 *p_dip; + + proto = (enum mlxsw_reg_ralxx_protocol) fib->proto; + + switch (fib->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + p_dip = (u32 *) fib_entry->fib_node->key.addr; + 
mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id, + fib_entry->fib_node->key.prefix_len, + *p_dip); + break; + case MLXSW_SP_L3_PROTO_IPV6: + mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id, + fib_entry->fib_node->key.prefix_len, + fib_entry->fib_node->key.addr); + break; + } +} + +static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group; + struct mlxsw_sp_nexthop_group_info *nhgi = nh_group->nhgi; + char ralue_pl[MLXSW_REG_RALUE_LEN]; + enum mlxsw_reg_ralue_trap_action trap_action; + u16 trap_id = 0; + u32 adjacency_index = 0; + u16 ecmp_size = 0; + + /* In case the nexthop group adjacency index is valid, use it + * with provided ECMP size. Otherwise, setup trap and pass + * traffic to kernel. + */ + if (mlxsw_sp_fib_entry_should_offload(fib_entry)) { + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; + adjacency_index = nhgi->adj_index; + ecmp_size = nhgi->ecmp_size; + } else if (!nhgi->adj_index_valid && nhgi->count && nhgi->nh_rif) { + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; + adjacency_index = mlxsw_sp->router->adj_trap_index; + ecmp_size = 1; + } else { + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP; + trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; + } + + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id, + adjacency_index, ecmp_size); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + struct mlxsw_sp_rif *rif = fib_entry->nh_group->nhgi->nh_rif; + enum mlxsw_reg_ralue_trap_action trap_action; + char ralue_pl[MLXSW_REG_RALUE_LEN]; + u16 trap_id = 0; + u16 rif_index = 0; + + if (mlxsw_sp_fib_entry_should_offload(fib_entry)) { + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; + rif_index = rif->rif_index; + } else { + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP; + trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; + } + + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, + rif_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; + + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + enum mlxsw_reg_ralue_trap_action trap_action; + char ralue_pl[MLXSW_REG_RALUE_LEN]; + + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR; + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static int +mlxsw_sp_fib_entry_op_unreachable(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + enum mlxsw_reg_ralue_trap_action trap_action; + char ralue_pl[MLXSW_REG_RALUE_LEN]; + u16 trap_id; + + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP; + trap_id = MLXSW_TRAP_ID_RTR_INGRESS1; + + 
mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static int +mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry; + const struct mlxsw_sp_ipip_ops *ipip_ops; + char ralue_pl[MLXSW_REG_RALUE_LEN]; + int err; + + if (WARN_ON(!ipip_entry)) + return -EINVAL; + + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; + err = ipip_ops->decap_config(mlxsw_sp, ipip_entry, + fib_entry->decap.tunnel_index); + if (err) + return err; + + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, + fib_entry->decap.tunnel_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; + + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, + fib_entry->decap.tunnel_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + switch (fib_entry->type) { + case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: + return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: + return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_TRAP: + return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE: + return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE: + return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, fib_entry, + op); + case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: + return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp, + fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP: + return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op); + } + return -EINVAL; +} + +static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op); + + if (err) + return err; + + mlxsw_sp_fib_entry_hw_flags_refresh(mlxsw_sp, fib_entry, op); + + return err; +} + +static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, + MLXSW_REG_RALUE_OP_WRITE_WRITE); +} + +static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, + MLXSW_REG_RALUE_OP_WRITE_DELETE); +} + +static int +mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, + const struct fib_entry_notifier_info *fen_info, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi; + union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) }; + struct mlxsw_sp_router *router = mlxsw_sp->router; + u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id); + int ifindex = nhgi->nexthops[0].ifindex; + struct mlxsw_sp_ipip_entry *ipip_entry; + + switch (fen_info->type) { + case RTN_LOCAL: + ipip_entry = 
mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex, + MLXSW_SP_L3_PROTO_IPV4, dip); + if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) { + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP; + return mlxsw_sp_fib_entry_decap_init(mlxsw_sp, + fib_entry, + ipip_entry); + } + if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id, + MLXSW_SP_L3_PROTO_IPV4, + &dip)) { + u32 tunnel_index; + + tunnel_index = router->nve_decap_config.tunnel_index; + fib_entry->decap.tunnel_index = tunnel_index; + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; + return 0; + } + fallthrough; + case RTN_BROADCAST: + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + return 0; + case RTN_BLACKHOLE: + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE; + return 0; + case RTN_UNREACHABLE: + case RTN_PROHIBIT: + /* Packets hitting these routes need to be trapped, but + * can do so with a lower priority than packets directed + * at the host, so use action type local instead of trap. + */ + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE; + return 0; + case RTN_UNICAST: + if (nhgi->gateway) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; + else + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; + return 0; + default: + return -EINVAL; + } +} + +static void +mlxsw_sp_fib_entry_type_unset(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + switch (fib_entry->type) { + case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: + mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry); + break; + default: + break; + } +} + +static void +mlxsw_sp_fib4_entry_type_unset(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib4_entry *fib4_entry) +{ + mlxsw_sp_fib_entry_type_unset(mlxsw_sp, &fib4_entry->common); +} + +static struct mlxsw_sp_fib4_entry * +mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node, + const struct fib_entry_notifier_info *fen_info) +{ + struct mlxsw_sp_fib4_entry *fib4_entry; + struct mlxsw_sp_fib_entry *fib_entry; + int err; + + fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL); + if (!fib4_entry) + return ERR_PTR(-ENOMEM); + fib_entry = &fib4_entry->common; + + err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi); + if (err) + goto err_nexthop4_group_get; + + err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group, + fib_node->fib); + if (err) + goto err_nexthop_group_vr_link; + + err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry); + if (err) + goto err_fib4_entry_type_set; + + fib4_entry->fi = fen_info->fi; + fib_info_hold(fib4_entry->fi); + fib4_entry->tb_id = fen_info->tb_id; + fib4_entry->type = fen_info->type; + fib4_entry->dscp = fen_info->dscp; + + fib_entry->fib_node = fib_node; + + return fib4_entry; + +err_fib4_entry_type_set: + mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib); +err_nexthop_group_vr_link: + mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common); +err_nexthop4_group_get: + kfree(fib4_entry); + return ERR_PTR(err); +} + +static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib4_entry *fib4_entry) +{ + struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node; + + fib_info_put(fib4_entry->fi); + mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, fib4_entry); + mlxsw_sp_nexthop_group_vr_unlink(fib4_entry->common.nh_group, + fib_node->fib); + mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common); + kfree(fib4_entry); +} + +static struct mlxsw_sp_fib4_entry * +mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp, + const struct 
fib_entry_notifier_info *fen_info) +{ + struct mlxsw_sp_fib4_entry *fib4_entry; + struct mlxsw_sp_fib_node *fib_node; + struct mlxsw_sp_fib *fib; + struct mlxsw_sp_vr *vr; + + vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id); + if (!vr) + return NULL; + fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4); + + fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst, + sizeof(fen_info->dst), + fen_info->dst_len); + if (!fib_node) + return NULL; + + fib4_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib4_entry, common); + if (fib4_entry->tb_id == fen_info->tb_id && + fib4_entry->dscp == fen_info->dscp && + fib4_entry->type == fen_info->type && + fib4_entry->fi == fen_info->fi) + return fib4_entry; + + return NULL; +} + +static const struct rhashtable_params mlxsw_sp_fib_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_fib_node, key), + .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node), + .key_len = sizeof(struct mlxsw_sp_fib_key), + .automatic_shrinking = true, +}; + +static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib, + struct mlxsw_sp_fib_node *fib_node) +{ + return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node, + mlxsw_sp_fib_ht_params); +} + +static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib, + struct mlxsw_sp_fib_node *fib_node) +{ + rhashtable_remove_fast(&fib->ht, &fib_node->ht_node, + mlxsw_sp_fib_ht_params); +} + +static struct mlxsw_sp_fib_node * +mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr, + size_t addr_len, unsigned char prefix_len) +{ + struct mlxsw_sp_fib_key key; + + memset(&key, 0, sizeof(key)); + memcpy(key.addr, addr, addr_len); + key.prefix_len = prefix_len; + return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params); +} + +static struct mlxsw_sp_fib_node * +mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr, + size_t addr_len, unsigned char prefix_len) +{ + struct mlxsw_sp_fib_node *fib_node; + + fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL); + if (!fib_node) + return NULL; + + list_add(&fib_node->list, &fib->node_list); + memcpy(fib_node->key.addr, addr, addr_len); + fib_node->key.prefix_len = prefix_len; + + return fib_node; +} + +static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node) +{ + list_del(&fib_node->list); + kfree(fib_node); +} + +static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node) +{ + struct mlxsw_sp_prefix_usage req_prefix_usage; + struct mlxsw_sp_fib *fib = fib_node->fib; + struct mlxsw_sp_lpm_tree *lpm_tree; + int err; + + lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto]; + if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0) + goto out; + + mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage); + mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len); + lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, + fib->proto); + if (IS_ERR(lpm_tree)) + return PTR_ERR(lpm_tree); + + err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree); + if (err) + goto err_lpm_tree_replace; + +out: + lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++; + return 0; + +err_lpm_tree_replace: + mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); + return err; +} + +static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node) +{ + struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree; + struct mlxsw_sp_prefix_usage req_prefix_usage; + struct mlxsw_sp_fib *fib = fib_node->fib; + int err; + + if 
(--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0) + return; + /* Try to construct a new LPM tree from the current prefix usage + * minus the unused one. If we fail, continue using the old one. + */ + mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage); + mlxsw_sp_prefix_usage_clear(&req_prefix_usage, + fib_node->key.prefix_len); + lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, + fib->proto); + if (IS_ERR(lpm_tree)) + return; + + err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree); + if (err) + goto err_lpm_tree_replace; + + return; + +err_lpm_tree_replace: + mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); +} + +static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node, + struct mlxsw_sp_fib *fib) +{ + int err; + + err = mlxsw_sp_fib_node_insert(fib, fib_node); + if (err) + return err; + fib_node->fib = fib; + + err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node); + if (err) + goto err_fib_lpm_tree_link; + + return 0; + +err_fib_lpm_tree_link: + fib_node->fib = NULL; + mlxsw_sp_fib_node_remove(fib, fib_node); + return err; +} + +static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node) +{ + struct mlxsw_sp_fib *fib = fib_node->fib; + + mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node); + fib_node->fib = NULL; + mlxsw_sp_fib_node_remove(fib, fib_node); +} + +static struct mlxsw_sp_fib_node * +mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr, + size_t addr_len, unsigned char prefix_len, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_fib_node *fib_node; + struct mlxsw_sp_fib *fib; + struct mlxsw_sp_vr *vr; + int err; + + vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL); + if (IS_ERR(vr)) + return ERR_CAST(vr); + fib = mlxsw_sp_vr_fib(vr, proto); + + fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len); + if (fib_node) + return fib_node; + + fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len); + if (!fib_node) { + err = -ENOMEM; + goto err_fib_node_create; + } + + err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib); + if (err) + goto err_fib_node_init; + + return fib_node; + +err_fib_node_init: + mlxsw_sp_fib_node_destroy(fib_node); +err_fib_node_create: + mlxsw_sp_vr_put(mlxsw_sp, vr); + return ERR_PTR(err); +} + +static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node) +{ + struct mlxsw_sp_vr *vr = fib_node->fib->vr; + + if (fib_node->fib_entry) + return; + mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node); + mlxsw_sp_fib_node_destroy(fib_node); + mlxsw_sp_vr_put(mlxsw_sp, vr); +} + +static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; + int err; + + fib_node->fib_entry = fib_entry; + + err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); + if (err) + goto err_fib_entry_update; + + return 0; + +err_fib_entry_update: + fib_node->fib_entry = NULL; + return err; +} + +static void +mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; + + mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); + fib_node->fib_entry = NULL; +} + +static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry) +{ + struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node; + struct mlxsw_sp_fib4_entry *fib4_replaced; + + if (!fib_node->fib_entry) + return 
true; + + fib4_replaced = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib4_entry, common); + if (fib4_entry->tb_id == RT_TABLE_MAIN && + fib4_replaced->tb_id == RT_TABLE_LOCAL) + return false; + + return true; +} + +static int +mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp, + const struct fib_entry_notifier_info *fen_info) +{ + struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced; + struct mlxsw_sp_fib_entry *replaced; + struct mlxsw_sp_fib_node *fib_node; + int err; + + if (fen_info->fi->nh && + !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, fen_info->fi->nh->id)) + return 0; + + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id, + &fen_info->dst, sizeof(fen_info->dst), + fen_info->dst_len, + MLXSW_SP_L3_PROTO_IPV4); + if (IS_ERR(fib_node)) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n"); + return PTR_ERR(fib_node); + } + + fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info); + if (IS_ERR(fib4_entry)) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n"); + err = PTR_ERR(fib4_entry); + goto err_fib4_entry_create; + } + + if (!mlxsw_sp_fib4_allow_replace(fib4_entry)) { + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return 0; + } + + replaced = fib_node->fib_entry; + err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib4_entry->common); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n"); + goto err_fib_node_entry_link; + } + + /* Nothing to replace */ + if (!replaced) + return 0; + + mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced); + fib4_replaced = container_of(replaced, struct mlxsw_sp_fib4_entry, + common); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_replaced); + + return 0; + +err_fib_node_entry_link: + fib_node->fib_entry = replaced; + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); +err_fib4_entry_create: + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return err; +} + +static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, + struct fib_entry_notifier_info *fen_info) +{ + struct mlxsw_sp_fib4_entry *fib4_entry; + struct mlxsw_sp_fib_node *fib_node; + + fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info); + if (!fib4_entry) + return; + fib_node = fib4_entry->common.fib_node; + + mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib4_entry->common); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); +} + +static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt) +{ + /* Multicast routes aren't supported, so ignore them. Neighbour + * Discovery packets are specifically trapped. + */ + if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST) + return true; + + /* Cloned routes are irrelevant in the forwarding path. */ + if (rt->fib6_flags & RTF_CACHE) + return true; + + return false; +} + +static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL); + if (!mlxsw_sp_rt6) + return ERR_PTR(-ENOMEM); + + /* In case of route replace, replaced route is deleted with + * no notification. Take reference to prevent accessing freed + * memory. 
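+ * The reference is released via mlxsw_sp_rt6_release() when the entry is + * destroyed in mlxsw_sp_rt6_destroy().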
+ */ + mlxsw_sp_rt6->rt = rt; + fib6_info_hold(rt); + + return mlxsw_sp_rt6; +} + +#if IS_ENABLED(CONFIG_IPV6) +static void mlxsw_sp_rt6_release(struct fib6_info *rt) +{ + fib6_info_release(rt); +} +#else +static void mlxsw_sp_rt6_release(struct fib6_info *rt) +{ +} +#endif + +static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6) +{ + struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; + + if (!mlxsw_sp_rt6->rt->nh) + fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; + mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt); + kfree(mlxsw_sp_rt6); +} + +static struct fib6_info * +mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry) +{ + return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, + list)->rt; +} + +static struct mlxsw_sp_rt6 * +mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry, + const struct fib6_info *rt) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + if (mlxsw_sp_rt6->rt == rt) + return mlxsw_sp_rt6; + } + + return NULL; +} + +static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp, + const struct fib6_info *rt, + enum mlxsw_sp_ipip_type *ret) +{ + return rt->fib6_nh->fib_nh_dev && + mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret); +} + +static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop *nh, + const struct fib6_info *rt) +{ + struct net_device *dev = rt->fib6_nh->fib_nh_dev; + int err; + + nh->nhgi = nh_grp->nhgi; + nh->nh_weight = rt->fib6_nh->fib_nh_weight; + memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr)); +#if IS_ENABLED(CONFIG_IPV6) + nh->neigh_tbl = &nd_tbl; +#endif + mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); + + list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); + + if (!dev) + return 0; + nh->ifindex = dev->ifindex; + + err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev); + if (err) + goto err_nexthop_type_init; + + return 0; + +err_nexthop_type_init: + list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); + return err; +} + +static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); + list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); +} + +static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp, + const struct fib6_info *rt) +{ + return rt->fib6_nh->fib_nh_gw_family || + mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL); +} + +static int +mlxsw_sp_nexthop6_group_info_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_nexthop_group_info *nhgi; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + struct mlxsw_sp_nexthop *nh; + int err, i; + + nhgi = kzalloc(struct_size(nhgi, nexthops, fib6_entry->nrt6), + GFP_KERNEL); + if (!nhgi) + return -ENOMEM; + nh_grp->nhgi = nhgi; + nhgi->nh_grp = nh_grp; + mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list, + struct mlxsw_sp_rt6, list); + nhgi->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt); + nhgi->count = fib6_entry->nrt6; + for (i = 0; i < nhgi->count; i++) { + struct fib6_info *rt = mlxsw_sp_rt6->rt; + + nh = &nhgi->nexthops[i]; + err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt); + if (err) + goto err_nexthop6_init; + mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list); + } + nh_grp->nhgi = nhgi; + err = 
mlxsw_sp_nexthop_group_inc(mlxsw_sp); + if (err) + goto err_group_inc; + err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + if (err) + goto err_group_refresh; + + return 0; + +err_group_refresh: + mlxsw_sp_nexthop_group_dec(mlxsw_sp); +err_group_inc: + i = nhgi->count; +err_nexthop6_init: + for (i--; i >= 0; i--) { + nh = &nhgi->nexthops[i]; + mlxsw_sp_nexthop6_fini(mlxsw_sp, nh); + } + kfree(nhgi); + return err; +} + +static void +mlxsw_sp_nexthop6_group_info_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi; + int i; + + mlxsw_sp_nexthop_group_dec(mlxsw_sp); + for (i = nhgi->count - 1; i >= 0; i--) { + struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i]; + + mlxsw_sp_nexthop6_fini(mlxsw_sp, nh); + } + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + WARN_ON_ONCE(nhgi->adj_index_valid); + kfree(nhgi); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + int err; + + nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL); + if (!nh_grp) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&nh_grp->vr_list); + err = rhashtable_init(&nh_grp->vr_ht, + &mlxsw_sp_nexthop_group_vr_ht_params); + if (err) + goto err_nexthop_group_vr_ht_init; + INIT_LIST_HEAD(&nh_grp->fib_list); + nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6; + + err = mlxsw_sp_nexthop6_group_info_init(mlxsw_sp, nh_grp, fib6_entry); + if (err) + goto err_nexthop_group_info_init; + + err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp); + if (err) + goto err_nexthop_group_insert; + + nh_grp->can_destroy = true; + + return nh_grp; + +err_nexthop_group_insert: + mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp); +err_nexthop_group_info_init: + rhashtable_destroy(&nh_grp->vr_ht); +err_nexthop_group_vr_ht_init: + kfree(nh_grp); + return ERR_PTR(err); +} + +static void +mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + if (!nh_grp->can_destroy) + return; + mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp); + mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp); + WARN_ON_ONCE(!list_empty(&nh_grp->vr_list)); + rhashtable_destroy(&nh_grp->vr_ht); + kfree(nh_grp); +} + +static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + struct mlxsw_sp_nexthop_group *nh_grp; + + if (rt->nh) { + nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, + rt->nh->id); + if (WARN_ON_ONCE(!nh_grp)) + return -EINVAL; + goto out; + } + + nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry); + if (!nh_grp) { + nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry); + if (IS_ERR(nh_grp)) + return PTR_ERR(nh_grp); + } + + /* The route and the nexthop are described by the same struct, so we + * need to the update the nexthop offload indication for the new route. 
+ */ + __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry); + +out: + list_add_tail(&fib6_entry->common.nexthop_group_node, + &nh_grp->fib_list); + fib6_entry->common.nh_group = nh_grp; + + return 0; +} + +static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; + + list_del(&fib_entry->nexthop_group_node); + if (!list_empty(&nh_grp->fib_list)) + return; + + if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) { + mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp); + return; + } + + mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp); +} + +static int +mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group; + struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node; + int err; + + mlxsw_sp_nexthop_group_vr_unlink(old_nh_grp, fib_node->fib); + fib6_entry->common.nh_group = NULL; + list_del(&fib6_entry->common.nexthop_group_node); + + err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry); + if (err) + goto err_nexthop6_group_get; + + err = mlxsw_sp_nexthop_group_vr_link(fib6_entry->common.nh_group, + fib_node->fib); + if (err) + goto err_nexthop_group_vr_link; + + /* In case this entry is offloaded, then the adjacency index + * currently associated with it in the device's table is that + * of the old group. Start using the new one instead. + */ + err = mlxsw_sp_fib_entry_update(mlxsw_sp, &fib6_entry->common); + if (err) + goto err_fib_entry_update; + + if (list_empty(&old_nh_grp->fib_list)) + mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp); + + return 0; + +err_fib_entry_update: + mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group, + fib_node->fib); +err_nexthop_group_vr_link: + mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common); +err_nexthop6_group_get: + list_add_tail(&fib6_entry->common.nexthop_group_node, + &old_nh_grp->fib_list); + fib6_entry->common.nh_group = old_nh_grp; + mlxsw_sp_nexthop_group_vr_link(old_nh_grp, fib_node->fib); + return err; +} + +static int +mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry, + struct fib6_info **rt_arr, unsigned int nrt6) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + int err, i; + + for (i = 0; i < nrt6; i++) { + mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]); + if (IS_ERR(mlxsw_sp_rt6)) { + err = PTR_ERR(mlxsw_sp_rt6); + goto err_rt6_unwind; + } + + list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); + fib6_entry->nrt6++; + } + + err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); + if (err) + goto err_rt6_unwind; + + return 0; + +err_rt6_unwind: + for (; i > 0; i--) { + fib6_entry->nrt6--; + mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list, + struct mlxsw_sp_rt6, list); + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } + return err; +} + +static void +mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry, + struct fib6_info **rt_arr, unsigned int nrt6) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + int i; + + for (i = 0; i < nrt6; i++) { + mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, + rt_arr[i]); + if (WARN_ON_ONCE(!mlxsw_sp_rt6)) + continue; + + fib6_entry->nrt6--; + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } + + mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); +} + +static int 
+mlxsw_sp_fib6_entry_type_set_local(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + const struct fib6_info *rt) +{ + struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi; + union mlxsw_sp_l3addr dip = { .addr6 = rt->fib6_dst.addr }; + u32 tb_id = mlxsw_sp_fix_tb_id(rt->fib6_table->tb6_id); + struct mlxsw_sp_router *router = mlxsw_sp->router; + int ifindex = nhgi->nexthops[0].ifindex; + struct mlxsw_sp_ipip_entry *ipip_entry; + + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex, + MLXSW_SP_L3_PROTO_IPV6, + dip); + + if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) { + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP; + return mlxsw_sp_fib_entry_decap_init(mlxsw_sp, fib_entry, + ipip_entry); + } + if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id, + MLXSW_SP_L3_PROTO_IPV6, &dip)) { + u32 tunnel_index; + + tunnel_index = router->nve_decap_config.tunnel_index; + fib_entry->decap.tunnel_index = tunnel_index; + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; + } + + return 0; +} + +static int mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + const struct fib6_info *rt) +{ + if (rt->fib6_flags & RTF_LOCAL) + return mlxsw_sp_fib6_entry_type_set_local(mlxsw_sp, fib_entry, + rt); + if (rt->fib6_flags & RTF_ANYCAST) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + else if (rt->fib6_type == RTN_BLACKHOLE) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE; + else if (rt->fib6_flags & RTF_REJECT) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE; + else if (fib_entry->nh_group->nhgi->gateway) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; + else + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; + + return 0; +} + +static void +mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp; + + list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list, + list) { + fib6_entry->nrt6--; + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } +} + +static struct mlxsw_sp_fib6_entry * +mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node, + struct fib6_info **rt_arr, unsigned int nrt6) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + int err, i; + + fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL); + if (!fib6_entry) + return ERR_PTR(-ENOMEM); + fib_entry = &fib6_entry->common; + + INIT_LIST_HEAD(&fib6_entry->rt6_list); + + for (i = 0; i < nrt6; i++) { + mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]); + if (IS_ERR(mlxsw_sp_rt6)) { + err = PTR_ERR(mlxsw_sp_rt6); + goto err_rt6_unwind; + } + list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); + fib6_entry->nrt6++; + } + + err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry); + if (err) + goto err_rt6_unwind; + + err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group, + fib_node->fib); + if (err) + goto err_nexthop_group_vr_link; + + err = mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]); + if (err) + goto err_fib6_entry_type_set; + + fib_entry->fib_node = fib_node; + + return fib6_entry; + +err_fib6_entry_type_set: + mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib); +err_nexthop_group_vr_link: + mlxsw_sp_nexthop6_group_put(mlxsw_sp, fib_entry); +err_rt6_unwind: + for (; i > 0; i--) { + fib6_entry->nrt6--; + mlxsw_sp_rt6 = 
list_last_entry(&fib6_entry->rt6_list, + struct mlxsw_sp_rt6, list); + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } + kfree(fib6_entry); + return ERR_PTR(err); +} + +static void +mlxsw_sp_fib6_entry_type_unset(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + mlxsw_sp_fib_entry_type_unset(mlxsw_sp, &fib6_entry->common); +} + +static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node; + + mlxsw_sp_fib6_entry_type_unset(mlxsw_sp, fib6_entry); + mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group, + fib_node->fib); + mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common); + mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry); + WARN_ON(fib6_entry->nrt6); + kfree(fib6_entry); +} + +static struct mlxsw_sp_fib6_entry * +mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp, + const struct fib6_info *rt) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib_node *fib_node; + struct mlxsw_sp_fib *fib; + struct fib6_info *cmp_rt; + struct mlxsw_sp_vr *vr; + + vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id); + if (!vr) + return NULL; + fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6); + + fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr, + sizeof(rt->fib6_dst.addr), + rt->fib6_dst.plen); + if (!fib_node) + return NULL; + + fib6_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib6_entry, common); + cmp_rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + if (rt->fib6_table->tb6_id == cmp_rt->fib6_table->tb6_id && + rt->fib6_metric == cmp_rt->fib6_metric && + mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt)) + return fib6_entry; + + return NULL; +} + +static bool mlxsw_sp_fib6_allow_replace(struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node; + struct mlxsw_sp_fib6_entry *fib6_replaced; + struct fib6_info *rt, *rt_replaced; + + if (!fib_node->fib_entry) + return true; + + fib6_replaced = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib6_entry, + common); + rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + rt_replaced = mlxsw_sp_fib6_entry_rt(fib6_replaced); + if (rt->fib6_table->tb6_id == RT_TABLE_MAIN && + rt_replaced->fib6_table->tb6_id == RT_TABLE_LOCAL) + return false; + + return true; +} + +static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp, + struct fib6_info **rt_arr, + unsigned int nrt6) +{ + struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced; + struct mlxsw_sp_fib_entry *replaced; + struct mlxsw_sp_fib_node *fib_node; + struct fib6_info *rt = rt_arr[0]; + int err; + + if (rt->fib6_src.plen) + return -EINVAL; + + if (mlxsw_sp_fib6_rt_should_ignore(rt)) + return 0; + + if (rt->nh && !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, rt->nh->id)) + return 0; + + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id, + &rt->fib6_dst.addr, + sizeof(rt->fib6_dst.addr), + rt->fib6_dst.plen, + MLXSW_SP_L3_PROTO_IPV6); + if (IS_ERR(fib_node)) + return PTR_ERR(fib_node); + + fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr, + nrt6); + if (IS_ERR(fib6_entry)) { + err = PTR_ERR(fib6_entry); + goto err_fib6_entry_create; + } + + if (!mlxsw_sp_fib6_allow_replace(fib6_entry)) { + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return 0; + } + + replaced = fib_node->fib_entry; + err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib6_entry->common); + if 
(err) + goto err_fib_node_entry_link; + + /* Nothing to replace */ + if (!replaced) + return 0; + + mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced); + fib6_replaced = container_of(replaced, struct mlxsw_sp_fib6_entry, + common); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_replaced); + + return 0; + +err_fib_node_entry_link: + fib_node->fib_entry = replaced; + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); +err_fib6_entry_create: + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return err; +} + +static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp, + struct fib6_info **rt_arr, + unsigned int nrt6) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib_node *fib_node; + struct fib6_info *rt = rt_arr[0]; + int err; + + if (rt->fib6_src.plen) + return -EINVAL; + + if (mlxsw_sp_fib6_rt_should_ignore(rt)) + return 0; + + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id, + &rt->fib6_dst.addr, + sizeof(rt->fib6_dst.addr), + rt->fib6_dst.plen, + MLXSW_SP_L3_PROTO_IPV6); + if (IS_ERR(fib_node)) + return PTR_ERR(fib_node); + + if (WARN_ON_ONCE(!fib_node->fib_entry)) { + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return -EINVAL; + } + + fib6_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib6_entry, common); + err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt_arr, + nrt6); + if (err) + goto err_fib6_entry_nexthop_add; + + return 0; + +err_fib6_entry_nexthop_add: + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return err; +} + +static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, + struct fib6_info **rt_arr, + unsigned int nrt6) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib_node *fib_node; + struct fib6_info *rt = rt_arr[0]; + + if (mlxsw_sp_fib6_rt_should_ignore(rt)) + return; + + /* Multipath routes are first added to the FIB trie and only then + * notified. If we vetoed the addition, we will get a delete + * notification for a route we do not have. Therefore, do not warn if + * route was not found. + */ + fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt); + if (!fib6_entry) + return; + + /* If not all the nexthops are deleted, then only reduce the nexthop + * group. 
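+ * mlxsw_sp_fib6_entry_nexthop_del() below drops the deleted routes and + * updates the nexthop group accordingly.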
+ */ + if (nrt6 != fib6_entry->nrt6) { + mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr, + nrt6); + return; + } + + fib_node = fib6_entry->common.fib_node; + + mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib6_entry->common); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); +} + +static struct mlxsw_sp_mr_table * +mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family) +{ + if (family == RTNL_FAMILY_IPMR) + return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]; + else + return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]; +} + +static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp, + struct mfc_entry_notifier_info *men_info, + bool replace) +{ + struct mlxsw_sp_mr_table *mrt; + struct mlxsw_sp_vr *vr; + + vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL); + if (IS_ERR(vr)) + return PTR_ERR(vr); + + mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family); + return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace); +} + +static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp, + struct mfc_entry_notifier_info *men_info) +{ + struct mlxsw_sp_mr_table *mrt; + struct mlxsw_sp_vr *vr; + + vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id); + if (WARN_ON(!vr)) + return; + + mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family); + mlxsw_sp_mr_route_del(mrt, men_info->mfc); + mlxsw_sp_vr_put(mlxsw_sp, vr); +} + +static int +mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp, + struct vif_entry_notifier_info *ven_info) +{ + struct mlxsw_sp_mr_table *mrt; + struct mlxsw_sp_rif *rif; + struct mlxsw_sp_vr *vr; + + vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL); + if (IS_ERR(vr)) + return PTR_ERR(vr); + + mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family); + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev); + return mlxsw_sp_mr_vif_add(mrt, ven_info->dev, + ven_info->vif_index, + ven_info->vif_flags, rif); +} + +static void +mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp, + struct vif_entry_notifier_info *ven_info) +{ + struct mlxsw_sp_mr_table *mrt; + struct mlxsw_sp_vr *vr; + + vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id); + if (WARN_ON(!vr)) + return; + + mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family); + mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index); + mlxsw_sp_vr_put(mlxsw_sp, vr); +} + +static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node) +{ + struct mlxsw_sp_fib4_entry *fib4_entry; + + fib4_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib4_entry, common); + mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); +} + +static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + + fib6_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib6_entry, common); + mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); +} + +static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node) +{ + switch (fib_node->fib->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node); + break; + case MLXSW_SP_L3_PROTO_IPV6: + mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node); + break; + } +} + +static void 
mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vr *vr, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto); + struct mlxsw_sp_fib_node *fib_node, *tmp; + + list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) { + bool do_break = &tmp->list == &fib->node_list; + + mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node); + if (do_break) + break; + } +} + +static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) +{ + int i, j; + + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i]; + + if (!mlxsw_sp_vr_is_used(vr)) + continue; + + for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++) + mlxsw_sp_mr_table_flush(vr->mr_table[j]); + mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4); + + /* If virtual router was only used for IPv4, then it's no + * longer used. + */ + if (!mlxsw_sp_vr_is_used(vr)) + continue; + mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6); + } +} + +struct mlxsw_sp_fib6_event_work { + struct fib6_info **rt_arr; + unsigned int nrt6; +}; + +struct mlxsw_sp_fib_event_work { + struct work_struct work; + union { + struct mlxsw_sp_fib6_event_work fib6_work; + struct fib_entry_notifier_info fen_info; + struct fib_rule_notifier_info fr_info; + struct fib_nh_notifier_info fnh_info; + struct mfc_entry_notifier_info men_info; + struct vif_entry_notifier_info ven_info; + }; + struct mlxsw_sp *mlxsw_sp; + unsigned long event; +}; + +static int +mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work, + struct fib6_entry_notifier_info *fen6_info) +{ + struct fib6_info *rt = fen6_info->rt; + struct fib6_info **rt_arr; + struct fib6_info *iter; + unsigned int nrt6; + int i = 0; + + nrt6 = fen6_info->nsiblings + 1; + + rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC); + if (!rt_arr) + return -ENOMEM; + + fib6_work->rt_arr = rt_arr; + fib6_work->nrt6 = nrt6; + + rt_arr[0] = rt; + fib6_info_hold(rt); + + if (!fen6_info->nsiblings) + return 0; + + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { + if (i == fen6_info->nsiblings) + break; + + rt_arr[i + 1] = iter; + fib6_info_hold(iter); + i++; + } + WARN_ON_ONCE(i != fen6_info->nsiblings); + + return 0; +} + +static void +mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work) +{ + int i; + + for (i = 0; i < fib6_work->nrt6; i++) + mlxsw_sp_rt6_release(fib6_work->rt_arr[i]); + kfree(fib6_work->rt_arr); +} + +static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) +{ + struct mlxsw_sp_fib_event_work *fib_work = + container_of(work, struct mlxsw_sp_fib_event_work, work); + struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; + int err; + + mutex_lock(&mlxsw_sp->router->lock); + mlxsw_sp_span_respin(mlxsw_sp); + + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: + err = mlxsw_sp_router_fib4_replace(mlxsw_sp, + &fib_work->fen_info); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n"); + mlxsw_sp_fib4_offload_failed_flag_set(mlxsw_sp, + &fib_work->fen_info); + } + fib_info_put(fib_work->fen_info.fi); + break; + case FIB_EVENT_ENTRY_DEL: + mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info); + fib_info_put(fib_work->fen_info.fi); + break; + case FIB_EVENT_NH_ADD: + case FIB_EVENT_NH_DEL: + mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event, + fib_work->fnh_info.fib_nh); + fib_info_put(fib_work->fnh_info.fib_nh->nh_parent); + break; + } + mutex_unlock(&mlxsw_sp->router->lock); + kfree(fib_work); +} + 
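Editor's note: the work handler above and the IPv6/multicast handlers that follow all use the same deferral pattern — the FIB notifier runs in an atomic context, so the callback only copies the notifier payload and takes a reference on it, while the actual processing happens later in a work item that holds the router mutex and drops the reference when done. A minimal standalone sketch of that pattern is shown below; all names (my_driver, my_obj, my_process placeholder, and so on) are hypothetical and the code is not part of the mlxsw driver.

/* Sketch only: defer an atomic notifier event to a workqueue, keeping the
 * payload alive with a reference until the work item has processed it.
 */
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/refcount.h>
#include <linux/mutex.h>
#include <linux/slab.h>

struct my_obj {
        refcount_t ref;
        int payload;
};

struct my_driver {
        struct notifier_block nb;
        struct mutex lock;              /* serializes event processing */
};

struct my_event_work {
        struct work_struct work;
        struct my_driver *drv;
        struct my_obj *obj;             /* referenced copy of the payload */
        unsigned long event;
};

static void my_obj_put(struct my_obj *obj)
{
        if (refcount_dec_and_test(&obj->ref))
                kfree(obj);
}

static void my_event_work_handler(struct work_struct *work)
{
        struct my_event_work *ew = container_of(work, struct my_event_work,
                                                work);

        mutex_lock(&ew->drv->lock);     /* may sleep; forbidden in the notifier */
        /* ... process ew->event / ew->obj->payload here ... */
        mutex_unlock(&ew->drv->lock);

        my_obj_put(ew->obj);            /* drop the reference taken below */
        kfree(ew);
}

static int my_notifier_cb(struct notifier_block *nb, unsigned long event,
                          void *ptr)
{
        struct my_driver *drv = container_of(nb, struct my_driver, nb);
        struct my_event_work *ew;

        ew = kzalloc(sizeof(*ew), GFP_ATOMIC);  /* atomic notifier context */
        if (!ew)
                return NOTIFY_BAD;

        ew->drv = drv;
        ew->event = event;
        ew->obj = ptr;
        refcount_inc(&ew->obj->ref);    /* keep payload alive until the work runs */

        INIT_WORK(&ew->work, my_event_work_handler);
        schedule_work(&ew->work);
        return NOTIFY_DONE;
}

This mirrors what the driver code does with fib_work: the hold happens while queueing (fib_info_hold(), fib6_info_hold(), mr_cache_hold(), dev_hold()) and the matching put happens at the end of the corresponding *_event_work() handler.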
+static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) +{ + struct mlxsw_sp_fib_event_work *fib_work = + container_of(work, struct mlxsw_sp_fib_event_work, work); + struct mlxsw_sp_fib6_event_work *fib6_work = &fib_work->fib6_work; + struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; + int err; + + mutex_lock(&mlxsw_sp->router->lock); + mlxsw_sp_span_respin(mlxsw_sp); + + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: + err = mlxsw_sp_router_fib6_replace(mlxsw_sp, + fib6_work->rt_arr, + fib6_work->nrt6); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n"); + mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp, + fib6_work->rt_arr, + fib6_work->nrt6); + } + mlxsw_sp_router_fib6_work_fini(fib6_work); + break; + case FIB_EVENT_ENTRY_APPEND: + err = mlxsw_sp_router_fib6_append(mlxsw_sp, + fib6_work->rt_arr, + fib6_work->nrt6); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "FIB append failed.\n"); + mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp, + fib6_work->rt_arr, + fib6_work->nrt6); + } + mlxsw_sp_router_fib6_work_fini(fib6_work); + break; + case FIB_EVENT_ENTRY_DEL: + mlxsw_sp_router_fib6_del(mlxsw_sp, + fib6_work->rt_arr, + fib6_work->nrt6); + mlxsw_sp_router_fib6_work_fini(fib6_work); + break; + } + mutex_unlock(&mlxsw_sp->router->lock); + kfree(fib_work); +} + +static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work) +{ + struct mlxsw_sp_fib_event_work *fib_work = + container_of(work, struct mlxsw_sp_fib_event_work, work); + struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; + bool replace; + int err; + + rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: + case FIB_EVENT_ENTRY_ADD: + replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; + + err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info, + replace); + if (err) + dev_warn(mlxsw_sp->bus_info->dev, "MR entry add failed.\n"); + mr_cache_put(fib_work->men_info.mfc); + break; + case FIB_EVENT_ENTRY_DEL: + mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info); + mr_cache_put(fib_work->men_info.mfc); + break; + case FIB_EVENT_VIF_ADD: + err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp, + &fib_work->ven_info); + if (err) + dev_warn(mlxsw_sp->bus_info->dev, "MR VIF add failed.\n"); + dev_put(fib_work->ven_info.dev); + break; + case FIB_EVENT_VIF_DEL: + mlxsw_sp_router_fibmr_vif_del(mlxsw_sp, + &fib_work->ven_info); + dev_put(fib_work->ven_info.dev); + break; + } + mutex_unlock(&mlxsw_sp->router->lock); + rtnl_unlock(); + kfree(fib_work); +} + +static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, + struct fib_notifier_info *info) +{ + struct fib_entry_notifier_info *fen_info; + struct fib_nh_notifier_info *fnh_info; + + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: + case FIB_EVENT_ENTRY_DEL: + fen_info = container_of(info, struct fib_entry_notifier_info, + info); + fib_work->fen_info = *fen_info; + /* Take reference on fib_info to prevent it from being + * freed while work is queued. Release it afterwards. 
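+ * The matching fib_info_put() is done in mlxsw_sp_router_fib4_event_work().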
+ */ + fib_info_hold(fib_work->fen_info.fi); + break; + case FIB_EVENT_NH_ADD: + case FIB_EVENT_NH_DEL: + fnh_info = container_of(info, struct fib_nh_notifier_info, + info); + fib_work->fnh_info = *fnh_info; + fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent); + break; + } +} + +static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, + struct fib_notifier_info *info) +{ + struct fib6_entry_notifier_info *fen6_info; + int err; + + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: + case FIB_EVENT_ENTRY_APPEND: + case FIB_EVENT_ENTRY_DEL: + fen6_info = container_of(info, struct fib6_entry_notifier_info, + info); + err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work, + fen6_info); + if (err) + return err; + break; + } + + return 0; +} + +static void +mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work, + struct fib_notifier_info *info) +{ + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: + case FIB_EVENT_ENTRY_ADD: + case FIB_EVENT_ENTRY_DEL: + memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info)); + mr_cache_hold(fib_work->men_info.mfc); + break; + case FIB_EVENT_VIF_ADD: + case FIB_EVENT_VIF_DEL: + memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info)); + dev_hold(fib_work->ven_info.dev); + break; + } +} + +static int mlxsw_sp_router_fib_rule_event(unsigned long event, + struct fib_notifier_info *info, + struct mlxsw_sp *mlxsw_sp) +{ + struct netlink_ext_ack *extack = info->extack; + struct fib_rule_notifier_info *fr_info; + struct fib_rule *rule; + int err = 0; + + /* nothing to do at the moment */ + if (event == FIB_EVENT_RULE_DEL) + return 0; + + fr_info = container_of(info, struct fib_rule_notifier_info, info); + rule = fr_info->rule; + + /* Rule only affects locally generated traffic */ + if (rule->iifindex == mlxsw_sp_net(mlxsw_sp)->loopback_dev->ifindex) + return 0; + + switch (info->family) { + case AF_INET: + if (!fib4_rule_default(rule) && !rule->l3mdev) + err = -EOPNOTSUPP; + break; + case AF_INET6: + if (!fib6_rule_default(rule) && !rule->l3mdev) + err = -EOPNOTSUPP; + break; + case RTNL_FAMILY_IPMR: + if (!ipmr_rule_default(rule) && !rule->l3mdev) + err = -EOPNOTSUPP; + break; + case RTNL_FAMILY_IP6MR: + if (!ip6mr_rule_default(rule) && !rule->l3mdev) + err = -EOPNOTSUPP; + break; + } + + if (err < 0) + NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported"); + + return err; +} + +/* Called with rcu_read_lock() */ +static int mlxsw_sp_router_fib_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlxsw_sp_fib_event_work *fib_work; + struct fib_notifier_info *info = ptr; + struct mlxsw_sp_router *router; + int err; + + if ((info->family != AF_INET && info->family != AF_INET6 && + info->family != RTNL_FAMILY_IPMR && + info->family != RTNL_FAMILY_IP6MR)) + return NOTIFY_DONE; + + router = container_of(nb, struct mlxsw_sp_router, fib_nb); + + switch (event) { + case FIB_EVENT_RULE_ADD: + case FIB_EVENT_RULE_DEL: + err = mlxsw_sp_router_fib_rule_event(event, info, + router->mlxsw_sp); + return notifier_from_errno(err); + case FIB_EVENT_ENTRY_ADD: + case FIB_EVENT_ENTRY_REPLACE: + case FIB_EVENT_ENTRY_APPEND: + if (info->family == AF_INET) { + struct fib_entry_notifier_info *fen_info = ptr; + + if (fen_info->fi->fib_nh_is_v6) { + NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported"); + return notifier_from_errno(-EINVAL); + } + } + break; + } + + fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC); + if (!fib_work) + return NOTIFY_BAD; + + 
fib_work->mlxsw_sp = router->mlxsw_sp; + fib_work->event = event; + + switch (info->family) { + case AF_INET: + INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work); + mlxsw_sp_router_fib4_event(fib_work, info); + break; + case AF_INET6: + INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work); + err = mlxsw_sp_router_fib6_event(fib_work, info); + if (err) + goto err_fib_event; + break; + case RTNL_FAMILY_IP6MR: + case RTNL_FAMILY_IPMR: + INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work); + mlxsw_sp_router_fibmr_event(fib_work, info); + break; + } + + mlxsw_core_schedule_work(&fib_work->work); + + return NOTIFY_DONE; + +err_fib_event: + kfree(fib_work); + return NOTIFY_BAD; +} + +static struct mlxsw_sp_rif * +mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) +{ + int i; + + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) + if (mlxsw_sp->router->rifs[i] && + mlxsw_sp->router->rifs[i]->dev == dev) + return mlxsw_sp->router->rifs[i]; + + return NULL; +} + +bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) +{ + struct mlxsw_sp_rif *rif; + + mutex_lock(&mlxsw_sp->router->lock); + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + mutex_unlock(&mlxsw_sp->router->lock); + + return rif; +} + +u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev) +{ + struct mlxsw_sp_rif *rif; + u16 vid = 0; + + mutex_lock(&mlxsw_sp->router->lock); + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) + goto out; + + /* We only return the VID for VLAN RIFs. Otherwise we return an + * invalid value (0). + */ + if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN) + goto out; + + vid = mlxsw_sp_fid_8021q_vid(rif->fid); + +out: + mutex_unlock(&mlxsw_sp->router->lock); + return vid; +} + +static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif) +{ + char ritr_pl[MLXSW_REG_RITR_LEN]; + int err; + + mlxsw_reg_ritr_rif_pack(ritr_pl, rif); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); + if (err) + return err; + + mlxsw_reg_ritr_enable_set(ritr_pl, false); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif) +{ + mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index); + mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif); + mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif); +} + +static bool +mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev, + unsigned long event) +{ + struct inet6_dev *inet6_dev; + bool addr_list_empty = true; + struct in_device *idev; + + switch (event) { + case NETDEV_UP: + return rif == NULL; + case NETDEV_DOWN: + rcu_read_lock(); + idev = __in_dev_get_rcu(dev); + if (idev && idev->ifa_list) + addr_list_empty = false; + + inet6_dev = __in6_dev_get(dev); + if (addr_list_empty && inet6_dev && + !list_empty(&inet6_dev->addr_list)) + addr_list_empty = false; + rcu_read_unlock(); + + /* macvlans do not have a RIF, but rather piggy back on the + * RIF of their lower device. + */ + if (netif_is_macvlan(dev) && addr_list_empty) + return true; + + if (rif && addr_list_empty && + !netif_is_l3_slave(rif->dev)) + return true; + /* It is possible we already removed the RIF ourselves + * if it was assigned to a netdev that is now a bridge + * or LAG slave. 
+ */ + return false; + } + + return false; +} + +static enum mlxsw_sp_rif_type +mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) +{ + enum mlxsw_sp_fid_type type; + + if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL)) + return MLXSW_SP_RIF_TYPE_IPIP_LB; + + /* Otherwise RIF type is derived from the type of the underlying FID. */ + if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev))) + type = MLXSW_SP_FID_TYPE_8021Q; + else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev)) + type = MLXSW_SP_FID_TYPE_8021Q; + else if (netif_is_bridge_master(dev)) + type = MLXSW_SP_FID_TYPE_8021D; + else + type = MLXSW_SP_FID_TYPE_RFID; + + return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type); +} + +static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index) +{ + int i; + + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { + if (!mlxsw_sp->router->rifs[i]) { + *p_rif_index = i; + return 0; + } + } + + return -ENOBUFS; +} + +static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index, + u16 vr_id, + struct net_device *l3_dev) +{ + struct mlxsw_sp_rif *rif; + + rif = kzalloc(rif_size, GFP_KERNEL); + if (!rif) + return NULL; + + INIT_LIST_HEAD(&rif->nexthop_list); + INIT_LIST_HEAD(&rif->neigh_list); + if (l3_dev) { + ether_addr_copy(rif->addr, l3_dev->dev_addr); + rif->mtu = l3_dev->mtu; + rif->dev = l3_dev; + } + rif->vr_id = vr_id; + rif->rif_index = rif_index; + + return rif; +} + +struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, + u16 rif_index) +{ + return mlxsw_sp->router->rifs[rif_index]; +} + +u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif) +{ + return rif->rif_index; +} + +u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif) +{ + return lb_rif->common.rif_index; +} + +u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) +{ + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev); + struct mlxsw_sp_vr *ul_vr; + + ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL); + if (WARN_ON(IS_ERR(ul_vr))) + return 0; + + return ul_vr->id; +} + +u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) +{ + return lb_rif->ul_rif_id; +} + +static bool +mlxsw_sp_router_port_l3_stats_enabled(struct mlxsw_sp_rif *rif) +{ + return mlxsw_sp_rif_counter_valid_get(rif, + MLXSW_SP_RIF_COUNTER_EGRESS) && + mlxsw_sp_rif_counter_valid_get(rif, + MLXSW_SP_RIF_COUNTER_INGRESS); +} + +static int +mlxsw_sp_router_port_l3_stats_enable(struct mlxsw_sp_rif *rif) +{ + int err; + + err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_INGRESS); + if (err) + return err; + + /* Clear stale data. */ + err = mlxsw_sp_rif_counter_fetch_clear(rif, + MLXSW_SP_RIF_COUNTER_INGRESS, + NULL); + if (err) + goto err_clear_ingress; + + err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS); + if (err) + goto err_alloc_egress; + + /* Clear stale data. 
*/ + err = mlxsw_sp_rif_counter_fetch_clear(rif, + MLXSW_SP_RIF_COUNTER_EGRESS, + NULL); + if (err) + goto err_clear_egress; + + return 0; + +err_clear_egress: + mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS); +err_alloc_egress: +err_clear_ingress: + mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS); + return err; +} + +static void +mlxsw_sp_router_port_l3_stats_disable(struct mlxsw_sp_rif *rif) +{ + mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS); + mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS); +} + +static void +mlxsw_sp_router_port_l3_stats_report_used(struct mlxsw_sp_rif *rif, + struct netdev_notifier_offload_xstats_info *info) +{ + if (!mlxsw_sp_router_port_l3_stats_enabled(rif)) + return; + netdev_offload_xstats_report_used(info->report_used); +} + +static int +mlxsw_sp_router_port_l3_stats_fetch(struct mlxsw_sp_rif *rif, + struct rtnl_hw_stats64 *p_stats) +{ + struct mlxsw_sp_rif_counter_set_basic ingress; + struct mlxsw_sp_rif_counter_set_basic egress; + int err; + + err = mlxsw_sp_rif_counter_fetch_clear(rif, + MLXSW_SP_RIF_COUNTER_INGRESS, + &ingress); + if (err) + return err; + + err = mlxsw_sp_rif_counter_fetch_clear(rif, + MLXSW_SP_RIF_COUNTER_EGRESS, + &egress); + if (err) + return err; + +#define MLXSW_SP_ROUTER_ALL_GOOD(SET, SFX) \ + ((SET.good_unicast_ ## SFX) + \ + (SET.good_multicast_ ## SFX) + \ + (SET.good_broadcast_ ## SFX)) + + p_stats->rx_packets = MLXSW_SP_ROUTER_ALL_GOOD(ingress, packets); + p_stats->tx_packets = MLXSW_SP_ROUTER_ALL_GOOD(egress, packets); + p_stats->rx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(ingress, bytes); + p_stats->tx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(egress, bytes); + p_stats->rx_errors = ingress.error_packets; + p_stats->tx_errors = egress.error_packets; + p_stats->rx_dropped = ingress.discard_packets; + p_stats->tx_dropped = egress.discard_packets; + p_stats->multicast = ingress.good_multicast_packets + + ingress.good_broadcast_packets; + +#undef MLXSW_SP_ROUTER_ALL_GOOD + + return 0; +} + +static int +mlxsw_sp_router_port_l3_stats_report_delta(struct mlxsw_sp_rif *rif, + struct netdev_notifier_offload_xstats_info *info) +{ + struct rtnl_hw_stats64 stats = {}; + int err; + + if (!mlxsw_sp_router_port_l3_stats_enabled(rif)) + return 0; + + err = mlxsw_sp_router_port_l3_stats_fetch(rif, &stats); + if (err) + return err; + + netdev_offload_xstats_report_delta(info->report_delta, &stats); + return 0; +} + +struct mlxsw_sp_router_hwstats_notify_work { + struct work_struct work; + struct net_device *dev; +}; + +static void mlxsw_sp_router_hwstats_notify_work(struct work_struct *work) +{ + struct mlxsw_sp_router_hwstats_notify_work *hws_work = + container_of(work, struct mlxsw_sp_router_hwstats_notify_work, + work); + + rtnl_lock(); + rtnl_offload_xstats_notify(hws_work->dev); + rtnl_unlock(); + dev_put(hws_work->dev); + kfree(hws_work); +} + +static void +mlxsw_sp_router_hwstats_notify_schedule(struct net_device *dev) +{ + struct mlxsw_sp_router_hwstats_notify_work *hws_work; + + /* To collect notification payload, the core ends up sending another + * notifier block message, which would deadlock on the attempt to + * acquire the router lock again. Just postpone the notification until + * later. 
+ */ + + hws_work = kzalloc(sizeof(*hws_work), GFP_KERNEL); + if (!hws_work) + return; + + INIT_WORK(&hws_work->work, mlxsw_sp_router_hwstats_notify_work); + dev_hold(dev); + hws_work->dev = dev; + mlxsw_core_schedule_work(&hws_work->work); +} + +int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif) +{ + return rif->dev->ifindex; +} + +const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif) +{ + return rif->dev; +} + +static void mlxsw_sp_rif_push_l3_stats(struct mlxsw_sp_rif *rif) +{ + struct rtnl_hw_stats64 stats = {}; + + if (!mlxsw_sp_router_port_l3_stats_fetch(rif, &stats)) + netdev_offload_xstats_push_delta(rif->dev, + NETDEV_OFFLOAD_XSTATS_TYPE_L3, + &stats); +} + +static struct mlxsw_sp_rif * +mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_rif_params *params, + struct netlink_ext_ack *extack) +{ + u32 tb_id = l3mdev_fib_table(params->dev); + const struct mlxsw_sp_rif_ops *ops; + struct mlxsw_sp_fid *fid = NULL; + enum mlxsw_sp_rif_type type; + struct mlxsw_sp_rif *rif; + struct mlxsw_sp_vr *vr; + u16 rif_index; + int i, err; + + type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev); + ops = mlxsw_sp->router->rif_ops_arr[type]; + + vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack); + if (IS_ERR(vr)) + return ERR_CAST(vr); + vr->rif_count++; + + err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces"); + goto err_rif_index_alloc; + } + + rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev); + if (!rif) { + err = -ENOMEM; + goto err_rif_alloc; + } + dev_hold(rif->dev); + mlxsw_sp->router->rifs[rif_index] = rif; + rif->mlxsw_sp = mlxsw_sp; + rif->ops = ops; + + if (ops->fid_get) { + fid = ops->fid_get(rif, extack); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + goto err_fid_get; + } + rif->fid = fid; + } + + if (ops->setup) + ops->setup(rif, params); + + err = ops->configure(rif, extack); + if (err) + goto err_configure; + + for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) { + err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif); + if (err) + goto err_mr_rif_add; + } + + if (netdev_offload_xstats_enabled(rif->dev, + NETDEV_OFFLOAD_XSTATS_TYPE_L3)) { + err = mlxsw_sp_router_port_l3_stats_enable(rif); + if (err) + goto err_stats_enable; + mlxsw_sp_router_hwstats_notify_schedule(rif->dev); + } else { + mlxsw_sp_rif_counters_alloc(rif); + } + + atomic_inc(&mlxsw_sp->router->rifs_count); + return rif; + +err_stats_enable: +err_mr_rif_add: + for (i--; i >= 0; i--) + mlxsw_sp_mr_rif_del(vr->mr_table[i], rif); + ops->deconfigure(rif); +err_configure: + if (fid) + mlxsw_sp_fid_put(fid); +err_fid_get: + mlxsw_sp->router->rifs[rif_index] = NULL; + dev_put(rif->dev); + kfree(rif); +err_rif_alloc: +err_rif_index_alloc: + vr->rif_count--; + mlxsw_sp_vr_put(mlxsw_sp, vr); + return ERR_PTR(err); +} + +static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) +{ + const struct mlxsw_sp_rif_ops *ops = rif->ops; + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_fid *fid = rif->fid; + struct mlxsw_sp_vr *vr; + int i; + + atomic_dec(&mlxsw_sp->router->rifs_count); + mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); + vr = &mlxsw_sp->router->vrs[rif->vr_id]; + + if (netdev_offload_xstats_enabled(rif->dev, + NETDEV_OFFLOAD_XSTATS_TYPE_L3)) { + mlxsw_sp_rif_push_l3_stats(rif); + mlxsw_sp_router_port_l3_stats_disable(rif); + mlxsw_sp_router_hwstats_notify_schedule(rif->dev); + } else { + mlxsw_sp_rif_counters_free(rif); + } + + for (i = 0; i < 
MLXSW_SP_L3_PROTO_MAX; i++) + mlxsw_sp_mr_rif_del(vr->mr_table[i], rif); + ops->deconfigure(rif); + if (fid) + /* Loopback RIFs are not associated with a FID. */ + mlxsw_sp_fid_put(fid); + mlxsw_sp->router->rifs[rif->rif_index] = NULL; + dev_put(rif->dev); + kfree(rif); + vr->rif_count--; + mlxsw_sp_vr_put(mlxsw_sp, vr); +} + +void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev) +{ + struct mlxsw_sp_rif *rif; + + mutex_lock(&mlxsw_sp->router->lock); + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) + goto out; + mlxsw_sp_rif_destroy(rif); +out: + mutex_unlock(&mlxsw_sp->router->lock); +} + +static void +mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params, + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) +{ + struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; + + params->vid = mlxsw_sp_port_vlan->vid; + params->lag = mlxsw_sp_port->lagged; + if (params->lag) + params->lag_id = mlxsw_sp_port->lag_id; + else + params->system_port = mlxsw_sp_port->local_port; +} + +static struct mlxsw_sp_rif_subport * +mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif) +{ + return container_of(rif, struct mlxsw_sp_rif_subport, common); +} + +static struct mlxsw_sp_rif * +mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_rif_params *params, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_rif_subport *rif_subport; + struct mlxsw_sp_rif *rif; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev); + if (!rif) + return mlxsw_sp_rif_create(mlxsw_sp, params, extack); + + rif_subport = mlxsw_sp_rif_subport_rif(rif); + refcount_inc(&rif_subport->ref_count); + return rif; +} + +static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_rif_subport *rif_subport; + + rif_subport = mlxsw_sp_rif_subport_rif(rif); + if (!refcount_dec_and_test(&rif_subport->ref_count)) + return; + + mlxsw_sp_rif_destroy(rif); +} + +static int mlxsw_sp_rif_mac_profile_index_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif_mac_profile *profile, + struct netlink_ext_ack *extack) +{ + u8 max_rif_mac_profiles = mlxsw_sp->router->max_rif_mac_profile; + struct mlxsw_sp_router *router = mlxsw_sp->router; + int id; + + id = idr_alloc(&router->rif_mac_profiles_idr, profile, 0, + max_rif_mac_profiles, GFP_KERNEL); + + if (id >= 0) { + profile->id = id; + return 0; + } + + if (id == -ENOSPC) + NL_SET_ERR_MSG_MOD(extack, + "Exceeded number of supported router interface MAC profiles"); + + return id; +} + +static struct mlxsw_sp_rif_mac_profile * +mlxsw_sp_rif_mac_profile_index_free(struct mlxsw_sp *mlxsw_sp, u8 mac_profile) +{ + struct mlxsw_sp_rif_mac_profile *profile; + + profile = idr_remove(&mlxsw_sp->router->rif_mac_profiles_idr, + mac_profile); + WARN_ON(!profile); + return profile; +} + +static struct mlxsw_sp_rif_mac_profile * +mlxsw_sp_rif_mac_profile_alloc(const char *mac) +{ + struct mlxsw_sp_rif_mac_profile *profile; + + profile = kzalloc(sizeof(*profile), GFP_KERNEL); + if (!profile) + return NULL; + + ether_addr_copy(profile->mac_prefix, mac); + refcount_set(&profile->ref_count, 1); + return profile; +} + +static struct mlxsw_sp_rif_mac_profile * +mlxsw_sp_rif_mac_profile_find(const struct mlxsw_sp *mlxsw_sp, const char *mac) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + struct mlxsw_sp_rif_mac_profile *profile; + int id; + + idr_for_each_entry(&router->rif_mac_profiles_idr, profile, id) { + if (ether_addr_equal_masked(profile->mac_prefix, mac, + mlxsw_sp->mac_mask)) + return 
profile; + } + + return NULL; +} + +static u64 mlxsw_sp_rif_mac_profiles_occ_get(void *priv) +{ + const struct mlxsw_sp *mlxsw_sp = priv; + + return atomic_read(&mlxsw_sp->router->rif_mac_profiles_count); +} + +static u64 mlxsw_sp_rifs_occ_get(void *priv) +{ + const struct mlxsw_sp *mlxsw_sp = priv; + + return atomic_read(&mlxsw_sp->router->rifs_count); +} + +static struct mlxsw_sp_rif_mac_profile * +mlxsw_sp_rif_mac_profile_create(struct mlxsw_sp *mlxsw_sp, const char *mac, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_rif_mac_profile *profile; + int err; + + profile = mlxsw_sp_rif_mac_profile_alloc(mac); + if (!profile) + return ERR_PTR(-ENOMEM); + + err = mlxsw_sp_rif_mac_profile_index_alloc(mlxsw_sp, profile, extack); + if (err) + goto profile_index_alloc_err; + + atomic_inc(&mlxsw_sp->router->rif_mac_profiles_count); + return profile; + +profile_index_alloc_err: + kfree(profile); + return ERR_PTR(err); +} + +static void mlxsw_sp_rif_mac_profile_destroy(struct mlxsw_sp *mlxsw_sp, + u8 mac_profile) +{ + struct mlxsw_sp_rif_mac_profile *profile; + + atomic_dec(&mlxsw_sp->router->rif_mac_profiles_count); + profile = mlxsw_sp_rif_mac_profile_index_free(mlxsw_sp, mac_profile); + kfree(profile); +} + +static int mlxsw_sp_rif_mac_profile_get(struct mlxsw_sp *mlxsw_sp, + const char *mac, u8 *p_mac_profile, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_rif_mac_profile *profile; + + profile = mlxsw_sp_rif_mac_profile_find(mlxsw_sp, mac); + if (profile) { + refcount_inc(&profile->ref_count); + goto out; + } + + profile = mlxsw_sp_rif_mac_profile_create(mlxsw_sp, mac, extack); + if (IS_ERR(profile)) + return PTR_ERR(profile); + +out: + *p_mac_profile = profile->id; + return 0; +} + +static void mlxsw_sp_rif_mac_profile_put(struct mlxsw_sp *mlxsw_sp, + u8 mac_profile) +{ + struct mlxsw_sp_rif_mac_profile *profile; + + profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr, + mac_profile); + if (WARN_ON(!profile)) + return; + + if (!refcount_dec_and_test(&profile->ref_count)) + return; + + mlxsw_sp_rif_mac_profile_destroy(mlxsw_sp, mac_profile); +} + +static bool mlxsw_sp_rif_mac_profile_is_shared(const struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_rif_mac_profile *profile; + + profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr, + rif->mac_profile_id); + if (WARN_ON(!profile)) + return false; + + return refcount_read(&profile->ref_count) > 1; +} + +static int mlxsw_sp_rif_mac_profile_edit(struct mlxsw_sp_rif *rif, + const char *new_mac) +{ + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_rif_mac_profile *profile; + + profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr, + rif->mac_profile_id); + if (WARN_ON(!profile)) + return -EINVAL; + + ether_addr_copy(profile->mac_prefix, new_mac); + return 0; +} + +static int +mlxsw_sp_rif_mac_profile_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif, + const char *new_mac, + struct netlink_ext_ack *extack) +{ + u8 mac_profile; + int err; + + if (!mlxsw_sp_rif_mac_profile_is_shared(rif) && + !mlxsw_sp_rif_mac_profile_find(mlxsw_sp, new_mac)) + return mlxsw_sp_rif_mac_profile_edit(rif, new_mac); + + err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, new_mac, + &mac_profile, extack); + if (err) + return err; + + mlxsw_sp_rif_mac_profile_put(mlxsw_sp, rif->mac_profile_id); + rif->mac_profile_id = mac_profile; + return 0; +} + +static int +__mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, + struct net_device *l3_dev, + struct 
netlink_ext_ack *extack) +{ + struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_rif_params params = { + .dev = l3_dev, + }; + u16 vid = mlxsw_sp_port_vlan->vid; + struct mlxsw_sp_rif *rif; + struct mlxsw_sp_fid *fid; + int err; + + mlxsw_sp_rif_subport_params_init(¶ms, mlxsw_sp_port_vlan); + rif = mlxsw_sp_rif_subport_get(mlxsw_sp, ¶ms, extack); + if (IS_ERR(rif)) + return PTR_ERR(rif); + + /* FID was already created, just take a reference */ + fid = rif->ops->fid_get(rif, extack); + err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid); + if (err) + goto err_fid_port_vid_map; + + err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false); + if (err) + goto err_port_vid_learning_set; + + err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, + BR_STATE_FORWARDING); + if (err) + goto err_port_vid_stp_set; + + mlxsw_sp_port_vlan->fid = fid; + + return 0; + +err_port_vid_stp_set: + mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true); +err_port_vid_learning_set: + mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid); +err_fid_port_vid_map: + mlxsw_sp_fid_put(fid); + mlxsw_sp_rif_subport_put(rif); + return err; +} + +static void +__mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) +{ + struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; + struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid; + struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid); + u16 vid = mlxsw_sp_port_vlan->vid; + + if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID)) + return; + + mlxsw_sp_port_vlan->fid = NULL; + mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING); + mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true); + mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid); + mlxsw_sp_fid_put(fid); + mlxsw_sp_rif_subport_put(rif); +} + +int +mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, + struct net_device *l3_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_rif *rif; + int err = 0; + + mutex_lock(&mlxsw_sp->router->lock); + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); + if (!rif) + goto out; + + err = __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, l3_dev, + extack); +out: + mutex_unlock(&mlxsw_sp->router->lock); + return err; +} + +void +mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp; + + mutex_lock(&mlxsw_sp->router->lock); + __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); + mutex_unlock(&mlxsw_sp->router->lock); +} + +static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev, + struct net_device *port_dev, + unsigned long event, u16 vid, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev); + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); + if (WARN_ON(!mlxsw_sp_port_vlan)) + return -EINVAL; + + switch (event) { + case NETDEV_UP: + return __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, + l3_dev, extack); + case NETDEV_DOWN: + __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); + break; + } + + return 0; +} + +static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev, + unsigned long event, + struct netlink_ext_ack *extack) +{ + if 
(netif_is_any_bridge_port(port_dev) || netif_is_lag_port(port_dev)) + return 0; + + return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, + MLXSW_SP_DEFAULT_VID, extack); +} + +static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev, + struct net_device *lag_dev, + unsigned long event, u16 vid, + struct netlink_ext_ack *extack) +{ + struct net_device *port_dev; + struct list_head *iter; + int err; + + netdev_for_each_lower_dev(lag_dev, port_dev, iter) { + if (mlxsw_sp_port_dev_check(port_dev)) { + err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev, + port_dev, + event, vid, + extack); + if (err) + return err; + } + } + + return 0; +} + +static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev, + unsigned long event, + struct netlink_ext_ack *extack) +{ + if (netif_is_bridge_port(lag_dev)) + return 0; + + return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, + MLXSW_SP_DEFAULT_VID, extack); +} + +static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev, + unsigned long event, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_rif_params params = { + .dev = l3_dev, + }; + struct mlxsw_sp_rif *rif; + + switch (event) { + case NETDEV_UP: + if (netif_is_bridge_master(l3_dev) && br_vlan_enabled(l3_dev)) { + u16 proto; + + br_vlan_get_proto(l3_dev, &proto); + if (proto == ETH_P_8021AD) { + NL_SET_ERR_MSG_MOD(extack, "Adding an IP address to 802.1ad bridge is not supported"); + return -EOPNOTSUPP; + } + } + rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack); + if (IS_ERR(rif)) + return PTR_ERR(rif); + break; + case NETDEV_DOWN: + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); + mlxsw_sp_rif_destroy(rif); + break; + } + + return 0; +} + +static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *vlan_dev, + unsigned long event, + struct netlink_ext_ack *extack) +{ + struct net_device *real_dev = vlan_dev_real_dev(vlan_dev); + u16 vid = vlan_dev_vlan_id(vlan_dev); + + if (netif_is_bridge_port(vlan_dev)) + return 0; + + if (mlxsw_sp_port_dev_check(real_dev)) + return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev, + event, vid, extack); + else if (netif_is_lag_master(real_dev)) + return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event, + vid, extack); + else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev)) + return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event, + extack); + + return 0; +} + +static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac) +{ + u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 }; + u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 }; + + return ether_addr_equal_masked(mac, vrrp4, mask); +} + +static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac) +{ + u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 }; + u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 }; + + return ether_addr_equal_masked(mac, vrrp6, mask); +} + +static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index, + const u8 *mac, bool adding) +{ + char ritr_pl[MLXSW_REG_RITR_LEN]; + u8 vrrp_id = adding ? 
mac[5] : 0; + int err; + + if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) && + !mlxsw_sp_rif_macvlan_is_vrrp6(mac)) + return 0; + + mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); + if (err) + return err; + + if (mlxsw_sp_rif_macvlan_is_vrrp4(mac)) + mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id); + else + mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp, + const struct net_device *macvlan_dev, + struct netlink_ext_ack *extack) +{ + struct macvlan_dev *vlan = netdev_priv(macvlan_dev); + struct mlxsw_sp_rif *rif; + int err; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev); + if (!rif) { + NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces"); + return -EOPNOTSUPP; + } + + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), true); + if (err) + return err; + + err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, + macvlan_dev->dev_addr, true); + if (err) + goto err_rif_vrrp_add; + + /* Make sure the bridge driver does not have this MAC pointing at + * some other port. + */ + if (rif->ops->fdb_del) + rif->ops->fdb_del(rif, macvlan_dev->dev_addr); + + return 0; + +err_rif_vrrp_add: + mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), false); + return err; +} + +static void __mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp, + const struct net_device *macvlan_dev) +{ + struct macvlan_dev *vlan = netdev_priv(macvlan_dev); + struct mlxsw_sp_rif *rif; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev); + /* If we do not have a RIF, then we already took care of + * removing the macvlan's MAC during RIF deletion. 
+ */ + if (!rif) + return; + mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr, + false); + mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), false); +} + +void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp, + const struct net_device *macvlan_dev) +{ + mutex_lock(&mlxsw_sp->router->lock); + __mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev); + mutex_unlock(&mlxsw_sp->router->lock); +} + +static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *macvlan_dev, + unsigned long event, + struct netlink_ext_ack *extack) +{ + switch (event) { + case NETDEV_UP: + return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack); + case NETDEV_DOWN: + __mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev); + break; + } + + return 0; +} + +static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev, + unsigned long event, + struct netlink_ext_ack *extack) +{ + if (mlxsw_sp_port_dev_check(dev)) + return mlxsw_sp_inetaddr_port_event(dev, event, extack); + else if (netif_is_lag_master(dev)) + return mlxsw_sp_inetaddr_lag_event(dev, event, extack); + else if (netif_is_bridge_master(dev)) + return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event, + extack); + else if (is_vlan_dev(dev)) + return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event, + extack); + else if (netif_is_macvlan(dev)) + return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event, + extack); + else + return 0; +} + +static int mlxsw_sp_inetaddr_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct in_ifaddr *ifa = (struct in_ifaddr *) ptr; + struct net_device *dev = ifa->ifa_dev->dev; + struct mlxsw_sp_router *router; + struct mlxsw_sp_rif *rif; + int err = 0; + + /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */ + if (event == NETDEV_UP) + return NOTIFY_DONE; + + router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb); + mutex_lock(&router->lock); + rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev); + if (!mlxsw_sp_rif_should_config(rif, dev, event)) + goto out; + + err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL); +out: + mutex_unlock(&router->lock); + return notifier_from_errno(err); +} + +int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct in_validator_info *ivi = (struct in_validator_info *) ptr; + struct net_device *dev = ivi->ivi_dev->dev; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *rif; + int err = 0; + + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + return NOTIFY_DONE; + + mutex_lock(&mlxsw_sp->router->lock); + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!mlxsw_sp_rif_should_config(rif, dev, event)) + goto out; + + err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack); +out: + mutex_unlock(&mlxsw_sp->router->lock); + return notifier_from_errno(err); +} + +struct mlxsw_sp_inet6addr_event_work { + struct work_struct work; + struct mlxsw_sp *mlxsw_sp; + struct net_device *dev; + unsigned long event; +}; + +static void mlxsw_sp_inet6addr_event_work(struct work_struct *work) +{ + struct mlxsw_sp_inet6addr_event_work *inet6addr_work = + container_of(work, struct mlxsw_sp_inet6addr_event_work, work); + struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp; + struct net_device *dev = inet6addr_work->dev; + unsigned long event = inet6addr_work->event; + struct mlxsw_sp_rif *rif; + + rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); + + rif = 
mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!mlxsw_sp_rif_should_config(rif, dev, event)) + goto out; + + __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL); +out: + mutex_unlock(&mlxsw_sp->router->lock); + rtnl_unlock(); + dev_put(dev); + kfree(inet6addr_work); +} + +/* Called with rcu_read_lock() */ +static int mlxsw_sp_inet6addr_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr; + struct mlxsw_sp_inet6addr_event_work *inet6addr_work; + struct net_device *dev = if6->idev->dev; + struct mlxsw_sp_router *router; + + /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */ + if (event == NETDEV_UP) + return NOTIFY_DONE; + + inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC); + if (!inet6addr_work) + return NOTIFY_BAD; + + router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb); + INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work); + inet6addr_work->mlxsw_sp = router->mlxsw_sp; + inet6addr_work->dev = dev; + inet6addr_work->event = event; + dev_hold(dev); + mlxsw_core_schedule_work(&inet6addr_work->work); + + return NOTIFY_DONE; +} + +int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr; + struct net_device *dev = i6vi->i6vi_dev->dev; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *rif; + int err = 0; + + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + return NOTIFY_DONE; + + mutex_lock(&mlxsw_sp->router->lock); + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!mlxsw_sp_rif_should_config(rif, dev, event)) + goto out; + + err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack); +out: + mutex_unlock(&mlxsw_sp->router->lock); + return notifier_from_errno(err); +} + +static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index, + const char *mac, int mtu, u8 mac_profile) +{ + char ritr_pl[MLXSW_REG_RITR_LEN]; + int err; + + mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); + if (err) + return err; + + mlxsw_reg_ritr_mtu_set(ritr_pl, mtu); + mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac); + mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, mac_profile); + mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static int +mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack) +{ + struct net_device *dev = rif->dev; + u8 old_mac_profile; + u16 fid_index; + int err; + + fid_index = mlxsw_sp_fid_index(rif->fid); + + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false); + if (err) + return err; + + old_mac_profile = rif->mac_profile_id; + err = mlxsw_sp_rif_mac_profile_replace(mlxsw_sp, rif, dev->dev_addr, + extack); + if (err) + goto err_rif_mac_profile_replace; + + err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr, + dev->mtu, rif->mac_profile_id); + if (err) + goto err_rif_edit; + + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true); + if (err) + goto err_rif_fdb_op; + + if (rif->mtu != dev->mtu) { + struct mlxsw_sp_vr *vr; + int i; + + /* The RIF is relevant only to its mr_table instance, as unlike + * unicast routing, in multicast routing a RIF cannot be shared + * between several multicast routing tables. 
+ */ + vr = &mlxsw_sp->router->vrs[rif->vr_id]; + for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) + mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i], + rif, dev->mtu); + } + + ether_addr_copy(rif->addr, dev->dev_addr); + rif->mtu = dev->mtu; + + netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index); + + return 0; + +err_rif_fdb_op: + mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu, + old_mac_profile); +err_rif_edit: + mlxsw_sp_rif_mac_profile_replace(mlxsw_sp, rif, rif->addr, extack); +err_rif_mac_profile_replace: + mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true); + return err; +} + +static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif, + struct netdev_notifier_pre_changeaddr_info *info) +{ + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_rif_mac_profile *profile; + struct netlink_ext_ack *extack; + u8 max_rif_mac_profiles; + u64 occ; + + extack = netdev_notifier_info_to_extack(&info->info); + + profile = mlxsw_sp_rif_mac_profile_find(mlxsw_sp, info->dev_addr); + if (profile) + return 0; + + max_rif_mac_profiles = mlxsw_sp->router->max_rif_mac_profile; + occ = mlxsw_sp_rif_mac_profiles_occ_get(mlxsw_sp); + if (occ < max_rif_mac_profiles) + return 0; + + if (!mlxsw_sp_rif_mac_profile_is_shared(rif)) + return 0; + + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interface MAC profiles"); + return -ENOBUFS; +} + +static bool mlxsw_sp_is_offload_xstats_event(unsigned long event) +{ + switch (event) { + case NETDEV_OFFLOAD_XSTATS_ENABLE: + case NETDEV_OFFLOAD_XSTATS_DISABLE: + case NETDEV_OFFLOAD_XSTATS_REPORT_USED: + case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA: + return true; + } + + return false; +} + +static int +mlxsw_sp_router_port_offload_xstats_cmd(struct mlxsw_sp_rif *rif, + unsigned long event, + struct netdev_notifier_offload_xstats_info *info) +{ + switch (info->type) { + case NETDEV_OFFLOAD_XSTATS_TYPE_L3: + break; + default: + return 0; + } + + switch (event) { + case NETDEV_OFFLOAD_XSTATS_ENABLE: + return mlxsw_sp_router_port_l3_stats_enable(rif); + case NETDEV_OFFLOAD_XSTATS_DISABLE: + mlxsw_sp_router_port_l3_stats_disable(rif); + return 0; + case NETDEV_OFFLOAD_XSTATS_REPORT_USED: + mlxsw_sp_router_port_l3_stats_report_used(rif, info); + return 0; + case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA: + return mlxsw_sp_router_port_l3_stats_report_delta(rif, info); + } + + WARN_ON_ONCE(1); + return 0; +} + +static int +mlxsw_sp_netdevice_offload_xstats_cmd(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev, + unsigned long event, + struct netdev_notifier_offload_xstats_info *info) +{ + struct mlxsw_sp_rif *rif; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) + return 0; + + return mlxsw_sp_router_port_offload_xstats_cmd(rif, event, info); +} + +static bool mlxsw_sp_is_router_event(unsigned long event) +{ + switch (event) { + case NETDEV_PRE_CHANGEADDR: + case NETDEV_CHANGEADDR: + case NETDEV_CHANGEMTU: + return true; + default: + return false; + } +} + +static int mlxsw_sp_netdevice_router_port_event(struct net_device *dev, + unsigned long event, void *ptr) +{ + struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *rif; + + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + return 0; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) + return 0; + + switch (event) { + case NETDEV_CHANGEMTU: + case NETDEV_CHANGEADDR: + return mlxsw_sp_router_port_change_event(mlxsw_sp, rif, extack); + case NETDEV_PRE_CHANGEADDR: + return 
mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr); + default: + WARN_ON_ONCE(1); + break; + } + + return 0; +} + +static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_rif *rif; + + /* If netdev is already associated with a RIF, then we need to + * destroy it and create a new one with the new virtual router ID. + */ + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); + if (rif) + __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, + extack); + + return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack); +} + +static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev) +{ + struct mlxsw_sp_rif *rif; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); + if (!rif) + return; + __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL); +} + +static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr) +{ + struct netdev_notifier_changeupper_info *info = ptr; + + if (event != NETDEV_PRECHANGEUPPER && event != NETDEV_CHANGEUPPER) + return false; + return netif_is_l3_master(info->upper_dev); +} + +static int +mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, + struct netdev_notifier_changeupper_info *info) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev); + int err = 0; + + /* We do not create a RIF for a macvlan, but only use it to + * direct more MAC addresses to the router. + */ + if (!mlxsw_sp || netif_is_macvlan(l3_dev)) + return 0; + + switch (event) { + case NETDEV_PRECHANGEUPPER: + break; + case NETDEV_CHANGEUPPER: + if (info->linking) { + struct netlink_ext_ack *extack; + + extack = netdev_notifier_info_to_extack(&info->info); + err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack); + } else { + mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev); + } + break; + } + + return err; +} + +static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct mlxsw_sp_router *router; + struct mlxsw_sp *mlxsw_sp; + int err = 0; + + router = container_of(nb, struct mlxsw_sp_router, netdevice_nb); + mlxsw_sp = router->mlxsw_sp; + + mutex_lock(&mlxsw_sp->router->lock); + + if (mlxsw_sp_is_offload_xstats_event(event)) + err = mlxsw_sp_netdevice_offload_xstats_cmd(mlxsw_sp, dev, + event, ptr); + else if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev)) + err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev, + event, ptr); + else if (mlxsw_sp_netdev_is_ipip_ul(mlxsw_sp, dev)) + err = mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, dev, + event, ptr); + else if (mlxsw_sp_is_router_event(event)) + err = mlxsw_sp_netdevice_router_port_event(dev, event, ptr); + else if (mlxsw_sp_is_vrf_event(event, ptr)) + err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr); + + mutex_unlock(&mlxsw_sp->router->lock); + + return notifier_from_errno(err); +} + +static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, + struct netdev_nested_priv *priv) +{ + struct mlxsw_sp_rif *rif = (struct mlxsw_sp_rif *)priv->data; + + if (!netif_is_macvlan(dev)) + return 0; + + return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), false); +} + +static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif) +{ + struct netdev_nested_priv priv = { + .data = (void *)rif, + }; + + if (!netif_is_macvlan_port(rif->dev)) + return 0; + + netdev_warn(rif->dev, "Router interface is deleted. 
Upper macvlans will not work\n"); + return netdev_walk_all_upper_dev_rcu(rif->dev, + __mlxsw_sp_rif_macvlan_flush, &priv); +} + +static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif, + const struct mlxsw_sp_rif_params *params) +{ + struct mlxsw_sp_rif_subport *rif_subport; + + rif_subport = mlxsw_sp_rif_subport_rif(rif); + refcount_set(&rif_subport->ref_count, 1); + rif_subport->vid = params->vid; + rif_subport->lag = params->lag; + if (params->lag) + rif_subport->lag_id = params->lag_id; + else + rif_subport->system_port = params->system_port; +} + +static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_rif_subport *rif_subport; + char ritr_pl[MLXSW_REG_RITR_LEN]; + u16 efid; + + rif_subport = mlxsw_sp_rif_subport_rif(rif); + mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF, + rif->rif_index, rif->vr_id, rif->dev->mtu); + mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr); + mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id); + efid = mlxsw_sp_fid_index(rif->fid); + mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag, + rif_subport->lag ? rif_subport->lag_id : + rif_subport->system_port, + efid, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack) +{ + u8 mac_profile; + int err; + + err = mlxsw_sp_rif_mac_profile_get(rif->mlxsw_sp, rif->addr, + &mac_profile, extack); + if (err) + return err; + rif->mac_profile_id = mac_profile; + + err = mlxsw_sp_rif_subport_op(rif, true); + if (err) + goto err_rif_subport_op; + + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), true); + if (err) + goto err_rif_fdb_op; + + err = mlxsw_sp_fid_rif_set(rif->fid, rif); + if (err) + goto err_fid_rif_set; + + return 0; + +err_fid_rif_set: + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), false); +err_rif_fdb_op: + mlxsw_sp_rif_subport_op(rif, false); +err_rif_subport_op: + mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, mac_profile); + return err; +} + +static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_fid *fid = rif->fid; + + mlxsw_sp_fid_rif_unset(fid); + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(fid), false); + mlxsw_sp_rif_macvlan_flush(rif); + mlxsw_sp_rif_subport_op(rif, false); + mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id); +} + +static struct mlxsw_sp_fid * +mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack) +{ + return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index); +} + +static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = { + .type = MLXSW_SP_RIF_TYPE_SUBPORT, + .rif_size = sizeof(struct mlxsw_sp_rif_subport), + .setup = mlxsw_sp_rif_subport_setup, + .configure = mlxsw_sp_rif_subport_configure, + .deconfigure = mlxsw_sp_rif_subport_deconfigure, + .fid_get = mlxsw_sp_rif_subport_fid_get, +}; + +static int mlxsw_sp_rif_fid_op(struct mlxsw_sp_rif *rif, u16 fid, bool enable) +{ + enum mlxsw_reg_ritr_if_type type = MLXSW_REG_RITR_FID_IF; + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + char ritr_pl[MLXSW_REG_RITR_LEN]; + + mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id, + rif->dev->mtu); + mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr); + mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id); + 
mlxsw_reg_ritr_fid_if_fid_set(ritr_pl, fid); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +u16 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp) +{ + return mlxsw_core_max_ports(mlxsw_sp->core) + 1; +} + +static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + u16 fid_index = mlxsw_sp_fid_index(rif->fid); + u8 mac_profile; + int err; + + err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, rif->addr, + &mac_profile, extack); + if (err) + return err; + rif->mac_profile_id = mac_profile; + + err = mlxsw_sp_rif_fid_op(rif, fid_index, true); + if (err) + goto err_rif_fid_op; + + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), true); + if (err) + goto err_fid_mc_flood_set; + + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, + mlxsw_sp_router_port(mlxsw_sp), true); + if (err) + goto err_fid_bc_flood_set; + + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), true); + if (err) + goto err_rif_fdb_op; + + err = mlxsw_sp_fid_rif_set(rif->fid, rif); + if (err) + goto err_fid_rif_set; + + return 0; + +err_fid_rif_set: + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), false); +err_rif_fdb_op: + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, + mlxsw_sp_router_port(mlxsw_sp), false); +err_fid_bc_flood_set: + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); +err_fid_mc_flood_set: + mlxsw_sp_rif_fid_op(rif, fid_index, false); +err_rif_fid_op: + mlxsw_sp_rif_mac_profile_put(mlxsw_sp, mac_profile); + return err; +} + +static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif) +{ + u16 fid_index = mlxsw_sp_fid_index(rif->fid); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_fid *fid = rif->fid; + + mlxsw_sp_fid_rif_unset(fid); + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(fid), false); + mlxsw_sp_rif_macvlan_flush(rif); + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, + mlxsw_sp_router_port(mlxsw_sp), false); + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); + mlxsw_sp_rif_fid_op(rif, fid_index, false); + mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id); +} + +static struct mlxsw_sp_fid * +mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack) +{ + return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex); +} + +static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) +{ + struct switchdev_notifier_fdb_info info = {}; + struct net_device *dev; + + dev = br_fdb_find_port(rif->dev, mac, 0); + if (!dev) + return; + + info.addr = mac; + info.vid = 0; + call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info, + NULL); +} + +static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = { + .type = MLXSW_SP_RIF_TYPE_FID, + .rif_size = sizeof(struct mlxsw_sp_rif), + .configure = mlxsw_sp_rif_fid_configure, + .deconfigure = mlxsw_sp_rif_fid_deconfigure, + .fid_get = mlxsw_sp_rif_fid_fid_get, + .fdb_del = mlxsw_sp_rif_fid_fdb_del, +}; + +static struct mlxsw_sp_fid * +mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack) +{ + struct net_device *br_dev; + u16 vid; + int err; + + if (is_vlan_dev(rif->dev)) { + vid = vlan_dev_vlan_id(rif->dev); + br_dev = 
vlan_dev_real_dev(rif->dev); + if (WARN_ON(!netif_is_bridge_master(br_dev))) + return ERR_PTR(-EINVAL); + } else { + err = br_vlan_get_pvid(rif->dev, &vid); + if (err < 0 || !vid) { + NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID"); + return ERR_PTR(-EINVAL); + } + } + + return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid); +} + +static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) +{ + struct switchdev_notifier_fdb_info info = {}; + u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); + struct net_device *br_dev; + struct net_device *dev; + + br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev; + dev = br_fdb_find_port(br_dev, mac, vid); + if (!dev) + return; + + info.addr = mac; + info.vid = vid; + call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info, + NULL); +} + +static int mlxsw_sp_rif_vlan_op(struct mlxsw_sp_rif *rif, u16 vid, u16 efid, + bool enable) +{ + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + char ritr_pl[MLXSW_REG_RITR_LEN]; + + mlxsw_reg_ritr_vlan_if_pack(ritr_pl, enable, rif->rif_index, rif->vr_id, + rif->dev->mtu, rif->dev->dev_addr, + rif->mac_profile_id, vid, efid); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif, u16 efid, + struct netlink_ext_ack *extack) +{ + u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + u8 mac_profile; + int err; + + err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, rif->addr, + &mac_profile, extack); + if (err) + return err; + rif->mac_profile_id = mac_profile; + + err = mlxsw_sp_rif_vlan_op(rif, vid, efid, true); + if (err) + goto err_rif_vlan_fid_op; + + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), true); + if (err) + goto err_fid_mc_flood_set; + + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, + mlxsw_sp_router_port(mlxsw_sp), true); + if (err) + goto err_fid_bc_flood_set; + + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), true); + if (err) + goto err_rif_fdb_op; + + err = mlxsw_sp_fid_rif_set(rif->fid, rif); + if (err) + goto err_fid_rif_set; + + return 0; + +err_fid_rif_set: + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), false); +err_rif_fdb_op: + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, + mlxsw_sp_router_port(mlxsw_sp), false); +err_fid_bc_flood_set: + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); +err_fid_mc_flood_set: + mlxsw_sp_rif_vlan_op(rif, vid, 0, false); +err_rif_vlan_fid_op: + mlxsw_sp_rif_mac_profile_put(mlxsw_sp, mac_profile); + return err; +} + +static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif) +{ + u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + + mlxsw_sp_fid_rif_unset(rif->fid); + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), false); + mlxsw_sp_rif_macvlan_flush(rif); + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, + mlxsw_sp_router_port(mlxsw_sp), false); + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); + mlxsw_sp_rif_vlan_op(rif, vid, 0, false); + mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id); +} + +static int mlxsw_sp1_rif_vlan_configure(struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack) +{ + return 
mlxsw_sp_rif_vlan_configure(rif, 0, extack); +} + +static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_vlan_ops = { + .type = MLXSW_SP_RIF_TYPE_VLAN, + .rif_size = sizeof(struct mlxsw_sp_rif), + .configure = mlxsw_sp1_rif_vlan_configure, + .deconfigure = mlxsw_sp_rif_vlan_deconfigure, + .fid_get = mlxsw_sp_rif_vlan_fid_get, + .fdb_del = mlxsw_sp_rif_vlan_fdb_del, +}; + +static int mlxsw_sp2_rif_vlan_configure(struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack) +{ + u16 efid = mlxsw_sp_fid_index(rif->fid); + + return mlxsw_sp_rif_vlan_configure(rif, efid, extack); +} + +static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_vlan_ops = { + .type = MLXSW_SP_RIF_TYPE_VLAN, + .rif_size = sizeof(struct mlxsw_sp_rif), + .configure = mlxsw_sp2_rif_vlan_configure, + .deconfigure = mlxsw_sp_rif_vlan_deconfigure, + .fid_get = mlxsw_sp_rif_vlan_fid_get, + .fdb_del = mlxsw_sp_rif_vlan_fdb_del, +}; + +static struct mlxsw_sp_rif_ipip_lb * +mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif) +{ + return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common); +} + +static void +mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif, + const struct mlxsw_sp_rif_params *params) +{ + struct mlxsw_sp_rif_params_ipip_lb *params_lb; + struct mlxsw_sp_rif_ipip_lb *rif_lb; + + params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb, + common); + rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif); + rif_lb->lb_config = params_lb->lb_config; +} + +static int +mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_vr *ul_vr; + int err; + + ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL); + if (IS_ERR(ul_vr)) + return PTR_ERR(ul_vr); + + err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true); + if (err) + goto err_loopback_op; + + lb_rif->ul_vr_id = ul_vr->id; + lb_rif->ul_rif_id = 0; + ++ul_vr->rif_count; + return 0; + +err_loopback_op: + mlxsw_sp_vr_put(mlxsw_sp, ul_vr); + return err; +} + +static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_vr *ul_vr; + + ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id]; + mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false); + + --ul_vr->rif_count; + mlxsw_sp_vr_put(mlxsw_sp, ul_vr); +} + +static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = { + .type = MLXSW_SP_RIF_TYPE_IPIP_LB, + .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb), + .setup = mlxsw_sp_rif_ipip_lb_setup, + .configure = mlxsw_sp1_rif_ipip_lb_configure, + .deconfigure = mlxsw_sp1_rif_ipip_lb_deconfigure, +}; + +static const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = { + [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops, + [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp1_rif_vlan_ops, + [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops, + [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp1_rif_ipip_lb_ops, +}; + +static int +mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp; + char ritr_pl[MLXSW_REG_RITR_LEN]; + + mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF, + ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU); + mlxsw_reg_ritr_loopback_protocol_set(ritr_pl, + MLXSW_REG_RITR_LOOPBACK_GENERIC); + + return mlxsw_reg_write(mlxsw_sp->core, 
MLXSW_REG(ritr), ritr_pl); +} + +static struct mlxsw_sp_rif * +mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_rif *ul_rif; + u16 rif_index; + int err; + + err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces"); + return ERR_PTR(err); + } + + ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL); + if (!ul_rif) + return ERR_PTR(-ENOMEM); + + mlxsw_sp->router->rifs[rif_index] = ul_rif; + ul_rif->mlxsw_sp = mlxsw_sp; + err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true); + if (err) + goto ul_rif_op_err; + + atomic_inc(&mlxsw_sp->router->rifs_count); + return ul_rif; + +ul_rif_op_err: + mlxsw_sp->router->rifs[rif_index] = NULL; + kfree(ul_rif); + return ERR_PTR(err); +} + +static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif) +{ + struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp; + + atomic_dec(&mlxsw_sp->router->rifs_count); + mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false); + mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL; + kfree(ul_rif); +} + +static struct mlxsw_sp_rif * +mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_vr *vr; + int err; + + vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack); + if (IS_ERR(vr)) + return ERR_CAST(vr); + + if (refcount_inc_not_zero(&vr->ul_rif_refcnt)) + return vr->ul_rif; + + vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack); + if (IS_ERR(vr->ul_rif)) { + err = PTR_ERR(vr->ul_rif); + goto err_ul_rif_create; + } + + vr->rif_count++; + refcount_set(&vr->ul_rif_refcnt, 1); + + return vr->ul_rif; + +err_ul_rif_create: + mlxsw_sp_vr_put(mlxsw_sp, vr); + return ERR_PTR(err); +} + +static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif) +{ + struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp; + struct mlxsw_sp_vr *vr; + + vr = &mlxsw_sp->router->vrs[ul_rif->vr_id]; + + if (!refcount_dec_and_test(&vr->ul_rif_refcnt)) + return; + + vr->rif_count--; + mlxsw_sp_ul_rif_destroy(ul_rif); + mlxsw_sp_vr_put(mlxsw_sp, vr); +} + +int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + u16 *ul_rif_index) +{ + struct mlxsw_sp_rif *ul_rif; + int err = 0; + + mutex_lock(&mlxsw_sp->router->lock); + ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL); + if (IS_ERR(ul_rif)) { + err = PTR_ERR(ul_rif); + goto out; + } + *ul_rif_index = ul_rif->rif_index; +out: + mutex_unlock(&mlxsw_sp->router->lock); + return err; +} + +void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index) +{ + struct mlxsw_sp_rif *ul_rif; + + mutex_lock(&mlxsw_sp->router->lock); + ul_rif = mlxsw_sp->router->rifs[ul_rif_index]; + if (WARN_ON(!ul_rif)) + goto out; + + mlxsw_sp_ul_rif_put(ul_rif); +out: + mutex_unlock(&mlxsw_sp->router->lock); +} + +static int +mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_rif *ul_rif; + int err; + + ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL); + if (IS_ERR(ul_rif)) + return PTR_ERR(ul_rif); + + err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true); + if (err) + goto err_loopback_op; + + lb_rif->ul_vr_id = 0; + lb_rif->ul_rif_id = ul_rif->rif_index; + + return 0; + +err_loopback_op: + mlxsw_sp_ul_rif_put(ul_rif); + return err; +} 
+ +static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_rif *ul_rif; + + ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id); + mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false); + mlxsw_sp_ul_rif_put(ul_rif); +} + +static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = { + .type = MLXSW_SP_RIF_TYPE_IPIP_LB, + .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb), + .setup = mlxsw_sp_rif_ipip_lb_setup, + .configure = mlxsw_sp2_rif_ipip_lb_configure, + .deconfigure = mlxsw_sp2_rif_ipip_lb_deconfigure, +}; + +static const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = { + [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops, + [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp2_rif_vlan_ops, + [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops, + [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp2_rif_ipip_lb_ops, +}; + +static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp) +{ + u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_core *core = mlxsw_sp->core; + + if (!MLXSW_CORE_RES_VALID(core, MAX_RIF_MAC_PROFILES)) + return -EIO; + mlxsw_sp->router->max_rif_mac_profile = + MLXSW_CORE_RES_GET(core, MAX_RIF_MAC_PROFILES); + + mlxsw_sp->router->rifs = kcalloc(max_rifs, + sizeof(struct mlxsw_sp_rif *), + GFP_KERNEL); + if (!mlxsw_sp->router->rifs) + return -ENOMEM; + + idr_init(&mlxsw_sp->router->rif_mac_profiles_idr); + atomic_set(&mlxsw_sp->router->rif_mac_profiles_count, 0); + atomic_set(&mlxsw_sp->router->rifs_count, 0); + devl_resource_occ_get_register(devlink, + MLXSW_SP_RESOURCE_RIF_MAC_PROFILES, + mlxsw_sp_rif_mac_profiles_occ_get, + mlxsw_sp); + devl_resource_occ_get_register(devlink, + MLXSW_SP_RESOURCE_RIFS, + mlxsw_sp_rifs_occ_get, + mlxsw_sp); + + return 0; +} + +static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + int i; + + WARN_ON_ONCE(atomic_read(&mlxsw_sp->router->rifs_count)); + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) + WARN_ON_ONCE(mlxsw_sp->router->rifs[i]); + + devl_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_RIFS); + devl_resource_occ_get_unregister(devlink, + MLXSW_SP_RESOURCE_RIF_MAC_PROFILES); + WARN_ON(!idr_is_empty(&mlxsw_sp->router->rif_mac_profiles_idr)); + idr_destroy(&mlxsw_sp->router->rif_mac_profiles_idr); + kfree(mlxsw_sp->router->rifs); +} + +static int +mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp) +{ + char tigcr_pl[MLXSW_REG_TIGCR_LEN]; + + mlxsw_reg_tigcr_pack(tigcr_pl, true, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl); +} + +static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list); + + err = mlxsw_sp_ipip_ecn_encap_init(mlxsw_sp); + if (err) + return err; + err = mlxsw_sp_ipip_ecn_decap_init(mlxsw_sp); + if (err) + return err; + + return mlxsw_sp_ipip_config_tigcr(mlxsw_sp); +} + +static int mlxsw_sp1_ipips_init(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp->router->ipip_ops_arr = mlxsw_sp1_ipip_ops_arr; + return mlxsw_sp_ipips_init(mlxsw_sp); +} + +static int mlxsw_sp2_ipips_init(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp->router->ipip_ops_arr = mlxsw_sp2_ipip_ops_arr; + return mlxsw_sp_ipips_init(mlxsw_sp); +} + +static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp) +{ + 
WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list)); +} + +static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb) +{ + struct mlxsw_sp_router *router; + + /* Flush pending FIB notifications and then flush the device's + * table before requesting another dump. The FIB notification + * block is unregistered, so no need to take RTNL. + */ + mlxsw_core_flush_owq(); + router = container_of(nb, struct mlxsw_sp_router, fib_nb); + mlxsw_sp_router_fib_flush(router->mlxsw_sp); +} + +#ifdef CONFIG_IP_ROUTE_MULTIPATH +struct mlxsw_sp_mp_hash_config { + DECLARE_BITMAP(headers, __MLXSW_REG_RECR2_HEADER_CNT); + DECLARE_BITMAP(fields, __MLXSW_REG_RECR2_FIELD_CNT); + DECLARE_BITMAP(inner_headers, __MLXSW_REG_RECR2_HEADER_CNT); + DECLARE_BITMAP(inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT); + bool inc_parsing_depth; +}; + +#define MLXSW_SP_MP_HASH_HEADER_SET(_headers, _header) \ + bitmap_set(_headers, MLXSW_REG_RECR2_##_header, 1) + +#define MLXSW_SP_MP_HASH_FIELD_SET(_fields, _field) \ + bitmap_set(_fields, MLXSW_REG_RECR2_##_field, 1) + +#define MLXSW_SP_MP_HASH_FIELD_RANGE_SET(_fields, _field, _nr) \ + bitmap_set(_fields, MLXSW_REG_RECR2_##_field, _nr) + +static void mlxsw_sp_mp_hash_inner_l3(struct mlxsw_sp_mp_hash_config *config) +{ + unsigned long *inner_headers = config->inner_headers; + unsigned long *inner_fields = config->inner_fields; + + /* IPv4 inner */ + MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_NOT_TCP_NOT_UDP); + MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_TCP_UDP); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_SIP0, 4); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_DIP0, 4); + /* IPv6 inner */ + MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_NOT_TCP_NOT_UDP); + MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_TCP_UDP); + MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_SIP0_7); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_SIP8, 8); + MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_DIP0_7); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_DIP8, 8); + MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_NEXT_HEADER); + MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_FLOW_LABEL); +} + +static void mlxsw_sp_mp4_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config) +{ + unsigned long *headers = config->headers; + unsigned long *fields = config->fields; + + MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP); + MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4); +} + +static void +mlxsw_sp_mp_hash_inner_custom(struct mlxsw_sp_mp_hash_config *config, + u32 hash_fields) +{ + unsigned long *inner_headers = config->inner_headers; + unsigned long *inner_fields = config->inner_fields; + + /* IPv4 Inner */ + MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_NOT_TCP_NOT_UDP); + MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_TCP_UDP); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_SIP0, 4); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_DIP0, 4); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO) + MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV4_PROTOCOL); + /* IPv6 inner */ + MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_NOT_TCP_NOT_UDP); + MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_TCP_UDP); + if 
(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) { + MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_SIP0_7); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_SIP8, 8); + } + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) { + MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_DIP0_7); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_DIP8, 8); + } + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO) + MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_NEXT_HEADER); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL) + MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_FLOW_LABEL); + /* L4 inner */ + MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, TCP_UDP_EN_IPV4); + MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, TCP_UDP_EN_IPV6); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT) + MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_TCP_UDP_SPORT); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) + MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_TCP_UDP_DPORT); +} + +static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mp_hash_config *config) +{ + struct net *net = mlxsw_sp_net(mlxsw_sp); + unsigned long *headers = config->headers; + unsigned long *fields = config->fields; + u32 hash_fields; + + switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) { + case 0: + mlxsw_sp_mp4_hash_outer_addr(config); + break; + case 1: + mlxsw_sp_mp4_hash_outer_addr(config); + MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4); + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL); + MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT); + MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT); + break; + case 2: + /* Outer */ + mlxsw_sp_mp4_hash_outer_addr(config); + /* Inner */ + mlxsw_sp_mp_hash_inner_l3(config); + break; + case 3: + hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); + /* Outer */ + MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP); + MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP); + MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) + MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) + MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT); + /* Inner */ + mlxsw_sp_mp_hash_inner_custom(config, hash_fields); + break; + } +} + +static void mlxsw_sp_mp6_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config) +{ + unsigned long *headers = config->headers; + unsigned long *fields = config->fields; + + MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP); + MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP); + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8); + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8); +} + +static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mp_hash_config *config) +{ + u32 hash_fields = ip6_multipath_hash_fields(mlxsw_sp_net(mlxsw_sp)); + unsigned long *headers = config->headers; + unsigned long *fields = config->fields; + + switch 
(ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp))) { + case 0: + mlxsw_sp_mp6_hash_outer_addr(config); + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER); + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL); + break; + case 1: + mlxsw_sp_mp6_hash_outer_addr(config); + MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6); + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER); + MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT); + MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT); + break; + case 2: + /* Outer */ + mlxsw_sp_mp6_hash_outer_addr(config); + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER); + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL); + /* Inner */ + mlxsw_sp_mp_hash_inner_l3(config); + config->inc_parsing_depth = true; + break; + case 3: + /* Outer */ + MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP); + MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP); + MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) { + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8); + } + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) { + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8); + } + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL) + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) + MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) + MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT); + /* Inner */ + mlxsw_sp_mp_hash_inner_custom(config, hash_fields); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK) + config->inc_parsing_depth = true; + break; + } +} + +static int mlxsw_sp_mp_hash_parsing_depth_adjust(struct mlxsw_sp *mlxsw_sp, + bool old_inc_parsing_depth, + bool new_inc_parsing_depth) +{ + int err; + + if (!old_inc_parsing_depth && new_inc_parsing_depth) { + err = mlxsw_sp_parsing_depth_inc(mlxsw_sp); + if (err) + return err; + mlxsw_sp->router->inc_parsing_depth = true; + } else if (old_inc_parsing_depth && !new_inc_parsing_depth) { + mlxsw_sp_parsing_depth_dec(mlxsw_sp); + mlxsw_sp->router->inc_parsing_depth = false; + } + + return 0; +} + +static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp) +{ + bool old_inc_parsing_depth, new_inc_parsing_depth; + struct mlxsw_sp_mp_hash_config config = {}; + char recr2_pl[MLXSW_REG_RECR2_LEN]; + unsigned long bit; + u32 seed; + int err; + + seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0); + mlxsw_reg_recr2_pack(recr2_pl, seed); + mlxsw_sp_mp4_hash_init(mlxsw_sp, &config); + mlxsw_sp_mp6_hash_init(mlxsw_sp, &config); + + old_inc_parsing_depth = mlxsw_sp->router->inc_parsing_depth; + new_inc_parsing_depth = config.inc_parsing_depth; + err = mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, + old_inc_parsing_depth, + new_inc_parsing_depth); + if (err) + return err; + + for_each_set_bit(bit, config.headers, __MLXSW_REG_RECR2_HEADER_CNT) + mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, bit, 1); + for_each_set_bit(bit, config.fields, __MLXSW_REG_RECR2_FIELD_CNT) + mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, bit, 1); + for_each_set_bit(bit, config.inner_headers, __MLXSW_REG_RECR2_HEADER_CNT) + mlxsw_reg_recr2_inner_header_enables_set(recr2_pl, bit, 1); + for_each_set_bit(bit, 
config.inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT) + mlxsw_reg_recr2_inner_header_fields_enable_set(recr2_pl, bit, 1); + + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl); + if (err) + goto err_reg_write; + + return 0; + +err_reg_write: + mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, new_inc_parsing_depth, + old_inc_parsing_depth); + return err; +} + +static void mlxsw_sp_mp_hash_fini(struct mlxsw_sp *mlxsw_sp) +{ + bool old_inc_parsing_depth = mlxsw_sp->router->inc_parsing_depth; + + mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, old_inc_parsing_depth, + false); +} +#else +static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp) +{ + return 0; +} + +static void mlxsw_sp_mp_hash_fini(struct mlxsw_sp *mlxsw_sp) +{ +} +#endif + +static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp) +{ + char rdpm_pl[MLXSW_REG_RDPM_LEN]; + unsigned int i; + + MLXSW_REG_ZERO(rdpm, rdpm_pl); + + /* HW is determining switch priority based on DSCP-bits, but the + * kernel is still doing that based on the ToS. Since there's a + * mismatch in bits we need to make sure to translate the right + * value ToS would observe, skipping the 2 least-significant ECN bits. + */ + for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++) + mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2)); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl); +} + +static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) +{ + struct net *net = mlxsw_sp_net(mlxsw_sp); + char rgcr_pl[MLXSW_REG_RGCR_LEN]; + u64 max_rifs; + bool usp; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS)) + return -EIO; + max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); + usp = READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority); + + mlxsw_reg_rgcr_pack(rgcr_pl, true, true); + mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs); + mlxsw_reg_rgcr_usp_set(rgcr_pl, usp); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); +} + +static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) +{ + char rgcr_pl[MLXSW_REG_RGCR_LEN]; + + mlxsw_reg_rgcr_pack(rgcr_pl, false, false); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); +} + +static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp) +{ + u16 lb_rif_index; + int err; + + /* Create a generic loopback RIF associated with the main table + * (default VRF). Any table can be used, but the main table exists + * anyway, so we do not waste resources. 
+ */ + err = mlxsw_sp_router_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN, + &lb_rif_index); + if (err) + return err; + + mlxsw_sp->router->lb_rif_index = lb_rif_index; + + return 0; +} + +static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->router->lb_rif_index); +} + +static int mlxsw_sp1_router_init(struct mlxsw_sp *mlxsw_sp) +{ + size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp1_adj_grp_size_ranges); + + mlxsw_sp->router->rif_ops_arr = mlxsw_sp1_rif_ops_arr; + mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp1_adj_grp_size_ranges; + mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count; + + return 0; +} + +const struct mlxsw_sp_router_ops mlxsw_sp1_router_ops = { + .init = mlxsw_sp1_router_init, + .ipips_init = mlxsw_sp1_ipips_init, +}; + +static int mlxsw_sp2_router_init(struct mlxsw_sp *mlxsw_sp) +{ + size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp2_adj_grp_size_ranges); + + mlxsw_sp->router->rif_ops_arr = mlxsw_sp2_rif_ops_arr; + mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp2_adj_grp_size_ranges; + mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count; + + return 0; +} + +const struct mlxsw_sp_router_ops mlxsw_sp2_router_ops = { + .init = mlxsw_sp2_router_init, + .ipips_init = mlxsw_sp2_ipips_init, +}; + +int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_router *router; + int err; + + router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL); + if (!router) + return -ENOMEM; + mutex_init(&router->lock); + mlxsw_sp->router = router; + router->mlxsw_sp = mlxsw_sp; + + err = mlxsw_sp->router_ops->init(mlxsw_sp); + if (err) + goto err_router_ops_init; + + INIT_LIST_HEAD(&mlxsw_sp->router->nh_res_grp_list); + INIT_DELAYED_WORK(&mlxsw_sp->router->nh_grp_activity_dw, + mlxsw_sp_nh_grp_activity_work); + INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list); + err = __mlxsw_sp_router_init(mlxsw_sp); + if (err) + goto err_router_init; + + err = mlxsw_sp_rifs_init(mlxsw_sp); + if (err) + goto err_rifs_init; + + err = mlxsw_sp->router_ops->ipips_init(mlxsw_sp); + if (err) + goto err_ipips_init; + + err = rhashtable_init(&mlxsw_sp->router->nexthop_ht, + &mlxsw_sp_nexthop_ht_params); + if (err) + goto err_nexthop_ht_init; + + err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht, + &mlxsw_sp_nexthop_group_ht_params); + if (err) + goto err_nexthop_group_ht_init; + + INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list); + err = mlxsw_sp_lpm_init(mlxsw_sp); + if (err) + goto err_lpm_init; + + err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops); + if (err) + goto err_mr_init; + + err = mlxsw_sp_vrs_init(mlxsw_sp); + if (err) + goto err_vrs_init; + + err = mlxsw_sp_lb_rif_init(mlxsw_sp); + if (err) + goto err_lb_rif_init; + + err = mlxsw_sp_neigh_init(mlxsw_sp); + if (err) + goto err_neigh_init; + + err = mlxsw_sp_mp_hash_init(mlxsw_sp); + if (err) + goto err_mp_hash_init; + + err = mlxsw_sp_dscp_init(mlxsw_sp); + if (err) + goto err_dscp_init; + + router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event; + err = register_inetaddr_notifier(&router->inetaddr_nb); + if (err) + goto err_register_inetaddr_notifier; + + router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event; + err = register_inet6addr_notifier(&router->inet6addr_nb); + if (err) + goto err_register_inet6addr_notifier; + + mlxsw_sp->router->netevent_nb.notifier_call = + mlxsw_sp_router_netevent_event; + err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb); + if (err) + goto 
err_register_netevent_notifier; + + mlxsw_sp->router->nexthop_nb.notifier_call = + mlxsw_sp_nexthop_obj_event; + err = register_nexthop_notifier(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->nexthop_nb, + extack); + if (err) + goto err_register_nexthop_notifier; + + mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event; + err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->fib_nb, + mlxsw_sp_router_fib_dump_flush, extack); + if (err) + goto err_register_fib_notifier; + + mlxsw_sp->router->netdevice_nb.notifier_call = + mlxsw_sp_router_netdevice_event; + err = register_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->netdevice_nb); + if (err) + goto err_register_netdev_notifier; + + return 0; + +err_register_netdev_notifier: + unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->fib_nb); +err_register_fib_notifier: + unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->nexthop_nb); +err_register_nexthop_notifier: + unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); +err_register_netevent_notifier: + unregister_inet6addr_notifier(&router->inet6addr_nb); +err_register_inet6addr_notifier: + unregister_inetaddr_notifier(&router->inetaddr_nb); +err_register_inetaddr_notifier: + mlxsw_core_flush_owq(); +err_dscp_init: + mlxsw_sp_mp_hash_fini(mlxsw_sp); +err_mp_hash_init: + mlxsw_sp_neigh_fini(mlxsw_sp); +err_neigh_init: + mlxsw_sp_lb_rif_fini(mlxsw_sp); +err_lb_rif_init: + mlxsw_sp_vrs_fini(mlxsw_sp); +err_vrs_init: + mlxsw_sp_mr_fini(mlxsw_sp); +err_mr_init: + mlxsw_sp_lpm_fini(mlxsw_sp); +err_lpm_init: + rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht); +err_nexthop_group_ht_init: + rhashtable_destroy(&mlxsw_sp->router->nexthop_ht); +err_nexthop_ht_init: + mlxsw_sp_ipips_fini(mlxsw_sp); +err_ipips_init: + mlxsw_sp_rifs_fini(mlxsw_sp); +err_rifs_init: + __mlxsw_sp_router_fini(mlxsw_sp); +err_router_init: + cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw); +err_router_ops_init: + mutex_destroy(&mlxsw_sp->router->lock); + kfree(mlxsw_sp->router); + return err; +} + +void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) +{ + unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->netdevice_nb); + unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->fib_nb); + unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->nexthop_nb); + unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); + unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb); + unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb); + mlxsw_core_flush_owq(); + mlxsw_sp_mp_hash_fini(mlxsw_sp); + mlxsw_sp_neigh_fini(mlxsw_sp); + mlxsw_sp_lb_rif_fini(mlxsw_sp); + mlxsw_sp_vrs_fini(mlxsw_sp); + mlxsw_sp_mr_fini(mlxsw_sp); + mlxsw_sp_lpm_fini(mlxsw_sp); + rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht); + rhashtable_destroy(&mlxsw_sp->router->nexthop_ht); + mlxsw_sp_ipips_fini(mlxsw_sp); + mlxsw_sp_rifs_fini(mlxsw_sp); + __mlxsw_sp_router_fini(mlxsw_sp); + cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw); + mutex_destroy(&mlxsw_sp->router->lock); + kfree(mlxsw_sp->router); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h new file mode 100644 index 000000000..c5dfb972b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2018 
Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_ROUTER_H_ +#define _MLXSW_ROUTER_H_ + +#include "spectrum.h" +#include "reg.h" + +struct mlxsw_sp_router_nve_decap { + u32 ul_tb_id; + u32 tunnel_index; + enum mlxsw_sp_l3proto ul_proto; + union mlxsw_sp_l3addr ul_sip; + u8 valid:1; +}; + +struct mlxsw_sp_router { + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif **rifs; + struct idr rif_mac_profiles_idr; + atomic_t rif_mac_profiles_count; + atomic_t rifs_count; + u8 max_rif_mac_profile; + struct mlxsw_sp_vr *vrs; + struct rhashtable neigh_ht; + struct rhashtable nexthop_group_ht; + struct rhashtable nexthop_ht; + struct list_head nexthop_list; + struct { + /* One tree for each protocol: IPv4 and IPv6 */ + struct mlxsw_sp_lpm_tree *proto_trees[2]; + struct mlxsw_sp_lpm_tree *trees; + unsigned int tree_count; + } lpm; + struct { + struct delayed_work dw; + unsigned long interval; /* ms */ + atomic_t neigh_count; + } neighs_update; + struct delayed_work nexthop_probe_dw; +#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */ + struct list_head nexthop_neighs_list; + struct list_head ipip_list; + struct notifier_block nexthop_nb; + struct notifier_block fib_nb; + struct notifier_block netevent_nb; + struct notifier_block inetaddr_nb; + struct notifier_block inet6addr_nb; + struct notifier_block netdevice_nb; + const struct mlxsw_sp_rif_ops **rif_ops_arr; + const struct mlxsw_sp_ipip_ops **ipip_ops_arr; + struct mlxsw_sp_router_nve_decap nve_decap_config; + struct mutex lock; /* Protects shared router resources */ + struct mlxsw_sp_fib_entry_op_ctx *ll_op_ctx; + u16 lb_rif_index; + const struct mlxsw_sp_adj_grp_size_range *adj_grp_size_ranges; + size_t adj_grp_size_ranges_count; + struct delayed_work nh_grp_activity_dw; + struct list_head nh_res_grp_list; + bool inc_parsing_depth; + refcount_t num_groups; + u32 adj_trap_index; +}; + +struct mlxsw_sp_rif_ipip_lb; +struct mlxsw_sp_rif_ipip_lb_config { + enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt; + u32 okey; + enum mlxsw_sp_l3proto ul_protocol; /* Underlay. 
*/ + union mlxsw_sp_l3addr saddr; +}; + +enum mlxsw_sp_rif_counter_dir { + MLXSW_SP_RIF_COUNTER_INGRESS, + MLXSW_SP_RIF_COUNTER_EGRESS, +}; + +struct mlxsw_sp_neigh_entry; +struct mlxsw_sp_nexthop; +struct mlxsw_sp_ipip_entry; + +struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, + u16 rif_index); +u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif); +u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif); +u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif); +u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev); +int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); +const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif); +int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir, + u64 *cnt); +void mlxsw_sp_rif_counter_free(struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir); +int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir); +struct mlxsw_sp_neigh_entry * +mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif, + struct mlxsw_sp_neigh_entry *neigh_entry); +int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry); +unsigned char * +mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry); +u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry); +struct in6_addr * +mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry); + +#define mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) \ + for (neigh_entry = mlxsw_sp_rif_neigh_next(rif, NULL); neigh_entry; \ + neigh_entry = mlxsw_sp_rif_neigh_next(rif, neigh_entry)) +int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + u64 *p_counter); +void +mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + bool adding); +bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry); +int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + bool recreate_loopback, + bool keep_encap, + bool update_nexthops, + struct netlink_ext_ack *extack); +void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry); +bool +mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_l3proto ul_proto, + union mlxsw_sp_l3addr saddr, + u32 ul_tb_id, + const struct mlxsw_sp_ipip_entry *except); +struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router, + struct mlxsw_sp_nexthop *nh); +bool mlxsw_sp_nexthop_is_forward(const struct mlxsw_sp_nexthop *nh); +unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh); +int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index, + u32 *p_adj_size, u32 *p_adj_hash_index); +struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh); +bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh); +#define mlxsw_sp_nexthop_for_each(nh, router) \ + for (nh = mlxsw_sp_nexthop_next(router, NULL); nh; \ + nh = mlxsw_sp_nexthop_next(router, nh)) +int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, u64 *p_counter); +int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh, bool force, + char *ratr_pl); +void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, + struct 
mlxsw_sp_nexthop *nh); +void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh); + +static inline bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1, + const union mlxsw_sp_l3addr *addr2) +{ + return !memcmp(addr1, addr2, sizeof(*addr1)); +} + +int mlxsw_sp_ipip_ecn_encap_init(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_ipip_ecn_decap_init(struct mlxsw_sp *mlxsw_sp); +struct net_device * +mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev); + +#endif /* _MLXSW_ROUTER_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c new file mode 100644 index 000000000..b3472fb94 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -0,0 +1,1741 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#include <linux/if_bridge.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/refcount.h> +#include <linux/rtnetlink.h> +#include <linux/workqueue.h> +#include <net/arp.h> +#include <net/gre.h> +#include <net/lag.h> +#include <net/ndisc.h> +#include <net/ip6_tunnel.h> + +#include "spectrum.h" +#include "spectrum_ipip.h" +#include "spectrum_span.h" +#include "spectrum_switchdev.h" + +struct mlxsw_sp_span { + struct work_struct work; + struct mlxsw_sp *mlxsw_sp; + const struct mlxsw_sp_span_trigger_ops **span_trigger_ops_arr; + const struct mlxsw_sp_span_entry_ops **span_entry_ops_arr; + size_t span_entry_ops_arr_size; + struct list_head analyzed_ports_list; + struct mutex analyzed_ports_lock; /* Protects analyzed_ports_list */ + struct list_head trigger_entries_list; + u16 policer_id_base; + refcount_t policer_id_base_ref_count; + atomic_t active_entries_count; + int entries_count; + struct mlxsw_sp_span_entry entries[]; +}; + +struct mlxsw_sp_span_analyzed_port { + struct list_head list; /* Member of analyzed_ports_list */ + refcount_t ref_count; + u16 local_port; + bool ingress; +}; + +struct mlxsw_sp_span_trigger_entry { + struct list_head list; /* Member of trigger_entries_list */ + struct mlxsw_sp_span *span; + const struct mlxsw_sp_span_trigger_ops *ops; + refcount_t ref_count; + u16 local_port; + enum mlxsw_sp_span_trigger trigger; + struct mlxsw_sp_span_trigger_parms parms; +}; + +enum mlxsw_sp_span_trigger_type { + MLXSW_SP_SPAN_TRIGGER_TYPE_PORT, + MLXSW_SP_SPAN_TRIGGER_TYPE_GLOBAL, +}; + +struct mlxsw_sp_span_trigger_ops { + int (*bind)(struct mlxsw_sp_span_trigger_entry *trigger_entry); + void (*unbind)(struct mlxsw_sp_span_trigger_entry *trigger_entry); + bool (*matches)(struct mlxsw_sp_span_trigger_entry *trigger_entry, + enum mlxsw_sp_span_trigger trigger, + struct mlxsw_sp_port *mlxsw_sp_port); + int (*enable)(struct mlxsw_sp_span_trigger_entry *trigger_entry, + struct mlxsw_sp_port *mlxsw_sp_port, u8 tc); + void (*disable)(struct mlxsw_sp_span_trigger_entry *trigger_entry, + struct mlxsw_sp_port *mlxsw_sp_port, u8 tc); +}; + +static void mlxsw_sp_span_respin_work(struct work_struct *work); + +static u64 mlxsw_sp_span_occ_get(void *priv) +{ + const struct mlxsw_sp *mlxsw_sp = priv; + + return atomic_read(&mlxsw_sp->span->active_entries_count); +} + +int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp_span *span; + int i, entries_count, err; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN)) + return -EIO; + + entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_SPAN); + 
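+	/* struct mlxsw_sp_span ends with a flexible array of SPAN entries
+	 * sized by the MAX_SPAN resource, so the container and all entries
+	 * are allocated in a single allocation below.
+	 */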
span = kzalloc(struct_size(span, entries, entries_count), GFP_KERNEL); + if (!span) + return -ENOMEM; + refcount_set(&span->policer_id_base_ref_count, 0); + span->entries_count = entries_count; + atomic_set(&span->active_entries_count, 0); + mutex_init(&span->analyzed_ports_lock); + INIT_LIST_HEAD(&span->analyzed_ports_list); + INIT_LIST_HEAD(&span->trigger_entries_list); + span->mlxsw_sp = mlxsw_sp; + mlxsw_sp->span = span; + + for (i = 0; i < mlxsw_sp->span->entries_count; i++) + mlxsw_sp->span->entries[i].id = i; + + err = mlxsw_sp->span_ops->init(mlxsw_sp); + if (err) + goto err_init; + + devl_resource_occ_get_register(devlink, MLXSW_SP_RESOURCE_SPAN, + mlxsw_sp_span_occ_get, mlxsw_sp); + INIT_WORK(&span->work, mlxsw_sp_span_respin_work); + + return 0; + +err_init: + mutex_destroy(&mlxsw_sp->span->analyzed_ports_lock); + kfree(mlxsw_sp->span); + return err; +} + +void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + cancel_work_sync(&mlxsw_sp->span->work); + devl_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_SPAN); + + WARN_ON_ONCE(!list_empty(&mlxsw_sp->span->trigger_entries_list)); + WARN_ON_ONCE(!list_empty(&mlxsw_sp->span->analyzed_ports_list)); + mutex_destroy(&mlxsw_sp->span->analyzed_ports_lock); + kfree(mlxsw_sp->span); +} + +static bool mlxsw_sp1_span_cpu_can_handle(const struct net_device *dev) +{ + return !dev; +} + +static int mlxsw_sp1_span_entry_cpu_parms(struct mlxsw_sp *mlxsw_sp, + const struct net_device *to_dev, + struct mlxsw_sp_span_parms *sparmsp) +{ + return -EOPNOTSUPP; +} + +static int +mlxsw_sp1_span_entry_cpu_configure(struct mlxsw_sp_span_entry *span_entry, + struct mlxsw_sp_span_parms sparms) +{ + return -EOPNOTSUPP; +} + +static void +mlxsw_sp1_span_entry_cpu_deconfigure(struct mlxsw_sp_span_entry *span_entry) +{ +} + +static const +struct mlxsw_sp_span_entry_ops mlxsw_sp1_span_entry_ops_cpu = { + .is_static = true, + .can_handle = mlxsw_sp1_span_cpu_can_handle, + .parms_set = mlxsw_sp1_span_entry_cpu_parms, + .configure = mlxsw_sp1_span_entry_cpu_configure, + .deconfigure = mlxsw_sp1_span_entry_cpu_deconfigure, +}; + +static int +mlxsw_sp_span_entry_phys_parms(struct mlxsw_sp *mlxsw_sp, + const struct net_device *to_dev, + struct mlxsw_sp_span_parms *sparmsp) +{ + sparmsp->dest_port = netdev_priv(to_dev); + return 0; +} + +static int +mlxsw_sp_span_entry_phys_configure(struct mlxsw_sp_span_entry *span_entry, + struct mlxsw_sp_span_parms sparms) +{ + struct mlxsw_sp_port *dest_port = sparms.dest_port; + struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp; + u16 local_port = dest_port->local_port; + char mpat_pl[MLXSW_REG_MPAT_LEN]; + int pa_id = span_entry->id; + + /* Create a new port analayzer entry for local_port. 
*/ + mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true, + MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH); + mlxsw_reg_mpat_session_id_set(mpat_pl, sparms.session_id); + mlxsw_reg_mpat_pide_set(mpat_pl, sparms.policer_enable); + mlxsw_reg_mpat_pid_set(mpat_pl, sparms.policer_id); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); +} + +static void +mlxsw_sp_span_entry_deconfigure_common(struct mlxsw_sp_span_entry *span_entry, + enum mlxsw_reg_mpat_span_type span_type) +{ + struct mlxsw_sp_port *dest_port = span_entry->parms.dest_port; + struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp; + u16 local_port = dest_port->local_port; + char mpat_pl[MLXSW_REG_MPAT_LEN]; + int pa_id = span_entry->id; + + mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false, span_type); + mlxsw_reg_mpat_session_id_set(mpat_pl, span_entry->parms.session_id); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); +} + +static void +mlxsw_sp_span_entry_phys_deconfigure(struct mlxsw_sp_span_entry *span_entry) +{ + mlxsw_sp_span_entry_deconfigure_common(span_entry, + MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH); +} + +static const +struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_phys = { + .is_static = true, + .can_handle = mlxsw_sp_port_dev_check, + .parms_set = mlxsw_sp_span_entry_phys_parms, + .configure = mlxsw_sp_span_entry_phys_configure, + .deconfigure = mlxsw_sp_span_entry_phys_deconfigure, +}; + +static int mlxsw_sp_span_dmac(struct neigh_table *tbl, + const void *pkey, + struct net_device *dev, + unsigned char dmac[ETH_ALEN]) +{ + struct neighbour *neigh = neigh_lookup(tbl, pkey, dev); + int err = 0; + + if (!neigh) { + neigh = neigh_create(tbl, pkey, dev); + if (IS_ERR(neigh)) + return PTR_ERR(neigh); + } + + neigh_event_send(neigh, NULL); + + read_lock_bh(&neigh->lock); + if ((neigh->nud_state & NUD_VALID) && !neigh->dead) + memcpy(dmac, neigh->ha, ETH_ALEN); + else + err = -ENOENT; + read_unlock_bh(&neigh->lock); + + neigh_release(neigh); + return err; +} + +static int +mlxsw_sp_span_entry_unoffloadable(struct mlxsw_sp_span_parms *sparmsp) +{ + sparmsp->dest_port = NULL; + return 0; +} + +static struct net_device * +mlxsw_sp_span_entry_bridge_8021q(const struct net_device *br_dev, + unsigned char *dmac, + u16 *p_vid) +{ + struct bridge_vlan_info vinfo; + struct net_device *edev; + u16 vid = *p_vid; + + if (!vid && WARN_ON(br_vlan_get_pvid(br_dev, &vid))) + return NULL; + if (!vid || br_vlan_get_info(br_dev, vid, &vinfo) || + !(vinfo.flags & BRIDGE_VLAN_INFO_BRENTRY)) + return NULL; + + edev = br_fdb_find_port(br_dev, dmac, vid); + if (!edev) + return NULL; + + if (br_vlan_get_info(edev, vid, &vinfo)) + return NULL; + if (vinfo.flags & BRIDGE_VLAN_INFO_UNTAGGED) + *p_vid = 0; + else + *p_vid = vid; + return edev; +} + +static struct net_device * +mlxsw_sp_span_entry_bridge_8021d(const struct net_device *br_dev, + unsigned char *dmac) +{ + return br_fdb_find_port(br_dev, dmac, 0); +} + +static struct net_device * +mlxsw_sp_span_entry_bridge(const struct net_device *br_dev, + unsigned char dmac[ETH_ALEN], + u16 *p_vid) +{ + struct mlxsw_sp_bridge_port *bridge_port; + enum mlxsw_reg_spms_state spms_state; + struct net_device *dev = NULL; + struct mlxsw_sp_port *port; + u8 stp_state; + + if (br_vlan_enabled(br_dev)) + dev = mlxsw_sp_span_entry_bridge_8021q(br_dev, dmac, p_vid); + else if (!*p_vid) + dev = mlxsw_sp_span_entry_bridge_8021d(br_dev, dmac); + if (!dev) + return NULL; + + port = mlxsw_sp_port_dev_lower_find(dev); + if (!port) + return NULL; + + bridge_port = 
mlxsw_sp_bridge_port_find(port->mlxsw_sp->bridge, dev); + if (!bridge_port) + return NULL; + + stp_state = mlxsw_sp_bridge_port_stp_state(bridge_port); + spms_state = mlxsw_sp_stp_spms_state(stp_state); + if (spms_state != MLXSW_REG_SPMS_STATE_FORWARDING) + return NULL; + + return dev; +} + +static struct net_device * +mlxsw_sp_span_entry_vlan(const struct net_device *vlan_dev, + u16 *p_vid) +{ + *p_vid = vlan_dev_vlan_id(vlan_dev); + return vlan_dev_real_dev(vlan_dev); +} + +static struct net_device * +mlxsw_sp_span_entry_lag(struct net_device *lag_dev) +{ + struct net_device *dev; + struct list_head *iter; + + netdev_for_each_lower_dev(lag_dev, dev, iter) + if (netif_carrier_ok(dev) && + net_lag_port_dev_txable(dev) && + mlxsw_sp_port_dev_check(dev)) + return dev; + + return NULL; +} + +static __maybe_unused int +mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *edev, + union mlxsw_sp_l3addr saddr, + union mlxsw_sp_l3addr daddr, + union mlxsw_sp_l3addr gw, + __u8 ttl, + struct neigh_table *tbl, + struct mlxsw_sp_span_parms *sparmsp) +{ + unsigned char dmac[ETH_ALEN]; + u16 vid = 0; + + if (mlxsw_sp_l3addr_is_zero(gw)) + gw = daddr; + + if (!edev || mlxsw_sp_span_dmac(tbl, &gw, edev, dmac)) + goto unoffloadable; + + if (is_vlan_dev(edev)) + edev = mlxsw_sp_span_entry_vlan(edev, &vid); + + if (netif_is_bridge_master(edev)) { + edev = mlxsw_sp_span_entry_bridge(edev, dmac, &vid); + if (!edev) + goto unoffloadable; + } + + if (is_vlan_dev(edev)) { + if (vid || !(edev->flags & IFF_UP)) + goto unoffloadable; + edev = mlxsw_sp_span_entry_vlan(edev, &vid); + } + + if (netif_is_lag_master(edev)) { + if (!(edev->flags & IFF_UP)) + goto unoffloadable; + edev = mlxsw_sp_span_entry_lag(edev); + if (!edev) + goto unoffloadable; + } + + if (!mlxsw_sp_port_dev_check(edev)) + goto unoffloadable; + + sparmsp->dest_port = netdev_priv(edev); + sparmsp->ttl = ttl; + memcpy(sparmsp->dmac, dmac, ETH_ALEN); + memcpy(sparmsp->smac, edev->dev_addr, ETH_ALEN); + sparmsp->saddr = saddr; + sparmsp->daddr = daddr; + sparmsp->vid = vid; + return 0; + +unoffloadable: + return mlxsw_sp_span_entry_unoffloadable(sparmsp); +} + +#if IS_ENABLED(CONFIG_NET_IPGRE) +static struct net_device * +mlxsw_sp_span_gretap4_route(const struct net_device *to_dev, + __be32 *saddrp, __be32 *daddrp) +{ + struct ip_tunnel *tun = netdev_priv(to_dev); + struct net_device *dev = NULL; + struct ip_tunnel_parm parms; + struct rtable *rt = NULL; + struct flowi4 fl4; + + /* We assume "dev" stays valid after rt is put. 
*/ + ASSERT_RTNL(); + + parms = mlxsw_sp_ipip_netdev_parms4(to_dev); + ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp, + 0, 0, dev_net(to_dev), parms.link, tun->fwmark, 0, + 0); + + rt = ip_route_output_key(tun->net, &fl4); + if (IS_ERR(rt)) + return NULL; + + if (rt->rt_type != RTN_UNICAST) + goto out; + + dev = rt->dst.dev; + *saddrp = fl4.saddr; + if (rt->rt_gw_family == AF_INET) + *daddrp = rt->rt_gw4; + /* can not offload if route has an IPv6 gateway */ + else if (rt->rt_gw_family == AF_INET6) + dev = NULL; + +out: + ip_rt_put(rt); + return dev; +} + +static int +mlxsw_sp_span_entry_gretap4_parms(struct mlxsw_sp *mlxsw_sp, + const struct net_device *to_dev, + struct mlxsw_sp_span_parms *sparmsp) +{ + struct ip_tunnel_parm tparm = mlxsw_sp_ipip_netdev_parms4(to_dev); + union mlxsw_sp_l3addr saddr = { .addr4 = tparm.iph.saddr }; + union mlxsw_sp_l3addr daddr = { .addr4 = tparm.iph.daddr }; + bool inherit_tos = tparm.iph.tos & 0x1; + bool inherit_ttl = !tparm.iph.ttl; + union mlxsw_sp_l3addr gw = daddr; + struct net_device *l3edev; + + if (!(to_dev->flags & IFF_UP) || + /* Reject tunnels with GRE keys, checksums, etc. */ + tparm.i_flags || tparm.o_flags || + /* Require a fixed TTL and a TOS copied from the mirrored packet. */ + inherit_ttl || !inherit_tos || + /* A destination address may not be "any". */ + mlxsw_sp_l3addr_is_zero(daddr)) + return mlxsw_sp_span_entry_unoffloadable(sparmsp); + + l3edev = mlxsw_sp_span_gretap4_route(to_dev, &saddr.addr4, &gw.addr4); + return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw, + tparm.iph.ttl, + &arp_tbl, sparmsp); +} + +static int +mlxsw_sp_span_entry_gretap4_configure(struct mlxsw_sp_span_entry *span_entry, + struct mlxsw_sp_span_parms sparms) +{ + struct mlxsw_sp_port *dest_port = sparms.dest_port; + struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp; + u16 local_port = dest_port->local_port; + char mpat_pl[MLXSW_REG_MPAT_LEN]; + int pa_id = span_entry->id; + + /* Create a new port analayzer entry for local_port. */ + mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true, + MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3); + mlxsw_reg_mpat_pide_set(mpat_pl, sparms.policer_enable); + mlxsw_reg_mpat_pid_set(mpat_pl, sparms.policer_id); + mlxsw_reg_mpat_eth_rspan_pack(mpat_pl, sparms.vid); + mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl, + MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER, + sparms.dmac, !!sparms.vid); + mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(mpat_pl, + sparms.ttl, sparms.smac, + be32_to_cpu(sparms.saddr.addr4), + be32_to_cpu(sparms.daddr.addr4)); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); +} + +static void +mlxsw_sp_span_entry_gretap4_deconfigure(struct mlxsw_sp_span_entry *span_entry) +{ + mlxsw_sp_span_entry_deconfigure_common(span_entry, + MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3); +} + +static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap4 = { + .can_handle = netif_is_gretap, + .parms_set = mlxsw_sp_span_entry_gretap4_parms, + .configure = mlxsw_sp_span_entry_gretap4_configure, + .deconfigure = mlxsw_sp_span_entry_gretap4_deconfigure, +}; +#endif + +#if IS_ENABLED(CONFIG_IPV6_GRE) +static struct net_device * +mlxsw_sp_span_gretap6_route(const struct net_device *to_dev, + struct in6_addr *saddrp, + struct in6_addr *daddrp) +{ + struct ip6_tnl *t = netdev_priv(to_dev); + struct flowi6 fl6 = t->fl.u.ip6; + struct net_device *dev = NULL; + struct dst_entry *dst; + struct rt6_info *rt6; + + /* We assume "dev" stays valid after dst is released. 
*/ + ASSERT_RTNL(); + + fl6.flowi6_mark = t->parms.fwmark; + if (!ip6_tnl_xmit_ctl(t, &fl6.saddr, &fl6.daddr)) + return NULL; + + dst = ip6_route_output(t->net, NULL, &fl6); + if (!dst || dst->error) + goto out; + + rt6 = container_of(dst, struct rt6_info, dst); + + dev = dst->dev; + *saddrp = fl6.saddr; + *daddrp = rt6->rt6i_gateway; + +out: + dst_release(dst); + return dev; +} + +static int +mlxsw_sp_span_entry_gretap6_parms(struct mlxsw_sp *mlxsw_sp, + const struct net_device *to_dev, + struct mlxsw_sp_span_parms *sparmsp) +{ + struct __ip6_tnl_parm tparm = mlxsw_sp_ipip_netdev_parms6(to_dev); + bool inherit_tos = tparm.flags & IP6_TNL_F_USE_ORIG_TCLASS; + union mlxsw_sp_l3addr saddr = { .addr6 = tparm.laddr }; + union mlxsw_sp_l3addr daddr = { .addr6 = tparm.raddr }; + bool inherit_ttl = !tparm.hop_limit; + union mlxsw_sp_l3addr gw = daddr; + struct net_device *l3edev; + + if (!(to_dev->flags & IFF_UP) || + /* Reject tunnels with GRE keys, checksums, etc. */ + tparm.i_flags || tparm.o_flags || + /* Require a fixed TTL and a TOS copied from the mirrored packet. */ + inherit_ttl || !inherit_tos || + /* A destination address may not be "any". */ + mlxsw_sp_l3addr_is_zero(daddr)) + return mlxsw_sp_span_entry_unoffloadable(sparmsp); + + l3edev = mlxsw_sp_span_gretap6_route(to_dev, &saddr.addr6, &gw.addr6); + return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw, + tparm.hop_limit, + &nd_tbl, sparmsp); +} + +static int +mlxsw_sp_span_entry_gretap6_configure(struct mlxsw_sp_span_entry *span_entry, + struct mlxsw_sp_span_parms sparms) +{ + struct mlxsw_sp_port *dest_port = sparms.dest_port; + struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp; + u16 local_port = dest_port->local_port; + char mpat_pl[MLXSW_REG_MPAT_LEN]; + int pa_id = span_entry->id; + + /* Create a new port analayzer entry for local_port. 
*/ + mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true, + MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3); + mlxsw_reg_mpat_pide_set(mpat_pl, sparms.policer_enable); + mlxsw_reg_mpat_pid_set(mpat_pl, sparms.policer_id); + mlxsw_reg_mpat_eth_rspan_pack(mpat_pl, sparms.vid); + mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl, + MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER, + sparms.dmac, !!sparms.vid); + mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(mpat_pl, sparms.ttl, sparms.smac, + sparms.saddr.addr6, + sparms.daddr.addr6); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); +} + +static void +mlxsw_sp_span_entry_gretap6_deconfigure(struct mlxsw_sp_span_entry *span_entry) +{ + mlxsw_sp_span_entry_deconfigure_common(span_entry, + MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3); +} + +static const +struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap6 = { + .can_handle = netif_is_ip6gretap, + .parms_set = mlxsw_sp_span_entry_gretap6_parms, + .configure = mlxsw_sp_span_entry_gretap6_configure, + .deconfigure = mlxsw_sp_span_entry_gretap6_deconfigure, +}; +#endif + +static bool +mlxsw_sp_span_vlan_can_handle(const struct net_device *dev) +{ + return is_vlan_dev(dev) && + mlxsw_sp_port_dev_check(vlan_dev_real_dev(dev)); +} + +static int +mlxsw_sp_span_entry_vlan_parms(struct mlxsw_sp *mlxsw_sp, + const struct net_device *to_dev, + struct mlxsw_sp_span_parms *sparmsp) +{ + struct net_device *real_dev; + u16 vid; + + if (!(to_dev->flags & IFF_UP)) + return mlxsw_sp_span_entry_unoffloadable(sparmsp); + + real_dev = mlxsw_sp_span_entry_vlan(to_dev, &vid); + sparmsp->dest_port = netdev_priv(real_dev); + sparmsp->vid = vid; + return 0; +} + +static int +mlxsw_sp_span_entry_vlan_configure(struct mlxsw_sp_span_entry *span_entry, + struct mlxsw_sp_span_parms sparms) +{ + struct mlxsw_sp_port *dest_port = sparms.dest_port; + struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp; + u16 local_port = dest_port->local_port; + char mpat_pl[MLXSW_REG_MPAT_LEN]; + int pa_id = span_entry->id; + + mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true, + MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH); + mlxsw_reg_mpat_pide_set(mpat_pl, sparms.policer_enable); + mlxsw_reg_mpat_pid_set(mpat_pl, sparms.policer_id); + mlxsw_reg_mpat_eth_rspan_pack(mpat_pl, sparms.vid); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); +} + +static void +mlxsw_sp_span_entry_vlan_deconfigure(struct mlxsw_sp_span_entry *span_entry) +{ + mlxsw_sp_span_entry_deconfigure_common(span_entry, + MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH); +} + +static const +struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_vlan = { + .can_handle = mlxsw_sp_span_vlan_can_handle, + .parms_set = mlxsw_sp_span_entry_vlan_parms, + .configure = mlxsw_sp_span_entry_vlan_configure, + .deconfigure = mlxsw_sp_span_entry_vlan_deconfigure, +}; + +static const +struct mlxsw_sp_span_entry_ops *mlxsw_sp1_span_entry_ops_arr[] = { + &mlxsw_sp1_span_entry_ops_cpu, + &mlxsw_sp_span_entry_ops_phys, +#if IS_ENABLED(CONFIG_NET_IPGRE) + &mlxsw_sp_span_entry_ops_gretap4, +#endif +#if IS_ENABLED(CONFIG_IPV6_GRE) + &mlxsw_sp_span_entry_ops_gretap6, +#endif + &mlxsw_sp_span_entry_ops_vlan, +}; + +static bool mlxsw_sp2_span_cpu_can_handle(const struct net_device *dev) +{ + return !dev; +} + +static int mlxsw_sp2_span_entry_cpu_parms(struct mlxsw_sp *mlxsw_sp, + const struct net_device *to_dev, + struct mlxsw_sp_span_parms *sparmsp) +{ + sparmsp->dest_port = mlxsw_sp->ports[MLXSW_PORT_CPU_PORT]; + return 0; +} + +static int +mlxsw_sp2_span_entry_cpu_configure(struct mlxsw_sp_span_entry 
*span_entry, + struct mlxsw_sp_span_parms sparms) +{ + /* Mirroring to the CPU port is like mirroring to any other physical + * port. Its local port is used instead of that of the physical port. + */ + return mlxsw_sp_span_entry_phys_configure(span_entry, sparms); +} + +static void +mlxsw_sp2_span_entry_cpu_deconfigure(struct mlxsw_sp_span_entry *span_entry) +{ + enum mlxsw_reg_mpat_span_type span_type; + + span_type = MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH; + mlxsw_sp_span_entry_deconfigure_common(span_entry, span_type); +} + +static const +struct mlxsw_sp_span_entry_ops mlxsw_sp2_span_entry_ops_cpu = { + .is_static = true, + .can_handle = mlxsw_sp2_span_cpu_can_handle, + .parms_set = mlxsw_sp2_span_entry_cpu_parms, + .configure = mlxsw_sp2_span_entry_cpu_configure, + .deconfigure = mlxsw_sp2_span_entry_cpu_deconfigure, +}; + +static const +struct mlxsw_sp_span_entry_ops *mlxsw_sp2_span_entry_ops_arr[] = { + &mlxsw_sp2_span_entry_ops_cpu, + &mlxsw_sp_span_entry_ops_phys, +#if IS_ENABLED(CONFIG_NET_IPGRE) + &mlxsw_sp_span_entry_ops_gretap4, +#endif +#if IS_ENABLED(CONFIG_IPV6_GRE) + &mlxsw_sp_span_entry_ops_gretap6, +#endif + &mlxsw_sp_span_entry_ops_vlan, +}; + +static int +mlxsw_sp_span_entry_nop_parms(struct mlxsw_sp *mlxsw_sp, + const struct net_device *to_dev, + struct mlxsw_sp_span_parms *sparmsp) +{ + return mlxsw_sp_span_entry_unoffloadable(sparmsp); +} + +static int +mlxsw_sp_span_entry_nop_configure(struct mlxsw_sp_span_entry *span_entry, + struct mlxsw_sp_span_parms sparms) +{ + return 0; +} + +static void +mlxsw_sp_span_entry_nop_deconfigure(struct mlxsw_sp_span_entry *span_entry) +{ +} + +static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_nop = { + .parms_set = mlxsw_sp_span_entry_nop_parms, + .configure = mlxsw_sp_span_entry_nop_configure, + .deconfigure = mlxsw_sp_span_entry_nop_deconfigure, +}; + +static void +mlxsw_sp_span_entry_configure(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_span_entry *span_entry, + struct mlxsw_sp_span_parms sparms) +{ + int err; + + if (!sparms.dest_port) + goto set_parms; + + if (sparms.dest_port->mlxsw_sp != mlxsw_sp) { + dev_err(mlxsw_sp->bus_info->dev, + "Cannot mirror to a port which belongs to a different mlxsw instance\n"); + sparms.dest_port = NULL; + goto set_parms; + } + + err = span_entry->ops->configure(span_entry, sparms); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to offload mirror\n"); + sparms.dest_port = NULL; + goto set_parms; + } + +set_parms: + span_entry->parms = sparms; +} + +static void +mlxsw_sp_span_entry_deconfigure(struct mlxsw_sp_span_entry *span_entry) +{ + if (span_entry->parms.dest_port) + span_entry->ops->deconfigure(span_entry); +} + +static int mlxsw_sp_span_policer_id_base_set(struct mlxsw_sp_span *span, + u16 policer_id) +{ + struct mlxsw_sp *mlxsw_sp = span->mlxsw_sp; + u16 policer_id_base; + int err; + + /* Policers set on SPAN agents must be in the range of + * `policer_id_base .. policer_id_base + max_span_agents - 1`. If the + * base is set and the new policer is not within the range, then we + * must error out. + */ + if (refcount_read(&span->policer_id_base_ref_count)) { + if (policer_id < span->policer_id_base || + policer_id >= span->policer_id_base + span->entries_count) + return -EINVAL; + + refcount_inc(&span->policer_id_base_ref_count); + return 0; + } + + /* Base must be even. */ + policer_id_base = policer_id % 2 == 0 ? 
policer_id : policer_id - 1; + err = mlxsw_sp->span_ops->policer_id_base_set(mlxsw_sp, + policer_id_base); + if (err) + return err; + + span->policer_id_base = policer_id_base; + refcount_set(&span->policer_id_base_ref_count, 1); + + return 0; +} + +static void mlxsw_sp_span_policer_id_base_unset(struct mlxsw_sp_span *span) +{ + if (refcount_dec_and_test(&span->policer_id_base_ref_count)) + span->policer_id_base = 0; +} + +static struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_create(struct mlxsw_sp *mlxsw_sp, + const struct net_device *to_dev, + const struct mlxsw_sp_span_entry_ops *ops, + struct mlxsw_sp_span_parms sparms) +{ + struct mlxsw_sp_span_entry *span_entry = NULL; + int i; + + /* find a free entry to use */ + for (i = 0; i < mlxsw_sp->span->entries_count; i++) { + if (!refcount_read(&mlxsw_sp->span->entries[i].ref_count)) { + span_entry = &mlxsw_sp->span->entries[i]; + break; + } + } + if (!span_entry) + return NULL; + + if (sparms.policer_enable) { + int err; + + err = mlxsw_sp_span_policer_id_base_set(mlxsw_sp->span, + sparms.policer_id); + if (err) + return NULL; + } + + atomic_inc(&mlxsw_sp->span->active_entries_count); + span_entry->ops = ops; + refcount_set(&span_entry->ref_count, 1); + span_entry->to_dev = to_dev; + mlxsw_sp_span_entry_configure(mlxsw_sp, span_entry, sparms); + + return span_entry; +} + +static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_span_entry *span_entry) +{ + mlxsw_sp_span_entry_deconfigure(span_entry); + atomic_dec(&mlxsw_sp->span->active_entries_count); + if (span_entry->parms.policer_enable) + mlxsw_sp_span_policer_id_base_unset(mlxsw_sp->span); +} + +struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp, + const struct net_device *to_dev) +{ + int i; + + for (i = 0; i < mlxsw_sp->span->entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span->entries[i]; + + if (refcount_read(&curr->ref_count) && curr->to_dev == to_dev) + return curr; + } + return NULL; +} + +void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_span_entry *span_entry) +{ + mlxsw_sp_span_entry_deconfigure(span_entry); + span_entry->ops = &mlxsw_sp_span_entry_ops_nop; +} + +static struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_find_by_id(struct mlxsw_sp *mlxsw_sp, int span_id) +{ + int i; + + for (i = 0; i < mlxsw_sp->span->entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span->entries[i]; + + if (refcount_read(&curr->ref_count) && curr->id == span_id) + return curr; + } + return NULL; +} + +static struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_find_by_parms(struct mlxsw_sp *mlxsw_sp, + const struct net_device *to_dev, + const struct mlxsw_sp_span_parms *sparms) +{ + int i; + + for (i = 0; i < mlxsw_sp->span->entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span->entries[i]; + + if (refcount_read(&curr->ref_count) && curr->to_dev == to_dev && + curr->parms.policer_enable == sparms->policer_enable && + curr->parms.policer_id == sparms->policer_id && + curr->parms.session_id == sparms->session_id) + return curr; + } + return NULL; +} + +static struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_get(struct mlxsw_sp *mlxsw_sp, + const struct net_device *to_dev, + const struct mlxsw_sp_span_entry_ops *ops, + struct mlxsw_sp_span_parms sparms) +{ + struct mlxsw_sp_span_entry *span_entry; + + span_entry = mlxsw_sp_span_entry_find_by_parms(mlxsw_sp, to_dev, + &sparms); + if (span_entry) { + /* Already exists, just take a 
reference */ + refcount_inc(&span_entry->ref_count); + return span_entry; + } + + return mlxsw_sp_span_entry_create(mlxsw_sp, to_dev, ops, sparms); +} + +static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_span_entry *span_entry) +{ + if (refcount_dec_and_test(&span_entry->ref_count)) + mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry); + return 0; +} + +static int mlxsw_sp_span_port_buffer_update(struct mlxsw_sp_port *mlxsw_sp_port, bool enable) +{ + struct mlxsw_sp_hdroom hdroom; + + hdroom = *mlxsw_sp_port->hdroom; + hdroom.int_buf.enable = enable; + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + + return mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); +} + +static int +mlxsw_sp_span_port_buffer_enable(struct mlxsw_sp_port *mlxsw_sp_port) +{ + return mlxsw_sp_span_port_buffer_update(mlxsw_sp_port, true); +} + +static void mlxsw_sp_span_port_buffer_disable(struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_span_port_buffer_update(mlxsw_sp_port, false); +} + +static struct mlxsw_sp_span_analyzed_port * +mlxsw_sp_span_analyzed_port_find(struct mlxsw_sp_span *span, u16 local_port, + bool ingress) +{ + struct mlxsw_sp_span_analyzed_port *analyzed_port; + + list_for_each_entry(analyzed_port, &span->analyzed_ports_list, list) { + if (analyzed_port->local_port == local_port && + analyzed_port->ingress == ingress) + return analyzed_port; + } + + return NULL; +} + +static const struct mlxsw_sp_span_entry_ops * +mlxsw_sp_span_entry_ops(struct mlxsw_sp *mlxsw_sp, + const struct net_device *to_dev) +{ + struct mlxsw_sp_span *span = mlxsw_sp->span; + size_t i; + + for (i = 0; i < span->span_entry_ops_arr_size; ++i) + if (span->span_entry_ops_arr[i]->can_handle(to_dev)) + return span->span_entry_ops_arr[i]; + + return NULL; +} + +static void mlxsw_sp_span_respin_work(struct work_struct *work) +{ + struct mlxsw_sp_span *span; + struct mlxsw_sp *mlxsw_sp; + int i, err; + + span = container_of(work, struct mlxsw_sp_span, work); + mlxsw_sp = span->mlxsw_sp; + + rtnl_lock(); + for (i = 0; i < mlxsw_sp->span->entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span->entries[i]; + struct mlxsw_sp_span_parms sparms = {NULL}; + + if (!refcount_read(&curr->ref_count)) + continue; + + if (curr->ops->is_static) + continue; + + err = curr->ops->parms_set(mlxsw_sp, curr->to_dev, &sparms); + if (err) + continue; + + if (memcmp(&sparms, &curr->parms, sizeof(sparms))) { + mlxsw_sp_span_entry_deconfigure(curr); + mlxsw_sp_span_entry_configure(mlxsw_sp, curr, sparms); + } + } + rtnl_unlock(); +} + +void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp) +{ + if (atomic_read(&mlxsw_sp->span->active_entries_count) == 0) + return; + mlxsw_core_schedule_work(&mlxsw_sp->span->work); +} + +int mlxsw_sp_span_agent_get(struct mlxsw_sp *mlxsw_sp, int *p_span_id, + const struct mlxsw_sp_span_agent_parms *parms) +{ + const struct net_device *to_dev = parms->to_dev; + const struct mlxsw_sp_span_entry_ops *ops; + struct mlxsw_sp_span_entry *span_entry; + struct mlxsw_sp_span_parms sparms; + int err; + + ASSERT_RTNL(); + + ops = mlxsw_sp_span_entry_ops(mlxsw_sp, to_dev); + if (!ops) { + dev_err(mlxsw_sp->bus_info->dev, "Cannot mirror to requested destination\n"); + return -EOPNOTSUPP; + } + + memset(&sparms, 0, sizeof(sparms)); + err = ops->parms_set(mlxsw_sp, to_dev, &sparms); + if (err) + return err; + + sparms.policer_id = parms->policer_id; + sparms.policer_enable = parms->policer_enable; + sparms.session_id = parms->session_id; + span_entry = 
mlxsw_sp_span_entry_get(mlxsw_sp, to_dev, ops, sparms); + if (!span_entry) + return -ENOBUFS; + + *p_span_id = span_entry->id; + + return 0; +} + +void mlxsw_sp_span_agent_put(struct mlxsw_sp *mlxsw_sp, int span_id) +{ + struct mlxsw_sp_span_entry *span_entry; + + ASSERT_RTNL(); + + span_entry = mlxsw_sp_span_entry_find_by_id(mlxsw_sp, span_id); + if (WARN_ON_ONCE(!span_entry)) + return; + + mlxsw_sp_span_entry_put(mlxsw_sp, span_entry); +} + +static struct mlxsw_sp_span_analyzed_port * +mlxsw_sp_span_analyzed_port_create(struct mlxsw_sp_span *span, + struct mlxsw_sp_port *mlxsw_sp_port, + bool ingress) +{ + struct mlxsw_sp_span_analyzed_port *analyzed_port; + int err; + + analyzed_port = kzalloc(sizeof(*analyzed_port), GFP_KERNEL); + if (!analyzed_port) + return ERR_PTR(-ENOMEM); + + refcount_set(&analyzed_port->ref_count, 1); + analyzed_port->local_port = mlxsw_sp_port->local_port; + analyzed_port->ingress = ingress; + list_add_tail(&analyzed_port->list, &span->analyzed_ports_list); + + /* An egress mirror buffer should be allocated on the egress port which + * does the mirroring. + */ + if (!ingress) { + err = mlxsw_sp_span_port_buffer_enable(mlxsw_sp_port); + if (err) + goto err_buffer_update; + } + + return analyzed_port; + +err_buffer_update: + list_del(&analyzed_port->list); + kfree(analyzed_port); + return ERR_PTR(err); +} + +static void +mlxsw_sp_span_analyzed_port_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_span_analyzed_port * + analyzed_port) +{ + /* Remove egress mirror buffer now that port is no longer analyzed + * at egress. + */ + if (!analyzed_port->ingress) + mlxsw_sp_span_port_buffer_disable(mlxsw_sp_port); + + list_del(&analyzed_port->list); + kfree(analyzed_port); +} + +int mlxsw_sp_span_analyzed_port_get(struct mlxsw_sp_port *mlxsw_sp_port, + bool ingress) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_span_analyzed_port *analyzed_port; + u16 local_port = mlxsw_sp_port->local_port; + int err = 0; + + mutex_lock(&mlxsw_sp->span->analyzed_ports_lock); + + analyzed_port = mlxsw_sp_span_analyzed_port_find(mlxsw_sp->span, + local_port, ingress); + if (analyzed_port) { + refcount_inc(&analyzed_port->ref_count); + goto out_unlock; + } + + analyzed_port = mlxsw_sp_span_analyzed_port_create(mlxsw_sp->span, + mlxsw_sp_port, + ingress); + if (IS_ERR(analyzed_port)) + err = PTR_ERR(analyzed_port); + +out_unlock: + mutex_unlock(&mlxsw_sp->span->analyzed_ports_lock); + return err; +} + +void mlxsw_sp_span_analyzed_port_put(struct mlxsw_sp_port *mlxsw_sp_port, + bool ingress) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_span_analyzed_port *analyzed_port; + u16 local_port = mlxsw_sp_port->local_port; + + mutex_lock(&mlxsw_sp->span->analyzed_ports_lock); + + analyzed_port = mlxsw_sp_span_analyzed_port_find(mlxsw_sp->span, + local_port, ingress); + if (WARN_ON_ONCE(!analyzed_port)) + goto out_unlock; + + if (!refcount_dec_and_test(&analyzed_port->ref_count)) + goto out_unlock; + + mlxsw_sp_span_analyzed_port_destroy(mlxsw_sp_port, analyzed_port); + +out_unlock: + mutex_unlock(&mlxsw_sp->span->analyzed_ports_lock); +} + +static int +__mlxsw_sp_span_trigger_port_bind(struct mlxsw_sp_span *span, + struct mlxsw_sp_span_trigger_entry * + trigger_entry, bool enable) +{ + char mpar_pl[MLXSW_REG_MPAR_LEN]; + enum mlxsw_reg_mpar_i_e i_e; + + switch (trigger_entry->trigger) { + case MLXSW_SP_SPAN_TRIGGER_INGRESS: + i_e = MLXSW_REG_MPAR_TYPE_INGRESS; + break; + case MLXSW_SP_SPAN_TRIGGER_EGRESS: + i_e = 
MLXSW_REG_MPAR_TYPE_EGRESS; + break; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } + + if (trigger_entry->parms.probability_rate > MLXSW_REG_MPAR_RATE_MAX) + return -EINVAL; + + mlxsw_reg_mpar_pack(mpar_pl, trigger_entry->local_port, i_e, enable, + trigger_entry->parms.span_id, + trigger_entry->parms.probability_rate); + return mlxsw_reg_write(span->mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl); +} + +static int +mlxsw_sp_span_trigger_port_bind(struct mlxsw_sp_span_trigger_entry * + trigger_entry) +{ + return __mlxsw_sp_span_trigger_port_bind(trigger_entry->span, + trigger_entry, true); +} + +static void +mlxsw_sp_span_trigger_port_unbind(struct mlxsw_sp_span_trigger_entry * + trigger_entry) +{ + __mlxsw_sp_span_trigger_port_bind(trigger_entry->span, trigger_entry, + false); +} + +static bool +mlxsw_sp_span_trigger_port_matches(struct mlxsw_sp_span_trigger_entry * + trigger_entry, + enum mlxsw_sp_span_trigger trigger, + struct mlxsw_sp_port *mlxsw_sp_port) +{ + return trigger_entry->trigger == trigger && + trigger_entry->local_port == mlxsw_sp_port->local_port; +} + +static int +mlxsw_sp_span_trigger_port_enable(struct mlxsw_sp_span_trigger_entry * + trigger_entry, + struct mlxsw_sp_port *mlxsw_sp_port, u8 tc) +{ + /* Port trigger are enabled during binding. */ + return 0; +} + +static void +mlxsw_sp_span_trigger_port_disable(struct mlxsw_sp_span_trigger_entry * + trigger_entry, + struct mlxsw_sp_port *mlxsw_sp_port, u8 tc) +{ +} + +static const struct mlxsw_sp_span_trigger_ops +mlxsw_sp_span_trigger_port_ops = { + .bind = mlxsw_sp_span_trigger_port_bind, + .unbind = mlxsw_sp_span_trigger_port_unbind, + .matches = mlxsw_sp_span_trigger_port_matches, + .enable = mlxsw_sp_span_trigger_port_enable, + .disable = mlxsw_sp_span_trigger_port_disable, +}; + +static int +mlxsw_sp1_span_trigger_global_bind(struct mlxsw_sp_span_trigger_entry * + trigger_entry) +{ + return -EOPNOTSUPP; +} + +static void +mlxsw_sp1_span_trigger_global_unbind(struct mlxsw_sp_span_trigger_entry * + trigger_entry) +{ +} + +static bool +mlxsw_sp1_span_trigger_global_matches(struct mlxsw_sp_span_trigger_entry * + trigger_entry, + enum mlxsw_sp_span_trigger trigger, + struct mlxsw_sp_port *mlxsw_sp_port) +{ + WARN_ON_ONCE(1); + return false; +} + +static int +mlxsw_sp1_span_trigger_global_enable(struct mlxsw_sp_span_trigger_entry * + trigger_entry, + struct mlxsw_sp_port *mlxsw_sp_port, + u8 tc) +{ + return -EOPNOTSUPP; +} + +static void +mlxsw_sp1_span_trigger_global_disable(struct mlxsw_sp_span_trigger_entry * + trigger_entry, + struct mlxsw_sp_port *mlxsw_sp_port, + u8 tc) +{ +} + +static const struct mlxsw_sp_span_trigger_ops +mlxsw_sp1_span_trigger_global_ops = { + .bind = mlxsw_sp1_span_trigger_global_bind, + .unbind = mlxsw_sp1_span_trigger_global_unbind, + .matches = mlxsw_sp1_span_trigger_global_matches, + .enable = mlxsw_sp1_span_trigger_global_enable, + .disable = mlxsw_sp1_span_trigger_global_disable, +}; + +static const struct mlxsw_sp_span_trigger_ops * +mlxsw_sp1_span_trigger_ops_arr[] = { + [MLXSW_SP_SPAN_TRIGGER_TYPE_PORT] = &mlxsw_sp_span_trigger_port_ops, + [MLXSW_SP_SPAN_TRIGGER_TYPE_GLOBAL] = + &mlxsw_sp1_span_trigger_global_ops, +}; + +static int +mlxsw_sp2_span_trigger_global_bind(struct mlxsw_sp_span_trigger_entry * + trigger_entry) +{ + struct mlxsw_sp *mlxsw_sp = trigger_entry->span->mlxsw_sp; + enum mlxsw_reg_mpagr_trigger trigger; + char mpagr_pl[MLXSW_REG_MPAGR_LEN]; + + switch (trigger_entry->trigger) { + case MLXSW_SP_SPAN_TRIGGER_TAIL_DROP: + trigger = 
MLXSW_REG_MPAGR_TRIGGER_INGRESS_SHARED_BUFFER; + break; + case MLXSW_SP_SPAN_TRIGGER_EARLY_DROP: + trigger = MLXSW_REG_MPAGR_TRIGGER_INGRESS_WRED; + break; + case MLXSW_SP_SPAN_TRIGGER_ECN: + trigger = MLXSW_REG_MPAGR_TRIGGER_EGRESS_ECN; + break; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } + + if (trigger_entry->parms.probability_rate > MLXSW_REG_MPAGR_RATE_MAX) + return -EINVAL; + + mlxsw_reg_mpagr_pack(mpagr_pl, trigger, trigger_entry->parms.span_id, + trigger_entry->parms.probability_rate); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpagr), mpagr_pl); +} + +static void +mlxsw_sp2_span_trigger_global_unbind(struct mlxsw_sp_span_trigger_entry * + trigger_entry) +{ + /* There is no unbinding for global triggers. The trigger should be + * disabled on all ports by now. + */ +} + +static bool +mlxsw_sp2_span_trigger_global_matches(struct mlxsw_sp_span_trigger_entry * + trigger_entry, + enum mlxsw_sp_span_trigger trigger, + struct mlxsw_sp_port *mlxsw_sp_port) +{ + return trigger_entry->trigger == trigger; +} + +static int +__mlxsw_sp2_span_trigger_global_enable(struct mlxsw_sp_span_trigger_entry * + trigger_entry, + struct mlxsw_sp_port *mlxsw_sp_port, + u8 tc, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = trigger_entry->span->mlxsw_sp; + char momte_pl[MLXSW_REG_MOMTE_LEN]; + enum mlxsw_reg_momte_type type; + int err; + + switch (trigger_entry->trigger) { + case MLXSW_SP_SPAN_TRIGGER_TAIL_DROP: + type = MLXSW_REG_MOMTE_TYPE_SHARED_BUFFER_TCLASS; + break; + case MLXSW_SP_SPAN_TRIGGER_EARLY_DROP: + type = MLXSW_REG_MOMTE_TYPE_WRED; + break; + case MLXSW_SP_SPAN_TRIGGER_ECN: + type = MLXSW_REG_MOMTE_TYPE_ECN; + break; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } + + /* Query existing configuration in order to only change the state of + * the specified traffic class. 
+ */ + mlxsw_reg_momte_pack(momte_pl, mlxsw_sp_port->local_port, type); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(momte), momte_pl); + if (err) + return err; + + mlxsw_reg_momte_tclass_en_set(momte_pl, tc, enable); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(momte), momte_pl); +} + +static int +mlxsw_sp2_span_trigger_global_enable(struct mlxsw_sp_span_trigger_entry * + trigger_entry, + struct mlxsw_sp_port *mlxsw_sp_port, + u8 tc) +{ + return __mlxsw_sp2_span_trigger_global_enable(trigger_entry, + mlxsw_sp_port, tc, true); +} + +static void +mlxsw_sp2_span_trigger_global_disable(struct mlxsw_sp_span_trigger_entry * + trigger_entry, + struct mlxsw_sp_port *mlxsw_sp_port, + u8 tc) +{ + __mlxsw_sp2_span_trigger_global_enable(trigger_entry, mlxsw_sp_port, tc, + false); +} + +static const struct mlxsw_sp_span_trigger_ops +mlxsw_sp2_span_trigger_global_ops = { + .bind = mlxsw_sp2_span_trigger_global_bind, + .unbind = mlxsw_sp2_span_trigger_global_unbind, + .matches = mlxsw_sp2_span_trigger_global_matches, + .enable = mlxsw_sp2_span_trigger_global_enable, + .disable = mlxsw_sp2_span_trigger_global_disable, +}; + +static const struct mlxsw_sp_span_trigger_ops * +mlxsw_sp2_span_trigger_ops_arr[] = { + [MLXSW_SP_SPAN_TRIGGER_TYPE_PORT] = &mlxsw_sp_span_trigger_port_ops, + [MLXSW_SP_SPAN_TRIGGER_TYPE_GLOBAL] = + &mlxsw_sp2_span_trigger_global_ops, +}; + +static void +mlxsw_sp_span_trigger_ops_set(struct mlxsw_sp_span_trigger_entry *trigger_entry) +{ + struct mlxsw_sp_span *span = trigger_entry->span; + enum mlxsw_sp_span_trigger_type type; + + switch (trigger_entry->trigger) { + case MLXSW_SP_SPAN_TRIGGER_INGRESS: + case MLXSW_SP_SPAN_TRIGGER_EGRESS: + type = MLXSW_SP_SPAN_TRIGGER_TYPE_PORT; + break; + case MLXSW_SP_SPAN_TRIGGER_TAIL_DROP: + case MLXSW_SP_SPAN_TRIGGER_EARLY_DROP: + case MLXSW_SP_SPAN_TRIGGER_ECN: + type = MLXSW_SP_SPAN_TRIGGER_TYPE_GLOBAL; + break; + default: + WARN_ON_ONCE(1); + return; + } + + trigger_entry->ops = span->span_trigger_ops_arr[type]; +} + +static struct mlxsw_sp_span_trigger_entry * +mlxsw_sp_span_trigger_entry_create(struct mlxsw_sp_span *span, + enum mlxsw_sp_span_trigger trigger, + struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_span_trigger_parms + *parms) +{ + struct mlxsw_sp_span_trigger_entry *trigger_entry; + int err; + + trigger_entry = kzalloc(sizeof(*trigger_entry), GFP_KERNEL); + if (!trigger_entry) + return ERR_PTR(-ENOMEM); + + refcount_set(&trigger_entry->ref_count, 1); + trigger_entry->local_port = mlxsw_sp_port ? 
mlxsw_sp_port->local_port : + 0; + trigger_entry->trigger = trigger; + memcpy(&trigger_entry->parms, parms, sizeof(trigger_entry->parms)); + trigger_entry->span = span; + mlxsw_sp_span_trigger_ops_set(trigger_entry); + list_add_tail(&trigger_entry->list, &span->trigger_entries_list); + + err = trigger_entry->ops->bind(trigger_entry); + if (err) + goto err_trigger_entry_bind; + + return trigger_entry; + +err_trigger_entry_bind: + list_del(&trigger_entry->list); + kfree(trigger_entry); + return ERR_PTR(err); +} + +static void +mlxsw_sp_span_trigger_entry_destroy(struct mlxsw_sp_span *span, + struct mlxsw_sp_span_trigger_entry * + trigger_entry) +{ + trigger_entry->ops->unbind(trigger_entry); + list_del(&trigger_entry->list); + kfree(trigger_entry); +} + +static struct mlxsw_sp_span_trigger_entry * +mlxsw_sp_span_trigger_entry_find(struct mlxsw_sp_span *span, + enum mlxsw_sp_span_trigger trigger, + struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp_span_trigger_entry *trigger_entry; + + list_for_each_entry(trigger_entry, &span->trigger_entries_list, list) { + if (trigger_entry->ops->matches(trigger_entry, trigger, + mlxsw_sp_port)) + return trigger_entry; + } + + return NULL; +} + +int mlxsw_sp_span_agent_bind(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_span_trigger trigger, + struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_span_trigger_parms *parms) +{ + struct mlxsw_sp_span_trigger_entry *trigger_entry; + int err = 0; + + ASSERT_RTNL(); + + if (!mlxsw_sp_span_entry_find_by_id(mlxsw_sp, parms->span_id)) + return -EINVAL; + + trigger_entry = mlxsw_sp_span_trigger_entry_find(mlxsw_sp->span, + trigger, + mlxsw_sp_port); + if (trigger_entry) { + if (trigger_entry->parms.span_id != parms->span_id || + trigger_entry->parms.probability_rate != + parms->probability_rate) + return -EINVAL; + refcount_inc(&trigger_entry->ref_count); + goto out; + } + + trigger_entry = mlxsw_sp_span_trigger_entry_create(mlxsw_sp->span, + trigger, + mlxsw_sp_port, + parms); + if (IS_ERR(trigger_entry)) + err = PTR_ERR(trigger_entry); + +out: + return err; +} + +void mlxsw_sp_span_agent_unbind(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_span_trigger trigger, + struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_span_trigger_parms *parms) +{ + struct mlxsw_sp_span_trigger_entry *trigger_entry; + + ASSERT_RTNL(); + + if (WARN_ON_ONCE(!mlxsw_sp_span_entry_find_by_id(mlxsw_sp, + parms->span_id))) + return; + + trigger_entry = mlxsw_sp_span_trigger_entry_find(mlxsw_sp->span, + trigger, + mlxsw_sp_port); + if (WARN_ON_ONCE(!trigger_entry)) + return; + + if (!refcount_dec_and_test(&trigger_entry->ref_count)) + return; + + mlxsw_sp_span_trigger_entry_destroy(mlxsw_sp->span, trigger_entry); +} + +int mlxsw_sp_span_trigger_enable(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_sp_span_trigger trigger, u8 tc) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_span_trigger_entry *trigger_entry; + + ASSERT_RTNL(); + + trigger_entry = mlxsw_sp_span_trigger_entry_find(mlxsw_sp->span, + trigger, + mlxsw_sp_port); + if (WARN_ON_ONCE(!trigger_entry)) + return -EINVAL; + + return trigger_entry->ops->enable(trigger_entry, mlxsw_sp_port, tc); +} + +void mlxsw_sp_span_trigger_disable(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_sp_span_trigger trigger, u8 tc) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_span_trigger_entry *trigger_entry; + + ASSERT_RTNL(); + + trigger_entry = mlxsw_sp_span_trigger_entry_find(mlxsw_sp->span, + trigger, + mlxsw_sp_port); + 
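+	/* The trigger entry is created by mlxsw_sp_span_agent_bind(), so a
+	 * failed lookup here means the trigger was never bound and indicates
+	 * a programming error.
+	 */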
if (WARN_ON_ONCE(!trigger_entry)) + return; + + return trigger_entry->ops->disable(trigger_entry, mlxsw_sp_port, tc); +} + +bool mlxsw_sp_span_trigger_is_ingress(enum mlxsw_sp_span_trigger trigger) +{ + switch (trigger) { + case MLXSW_SP_SPAN_TRIGGER_INGRESS: + case MLXSW_SP_SPAN_TRIGGER_EARLY_DROP: + case MLXSW_SP_SPAN_TRIGGER_TAIL_DROP: + return true; + case MLXSW_SP_SPAN_TRIGGER_EGRESS: + case MLXSW_SP_SPAN_TRIGGER_ECN: + return false; + } + + WARN_ON_ONCE(1); + return false; +} + +static int mlxsw_sp1_span_init(struct mlxsw_sp *mlxsw_sp) +{ + size_t arr_size = ARRAY_SIZE(mlxsw_sp1_span_entry_ops_arr); + + /* Must be first to avoid NULL pointer dereference by subsequent + * can_handle() callbacks. + */ + if (WARN_ON(mlxsw_sp1_span_entry_ops_arr[0] != + &mlxsw_sp1_span_entry_ops_cpu)) + return -EINVAL; + + mlxsw_sp->span->span_trigger_ops_arr = mlxsw_sp1_span_trigger_ops_arr; + mlxsw_sp->span->span_entry_ops_arr = mlxsw_sp1_span_entry_ops_arr; + mlxsw_sp->span->span_entry_ops_arr_size = arr_size; + + return 0; +} + +static int mlxsw_sp1_span_policer_id_base_set(struct mlxsw_sp *mlxsw_sp, + u16 policer_id_base) +{ + return -EOPNOTSUPP; +} + +const struct mlxsw_sp_span_ops mlxsw_sp1_span_ops = { + .init = mlxsw_sp1_span_init, + .policer_id_base_set = mlxsw_sp1_span_policer_id_base_set, +}; + +static int mlxsw_sp2_span_init(struct mlxsw_sp *mlxsw_sp) +{ + size_t arr_size = ARRAY_SIZE(mlxsw_sp2_span_entry_ops_arr); + + /* Must be first to avoid NULL pointer dereference by subsequent + * can_handle() callbacks. + */ + if (WARN_ON(mlxsw_sp2_span_entry_ops_arr[0] != + &mlxsw_sp2_span_entry_ops_cpu)) + return -EINVAL; + + mlxsw_sp->span->span_trigger_ops_arr = mlxsw_sp2_span_trigger_ops_arr; + mlxsw_sp->span->span_entry_ops_arr = mlxsw_sp2_span_entry_ops_arr; + mlxsw_sp->span->span_entry_ops_arr_size = arr_size; + + return 0; +} + +#define MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR 38 +#define MLXSW_SP3_SPAN_EG_MIRROR_BUFFER_FACTOR 50 + +static int mlxsw_sp2_span_policer_id_base_set(struct mlxsw_sp *mlxsw_sp, + u16 policer_id_base) +{ + char mogcr_pl[MLXSW_REG_MOGCR_LEN]; + int err; + + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mogcr), mogcr_pl); + if (err) + return err; + + mlxsw_reg_mogcr_mirroring_pid_base_set(mogcr_pl, policer_id_base); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mogcr), mogcr_pl); +} + +const struct mlxsw_sp_span_ops mlxsw_sp2_span_ops = { + .init = mlxsw_sp2_span_init, + .policer_id_base_set = mlxsw_sp2_span_policer_id_base_set, +}; + +const struct mlxsw_sp_span_ops mlxsw_sp3_span_ops = { + .init = mlxsw_sp2_span_init, + .policer_id_base_set = mlxsw_sp2_span_policer_id_base_set, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h new file mode 100644 index 000000000..82e711afb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_SPECTRUM_SPAN_H +#define _MLXSW_SPECTRUM_SPAN_H + +#include <linux/types.h> +#include <linux/if_ether.h> +#include <linux/refcount.h> + +#include "spectrum_router.h" + +struct mlxsw_sp; +struct mlxsw_sp_port; + +/* SPAN session identifiers that correspond to MLXSW_TRAP_ID_MIRROR_SESSION<i> + * trap identifiers. The session identifier is an attribute of the SPAN agent, + * which determines the trap identifier of packets that are mirrored to the + * CPU. 
Packets that are trapped to the CPU for the same logical reason (e.g., + * buffer drops) should use the same session identifier. + */ +enum mlxsw_sp_span_session_id { + MLXSW_SP_SPAN_SESSION_ID_BUFFER, + MLXSW_SP_SPAN_SESSION_ID_SAMPLING, + + __MLXSW_SP_SPAN_SESSION_ID_MAX = 8, +}; + +struct mlxsw_sp_span_parms { + struct mlxsw_sp_port *dest_port; /* NULL for unoffloaded SPAN. */ + unsigned int ttl; + unsigned char dmac[ETH_ALEN]; + unsigned char smac[ETH_ALEN]; + union mlxsw_sp_l3addr daddr; + union mlxsw_sp_l3addr saddr; + u16 vid; + u16 policer_id; + bool policer_enable; + enum mlxsw_sp_span_session_id session_id; +}; + +enum mlxsw_sp_span_trigger { + MLXSW_SP_SPAN_TRIGGER_INGRESS, + MLXSW_SP_SPAN_TRIGGER_EGRESS, + MLXSW_SP_SPAN_TRIGGER_TAIL_DROP, + MLXSW_SP_SPAN_TRIGGER_EARLY_DROP, + MLXSW_SP_SPAN_TRIGGER_ECN, +}; + +struct mlxsw_sp_span_trigger_parms { + int span_id; + u32 probability_rate; +}; + +struct mlxsw_sp_span_agent_parms { + const struct net_device *to_dev; + u16 policer_id; + bool policer_enable; + enum mlxsw_sp_span_session_id session_id; +}; + +struct mlxsw_sp_span_entry_ops; + +struct mlxsw_sp_span_ops { + int (*init)(struct mlxsw_sp *mlxsw_sp); + int (*policer_id_base_set)(struct mlxsw_sp *mlxsw_sp, + u16 policer_id_base); +}; + +struct mlxsw_sp_span_entry { + const struct net_device *to_dev; + const struct mlxsw_sp_span_entry_ops *ops; + struct mlxsw_sp_span_parms parms; + refcount_t ref_count; + int id; +}; + +struct mlxsw_sp_span_entry_ops { + bool is_static; + bool (*can_handle)(const struct net_device *to_dev); + int (*parms_set)(struct mlxsw_sp *mlxsw_sp, + const struct net_device *to_dev, + struct mlxsw_sp_span_parms *sparmsp); + int (*configure)(struct mlxsw_sp_span_entry *span_entry, + struct mlxsw_sp_span_parms sparms); + void (*deconfigure)(struct mlxsw_sp_span_entry *span_entry); +}; + +int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp); + +struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp, + const struct net_device *to_dev); + +void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_span_entry *span_entry); + +int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu); +void mlxsw_sp_span_speed_update_work(struct work_struct *work); + +int mlxsw_sp_span_agent_get(struct mlxsw_sp *mlxsw_sp, int *p_span_id, + const struct mlxsw_sp_span_agent_parms *parms); +void mlxsw_sp_span_agent_put(struct mlxsw_sp *mlxsw_sp, int span_id); +int mlxsw_sp_span_analyzed_port_get(struct mlxsw_sp_port *mlxsw_sp_port, + bool ingress); +void mlxsw_sp_span_analyzed_port_put(struct mlxsw_sp_port *mlxsw_sp_port, + bool ingress); +int mlxsw_sp_span_agent_bind(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_span_trigger trigger, + struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_span_trigger_parms *parms); +void +mlxsw_sp_span_agent_unbind(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_span_trigger trigger, + struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_span_trigger_parms *parms); +int mlxsw_sp_span_trigger_enable(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_sp_span_trigger trigger, u8 tc); +void mlxsw_sp_span_trigger_disable(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_sp_span_trigger trigger, u8 tc); +bool mlxsw_sp_span_trigger_is_ingress(enum mlxsw_sp_span_trigger trigger); + +extern const struct mlxsw_sp_span_ops mlxsw_sp1_span_ops; +extern const struct mlxsw_sp_span_ops 
mlxsw_sp2_span_ops; +extern const struct mlxsw_sp_span_ops mlxsw_sp3_span_ops; + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c new file mode 100644 index 000000000..1290b2d3e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -0,0 +1,4015 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/slab.h> +#include <linux/device.h> +#include <linux/skbuff.h> +#include <linux/if_vlan.h> +#include <linux/if_bridge.h> +#include <linux/workqueue.h> +#include <linux/jiffies.h> +#include <linux/rtnetlink.h> +#include <linux/netlink.h> +#include <net/switchdev.h> +#include <net/vxlan.h> + +#include "spectrum_span.h" +#include "spectrum_switchdev.h" +#include "spectrum.h" +#include "core.h" +#include "reg.h" + +struct mlxsw_sp_bridge_ops; + +struct mlxsw_sp_bridge { + struct mlxsw_sp *mlxsw_sp; + struct { + struct delayed_work dw; +#define MLXSW_SP_DEFAULT_LEARNING_INTERVAL 100 + unsigned int interval; /* ms */ + } fdb_notify; +#define MLXSW_SP_MIN_AGEING_TIME 10 +#define MLXSW_SP_MAX_AGEING_TIME 1000000 +#define MLXSW_SP_DEFAULT_AGEING_TIME 300 + u32 ageing_time; + bool vlan_enabled_exists; + struct list_head bridges_list; + DECLARE_BITMAP(mids_bitmap, MLXSW_SP_MID_MAX); + const struct mlxsw_sp_bridge_ops *bridge_8021q_ops; + const struct mlxsw_sp_bridge_ops *bridge_8021d_ops; + const struct mlxsw_sp_bridge_ops *bridge_8021ad_ops; +}; + +struct mlxsw_sp_bridge_device { + struct net_device *dev; + struct list_head list; + struct list_head ports_list; + struct list_head mdb_list; + struct rhashtable mdb_ht; + u8 vlan_enabled:1, + multicast_enabled:1, + mrouter:1; + const struct mlxsw_sp_bridge_ops *ops; +}; + +struct mlxsw_sp_bridge_port { + struct net_device *dev; + struct mlxsw_sp_bridge_device *bridge_device; + struct list_head list; + struct list_head vlans_list; + unsigned int ref_count; + u8 stp_state; + unsigned long flags; + bool mrouter; + bool lagged; + union { + u16 lag_id; + u16 system_port; + }; +}; + +struct mlxsw_sp_bridge_vlan { + struct list_head list; + struct list_head port_vlan_list; + u16 vid; +}; + +struct mlxsw_sp_bridge_ops { + int (*port_join)(struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_port *mlxsw_sp_port, + struct netlink_ext_ack *extack); + void (*port_leave)(struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_port *mlxsw_sp_port); + int (*vxlan_join)(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev, u16 vid, + struct netlink_ext_ack *extack); + struct mlxsw_sp_fid * + (*fid_get)(struct mlxsw_sp_bridge_device *bridge_device, + u16 vid, struct netlink_ext_ack *extack); + struct mlxsw_sp_fid * + (*fid_lookup)(struct mlxsw_sp_bridge_device *bridge_device, + u16 vid); + u16 (*fid_vid)(struct mlxsw_sp_bridge_device *bridge_device, + const struct mlxsw_sp_fid *fid); +}; + +struct mlxsw_sp_switchdev_ops { + void (*init)(struct mlxsw_sp *mlxsw_sp); +}; + +struct mlxsw_sp_mdb_entry_key { + unsigned char addr[ETH_ALEN]; + u16 fid; +}; + +struct mlxsw_sp_mdb_entry { + struct list_head list; + struct rhash_head ht_node; + struct mlxsw_sp_mdb_entry_key key; + u16 mid; + struct list_head ports_list; + u16 
ports_count; +}; + +struct mlxsw_sp_mdb_entry_port { + struct list_head list; /* Member of 'ports_list'. */ + u16 local_port; + refcount_t refcount; + bool mrouter; +}; + +static const struct rhashtable_params mlxsw_sp_mdb_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_mdb_entry, key), + .head_offset = offsetof(struct mlxsw_sp_mdb_entry, ht_node), + .key_len = sizeof(struct mlxsw_sp_mdb_entry_key), +}; + +static int +mlxsw_sp_bridge_port_fdb_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_port *bridge_port, + u16 fid_index); + +static void +mlxsw_sp_bridge_port_mdb_flush(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port, + u16 fid_index); + +static int +mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_device + *bridge_device, bool mc_enabled); + +static void +mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port, + bool add); + +static struct mlxsw_sp_bridge_device * +mlxsw_sp_bridge_device_find(const struct mlxsw_sp_bridge *bridge, + const struct net_device *br_dev) +{ + struct mlxsw_sp_bridge_device *bridge_device; + + list_for_each_entry(bridge_device, &bridge->bridges_list, list) + if (bridge_device->dev == br_dev) + return bridge_device; + + return NULL; +} + +bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev) +{ + return !!mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); +} + +static int mlxsw_sp_bridge_device_upper_rif_destroy(struct net_device *dev, + struct netdev_nested_priv *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv->data; + + mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, dev); + return 0; +} + +static void mlxsw_sp_bridge_device_rifs_destroy(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev) +{ + struct netdev_nested_priv priv = { + .data = (void *)mlxsw_sp, + }; + + mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, dev); + netdev_walk_all_upper_dev_rcu(dev, + mlxsw_sp_bridge_device_upper_rif_destroy, + &priv); +} + +static int mlxsw_sp_bridge_device_vxlan_init(struct mlxsw_sp_bridge *bridge, + struct net_device *br_dev, + struct netlink_ext_ack *extack) +{ + struct net_device *dev, *stop_dev; + struct list_head *iter; + int err; + + netdev_for_each_lower_dev(br_dev, dev, iter) { + if (netif_is_vxlan(dev) && netif_running(dev)) { + err = mlxsw_sp_bridge_vxlan_join(bridge->mlxsw_sp, + br_dev, dev, 0, + extack); + if (err) { + stop_dev = dev; + goto err_vxlan_join; + } + } + } + + return 0; + +err_vxlan_join: + netdev_for_each_lower_dev(br_dev, dev, iter) { + if (netif_is_vxlan(dev) && netif_running(dev)) { + if (stop_dev == dev) + break; + mlxsw_sp_bridge_vxlan_leave(bridge->mlxsw_sp, dev); + } + } + return err; +} + +static void mlxsw_sp_bridge_device_vxlan_fini(struct mlxsw_sp_bridge *bridge, + struct net_device *br_dev) +{ + struct net_device *dev; + struct list_head *iter; + + netdev_for_each_lower_dev(br_dev, dev, iter) { + if (netif_is_vxlan(dev) && netif_running(dev)) + mlxsw_sp_bridge_vxlan_leave(bridge->mlxsw_sp, dev); + } +} + +static void mlxsw_sp_fdb_notify_work_schedule(struct mlxsw_sp *mlxsw_sp, + bool no_delay) +{ + struct mlxsw_sp_bridge *bridge = mlxsw_sp->bridge; + unsigned int interval = no_delay ? 
0 : bridge->fdb_notify.interval; + + mlxsw_core_schedule_dw(&bridge->fdb_notify.dw, + msecs_to_jiffies(interval)); +} + +static struct mlxsw_sp_bridge_device * +mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge, + struct net_device *br_dev, + struct netlink_ext_ack *extack) +{ + struct device *dev = bridge->mlxsw_sp->bus_info->dev; + struct mlxsw_sp_bridge_device *bridge_device; + bool vlan_enabled = br_vlan_enabled(br_dev); + int err; + + if (vlan_enabled && bridge->vlan_enabled_exists) { + dev_err(dev, "Only one VLAN-aware bridge is supported\n"); + NL_SET_ERR_MSG_MOD(extack, "Only one VLAN-aware bridge is supported"); + return ERR_PTR(-EINVAL); + } + + bridge_device = kzalloc(sizeof(*bridge_device), GFP_KERNEL); + if (!bridge_device) + return ERR_PTR(-ENOMEM); + + err = rhashtable_init(&bridge_device->mdb_ht, &mlxsw_sp_mdb_ht_params); + if (err) + goto err_mdb_rhashtable_init; + + bridge_device->dev = br_dev; + bridge_device->vlan_enabled = vlan_enabled; + bridge_device->multicast_enabled = br_multicast_enabled(br_dev); + bridge_device->mrouter = br_multicast_router(br_dev); + INIT_LIST_HEAD(&bridge_device->ports_list); + if (vlan_enabled) { + u16 proto; + + bridge->vlan_enabled_exists = true; + br_vlan_get_proto(br_dev, &proto); + if (proto == ETH_P_8021AD) + bridge_device->ops = bridge->bridge_8021ad_ops; + else + bridge_device->ops = bridge->bridge_8021q_ops; + } else { + bridge_device->ops = bridge->bridge_8021d_ops; + } + INIT_LIST_HEAD(&bridge_device->mdb_list); + + if (list_empty(&bridge->bridges_list)) + mlxsw_sp_fdb_notify_work_schedule(bridge->mlxsw_sp, false); + list_add(&bridge_device->list, &bridge->bridges_list); + + /* It is possible we already have VXLAN devices enslaved to the bridge. + * In which case, we need to replay their configuration as if they were + * just now enslaved to the bridge. 
+ */ + err = mlxsw_sp_bridge_device_vxlan_init(bridge, br_dev, extack); + if (err) + goto err_vxlan_init; + + return bridge_device; + +err_vxlan_init: + list_del(&bridge_device->list); + if (bridge_device->vlan_enabled) + bridge->vlan_enabled_exists = false; + rhashtable_destroy(&bridge_device->mdb_ht); +err_mdb_rhashtable_init: + kfree(bridge_device); + return ERR_PTR(err); +} + +static void +mlxsw_sp_bridge_device_destroy(struct mlxsw_sp_bridge *bridge, + struct mlxsw_sp_bridge_device *bridge_device) +{ + mlxsw_sp_bridge_device_vxlan_fini(bridge, bridge_device->dev); + mlxsw_sp_bridge_device_rifs_destroy(bridge->mlxsw_sp, + bridge_device->dev); + list_del(&bridge_device->list); + if (list_empty(&bridge->bridges_list)) + cancel_delayed_work(&bridge->fdb_notify.dw); + if (bridge_device->vlan_enabled) + bridge->vlan_enabled_exists = false; + WARN_ON(!list_empty(&bridge_device->ports_list)); + WARN_ON(!list_empty(&bridge_device->mdb_list)); + rhashtable_destroy(&bridge_device->mdb_ht); + kfree(bridge_device); +} + +static struct mlxsw_sp_bridge_device * +mlxsw_sp_bridge_device_get(struct mlxsw_sp_bridge *bridge, + struct net_device *br_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_bridge_device *bridge_device; + + bridge_device = mlxsw_sp_bridge_device_find(bridge, br_dev); + if (bridge_device) + return bridge_device; + + return mlxsw_sp_bridge_device_create(bridge, br_dev, extack); +} + +static void +mlxsw_sp_bridge_device_put(struct mlxsw_sp_bridge *bridge, + struct mlxsw_sp_bridge_device *bridge_device) +{ + if (list_empty(&bridge_device->ports_list)) + mlxsw_sp_bridge_device_destroy(bridge, bridge_device); +} + +static struct mlxsw_sp_bridge_port * +__mlxsw_sp_bridge_port_find(const struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *brport_dev) +{ + struct mlxsw_sp_bridge_port *bridge_port; + + list_for_each_entry(bridge_port, &bridge_device->ports_list, list) { + if (bridge_port->dev == brport_dev) + return bridge_port; + } + + return NULL; +} + +struct mlxsw_sp_bridge_port * +mlxsw_sp_bridge_port_find(struct mlxsw_sp_bridge *bridge, + struct net_device *brport_dev) +{ + struct net_device *br_dev = netdev_master_upper_dev_get(brport_dev); + struct mlxsw_sp_bridge_device *bridge_device; + + if (!br_dev) + return NULL; + + bridge_device = mlxsw_sp_bridge_device_find(bridge, br_dev); + if (!bridge_device) + return NULL; + + return __mlxsw_sp_bridge_port_find(bridge_device, brport_dev); +} + +static struct mlxsw_sp_bridge_port * +mlxsw_sp_bridge_port_create(struct mlxsw_sp_bridge_device *bridge_device, + struct net_device *brport_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_bridge_port *bridge_port; + struct mlxsw_sp_port *mlxsw_sp_port; + int err; + + bridge_port = kzalloc(sizeof(*bridge_port), GFP_KERNEL); + if (!bridge_port) + return ERR_PTR(-ENOMEM); + + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(brport_dev); + bridge_port->lagged = mlxsw_sp_port->lagged; + if (bridge_port->lagged) + bridge_port->lag_id = mlxsw_sp_port->lag_id; + else + bridge_port->system_port = mlxsw_sp_port->local_port; + bridge_port->dev = brport_dev; + bridge_port->bridge_device = bridge_device; + bridge_port->stp_state = BR_STATE_DISABLED; + bridge_port->flags = BR_LEARNING | BR_FLOOD | BR_LEARNING_SYNC | + BR_MCAST_FLOOD; + INIT_LIST_HEAD(&bridge_port->vlans_list); + list_add(&bridge_port->list, &bridge_device->ports_list); + bridge_port->ref_count = 1; + + err = switchdev_bridge_port_offload(brport_dev, mlxsw_sp_port->dev, + NULL, NULL, NULL, false, extack); + 
if (err) + goto err_switchdev_offload; + + return bridge_port; + +err_switchdev_offload: + list_del(&bridge_port->list); + kfree(bridge_port); + return ERR_PTR(err); +} + +static void +mlxsw_sp_bridge_port_destroy(struct mlxsw_sp_bridge_port *bridge_port) +{ + switchdev_bridge_port_unoffload(bridge_port->dev, NULL, NULL, NULL); + list_del(&bridge_port->list); + WARN_ON(!list_empty(&bridge_port->vlans_list)); + kfree(bridge_port); +} + +static struct mlxsw_sp_bridge_port * +mlxsw_sp_bridge_port_get(struct mlxsw_sp_bridge *bridge, + struct net_device *brport_dev, + struct netlink_ext_ack *extack) +{ + struct net_device *br_dev = netdev_master_upper_dev_get(brport_dev); + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_bridge_port *bridge_port; + int err; + + bridge_port = mlxsw_sp_bridge_port_find(bridge, brport_dev); + if (bridge_port) { + bridge_port->ref_count++; + return bridge_port; + } + + bridge_device = mlxsw_sp_bridge_device_get(bridge, br_dev, extack); + if (IS_ERR(bridge_device)) + return ERR_CAST(bridge_device); + + bridge_port = mlxsw_sp_bridge_port_create(bridge_device, brport_dev, + extack); + if (IS_ERR(bridge_port)) { + err = PTR_ERR(bridge_port); + goto err_bridge_port_create; + } + + return bridge_port; + +err_bridge_port_create: + mlxsw_sp_bridge_device_put(bridge, bridge_device); + return ERR_PTR(err); +} + +static void mlxsw_sp_bridge_port_put(struct mlxsw_sp_bridge *bridge, + struct mlxsw_sp_bridge_port *bridge_port) +{ + struct mlxsw_sp_bridge_device *bridge_device; + + if (--bridge_port->ref_count != 0) + return; + bridge_device = bridge_port->bridge_device; + mlxsw_sp_bridge_port_destroy(bridge_port); + mlxsw_sp_bridge_device_put(bridge, bridge_device); +} + +static struct mlxsw_sp_port_vlan * +mlxsw_sp_port_vlan_find_by_bridge(struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_bridge_device * + bridge_device, + u16 vid) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + + list_for_each_entry(mlxsw_sp_port_vlan, &mlxsw_sp_port->vlans_list, + list) { + if (!mlxsw_sp_port_vlan->bridge_port) + continue; + if (mlxsw_sp_port_vlan->bridge_port->bridge_device != + bridge_device) + continue; + if (bridge_device->vlan_enabled && + mlxsw_sp_port_vlan->vid != vid) + continue; + return mlxsw_sp_port_vlan; + } + + return NULL; +} + +static struct mlxsw_sp_port_vlan* +mlxsw_sp_port_vlan_find_by_fid(struct mlxsw_sp_port *mlxsw_sp_port, + u16 fid_index) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + + list_for_each_entry(mlxsw_sp_port_vlan, &mlxsw_sp_port->vlans_list, + list) { + struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid; + + if (fid && mlxsw_sp_fid_index(fid) == fid_index) + return mlxsw_sp_port_vlan; + } + + return NULL; +} + +static struct mlxsw_sp_bridge_vlan * +mlxsw_sp_bridge_vlan_find(const struct mlxsw_sp_bridge_port *bridge_port, + u16 vid) +{ + struct mlxsw_sp_bridge_vlan *bridge_vlan; + + list_for_each_entry(bridge_vlan, &bridge_port->vlans_list, list) { + if (bridge_vlan->vid == vid) + return bridge_vlan; + } + + return NULL; +} + +static struct mlxsw_sp_bridge_vlan * +mlxsw_sp_bridge_vlan_create(struct mlxsw_sp_bridge_port *bridge_port, u16 vid) +{ + struct mlxsw_sp_bridge_vlan *bridge_vlan; + + bridge_vlan = kzalloc(sizeof(*bridge_vlan), GFP_KERNEL); + if (!bridge_vlan) + return NULL; + + INIT_LIST_HEAD(&bridge_vlan->port_vlan_list); + bridge_vlan->vid = vid; + list_add(&bridge_vlan->list, &bridge_port->vlans_list); + + return bridge_vlan; +} + +static void +mlxsw_sp_bridge_vlan_destroy(struct mlxsw_sp_bridge_vlan 
*bridge_vlan) +{ + list_del(&bridge_vlan->list); + WARN_ON(!list_empty(&bridge_vlan->port_vlan_list)); + kfree(bridge_vlan); +} + +static struct mlxsw_sp_bridge_vlan * +mlxsw_sp_bridge_vlan_get(struct mlxsw_sp_bridge_port *bridge_port, u16 vid) +{ + struct mlxsw_sp_bridge_vlan *bridge_vlan; + + bridge_vlan = mlxsw_sp_bridge_vlan_find(bridge_port, vid); + if (bridge_vlan) + return bridge_vlan; + + return mlxsw_sp_bridge_vlan_create(bridge_port, vid); +} + +static void mlxsw_sp_bridge_vlan_put(struct mlxsw_sp_bridge_vlan *bridge_vlan) +{ + if (list_empty(&bridge_vlan->port_vlan_list)) + mlxsw_sp_bridge_vlan_destroy(bridge_vlan); +} + +static int +mlxsw_sp_port_bridge_vlan_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_vlan *bridge_vlan, + u8 state) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + + list_for_each_entry(mlxsw_sp_port_vlan, &bridge_vlan->port_vlan_list, + bridge_vlan_node) { + if (mlxsw_sp_port_vlan->mlxsw_sp_port != mlxsw_sp_port) + continue; + return mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, + bridge_vlan->vid, state); + } + + return 0; +} + +static int mlxsw_sp_port_attr_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *orig_dev, + u8 state) +{ + struct mlxsw_sp_bridge_port *bridge_port; + struct mlxsw_sp_bridge_vlan *bridge_vlan; + int err; + + /* It's possible we failed to enslave the port, yet this + * operation is executed due to it being deferred. + */ + bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp_port->mlxsw_sp->bridge, + orig_dev); + if (!bridge_port) + return 0; + + list_for_each_entry(bridge_vlan, &bridge_port->vlans_list, list) { + err = mlxsw_sp_port_bridge_vlan_stp_set(mlxsw_sp_port, + bridge_vlan, state); + if (err) + goto err_port_bridge_vlan_stp_set; + } + + bridge_port->stp_state = state; + + return 0; + +err_port_bridge_vlan_stp_set: + list_for_each_entry_continue_reverse(bridge_vlan, + &bridge_port->vlans_list, list) + mlxsw_sp_port_bridge_vlan_stp_set(mlxsw_sp_port, bridge_vlan, + bridge_port->stp_state); + return err; +} + +static int +mlxsw_sp_port_bridge_vlan_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_vlan *bridge_vlan, + enum mlxsw_sp_flood_type packet_type, + bool member) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + + list_for_each_entry(mlxsw_sp_port_vlan, &bridge_vlan->port_vlan_list, + bridge_vlan_node) { + if (mlxsw_sp_port_vlan->mlxsw_sp_port != mlxsw_sp_port) + continue; + return mlxsw_sp_fid_flood_set(mlxsw_sp_port_vlan->fid, + packet_type, + mlxsw_sp_port->local_port, + member); + } + + return 0; +} + +static int +mlxsw_sp_bridge_port_flood_table_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port, + enum mlxsw_sp_flood_type packet_type, + bool member) +{ + struct mlxsw_sp_bridge_vlan *bridge_vlan; + int err; + + list_for_each_entry(bridge_vlan, &bridge_port->vlans_list, list) { + err = mlxsw_sp_port_bridge_vlan_flood_set(mlxsw_sp_port, + bridge_vlan, + packet_type, + member); + if (err) + goto err_port_bridge_vlan_flood_set; + } + + return 0; + +err_port_bridge_vlan_flood_set: + list_for_each_entry_continue_reverse(bridge_vlan, + &bridge_port->vlans_list, list) + mlxsw_sp_port_bridge_vlan_flood_set(mlxsw_sp_port, bridge_vlan, + packet_type, !member); + return err; +} + +static int +mlxsw_sp_bridge_vlans_flood_set(struct mlxsw_sp_bridge_vlan *bridge_vlan, + enum mlxsw_sp_flood_type packet_type, + bool member) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + int err; + + list_for_each_entry(mlxsw_sp_port_vlan, 
&bridge_vlan->port_vlan_list, + bridge_vlan_node) { + u16 local_port = mlxsw_sp_port_vlan->mlxsw_sp_port->local_port; + + err = mlxsw_sp_fid_flood_set(mlxsw_sp_port_vlan->fid, + packet_type, local_port, member); + if (err) + goto err_fid_flood_set; + } + + return 0; + +err_fid_flood_set: + list_for_each_entry_continue_reverse(mlxsw_sp_port_vlan, + &bridge_vlan->port_vlan_list, + list) { + u16 local_port = mlxsw_sp_port_vlan->mlxsw_sp_port->local_port; + + mlxsw_sp_fid_flood_set(mlxsw_sp_port_vlan->fid, packet_type, + local_port, !member); + } + + return err; +} + +static int +mlxsw_sp_bridge_ports_flood_table_set(struct mlxsw_sp_bridge_port *bridge_port, + enum mlxsw_sp_flood_type packet_type, + bool member) +{ + struct mlxsw_sp_bridge_vlan *bridge_vlan; + int err; + + list_for_each_entry(bridge_vlan, &bridge_port->vlans_list, list) { + err = mlxsw_sp_bridge_vlans_flood_set(bridge_vlan, packet_type, + member); + if (err) + goto err_bridge_vlans_flood_set; + } + + return 0; + +err_bridge_vlans_flood_set: + list_for_each_entry_continue_reverse(bridge_vlan, + &bridge_port->vlans_list, list) + mlxsw_sp_bridge_vlans_flood_set(bridge_vlan, packet_type, + !member); + return err; +} + +static int +mlxsw_sp_port_bridge_vlan_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_vlan *bridge_vlan, + bool set) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + u16 vid = bridge_vlan->vid; + + list_for_each_entry(mlxsw_sp_port_vlan, &bridge_vlan->port_vlan_list, + bridge_vlan_node) { + if (mlxsw_sp_port_vlan->mlxsw_sp_port != mlxsw_sp_port) + continue; + return mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, set); + } + + return 0; +} + +static int +mlxsw_sp_bridge_port_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port, + bool set) +{ + struct mlxsw_sp_bridge_vlan *bridge_vlan; + int err; + + list_for_each_entry(bridge_vlan, &bridge_port->vlans_list, list) { + err = mlxsw_sp_port_bridge_vlan_learning_set(mlxsw_sp_port, + bridge_vlan, set); + if (err) + goto err_port_bridge_vlan_learning_set; + } + + return 0; + +err_port_bridge_vlan_learning_set: + list_for_each_entry_continue_reverse(bridge_vlan, + &bridge_port->vlans_list, list) + mlxsw_sp_port_bridge_vlan_learning_set(mlxsw_sp_port, + bridge_vlan, !set); + return err; +} + +static int +mlxsw_sp_port_attr_br_pre_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct switchdev_brport_flags flags) +{ + if (flags.mask & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD)) + return -EINVAL; + + return 0; +} + +static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *orig_dev, + struct switchdev_brport_flags flags) +{ + struct mlxsw_sp_bridge_port *bridge_port; + int err; + + bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp_port->mlxsw_sp->bridge, + orig_dev); + if (!bridge_port) + return 0; + + if (flags.mask & BR_FLOOD) { + err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, + bridge_port, + MLXSW_SP_FLOOD_TYPE_UC, + flags.val & BR_FLOOD); + if (err) + return err; + } + + if (flags.mask & BR_LEARNING) { + err = mlxsw_sp_bridge_port_learning_set(mlxsw_sp_port, + bridge_port, + flags.val & BR_LEARNING); + if (err) + return err; + } + + if (bridge_port->bridge_device->multicast_enabled) + goto out; + + if (flags.mask & BR_MCAST_FLOOD) { + err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, + bridge_port, + MLXSW_SP_FLOOD_TYPE_MC, + flags.val & BR_MCAST_FLOOD); + if (err) + return err; + } + +out: + memcpy(&bridge_port->flags, &flags.val, 
sizeof(flags.val)); + return 0; +} + +static int mlxsw_sp_ageing_set(struct mlxsw_sp *mlxsw_sp, u32 ageing_time) +{ + char sfdat_pl[MLXSW_REG_SFDAT_LEN]; + int err; + + mlxsw_reg_sfdat_pack(sfdat_pl, ageing_time); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdat), sfdat_pl); + if (err) + return err; + mlxsw_sp->bridge->ageing_time = ageing_time; + return 0; +} + +static int mlxsw_sp_port_attr_br_ageing_set(struct mlxsw_sp_port *mlxsw_sp_port, + unsigned long ageing_clock_t) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock_t); + u32 ageing_time = jiffies_to_msecs(ageing_jiffies) / 1000; + + if (ageing_time < MLXSW_SP_MIN_AGEING_TIME || + ageing_time > MLXSW_SP_MAX_AGEING_TIME) + return -ERANGE; + + return mlxsw_sp_ageing_set(mlxsw_sp, ageing_time); +} + +static int mlxsw_sp_port_attr_br_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *orig_dev, + bool vlan_enabled) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_bridge_device *bridge_device; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, orig_dev); + if (WARN_ON(!bridge_device)) + return -EINVAL; + + if (bridge_device->vlan_enabled == vlan_enabled) + return 0; + + netdev_err(bridge_device->dev, "VLAN filtering can't be changed for existing bridge\n"); + return -EINVAL; +} + +static int mlxsw_sp_port_attr_br_vlan_proto_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *orig_dev, + u16 vlan_proto) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_bridge_device *bridge_device; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, orig_dev); + if (WARN_ON(!bridge_device)) + return -EINVAL; + + netdev_err(bridge_device->dev, "VLAN protocol can't be changed on existing bridge\n"); + return -EINVAL; +} + +static int mlxsw_sp_port_attr_mrouter_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *orig_dev, + bool is_port_mrouter) +{ + struct mlxsw_sp_bridge_port *bridge_port; + int err; + + bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp_port->mlxsw_sp->bridge, + orig_dev); + if (!bridge_port) + return 0; + + mlxsw_sp_port_mrouter_update_mdb(mlxsw_sp_port, bridge_port, + is_port_mrouter); + + if (!bridge_port->bridge_device->multicast_enabled) + goto out; + + err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, bridge_port, + MLXSW_SP_FLOOD_TYPE_MC, + is_port_mrouter); + if (err) + return err; + +out: + bridge_port->mrouter = is_port_mrouter; + return 0; +} + +static bool mlxsw_sp_mc_flood(const struct mlxsw_sp_bridge_port *bridge_port) +{ + const struct mlxsw_sp_bridge_device *bridge_device; + + bridge_device = bridge_port->bridge_device; + return bridge_device->multicast_enabled ? bridge_port->mrouter : + bridge_port->flags & BR_MCAST_FLOOD; +} + +static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *orig_dev, + bool mc_disabled) +{ + enum mlxsw_sp_flood_type packet_type = MLXSW_SP_FLOOD_TYPE_MC; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_bridge_port *bridge_port; + int err; + + /* It's possible we failed to enslave the port, yet this + * operation is executed due to it being deferred. 
+ */ + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, orig_dev); + if (!bridge_device) + return 0; + + if (bridge_device->multicast_enabled == !mc_disabled) + return 0; + + bridge_device->multicast_enabled = !mc_disabled; + err = mlxsw_sp_bridge_mdb_mc_enable_sync(mlxsw_sp, bridge_device, + !mc_disabled); + if (err) + goto err_mc_enable_sync; + + list_for_each_entry(bridge_port, &bridge_device->ports_list, list) { + bool member = mlxsw_sp_mc_flood(bridge_port); + + err = mlxsw_sp_bridge_ports_flood_table_set(bridge_port, + packet_type, + member); + if (err) + goto err_flood_table_set; + } + + return 0; + +err_flood_table_set: + list_for_each_entry_continue_reverse(bridge_port, + &bridge_device->ports_list, list) { + bool member = mlxsw_sp_mc_flood(bridge_port); + + mlxsw_sp_bridge_ports_flood_table_set(bridge_port, packet_type, + !member); + } + mlxsw_sp_bridge_mdb_mc_enable_sync(mlxsw_sp, bridge_device, + mc_disabled); +err_mc_enable_sync: + bridge_device->multicast_enabled = mc_disabled; + return err; +} + +static struct mlxsw_sp_mdb_entry_port * +mlxsw_sp_mdb_entry_port_lookup(struct mlxsw_sp_mdb_entry *mdb_entry, + u16 local_port) +{ + struct mlxsw_sp_mdb_entry_port *mdb_entry_port; + + list_for_each_entry(mdb_entry_port, &mdb_entry->ports_list, list) { + if (mdb_entry_port->local_port == local_port) + return mdb_entry_port; + } + + return NULL; +} + +static struct mlxsw_sp_mdb_entry_port * +mlxsw_sp_mdb_entry_port_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mdb_entry *mdb_entry, + u16 local_port) +{ + struct mlxsw_sp_mdb_entry_port *mdb_entry_port; + int err; + + mdb_entry_port = mlxsw_sp_mdb_entry_port_lookup(mdb_entry, local_port); + if (mdb_entry_port) { + if (mdb_entry_port->mrouter && + refcount_read(&mdb_entry_port->refcount) == 1) + mdb_entry->ports_count++; + + refcount_inc(&mdb_entry_port->refcount); + return mdb_entry_port; + } + + err = mlxsw_sp_pgt_entry_port_set(mlxsw_sp, mdb_entry->mid, + mdb_entry->key.fid, local_port, true); + if (err) + return ERR_PTR(err); + + mdb_entry_port = kzalloc(sizeof(*mdb_entry_port), GFP_KERNEL); + if (!mdb_entry_port) { + err = -ENOMEM; + goto err_mdb_entry_port_alloc; + } + + mdb_entry_port->local_port = local_port; + refcount_set(&mdb_entry_port->refcount, 1); + list_add(&mdb_entry_port->list, &mdb_entry->ports_list); + mdb_entry->ports_count++; + + return mdb_entry_port; + +err_mdb_entry_port_alloc: + mlxsw_sp_pgt_entry_port_set(mlxsw_sp, mdb_entry->mid, + mdb_entry->key.fid, local_port, false); + return ERR_PTR(err); +} + +static void +mlxsw_sp_mdb_entry_port_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mdb_entry *mdb_entry, + u16 local_port, bool force) +{ + struct mlxsw_sp_mdb_entry_port *mdb_entry_port; + + mdb_entry_port = mlxsw_sp_mdb_entry_port_lookup(mdb_entry, local_port); + if (!mdb_entry_port) + return; + + if (!force && !refcount_dec_and_test(&mdb_entry_port->refcount)) { + if (mdb_entry_port->mrouter && + refcount_read(&mdb_entry_port->refcount) == 1) + mdb_entry->ports_count--; + return; + } + + mdb_entry->ports_count--; + list_del(&mdb_entry_port->list); + kfree(mdb_entry_port); + mlxsw_sp_pgt_entry_port_set(mlxsw_sp, mdb_entry->mid, + mdb_entry->key.fid, local_port, false); +} + +static __always_unused struct mlxsw_sp_mdb_entry_port * +mlxsw_sp_mdb_entry_mrouter_port_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mdb_entry *mdb_entry, + u16 local_port) +{ + struct mlxsw_sp_mdb_entry_port *mdb_entry_port; + int err; + + mdb_entry_port = mlxsw_sp_mdb_entry_port_lookup(mdb_entry, local_port); + if 
(mdb_entry_port) { + if (!mdb_entry_port->mrouter) + refcount_inc(&mdb_entry_port->refcount); + return mdb_entry_port; + } + + err = mlxsw_sp_pgt_entry_port_set(mlxsw_sp, mdb_entry->mid, + mdb_entry->key.fid, local_port, true); + if (err) + return ERR_PTR(err); + + mdb_entry_port = kzalloc(sizeof(*mdb_entry_port), GFP_KERNEL); + if (!mdb_entry_port) { + err = -ENOMEM; + goto err_mdb_entry_port_alloc; + } + + mdb_entry_port->local_port = local_port; + refcount_set(&mdb_entry_port->refcount, 1); + mdb_entry_port->mrouter = true; + list_add(&mdb_entry_port->list, &mdb_entry->ports_list); + + return mdb_entry_port; + +err_mdb_entry_port_alloc: + mlxsw_sp_pgt_entry_port_set(mlxsw_sp, mdb_entry->mid, + mdb_entry->key.fid, local_port, false); + return ERR_PTR(err); +} + +static __always_unused void +mlxsw_sp_mdb_entry_mrouter_port_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mdb_entry *mdb_entry, + u16 local_port) +{ + struct mlxsw_sp_mdb_entry_port *mdb_entry_port; + + mdb_entry_port = mlxsw_sp_mdb_entry_port_lookup(mdb_entry, local_port); + if (!mdb_entry_port) + return; + + if (!mdb_entry_port->mrouter) + return; + + mdb_entry_port->mrouter = false; + if (!refcount_dec_and_test(&mdb_entry_port->refcount)) + return; + + list_del(&mdb_entry_port->list); + kfree(mdb_entry_port); + mlxsw_sp_pgt_entry_port_set(mlxsw_sp, mdb_entry->mid, + mdb_entry->key.fid, local_port, false); +} + +static void +mlxsw_sp_bridge_mrouter_update_mdb(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_device *bridge_device, + bool add) +{ + u16 local_port = mlxsw_sp_router_port(mlxsw_sp); + struct mlxsw_sp_mdb_entry *mdb_entry; + + list_for_each_entry(mdb_entry, &bridge_device->mdb_list, list) { + if (add) + mlxsw_sp_mdb_entry_mrouter_port_get(mlxsw_sp, mdb_entry, + local_port); + else + mlxsw_sp_mdb_entry_mrouter_port_put(mlxsw_sp, mdb_entry, + local_port); + } +} + +static int +mlxsw_sp_port_attr_br_mrouter_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *orig_dev, + bool is_mrouter) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_bridge_device *bridge_device; + + /* It's possible we failed to enslave the port, yet this + * operation is executed due to it being deferred. 
+ */ + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, orig_dev); + if (!bridge_device) + return 0; + + if (bridge_device->mrouter != is_mrouter) + mlxsw_sp_bridge_mrouter_update_mdb(mlxsw_sp, bridge_device, + is_mrouter); + bridge_device->mrouter = is_mrouter; + return 0; +} + +static int mlxsw_sp_port_attr_set(struct net_device *dev, const void *ctx, + const struct switchdev_attr *attr, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err; + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_STP_STATE: + err = mlxsw_sp_port_attr_stp_state_set(mlxsw_sp_port, + attr->orig_dev, + attr->u.stp_state); + break; + case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: + err = mlxsw_sp_port_attr_br_pre_flags_set(mlxsw_sp_port, + attr->u.brport_flags); + break; + case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: + err = mlxsw_sp_port_attr_br_flags_set(mlxsw_sp_port, + attr->orig_dev, + attr->u.brport_flags); + break; + case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: + err = mlxsw_sp_port_attr_br_ageing_set(mlxsw_sp_port, + attr->u.ageing_time); + break; + case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: + err = mlxsw_sp_port_attr_br_vlan_set(mlxsw_sp_port, + attr->orig_dev, + attr->u.vlan_filtering); + break; + case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL: + err = mlxsw_sp_port_attr_br_vlan_proto_set(mlxsw_sp_port, + attr->orig_dev, + attr->u.vlan_protocol); + break; + case SWITCHDEV_ATTR_ID_PORT_MROUTER: + err = mlxsw_sp_port_attr_mrouter_set(mlxsw_sp_port, + attr->orig_dev, + attr->u.mrouter); + break; + case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED: + err = mlxsw_sp_port_mc_disabled_set(mlxsw_sp_port, + attr->orig_dev, + attr->u.mc_disabled); + break; + case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER: + err = mlxsw_sp_port_attr_br_mrouter_set(mlxsw_sp_port, + attr->orig_dev, + attr->u.mrouter); + break; + default: + err = -EOPNOTSUPP; + break; + } + + mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp); + + return err; +} + +static int +mlxsw_sp_port_vlan_fid_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, + struct mlxsw_sp_bridge_port *bridge_port, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; + struct mlxsw_sp_bridge_device *bridge_device; + u16 local_port = mlxsw_sp_port->local_port; + u16 vid = mlxsw_sp_port_vlan->vid; + struct mlxsw_sp_fid *fid; + int err; + + bridge_device = bridge_port->bridge_device; + fid = bridge_device->ops->fid_get(bridge_device, vid, extack); + if (IS_ERR(fid)) + return PTR_ERR(fid); + + err = mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_UC, local_port, + bridge_port->flags & BR_FLOOD); + if (err) + goto err_fid_uc_flood_set; + + err = mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_MC, local_port, + mlxsw_sp_mc_flood(bridge_port)); + if (err) + goto err_fid_mc_flood_set; + + err = mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_BC, local_port, + true); + if (err) + goto err_fid_bc_flood_set; + + err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid); + if (err) + goto err_fid_port_vid_map; + + mlxsw_sp_port_vlan->fid = fid; + + return 0; + +err_fid_port_vid_map: + mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_BC, local_port, false); +err_fid_bc_flood_set: + mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_MC, local_port, false); +err_fid_mc_flood_set: + mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_UC, local_port, false); +err_fid_uc_flood_set: + mlxsw_sp_fid_put(fid); + return err; +} + +static void +mlxsw_sp_port_vlan_fid_leave(struct mlxsw_sp_port_vlan 
*mlxsw_sp_port_vlan) +{ + struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; + struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid; + u16 local_port = mlxsw_sp_port->local_port; + u16 vid = mlxsw_sp_port_vlan->vid; + + mlxsw_sp_port_vlan->fid = NULL; + mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid); + mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_BC, local_port, false); + mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_MC, local_port, false); + mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_UC, local_port, false); + mlxsw_sp_fid_put(fid); +} + +static u16 +mlxsw_sp_port_pvid_determine(const struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid, bool is_pvid) +{ + if (is_pvid) + return vid; + else if (mlxsw_sp_port->pvid == vid) + return 0; /* Dis-allow untagged packets */ + else + return mlxsw_sp_port->pvid; +} + +static int +mlxsw_sp_port_vlan_bridge_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, + struct mlxsw_sp_bridge_port *bridge_port, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; + struct mlxsw_sp_bridge_vlan *bridge_vlan; + u16 vid = mlxsw_sp_port_vlan->vid; + int err; + + /* No need to continue if only VLAN flags were changed */ + if (mlxsw_sp_port_vlan->bridge_port) + return 0; + + err = mlxsw_sp_port_vlan_fid_join(mlxsw_sp_port_vlan, bridge_port, + extack); + if (err) + return err; + + err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, + bridge_port->flags & BR_LEARNING); + if (err) + goto err_port_vid_learning_set; + + err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, + bridge_port->stp_state); + if (err) + goto err_port_vid_stp_set; + + bridge_vlan = mlxsw_sp_bridge_vlan_get(bridge_port, vid); + if (!bridge_vlan) { + err = -ENOMEM; + goto err_bridge_vlan_get; + } + + list_add(&mlxsw_sp_port_vlan->bridge_vlan_node, + &bridge_vlan->port_vlan_list); + + mlxsw_sp_bridge_port_get(mlxsw_sp_port->mlxsw_sp->bridge, + bridge_port->dev, extack); + mlxsw_sp_port_vlan->bridge_port = bridge_port; + + return 0; + +err_bridge_vlan_get: + mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_DISABLED); +err_port_vid_stp_set: + mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false); +err_port_vid_learning_set: + mlxsw_sp_port_vlan_fid_leave(mlxsw_sp_port_vlan); + return err; +} + +void +mlxsw_sp_port_vlan_bridge_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) +{ + struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; + struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid; + struct mlxsw_sp_bridge_vlan *bridge_vlan; + struct mlxsw_sp_bridge_port *bridge_port; + u16 vid = mlxsw_sp_port_vlan->vid; + bool last_port; + + if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_8021Q && + mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_8021D)) + return; + + bridge_port = mlxsw_sp_port_vlan->bridge_port; + bridge_vlan = mlxsw_sp_bridge_vlan_find(bridge_port, vid); + last_port = list_is_singular(&bridge_vlan->port_vlan_list); + + list_del(&mlxsw_sp_port_vlan->bridge_vlan_node); + mlxsw_sp_bridge_vlan_put(bridge_vlan); + mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_DISABLED); + mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false); + if (last_port) + mlxsw_sp_bridge_port_fdb_flush(mlxsw_sp_port->mlxsw_sp, + bridge_port, + mlxsw_sp_fid_index(fid)); + + mlxsw_sp_bridge_port_mdb_flush(mlxsw_sp_port, bridge_port, + mlxsw_sp_fid_index(fid)); + + mlxsw_sp_port_vlan_fid_leave(mlxsw_sp_port_vlan); + + mlxsw_sp_bridge_port_put(mlxsw_sp_port->mlxsw_sp->bridge, bridge_port); + 
mlxsw_sp_port_vlan->bridge_port = NULL; +} + +static int +mlxsw_sp_bridge_port_vlan_add(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port, + u16 vid, bool is_untagged, bool is_pvid, + struct netlink_ext_ack *extack) +{ + u16 pvid = mlxsw_sp_port_pvid_determine(mlxsw_sp_port, vid, is_pvid); + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + u16 old_pvid = mlxsw_sp_port->pvid; + u16 proto; + int err; + + /* The only valid scenario in which a port-vlan already exists, is if + * the VLAN flags were changed and the port-vlan is associated with the + * correct bridge port + */ + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); + if (mlxsw_sp_port_vlan && + mlxsw_sp_port_vlan->bridge_port != bridge_port) + return -EEXIST; + + if (!mlxsw_sp_port_vlan) { + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_create(mlxsw_sp_port, + vid); + if (IS_ERR(mlxsw_sp_port_vlan)) + return PTR_ERR(mlxsw_sp_port_vlan); + } + + err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, true, + is_untagged); + if (err) + goto err_port_vlan_set; + + br_vlan_get_proto(bridge_port->bridge_device->dev, &proto); + err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, pvid, proto); + if (err) + goto err_port_pvid_set; + + err = mlxsw_sp_port_vlan_bridge_join(mlxsw_sp_port_vlan, bridge_port, + extack); + if (err) + goto err_port_vlan_bridge_join; + + return 0; + +err_port_vlan_bridge_join: + mlxsw_sp_port_pvid_set(mlxsw_sp_port, old_pvid, proto); +err_port_pvid_set: + mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false); +err_port_vlan_set: + mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan); + return err; +} + +static int +mlxsw_sp_br_ban_rif_pvid_change(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + const struct switchdev_obj_port_vlan *vlan) +{ + u16 pvid; + + pvid = mlxsw_sp_rif_vid(mlxsw_sp, br_dev); + if (!pvid) + return 0; + + if (vlan->flags & BRIDGE_VLAN_INFO_PVID) { + if (vlan->vid != pvid) { + netdev_err(br_dev, "Can't change PVID, it's used by router interface\n"); + return -EBUSY; + } + } else { + if (vlan->vid == pvid) { + netdev_err(br_dev, "Can't remove PVID, it's used by router interface\n"); + return -EBUSY; + } + } + + return 0; +} + +static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_port_vlan *vlan, + struct netlink_ext_ack *extack) +{ + bool flag_untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; + bool flag_pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct net_device *orig_dev = vlan->obj.orig_dev; + struct mlxsw_sp_bridge_port *bridge_port; + + if (netif_is_bridge_master(orig_dev)) { + int err = 0; + + if (br_vlan_enabled(orig_dev)) + err = mlxsw_sp_br_ban_rif_pvid_change(mlxsw_sp, + orig_dev, vlan); + if (!err) + err = -EOPNOTSUPP; + return err; + } + + bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); + if (WARN_ON(!bridge_port)) + return -EINVAL; + + if (!bridge_port->bridge_device->vlan_enabled) + return 0; + + return mlxsw_sp_bridge_port_vlan_add(mlxsw_sp_port, bridge_port, + vlan->vid, flag_untagged, + flag_pvid, extack); +} + +static enum mlxsw_reg_sfdf_flush_type mlxsw_sp_fdb_flush_type(bool lagged) +{ + return lagged ? 
MLXSW_REG_SFDF_FLUSH_PER_LAG_AND_FID : + MLXSW_REG_SFDF_FLUSH_PER_PORT_AND_FID; +} + +static int +mlxsw_sp_bridge_port_fdb_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_port *bridge_port, + u16 fid_index) +{ + bool lagged = bridge_port->lagged; + char sfdf_pl[MLXSW_REG_SFDF_LEN]; + u16 system_port; + + system_port = lagged ? bridge_port->lag_id : bridge_port->system_port; + mlxsw_reg_sfdf_pack(sfdf_pl, mlxsw_sp_fdb_flush_type(lagged)); + mlxsw_reg_sfdf_fid_set(sfdf_pl, fid_index); + mlxsw_reg_sfdf_port_fid_system_port_set(sfdf_pl, system_port); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl); +} + +static enum mlxsw_reg_sfd_rec_policy mlxsw_sp_sfd_rec_policy(bool dynamic) +{ + return dynamic ? MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS : + MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_MLAG; +} + +static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding) +{ + return adding ? MLXSW_REG_SFD_OP_WRITE_EDIT : + MLXSW_REG_SFD_OP_WRITE_REMOVE; +} + +static int +mlxsw_sp_port_fdb_tun_uc_op4(struct mlxsw_sp *mlxsw_sp, bool dynamic, + const char *mac, u16 fid, __be32 addr, bool adding) +{ + char *sfd_pl; + u8 num_rec; + u32 uip; + int err; + + sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); + if (!sfd_pl) + return -ENOMEM; + + uip = be32_to_cpu(addr); + mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); + mlxsw_reg_sfd_uc_tunnel_pack4(sfd_pl, 0, + mlxsw_sp_sfd_rec_policy(dynamic), mac, + fid, MLXSW_REG_SFD_REC_ACTION_NOP, uip); + num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); + if (err) + goto out; + + if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl)) + err = -EBUSY; + +out: + kfree(sfd_pl); + return err; +} + +static int mlxsw_sp_port_fdb_tun_uc_op6_sfd_write(struct mlxsw_sp *mlxsw_sp, + const char *mac, u16 fid, + u32 kvdl_index, bool adding) +{ + char *sfd_pl; + u8 num_rec; + int err; + + sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); + if (!sfd_pl) + return -ENOMEM; + + mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); + mlxsw_reg_sfd_uc_tunnel_pack6(sfd_pl, 0, mac, fid, + MLXSW_REG_SFD_REC_ACTION_NOP, kvdl_index); + num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); + if (err) + goto out; + + if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl)) + err = -EBUSY; + +out: + kfree(sfd_pl); + return err; +} + +static int mlxsw_sp_port_fdb_tun_uc_op6_add(struct mlxsw_sp *mlxsw_sp, + const char *mac, u16 fid, + const struct in6_addr *addr) +{ + u32 kvdl_index; + int err; + + err = mlxsw_sp_nve_ipv6_addr_kvdl_set(mlxsw_sp, addr, &kvdl_index); + if (err) + return err; + + err = mlxsw_sp_port_fdb_tun_uc_op6_sfd_write(mlxsw_sp, mac, fid, + kvdl_index, true); + if (err) + goto err_sfd_write; + + err = mlxsw_sp_nve_ipv6_addr_map_replace(mlxsw_sp, mac, fid, addr); + if (err) + /* Replace can fail only for creating new mapping, so removing + * the FDB entry in the error path is OK. 
+ */ + goto err_addr_replace; + + return 0; + +err_addr_replace: + mlxsw_sp_port_fdb_tun_uc_op6_sfd_write(mlxsw_sp, mac, fid, kvdl_index, + false); +err_sfd_write: + mlxsw_sp_nve_ipv6_addr_kvdl_unset(mlxsw_sp, addr); + return err; +} + +static void mlxsw_sp_port_fdb_tun_uc_op6_del(struct mlxsw_sp *mlxsw_sp, + const char *mac, u16 fid, + const struct in6_addr *addr) +{ + mlxsw_sp_nve_ipv6_addr_map_del(mlxsw_sp, mac, fid); + mlxsw_sp_port_fdb_tun_uc_op6_sfd_write(mlxsw_sp, mac, fid, 0, false); + mlxsw_sp_nve_ipv6_addr_kvdl_unset(mlxsw_sp, addr); +} + +static int +mlxsw_sp_port_fdb_tun_uc_op6(struct mlxsw_sp *mlxsw_sp, const char *mac, + u16 fid, const struct in6_addr *addr, bool adding) +{ + if (adding) + return mlxsw_sp_port_fdb_tun_uc_op6_add(mlxsw_sp, mac, fid, + addr); + + mlxsw_sp_port_fdb_tun_uc_op6_del(mlxsw_sp, mac, fid, addr); + return 0; +} + +static int mlxsw_sp_port_fdb_tunnel_uc_op(struct mlxsw_sp *mlxsw_sp, + const char *mac, u16 fid, + enum mlxsw_sp_l3proto proto, + const union mlxsw_sp_l3addr *addr, + bool adding, bool dynamic) +{ + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return mlxsw_sp_port_fdb_tun_uc_op4(mlxsw_sp, dynamic, mac, fid, + addr->addr4, adding); + case MLXSW_SP_L3_PROTO_IPV6: + return mlxsw_sp_port_fdb_tun_uc_op6(mlxsw_sp, mac, fid, + &addr->addr6, adding); + default: + WARN_ON(1); + return -EOPNOTSUPP; + } +} + +static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u16 local_port, + const char *mac, u16 fid, u16 vid, + bool adding, + enum mlxsw_reg_sfd_rec_action action, + enum mlxsw_reg_sfd_rec_policy policy) +{ + char *sfd_pl; + u8 num_rec; + int err; + + sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); + if (!sfd_pl) + return -ENOMEM; + + mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); + mlxsw_reg_sfd_uc_pack(sfd_pl, 0, policy, mac, fid, vid, action, + local_port); + num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); + if (err) + goto out; + + if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl)) + err = -EBUSY; + +out: + kfree(sfd_pl); + return err; +} + +static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u16 local_port, + const char *mac, u16 fid, u16 vid, + bool adding, bool dynamic) +{ + return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid, vid, + adding, MLXSW_REG_SFD_REC_ACTION_NOP, + mlxsw_sp_sfd_rec_policy(dynamic)); +} + +int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid, + bool adding) +{ + return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, 0, mac, fid, 0, adding, + MLXSW_REG_SFD_REC_ACTION_FORWARD_IP_ROUTER, + MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY); +} + +static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id, + const char *mac, u16 fid, u16 lag_vid, + bool adding, bool dynamic) +{ + char *sfd_pl; + u8 num_rec; + int err; + + sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); + if (!sfd_pl) + return -ENOMEM; + + mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); + mlxsw_reg_sfd_uc_lag_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic), + mac, fid, MLXSW_REG_SFD_REC_ACTION_NOP, + lag_vid, lag_id); + num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); + if (err) + goto out; + + if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl)) + err = -EBUSY; + +out: + kfree(sfd_pl); + return err; +} + +static int +mlxsw_sp_port_fdb_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct switchdev_notifier_fdb_info *fdb_info, bool adding) +{ + struct mlxsw_sp *mlxsw_sp = 
mlxsw_sp_port->mlxsw_sp; + struct net_device *orig_dev = fdb_info->info.dev; + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_bridge_port *bridge_port; + u16 fid_index, vid; + + bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); + if (!bridge_port) + return -EINVAL; + + bridge_device = bridge_port->bridge_device; + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_bridge(mlxsw_sp_port, + bridge_device, + fdb_info->vid); + if (!mlxsw_sp_port_vlan) + return 0; + + fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid); + vid = mlxsw_sp_port_vlan->vid; + + if (!bridge_port->lagged) + return mlxsw_sp_port_fdb_uc_op(mlxsw_sp, + bridge_port->system_port, + fdb_info->addr, fid_index, vid, + adding, false); + else + return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, + bridge_port->lag_id, + fdb_info->addr, fid_index, + vid, adding, false); +} + +static int mlxsw_sp_mdb_entry_write(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_mdb_entry *mdb_entry, + bool adding) +{ + char *sfd_pl; + u8 num_rec; + int err; + + sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); + if (!sfd_pl) + return -ENOMEM; + + mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); + mlxsw_reg_sfd_mc_pack(sfd_pl, 0, mdb_entry->key.addr, + mdb_entry->key.fid, MLXSW_REG_SFD_REC_ACTION_NOP, + mdb_entry->mid); + num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); + if (err) + goto out; + + if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl)) + err = -EBUSY; + +out: + kfree(sfd_pl); + return err; +} + +static void +mlxsw_sp_bridge_port_get_ports_bitmap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_ports_bitmap *ports_bm) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + u64 max_lag_members, i; + int lag_id; + + if (!bridge_port->lagged) { + set_bit(bridge_port->system_port, ports_bm->bitmap); + } else { + max_lag_members = MLXSW_CORE_RES_GET(mlxsw_sp->core, + MAX_LAG_MEMBERS); + lag_id = bridge_port->lag_id; + for (i = 0; i < max_lag_members; i++) { + mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp, + lag_id, i); + if (mlxsw_sp_port) + set_bit(mlxsw_sp_port->local_port, + ports_bm->bitmap); + } + } +} + +static void +mlxsw_sp_mc_get_mrouters_bitmap(struct mlxsw_sp_ports_bitmap *flood_bm, + struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_bridge_port *bridge_port; + + list_for_each_entry(bridge_port, &bridge_device->ports_list, list) { + if (bridge_port->mrouter) { + mlxsw_sp_bridge_port_get_ports_bitmap(mlxsw_sp, + bridge_port, + flood_bm); + } + } +} + +static int mlxsw_sp_mc_mdb_mrouters_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ports_bitmap *ports_bm, + struct mlxsw_sp_mdb_entry *mdb_entry) +{ + struct mlxsw_sp_mdb_entry_port *mdb_entry_port; + unsigned int nbits = ports_bm->nbits; + int i; + + for_each_set_bit(i, ports_bm->bitmap, nbits) { + mdb_entry_port = mlxsw_sp_mdb_entry_mrouter_port_get(mlxsw_sp, + mdb_entry, + i); + if (IS_ERR(mdb_entry_port)) { + nbits = i; + goto err_mrouter_port_get; + } + } + + return 0; + +err_mrouter_port_get: + for_each_set_bit(i, ports_bm->bitmap, nbits) + mlxsw_sp_mdb_entry_mrouter_port_put(mlxsw_sp, mdb_entry, i); + return PTR_ERR(mdb_entry_port); +} + +static void mlxsw_sp_mc_mdb_mrouters_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ports_bitmap *ports_bm, + struct mlxsw_sp_mdb_entry *mdb_entry) +{ + int i; + + for_each_set_bit(i, ports_bm->bitmap, ports_bm->nbits) + 
mlxsw_sp_mdb_entry_mrouter_port_put(mlxsw_sp, mdb_entry, i); +} + +static int +mlxsw_sp_mc_mdb_mrouters_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp_mdb_entry *mdb_entry, bool add) +{ + struct mlxsw_sp_ports_bitmap ports_bm; + int err; + + err = mlxsw_sp_port_bitmap_init(mlxsw_sp, &ports_bm); + if (err) + return err; + + mlxsw_sp_mc_get_mrouters_bitmap(&ports_bm, bridge_device, mlxsw_sp); + + if (add) + err = mlxsw_sp_mc_mdb_mrouters_add(mlxsw_sp, &ports_bm, + mdb_entry); + else + mlxsw_sp_mc_mdb_mrouters_del(mlxsw_sp, &ports_bm, mdb_entry); + + mlxsw_sp_port_bitmap_fini(&ports_bm); + return err; +} + +static struct mlxsw_sp_mdb_entry * +mlxsw_sp_mc_mdb_entry_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_device *bridge_device, + const unsigned char *addr, u16 fid, u16 local_port) +{ + struct mlxsw_sp_mdb_entry_port *mdb_entry_port; + struct mlxsw_sp_mdb_entry *mdb_entry; + int err; + + mdb_entry = kzalloc(sizeof(*mdb_entry), GFP_KERNEL); + if (!mdb_entry) + return ERR_PTR(-ENOMEM); + + ether_addr_copy(mdb_entry->key.addr, addr); + mdb_entry->key.fid = fid; + err = mlxsw_sp_pgt_mid_alloc(mlxsw_sp, &mdb_entry->mid); + if (err) + goto err_pgt_mid_alloc; + + INIT_LIST_HEAD(&mdb_entry->ports_list); + + err = mlxsw_sp_mc_mdb_mrouters_set(mlxsw_sp, bridge_device, mdb_entry, + true); + if (err) + goto err_mdb_mrouters_set; + + mdb_entry_port = mlxsw_sp_mdb_entry_port_get(mlxsw_sp, mdb_entry, + local_port); + if (IS_ERR(mdb_entry_port)) { + err = PTR_ERR(mdb_entry_port); + goto err_mdb_entry_port_get; + } + + if (bridge_device->multicast_enabled) { + err = mlxsw_sp_mdb_entry_write(mlxsw_sp, mdb_entry, true); + if (err) + goto err_mdb_entry_write; + } + + err = rhashtable_insert_fast(&bridge_device->mdb_ht, + &mdb_entry->ht_node, + mlxsw_sp_mdb_ht_params); + if (err) + goto err_rhashtable_insert; + + list_add_tail(&mdb_entry->list, &bridge_device->mdb_list); + + return mdb_entry; + +err_rhashtable_insert: + if (bridge_device->multicast_enabled) + mlxsw_sp_mdb_entry_write(mlxsw_sp, mdb_entry, false); +err_mdb_entry_write: + mlxsw_sp_mdb_entry_port_put(mlxsw_sp, mdb_entry, local_port, false); +err_mdb_entry_port_get: + mlxsw_sp_mc_mdb_mrouters_set(mlxsw_sp, bridge_device, mdb_entry, false); +err_mdb_mrouters_set: + mlxsw_sp_pgt_mid_free(mlxsw_sp, mdb_entry->mid); +err_pgt_mid_alloc: + kfree(mdb_entry); + return ERR_PTR(err); +} + +static void +mlxsw_sp_mc_mdb_entry_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mdb_entry *mdb_entry, + struct mlxsw_sp_bridge_device *bridge_device, + u16 local_port, bool force) +{ + list_del(&mdb_entry->list); + rhashtable_remove_fast(&bridge_device->mdb_ht, &mdb_entry->ht_node, + mlxsw_sp_mdb_ht_params); + if (bridge_device->multicast_enabled) + mlxsw_sp_mdb_entry_write(mlxsw_sp, mdb_entry, false); + mlxsw_sp_mdb_entry_port_put(mlxsw_sp, mdb_entry, local_port, force); + mlxsw_sp_mc_mdb_mrouters_set(mlxsw_sp, bridge_device, mdb_entry, false); + WARN_ON(!list_empty(&mdb_entry->ports_list)); + mlxsw_sp_pgt_mid_free(mlxsw_sp, mdb_entry->mid); + kfree(mdb_entry); +} + +static struct mlxsw_sp_mdb_entry * +mlxsw_sp_mc_mdb_entry_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_device *bridge_device, + const unsigned char *addr, u16 fid, u16 local_port) +{ + struct mlxsw_sp_mdb_entry_key key = {}; + struct mlxsw_sp_mdb_entry *mdb_entry; + + ether_addr_copy(key.addr, addr); + key.fid = fid; + mdb_entry = rhashtable_lookup_fast(&bridge_device->mdb_ht, &key, + mlxsw_sp_mdb_ht_params); + if (mdb_entry) { + 
struct mlxsw_sp_mdb_entry_port *mdb_entry_port; + + mdb_entry_port = mlxsw_sp_mdb_entry_port_get(mlxsw_sp, + mdb_entry, + local_port); + if (IS_ERR(mdb_entry_port)) + return ERR_CAST(mdb_entry_port); + + return mdb_entry; + } + + return mlxsw_sp_mc_mdb_entry_init(mlxsw_sp, bridge_device, addr, fid, + local_port); +} + +static bool +mlxsw_sp_mc_mdb_entry_remove(struct mlxsw_sp_mdb_entry *mdb_entry, + struct mlxsw_sp_mdb_entry_port *removed_entry_port, + bool force) +{ + if (mdb_entry->ports_count > 1) + return false; + + if (force) + return true; + + if (!removed_entry_port->mrouter && + refcount_read(&removed_entry_port->refcount) > 1) + return false; + + if (removed_entry_port->mrouter && + refcount_read(&removed_entry_port->refcount) > 2) + return false; + + return true; +} + +static void +mlxsw_sp_mc_mdb_entry_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp_mdb_entry *mdb_entry, u16 local_port, + bool force) +{ + struct mlxsw_sp_mdb_entry_port *mdb_entry_port; + + mdb_entry_port = mlxsw_sp_mdb_entry_port_lookup(mdb_entry, local_port); + if (!mdb_entry_port) + return; + + /* Avoid a temporary situation in which the MDB entry points to an empty + * PGT entry, as otherwise packets will be temporarily dropped instead + * of being flooded. Instead, in this situation, call + * mlxsw_sp_mc_mdb_entry_fini(), which first deletes the MDB entry and + * then releases the PGT entry. + */ + if (mlxsw_sp_mc_mdb_entry_remove(mdb_entry, mdb_entry_port, force)) + mlxsw_sp_mc_mdb_entry_fini(mlxsw_sp, mdb_entry, bridge_device, + local_port, force); + else + mlxsw_sp_mdb_entry_port_put(mlxsw_sp, mdb_entry, local_port, + force); +} + +static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_port_mdb *mdb) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct net_device *orig_dev = mdb->obj.orig_dev; + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_bridge_port *bridge_port; + struct mlxsw_sp_mdb_entry *mdb_entry; + u16 fid_index; + + bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); + if (!bridge_port) + return 0; + + bridge_device = bridge_port->bridge_device; + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_bridge(mlxsw_sp_port, + bridge_device, + mdb->vid); + if (!mlxsw_sp_port_vlan) + return 0; + + fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid); + + mdb_entry = mlxsw_sp_mc_mdb_entry_get(mlxsw_sp, bridge_device, + mdb->addr, fid_index, + mlxsw_sp_port->local_port); + if (IS_ERR(mdb_entry)) + return PTR_ERR(mdb_entry); + + return 0; +} + +static int +mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_device *bridge_device, + bool mc_enabled) +{ + struct mlxsw_sp_mdb_entry *mdb_entry; + int err; + + list_for_each_entry(mdb_entry, &bridge_device->mdb_list, list) { + err = mlxsw_sp_mdb_entry_write(mlxsw_sp, mdb_entry, mc_enabled); + if (err) + goto err_mdb_entry_write; + } + return 0; + +err_mdb_entry_write: + list_for_each_entry_continue_reverse(mdb_entry, + &bridge_device->mdb_list, list) + mlxsw_sp_mdb_entry_write(mlxsw_sp, mdb_entry, !mc_enabled); + return err; +} + +static void +mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port, + bool add) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_bridge_device *bridge_device; + u16 local_port = mlxsw_sp_port->local_port; + struct 
mlxsw_sp_mdb_entry *mdb_entry; + + bridge_device = bridge_port->bridge_device; + + list_for_each_entry(mdb_entry, &bridge_device->mdb_list, list) { + if (add) + mlxsw_sp_mdb_entry_mrouter_port_get(mlxsw_sp, mdb_entry, + local_port); + else + mlxsw_sp_mdb_entry_mrouter_port_put(mlxsw_sp, mdb_entry, + local_port); + } +} + +static int mlxsw_sp_port_obj_add(struct net_device *dev, const void *ctx, + const struct switchdev_obj *obj, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + const struct switchdev_obj_port_vlan *vlan; + int err = 0; + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); + + err = mlxsw_sp_port_vlans_add(mlxsw_sp_port, vlan, extack); + + /* The event is emitted before the changes are actually + * applied to the bridge. Therefore schedule the respin + * call for later, so that the respin logic sees the + * updated bridge state. + */ + mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp); + break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + err = mlxsw_sp_port_mdb_add(mlxsw_sp_port, + SWITCHDEV_OBJ_PORT_MDB(obj)); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static void +mlxsw_sp_bridge_port_vlan_del(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port, u16 vid) +{ + u16 pvid = mlxsw_sp_port->pvid == vid ? 0 : mlxsw_sp_port->pvid; + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + u16 proto; + + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); + if (WARN_ON(!mlxsw_sp_port_vlan)) + return; + + mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan); + br_vlan_get_proto(bridge_port->bridge_device->dev, &proto); + mlxsw_sp_port_pvid_set(mlxsw_sp_port, pvid, proto); + mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false); + mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan); +} + +static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_port_vlan *vlan) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct net_device *orig_dev = vlan->obj.orig_dev; + struct mlxsw_sp_bridge_port *bridge_port; + + if (netif_is_bridge_master(orig_dev)) + return -EOPNOTSUPP; + + bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); + if (WARN_ON(!bridge_port)) + return -EINVAL; + + if (!bridge_port->bridge_device->vlan_enabled) + return 0; + + mlxsw_sp_bridge_port_vlan_del(mlxsw_sp_port, bridge_port, vlan->vid); + + return 0; +} + +static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_port_mdb *mdb) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct net_device *orig_dev = mdb->obj.orig_dev; + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + struct mlxsw_sp_bridge_device *bridge_device; + struct net_device *dev = mlxsw_sp_port->dev; + struct mlxsw_sp_bridge_port *bridge_port; + struct mlxsw_sp_mdb_entry_key key = {}; + struct mlxsw_sp_mdb_entry *mdb_entry; + u16 fid_index; + + bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); + if (!bridge_port) + return 0; + + bridge_device = bridge_port->bridge_device; + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_bridge(mlxsw_sp_port, + bridge_device, + mdb->vid); + if (!mlxsw_sp_port_vlan) + return 0; + + fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid); + + ether_addr_copy(key.addr, mdb->addr); + key.fid = fid_index; + mdb_entry = rhashtable_lookup_fast(&bridge_device->mdb_ht, &key, + mlxsw_sp_mdb_ht_params); + if (!mdb_entry) { + netdev_err(dev, 
"Unable to remove port from MC DB\n"); + return -EINVAL; + } + + mlxsw_sp_mc_mdb_entry_put(mlxsw_sp, bridge_device, mdb_entry, + mlxsw_sp_port->local_port, false); + return 0; +} + +static void +mlxsw_sp_bridge_port_mdb_flush(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port, + u16 fid_index) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_mdb_entry *mdb_entry, *tmp; + u16 local_port = mlxsw_sp_port->local_port; + + bridge_device = bridge_port->bridge_device; + + list_for_each_entry_safe(mdb_entry, tmp, &bridge_device->mdb_list, + list) { + if (mdb_entry->key.fid != fid_index) + continue; + + if (bridge_port->mrouter) + mlxsw_sp_mdb_entry_mrouter_port_put(mlxsw_sp, + mdb_entry, + local_port); + + mlxsw_sp_mc_mdb_entry_put(mlxsw_sp, bridge_device, mdb_entry, + local_port, true); + } +} + +static int mlxsw_sp_port_obj_del(struct net_device *dev, const void *ctx, + const struct switchdev_obj *obj) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err = 0; + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + err = mlxsw_sp_port_vlans_del(mlxsw_sp_port, + SWITCHDEV_OBJ_PORT_VLAN(obj)); + break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + err = mlxsw_sp_port_mdb_del(mlxsw_sp_port, + SWITCHDEV_OBJ_PORT_MDB(obj)); + break; + default: + err = -EOPNOTSUPP; + break; + } + + mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp); + + return err; +} + +static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp, + u16 lag_id) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + u64 max_lag_members; + int i; + + max_lag_members = MLXSW_CORE_RES_GET(mlxsw_sp->core, + MAX_LAG_MEMBERS); + for (i = 0; i < max_lag_members; i++) { + mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i); + if (mlxsw_sp_port) + return mlxsw_sp_port; + } + return NULL; +} + +static int +mlxsw_sp_bridge_vlan_aware_port_join(struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_port *mlxsw_sp_port, + struct netlink_ext_ack *extack) +{ + if (is_vlan_dev(bridge_port->dev)) { + NL_SET_ERR_MSG_MOD(extack, "Can not enslave a VLAN device to a VLAN-aware bridge"); + return -EINVAL; + } + + /* Port is no longer usable as a router interface */ + if (mlxsw_sp_port->default_vlan->fid) + mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port->default_vlan); + + return 0; +} + +static int +mlxsw_sp_bridge_8021q_port_join(struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_port *mlxsw_sp_port, + struct netlink_ext_ack *extack) +{ + return mlxsw_sp_bridge_vlan_aware_port_join(bridge_port, mlxsw_sp_port, + extack); +} + +static void +mlxsw_sp_bridge_vlan_aware_port_leave(struct mlxsw_sp_port *mlxsw_sp_port) +{ + /* Make sure untagged frames are allowed to ingress */ + mlxsw_sp_port_pvid_set(mlxsw_sp_port, MLXSW_SP_DEFAULT_VID, + ETH_P_8021Q); +} + +static void +mlxsw_sp_bridge_8021q_port_leave(struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_bridge_vlan_aware_port_leave(mlxsw_sp_port); +} + +static int +mlxsw_sp_bridge_vlan_aware_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev, + u16 vid, u16 ethertype, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + struct vxlan_dev *vxlan = netdev_priv(vxlan_dev); + struct mlxsw_sp_nve_params params = { + .type = 
MLXSW_SP_NVE_TYPE_VXLAN, + .vni = vxlan->cfg.vni, + .dev = vxlan_dev, + .ethertype = ethertype, + }; + struct mlxsw_sp_fid *fid; + int err; + + /* If the VLAN is 0, we need to find the VLAN that is configured as + * PVID and egress untagged on the bridge port of the VxLAN device. + * It is possible no such VLAN exists + */ + if (!vid) { + err = mlxsw_sp_vxlan_mapped_vid(vxlan_dev, &vid); + if (err || !vid) + return err; + } + + fid = mlxsw_sp_fid_8021q_get(mlxsw_sp, vid); + if (IS_ERR(fid)) { + NL_SET_ERR_MSG_MOD(extack, "Failed to create 802.1Q FID"); + return PTR_ERR(fid); + } + + if (mlxsw_sp_fid_vni_is_set(fid)) { + NL_SET_ERR_MSG_MOD(extack, "VNI is already set on FID"); + err = -EINVAL; + goto err_vni_exists; + } + + err = mlxsw_sp_nve_fid_enable(mlxsw_sp, fid, &params, extack); + if (err) + goto err_nve_fid_enable; + + return 0; + +err_nve_fid_enable: +err_vni_exists: + mlxsw_sp_fid_put(fid); + return err; +} + +static int +mlxsw_sp_bridge_8021q_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev, u16 vid, + struct netlink_ext_ack *extack) +{ + return mlxsw_sp_bridge_vlan_aware_vxlan_join(bridge_device, vxlan_dev, + vid, ETH_P_8021Q, extack); +} + +static struct net_device * +mlxsw_sp_bridge_8021q_vxlan_dev_find(struct net_device *br_dev, u16 vid) +{ + struct net_device *dev; + struct list_head *iter; + + netdev_for_each_lower_dev(br_dev, dev, iter) { + u16 pvid; + int err; + + if (!netif_is_vxlan(dev)) + continue; + + err = mlxsw_sp_vxlan_mapped_vid(dev, &pvid); + if (err || pvid != vid) + continue; + + return dev; + } + + return NULL; +} + +static struct mlxsw_sp_fid * +mlxsw_sp_bridge_8021q_fid_get(struct mlxsw_sp_bridge_device *bridge_device, + u16 vid, struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + + return mlxsw_sp_fid_8021q_get(mlxsw_sp, vid); +} + +static struct mlxsw_sp_fid * +mlxsw_sp_bridge_8021q_fid_lookup(struct mlxsw_sp_bridge_device *bridge_device, + u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + + return mlxsw_sp_fid_8021q_lookup(mlxsw_sp, vid); +} + +static u16 +mlxsw_sp_bridge_8021q_fid_vid(struct mlxsw_sp_bridge_device *bridge_device, + const struct mlxsw_sp_fid *fid) +{ + return mlxsw_sp_fid_8021q_vid(fid); +} + +static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021q_ops = { + .port_join = mlxsw_sp_bridge_8021q_port_join, + .port_leave = mlxsw_sp_bridge_8021q_port_leave, + .vxlan_join = mlxsw_sp_bridge_8021q_vxlan_join, + .fid_get = mlxsw_sp_bridge_8021q_fid_get, + .fid_lookup = mlxsw_sp_bridge_8021q_fid_lookup, + .fid_vid = mlxsw_sp_bridge_8021q_fid_vid, +}; + +static bool +mlxsw_sp_port_is_br_member(const struct mlxsw_sp_port *mlxsw_sp_port, + const struct net_device *br_dev) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + + list_for_each_entry(mlxsw_sp_port_vlan, &mlxsw_sp_port->vlans_list, + list) { + if (mlxsw_sp_port_vlan->bridge_port && + mlxsw_sp_port_vlan->bridge_port->bridge_device->dev == + br_dev) + return true; + } + + return false; +} + +static int +mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_port *mlxsw_sp_port, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + struct net_device *dev = bridge_port->dev; + u16 vid; + + vid = is_vlan_dev(dev) ? 
vlan_dev_vlan_id(dev) : MLXSW_SP_DEFAULT_VID; + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); + if (WARN_ON(!mlxsw_sp_port_vlan)) + return -EINVAL; + + if (mlxsw_sp_port_is_br_member(mlxsw_sp_port, bridge_device->dev)) { + NL_SET_ERR_MSG_MOD(extack, "Can not bridge VLAN uppers of the same port"); + return -EINVAL; + } + + /* Port is no longer usable as a router interface */ + if (mlxsw_sp_port_vlan->fid) + mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); + + return mlxsw_sp_port_vlan_bridge_join(mlxsw_sp_port_vlan, bridge_port, + extack); +} + +static void +mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + struct net_device *dev = bridge_port->dev; + u16 vid; + + vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : MLXSW_SP_DEFAULT_VID; + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); + if (!mlxsw_sp_port_vlan || !mlxsw_sp_port_vlan->bridge_port) + return; + + mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan); +} + +static int +mlxsw_sp_bridge_8021d_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev, u16 vid, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + struct vxlan_dev *vxlan = netdev_priv(vxlan_dev); + struct mlxsw_sp_nve_params params = { + .type = MLXSW_SP_NVE_TYPE_VXLAN, + .vni = vxlan->cfg.vni, + .dev = vxlan_dev, + .ethertype = ETH_P_8021Q, + }; + struct mlxsw_sp_fid *fid; + int err; + + fid = mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex); + if (IS_ERR(fid)) { + NL_SET_ERR_MSG_MOD(extack, "Failed to create 802.1D FID"); + return -EINVAL; + } + + if (mlxsw_sp_fid_vni_is_set(fid)) { + NL_SET_ERR_MSG_MOD(extack, "VNI is already set on FID"); + err = -EINVAL; + goto err_vni_exists; + } + + err = mlxsw_sp_nve_fid_enable(mlxsw_sp, fid, &params, extack); + if (err) + goto err_nve_fid_enable; + + return 0; + +err_nve_fid_enable: +err_vni_exists: + mlxsw_sp_fid_put(fid); + return err; +} + +static struct mlxsw_sp_fid * +mlxsw_sp_bridge_8021d_fid_get(struct mlxsw_sp_bridge_device *bridge_device, + u16 vid, struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + + return mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex); +} + +static struct mlxsw_sp_fid * +mlxsw_sp_bridge_8021d_fid_lookup(struct mlxsw_sp_bridge_device *bridge_device, + u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + + /* The only valid VLAN for a VLAN-unaware bridge is 0 */ + if (vid) + return NULL; + + return mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex); +} + +static u16 +mlxsw_sp_bridge_8021d_fid_vid(struct mlxsw_sp_bridge_device *bridge_device, + const struct mlxsw_sp_fid *fid) +{ + return 0; +} + +static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021d_ops = { + .port_join = mlxsw_sp_bridge_8021d_port_join, + .port_leave = mlxsw_sp_bridge_8021d_port_leave, + .vxlan_join = mlxsw_sp_bridge_8021d_vxlan_join, + .fid_get = mlxsw_sp_bridge_8021d_fid_get, + .fid_lookup = mlxsw_sp_bridge_8021d_fid_lookup, + .fid_vid = mlxsw_sp_bridge_8021d_fid_vid, +}; + +static int +mlxsw_sp_bridge_8021ad_port_join(struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_port *mlxsw_sp_port, + struct netlink_ext_ack *extack) +{ + int err; + 
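+ /* While enslaved to an 802.1ad bridge, classify packets to VLANs by their 802.1ad (S-)tag; the error path and port_leave restore 802.1Q-based classification. */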
+ err = mlxsw_sp_port_vlan_classification_set(mlxsw_sp_port, true, false); + if (err) + return err; + + err = mlxsw_sp_bridge_vlan_aware_port_join(bridge_port, mlxsw_sp_port, + extack); + if (err) + goto err_bridge_vlan_aware_port_join; + + return 0; + +err_bridge_vlan_aware_port_join: + mlxsw_sp_port_vlan_classification_set(mlxsw_sp_port, false, true); + return err; +} + +static void +mlxsw_sp_bridge_8021ad_port_leave(struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_bridge_vlan_aware_port_leave(mlxsw_sp_port); + mlxsw_sp_port_vlan_classification_set(mlxsw_sp_port, false, true); +} + +static int +mlxsw_sp_bridge_8021ad_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev, u16 vid, + struct netlink_ext_ack *extack) +{ + return mlxsw_sp_bridge_vlan_aware_vxlan_join(bridge_device, vxlan_dev, + vid, ETH_P_8021AD, extack); +} + +static const struct mlxsw_sp_bridge_ops mlxsw_sp1_bridge_8021ad_ops = { + .port_join = mlxsw_sp_bridge_8021ad_port_join, + .port_leave = mlxsw_sp_bridge_8021ad_port_leave, + .vxlan_join = mlxsw_sp_bridge_8021ad_vxlan_join, + .fid_get = mlxsw_sp_bridge_8021q_fid_get, + .fid_lookup = mlxsw_sp_bridge_8021q_fid_lookup, + .fid_vid = mlxsw_sp_bridge_8021q_fid_vid, +}; + +static int +mlxsw_sp2_bridge_8021ad_port_join(struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_port *mlxsw_sp_port, + struct netlink_ext_ack *extack) +{ + int err; + + /* The EtherType of decapsulated packets is determined at the egress + * port to allow 802.1d and 802.1ad bridges with VXLAN devices to + * co-exist. + */ + err = mlxsw_sp_port_egress_ethtype_set(mlxsw_sp_port, ETH_P_8021AD); + if (err) + return err; + + err = mlxsw_sp_bridge_8021ad_port_join(bridge_device, bridge_port, + mlxsw_sp_port, extack); + if (err) + goto err_bridge_8021ad_port_join; + + return 0; + +err_bridge_8021ad_port_join: + mlxsw_sp_port_egress_ethtype_set(mlxsw_sp_port, ETH_P_8021Q); + return err; +} + +static void +mlxsw_sp2_bridge_8021ad_port_leave(struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_bridge_8021ad_port_leave(bridge_device, bridge_port, + mlxsw_sp_port); + mlxsw_sp_port_egress_ethtype_set(mlxsw_sp_port, ETH_P_8021Q); +} + +static const struct mlxsw_sp_bridge_ops mlxsw_sp2_bridge_8021ad_ops = { + .port_join = mlxsw_sp2_bridge_8021ad_port_join, + .port_leave = mlxsw_sp2_bridge_8021ad_port_leave, + .vxlan_join = mlxsw_sp_bridge_8021ad_vxlan_join, + .fid_get = mlxsw_sp_bridge_8021q_fid_get, + .fid_lookup = mlxsw_sp_bridge_8021q_fid_lookup, + .fid_vid = mlxsw_sp_bridge_8021q_fid_vid, +}; + +int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *brport_dev, + struct net_device *br_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_bridge_port *bridge_port; + int err; + + bridge_port = mlxsw_sp_bridge_port_get(mlxsw_sp->bridge, brport_dev, + extack); + if (IS_ERR(bridge_port)) + return PTR_ERR(bridge_port); + bridge_device = bridge_port->bridge_device; + + err = bridge_device->ops->port_join(bridge_device, bridge_port, + mlxsw_sp_port, extack); + if (err) + goto err_port_join; + + return 0; + +err_port_join: + mlxsw_sp_bridge_port_put(mlxsw_sp->bridge, bridge_port); + return err; +} + +void 
mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *brport_dev, + struct net_device *br_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_bridge_port *bridge_port; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (!bridge_device) + return; + bridge_port = __mlxsw_sp_bridge_port_find(bridge_device, brport_dev); + if (!bridge_port) + return; + + bridge_device->ops->port_leave(bridge_device, bridge_port, + mlxsw_sp_port); + mlxsw_sp_bridge_port_put(mlxsw_sp->bridge, bridge_port); +} + +int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + const struct net_device *vxlan_dev, u16 vid, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_bridge_device *bridge_device; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (WARN_ON(!bridge_device)) + return -EINVAL; + + return bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, vid, + extack); +} + +void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, + const struct net_device *vxlan_dev) +{ + struct vxlan_dev *vxlan = netdev_priv(vxlan_dev); + struct mlxsw_sp_fid *fid; + + /* If the VxLAN device is down, then the FID does not have a VNI */ + fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vxlan->cfg.vni); + if (!fid) + return; + + mlxsw_sp_nve_fid_disable(mlxsw_sp, fid); + /* Drop both the reference we just took during lookup and the reference + * the VXLAN device took. + */ + mlxsw_sp_fid_put(fid); + mlxsw_sp_fid_put(fid); +} + +static void +mlxsw_sp_switchdev_vxlan_addr_convert(const union vxlan_addr *vxlan_addr, + enum mlxsw_sp_l3proto *proto, + union mlxsw_sp_l3addr *addr) +{ + if (vxlan_addr->sa.sa_family == AF_INET) { + addr->addr4 = vxlan_addr->sin.sin_addr.s_addr; + *proto = MLXSW_SP_L3_PROTO_IPV4; + } else { + addr->addr6 = vxlan_addr->sin6.sin6_addr; + *proto = MLXSW_SP_L3_PROTO_IPV6; + } +} + +static void +mlxsw_sp_switchdev_addr_vxlan_convert(enum mlxsw_sp_l3proto proto, + const union mlxsw_sp_l3addr *addr, + union vxlan_addr *vxlan_addr) +{ + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + vxlan_addr->sa.sa_family = AF_INET; + vxlan_addr->sin.sin_addr.s_addr = addr->addr4; + break; + case MLXSW_SP_L3_PROTO_IPV6: + vxlan_addr->sa.sa_family = AF_INET6; + vxlan_addr->sin6.sin6_addr = addr->addr6; + break; + } +} + +static void mlxsw_sp_fdb_vxlan_call_notifiers(struct net_device *dev, + const char *mac, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr, + __be32 vni, bool adding) +{ + struct switchdev_notifier_vxlan_fdb_info info; + struct vxlan_dev *vxlan = netdev_priv(dev); + enum switchdev_notifier_type type; + + type = adding ? 
SWITCHDEV_VXLAN_FDB_ADD_TO_BRIDGE : + SWITCHDEV_VXLAN_FDB_DEL_TO_BRIDGE; + mlxsw_sp_switchdev_addr_vxlan_convert(proto, addr, &info.remote_ip); + info.remote_port = vxlan->cfg.dst_port; + info.remote_vni = vni; + info.remote_ifindex = 0; + ether_addr_copy(info.eth_addr, mac); + info.vni = vni; + info.offloaded = adding; + call_switchdev_notifiers(type, dev, &info.info, NULL); +} + +static void mlxsw_sp_fdb_nve_call_notifiers(struct net_device *dev, + const char *mac, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr, + __be32 vni, + bool adding) +{ + if (netif_is_vxlan(dev)) + mlxsw_sp_fdb_vxlan_call_notifiers(dev, mac, proto, addr, vni, + adding); +} + +static void +mlxsw_sp_fdb_call_notifiers(enum switchdev_notifier_type type, + const char *mac, u16 vid, + struct net_device *dev, bool offloaded) +{ + struct switchdev_notifier_fdb_info info = {}; + + info.addr = mac; + info.vid = vid; + info.offloaded = offloaded; + call_switchdev_notifiers(type, dev, &info.info, NULL); +} + +static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, + char *sfn_pl, int rec_index, + bool adding) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_bridge_port *bridge_port; + struct mlxsw_sp_port *mlxsw_sp_port; + u16 local_port, vid, fid, evid = 0; + enum switchdev_notifier_type type; + char mac[ETH_ALEN]; + bool do_notification = true; + int err; + + mlxsw_reg_sfn_mac_unpack(sfn_pl, rec_index, mac, &fid, &local_port); + + if (WARN_ON_ONCE(!mlxsw_sp_local_port_is_valid(mlxsw_sp, local_port))) + return; + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect local port in FDB notification\n"); + goto just_remove; + } + + if (mlxsw_sp_fid_is_dummy(mlxsw_sp, fid)) + goto just_remove; + + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_fid(mlxsw_sp_port, fid); + if (!mlxsw_sp_port_vlan) { + netdev_err(mlxsw_sp_port->dev, "Failed to find a matching {Port, VID} following FDB notification\n"); + goto just_remove; + } + + bridge_port = mlxsw_sp_port_vlan->bridge_port; + if (!bridge_port) { + netdev_err(mlxsw_sp_port->dev, "{Port, VID} not associated with a bridge\n"); + goto just_remove; + } + + bridge_device = bridge_port->bridge_device; + vid = bridge_device->vlan_enabled ? mlxsw_sp_port_vlan->vid : 0; + evid = mlxsw_sp_port_vlan->vid; + +do_fdb_op: + err = mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid, evid, + adding, true); + if (err) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to set FDB entry\n"); + return; + } + + if (!do_notification) + return; + type = adding ? 
SWITCHDEV_FDB_ADD_TO_BRIDGE : SWITCHDEV_FDB_DEL_TO_BRIDGE; + mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev, adding); + + return; + +just_remove: + adding = false; + do_notification = false; + goto do_fdb_op; +} + +static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp, + char *sfn_pl, int rec_index, + bool adding) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_bridge_port *bridge_port; + struct mlxsw_sp_port *mlxsw_sp_port; + enum switchdev_notifier_type type; + char mac[ETH_ALEN]; + u16 lag_vid = 0; + u16 lag_id; + u16 vid, fid; + bool do_notification = true; + int err; + + mlxsw_reg_sfn_mac_lag_unpack(sfn_pl, rec_index, mac, &fid, &lag_id); + mlxsw_sp_port = mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id); + if (!mlxsw_sp_port) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Cannot find port representor for LAG\n"); + goto just_remove; + } + + if (mlxsw_sp_fid_is_dummy(mlxsw_sp, fid)) + goto just_remove; + + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_fid(mlxsw_sp_port, fid); + if (!mlxsw_sp_port_vlan) { + netdev_err(mlxsw_sp_port->dev, "Failed to find a matching {Port, VID} following FDB notification\n"); + goto just_remove; + } + + bridge_port = mlxsw_sp_port_vlan->bridge_port; + if (!bridge_port) { + netdev_err(mlxsw_sp_port->dev, "{Port, VID} not associated with a bridge\n"); + goto just_remove; + } + + bridge_device = bridge_port->bridge_device; + vid = bridge_device->vlan_enabled ? mlxsw_sp_port_vlan->vid : 0; + lag_vid = mlxsw_sp_port_vlan->vid; + +do_fdb_op: + err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, fid, lag_vid, + adding, true); + if (err) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to set FDB entry\n"); + return; + } + + if (!do_notification) + return; + type = adding ? 
SWITCHDEV_FDB_ADD_TO_BRIDGE : SWITCHDEV_FDB_DEL_TO_BRIDGE; + mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev, adding); + + return; + +just_remove: + adding = false; + do_notification = false; + goto do_fdb_op; +} + +static int +__mlxsw_sp_fdb_notify_mac_uc_tunnel_process(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_fid *fid, + bool adding, + struct net_device **nve_dev, + u16 *p_vid, __be32 *p_vni) +{ + struct mlxsw_sp_bridge_device *bridge_device; + struct net_device *br_dev, *dev; + int nve_ifindex; + int err; + + err = mlxsw_sp_fid_nve_ifindex(fid, &nve_ifindex); + if (err) + return err; + + err = mlxsw_sp_fid_vni(fid, p_vni); + if (err) + return err; + + dev = __dev_get_by_index(mlxsw_sp_net(mlxsw_sp), nve_ifindex); + if (!dev) + return -EINVAL; + *nve_dev = dev; + + if (!netif_running(dev)) + return -EINVAL; + + if (adding && !br_port_flag_is_set(dev, BR_LEARNING)) + return -EINVAL; + + if (adding && netif_is_vxlan(dev)) { + struct vxlan_dev *vxlan = netdev_priv(dev); + + if (!(vxlan->cfg.flags & VXLAN_F_LEARN)) + return -EINVAL; + } + + br_dev = netdev_master_upper_dev_get(dev); + if (!br_dev) + return -EINVAL; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (!bridge_device) + return -EINVAL; + + *p_vid = bridge_device->ops->fid_vid(bridge_device, fid); + + return 0; +} + +static void mlxsw_sp_fdb_notify_mac_uc_tunnel_process(struct mlxsw_sp *mlxsw_sp, + char *sfn_pl, + int rec_index, + bool adding) +{ + enum mlxsw_reg_sfn_uc_tunnel_protocol sfn_proto; + enum switchdev_notifier_type type; + struct net_device *nve_dev; + union mlxsw_sp_l3addr addr; + struct mlxsw_sp_fid *fid; + char mac[ETH_ALEN]; + u16 fid_index, vid; + __be32 vni; + u32 uip; + int err; + + mlxsw_reg_sfn_uc_tunnel_unpack(sfn_pl, rec_index, mac, &fid_index, + &uip, &sfn_proto); + + fid = mlxsw_sp_fid_lookup_by_index(mlxsw_sp, fid_index); + if (!fid) + goto err_fid_lookup; + + err = mlxsw_sp_nve_learned_ip_resolve(mlxsw_sp, uip, + (enum mlxsw_sp_l3proto) sfn_proto, + &addr); + if (err) + goto err_ip_resolve; + + err = __mlxsw_sp_fdb_notify_mac_uc_tunnel_process(mlxsw_sp, fid, adding, + &nve_dev, &vid, &vni); + if (err) + goto err_fdb_process; + + err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, mac, fid_index, + (enum mlxsw_sp_l3proto) sfn_proto, + &addr, adding, true); + if (err) + goto err_fdb_op; + + mlxsw_sp_fdb_nve_call_notifiers(nve_dev, mac, + (enum mlxsw_sp_l3proto) sfn_proto, + &addr, vni, adding); + + type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE : + SWITCHDEV_FDB_DEL_TO_BRIDGE; + mlxsw_sp_fdb_call_notifiers(type, mac, vid, nve_dev, adding); + + mlxsw_sp_fid_put(fid); + + return; + +err_fdb_op: +err_fdb_process: +err_ip_resolve: + mlxsw_sp_fid_put(fid); +err_fid_lookup: + /* Remove an FDB entry in case we cannot process it. Otherwise the + * device will keep sending the same notification over and over again. 
+ */ + mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, mac, fid_index, + (enum mlxsw_sp_l3proto) sfn_proto, &addr, + false, true); +} + +static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp, + char *sfn_pl, int rec_index) +{ + switch (mlxsw_reg_sfn_rec_type_get(sfn_pl, rec_index)) { + case MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC: + mlxsw_sp_fdb_notify_mac_process(mlxsw_sp, sfn_pl, + rec_index, true); + break; + case MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC: + mlxsw_sp_fdb_notify_mac_process(mlxsw_sp, sfn_pl, + rec_index, false); + break; + case MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC_LAG: + mlxsw_sp_fdb_notify_mac_lag_process(mlxsw_sp, sfn_pl, + rec_index, true); + break; + case MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC_LAG: + mlxsw_sp_fdb_notify_mac_lag_process(mlxsw_sp, sfn_pl, + rec_index, false); + break; + case MLXSW_REG_SFN_REC_TYPE_LEARNED_UNICAST_TUNNEL: + mlxsw_sp_fdb_notify_mac_uc_tunnel_process(mlxsw_sp, sfn_pl, + rec_index, true); + break; + case MLXSW_REG_SFN_REC_TYPE_AGED_OUT_UNICAST_TUNNEL: + mlxsw_sp_fdb_notify_mac_uc_tunnel_process(mlxsw_sp, sfn_pl, + rec_index, false); + break; + } +} + +#define MLXSW_SP_FDB_SFN_QUERIES_PER_SESSION 10 + +static void mlxsw_sp_fdb_notify_work(struct work_struct *work) +{ + struct mlxsw_sp_bridge *bridge; + struct mlxsw_sp *mlxsw_sp; + bool reschedule = false; + char *sfn_pl; + int queries; + u8 num_rec; + int i; + int err; + + sfn_pl = kmalloc(MLXSW_REG_SFN_LEN, GFP_KERNEL); + if (!sfn_pl) + return; + + bridge = container_of(work, struct mlxsw_sp_bridge, fdb_notify.dw.work); + mlxsw_sp = bridge->mlxsw_sp; + + rtnl_lock(); + if (list_empty(&bridge->bridges_list)) + goto out; + reschedule = true; + queries = MLXSW_SP_FDB_SFN_QUERIES_PER_SESSION; + while (queries > 0) { + mlxsw_reg_sfn_pack(sfn_pl); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl); + if (err) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get FDB notifications\n"); + goto out; + } + num_rec = mlxsw_reg_sfn_num_rec_get(sfn_pl); + for (i = 0; i < num_rec; i++) + mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i); + if (num_rec != MLXSW_REG_SFN_REC_MAX_COUNT) + goto out; + queries--; + } + +out: + rtnl_unlock(); + kfree(sfn_pl); + if (!reschedule) + return; + mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp, !queries); +} + +struct mlxsw_sp_switchdev_event_work { + struct work_struct work; + union { + struct switchdev_notifier_fdb_info fdb_info; + struct switchdev_notifier_vxlan_fdb_info vxlan_fdb_info; + }; + struct net_device *dev; + unsigned long event; +}; + +static void +mlxsw_sp_switchdev_bridge_vxlan_fdb_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_switchdev_event_work * + switchdev_work, + struct mlxsw_sp_fid *fid, __be32 vni) +{ + struct switchdev_notifier_vxlan_fdb_info vxlan_fdb_info; + struct switchdev_notifier_fdb_info *fdb_info; + struct net_device *dev = switchdev_work->dev; + enum mlxsw_sp_l3proto proto; + union mlxsw_sp_l3addr addr; + int err; + + fdb_info = &switchdev_work->fdb_info; + err = vxlan_fdb_find_uc(dev, fdb_info->addr, vni, &vxlan_fdb_info); + if (err) + return; + + mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info.remote_ip, + &proto, &addr); + + switch (switchdev_work->event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, + vxlan_fdb_info.eth_addr, + mlxsw_sp_fid_index(fid), + proto, &addr, true, false); + if (err) + return; + vxlan_fdb_info.offloaded = true; + call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, + &vxlan_fdb_info.info, NULL); + 
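/* Also mark the corresponding entry in the bridge driver's FDB as offloaded. */ +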
mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, + vxlan_fdb_info.eth_addr, + fdb_info->vid, dev, true); + break; + case SWITCHDEV_FDB_DEL_TO_DEVICE: + err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, + vxlan_fdb_info.eth_addr, + mlxsw_sp_fid_index(fid), + proto, &addr, false, + false); + vxlan_fdb_info.offloaded = false; + call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, + &vxlan_fdb_info.info, NULL); + break; + } +} + +static void +mlxsw_sp_switchdev_bridge_nve_fdb_event(struct mlxsw_sp_switchdev_event_work * + switchdev_work) +{ + struct mlxsw_sp_bridge_device *bridge_device; + struct net_device *dev = switchdev_work->dev; + struct net_device *br_dev; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_fid *fid; + __be32 vni; + int err; + + if (switchdev_work->event != SWITCHDEV_FDB_ADD_TO_DEVICE && + switchdev_work->event != SWITCHDEV_FDB_DEL_TO_DEVICE) + return; + + if (switchdev_work->event == SWITCHDEV_FDB_ADD_TO_DEVICE && + (!switchdev_work->fdb_info.added_by_user || + switchdev_work->fdb_info.is_local)) + return; + + if (!netif_running(dev)) + return; + br_dev = netdev_master_upper_dev_get(dev); + if (!br_dev) + return; + if (!netif_is_bridge_master(br_dev)) + return; + mlxsw_sp = mlxsw_sp_lower_get(br_dev); + if (!mlxsw_sp) + return; + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (!bridge_device) + return; + + fid = bridge_device->ops->fid_lookup(bridge_device, + switchdev_work->fdb_info.vid); + if (!fid) + return; + + err = mlxsw_sp_fid_vni(fid, &vni); + if (err) + goto out; + + mlxsw_sp_switchdev_bridge_vxlan_fdb_event(mlxsw_sp, switchdev_work, fid, + vni); + +out: + mlxsw_sp_fid_put(fid); +} + +static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work) +{ + struct mlxsw_sp_switchdev_event_work *switchdev_work = + container_of(work, struct mlxsw_sp_switchdev_event_work, work); + struct net_device *dev = switchdev_work->dev; + struct switchdev_notifier_fdb_info *fdb_info; + struct mlxsw_sp_port *mlxsw_sp_port; + int err; + + rtnl_lock(); + if (netif_is_vxlan(dev)) { + mlxsw_sp_switchdev_bridge_nve_fdb_event(switchdev_work); + goto out; + } + + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(dev); + if (!mlxsw_sp_port) + goto out; + + switch (switchdev_work->event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + fdb_info = &switchdev_work->fdb_info; + if (!fdb_info->added_by_user || fdb_info->is_local) + break; + err = mlxsw_sp_port_fdb_set(mlxsw_sp_port, fdb_info, true); + if (err) + break; + mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, + fdb_info->addr, + fdb_info->vid, dev, true); + break; + case SWITCHDEV_FDB_DEL_TO_DEVICE: + fdb_info = &switchdev_work->fdb_info; + mlxsw_sp_port_fdb_set(mlxsw_sp_port, fdb_info, false); + break; + case SWITCHDEV_FDB_ADD_TO_BRIDGE: + case SWITCHDEV_FDB_DEL_TO_BRIDGE: + /* These events are only used to potentially update an existing + * SPAN mirror. 
+ */ + break; + } + + mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp); + +out: + rtnl_unlock(); + kfree(switchdev_work->fdb_info.addr); + kfree(switchdev_work); + dev_put(dev); +} + +static void +mlxsw_sp_switchdev_vxlan_fdb_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_switchdev_event_work * + switchdev_work) +{ + struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info; + struct mlxsw_sp_bridge_device *bridge_device; + struct net_device *dev = switchdev_work->dev; + u8 all_zeros_mac[ETH_ALEN] = { 0 }; + enum mlxsw_sp_l3proto proto; + union mlxsw_sp_l3addr addr; + struct net_device *br_dev; + struct mlxsw_sp_fid *fid; + u16 vid; + int err; + + vxlan_fdb_info = &switchdev_work->vxlan_fdb_info; + br_dev = netdev_master_upper_dev_get(dev); + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (!bridge_device) + return; + + fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vxlan_fdb_info->vni); + if (!fid) + return; + + mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info->remote_ip, + &proto, &addr); + + if (ether_addr_equal(vxlan_fdb_info->eth_addr, all_zeros_mac)) { + err = mlxsw_sp_nve_flood_ip_add(mlxsw_sp, fid, proto, &addr); + if (err) { + mlxsw_sp_fid_put(fid); + return; + } + vxlan_fdb_info->offloaded = true; + call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, + &vxlan_fdb_info->info, NULL); + mlxsw_sp_fid_put(fid); + return; + } + + /* The device has a single FDB table, whereas Linux has two - one + * in the bridge driver and another in the VxLAN driver. We only + * program an entry to the device if the MAC points to the VxLAN + * device in the bridge's FDB table + */ + vid = bridge_device->ops->fid_vid(bridge_device, fid); + if (br_fdb_find_port(br_dev, vxlan_fdb_info->eth_addr, vid) != dev) + goto err_br_fdb_find; + + err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, vxlan_fdb_info->eth_addr, + mlxsw_sp_fid_index(fid), proto, + &addr, true, false); + if (err) + goto err_fdb_tunnel_uc_op; + vxlan_fdb_info->offloaded = true; + call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, + &vxlan_fdb_info->info, NULL); + mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, + vxlan_fdb_info->eth_addr, vid, dev, true); + + mlxsw_sp_fid_put(fid); + + return; + +err_fdb_tunnel_uc_op: +err_br_fdb_find: + mlxsw_sp_fid_put(fid); +} + +static void +mlxsw_sp_switchdev_vxlan_fdb_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_switchdev_event_work * + switchdev_work) +{ + struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info; + struct mlxsw_sp_bridge_device *bridge_device; + struct net_device *dev = switchdev_work->dev; + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + u8 all_zeros_mac[ETH_ALEN] = { 0 }; + enum mlxsw_sp_l3proto proto; + union mlxsw_sp_l3addr addr; + struct mlxsw_sp_fid *fid; + u16 vid; + + vxlan_fdb_info = &switchdev_work->vxlan_fdb_info; + if (!vxlan_fdb_info->offloaded) + return; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (!bridge_device) + return; + + fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vxlan_fdb_info->vni); + if (!fid) + return; + + mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info->remote_ip, + &proto, &addr); + + if (ether_addr_equal(vxlan_fdb_info->eth_addr, all_zeros_mac)) { + mlxsw_sp_nve_flood_ip_del(mlxsw_sp, fid, proto, &addr); + mlxsw_sp_fid_put(fid); + return; + } + + mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, vxlan_fdb_info->eth_addr, + mlxsw_sp_fid_index(fid), proto, &addr, + false, false); + vid = bridge_device->ops->fid_vid(bridge_device, fid); + 
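/* Clear the offloaded flag from the corresponding bridge FDB entry. */ +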
mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, + vxlan_fdb_info->eth_addr, vid, dev, false); + + mlxsw_sp_fid_put(fid); +} + +static void mlxsw_sp_switchdev_vxlan_fdb_event_work(struct work_struct *work) +{ + struct mlxsw_sp_switchdev_event_work *switchdev_work = + container_of(work, struct mlxsw_sp_switchdev_event_work, work); + struct net_device *dev = switchdev_work->dev; + struct mlxsw_sp *mlxsw_sp; + struct net_device *br_dev; + + rtnl_lock(); + + if (!netif_running(dev)) + goto out; + br_dev = netdev_master_upper_dev_get(dev); + if (!br_dev) + goto out; + if (!netif_is_bridge_master(br_dev)) + goto out; + mlxsw_sp = mlxsw_sp_lower_get(br_dev); + if (!mlxsw_sp) + goto out; + + switch (switchdev_work->event) { + case SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE: + mlxsw_sp_switchdev_vxlan_fdb_add(mlxsw_sp, switchdev_work); + break; + case SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE: + mlxsw_sp_switchdev_vxlan_fdb_del(mlxsw_sp, switchdev_work); + break; + } + +out: + rtnl_unlock(); + kfree(switchdev_work); + dev_put(dev); +} + +static int +mlxsw_sp_switchdev_vxlan_work_prepare(struct mlxsw_sp_switchdev_event_work * + switchdev_work, + struct switchdev_notifier_info *info) +{ + struct vxlan_dev *vxlan = netdev_priv(switchdev_work->dev); + struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info; + struct vxlan_config *cfg = &vxlan->cfg; + struct netlink_ext_ack *extack; + + extack = switchdev_notifier_info_to_extack(info); + vxlan_fdb_info = container_of(info, + struct switchdev_notifier_vxlan_fdb_info, + info); + + if (vxlan_fdb_info->remote_port != cfg->dst_port) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Non-default remote port is not supported"); + return -EOPNOTSUPP; + } + if (vxlan_fdb_info->remote_vni != cfg->vni || + vxlan_fdb_info->vni != cfg->vni) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Non-default VNI is not supported"); + return -EOPNOTSUPP; + } + if (vxlan_fdb_info->remote_ifindex) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Local interface is not supported"); + return -EOPNOTSUPP; + } + if (is_multicast_ether_addr(vxlan_fdb_info->eth_addr)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Multicast MAC addresses not supported"); + return -EOPNOTSUPP; + } + if (vxlan_addr_multicast(&vxlan_fdb_info->remote_ip)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Multicast destination IP is not supported"); + return -EOPNOTSUPP; + } + + switchdev_work->vxlan_fdb_info = *vxlan_fdb_info; + + return 0; +} + +/* Called under rcu_read_lock() */ +static int mlxsw_sp_switchdev_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + struct mlxsw_sp_switchdev_event_work *switchdev_work; + struct switchdev_notifier_fdb_info *fdb_info; + struct switchdev_notifier_info *info = ptr; + struct net_device *br_dev; + int err; + + if (event == SWITCHDEV_PORT_ATTR_SET) { + err = switchdev_handle_port_attr_set(dev, ptr, + mlxsw_sp_port_dev_check, + mlxsw_sp_port_attr_set); + return notifier_from_errno(err); + } + + /* Tunnel devices are not our uppers, so check their master instead */ + br_dev = netdev_master_upper_dev_get_rcu(dev); + if (!br_dev) + return NOTIFY_DONE; + if (!netif_is_bridge_master(br_dev)) + return NOTIFY_DONE; + if (!mlxsw_sp_port_dev_lower_find_rcu(br_dev)) + return NOTIFY_DONE; + + switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); + if (!switchdev_work) + return NOTIFY_BAD; + + switchdev_work->dev = dev; + switchdev_work->event = event; + + switch (event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + case 
SWITCHDEV_FDB_DEL_TO_DEVICE: + case SWITCHDEV_FDB_ADD_TO_BRIDGE: + case SWITCHDEV_FDB_DEL_TO_BRIDGE: + fdb_info = container_of(info, + struct switchdev_notifier_fdb_info, + info); + INIT_WORK(&switchdev_work->work, + mlxsw_sp_switchdev_bridge_fdb_event_work); + memcpy(&switchdev_work->fdb_info, ptr, + sizeof(switchdev_work->fdb_info)); + switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC); + if (!switchdev_work->fdb_info.addr) + goto err_addr_alloc; + ether_addr_copy((u8 *)switchdev_work->fdb_info.addr, + fdb_info->addr); + /* Take a reference on the device. This can be either + * upper device containig mlxsw_sp_port or just a + * mlxsw_sp_port + */ + dev_hold(dev); + break; + case SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE: + case SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE: + INIT_WORK(&switchdev_work->work, + mlxsw_sp_switchdev_vxlan_fdb_event_work); + err = mlxsw_sp_switchdev_vxlan_work_prepare(switchdev_work, + info); + if (err) + goto err_vxlan_work_prepare; + dev_hold(dev); + break; + default: + kfree(switchdev_work); + return NOTIFY_DONE; + } + + mlxsw_core_schedule_work(&switchdev_work->work); + + return NOTIFY_DONE; + +err_vxlan_work_prepare: +err_addr_alloc: + kfree(switchdev_work); + return NOTIFY_BAD; +} + +struct notifier_block mlxsw_sp_switchdev_notifier = { + .notifier_call = mlxsw_sp_switchdev_event, +}; + +static int +mlxsw_sp_switchdev_vxlan_vlan_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev, u16 vid, + bool flag_untagged, bool flag_pvid, + struct netlink_ext_ack *extack) +{ + struct vxlan_dev *vxlan = netdev_priv(vxlan_dev); + __be32 vni = vxlan->cfg.vni; + struct mlxsw_sp_fid *fid; + u16 old_vid; + int err; + + /* We cannot have the same VLAN as PVID and egress untagged on multiple + * VxLAN devices. Note that we get this notification before the VLAN is + * actually added to the bridge's database, so it is not possible for + * the lookup function to return 'vxlan_dev' + */ + if (flag_untagged && flag_pvid && + mlxsw_sp_bridge_8021q_vxlan_dev_find(bridge_device->dev, vid)) { + NL_SET_ERR_MSG_MOD(extack, "VLAN already mapped to a different VNI"); + return -EINVAL; + } + + if (!netif_running(vxlan_dev)) + return 0; + + /* First case: FID is not associated with this VNI, but the new VLAN + * is both PVID and egress untagged. Need to enable NVE on the FID, if + * it exists + */ + fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vni); + if (!fid) { + if (!flag_untagged || !flag_pvid) + return 0; + return bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, + vid, extack); + } + + /* Second case: FID is associated with the VNI and the VLAN associated + * with the FID is the same as the notified VLAN. This means the flags + * (PVID / egress untagged) were toggled and that NVE should be + * disabled on the FID + */ + old_vid = mlxsw_sp_fid_8021q_vid(fid); + if (vid == old_vid) { + if (WARN_ON(flag_untagged && flag_pvid)) { + mlxsw_sp_fid_put(fid); + return -EINVAL; + } + mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev); + mlxsw_sp_fid_put(fid); + return 0; + } + + /* Third case: A new VLAN was configured on the VxLAN device, but this + * VLAN is not PVID, so there is nothing to do. 
+ */ + if (!flag_pvid) { + mlxsw_sp_fid_put(fid); + return 0; + } + + /* Fourth case: Thew new VLAN is PVID, which means the VLAN currently + * mapped to the VNI should be unmapped + */ + mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev); + mlxsw_sp_fid_put(fid); + + /* Fifth case: The new VLAN is also egress untagged, which means the + * VLAN needs to be mapped to the VNI + */ + if (!flag_untagged) + return 0; + + err = bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, vid, extack); + if (err) + goto err_vxlan_join; + + return 0; + +err_vxlan_join: + bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, old_vid, NULL); + return err; +} + +static void +mlxsw_sp_switchdev_vxlan_vlan_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev, u16 vid) +{ + struct vxlan_dev *vxlan = netdev_priv(vxlan_dev); + __be32 vni = vxlan->cfg.vni; + struct mlxsw_sp_fid *fid; + + if (!netif_running(vxlan_dev)) + return; + + fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vni); + if (!fid) + return; + + /* A different VLAN than the one mapped to the VNI is deleted */ + if (mlxsw_sp_fid_8021q_vid(fid) != vid) + goto out; + + mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev); + +out: + mlxsw_sp_fid_put(fid); +} + +static int +mlxsw_sp_switchdev_vxlan_vlans_add(struct net_device *vxlan_dev, + struct switchdev_notifier_port_obj_info * + port_obj_info) +{ + struct switchdev_obj_port_vlan *vlan = + SWITCHDEV_OBJ_PORT_VLAN(port_obj_info->obj); + bool flag_untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; + bool flag_pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; + struct mlxsw_sp_bridge_device *bridge_device; + struct netlink_ext_ack *extack; + struct mlxsw_sp *mlxsw_sp; + struct net_device *br_dev; + + extack = switchdev_notifier_info_to_extack(&port_obj_info->info); + br_dev = netdev_master_upper_dev_get(vxlan_dev); + if (!br_dev) + return 0; + + mlxsw_sp = mlxsw_sp_lower_get(br_dev); + if (!mlxsw_sp) + return 0; + + port_obj_info->handled = true; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (!bridge_device) + return -EINVAL; + + if (!bridge_device->vlan_enabled) + return 0; + + return mlxsw_sp_switchdev_vxlan_vlan_add(mlxsw_sp, bridge_device, + vxlan_dev, vlan->vid, + flag_untagged, + flag_pvid, extack); +} + +static void +mlxsw_sp_switchdev_vxlan_vlans_del(struct net_device *vxlan_dev, + struct switchdev_notifier_port_obj_info * + port_obj_info) +{ + struct switchdev_obj_port_vlan *vlan = + SWITCHDEV_OBJ_PORT_VLAN(port_obj_info->obj); + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp *mlxsw_sp; + struct net_device *br_dev; + + br_dev = netdev_master_upper_dev_get(vxlan_dev); + if (!br_dev) + return; + + mlxsw_sp = mlxsw_sp_lower_get(br_dev); + if (!mlxsw_sp) + return; + + port_obj_info->handled = true; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (!bridge_device) + return; + + if (!bridge_device->vlan_enabled) + return; + + mlxsw_sp_switchdev_vxlan_vlan_del(mlxsw_sp, bridge_device, vxlan_dev, + vlan->vid); +} + +static int +mlxsw_sp_switchdev_handle_vxlan_obj_add(struct net_device *vxlan_dev, + struct switchdev_notifier_port_obj_info * + port_obj_info) +{ + int err = 0; + + switch (port_obj_info->obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + err = mlxsw_sp_switchdev_vxlan_vlans_add(vxlan_dev, + port_obj_info); + break; + default: + break; + } + + return err; +} + +static void +mlxsw_sp_switchdev_handle_vxlan_obj_del(struct net_device *vxlan_dev, + struct 
switchdev_notifier_port_obj_info * + port_obj_info) +{ + switch (port_obj_info->obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + mlxsw_sp_switchdev_vxlan_vlans_del(vxlan_dev, port_obj_info); + break; + default: + break; + } +} + +static int mlxsw_sp_switchdev_blocking_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + int err = 0; + + switch (event) { + case SWITCHDEV_PORT_OBJ_ADD: + if (netif_is_vxlan(dev)) + err = mlxsw_sp_switchdev_handle_vxlan_obj_add(dev, ptr); + else + err = switchdev_handle_port_obj_add(dev, ptr, + mlxsw_sp_port_dev_check, + mlxsw_sp_port_obj_add); + return notifier_from_errno(err); + case SWITCHDEV_PORT_OBJ_DEL: + if (netif_is_vxlan(dev)) + mlxsw_sp_switchdev_handle_vxlan_obj_del(dev, ptr); + else + err = switchdev_handle_port_obj_del(dev, ptr, + mlxsw_sp_port_dev_check, + mlxsw_sp_port_obj_del); + return notifier_from_errno(err); + case SWITCHDEV_PORT_ATTR_SET: + err = switchdev_handle_port_attr_set(dev, ptr, + mlxsw_sp_port_dev_check, + mlxsw_sp_port_attr_set); + return notifier_from_errno(err); + } + + return NOTIFY_DONE; +} + +static struct notifier_block mlxsw_sp_switchdev_blocking_notifier = { + .notifier_call = mlxsw_sp_switchdev_blocking_event, +}; + +u8 +mlxsw_sp_bridge_port_stp_state(struct mlxsw_sp_bridge_port *bridge_port) +{ + return bridge_port->stp_state; +} + +static int mlxsw_sp_fdb_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_bridge *bridge = mlxsw_sp->bridge; + struct notifier_block *nb; + int err; + + err = mlxsw_sp_ageing_set(mlxsw_sp, MLXSW_SP_DEFAULT_AGEING_TIME); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to set default ageing time\n"); + return err; + } + + err = register_switchdev_notifier(&mlxsw_sp_switchdev_notifier); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to register switchdev notifier\n"); + return err; + } + + nb = &mlxsw_sp_switchdev_blocking_notifier; + err = register_switchdev_blocking_notifier(nb); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to register switchdev blocking notifier\n"); + goto err_register_switchdev_blocking_notifier; + } + + INIT_DELAYED_WORK(&bridge->fdb_notify.dw, mlxsw_sp_fdb_notify_work); + bridge->fdb_notify.interval = MLXSW_SP_DEFAULT_LEARNING_INTERVAL; + return 0; + +err_register_switchdev_blocking_notifier: + unregister_switchdev_notifier(&mlxsw_sp_switchdev_notifier); + return err; +} + +static void mlxsw_sp_fdb_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct notifier_block *nb; + + cancel_delayed_work_sync(&mlxsw_sp->bridge->fdb_notify.dw); + + nb = &mlxsw_sp_switchdev_blocking_notifier; + unregister_switchdev_blocking_notifier(nb); + + unregister_switchdev_notifier(&mlxsw_sp_switchdev_notifier); +} + +static void mlxsw_sp1_switchdev_init(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp->bridge->bridge_8021ad_ops = &mlxsw_sp1_bridge_8021ad_ops; +} + +const struct mlxsw_sp_switchdev_ops mlxsw_sp1_switchdev_ops = { + .init = mlxsw_sp1_switchdev_init, +}; + +static void mlxsw_sp2_switchdev_init(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp->bridge->bridge_8021ad_ops = &mlxsw_sp2_bridge_8021ad_ops; +} + +const struct mlxsw_sp_switchdev_ops mlxsw_sp2_switchdev_ops = { + .init = mlxsw_sp2_switchdev_init, +}; + +int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_bridge *bridge; + + bridge = kzalloc(sizeof(*mlxsw_sp->bridge), GFP_KERNEL); + if (!bridge) + return -ENOMEM; + mlxsw_sp->bridge = bridge; + bridge->mlxsw_sp = mlxsw_sp; + + 
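/* bridges_list tracks the bridge devices offloaded by this instance. */ +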
INIT_LIST_HEAD(&mlxsw_sp->bridge->bridges_list); + + bridge->bridge_8021q_ops = &mlxsw_sp_bridge_8021q_ops; + bridge->bridge_8021d_ops = &mlxsw_sp_bridge_8021d_ops; + + mlxsw_sp->switchdev_ops->init(mlxsw_sp); + + return mlxsw_sp_fdb_init(mlxsw_sp); +} + +void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp_fdb_fini(mlxsw_sp); + WARN_ON(!list_empty(&mlxsw_sp->bridge->bridges_list)); + kfree(mlxsw_sp->bridge); +} + diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.h new file mode 100644 index 000000000..c218e10bd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#include <linux/netdevice.h> + +struct mlxsw_sp_bridge; +struct mlxsw_sp_bridge_port; + +struct mlxsw_sp_bridge_port * +mlxsw_sp_bridge_port_find(struct mlxsw_sp_bridge *bridge, + struct net_device *brport_dev); + +u8 mlxsw_sp_bridge_port_stp_state(struct mlxsw_sp_bridge_port *bridge_port); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c new file mode 100644 index 000000000..f4bfdb6da --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -0,0 +1,1969 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */ + +#include <linux/bitops.h> +#include <linux/kernel.h> +#include <linux/netlink.h> +#include <net/devlink.h> +#include <uapi/linux/devlink.h> + +#include "core.h" +#include "reg.h" +#include "spectrum.h" +#include "spectrum_trap.h" + +struct mlxsw_sp_trap_policer_item { + struct devlink_trap_policer policer; + u16 hw_id; +}; + +struct mlxsw_sp_trap_group_item { + struct devlink_trap_group group; + u16 hw_group_id; + u8 priority; + u8 fixed_policer:1; /* Whether policer binding can change */ +}; + +#define MLXSW_SP_TRAP_LISTENERS_MAX 3 + +struct mlxsw_sp_trap_item { + struct devlink_trap trap; + struct mlxsw_listener listeners_arr[MLXSW_SP_TRAP_LISTENERS_MAX]; + u8 is_source:1; +}; + +/* All driver-specific traps must be documented in + * Documentation/networking/devlink/mlxsw.rst + */ +enum { + DEVLINK_MLXSW_TRAP_ID_BASE = DEVLINK_TRAP_GENERIC_ID_MAX, + DEVLINK_MLXSW_TRAP_ID_IRIF_DISABLED, + DEVLINK_MLXSW_TRAP_ID_ERIF_DISABLED, +}; + +#define DEVLINK_MLXSW_TRAP_NAME_IRIF_DISABLED \ + "irif_disabled" +#define DEVLINK_MLXSW_TRAP_NAME_ERIF_DISABLED \ + "erif_disabled" + +#define MLXSW_SP_TRAP_METADATA DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT + +enum { + /* Packet was mirrored from ingress. */ + MLXSW_SP_MIRROR_REASON_INGRESS = 1, + /* Packet was mirrored from policy engine. */ + MLXSW_SP_MIRROR_REASON_POLICY_ENGINE = 2, + /* Packet was early dropped. */ + MLXSW_SP_MIRROR_REASON_INGRESS_WRED = 9, + /* Packet was mirrored from egress. 
*/ + MLXSW_SP_MIRROR_REASON_EGRESS = 14, +}; + +static int mlxsw_sp_rx_listener(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u16 local_port, + struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp_port_pcpu_stats *pcpu_stats; + + if (unlikely(!mlxsw_sp_port)) { + dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: skb received for non-existent port\n", + local_port); + kfree_skb(skb); + return -EINVAL; + } + + skb->dev = mlxsw_sp_port->dev; + + pcpu_stats = this_cpu_ptr(mlxsw_sp_port->pcpu_stats); + u64_stats_update_begin(&pcpu_stats->syncp); + pcpu_stats->rx_packets++; + pcpu_stats->rx_bytes += skb->len; + u64_stats_update_end(&pcpu_stats->syncp); + + skb->protocol = eth_type_trans(skb, skb->dev); + + return 0; +} + +static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u16 local_port, + void *trap_ctx) +{ + struct devlink_port *in_devlink_port; + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp; + struct devlink *devlink; + int err; + + mlxsw_sp = devlink_trap_ctx_priv(trap_ctx); + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + + err = mlxsw_sp_rx_listener(mlxsw_sp, skb, local_port, mlxsw_sp_port); + if (err) + return; + + devlink = priv_to_devlink(mlxsw_sp->core); + in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core, + local_port); + skb_push(skb, ETH_HLEN); + devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port, NULL); + consume_skb(skb); +} + +static void mlxsw_sp_rx_acl_drop_listener(struct sk_buff *skb, u16 local_port, + void *trap_ctx) +{ + u32 cookie_index = mlxsw_skb_cb(skb)->rx_md_info.cookie_index; + const struct flow_action_cookie *fa_cookie; + struct devlink_port *in_devlink_port; + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp; + struct devlink *devlink; + int err; + + mlxsw_sp = devlink_trap_ctx_priv(trap_ctx); + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + + err = mlxsw_sp_rx_listener(mlxsw_sp, skb, local_port, mlxsw_sp_port); + if (err) + return; + + devlink = priv_to_devlink(mlxsw_sp->core); + in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core, + local_port); + skb_push(skb, ETH_HLEN); + rcu_read_lock(); + fa_cookie = mlxsw_sp_acl_act_cookie_lookup(mlxsw_sp, cookie_index); + devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port, fa_cookie); + rcu_read_unlock(); + consume_skb(skb); +} + +static int __mlxsw_sp_rx_no_mark_listener(struct sk_buff *skb, u16 local_port, + void *trap_ctx) +{ + struct devlink_port *in_devlink_port; + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp; + struct devlink *devlink; + int err; + + mlxsw_sp = devlink_trap_ctx_priv(trap_ctx); + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + + err = mlxsw_sp_rx_listener(mlxsw_sp, skb, local_port, mlxsw_sp_port); + if (err) + return err; + + devlink = priv_to_devlink(mlxsw_sp->core); + in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core, + local_port); + skb_push(skb, ETH_HLEN); + devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port, NULL); + skb_pull(skb, ETH_HLEN); + + return 0; +} + +static void mlxsw_sp_rx_no_mark_listener(struct sk_buff *skb, u16 local_port, + void *trap_ctx) +{ + int err; + + err = __mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx); + if (err) + return; + + netif_receive_skb(skb); +} + +static void mlxsw_sp_rx_mark_listener(struct sk_buff *skb, u16 local_port, + void *trap_ctx) +{ + skb->offload_fwd_mark = 1; + mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx); +} + +static void mlxsw_sp_rx_l3_mark_listener(struct sk_buff *skb, u16 
local_port, + void *trap_ctx) +{ + skb->offload_l3_fwd_mark = 1; + skb->offload_fwd_mark = 1; + mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx); +} + +static void mlxsw_sp_rx_ptp_listener(struct sk_buff *skb, u16 local_port, + void *trap_ctx) +{ + struct mlxsw_sp *mlxsw_sp = devlink_trap_ctx_priv(trap_ctx); + int err; + + err = __mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx); + if (err) + return; + + /* The PTP handler expects skb->data to point to the start of the + * Ethernet header. + */ + skb_push(skb, ETH_HLEN); + mlxsw_sp_ptp_receive(mlxsw_sp, skb, local_port); +} + +static struct mlxsw_sp_port * +mlxsw_sp_sample_tx_port_get(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_rx_md_info *rx_md_info) +{ + u16 local_port; + + if (!rx_md_info->tx_port_valid) + return NULL; + + if (rx_md_info->tx_port_is_lag) + local_port = mlxsw_core_lag_mapping_get(mlxsw_sp->core, + rx_md_info->tx_lag_id, + rx_md_info->tx_lag_port_index); + else + local_port = rx_md_info->tx_sys_port; + + if (local_port >= mlxsw_core_max_ports(mlxsw_sp->core)) + return NULL; + + return mlxsw_sp->ports[local_port]; +} + +/* The latency units are determined according to MOGCR.mirror_latency_units. It + * defaults to 64 nanoseconds. + */ +#define MLXSW_SP_MIRROR_LATENCY_SHIFT 6 + +static void mlxsw_sp_psample_md_init(struct mlxsw_sp *mlxsw_sp, + struct psample_metadata *md, + struct sk_buff *skb, int in_ifindex, + bool truncate, u32 trunc_size) +{ + struct mlxsw_rx_md_info *rx_md_info = &mlxsw_skb_cb(skb)->rx_md_info; + struct mlxsw_sp_port *mlxsw_sp_port; + + md->trunc_size = truncate ? trunc_size : skb->len; + md->in_ifindex = in_ifindex; + mlxsw_sp_port = mlxsw_sp_sample_tx_port_get(mlxsw_sp, rx_md_info); + md->out_ifindex = mlxsw_sp_port && mlxsw_sp_port->dev ? + mlxsw_sp_port->dev->ifindex : 0; + md->out_tc_valid = rx_md_info->tx_tc_valid; + md->out_tc = rx_md_info->tx_tc; + md->out_tc_occ_valid = rx_md_info->tx_congestion_valid; + md->out_tc_occ = rx_md_info->tx_congestion; + md->latency_valid = rx_md_info->latency_valid; + md->latency = rx_md_info->latency; + md->latency <<= MLXSW_SP_MIRROR_LATENCY_SHIFT; +} + +static void mlxsw_sp_rx_sample_listener(struct sk_buff *skb, u16 local_port, + void *trap_ctx) +{ + struct mlxsw_sp *mlxsw_sp = devlink_trap_ctx_priv(trap_ctx); + struct mlxsw_sp_sample_trigger trigger; + struct mlxsw_sp_sample_params *params; + struct mlxsw_sp_port *mlxsw_sp_port; + struct psample_metadata md = {}; + int err; + + err = __mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx); + if (err) + return; + + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) + goto out; + + trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_INGRESS; + trigger.local_port = local_port; + params = mlxsw_sp_sample_trigger_params_lookup(mlxsw_sp, &trigger); + if (!params) + goto out; + + /* The psample module expects skb->data to point to the start of the + * Ethernet header. 
+ */ + skb_push(skb, ETH_HLEN); + mlxsw_sp_psample_md_init(mlxsw_sp, &md, skb, + mlxsw_sp_port->dev->ifindex, params->truncate, + params->trunc_size); + psample_sample_packet(params->psample_group, skb, params->rate, &md); +out: + consume_skb(skb); +} + +static void mlxsw_sp_rx_sample_tx_listener(struct sk_buff *skb, u16 local_port, + void *trap_ctx) +{ + struct mlxsw_rx_md_info *rx_md_info = &mlxsw_skb_cb(skb)->rx_md_info; + struct mlxsw_sp *mlxsw_sp = devlink_trap_ctx_priv(trap_ctx); + struct mlxsw_sp_port *mlxsw_sp_port, *mlxsw_sp_port_tx; + struct mlxsw_sp_sample_trigger trigger; + struct mlxsw_sp_sample_params *params; + struct psample_metadata md = {}; + int err; + + /* Locally generated packets are not reported from the policy engine + * trigger, so do not report them from the egress trigger as well. + */ + if (local_port == MLXSW_PORT_CPU_PORT) + goto out; + + err = __mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx); + if (err) + return; + + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) + goto out; + + /* Packet was sampled from Tx, so we need to retrieve the sample + * parameters based on the Tx port and not the Rx port. + */ + mlxsw_sp_port_tx = mlxsw_sp_sample_tx_port_get(mlxsw_sp, rx_md_info); + if (!mlxsw_sp_port_tx) + goto out; + + trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_EGRESS; + trigger.local_port = mlxsw_sp_port_tx->local_port; + params = mlxsw_sp_sample_trigger_params_lookup(mlxsw_sp, &trigger); + if (!params) + goto out; + + /* The psample module expects skb->data to point to the start of the + * Ethernet header. + */ + skb_push(skb, ETH_HLEN); + mlxsw_sp_psample_md_init(mlxsw_sp, &md, skb, + mlxsw_sp_port->dev->ifindex, params->truncate, + params->trunc_size); + psample_sample_packet(params->psample_group, skb, params->rate, &md); +out: + consume_skb(skb); +} + +static void mlxsw_sp_rx_sample_acl_listener(struct sk_buff *skb, u16 local_port, + void *trap_ctx) +{ + struct mlxsw_sp *mlxsw_sp = devlink_trap_ctx_priv(trap_ctx); + struct mlxsw_sp_sample_trigger trigger = { + .type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_POLICY_ENGINE, + }; + struct mlxsw_sp_sample_params *params; + struct mlxsw_sp_port *mlxsw_sp_port; + struct psample_metadata md = {}; + int err; + + err = __mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx); + if (err) + return; + + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) + goto out; + + params = mlxsw_sp_sample_trigger_params_lookup(mlxsw_sp, &trigger); + if (!params) + goto out; + + /* The psample module expects skb->data to point to the start of the + * Ethernet header. 
+ */ + skb_push(skb, ETH_HLEN); + mlxsw_sp_psample_md_init(mlxsw_sp, &md, skb, + mlxsw_sp_port->dev->ifindex, params->truncate, + params->trunc_size); + psample_sample_packet(params->psample_group, skb, params->rate, &md); +out: + consume_skb(skb); +} + +#define MLXSW_SP_TRAP_DROP(_id, _group_id) \ + DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ + MLXSW_SP_TRAP_METADATA) + +#define MLXSW_SP_TRAP_DROP_EXT(_id, _group_id, _metadata) \ + DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ + MLXSW_SP_TRAP_METADATA | (_metadata)) + +#define MLXSW_SP_TRAP_BUFFER_DROP(_id) \ + DEVLINK_TRAP_GENERIC(DROP, TRAP, _id, \ + DEVLINK_TRAP_GROUP_GENERIC_ID_BUFFER_DROPS, \ + MLXSW_SP_TRAP_METADATA) + +#define MLXSW_SP_TRAP_DRIVER_DROP(_id, _group_id) \ + DEVLINK_TRAP_DRIVER(DROP, DROP, DEVLINK_MLXSW_TRAP_ID_##_id, \ + DEVLINK_MLXSW_TRAP_NAME_##_id, \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ + MLXSW_SP_TRAP_METADATA) + +#define MLXSW_SP_TRAP_EXCEPTION(_id, _group_id) \ + DEVLINK_TRAP_GENERIC(EXCEPTION, TRAP, _id, \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ + MLXSW_SP_TRAP_METADATA) + +#define MLXSW_SP_TRAP_CONTROL(_id, _group_id, _action) \ + DEVLINK_TRAP_GENERIC(CONTROL, _action, _id, \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ + MLXSW_SP_TRAP_METADATA) + +#define MLXSW_SP_RXL_DISCARD(_id, _group_id) \ + MLXSW_RXL_DIS(mlxsw_sp_rx_drop_listener, DISCARD_##_id, \ + TRAP_EXCEPTION_TO_CPU, false, SP_##_group_id, \ + SET_FW_DEFAULT, SP_##_group_id) + +#define MLXSW_SP_RXL_ACL_DISCARD(_id, _en_group_id, _dis_group_id) \ + MLXSW_RXL_DIS(mlxsw_sp_rx_acl_drop_listener, DISCARD_##_id, \ + TRAP_EXCEPTION_TO_CPU, false, SP_##_en_group_id, \ + SET_FW_DEFAULT, SP_##_dis_group_id) + +#define MLXSW_SP_RXL_BUFFER_DISCARD(_mirror_reason) \ + MLXSW_RXL_MIRROR(mlxsw_sp_rx_drop_listener, 0, SP_BUFFER_DISCARDS, \ + MLXSW_SP_MIRROR_REASON_##_mirror_reason) + +#define MLXSW_SP_RXL_EXCEPTION(_id, _group_id, _action) \ + MLXSW_RXL(mlxsw_sp_rx_mark_listener, _id, \ + _action, false, SP_##_group_id, SET_FW_DEFAULT) + +#define MLXSW_SP_RXL_NO_MARK(_id, _group_id, _action, _is_ctrl) \ + MLXSW_RXL(mlxsw_sp_rx_no_mark_listener, _id, _action, \ + _is_ctrl, SP_##_group_id, DISCARD) + +#define MLXSW_SP_RXL_MARK(_id, _group_id, _action, _is_ctrl) \ + MLXSW_RXL(mlxsw_sp_rx_mark_listener, _id, _action, _is_ctrl, \ + SP_##_group_id, DISCARD) + +#define MLXSW_SP_RXL_L3_MARK(_id, _group_id, _action, _is_ctrl) \ + MLXSW_RXL(mlxsw_sp_rx_l3_mark_listener, _id, _action, _is_ctrl, \ + SP_##_group_id, DISCARD) + +#define MLXSW_SP_TRAP_POLICER(_id, _rate, _burst) \ + DEVLINK_TRAP_POLICER(_id, _rate, _burst, \ + MLXSW_REG_QPCR_HIGHEST_CIR, \ + MLXSW_REG_QPCR_LOWEST_CIR, \ + 1 << MLXSW_REG_QPCR_HIGHEST_CBS, \ + 1 << MLXSW_REG_QPCR_LOWEST_CBS) + +/* Ordered by policer identifier */ +static const struct mlxsw_sp_trap_policer_item +mlxsw_sp_trap_policer_items_arr[] = { + { + .policer = MLXSW_SP_TRAP_POLICER(1, 10 * 1024, 4096), + }, + { + .policer = MLXSW_SP_TRAP_POLICER(2, 128, 128), + }, + { + .policer = MLXSW_SP_TRAP_POLICER(3, 128, 128), + }, + { + .policer = MLXSW_SP_TRAP_POLICER(4, 128, 128), + }, + { + .policer = MLXSW_SP_TRAP_POLICER(5, 16 * 1024, 8192), + }, + { + .policer = MLXSW_SP_TRAP_POLICER(6, 128, 128), + }, + { + .policer = MLXSW_SP_TRAP_POLICER(7, 1024, 512), + }, + { + .policer = MLXSW_SP_TRAP_POLICER(8, 20 * 1024, 8192), + }, + { + .policer = MLXSW_SP_TRAP_POLICER(9, 128, 128), + }, + { + .policer = MLXSW_SP_TRAP_POLICER(10, 1024, 512), + }, + { + 
.policer = MLXSW_SP_TRAP_POLICER(11, 256, 128), + }, + { + .policer = MLXSW_SP_TRAP_POLICER(12, 128, 128), + }, + { + .policer = MLXSW_SP_TRAP_POLICER(13, 128, 128), + }, + { + .policer = MLXSW_SP_TRAP_POLICER(14, 1024, 512), + }, + { + .policer = MLXSW_SP_TRAP_POLICER(15, 1024, 512), + }, + { + .policer = MLXSW_SP_TRAP_POLICER(16, 24 * 1024, 16384), + }, + { + .policer = MLXSW_SP_TRAP_POLICER(17, 19 * 1024, 8192), + }, + { + .policer = MLXSW_SP_TRAP_POLICER(18, 1024, 512), + }, + { + .policer = MLXSW_SP_TRAP_POLICER(19, 1024, 512), + }, + { + .policer = MLXSW_SP_TRAP_POLICER(20, 10240, 4096), + }, +}; + +static const struct mlxsw_sp_trap_group_item mlxsw_sp_trap_group_items_arr[] = { + { + .group = DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS, 1), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS, + .priority = 0, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(L3_DROPS, 1), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_DISCARDS, + .priority = 0, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(L3_EXCEPTIONS, 1), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_EXCEPTIONS, + .priority = 2, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(TUNNEL_DROPS, 1), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_TUNNEL_DISCARDS, + .priority = 0, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(ACL_DROPS, 1), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_ACL_DISCARDS, + .priority = 0, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(STP, 2), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_STP, + .priority = 5, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(LACP, 3), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP, + .priority = 5, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(LLDP, 4), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP, + .priority = 5, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(MC_SNOOPING, 5), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_MC_SNOOPING, + .priority = 3, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(DHCP, 6), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP, + .priority = 2, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(NEIGH_DISCOVERY, 7), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_NEIGH_DISCOVERY, + .priority = 2, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(BFD, 8), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_BFD, + .priority = 5, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(OSPF, 9), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF, + .priority = 5, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(BGP, 10), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP, + .priority = 4, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(VRRP, 11), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP, + .priority = 5, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(PIM, 12), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM, + .priority = 5, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(UC_LB, 13), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR, + .priority = 0, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(LOCAL_DELIVERY, 14), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME, + .priority = 2, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(EXTERNAL_DELIVERY, 19), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_EXTERNAL_ROUTE, + .priority = 1, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(IPV6, 15), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6, + .priority = 2, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(PTP_EVENT, 16), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0, + .priority = 5, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(PTP_GENERAL, 17), + 
.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1, + .priority = 2, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(ACL_TRAP, 18), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_FLOW_LOGGING, + .priority = 4, + }, +}; + +static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = { + { + .trap = MLXSW_SP_TRAP_DROP(SMAC_MC, L2_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ING_PACKET_SMAC_MC, L2_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(VLAN_TAG_MISMATCH, L2_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ING_SWITCH_VTAG_ALLOW, + L2_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ING_SWITCH_VLAN, L2_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(INGRESS_STP_FILTER, L2_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ING_SWITCH_STP, L2_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(EMPTY_TX_LIST, L2_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_UC, L2_DISCARDS), + MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_MC_NULL, L2_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(PORT_LOOPBACK_FILTER, L2_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_LB, L2_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(BLACKHOLE_ROUTE, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ROUTER2, L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(NON_IP_PACKET, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ING_ROUTER_NON_IP_PACKET, + L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(UC_DIP_MC_DMAC, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ING_ROUTER_UC_DIP_MC_DMAC, + L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(DIP_LB, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ING_ROUTER_DIP_LB, L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(SIP_MC, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ING_ROUTER_SIP_MC, L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(SIP_LB, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ING_ROUTER_SIP_LB, L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(CORRUPTED_IP_HDR, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ING_ROUTER_CORRUPTED_IP_HDR, + L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(IPV4_SIP_BC, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ING_ROUTER_IPV4_SIP_BC, + L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(IPV6_MC_DIP_RESERVED_SCOPE, + L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(IPV6_MC_DIP_RESERVED_SCOPE, + L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, + L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, + L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_EXCEPTION(MTU_ERROR, L3_EXCEPTIONS), + .listeners_arr = { + MLXSW_SP_RXL_EXCEPTION(MTUERROR, L3_EXCEPTIONS, + TRAP_TO_CPU), + }, + }, + { + .trap = MLXSW_SP_TRAP_EXCEPTION(TTL_ERROR, L3_EXCEPTIONS), + .listeners_arr = { + MLXSW_SP_RXL_EXCEPTION(TTLERROR, L3_EXCEPTIONS, + TRAP_TO_CPU), + }, + }, + { + .trap = MLXSW_SP_TRAP_EXCEPTION(RPF, L3_EXCEPTIONS), + .listeners_arr = { + MLXSW_SP_RXL_EXCEPTION(RPF, L3_EXCEPTIONS, TRAP_TO_CPU), + }, + }, + { + .trap = MLXSW_SP_TRAP_EXCEPTION(REJECT_ROUTE, L3_EXCEPTIONS), + .listeners_arr = { + MLXSW_SP_RXL_EXCEPTION(RTR_INGRESS1, L3_EXCEPTIONS, + TRAP_TO_CPU), + }, + }, + { + .trap = MLXSW_SP_TRAP_EXCEPTION(UNRESOLVED_NEIGH, + L3_EXCEPTIONS), + .listeners_arr = { + MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV4, 
L3_EXCEPTIONS, + TRAP_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV6, L3_EXCEPTIONS, + TRAP_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(RTR_EGRESS0, L3_EXCEPTIONS, + TRAP_EXCEPTION_TO_CPU), + }, + }, + { + .trap = MLXSW_SP_TRAP_EXCEPTION(IPV4_LPM_UNICAST_MISS, + L3_EXCEPTIONS), + .listeners_arr = { + MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM4, + L3_EXCEPTIONS, + TRAP_EXCEPTION_TO_CPU), + }, + }, + { + .trap = MLXSW_SP_TRAP_EXCEPTION(IPV6_LPM_UNICAST_MISS, + L3_EXCEPTIONS), + .listeners_arr = { + MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM6, + L3_EXCEPTIONS, + TRAP_EXCEPTION_TO_CPU), + }, + }, + { + .trap = MLXSW_SP_TRAP_DRIVER_DROP(IRIF_DISABLED, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ROUTER_IRIF_EN, L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DRIVER_DROP(ERIF_DISABLED, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ROUTER_ERIF_EN, L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(NON_ROUTABLE, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(NON_ROUTABLE, L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_EXCEPTION(DECAP_ERROR, TUNNEL_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_EXCEPTION(DECAP_ECN0, TUNNEL_DISCARDS, + TRAP_EXCEPTION_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(IPIP_DECAP_ERROR, + TUNNEL_DISCARDS, + TRAP_EXCEPTION_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(DISCARD_DEC_PKT, TUNNEL_DISCARDS, + TRAP_EXCEPTION_TO_CPU), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(OVERLAY_SMAC_MC, TUNNEL_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(OVERLAY_SMAC_MC, TUNNEL_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP_EXT(INGRESS_FLOW_ACTION_DROP, + ACL_DROPS, + DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE), + .listeners_arr = { + MLXSW_SP_RXL_ACL_DISCARD(INGRESS_ACL, ACL_DISCARDS, + DUMMY), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP_EXT(EGRESS_FLOW_ACTION_DROP, + ACL_DROPS, + DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE), + .listeners_arr = { + MLXSW_SP_RXL_ACL_DISCARD(EGRESS_ACL, ACL_DISCARDS, + DUMMY), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(STP, STP, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_NO_MARK(STP, STP, TRAP_TO_CPU, true), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(LACP, LACP, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_NO_MARK(LACP, LACP, TRAP_TO_CPU, true), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(LLDP, LLDP, TRAP), + .listeners_arr = { + MLXSW_RXL(mlxsw_sp_rx_ptp_listener, LLDP, TRAP_TO_CPU, + true, SP_LLDP, DISCARD), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IGMP_QUERY, MC_SNOOPING, MIRROR), + .listeners_arr = { + MLXSW_SP_RXL_MARK(IGMP_QUERY, MC_SNOOPING, + MIRROR_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IGMP_V1_REPORT, MC_SNOOPING, + TRAP), + .listeners_arr = { + MLXSW_SP_RXL_NO_MARK(IGMP_V1_REPORT, MC_SNOOPING, + TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IGMP_V2_REPORT, MC_SNOOPING, + TRAP), + .listeners_arr = { + MLXSW_SP_RXL_NO_MARK(IGMP_V2_REPORT, MC_SNOOPING, + TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IGMP_V3_REPORT, MC_SNOOPING, + TRAP), + .listeners_arr = { + MLXSW_SP_RXL_NO_MARK(IGMP_V3_REPORT, MC_SNOOPING, + TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IGMP_V2_LEAVE, MC_SNOOPING, + TRAP), + .listeners_arr = { + MLXSW_SP_RXL_NO_MARK(IGMP_V2_LEAVE, MC_SNOOPING, + TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(MLD_QUERY, MC_SNOOPING, MIRROR), + .listeners_arr = { + MLXSW_SP_RXL_MARK(IPV6_MLDV12_LISTENER_QUERY, + MC_SNOOPING, MIRROR_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(MLD_V1_REPORT, MC_SNOOPING, + 
TRAP), + .listeners_arr = { + MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_REPORT, + MC_SNOOPING, TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(MLD_V2_REPORT, MC_SNOOPING, + TRAP), + .listeners_arr = { + MLXSW_SP_RXL_NO_MARK(IPV6_MLDV2_LISTENER_REPORT, + MC_SNOOPING, TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(MLD_V1_DONE, MC_SNOOPING, + TRAP), + .listeners_arr = { + MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_DONE, + MC_SNOOPING, TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IPV4_DHCP, DHCP, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(IPV4_DHCP, DHCP, TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IPV6_DHCP, DHCP, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(IPV6_DHCP, DHCP, TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(ARP_REQUEST, NEIGH_DISCOVERY, + MIRROR), + .listeners_arr = { + MLXSW_SP_RXL_MARK(ROUTER_ARPBC, NEIGH_DISCOVERY, + TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(ARP_RESPONSE, NEIGH_DISCOVERY, + MIRROR), + .listeners_arr = { + MLXSW_SP_RXL_MARK(ROUTER_ARPUC, NEIGH_DISCOVERY, + TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(ARP_OVERLAY, NEIGH_DISCOVERY, + TRAP), + .listeners_arr = { + MLXSW_SP_RXL_NO_MARK(NVE_DECAP_ARP, NEIGH_DISCOVERY, + TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IPV6_NEIGH_SOLICIT, + NEIGH_DISCOVERY, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_SOLICITATION, + NEIGH_DISCOVERY, TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IPV6_NEIGH_ADVERT, + NEIGH_DISCOVERY, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_ADVERTISEMENT, + NEIGH_DISCOVERY, TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IPV4_BFD, BFD, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(IPV4_BFD, BFD, TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IPV6_BFD, BFD, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(IPV6_BFD, BFD, TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IPV4_OSPF, OSPF, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(IPV4_OSPF, OSPF, TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IPV6_OSPF, OSPF, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(IPV6_OSPF, OSPF, TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IPV4_BGP, BGP, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(IPV4_BGP, BGP, TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IPV6_BGP, BGP, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(IPV6_BGP, BGP, TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IPV4_VRRP, VRRP, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(IPV4_VRRP, VRRP, TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IPV6_VRRP, VRRP, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(IPV6_VRRP, VRRP, TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IPV4_PIM, PIM, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(IPV4_PIM, PIM, TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IPV6_PIM, PIM, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(IPV6_PIM, PIM, TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(UC_LB, UC_LB, MIRROR), + .listeners_arr = { + MLXSW_SP_RXL_L3_MARK(LBERROR, LBERROR, MIRROR_TO_CPU, + false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(LOCAL_ROUTE, LOCAL_DELIVERY, + TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(IP2ME, IP2ME, TRAP_TO_CPU, false), + }, + }, + { + .trap = 
MLXSW_SP_TRAP_CONTROL(EXTERNAL_ROUTE, EXTERNAL_DELIVERY, + TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(RTR_INGRESS0, EXTERNAL_ROUTE, + TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IPV6_UC_DIP_LINK_LOCAL_SCOPE, + LOCAL_DELIVERY, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_DEST, IP2ME, + TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IPV4_ROUTER_ALERT, LOCAL_DELIVERY, + TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, IP2ME, TRAP_TO_CPU, + false), + }, + }, + { + /* IPV6_ROUTER_ALERT is defined in uAPI as 22, but it is not + * used in this file, so undefine it. + */ + #undef IPV6_ROUTER_ALERT + .trap = MLXSW_SP_TRAP_CONTROL(IPV6_ROUTER_ALERT, LOCAL_DELIVERY, + TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, IP2ME, TRAP_TO_CPU, + false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IPV6_DIP_ALL_NODES, IPV6, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(IPV6_ALL_NODES_LINK, IPV6, + TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IPV6_DIP_ALL_ROUTERS, IPV6, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(IPV6_ALL_ROUTERS_LINK, IPV6, + TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IPV6_ROUTER_SOLICIT, IPV6, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_SOLICITATION, IPV6, + TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IPV6_ROUTER_ADVERT, IPV6, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_ADVERTISEMENT, IPV6, + TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(IPV6_REDIRECT, IPV6, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_MARK(L3_IPV6_REDIRECTION, IPV6, + TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(PTP_EVENT, PTP_EVENT, TRAP), + .listeners_arr = { + MLXSW_RXL(mlxsw_sp_rx_ptp_listener, PTP0, TRAP_TO_CPU, + false, SP_PTP0, DISCARD), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(PTP_GENERAL, PTP_GENERAL, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_NO_MARK(PTP1, PTP1, TRAP_TO_CPU, false), + }, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(FLOW_ACTION_TRAP, ACL_TRAP, TRAP), + .listeners_arr = { + MLXSW_SP_RXL_NO_MARK(ACL0, FLOW_LOGGING, TRAP_TO_CPU, + false), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(BLACKHOLE_NEXTHOP, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ROUTER3, L3_DISCARDS), + }, + }, +}; + +static struct mlxsw_sp_trap_policer_item * +mlxsw_sp_trap_policer_item_lookup(struct mlxsw_sp *mlxsw_sp, u32 id) +{ + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + int i; + + for (i = 0; i < trap->policers_count; i++) { + if (trap->policer_items_arr[i].policer.id == id) + return &trap->policer_items_arr[i]; + } + + return NULL; +} + +static struct mlxsw_sp_trap_group_item * +mlxsw_sp_trap_group_item_lookup(struct mlxsw_sp *mlxsw_sp, u16 id) +{ + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + int i; + + for (i = 0; i < trap->groups_count; i++) { + if (trap->group_items_arr[i].group.id == id) + return &trap->group_items_arr[i]; + } + + return NULL; +} + +static struct mlxsw_sp_trap_item * +mlxsw_sp_trap_item_lookup(struct mlxsw_sp *mlxsw_sp, u16 id) +{ + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + int i; + + for (i = 0; i < trap->traps_count; i++) { + if (trap->trap_items_arr[i].trap.id == id) + return &trap->trap_items_arr[i]; + } + + return NULL; +} + +static int mlxsw_sp_trap_cpu_policers_set(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + char qpcr_pl[MLXSW_REG_QPCR_LEN]; + u16 hw_id; + + /* The purpose of "thin" policer 
is to drop as many packets + * as possible. The dummy group is using it. + */ + hw_id = find_first_zero_bit(trap->policers_usage, trap->max_policers); + if (WARN_ON(hw_id == trap->max_policers)) + return -ENOBUFS; + + __set_bit(hw_id, trap->policers_usage); + trap->thin_policer_hw_id = hw_id; + mlxsw_reg_qpcr_pack(qpcr_pl, hw_id, MLXSW_REG_QPCR_IR_UNITS_M, + false, 1, 4); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpcr), qpcr_pl); +} + +static int mlxsw_sp_trap_dummy_group_init(struct mlxsw_sp *mlxsw_sp) +{ + char htgt_pl[MLXSW_REG_HTGT_LEN]; + + mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_SP_DUMMY, + mlxsw_sp->trap->thin_policer_hw_id, 0, 1); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(htgt), htgt_pl); +} + +static int mlxsw_sp_trap_policer_items_arr_init(struct mlxsw_sp *mlxsw_sp) +{ + size_t arr_size = ARRAY_SIZE(mlxsw_sp_trap_policer_items_arr); + size_t elem_size = sizeof(struct mlxsw_sp_trap_policer_item); + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + size_t free_policers = 0; + u32 last_id; + int i; + + for_each_clear_bit(i, trap->policers_usage, trap->max_policers) + free_policers++; + + if (arr_size > free_policers) { + dev_err(mlxsw_sp->bus_info->dev, "Exceeded number of supported packet trap policers\n"); + return -ENOBUFS; + } + + trap->policer_items_arr = kcalloc(free_policers, elem_size, GFP_KERNEL); + if (!trap->policer_items_arr) + return -ENOMEM; + + trap->policers_count = free_policers; + + /* Initialize policer items array with pre-defined policers. */ + memcpy(trap->policer_items_arr, mlxsw_sp_trap_policer_items_arr, + elem_size * arr_size); + + /* Initialize policer items array with the rest of the available + * policers. + */ + last_id = mlxsw_sp_trap_policer_items_arr[arr_size - 1].policer.id; + for (i = arr_size; i < trap->policers_count; i++) { + const struct mlxsw_sp_trap_policer_item *policer_item; + + /* Use parameters set for first policer and override + * relevant ones. 
+ */ + policer_item = &mlxsw_sp_trap_policer_items_arr[0]; + trap->policer_items_arr[i] = *policer_item; + trap->policer_items_arr[i].policer.id = ++last_id; + trap->policer_items_arr[i].policer.init_rate = 1; + trap->policer_items_arr[i].policer.init_burst = 16; + } + + return 0; +} + +static void mlxsw_sp_trap_policer_items_arr_fini(struct mlxsw_sp *mlxsw_sp) +{ + kfree(mlxsw_sp->trap->policer_items_arr); +} + +static int mlxsw_sp_trap_policers_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + const struct mlxsw_sp_trap_policer_item *policer_item; + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + int err, i; + + err = mlxsw_sp_trap_policer_items_arr_init(mlxsw_sp); + if (err) + return err; + + for (i = 0; i < trap->policers_count; i++) { + policer_item = &trap->policer_items_arr[i]; + err = devl_trap_policers_register(devlink, + &policer_item->policer, 1); + if (err) + goto err_trap_policer_register; + } + + return 0; + +err_trap_policer_register: + for (i--; i >= 0; i--) { + policer_item = &trap->policer_items_arr[i]; + devl_trap_policers_unregister(devlink, + &policer_item->policer, 1); + } + mlxsw_sp_trap_policer_items_arr_fini(mlxsw_sp); + return err; +} + +static void mlxsw_sp_trap_policers_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + const struct mlxsw_sp_trap_policer_item *policer_item; + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + int i; + + for (i = trap->policers_count - 1; i >= 0; i--) { + policer_item = &trap->policer_items_arr[i]; + devl_trap_policers_unregister(devlink, + &policer_item->policer, 1); + } + mlxsw_sp_trap_policer_items_arr_fini(mlxsw_sp); +} + +static int mlxsw_sp_trap_group_items_arr_init(struct mlxsw_sp *mlxsw_sp) +{ + size_t common_groups_count = ARRAY_SIZE(mlxsw_sp_trap_group_items_arr); + const struct mlxsw_sp_trap_group_item *spec_group_items_arr; + size_t elem_size = sizeof(struct mlxsw_sp_trap_group_item); + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + size_t groups_count, spec_groups_count; + int err; + + err = mlxsw_sp->trap_ops->groups_init(mlxsw_sp, &spec_group_items_arr, + &spec_groups_count); + if (err) + return err; + + /* The group items array is created by concatenating the common trap + * group items and the ASIC-specific trap group items. 
+ */ + groups_count = common_groups_count + spec_groups_count; + trap->group_items_arr = kcalloc(groups_count, elem_size, GFP_KERNEL); + if (!trap->group_items_arr) + return -ENOMEM; + + memcpy(trap->group_items_arr, mlxsw_sp_trap_group_items_arr, + elem_size * common_groups_count); + memcpy(trap->group_items_arr + common_groups_count, + spec_group_items_arr, elem_size * spec_groups_count); + + trap->groups_count = groups_count; + + return 0; +} + +static void mlxsw_sp_trap_group_items_arr_fini(struct mlxsw_sp *mlxsw_sp) +{ + kfree(mlxsw_sp->trap->group_items_arr); +} + +static int mlxsw_sp_trap_groups_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + const struct mlxsw_sp_trap_group_item *group_item; + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + int err, i; + + err = mlxsw_sp_trap_group_items_arr_init(mlxsw_sp); + if (err) + return err; + + for (i = 0; i < trap->groups_count; i++) { + group_item = &trap->group_items_arr[i]; + err = devl_trap_groups_register(devlink, &group_item->group, 1); + if (err) + goto err_trap_group_register; + } + + return 0; + +err_trap_group_register: + for (i--; i >= 0; i--) { + group_item = &trap->group_items_arr[i]; + devl_trap_groups_unregister(devlink, &group_item->group, 1); + } + mlxsw_sp_trap_group_items_arr_fini(mlxsw_sp); + return err; +} + +static void mlxsw_sp_trap_groups_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + int i; + + for (i = trap->groups_count - 1; i >= 0; i--) { + const struct mlxsw_sp_trap_group_item *group_item; + + group_item = &trap->group_items_arr[i]; + devl_trap_groups_unregister(devlink, &group_item->group, 1); + } + mlxsw_sp_trap_group_items_arr_fini(mlxsw_sp); +} + +static bool +mlxsw_sp_trap_listener_is_valid(const struct mlxsw_listener *listener) +{ + return listener->trap_id != 0; +} + +static int mlxsw_sp_trap_items_arr_init(struct mlxsw_sp *mlxsw_sp) +{ + size_t common_traps_count = ARRAY_SIZE(mlxsw_sp_trap_items_arr); + const struct mlxsw_sp_trap_item *spec_trap_items_arr; + size_t elem_size = sizeof(struct mlxsw_sp_trap_item); + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + size_t traps_count, spec_traps_count; + int err; + + err = mlxsw_sp->trap_ops->traps_init(mlxsw_sp, &spec_trap_items_arr, + &spec_traps_count); + if (err) + return err; + + /* The trap items array is created by concatenating the common trap + * items and the ASIC-specific trap items. 
+ */ + traps_count = common_traps_count + spec_traps_count; + trap->trap_items_arr = kcalloc(traps_count, elem_size, GFP_KERNEL); + if (!trap->trap_items_arr) + return -ENOMEM; + + memcpy(trap->trap_items_arr, mlxsw_sp_trap_items_arr, + elem_size * common_traps_count); + memcpy(trap->trap_items_arr + common_traps_count, + spec_trap_items_arr, elem_size * spec_traps_count); + + trap->traps_count = traps_count; + + return 0; +} + +static void mlxsw_sp_trap_items_arr_fini(struct mlxsw_sp *mlxsw_sp) +{ + kfree(mlxsw_sp->trap->trap_items_arr); +} + +static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + const struct mlxsw_sp_trap_item *trap_item; + int err, i; + + err = mlxsw_sp_trap_items_arr_init(mlxsw_sp); + if (err) + return err; + + for (i = 0; i < trap->traps_count; i++) { + trap_item = &trap->trap_items_arr[i]; + err = devl_traps_register(devlink, &trap_item->trap, 1, + mlxsw_sp); + if (err) + goto err_trap_register; + } + + return 0; + +err_trap_register: + for (i--; i >= 0; i--) { + trap_item = &trap->trap_items_arr[i]; + devl_traps_unregister(devlink, &trap_item->trap, 1); + } + mlxsw_sp_trap_items_arr_fini(mlxsw_sp); + return err; +} + +static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + int i; + + for (i = trap->traps_count - 1; i >= 0; i--) { + const struct mlxsw_sp_trap_item *trap_item; + + trap_item = &trap->trap_items_arr[i]; + devl_traps_unregister(devlink, &trap_item->trap, 1); + } + mlxsw_sp_trap_items_arr_fini(mlxsw_sp); +} + +int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + err = mlxsw_sp_trap_cpu_policers_set(mlxsw_sp); + if (err) + return err; + + err = mlxsw_sp_trap_dummy_group_init(mlxsw_sp); + if (err) + return err; + + err = mlxsw_sp_trap_policers_init(mlxsw_sp); + if (err) + return err; + + err = mlxsw_sp_trap_groups_init(mlxsw_sp); + if (err) + goto err_trap_groups_init; + + err = mlxsw_sp_traps_init(mlxsw_sp); + if (err) + goto err_traps_init; + + return 0; + +err_traps_init: + mlxsw_sp_trap_groups_fini(mlxsw_sp); +err_trap_groups_init: + mlxsw_sp_trap_policers_fini(mlxsw_sp); + return err; +} + +void mlxsw_sp_devlink_traps_fini(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp_traps_fini(mlxsw_sp); + mlxsw_sp_trap_groups_fini(mlxsw_sp); + mlxsw_sp_trap_policers_fini(mlxsw_sp); +} + +int mlxsw_sp_trap_init(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, void *trap_ctx) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + const struct mlxsw_sp_trap_item *trap_item; + int i; + + trap_item = mlxsw_sp_trap_item_lookup(mlxsw_sp, trap->id); + if (WARN_ON(!trap_item)) + return -EINVAL; + + for (i = 0; i < MLXSW_SP_TRAP_LISTENERS_MAX; i++) { + const struct mlxsw_listener *listener; + int err; + + listener = &trap_item->listeners_arr[i]; + if (!mlxsw_sp_trap_listener_is_valid(listener)) + continue; + err = mlxsw_core_trap_register(mlxsw_core, listener, trap_ctx); + if (err) + return err; + } + + return 0; +} + +void mlxsw_sp_trap_fini(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, void *trap_ctx) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + const struct mlxsw_sp_trap_item *trap_item; + int i; + + trap_item = mlxsw_sp_trap_item_lookup(mlxsw_sp, trap->id); + if (WARN_ON(!trap_item)) + return; + + for (i = MLXSW_SP_TRAP_LISTENERS_MAX - 1; i >= 0; 
i--) { + const struct mlxsw_listener *listener; + + listener = &trap_item->listeners_arr[i]; + if (!mlxsw_sp_trap_listener_is_valid(listener)) + continue; + mlxsw_core_trap_unregister(mlxsw_core, listener, trap_ctx); + } +} + +int mlxsw_sp_trap_action_set(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, + enum devlink_trap_action action, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + const struct mlxsw_sp_trap_item *trap_item; + int i; + + trap_item = mlxsw_sp_trap_item_lookup(mlxsw_sp, trap->id); + if (WARN_ON(!trap_item)) + return -EINVAL; + + if (trap_item->is_source) { + NL_SET_ERR_MSG_MOD(extack, "Changing the action of source traps is not supported"); + return -EOPNOTSUPP; + } + + for (i = 0; i < MLXSW_SP_TRAP_LISTENERS_MAX; i++) { + const struct mlxsw_listener *listener; + bool enabled; + int err; + + listener = &trap_item->listeners_arr[i]; + if (!mlxsw_sp_trap_listener_is_valid(listener)) + continue; + + switch (action) { + case DEVLINK_TRAP_ACTION_DROP: + enabled = false; + break; + case DEVLINK_TRAP_ACTION_TRAP: + enabled = true; + break; + default: + return -EINVAL; + } + err = mlxsw_core_trap_state_set(mlxsw_core, listener, enabled); + if (err) + return err; + } + + return 0; +} + +static int +__mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_group *group, + u32 policer_id, struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + u16 hw_policer_id = MLXSW_REG_HTGT_INVALID_POLICER; + const struct mlxsw_sp_trap_group_item *group_item; + char htgt_pl[MLXSW_REG_HTGT_LEN]; + + group_item = mlxsw_sp_trap_group_item_lookup(mlxsw_sp, group->id); + if (WARN_ON(!group_item)) + return -EINVAL; + + if (group_item->fixed_policer && policer_id != group->init_policer_id) { + NL_SET_ERR_MSG_MOD(extack, "Changing the policer binding of this group is not supported"); + return -EOPNOTSUPP; + } + + if (policer_id) { + struct mlxsw_sp_trap_policer_item *policer_item; + + policer_item = mlxsw_sp_trap_policer_item_lookup(mlxsw_sp, + policer_id); + if (WARN_ON(!policer_item)) + return -EINVAL; + hw_policer_id = policer_item->hw_id; + } + + mlxsw_reg_htgt_pack(htgt_pl, group_item->hw_group_id, hw_policer_id, + group_item->priority, group_item->priority); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); +} + +int mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_group *group) +{ + return __mlxsw_sp_trap_group_init(mlxsw_core, group, + group->init_policer_id, NULL); +} + +int mlxsw_sp_trap_group_set(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_group *group, + const struct devlink_trap_policer *policer, + struct netlink_ext_ack *extack) +{ + u32 policer_id = policer ? policer->id : 0; + + return __mlxsw_sp_trap_group_init(mlxsw_core, group, policer_id, + extack); +} + +static int +mlxsw_sp_trap_policer_item_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_trap_policer_item *policer_item) +{ + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + u16 hw_id; + + /* We should be able to allocate a policer because the number of + * policers we registered with devlink is in according with the number + * of available policers. 
+ */ + hw_id = find_first_zero_bit(trap->policers_usage, trap->max_policers); + if (WARN_ON(hw_id == trap->max_policers)) + return -ENOBUFS; + + __set_bit(hw_id, trap->policers_usage); + policer_item->hw_id = hw_id; + + return 0; +} + +static void +mlxsw_sp_trap_policer_item_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_trap_policer_item *policer_item) +{ + __clear_bit(policer_item->hw_id, mlxsw_sp->trap->policers_usage); +} + +static int mlxsw_sp_trap_policer_bs(u64 burst, u8 *p_burst_size, + struct netlink_ext_ack *extack) +{ + int bs = fls64(burst) - 1; + + if (burst != (BIT_ULL(bs))) { + NL_SET_ERR_MSG_MOD(extack, "Policer burst size is not power of two"); + return -EINVAL; + } + + *p_burst_size = bs; + + return 0; +} + +static int __mlxsw_sp_trap_policer_set(struct mlxsw_sp *mlxsw_sp, u16 hw_id, + u64 rate, u64 burst, bool clear_counter, + struct netlink_ext_ack *extack) +{ + char qpcr_pl[MLXSW_REG_QPCR_LEN]; + u8 burst_size; + int err; + + err = mlxsw_sp_trap_policer_bs(burst, &burst_size, extack); + if (err) + return err; + + mlxsw_reg_qpcr_pack(qpcr_pl, hw_id, MLXSW_REG_QPCR_IR_UNITS_M, false, + rate, burst_size); + mlxsw_reg_qpcr_clear_counter_set(qpcr_pl, clear_counter); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpcr), qpcr_pl); +} + +int mlxsw_sp_trap_policer_init(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct mlxsw_sp_trap_policer_item *policer_item; + int err; + + policer_item = mlxsw_sp_trap_policer_item_lookup(mlxsw_sp, policer->id); + if (WARN_ON(!policer_item)) + return -EINVAL; + + err = mlxsw_sp_trap_policer_item_init(mlxsw_sp, policer_item); + if (err) + return err; + + err = __mlxsw_sp_trap_policer_set(mlxsw_sp, policer_item->hw_id, + policer->init_rate, + policer->init_burst, true, NULL); + if (err) + goto err_trap_policer_set; + + return 0; + +err_trap_policer_set: + mlxsw_sp_trap_policer_item_fini(mlxsw_sp, policer_item); + return err; +} + +void mlxsw_sp_trap_policer_fini(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct mlxsw_sp_trap_policer_item *policer_item; + + policer_item = mlxsw_sp_trap_policer_item_lookup(mlxsw_sp, policer->id); + if (WARN_ON(!policer_item)) + return; + + mlxsw_sp_trap_policer_item_fini(mlxsw_sp, policer_item); +} + +int mlxsw_sp_trap_policer_set(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer, + u64 rate, u64 burst, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct mlxsw_sp_trap_policer_item *policer_item; + + policer_item = mlxsw_sp_trap_policer_item_lookup(mlxsw_sp, policer->id); + if (WARN_ON(!policer_item)) + return -EINVAL; + + return __mlxsw_sp_trap_policer_set(mlxsw_sp, policer_item->hw_id, + rate, burst, false, extack); +} + +int +mlxsw_sp_trap_policer_counter_get(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer, + u64 *p_drops) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct mlxsw_sp_trap_policer_item *policer_item; + char qpcr_pl[MLXSW_REG_QPCR_LEN]; + int err; + + policer_item = mlxsw_sp_trap_policer_item_lookup(mlxsw_sp, policer->id); + if (WARN_ON(!policer_item)) + return -EINVAL; + + mlxsw_reg_qpcr_pack(qpcr_pl, policer_item->hw_id, + MLXSW_REG_QPCR_IR_UNITS_M, false, 0, 0); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(qpcr), qpcr_pl); + if (err) + 
return err; + + *p_drops = mlxsw_reg_qpcr_violate_count_get(qpcr_pl); + + return 0; +} + +int mlxsw_sp_trap_group_policer_hw_id_get(struct mlxsw_sp *mlxsw_sp, u16 id, + bool *p_enabled, u16 *p_hw_id) +{ + struct mlxsw_sp_trap_policer_item *pol_item; + struct mlxsw_sp_trap_group_item *gr_item; + u32 pol_id; + + gr_item = mlxsw_sp_trap_group_item_lookup(mlxsw_sp, id); + if (!gr_item) + return -ENOENT; + + pol_id = gr_item->group.init_policer_id; + if (!pol_id) { + *p_enabled = false; + return 0; + } + + pol_item = mlxsw_sp_trap_policer_item_lookup(mlxsw_sp, pol_id); + if (WARN_ON(!pol_item)) + return -ENOENT; + + *p_enabled = true; + *p_hw_id = pol_item->hw_id; + return 0; +} + +static const struct mlxsw_sp_trap_group_item +mlxsw_sp1_trap_group_items_arr[] = { + { + .group = DEVLINK_TRAP_GROUP_GENERIC(ACL_SAMPLE, 0), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE, + .priority = 0, + }, +}; + +static const struct mlxsw_sp_trap_item +mlxsw_sp1_trap_items_arr[] = { + { + .trap = MLXSW_SP_TRAP_CONTROL(FLOW_ACTION_SAMPLE, ACL_SAMPLE, + MIRROR), + .listeners_arr = { + MLXSW_RXL(mlxsw_sp_rx_sample_listener, PKT_SAMPLE, + MIRROR_TO_CPU, false, SP_PKT_SAMPLE, DISCARD), + }, + }, +}; + +static int +mlxsw_sp1_trap_groups_init(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_trap_group_item **arr, + size_t *p_groups_count) +{ + *arr = mlxsw_sp1_trap_group_items_arr; + *p_groups_count = ARRAY_SIZE(mlxsw_sp1_trap_group_items_arr); + + return 0; +} + +static int mlxsw_sp1_traps_init(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_trap_item **arr, + size_t *p_traps_count) +{ + *arr = mlxsw_sp1_trap_items_arr; + *p_traps_count = ARRAY_SIZE(mlxsw_sp1_trap_items_arr); + + return 0; +} + +const struct mlxsw_sp_trap_ops mlxsw_sp1_trap_ops = { + .groups_init = mlxsw_sp1_trap_groups_init, + .traps_init = mlxsw_sp1_traps_init, +}; + +static const struct mlxsw_sp_trap_group_item +mlxsw_sp2_trap_group_items_arr[] = { + { + .group = DEVLINK_TRAP_GROUP_GENERIC(BUFFER_DROPS, 20), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_BUFFER_DISCARDS, + .priority = 0, + .fixed_policer = true, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(ACL_SAMPLE, 0), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE, + .priority = 0, + .fixed_policer = true, + }, +}; + +static const struct mlxsw_sp_trap_item +mlxsw_sp2_trap_items_arr[] = { + { + .trap = MLXSW_SP_TRAP_BUFFER_DROP(EARLY_DROP), + .listeners_arr = { + MLXSW_SP_RXL_BUFFER_DISCARD(INGRESS_WRED), + }, + .is_source = true, + }, + { + .trap = MLXSW_SP_TRAP_CONTROL(FLOW_ACTION_SAMPLE, ACL_SAMPLE, + MIRROR), + .listeners_arr = { + MLXSW_RXL_MIRROR(mlxsw_sp_rx_sample_listener, 1, + SP_PKT_SAMPLE, + MLXSW_SP_MIRROR_REASON_INGRESS), + MLXSW_RXL_MIRROR(mlxsw_sp_rx_sample_tx_listener, 1, + SP_PKT_SAMPLE, + MLXSW_SP_MIRROR_REASON_EGRESS), + MLXSW_RXL_MIRROR(mlxsw_sp_rx_sample_acl_listener, 1, + SP_PKT_SAMPLE, + MLXSW_SP_MIRROR_REASON_POLICY_ENGINE), + }, + }, +}; + +static int +mlxsw_sp2_trap_groups_init(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_trap_group_item **arr, + size_t *p_groups_count) +{ + *arr = mlxsw_sp2_trap_group_items_arr; + *p_groups_count = ARRAY_SIZE(mlxsw_sp2_trap_group_items_arr); + + return 0; +} + +static int mlxsw_sp2_traps_init(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_trap_item **arr, + size_t *p_traps_count) +{ + *arr = mlxsw_sp2_trap_items_arr; + *p_traps_count = ARRAY_SIZE(mlxsw_sp2_trap_items_arr); + + return 0; +} + +const struct mlxsw_sp_trap_ops mlxsw_sp2_trap_ops = { + .groups_init = mlxsw_sp2_trap_groups_init, + 
.traps_init = mlxsw_sp2_traps_init, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h new file mode 100644 index 000000000..b8df684be --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2020 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_SPECTRUM_TRAP_H +#define _MLXSW_SPECTRUM_TRAP_H + +#include <linux/list.h> +#include <net/devlink.h> + +struct mlxsw_sp_trap { + struct mlxsw_sp_trap_policer_item *policer_items_arr; + size_t policers_count; /* Number of registered policers */ + + struct mlxsw_sp_trap_group_item *group_items_arr; + size_t groups_count; /* Number of registered groups */ + + struct mlxsw_sp_trap_item *trap_items_arr; + size_t traps_count; /* Number of registered traps */ + + u16 thin_policer_hw_id; + + u64 max_policers; + unsigned long policers_usage[]; /* Usage bitmap */ +}; + +struct mlxsw_sp_trap_ops { + int (*groups_init)(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_trap_group_item **arr, + size_t *p_groups_count); + int (*traps_init)(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_trap_item **arr, + size_t *p_traps_count); +}; + +extern const struct mlxsw_sp_trap_ops mlxsw_sp1_trap_ops; +extern const struct mlxsw_sp_trap_ops mlxsw_sp2_trap_ops; + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h new file mode 100644 index 000000000..8da169663 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -0,0 +1,144 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_TRAP_H +#define _MLXSW_TRAP_H + +enum { + /* Ethernet EMAD and FDB miss */ + MLXSW_TRAP_ID_FDB_MC = 0x01, + MLXSW_TRAP_ID_ETHEMAD = 0x05, + /* L2 traps for specific packet types */ + MLXSW_TRAP_ID_STP = 0x10, + MLXSW_TRAP_ID_LACP = 0x11, + MLXSW_TRAP_ID_EAPOL = 0x12, + MLXSW_TRAP_ID_LLDP = 0x13, + MLXSW_TRAP_ID_MMRP = 0x14, + MLXSW_TRAP_ID_MVRP = 0x15, + MLXSW_TRAP_ID_RPVST = 0x16, + MLXSW_TRAP_ID_DHCP = 0x19, + MLXSW_TRAP_ID_PTP0 = 0x28, + MLXSW_TRAP_ID_PTP1 = 0x29, + MLXSW_TRAP_ID_IGMP_QUERY = 0x30, + MLXSW_TRAP_ID_IGMP_V1_REPORT = 0x31, + MLXSW_TRAP_ID_IGMP_V2_REPORT = 0x32, + MLXSW_TRAP_ID_IGMP_V2_LEAVE = 0x33, + MLXSW_TRAP_ID_IGMP_V3_REPORT = 0x34, + MLXSW_TRAP_ID_PKT_SAMPLE = 0x38, + MLXSW_TRAP_ID_FID_MISS = 0x3D, + MLXSW_TRAP_ID_DECAP_ECN0 = 0x40, + MLXSW_TRAP_ID_MTUERROR = 0x52, + MLXSW_TRAP_ID_TTLERROR = 0x53, + MLXSW_TRAP_ID_LBERROR = 0x54, + MLXSW_TRAP_ID_IPV4_OSPF = 0x55, + MLXSW_TRAP_ID_IPV4_PIM = 0x58, + MLXSW_TRAP_ID_IPV4_VRRP = 0x59, + MLXSW_TRAP_ID_RPF = 0x5C, + MLXSW_TRAP_ID_IP2ME = 0x5F, + MLXSW_TRAP_ID_IPV6_UNSPECIFIED_ADDRESS = 0x60, + MLXSW_TRAP_ID_IPV6_LINK_LOCAL_DEST = 0x61, + MLXSW_TRAP_ID_IPV6_LINK_LOCAL_SRC = 0x62, + MLXSW_TRAP_ID_IPV6_ALL_NODES_LINK = 0x63, + MLXSW_TRAP_ID_IPV6_OSPF = 0x64, + MLXSW_TRAP_ID_IPV6_MLDV12_LISTENER_QUERY = 0x65, + MLXSW_TRAP_ID_IPV6_MLDV1_LISTENER_REPORT = 0x66, + MLXSW_TRAP_ID_IPV6_MLDV1_LISTENER_DONE = 0x67, + MLXSW_TRAP_ID_IPV6_MLDV2_LISTENER_REPORT = 0x68, + MLXSW_TRAP_ID_IPV6_DHCP = 0x69, + MLXSW_TRAP_ID_IPV6_ALL_ROUTERS_LINK = 0x6F, + MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70, + MLXSW_TRAP_ID_RTR_INGRESS1 = 0x71, + MLXSW_TRAP_ID_IPV6_PIM = 0x79, + MLXSW_TRAP_ID_IPV6_VRRP = 0x7A, + MLXSW_TRAP_ID_RTR_EGRESS0 = 0x80, + MLXSW_TRAP_ID_IPV4_BGP = 0x88, + MLXSW_TRAP_ID_IPV6_BGP = 0x89, + 
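+	/* IPv6 Neighbour Discovery and Redirect messages */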
MLXSW_TRAP_ID_L3_IPV6_ROUTER_SOLICITATION = 0x8A, + MLXSW_TRAP_ID_L3_IPV6_ROUTER_ADVERTISEMENT = 0x8B, + MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_SOLICITATION = 0x8C, + MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_ADVERTISEMENT = 0x8D, + MLXSW_TRAP_ID_L3_IPV6_REDIRECTION = 0x8E, + MLXSW_TRAP_ID_IPV4_DHCP = 0x8F, + MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90, + MLXSW_TRAP_ID_IPV6_MC_LINK_LOCAL_DEST = 0x91, + MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92, + MLXSW_TRAP_ID_IPIP_DECAP_ERROR = 0xB1, + MLXSW_TRAP_ID_NVE_DECAP_ARP = 0xB8, + MLXSW_TRAP_ID_NVE_ENCAP_ARP = 0xBD, + MLXSW_TRAP_ID_IPV4_BFD = 0xD0, + MLXSW_TRAP_ID_IPV6_BFD = 0xD1, + MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6, + MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7, + MLXSW_TRAP_ID_ROUTER_ARPBC = 0xE0, + MLXSW_TRAP_ID_ROUTER_ARPUC = 0xE1, + MLXSW_TRAP_ID_DISCARD_NON_ROUTABLE = 0x11A, + MLXSW_TRAP_ID_DISCARD_ROUTER2 = 0x130, + MLXSW_TRAP_ID_DISCARD_ROUTER3 = 0x131, + MLXSW_TRAP_ID_DISCARD_ING_PACKET_SMAC_MC = 0x140, + MLXSW_TRAP_ID_DISCARD_ING_SWITCH_VTAG_ALLOW = 0x148, + MLXSW_TRAP_ID_DISCARD_ING_SWITCH_VLAN = 0x149, + MLXSW_TRAP_ID_DISCARD_ING_SWITCH_STP = 0x14A, + MLXSW_TRAP_ID_DISCARD_LOOKUP_SWITCH_UC = 0x150, + MLXSW_TRAP_ID_DISCARD_LOOKUP_SWITCH_MC_NULL = 0x151, + MLXSW_TRAP_ID_DISCARD_LOOKUP_SWITCH_LB = 0x152, + MLXSW_TRAP_ID_DISCARD_ING_ROUTER_NON_IP_PACKET = 0x160, + MLXSW_TRAP_ID_DISCARD_ING_ROUTER_UC_DIP_MC_DMAC = 0x161, + MLXSW_TRAP_ID_DISCARD_ING_ROUTER_DIP_LB = 0x162, + MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_MC = 0x163, + MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_CLASS_E = 0x164, + MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_LB = 0x165, + MLXSW_TRAP_ID_DISCARD_ING_ROUTER_CORRUPTED_IP_HDR = 0x167, + MLXSW_TRAP_ID_DISCARD_ING_ROUTER_MC_DMAC = 0x168, + MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_DIP = 0x169, + MLXSW_TRAP_ID_DISCARD_ING_ROUTER_IPV4_SIP_BC = 0x16A, + MLXSW_TRAP_ID_DISCARD_ING_ROUTER_IPV4_DIP_LOCAL_NET = 0x16B, + MLXSW_TRAP_ID_DISCARD_ING_ROUTER_DIP_LINK_LOCAL = 0x16C, + MLXSW_TRAP_ID_DISCARD_ROUTER_IRIF_EN = 0x178, + MLXSW_TRAP_ID_DISCARD_ROUTER_ERIF_EN = 0x179, + MLXSW_TRAP_ID_DISCARD_ROUTER_LPM4 = 0x17B, + MLXSW_TRAP_ID_DISCARD_ROUTER_LPM6 = 0x17C, + MLXSW_TRAP_ID_DISCARD_DEC_PKT = 0x188, + MLXSW_TRAP_ID_DISCARD_OVERLAY_SMAC_MC = 0x190, + MLXSW_TRAP_ID_DISCARD_IPV6_MC_DIP_RESERVED_SCOPE = 0x1B0, + MLXSW_TRAP_ID_DISCARD_IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE = 0x1B1, + MLXSW_TRAP_ID_ACL0 = 0x1C0, + /* Multicast trap used for routes with trap action */ + MLXSW_TRAP_ID_ACL1 = 0x1C1, + /* Multicast trap used for routes with trap-and-forward action */ + MLXSW_TRAP_ID_ACL2 = 0x1C2, + MLXSW_TRAP_ID_DISCARD_INGRESS_ACL = 0x1C3, + MLXSW_TRAP_ID_DISCARD_EGRESS_ACL = 0x1C4, + MLXSW_TRAP_ID_MIRROR_SESSION0 = 0x220, + MLXSW_TRAP_ID_MIRROR_SESSION1 = 0x221, + MLXSW_TRAP_ID_MIRROR_SESSION2 = 0x222, + MLXSW_TRAP_ID_MIRROR_SESSION3 = 0x223, + MLXSW_TRAP_ID_MIRROR_SESSION4 = 0x224, + MLXSW_TRAP_ID_MIRROR_SESSION5 = 0x225, + MLXSW_TRAP_ID_MIRROR_SESSION6 = 0x226, + MLXSW_TRAP_ID_MIRROR_SESSION7 = 0x227, + + MLXSW_TRAP_ID_MAX = 0x3FF, +}; + +enum mlxsw_event_trap_id { + /* Fatal Event generated by FW */ + MLXSW_TRAP_ID_MFDE = 0x3, + /* Port Up/Down event generated by hardware */ + MLXSW_TRAP_ID_PUDE = 0x8, + /* Port Module Plug/Unplug Event generated by hardware */ + MLXSW_TRAP_ID_PMPE = 0x9, + /* Temperature Warning event generated by hardware */ + MLXSW_TRAP_ID_MTWE = 0xC, + /* PTP Ingress FIFO has a new entry */ + MLXSW_TRAP_ID_PTP_ING_FIFO = 0x2D, + /* PTP Egress FIFO has a new entry */ + MLXSW_TRAP_ID_PTP_EGR_FIFO = 0x2E, + /* Downstream Device Status Change */ + MLXSW_TRAP_ID_DSDSC = 0x321, + 
/* Binary Code Transfer Operation Executed Event */ + MLXSW_TRAP_ID_BCTOE = 0x322, + /* Port mapping change */ + MLXSW_TRAP_ID_PMLPE = 0x32E, +}; + +#endif /* _MLXSW_TRAP_H */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/txheader.h b/drivers/net/ethernet/mellanox/mlxsw/txheader.h new file mode 100644 index 000000000..da51dd9d5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/txheader.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_TXHEADER_H +#define _MLXSW_TXHEADER_H + +#define MLXSW_TXHDR_LEN 0x10 +#define MLXSW_TXHDR_VERSION_0 0 +#define MLXSW_TXHDR_VERSION_1 1 + +enum { + MLXSW_TXHDR_ETH_CTL, + MLXSW_TXHDR_ETH_DATA, +}; + +#define MLXSW_TXHDR_PROTO_ETH 1 + +enum { + MLXSW_TXHDR_ETCLASS_0, + MLXSW_TXHDR_ETCLASS_1, + MLXSW_TXHDR_ETCLASS_2, + MLXSW_TXHDR_ETCLASS_3, + MLXSW_TXHDR_ETCLASS_4, + MLXSW_TXHDR_ETCLASS_5, + MLXSW_TXHDR_ETCLASS_6, + MLXSW_TXHDR_ETCLASS_7, +}; + +enum { + MLXSW_TXHDR_RDQ_OTHER, + MLXSW_TXHDR_RDQ_EMAD = 0x1f, +}; + +#define MLXSW_TXHDR_CTCLASS3 0 +#define MLXSW_TXHDR_CPU_SIG 0 +#define MLXSW_TXHDR_SIG 0xE0E0 +#define MLXSW_TXHDR_STCLASS_NONE 0 + +enum { + MLXSW_TXHDR_NOT_EMAD, + MLXSW_TXHDR_EMAD, +}; + +enum { + MLXSW_TXHDR_TYPE_DATA, + MLXSW_TXHDR_TYPE_CONTROL = 6, +}; + +#endif |
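The trap policer path in spectrum_trap.c above (mlxsw_sp_trap_policer_bs() and __mlxsw_sp_trap_policer_set()) only accepts burst sizes that are exact powers of two, because the QPCR register stores the burst as an exponent rather than a raw packet count. Below is a minimal standalone sketch of that check, assuming a userspace build with a GCC/Clang builtin standing in for the kernel's fls64(); the function names and test values are illustrative and are not part of the driver.

/* Standalone sketch (not driver code): mirrors the burst-size encoding
 * performed by mlxsw_sp_trap_policer_bs(). devlink hands the driver a burst
 * in packets; the hardware wants log2 of it, so the value must be an exact
 * power of two.
 */
#include <stdint.h>
#include <stdio.h>

static int fls64_emul(uint64_t x)
{
	/* Emulates the kernel's fls64(): index of the highest set bit, 1-based. */
	return x ? 64 - __builtin_clzll(x) : 0;
}

static int policer_burst_to_bs(uint64_t burst, uint8_t *p_burst_size)
{
	int bs;

	if (!burst)	/* devlink enforces a minimum burst before this point */
		return -1;

	bs = fls64_emul(burst) - 1;
	if (burst != (1ULL << bs))
		return -1;	/* rejected; the driver reports this via extack */

	*p_burst_size = (uint8_t)bs;
	return 0;
}

int main(void)
{
	uint64_t bursts[] = { 128, 512, 4096, 8192, 100 };
	uint8_t bs;

	for (unsigned int i = 0; i < sizeof(bursts) / sizeof(bursts[0]); i++) {
		if (policer_burst_to_bs(bursts[i], &bs))
			printf("burst %llu: rejected\n",
			       (unsigned long long)bursts[i]);
		else
			printf("burst %llu -> bs %u\n",
			       (unsigned long long)bursts[i], bs);
	}
	return 0;
}

With these inputs, 128, 512, 4096 and 8192 map to exponents 7, 9, 12 and 13, while 100 is rejected, matching the driver's "Policer burst size is not power of two" extack error.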